diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 750c71d5..5d70d495 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,19 +15,20 @@ concurrency: cancel-in-progress: true jobs: - check: + typescript-checks: + name: TypeScript checks runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Setup pnpm - uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 with: run_install: false - name: Setup Node.js - uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: "24" cache: "pnpm" @@ -39,19 +40,101 @@ jobs: - name: Run TypeScript checks run: pnpm run check - - name: Run slow TypeScript tests - run: pnpm run test:slow + slow-context-tests: + name: Slow context tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Build TypeScript packages + run: pnpm run build + + - name: Run slow context tests + run: pnpm --filter @ktx/context run test:slow + + slow-cli-tests: + name: Slow CLI tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: 
false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Build TypeScript packages + run: pnpm run build + + - name: Run slow CLI tests + run: pnpm --filter @ktx/cli run test:slow + + cli-smoke-tests: + name: CLI smoke tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile - name: Run CLI smoke tests run: pnpm run smoke + python-checks: + name: Python checks + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Setup Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.13" - name: Setup uv - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -62,11 +145,47 @@ jobs: - name: Run Python checks run: uv run pytest + artifact-checks: + name: Artifact checks + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: 
actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Setup Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.13" + + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install Python dependencies + run: uv sync --all-packages + - name: Build and verify package artifacts run: pnpm run artifacts:check - name: Upload package artifacts - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: ktx-package-artifacts-${{ github.sha }} path: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 16c9f1e2..2a8f696e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,12 +24,12 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Setup pnpm - uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 with: run_install: false - name: Setup Node.js - uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: "24" cache: "pnpm" @@ -44,7 +44,7 @@ jobs: python-version: "3.13" - name: Setup uv - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: enable-cache: true cache-dependency-glob: "uv.lock" diff --git a/README.md b/README.md index 8b4d32a2..84592226 100644 --- a/README.md +++ 
b/README.md @@ -1,312 +1,167 @@ -# KTX +

+ KTX +

-KTX is a workspace-first context layer for database agents. It stores warehouse -memory in a project directory, generates and validates semantic-layer YAML, -indexes knowledge, scans database schemas, and exposes the result through a CLI -and MCP server. +

+ The context layer for analytics agents +

-KTX projects are plain files: YAML, Markdown, SQLite state, and generated -artifacts. You can inspect them, commit them, and serve them to any MCP client. +

+ npm version + License + GitHub stars +

-## What KTX provides +--- -- Durable warehouse memory with semantic-layer sources and knowledge pages. -- Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server, - BigQuery, and Snowflake. -- Agentic ingest with provenance links, tool transcripts, and replay metadata. -- Local semantic-layer query planning and optional query execution. -- A stdio MCP server with tools for connections, knowledge, semantic-layer - sources, ingest reports, and replay. +KTX turns warehouse metadata, semantic definitions, and business knowledge into +reviewable project files that agents can use while planning, querying, and +updating analytics work. + +A KTX project is a directory of plain files — YAML semantic sources, Markdown +knowledge pages, and SQLite state — that you commit to git and review in PRs, +just like dbt models. + +## Who KTX is for + +KTX is built for analytics engineers and data teams who want data agents to +work on real analytics systems — not just generate one-off SQL. + +Use KTX when you want agents to: + +- **Generate SQL** from approved measures and joins +- **Repair semantic definitions** through reviewable diffs +- **Explain metric provenance** with warehouse evidence +- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI + platforms + +Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and +SQLite. ## Quick start -Run the pre-seeded demo through the public npm package: +Install the CLI and run the setup wizard: ```bash -npx @kaelio/ktx setup demo --no-input -npx @kaelio/ktx setup demo inspect -``` - -The default demo uses packaged sample data and prebuilt context. It does not -require API keys, network access, or an LLM provider. 
- -To replay the packaged ingest run, use: - -```bash -npx @kaelio/ktx setup demo --mode replay --no-input -``` - -To run the full agentic demo with an LLM provider, set a provider key for the -current process: - -```bash -ANTHROPIC_API_KEY=$YOUR_ANTHROPIC_API_KEY \ - npx @kaelio/ktx setup demo --mode full --no-input -``` - -Interactive full-demo setup can prompt for a provider key without writing the -key to `ktx.yaml`. - -You can also install the CLI in a project or globally: - -```bash -npm install @kaelio/ktx -npx ktx --help npm install -g @kaelio/ktx -ktx --help +ktx setup ``` -## Build a local project +The wizard walks through six steps: configuring your LLM provider, setting up +embeddings, connecting your database, adding context sources (dbt, LookML, +Metabase, Looker, Notion), building context, and installing agent integration. -Create a project from a local workspace: +If it exits before completion, rerun `ktx setup` to resume where you left off. + +Check your project status: ```bash -npm install @kaelio/ktx -PROJECT_DIR="$(mktemp -d)/ktx-demo" -npx ktx init "$PROJECT_DIR" --name ktx-demo +ktx status ``` -Create a SQLite warehouse: +``` +KTX project: /home/user/analytics +Project ready: yes +LLM ready: yes (claude-sonnet-4-6) +Embeddings ready: yes (text-embedding-3-small) +Primary sources configured: yes (postgres-warehouse) +Context sources configured: yes (dbt-main) +KTX context built: yes +Agent integration ready: yes (claude-code:project) +``` + +## What's in a project + +``` +my-project/ +├── ktx.yaml # Project configuration +├── semantic-layer/ +│ └── warehouse/ +│ ├── orders.yaml # Semantic source definitions +│ ├── customers.yaml +│ └── order_items.yaml +├── knowledge/ +│ ├── global/ +│ │ ├── revenue.md # Business definitions and rules +│ │ └── segment-classification.md +│ └── user/ +│ └── local/ +├── raw-sources/ +│ └── warehouse/ +│ └── live-database/ # Scan artifacts and reports +└── .ktx/ + └── db.sqlite # Local state (git-ignored) +``` + 
+Semantic sources and knowledge pages are committed to git. The `.ktx/` directory +holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the +next run. + +## Serve agents + +KTX integrates with coding agents through CLI skills, an MCP server, or both. +The setup wizard configures this automatically — here's what each mode looks +like. + +**CLI skills** — the agent calls `ktx` commands directly through a skill file +installed in your agent's config (e.g., `.claude/skills/ktx/SKILL.md`): ```bash -python - "$PROJECT_DIR/demo.db" <<'PY' -import sqlite3 -import sys - -conn = sqlite3.connect(sys.argv[1]) -conn.executescript(""" -DROP TABLE IF EXISTS accounts; -CREATE TABLE accounts ( - account_id INTEGER PRIMARY KEY, - account_name TEXT NOT NULL, - segment TEXT NOT NULL, - region TEXT NOT NULL -); -INSERT INTO accounts VALUES - (1, 'Acme Analytics', 'Mid-Market', 'NA'), - (2, 'Beacon Bank', 'Enterprise', 'EMEA'), - (3, 'Cobalt Coffee', 'SMB', 'NA'), - (4, 'Delta Devices', 'Mid-Market', 'APAC'), - (5, 'Evergreen Energy', 'Enterprise', 'NA'); -""") -conn.close() -PY +ktx sl query --measure orders.revenue --dimension orders.status --format sql +ktx wiki search "revenue definition" +ktx sl validate orders ``` -Replace the generated `ktx.yaml`: +**MCP server** — the agent calls KTX tools over the Model Context Protocol: ```bash -cat > "$PROJECT_DIR/ktx.yaml" <" -memory: - auto_commit: true -YAML -``` - -Write and validate a semantic-layer source: - -```bash -npx ktx sl write accounts --project-dir "$PROJECT_DIR" \ - --connection-id warehouse --yaml 'name: accounts -table: accounts -description: CRM accounts with segmentation attributes. 
-grain: - - account_id -columns: - - name: account_id - type: number - - name: account_name - type: string - - name: segment - type: string - - name: region - type: string -measures: - - name: account_count - expr: count(account_id) -joins: [] -' - -npx ktx sl validate accounts --project-dir "$PROJECT_DIR" \ - --connection-id warehouse -``` - -Generate SQL and execute the query: - -```bash -npx ktx sl query --project-dir "$PROJECT_DIR" \ - --connection-id warehouse \ - --measure accounts.account_count \ - --dimension accounts.segment \ - --order-by accounts.account_count:desc \ - --limit 5 \ - --format sql - -npx ktx sl query --project-dir "$PROJECT_DIR" \ - --connection-id warehouse \ - --measure accounts.account_count \ - --dimension accounts.segment \ - --order-by accounts.account_count:desc \ - --limit 5 \ - --execute \ - --max-rows 5 -``` - -List and test the warehouse connection: - -```bash -npx ktx connection list --project-dir "$PROJECT_DIR" -npx ktx connection test warehouse --project-dir "$PROJECT_DIR" -``` - -The connection test prints the configured driver and discovered table count: - -```text -Driver: sqlite -Tables: 1 -``` - -### Scan the demo warehouse - -Scan artifacts are written under -`raw-sources/warehouse/live-database//` in the project directory. - -```bash - -SCAN_OUTPUT="$(npx ktx scan warehouse --project-dir "$PROJECT_DIR")" -printf '%s\n' "$SCAN_OUTPUT" -SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')" -npx ktx scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" -npx ktx scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" -``` - -For non-SQLite drivers, prefer credential references such as `--url env:NAME` -or `--url file:PATH` over literal credential URLs. - -## Managed Python runtime - -KTX installs its Python runtime only when a Python-backed command needs it. -The runtime lives outside the npm cache, is versioned by the installed CLI -version, and is managed by `ktx runtime` commands. 
- -KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with -your system package manager or the official installer before running Python- -backed KTX commands. KTX doesn't download `uv` automatically; run -`ktx runtime doctor` if runtime installation fails: - -```bash -npx ktx runtime install --yes -npx ktx runtime status -npx ktx runtime doctor -npx ktx runtime start -npx ktx runtime stop -npx ktx runtime prune --dry-run -npx ktx runtime prune --yes -``` - -Use `runtime prune --dry-run` to preview stale runtime directories from older -CLI versions. Add `--yes` to remove those stale directories after daemon -processes are stopped. - -Commands such as `npx @kaelio/ktx sl query ... --yes` can install the core -runtime lazily from the bundled wheel. Local embeddings remain lazy; prepare -them only when you select local `sentence-transformers` embeddings: - -```bash -npx ktx runtime install --feature local-embeddings --yes -npx ktx runtime start --feature local-embeddings -``` - -## Serve MCP - -Start the stdio MCP server from the project directory: - -```bash -npx ktx serve --mcp stdio --project-dir "$PROJECT_DIR" \ +ktx serve --mcp stdio \ --user-id local \ --semantic-compute \ --execute-queries \ --yes ``` -The `--semantic-compute` flag uses the managed Python runtime when no explicit -semantic compute URL is provided. KTX starts or reuses the managed runtime as -needed. +This exposes tools for connections, knowledge search, semantic-layer sources, +validation, queries, ingestion, and replay. The `--semantic-compute` flag starts +the managed Python runtime for query planning automatically. -The MCP server exposes `connection_list`, `knowledge_search`, -`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`, -`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`, -`ingest_status`, `ingest_report`, and `ingest_replay`. 
+Supported agents: Claude Code, Codex, Cursor, OpenCode, and any agent that +reads `.agents/` skills or MCP configuration. ## Workspace packages -- `packages/context`: core TypeScript context library. -- `packages/cli`: CLI wrapper over the context package. -- `packages/llm`: LLM and embedding provider helpers. -- `packages/connector-bigquery`: BigQuery scan connector. -- `packages/connector-clickhouse`: ClickHouse scan connector. -- `packages/connector-mysql`: MySQL scan connector. -- `packages/connector-postgres`: Postgres scan connector. -- `packages/connector-snowflake`: Snowflake scan connector. -- `packages/connector-sqlite`: SQLite scan connector. -- `packages/connector-sqlserver`: SQL Server scan connector. -- `python/ktx-sl`: semantic-layer engine. -- `python/ktx-daemon`: portable compute service for semantic-layer operations. +| Package | Purpose | +|---------|---------| +| `packages/cli` | CLI entry point | +| `packages/context` | Core context engine | +| `packages/llm` | LLM and embedding providers | +| `packages/connector-*` | Database connectors (Postgres, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, SQLite) | +| `python/ktx-sl` | Semantic-layer query planning | +| `python/ktx-daemon` | Portable compute service | ## Development -Install dependencies and run checks: - ```bash +git clone https://github.com/kaelio/ktx.git +cd ktx pnpm install +uv sync --all-groups +pnpm run build pnpm run check -uv sync --all-packages -source .venv/bin/activate -uv run pytest ``` -Use the optional development binary when you want a local `ktx-dev` command: +Use the development CLI for local testing: ```bash +pnpm run setup:dev pnpm run link:dev ktx-dev --help ``` The repository uses `pnpm` for TypeScript packages and `uv` for Python -packages. - -## Release status - -This repository builds one public npm artifact named `@kaelio/ktx`. The release -artifact manifest contains the public npm tarball and the bundled `kaelio-ktx` -runtime wheel. 
The first public npm handoff is policy-gated through -`release-policy.json`, which keeps Python package publishing disabled because -KTX-owned Python code ships inside the npm package as a bundled wheel. The -`python/ktx-sl` and `python/ktx-daemon` directories remain source packages for -development, not public release artifacts. - -Build local package artifacts and verify the guarded dry-run publish path with: - -```bash -source .venv/bin/activate -pnpm run artifacts:check -pnpm run release:readiness -pnpm run release:npm-publish -``` - -Run the live npm publish only from the manual `KTX Release` workflow with the -`publish_live` input enabled after the `NPM_TOKEN` secret is configured. +packages. See [Contributing](docs-site/content/docs/community/contributing.mdx) +for full development setup, testing, and PR guidelines. ## License diff --git a/assets/ktx-readme-header.png b/assets/ktx-readme-header.png new file mode 100644 index 00000000..11cfb4e4 Binary files /dev/null and b/assets/ktx-readme-header.png differ diff --git a/docs-site/app/docs/[[...slug]]/page.tsx b/docs-site/app/docs/[[...slug]]/page.tsx index f2b560e6..1e0c18ad 100644 --- a/docs-site/app/docs/[[...slug]]/page.tsx +++ b/docs-site/app/docs/[[...slug]]/page.tsx @@ -5,23 +5,51 @@ import { DocsTitle, DocsDescription, } from "fumadocs-ui/page"; -import { notFound } from "next/navigation"; +import { notFound, redirect } from "next/navigation"; import defaultMdxComponents from "fumadocs-ui/mdx"; import { CodeBlock } from "@/components/code-block"; +import { DocsPageActions } from "@/components/docs-page-actions"; + +const docsIndexPath = "/docs/getting-started/introduction"; +const docsIndexSlug = ["getting-started", "introduction"] as const; + +function isDocsIndex(slug: string[] | undefined) { + return slug === undefined || slug.length === 0 || slug.join("/") === ""; +} + +function isHeroPage(slug: string[] | undefined) { + return slug?.join("/") === "getting-started/introduction"; +} export default 
async function Page(props: { params: Promise<{ slug?: string[] }>; }) { const params = await props.params; + if (isDocsIndex(params.slug)) { + redirect(docsIndexPath); + } + const page = source.getPage(params.slug); if (!page) notFound(); const MDX = page.data.body; + const hero = isHeroPage(params.slug); + return ( - {page.data.title} - {page.data.description} + {!hero && ( + <> +
+ {page.data.title} + +
+ {page.data.description} + + )} @@ -30,14 +58,16 @@ export default async function Page(props: { } export function generateStaticParams() { - return source.generateParams(); + return [{ slug: [""] }, ...source.generateParams()]; } export async function generateMetadata(props: { params: Promise<{ slug?: string[] }>; }) { const params = await props.params; - const page = source.getPage(params.slug); + const page = source.getPage( + isDocsIndex(params.slug) ? [...docsIndexSlug] : params.slug, + ); if (!page) notFound(); return { diff --git a/docs-site/app/global.css b/docs-site/app/global.css index d9570d79..f3425e78 100644 --- a/docs-site/app/global.css +++ b/docs-site/app/global.css @@ -188,6 +188,24 @@ pre { border-radius: 0 !important; } +.ktx-code code { + display: grid; + min-width: max-content; + padding: 0 !important; + border: 0 !important; + border-radius: 0 !important; + background: transparent !important; + font-size: inherit !important; + line-height: inherit !important; + color: inherit; +} + +.ktx-code .line { + display: block; + min-height: 1.7em; + padding-inline: 0 !important; +} + /* Neutralize the outer figure styling that our wrapper now owns */ figure:has(> .ktx-code), figure[data-rehype-pretty-code-figure]:has(.ktx-code) { @@ -244,6 +262,74 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) { color: #c8c3bc !important; } +/* ── Mode D: Output preview (wizard prompts, status output) ── */ +.ktx-code-output { + background: var(--color-fd-muted); + border: 1px solid var(--color-fd-border); + border-left: 3px solid color-mix(in oklch, var(--color-fd-primary) 50%, var(--color-fd-border)); + position: relative; + box-shadow: 0 1px 2px rgba(27, 27, 24, 0.02); +} + +.dark .ktx-code-output { + background: #111a1e; + border-color: rgba(255, 255, 255, 0.05); + border-left-color: rgba(34, 211, 238, 0.25); +} + +.ktx-code-output:hover { + border-color: color-mix(in oklch, var(--color-fd-primary) 25%, var(--color-fd-border)); + border-left-color: 
var(--color-fd-primary); +} + +.dark .ktx-code-output:hover { + border-color: rgba(255, 255, 255, 0.08); + border-left-color: rgba(34, 211, 238, 0.45); +} + +.ktx-code-output-label { + position: absolute; + top: 8px; + right: 14px; + font-size: 10px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--color-fd-muted-foreground); + font-family: var(--font-display), var(--font-sans), sans-serif; + opacity: 0.4; + pointer-events: none; + z-index: 1; +} + +.ktx-code-output-copy { + position: absolute !important; + top: 6px !important; + right: 6px !important; + opacity: 0; + transform: translateY(-4px); + transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease); + z-index: 2; +} + +.ktx-code-output:hover .ktx-code-output-copy { + opacity: 0.5; + transform: translateY(0); +} + +.ktx-code-output:hover .ktx-code-output-label { + opacity: 0; +} + +.ktx-code-body-output { + background: transparent !important; + color: var(--ktx-ink-soft) !important; +} + +.dark .ktx-code-body-output { + color: #8a9da6 !important; +} + /* ── Mode B: VS Code tab (filename) ───────── */ .ktx-code-tab { background: var(--color-fd-card); @@ -477,14 +563,20 @@ th { opacity: 0.7; } +/* Hide the vertical indicator lines in sidebar sections */ +#nd-sidebar div[data-state]::before, +#nd-sidebar a[data-active]::before { + content: none !important; + display: none !important; +} + /* Page link items */ #nd-sidebar a[data-active] { font-size: 14px; padding: 6px 12px; border-radius: 6px; margin-left: 0; - border-left: 2px solid transparent; - transition: background 0.15s ease, color 0.15s ease, border-color 0.15s ease; + transition: background 0.15s ease, color 0.15s ease; } #nd-sidebar a[data-active="false"]:hover { @@ -494,7 +586,6 @@ th { #nd-sidebar a[data-active="true"] { background: color-mix(in oklch, var(--color-fd-primary) 8%, transparent) !important; - border-left-color: var(--color-fd-primary) !important; color: var(--color-fd-primary) 
!important; font-weight: 500; } diff --git a/docs-site/app/llms-full.txt/route.ts b/docs-site/app/llms-full.txt/route.ts new file mode 100644 index 00000000..0edf170c --- /dev/null +++ b/docs-site/app/llms-full.txt/route.ts @@ -0,0 +1,11 @@ +import { buildLlmsFullTxt } from "@/lib/llm-docs"; + +export const dynamic = "force-static"; + +export async function GET() { + return new Response(await buildLlmsFullTxt(), { + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + }); +} diff --git a/docs-site/app/llms.mdx/docs/[[...slug]]/route.ts b/docs-site/app/llms.mdx/docs/[[...slug]]/route.ts new file mode 100644 index 00000000..1372d556 --- /dev/null +++ b/docs-site/app/llms.mdx/docs/[[...slug]]/route.ts @@ -0,0 +1,33 @@ +import { + getLlmDocsPage, + getLlmDocsPages, + getPageMarkdown, +} from "@/lib/llm-docs"; + +export const dynamic = "force-static"; + +export async function GET( + _request: Request, + props: { params: Promise<{ slug?: string[] }> }, +) { + const params = await props.params; + const page = getLlmDocsPage(params.slug); + if (!page) { + return new Response("Documentation page not found.\n", { + status: 404, + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + }); + } + + return new Response(await getPageMarkdown(page), { + headers: { + "Content-Type": "text/markdown; charset=utf-8", + }, + }); +} + +export function generateStaticParams() { + return getLlmDocsPages().map((page) => ({ slug: page.slug })); +} diff --git a/docs-site/app/llms.txt/route.ts b/docs-site/app/llms.txt/route.ts new file mode 100644 index 00000000..7b65782a --- /dev/null +++ b/docs-site/app/llms.txt/route.ts @@ -0,0 +1,11 @@ +import { buildLlmsTxt } from "@/lib/llm-docs"; + +export const dynamic = "force-static"; + +export function GET() { + return new Response(buildLlmsTxt(), { + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + }); +} diff --git a/docs-site/components/code-block.tsx b/docs-site/components/code-block.tsx index 
37c7a44e..15ae5ce7 100644 --- a/docs-site/components/code-block.tsx +++ b/docs-site/components/code-block.tsx @@ -52,15 +52,14 @@ export function CodeBlock(props: Props) { const language = detectLanguage(props, children); const codeText = extractText(children); - const isTerminal = - (language !== null && TERMINAL_LANGS.has(language)) || - WIZARD_GLYPHS.test(codeText); + const isTerminal = language !== null && TERMINAL_LANGS.has(language); + const isOutput = !isTerminal && WIZARD_GLYPHS.test(codeText); const hasTitle = typeof title === "string" && title.length > 0; - // Mode A — Terminal + // Mode A — Terminal (commands the user types) if (isTerminal) { return ( -
+
@@ -80,10 +79,23 @@ export function CodeBlock(props: Props) { ); } + // Mode D — Output preview (wizard prompts, terminal output) + if (isOutput) { + return ( +
+ output + +
+          {children}
+        
+
+ ); + } + // Mode B — VS Code tab (filename present) if (hasTitle) { return ( -
+
{title} @@ -99,7 +111,7 @@ export function CodeBlock(props: Props) { // Mode C — Minimal default return ( -
+
{language && {language}}
diff --git a/docs-site/components/docs-page-actions.tsx b/docs-site/components/docs-page-actions.tsx
new file mode 100644
index 00000000..95bf93a4
--- /dev/null
+++ b/docs-site/components/docs-page-actions.tsx
@@ -0,0 +1,110 @@
+"use client";
+
+import { useState } from "react";
+
+/** State of a copy button: resting ("idle"), just copied, or copy failed. */
+type CopyState = "idle" | "copied" | "error";
+
+/** Props accepted by `DocsPageActions`. */
+type Props = {
+  markdownUrl: string;
+  mdxSource: string;
+};
+
+/**
+ * Exported docs-page actions component; receives the page's `markdownUrl` and
+ * `mdxSource` as props.
+ *
+ * NOTE(review): the JSX inside `return (...)` is empty in this copy of the
+ * diff, so what is rendered cannot be confirmed here — presumably the copy
+ * buttons defined below in this file; verify against the real file.
+ */
+export function DocsPageActions({ markdownUrl, mdxSource }: Props) {
+  return (
+    
+  );
+}
+
+/**
+ * Copy action that fetches `markdownUrl` (requesting `text/markdown`) and
+ * writes the response text to the clipboard.
+ *
+ * Anything thrown inside the `try` block — including the explicit error for a
+ * non-OK response — is caught and shown as the transient "error" state via
+ * `flash`; success is shown as "copied".
+ *
+ * NOTE(review): `useState` has no type argument here and the JSX inside
+ * `return (...)` is empty in this copy of the diff — presumably
+ * `useState<CopyState>` and an `ActionButton` element; confirm against the
+ * real file.
+ */
+function CopyMarkdownButton({ markdownUrl }: { markdownUrl: string }) {
+  const [state, setState] = useState("idle");
+
+  const onClick = async () => {
+    try {
+      const response = await fetch(markdownUrl, {
+        headers: { Accept: "text/markdown" },
+      });
+
+      // Route non-OK responses to the catch block so they flash "error".
+      if (!response.ok) {
+        throw new Error(`Failed to fetch ${markdownUrl}`);
+      }
+
+      await navigator.clipboard.writeText(await response.text());
+      flash(setState, "copied");
+    } catch {
+      flash(setState, "error");
+    }
+  };
+
+  return (
+    
+  );
+}
+
+/**
+ * Copy action that writes the given `text` to the clipboard.
+ *
+ * A successful write flashes the "copied" state; any error thrown by the
+ * clipboard call is caught and flashes the "error" state instead.
+ *
+ * NOTE(review): `useState` has no type argument here and the JSX inside
+ * `return (...)` is empty in this copy of the diff, so how `label` and
+ * `state` are used cannot be confirmed — verify against the real file.
+ */
+function CopyTextButton({ label, text }: { label: string; text: string }) {
+  const [state, setState] = useState("idle");
+
+  const onClick = async () => {
+    try {
+      await navigator.clipboard.writeText(text);
+      flash(setState, "copied");
+    } catch {
+      flash(setState, "error");
+    }
+  };
+
+  return (
+    
+  );
+}
+
+/**
+ * Button component taking a `label`, an `onClick` handler, and the current
+ * `CopyState`.
+ *
+ * NOTE(review): the JSX inside `return (...)` is empty in this copy of the
+ * diff — presumably a button whose text comes from `labelForState(state,
+ * label)`; confirm against the real file.
+ */
+function ActionButton({
+  label,
+  onClick,
+  state,
+}: {
+  label: string;
+  onClick: () => void;
+  state: CopyState;
+}) {
+  return (
+    
+  );
+}
+
+/**
+ * Picks the text for a copy button from its current state.
+ *
+ * @param state - Current copy state.
+ * @param label - Text to use when the state is neither "copied" nor "error".
+ * @returns "Copied", "Copy failed", or the supplied `label`.
+ */
+function labelForState(state: CopyState, label: string) {
+  switch (state) {
+    case "copied":
+      return "Copied";
+    case "error":
+      return "Copy failed";
+    default:
+      return label;
+  }
+}
+
+/**
+ * Shows a transient copy state, then resets to "idle" after 1500 ms.
+ *
+ * @param setState - Setter that receives the copy state to display.
+ * @param state - Transient state to show; "idle" is excluded because it is
+ *   the value this function resets to.
+ */
+function flash(
+  setState: (state: CopyState) => void,
+  // `Exclude` requires both type arguments; a bare `Exclude` does not compile.
+  state: Exclude<CopyState, "idle">,
+) {
+  setState(state);
+  // Schedule the return to the resting state.
+  window.setTimeout(() => setState("idle"), 1500);
+}
diff --git a/docs-site/components/logo.tsx b/docs-site/components/logo.tsx
index e47cc394..db67a689 100644
--- a/docs-site/components/logo.tsx
+++ b/docs-site/components/logo.tsx
@@ -2,42 +2,12 @@ export function Logo() {
   return (
     
- + className="h-8 w-8 object-contain" + />
.md over rendered HTML. +4. Use https://docs.kaelio.com/ktx/llms-full.txt only when the task needs broad docs context. +5. Quote commands exactly from docs pages. +6. If docs and local repository behavior disagree, say what differs and prefer local verified output for code changes. +``` + +## What this is for + +This page is for documentation consumption only: + +- answering questions about KTX +- finding the right docs page +- citing setup or CLI guidance +- helping an assistant avoid stale or invented commands + +It does not describe local tool configuration. + +## Minimal project prompt + +```text +You are helping with KTX. Read https://docs.kaelio.com/ktx/llms.txt first, then fetch only the Markdown pages needed for the task. Do not scrape the rendered docs site when a .md route exists. +``` + +## Repository prompt + +```text +Before editing KTX docs, read /llms.txt and the affected .md docs pages. Keep AI Resources focused on docs consumption. After editing, verify /llms.txt, /llms-full.txt, and any changed .md routes. +``` diff --git a/docs-site/content/docs/ai-resources/agent-quickstart.mdx b/docs-site/content/docs/ai-resources/agent-quickstart.mdx new file mode 100644 index 00000000..40983224 --- /dev/null +++ b/docs-site/content/docs/ai-resources/agent-quickstart.mdx @@ -0,0 +1,50 @@ +--- +title: Agent Quickstart +description: A task-first route for coding agents that need to understand KTX docs. +--- + +This page is for coding assistants reading or citing the KTX docs. It is intentionally limited to documentation lookup, docs navigation, and safe command discovery. + +## First read + +Agents should start with the smallest source that answers the task: + +1. [`/llms.txt`](/llms.txt) - discover the docs and preferred entry points. +2. The relevant per-page Markdown URL, for example `/docs/getting-started/quickstart.md`. +3. [`/llms-full.txt`](/llms-full.txt) - use only when the task needs broad context across many pages. 
+ +## Task router + +| User asks the agent to explain... | Read first | Then read | +|------------------------------------|------------|-----------| +| What KTX does | [Introduction](/docs/getting-started/introduction) | [The Context Layer](/docs/concepts/the-context-layer) | +| How to start from a checkout | [Quickstart](/docs/getting-started/quickstart) | [ktx setup](/docs/cli-reference/ktx-setup) | +| How to check project readiness | [ktx status](/docs/cli-reference/ktx-status) | [Quickstart](/docs/getting-started/quickstart) | +| How context gets built | [Building Context](/docs/guides/building-context) | [ktx ingest](/docs/cli-reference/ktx-ingest) | +| How semantic YAML works | [Writing Context](/docs/guides/writing-context) | [ktx sl](/docs/cli-reference/ktx-sl) | +| How machine-readable CLI output is shaped | [ktx agent](/docs/cli-reference/ktx-agent) | [Markdown Access](/docs/ai-resources/markdown-access) | + +## Operating workflow + +Use this workflow when the user asks an assistant to answer a KTX docs question: + +1. Read [`/llms.txt`](/llms.txt). +2. Pick the smallest relevant `.md` page. +3. Use [`/llms-full.txt`](/llms-full.txt) only if the answer needs multiple sections of the docs. +4. Quote commands exactly from the docs page. +5. If a command affects a local project, ask the user before assuming credentials or live services are available. + +## Docs lookup from a shell + +```bash +curl https://docs.kaelio.com/ktx/llms.txt +curl https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md +``` + +## Guardrails + +- Do not invent CLI flags. Fetch the relevant CLI reference page. +- Do not scrape rendered HTML when a `.md` route exists. +- Do not assume docs lookup requires agent-client configuration. +- Do not include credentials or secrets in prompts, URLs, or copied docs snippets. +- When docs and local CLI behavior disagree, prefer the local CLI output and mention the mismatch. 
diff --git a/docs-site/content/docs/ai-resources/index.mdx b/docs-site/content/docs/ai-resources/index.mdx new file mode 100644 index 00000000..be315453 --- /dev/null +++ b/docs-site/content/docs/ai-resources/index.mdx @@ -0,0 +1,38 @@ +--- +title: AI Resources +description: Machine-readable docs and prompt recipes for coding assistants reading KTX documentation. +--- + +Use this section when a coding assistant, IDE agent, or automation system needs to understand the KTX documentation. + +> **Documentation index** +> +> Start with [`/llms.txt`](/llms.txt) to discover the available docs. Use [`/llms-full.txt`](/llms-full.txt) when the assistant needs the complete docs corpus in one Markdown response. + +## Choose the right path + +| Goal | Use this page | +|------|---------------| +| Tell a coding assistant how to approach KTX docs | [Agent Quickstart](/docs/ai-resources/agent-quickstart) | +| Fetch docs as Markdown instead of HTML | [Markdown Access](/docs/ai-resources/markdown-access) | +| Add lightweight instructions to an assistant prompt | [Agent Instructions](/docs/ai-resources/agent-instructions) | +| Copy prompts for common agent workflows | [Prompt Recipes](/docs/ai-resources/prompt-recipes) | + +## Available resources + +| Resource | What it gives agents | +|----------|----------------------| +| [`/llms.txt`](/llms.txt) | Curated index of high-value KTX docs and Markdown endpoints | +| [`/llms-full.txt`](/llms-full.txt) | Complete docs corpus in one plain-text Markdown response | +| `/docs/<path>.md` | Per-page Markdown for any docs page | +| Page-level actions | Copy Markdown, view Markdown, or copy MDX from rendered docs pages | +| Prompt recipes | Reusable prompts for docs lookup, setup help, and docs editing | + +## Agent usage notes + +When an assistant is unsure where to begin, use this order: + +1. Read [`/llms.txt`](/llms.txt). +2. Fetch the specific Markdown page for the task. +3. 
Use [Agent Quickstart](/docs/ai-resources/agent-quickstart) to choose the next command or page. +4. Use page-level copy actions when the user wants the exact Markdown or MDX source. diff --git a/docs-site/content/docs/ai-resources/markdown-access.mdx b/docs-site/content/docs/ai-resources/markdown-access.mdx new file mode 100644 index 00000000..c363a215 --- /dev/null +++ b/docs-site/content/docs/ai-resources/markdown-access.mdx @@ -0,0 +1,75 @@ +--- +title: Markdown Access +description: Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown. +--- + +KTX docs are available as plain Markdown so assistants do not need to parse the rendered HTML site. + +## Index + +Fetch the curated index: + +```text +https://docs.kaelio.com/ktx/llms.txt +``` + +Use this file to discover high-value pages, task-specific entry points, and Markdown URLs. + +## Full corpus + +Fetch the complete docs corpus: + +```text +https://docs.kaelio.com/ktx/llms-full.txt +``` + +Use this when an assistant needs broad context across setup, concepts, CLI reference, integrations, and troubleshooting. Prefer the smaller per-page Markdown route for narrow tasks. + +## Per-page Markdown + +Every docs page has a Markdown route: + +```text +https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md +https://docs.kaelio.com/ktx/docs/cli-reference/ktx-agent.md +https://docs.kaelio.com/ktx/docs/guides/building-context.md +``` + +Requests that ask for Markdown can also use the normal docs URL with `Accept: text/markdown`: + +```bash +curl -H "Accept: text/markdown" https://docs.kaelio.com/ktx/docs/getting-started/quickstart +``` + +## Recommended retrieval order + +1. Fetch `/llms.txt`. +2. Select one or two relevant page Markdown URLs. +3. Fetch `/llms-full.txt` only when page-level docs are not enough. + +## Output contract + +Markdown responses are designed for agent consumption: + +- Frontmatter is removed. +- Each page includes a title, description, canonical URL, and Markdown URL. 
+- Code blocks stay as code blocks. +- Tables stay as Markdown tables. +- Missing docs pages return a plain-text `404` instead of silently falling back to HTML. + +## Page actions + +Rendered docs pages include page-level actions near the title: + +- **Copy MD** copies the generated Markdown for the current page. +- **View MD** opens the generated Markdown route. +- **Copy MDX** copies the source MDX for the current page. + +## Common mistakes + +| Mistake | Better path | +|---------|-------------| +| Scraping the HTML page for a docs answer | Fetch the `.md` route instead | +| Loading `/llms-full.txt` for a single CLI flag lookup | Fetch the relevant CLI reference page | +| Treating `/llms.txt` as complete documentation | Use it as an index, then fetch linked pages | +| Copying rendered text by hand | Use **Copy MD** or **Copy MDX** from the page actions | diff --git a/docs-site/content/docs/ai-resources/meta.json b/docs-site/content/docs/ai-resources/meta.json new file mode 100644 index 00000000..ff555283 --- /dev/null +++ b/docs-site/content/docs/ai-resources/meta.json @@ -0,0 +1,11 @@ +{ + "title": "AI Resources", + "defaultOpen": true, + "pages": [ + "index", + "agent-quickstart", + "markdown-access", + "agent-instructions", + "prompt-recipes" + ] +} diff --git a/docs-site/content/docs/ai-resources/prompt-recipes.mdx b/docs-site/content/docs/ai-resources/prompt-recipes.mdx new file mode 100644 index 00000000..35f8a4ae --- /dev/null +++ b/docs-site/content/docs/ai-resources/prompt-recipes.mdx @@ -0,0 +1,54 @@ +--- +title: Prompt Recipes +description: Copyable prompts for common KTX agent workflows. +--- + +Use these prompts when asking a coding assistant to work with KTX. Replace project names, connection ids, and business terms with your own values. + +## Learn the docs + +```text +Read https://docs.kaelio.com/ktx/llms.txt first. Then fetch only the KTX Markdown pages needed for this task. Do not scrape rendered HTML unless no Markdown route exists. 
+``` + +## Set up a project + +```text +Set up KTX in this repository. Start by reading /docs/ai-resources/agent-quickstart.md and /docs/getting-started/quickstart.md. Use pnpm, not npm. After setup, run ktx status and summarize which steps are complete, which files changed, and what still needs credentials or user input. +``` + +## Find a command + +```text +Find the correct KTX command for this task: <task>. Start with /llms.txt, then fetch the smallest relevant CLI reference .md page. Quote the exact command and flags from the docs. +``` + +## Explain setup + +```text +Explain how to set up KTX for this repo. Read /docs/getting-started/quickstart.md and the relevant CLI reference pages. Summarize prerequisites, commands, generated files, and any credentials the user must provide manually. +``` + +## Compare concepts + +```text +Explain the difference between these KTX concepts: <concepts>. Start from /llms.txt, fetch the relevant concept and guide pages as Markdown, and answer with links to the source pages. +``` + +## Review semantic changes + +```text +Review the KTX semantic-layer and knowledge changes in this branch. Check that measures have clear definitions, joins use valid keys, hidden/internal columns are not exposed to agents, and validation passes. List concrete file and line issues first. +``` + +## Copy exact docs source + +```text +Open the relevant KTX docs page and use the page action to copy the generated Markdown or source MDX. Preserve code fences and tables exactly. +``` + +## Update docs + +```text +Update the KTX docs for agent readability. Keep AI Resources focused on docs consumption. After editing, verify /llms.txt, /llms-full.txt, and the affected .md routes. 
+``` diff --git a/docs-site/content/docs/benchmarks/link-detection.mdx b/docs-site/content/docs/benchmarks/link-detection.mdx deleted file mode 100644 index 142cc197..00000000 --- a/docs-site/content/docs/benchmarks/link-detection.mdx +++ /dev/null @@ -1,152 +0,0 @@ ---- -title: Link Detection -description: How KTX's relationship detection performs on real-world schemas. ---- - -KTX infers foreign key relationships between tables even when the database declares no primary keys or foreign key constraints. This is critical for analytics warehouses, where constraints are rarely enforced. This page documents the methodology, scoring pipeline, and a reproducible benchmark you can run yourself. - -## What this measures - -Most analytics warehouses — Snowflake, BigQuery, Redshift — don't enforce referential integrity constraints. Tables like `fct_product_events` reference `dim_accounts` by convention (`account_id` → `id`), but nothing in the schema says so. - -KTX's relationship detection discovers these links automatically. The benchmark measures how accurately it recovers known foreign key relationships from a schema with **all declared constraints removed** — the hardest operating mode. - -Metrics tracked: - -- **Accepted** — relationships scored above the accept threshold (default 0.85) and written to the project manifest -- **Review** — relationships scored between the review threshold (0.55) and accept threshold, flagged for human review -- **Rejected** — relationships scored below the review threshold -- **Skipped** — relationships not evaluated (e.g., filtered by candidate limits) - -## Methodology - -### Detection pipeline - -Relationship detection runs as a multi-stage pipeline during `ktx dev scan`: - -1. 
**Candidate generation** — scans the schema for potential FK relationships using multiple heuristics: exact column name matches, normalized table name matching, name inflection (singular/plural), column suffix patterns (`_id`, `_key`, `_code`, `_uuid`), self-references (`parent_id`, `manager_id`), and optionally embedding similarity and LLM proposals. - -2. **Column profiling** — samples up to 10,000 rows per column (configurable via `profile_sample_rows`) to collect statistics: row counts, null rates, distinct value counts, uniqueness ratios, sample values, and text length ranges. - -3. **Validation** — tests each candidate relationship against actual data by measuring target uniqueness, source coverage, violation ratio, and value overlap between child and parent columns. - -4. **Scoring** — combines 7 weighted signals into a confidence score: - -| Signal | Weight | What it captures | -|--------|--------|-----------------| -| Name similarity | 0.24 | How closely column/table names match FK conventions | -| Value overlap | 0.22 | What percentage of FK values exist in the PK column | -| Profile uniqueness | 0.22 | How unique the target column values are | -| Type compatibility | 0.10 | Whether data types are compatible (hard gate — score is 0 if incompatible) | -| Embedding similarity | 0.10 | Semantic similarity between column names | -| Profile null rate | 0.08 | Presence of non-null values | -| Structural prior | 0.04 | Baseline structural hints from schema conventions | - -Each signal is normalized to \[0, 1\], multiplied by its weight, and summed. The final confidence is `0.56 + (weighted_sum × 0.65)`, clamped to \[0, 1\]. - -5. **Graph resolution** — resolves conflicts when multiple candidates target the same column, detects primary keys (by name pattern and validation), and classifies each relationship into `accepted`, `review`, or `rejected` based on thresholds. 
- -### Threshold configuration - -```yaml -scan: - relationships: - accept_threshold: 0.85 - review_threshold: 0.55 -``` - -Relationships scoring above `accept_threshold` are automatically accepted into the project manifest. Those between `review_threshold` and `accept_threshold` are flagged for analyst review. Below `review_threshold`, they're rejected. - -### Test fixture - -The benchmark uses the **Orbit-style product warehouse** — a synthetic schema modeled after a real SaaS analytics warehouse with all declared constraints removed. The fixture is a SQLite database with 6 tables: - -| Table | Role | Estimated rows | -|-------|------|---------------| -| `dim_accounts` | Dimension | 3 | -| `dim_users` | Dimension | 4 | -| `dim_workspaces` | Dimension | 4 | -| `fct_product_events` | Fact | 5 | -| `fct_invoices` | Fact | 3 | -| `support_tickets` | Fact | 4 | - -**Ground truth:** 6 primary keys (one `id` column per table) and 9 foreign key relationships, all `many_to_one`: - -| Source column | Target | -|--------------|--------| -| `dim_users.account_id` | `dim_accounts.id` | -| `dim_workspaces.account_id` | `dim_accounts.id` | -| `dim_workspaces.user_id` | `dim_users.id` | -| `fct_product_events.account_id` | `dim_accounts.id` | -| `fct_product_events.user_id` | `dim_users.id` | -| `fct_product_events.workspace_id` | `dim_workspaces.id` | -| `fct_invoices.account_id` | `dim_accounts.id` | -| `support_tickets.account_id` | `dim_accounts.id` | -| `support_tickets.user_id` | `dim_users.id` | - -The fixture runs in multiple modes to isolate the contribution of each pipeline stage: with LLM disabled, profiling disabled, validation disabled, and embeddings disabled. - -## Results - -Results for the default configuration will be added after the benchmark run is finalized. 
- -## Reproducing the benchmark - -### Prerequisites - -- Node.js 22+ -- pnpm -- The KTX repository cloned and dependencies installed (`pnpm install`) - -### Running - -From the repository root: - -```bash -pnpm run relationships:verify-orbit -``` - -This runs `ktx dev scan` against the bundled SQLite fixture with enrichment disabled, then generates a verification report at: - -```text -examples/orbit-relationship-verification/reports/orbit-verification.md -``` - -The report includes the full relationship summary, enrichment details, artifact paths, and any warnings. - -### Custom project - -To run verification against your own database (e.g., a local Orbit project): - -```bash -KTX_ORBIT_PROJECT_DIR=/path/to/your-project pnpm run relationships:verify-orbit -``` - -### Configuration - -The benchmark project configuration lives at `examples/orbit-relationship-verification/ktx.yaml`: - -```yaml -scan: - enrichment: - backend: none - relationships: - enabled: true - llm_proposals: false - accept_threshold: 0.85 - review_threshold: 0.55 - profile_sample_rows: 10000 - validation_concurrency: 4 -``` - -Adjust `accept_threshold` and `review_threshold` to see how threshold changes affect the accepted/review/rejected distribution. Lower thresholds accept more relationships (higher recall, lower precision); higher thresholds are more conservative. - -## Broader benchmark suite - -Beyond the Orbit fixture, KTX includes a full benchmark corpus at `packages/context/test/fixtures/relationship-benchmarks/` with fixtures across multiple tiers: - -- **Unit** — minimal schemas testing individual heuristics -- **Row-bearing** — small schemas with data for validation testing -- **Product** — full warehouse schemas like the Orbit fixture - -Fixtures from public datasets (Chinook, Sakila, AdventureWorks, Northwind) supplement the synthetic fixtures. The benchmark runner measures precision, recall, and F1 for both primary key and foreign key detection across all fixtures and modes. 
diff --git a/docs-site/content/docs/benchmarks/meta.json b/docs-site/content/docs/benchmarks/meta.json deleted file mode 100644 index b75b5aba..00000000 --- a/docs-site/content/docs/benchmarks/meta.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "title": "Benchmarks", - "defaultOpen": true, - "pages": ["link-detection"] -} diff --git a/docs-site/content/docs/cli-reference/ktx-agent.mdx b/docs-site/content/docs/cli-reference/ktx-agent.mdx index 6b60d0d0..cdc4ceac 100644 --- a/docs-site/content/docs/cli-reference/ktx-agent.mdx +++ b/docs-site/content/docs/cli-reference/ktx-agent.mdx @@ -7,7 +7,7 @@ Hidden commands that provide machine-readable JSON output for coding agents. The All `ktx agent` subcommands require `--json` and produce structured JSON output on stdout. -## Usage +## Command signature ```bash ktx agent --json [options] @@ -124,3 +124,25 @@ ktx agent sql execute --json \ --sql-file /tmp/query.sql \ --max-rows 500 ``` + +## Output + +Every `ktx agent` command writes JSON to stdout and diagnostic text to stderr. Agents should parse stdout as JSON and treat a non-zero exit code as a failed tool call. 
+ +```json +{ + "ok": true, + "data": { + "type": "agent-response" + } +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Missing JSON output | `--json` was omitted | Re-run the same subcommand with `--json` | +| Unknown connection id | The requested connection is not configured in `ktx.yaml` | Call `ktx agent context --json` or `ktx connection list` to discover valid ids | +| Query file cannot be read | `--query-file` points to a missing or invalid JSON file | Write the query payload to a real file and pass its absolute path | +| SQL execution rejected | SQL is not read-only or `--max-rows` is missing | Use semantic-layer queries first; for direct SQL, pass read-only SQL and an explicit row limit | diff --git a/docs-site/content/docs/cli-reference/ktx-connection.mdx b/docs-site/content/docs/cli-reference/ktx-connection.mdx index 77458901..31a79736 100644 --- a/docs-site/content/docs/cli-reference/ktx-connection.mdx +++ b/docs-site/content/docs/cli-reference/ktx-connection.mdx @@ -5,7 +5,7 @@ description: "Add, list, test, and map data sources." Manage database and source connections in your KTX project. Connections define how KTX reaches your data warehouse, BI tools, and context sources. -## Usage +## Command signature ```bash ktx connection [options] @@ -147,3 +147,28 @@ ktx connection mapping refresh metabase-prod --auto-accept # Pick Notion root pages interactively ktx connection notion pick my-notion ``` + +## Output + +Interactive commands render prompts and status text. Commands with `--json` return machine-readable JSON suitable for scripts and agents. 
+ +```json +{ + "connections": [ + { + "id": "my-warehouse", + "driver": "postgres", + "readonly": false + } + ] +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Connection test fails | Credentials, network access, database, warehouse, or schema is invalid | Verify the same URL with the database's native client, then rerun `ktx connection add ... --force` | +| Literal credentials rejected | KTX avoids writing raw secrets to `ktx.yaml` by default | Use `env:NAME` or `file:/path/to/secret`; use `--allow-literal-credentials` only for local throwaway projects | +| Mapping validation fails | BI database mappings do not point at valid warehouse connections | Run `ktx connection mapping refresh --auto-accept`, then set invalid mappings explicitly | +| Notion pick cannot run non-interactively | `--no-input` was used without root page or database ids | Pass `--root-page-id`, `--root-database-id`, or `--root-data-source-id` with `--no-input` | diff --git a/docs-site/content/docs/cli-reference/ktx-dev.mdx b/docs-site/content/docs/cli-reference/ktx-dev.mdx index 2187c846..f4d98163 100644 --- a/docs-site/content/docs/cli-reference/ktx-dev.mdx +++ b/docs-site/content/docs/cli-reference/ktx-dev.mdx @@ -5,7 +5,7 @@ description: "Low-level diagnostics, scans, adapter commands, and mapping tools. Hidden commands for low-level project management, diagnostics, direct adapter control, and shell completion. Most users interact with these through higher-level commands like [`ktx ingest`](/docs/cli-reference/ktx-ingest) and [`ktx setup`](/docs/cli-reference/ktx-setup), but `ktx dev` provides direct access when you need fine-grained control. -## Usage +## Command signature ```bash ktx dev [options] @@ -145,3 +145,22 @@ ktx dev completion zsh # Install zsh completions ktx dev completion zsh --install ``` + +## Output + +`ktx dev` commands are diagnostic and may print plain text, JSON, or visual reports depending on the selected flags. 
+ +| Mode | How to request it | Use case | +|------|-------------------|----------| +| Plain text | `--plain` or default diagnostic output | Human-readable terminal inspection | +| JSON | `--json` | Agent parsing and automation | +| Visual report | `--viz` | Interactive memory-flow and ingest debugging | + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Doctor reports missing runtime pieces | Packages, Python environment, or linked CLI are not ready | Run `pnpm install`, `pnpm run setup:dev`, and `uv sync --all-groups` | +| Ingest run cannot find adapter | `--adapter` does not match a supported source adapter | Use configured source names from `ktx.yaml` or run higher-level `ktx ingest` | +| Replay/report file cannot be read | The report path is wrong or the run id is not stored locally | Run `ktx dev ingest status --json` to discover stored run ids and report files | +| Visual output fails in CI | TUI rendering requires an interactive terminal | Use `--plain --no-input` or `--json --no-input` | diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index 82d65e11..8ce9d9a5 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -5,7 +5,7 @@ description: "Build and refresh context from configured sources." Ingest context from your configured sources — dbt, Looker, Metabase, MetricFlow, LookML, or Notion. The ingest process extracts metadata from your tools, then uses an LLM agent to reconcile it with existing context, writing semantic sources and knowledge pages to your project. -## Usage +## Command signature ```bash ktx ingest [connectionId] [options] @@ -68,3 +68,28 @@ ktx ingest status --json ## Low-level ingest commands For adapter-level control, use `ktx dev ingest`. 
See [`ktx dev`](/docs/cli-reference/ktx-dev) for the full low-level ingest surface including `run`, `status`, `watch`, and `replay` with output mode options (`--plain`, `--json`, `--viz`). + +## Output + +Ingest run commands print progress and create a stored ingest report. `ktx ingest status --json` returns the run state, adapter, connection, and summary information. + +```json +{ + "runId": "ingest-local-abc123", + "status": "completed", + "connectionId": "dbt-main", + "summary": { + "semanticSourcesChanged": 4, + "knowledgePagesChanged": 2 + } +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| No eligible sources | `ktx.yaml` has no configured context source for ingest | Add a source with `ktx setup` or `ktx connection add`, then rerun ingest | +| Ingest needs credentials | The source adapter requires API or git access | Configure the referenced environment variable or secret file | +| Latest run not found | No ingest run has been started in this project | Run `ktx ingest <connectionId>` or `ktx ingest --all` first | +| Report watch fails in a non-interactive shell | Visual report needs a terminal | Use `ktx ingest status --json` for agent and CI workflows | diff --git a/docs-site/content/docs/cli-reference/ktx-scan.mdx b/docs-site/content/docs/cli-reference/ktx-scan.mdx index 34ad2190..0c37eccb 100644 --- a/docs-site/content/docs/cli-reference/ktx-scan.mdx +++ b/docs-site/content/docs/cli-reference/ktx-scan.mdx @@ -7,7 +7,7 @@ Discover your database schema — tables, columns, types, constraints, and relat Scan commands live under `ktx dev scan`. See also the [Building Context](/docs/guides/building-context) guide for a walkthrough. 
-## Usage +## Command signature ```bash ktx dev scan [options] @@ -143,3 +143,27 @@ ktx dev scan relationship-calibration --accept-threshold 0.9 --review-threshold # Get threshold advice based on review decisions ktx dev scan relationship-thresholds ``` + +## Output + +Scan commands write scan artifacts under the KTX project directory and print status or report summaries. Use `--json` on report and relationship commands when an agent needs structured output. + +```json +{ + "runId": "scan-local-abc123", + "status": "completed", + "mode": "structural", + "changes": { + "tablesAdded": 42 + } +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Scan cannot connect | Connection credentials or network access are invalid | Run `ktx connection test ` and update the connection before scanning | +| Enriched scan cannot describe columns | LLM credentials are missing or invalid | Complete LLM setup with `ktx setup` before enriched scans | +| Relationship apply writes nothing | No accepted candidates match the provided run id or candidate ids | Inspect `ktx dev scan relationships --status accepted` first | +| Calibration is not ready | Too few reviewed relationship labels exist | Review and accept/reject more candidates, then rerun calibration | diff --git a/docs-site/content/docs/cli-reference/ktx-serve.mdx b/docs-site/content/docs/cli-reference/ktx-serve.mdx index 0f394b33..3816b808 100644 --- a/docs-site/content/docs/cli-reference/ktx-serve.mdx +++ b/docs-site/content/docs/cli-reference/ktx-serve.mdx @@ -5,7 +5,7 @@ description: "Run the MCP stdio server." Start a Model Context Protocol (MCP) server that exposes your KTX project's context to coding agents. The server runs over stdio and provides tools for querying semantic sources, searching knowledge, managing connections, and running ingests. 
-## Usage +## Command signature ```bash ktx serve --mcp stdio [options] @@ -49,3 +49,26 @@ ktx serve --mcp stdio --project-dir /path/to/my-project ## Agent integration The MCP server is typically configured through `ktx setup --agents` rather than started manually. See the [Serving Agents](/docs/guides/serving-agents) guide and [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool configuration. + +## Output + +`ktx serve --mcp stdio` communicates through MCP messages on stdio. It is meant to be launched by an agent client, not read directly by a human terminal session. + +```json +{ + "command": "ktx", + "args": ["serve", "--mcp", "stdio", "--semantic-compute", "--execute-queries"], + "env": { + "KTX_PROJECT_DIR": "/home/user/analytics" + } +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Agent cannot start server | The agent config cannot find the `ktx` binary | Install `@kaelio/ktx` globally with `npm install -g @kaelio/ktx` or use an absolute command path in the agent config | +| Semantic tools are unavailable | Server was started without `--semantic-compute` | Add `--semantic-compute` or `--semantic-compute-url` to the server args | +| Query execution is denied | Server was started without `--execute-queries` | Add `--execute-queries` only for trusted projects where read-only execution is intended | +| Context resolves to wrong project | `KTX_PROJECT_DIR` is missing or points elsewhere | Set `KTX_PROJECT_DIR` to the project containing the intended `ktx.yaml` | diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 943033b5..d09c2f28 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -5,7 +5,7 @@ description: "Set up or resume a local KTX project." 
Interactive wizard that walks you through configuring LLM credentials, embeddings, database connections, context sources, and agent integrations. When run without flags in a directory that has no `ktx.yaml`, it launches the full guided flow. When run in an existing project, it resumes from the first incomplete step. -## Usage +## Command signature ```bash ktx setup [options] @@ -173,3 +173,27 @@ ktx setup context watch # Run the packaged demo ktx setup demo ``` + +## Output + +Interactive setup renders prompts and progress messages. `ktx setup status` is the best command for agents because it summarizes readiness in one response. + +```text +KTX project: /home/user/analytics +Project ready: yes +LLM ready: yes (claude-sonnet-4-6) +Embeddings ready: yes (text-embedding-3-small) +Primary sources configured: yes (postgres-warehouse) +Context sources configured: yes (dbt-main) +KTX context built: yes +Agent integration ready: yes (codex:project) +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Setup resumes an unexpected project | `KTX_PROJECT_DIR` or nearest `ktx.yaml` points to another directory | Pass `--project-dir ` explicitly | +| Health check for model fails | Provider key or model id is invalid | Set the correct environment variable or secret file and rerun setup | +| Setup cannot run in CI | Interactive prompts need a TTY | Use `--yes --no-input` with explicit flags for required values | +| Agent integration missing | Setup skipped the agents step | Run `ktx setup --agents --target --agent-install-mode both` | diff --git a/docs-site/content/docs/cli-reference/ktx-sl.mdx b/docs-site/content/docs/cli-reference/ktx-sl.mdx index 8fccf3a2..4ec7bdd1 100644 --- a/docs-site/content/docs/cli-reference/ktx-sl.mdx +++ b/docs-site/content/docs/cli-reference/ktx-sl.mdx @@ -5,7 +5,7 @@ description: "List, read, validate, query, or write semantic-layer sources." Interact with your project's semantic layer. 
Semantic sources are YAML definitions that describe your tables, columns, measures, joins, and grain — the vocabulary agents use to generate correct SQL. -## Usage +## Command signature ```bash ktx sl [options] @@ -120,3 +120,28 @@ ktx sl query \ --execute \ --max-rows 1000 ``` + +## Output + +Semantic-layer commands return human-readable output by default. Use `--json` or `--format json` when an agent needs structured output; use `--format sql` to inspect generated SQL before execution. + +```json +{ + "sql": "SELECT orders.status, SUM(orders.total_amount) AS total_revenue FROM public.orders GROUP BY orders.status", + "rows": [ + { + "orders.status": "completed", + "total_revenue": 125000 + } + ] +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Source not found | Source name or connection id is wrong | Run `ktx sl list --json` and retry with an exact source name and connection id | +| Validation fails | YAML references missing columns, invalid joins, or invalid SQL expressions | Fix the source YAML and rerun `ktx sl validate` | +| Query compile fails | Measure, dimension, filter, or segment name is invalid | Read the source with `ktx sl read`, then retry using declared fields | +| Execution returns too many rows | `--max-rows` is missing or too high | Add `--max-rows` with a bounded value before executing | diff --git a/docs-site/content/docs/cli-reference/ktx-status.mdx b/docs-site/content/docs/cli-reference/ktx-status.mdx index 2b1e8f34..31a7d113 100644 --- a/docs-site/content/docs/cli-reference/ktx-status.mdx +++ b/docs-site/content/docs/cli-reference/ktx-status.mdx @@ -5,7 +5,7 @@ description: "Show current project status." Print the current setup status of your KTX project — which steps are complete, which need attention, and whether the project is ready for agents. 
-## Usage +## Command signature ```bash ktx status [options] @@ -26,3 +26,26 @@ ktx status # Get status as JSON (useful for scripting) ktx status --json ``` + +## Output + +`ktx status` prints readiness for each setup area. Agents should use `ktx status --json` when they need to branch on readiness state. + +```json +{ + "projectReady": true, + "llmReady": true, + "embeddingsReady": true, + "primarySourcesConfigured": true, + "contextBuilt": true, + "agentIntegrationReady": true +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| No KTX project found | Current directory has no `ktx.yaml` and `KTX_PROJECT_DIR` is unset | Run from a KTX project or set `KTX_PROJECT_DIR` | +| Project ready is false | One or more setup steps are incomplete | Run `ktx setup` to resume setup | +| Agent integration ready is false | No agent target has been installed | Run `ktx setup --agents --target ` | diff --git a/docs-site/content/docs/cli-reference/ktx-wiki.mdx b/docs-site/content/docs/cli-reference/ktx-wiki.mdx index d2376e51..a709ac07 100644 --- a/docs-site/content/docs/cli-reference/ktx-wiki.mdx +++ b/docs-site/content/docs/cli-reference/ktx-wiki.mdx @@ -5,7 +5,7 @@ description: "List, read, search, or write knowledge pages." Manage knowledge pages in your KTX project. Knowledge pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data. -## Usage +## Command signature ```bash ktx wiki [options] @@ -90,3 +90,28 @@ ktx wiki write data-freshness \ --content "The orders table refreshes every 15 minutes..." \ --ref "https://wiki.example.com/data-pipelines" ``` + +## Output + +Wiki commands print local knowledge pages and search results. Agents should search first, then read the most relevant page by key. 
+ +```json +{ + "results": [ + { + "key": "revenue-definitions", + "summary": "Canonical revenue metric definitions", + "score": 0.92 + } + ] +} +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing | +| Read fails for a key | The page key is wrong or scoped to a different user | Run `ktx wiki list` or search again to get the exact key | +| Write fails due to missing fields | `--summary` or `--content` was omitted | Pass both fields, and keep the summary short enough for search results | +| Agent writes duplicate pages | It did not search existing pages first | Always run `ktx wiki search` before `ktx wiki write` | diff --git a/docs-site/content/docs/community/contributing.mdx b/docs-site/content/docs/community/contributing.mdx index 598ac5a9..1b4e39ce 100644 --- a/docs-site/content/docs/community/contributing.mdx +++ b/docs-site/content/docs/community/contributing.mdx @@ -7,6 +7,11 @@ KTX is an open-source project and welcomes contributions — bug fixes, new conn ## Development setup +This page is for contributors working on the KTX repository. To install KTX for +an analytics project, use the published +[`@kaelio/ktx`](https://www.npmjs.com/package/@kaelio/ktx) package in the +[Quickstart](/docs/getting-started/quickstart). + ### Prerequisites - **Node.js 22+** and **pnpm** — for the TypeScript workspace @@ -44,7 +49,9 @@ pnpm run setup:dev pnpm run link:dev ``` -This makes the `ktx` command available globally, pointing at your local build. +This makes the `ktx-dev` command available globally, pointing at your local +build. Use this development binary when you need to test unpublished repository +changes. ## Repository structure @@ -220,3 +227,17 @@ Before submitting a pull request: 5. 
**Don't commit artifacts** — `node_modules/`, `.venv/`, `dist/`, coverage output, and local databases should not be committed. For larger features or architectural changes, open an issue first to discuss the approach. + +## Agent usage notes + +Use this page when an agent is modifying the KTX repository itself rather than using KTX in an analytics project. + +| Agent task | Command or section | +|------------|--------------------| +| Prepare the workspace | `pnpm install`, `pnpm run setup:dev`, `uv sync --all-groups` | +| Verify TypeScript changes | `pnpm run type-check`, `pnpm run test`, or package-filtered equivalents | +| Verify Python changes | `uv run pytest -q` and `uv run pre-commit run --files <files>` | +| Add a connector | Adding a connector | +| Check style expectations | Code conventions | + +Common recovery path: if a check fails because generated files or local runtimes are missing, run the setup commands first. If a check fails because of a real type, lint, or test error, fix the source file and rerun the smallest failing check before broadening verification. diff --git a/docs-site/content/docs/concepts/context-as-code.mdx b/docs-site/content/docs/concepts/context-as-code.mdx index 152b54ce..3a2b1686 100644 --- a/docs-site/content/docs/concepts/context-as-code.mdx +++ b/docs-site/content/docs/concepts/context-as-code.mdx @@ -29,43 +29,51 @@ This reconciliation step is what separates auto-ingestion from a simple sync. A Auto-ingestion is designed to plug into a PR-based workflow. Run ingestion on a branch, review the changed YAML and Markdown files, and merge them the same way you merge dbt models or application code. -``` - dbt / Looker / Metabase KTX project repo - ┌──────────────┐ ┌──────────────────────┐ - │ Metadata │───ingestion──▶│ Branch: ingest/... 
│ - │ changes │ │ │ - └──────────────┘ │ + 3 new sources │ - │ ~ 2 updated joins │ - │ + 1 knowledge page │ - │ │ - │ ──── Open PR ──── │ - │ │ - │ Review semantic diff │ - │ Approve & merge │ - └──────────────────────┘ - │ - ▼ - Agents see updated - context immediately +```text +dbt / Looker / Metabase / Notion + | + v + metadata changes + | + v + nightly cron or CI ingest + | + v + branch: ingest/nightly + | + | + 3 new sources + | ~ 2 updated joins + | + 1 knowledge page + v + open PR + | + v + review semantic diff + | + v + approve & merge + | + v + agents see updated context ``` A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge. +Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest --all --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. + Once merged, agents querying through KTX's MCP server or CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request. This workflow gives you the same review guarantees you have for dbt models. No semantic source reaches production without a human approving it. But unlike maintaining context manually, the heavy lifting — discovering new tables, drafting source definitions, extracting business rules from documentation — is done by the ingestion agent. You review and approve. You don't write from scratch. ## Feedback loops -Context improves over time through three feedback channels. +Context improves over time through two feedback channels. 
**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a knowledge page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest. -**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, source definitions can be tightened with better filters or grain declarations, and relationship thresholds can be adjusted. +**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, and source definitions can be tightened with better filters, join paths, or grain declarations. -**Relationship calibration.** KTX infers foreign key relationships between tables automatically, even when the database has no declared constraints. It does this by analyzing column names, types, value distributions, and asking the LLM for proposals. Each inferred relationship gets a confidence score. You control two thresholds: `acceptThreshold` (relationships above this score are accepted automatically, default 0.85) and `reviewThreshold` (relationships between review and accept are flagged for human review, default 0.55). As you accept or reject proposals, the system learns which patterns match your schema conventions. 
- -Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Relationship calibration tunes the discovery process to your warehouse's conventions. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration. +Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration. ## Deterministic replay @@ -80,3 +88,14 @@ This matters for three reasons. **Reproducibility.** Because ingestion sessions are recorded as structured transcripts (tool calls and responses, not just logs), they can be replayed for testing and validation. If you change your ingestion configuration or upgrade the LLM, you can replay previous sessions to see how the output would differ. This gives you a safety net for changes that affect how context is generated. The transcript is stored with local ingest run state and can be reviewed or replayed when you need to audit a decision. Commit the resulting YAML and Markdown changes; commit reports or transcripts only when they are part of your team's review workflow. + +## Agent usage notes + +Use this page when an agent needs to explain review workflows, ingestion diffs, replayability, or why KTX writes YAML and Markdown instead of hiding context in a hosted service. 
+ +| Agent task | Relevant section | Next page | +|------------|------------------|-----------| +| Explain how generated context should be reviewed | The git workflow | [Building Context](/docs/guides/building-context) | +| Diagnose why ingestion changed a semantic source | Auto-ingestion and Deterministic replay | [ktx ingest](/docs/cli-reference/ktx-ingest) | +| Explain how context improves over time | Feedback loops | [Building Context](/docs/guides/building-context) | +| Tell a user what to commit | The git workflow | [Writing Context](/docs/guides/writing-context) | diff --git a/docs-site/content/docs/concepts/the-context-layer.mdx b/docs-site/content/docs/concepts/the-context-layer.mdx index 3a3ab9b4..953af89a 100644 --- a/docs-site/content/docs/concepts/the-context-layer.mdx +++ b/docs-site/content/docs/concepts/the-context-layer.mdx @@ -9,7 +9,7 @@ Give an agent access to your database and it will generate SQL. It might even pr The agent doesn't know that `orders.amount` includes refunds and needs a status filter. It doesn't know that `customers` should join to `orders` on `customer_id`, not `id`. It doesn't know that your team stopped using `legacy_segments` six months ago, or that "enterprise" means contracts over $100k, not just big logos. It sees column names and types. It doesn't see your business. -This isn't a model capability problem. GPT-4, Claude, and Gemini can all write correct SQL — when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision. +This isn't a model capability problem. Claude Code, Codex, and your BI agents can write correct SQL when they know what correct means. 
The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision. Analytics engineers already know this pain. It's the same reason you write dbt tests, maintain a data dictionary, and spend half of standup explaining why someone's dashboard number doesn't match the board deck. The difference is that agents make decisions at machine speed, so the wrong context propagates faster than a human can catch it. @@ -19,9 +19,9 @@ The industry has moved through three distinct approaches to getting AI and data **Wave one: database access.** Connect an LLM to a database, let it generate SQL. This works for simple lookups — "how many orders last week?" — but breaks on anything that requires business knowledge. The agent guesses at joins, invents metrics, and hallucinates table relationships. Every query is a coin flip. -**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but it's still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter. +**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but building and maintaining that structure by hand is manual, time-consuming, and still limited. 
Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter. -**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, data quality gotchas, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer. +**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, known data quality issues, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer. ## What a context layer is @@ -29,6 +29,13 @@ A context layer is the infrastructure that gives agents the business knowledge t KTX organizes context into four pillars: +- Semantic sources +- Knowledge pages +- Scan artifacts +- Provenance + +Each pillar covers a different kind of context agents need before they can safely write SQL, update semantic definitions, or explain an analytics result. + **Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives. 
```yaml @@ -60,7 +67,7 @@ measures: expr: count(id) ``` -**Knowledge pages** are Markdown documents that capture business definitions, rules, and gotchas — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it. +**Knowledge pages** are Markdown documents that capture business definitions, rules, and operating context — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it. ```markdown --- @@ -90,13 +97,12 @@ Together, these four pillars give agents enough context to produce analytics art ## How KTX compares -KTX is a context layer, and its structured core is an agent-native semantic layer. That matters. MetricFlow, Cube, and Malloy all give teams ways to model metrics, dimensions, joins, and generated SQL. KTX covers that same semantic-layer job, then adds the surrounding context agents need to use it well: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools. +KTX is a context layer with an agent-native semantic layer at its core. MetricFlow, Cube, and Malloy model metrics, dimensions, joins, and generated SQL. KTX covers that semantic-layer work, then adds the context agents need to use and maintain it: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools. -The primary user is different. MetricFlow is centered on dbt-style metric definitions. Cube is centered on a governed semantic runtime for BI, applications, and agents. Malloy is centered on an expressive modeling and query language. 
KTX is centered on agents that need to read a semantic model, change it, validate it, inspect the generated SQL, and leave a reviewable git diff. +The workflow is the difference. Traditional semantic layers are powerful, but they are usually built and maintained through manual modeling work, product-specific runtimes, or language-specific workflows. They are not agent-native by default, which makes them harder for agents to inspect, edit, validate, and review in a tight loop. KTX is designed for agents that need to read context, change semantic files, inspect generated SQL, and leave a reviewable git diff. | | KTX semantic layer | MetricFlow | Cube | Malloy | |---|---|---|---|---| -| **Design center** | Agent-native semantic modeling inside a broader context layer | Metric definitions and dbt semantic models | Governed serving layer for BI, embedded analytics, APIs, and agents | Semantic modeling and analytical query language | | **Model surface** | Plain YAML sources plus Markdown knowledge pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations | | **What it models** | Sources, columns, measures, segments, joins, grain, filters, default time dimensions, and context references | Semantic models, entities, dimensions, measures, metrics, time grains, and metric types | Cubes, views, measures, dimensions, segments, joins, hierarchies, policies, and rollups | Sources, joins, dimensions, measures, calculations, nested results, and query pipelines | | **Agent edit loop** | First-class. 
Agents can patch small files, save imperfect drafts, run validation, query through MCP, inspect SQL, and refine in the same workflow | Possible, but the interface is a dbt/metric workflow rather than an agent context workflow | Possible through code-first models and platform APIs, but changes are tied to runtime deployment and governance concerns | Possible, but agents must operate in Malloy's language and compiler model | @@ -105,15 +111,7 @@ The primary user is different. MetricFlow is centered on dbt-style metric defini | **Context around semantics** | Built in: wiki pages, scan artifacts, relationship inference, ingest transcripts, replay, and agent-facing MCP tools | Primarily metric and dbt project context | Descriptions and `meta.ai_context` inside the semantic model, plus platform agent features | Annotations/tags can carry metadata; surrounding context depends on the application | | **Best fit** | Agents maintaining analytics code, metrics, joins, SQL, docs, and semantic definitions | Teams standardizing metrics inside dbt workflows | Production semantic APIs, BI integrations, access control, caching, and concurrency | Expressive modeling and exploratory analysis above SQL | -**Agent-native by design.** KTX's advantage is not just that the files are YAML. The whole loop is shaped for agents: sources are small, overlays can add measures or computed columns without copying entire generated schemas, writes are permissive so an agent can save a draft, and validation/query tools give immediate feedback. An agent can move from "this metric is wrong" to "here is the semantic diff, generated SQL, and supporting context" without leaving the project. - -**A semantic layer plus the context to use it.** Traditional semantic layers define what to calculate. KTX also stores why the definition exists, where it came from, what schema evidence supports it, and what an agent did when it changed. 
A measure can live next to a knowledge page about exclusions, a scan artifact that proves the join path, and an ingest transcript that explains the source of the definition. That is the difference between giving an agent a metric catalog and giving it operational memory. - -**Fan-out handling is explicit and reviewable.** KTX asks model authors and agents to declare grain and relationship direction. The planner uses that metadata to avoid silent row multiplication: it detects `one_to_many` fan-out paths, separates independent fact measures into aggregate-locality CTEs, and refuses filters that would be unsafe to apply after pre-aggregation. Cube, MetricFlow, and Malloy all have strong approaches to this class of problem, but KTX's approach is deliberately inspectable in the files and in the generated plan. - -**Where other systems are stronger.** KTX draws a clear product boundary around agent-native context and semantic modeling. Cube is stronger when you need a production semantic API with access policies, pre-aggregations, refresh workers, and high-concurrency serving. MetricFlow is stronger when your primary workflow is dbt-native metric standardization. Malloy is stronger when you want a full analytical language with nested query shapes. KTX is strongest when the semantic layer is the substrate agents will read, edit, validate, and extend as part of day-to-day analytics engineering. - -**When KTX replaces your semantic layer vs. works beside it.** If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow, LookML, Looker, Metabase, dbt, or Notion, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against. 
+If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow or LookML, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against. ## The plain-files philosophy @@ -145,3 +143,14 @@ my-project/ Semantic sources and knowledge pages are committed to git. The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run. This means your analytics context travels with your code. You can fork it, branch it, review it in a PR, and merge it with the same tools you use for dbt models. There's no sync problem between a remote server and your local state. There's no migration to run. The files are the source of truth. + +## Agent usage notes + +Use this page when an agent needs to explain why KTX exists, why schema-only database access is not enough, or how KTX differs from MetricFlow, Cube, Malloy, and traditional semantic layers. 
+ +| Agent task | Relevant section | Next page | +|------------|------------------|-----------| +| Explain why a database agent made a plausible but wrong query | The problem | [Writing Context](/docs/guides/writing-context) | +| Decide whether a metric belongs in YAML or Markdown | What a context layer is | [Writing Context](/docs/guides/writing-context) | +| Compare KTX to another semantic layer | How KTX compares | [Primary Sources](/docs/integrations/primary-sources) | +| Explain reviewability and source of truth | The plain-files philosophy | [Context as Code](/docs/concepts/context-as-code) | diff --git a/docs-site/content/docs/getting-started/introduction.mdx b/docs-site/content/docs/getting-started/introduction.mdx index 827cdff5..a9d98d3e 100644 --- a/docs-site/content/docs/getting-started/introduction.mdx +++ b/docs-site/content/docs/getting-started/introduction.mdx @@ -1,59 +1,92 @@ --- title: Introduction -description: What KTX is and who it's for. +description: How KTX gives analytics agents trusted context for warehouse work. --- -Data agents can write SQL. The hard part is making sure they write the SQL your analytics team would have written. - -KTX is the agent-native context layer for analytics engineering. At its core is a semantic layer: YAML sources that define tables, columns, measures, joins, grain, filters, segments, and computed fields. Around that core, KTX adds the context analytics agents need to work safely: warehouse scans, knowledge pages, ingestion from existing tools, provenance, validation, and MCP access. - -KTX projects are plain files — YAML, Markdown, and SQLite — that you commit to git and review in PRs, just like dbt models. Agents can read them, edit them, validate them, query through them, and leave behind a diff your team can review. +
+
+

+ Make analytics context{'\n'}usable by agents +

+

+ KTX turns warehouse metadata, semantic definitions, and business knowledge + into reviewable project files that agents can use while planning, querying, + and updating analytics work. +

+
+ +
## Who KTX is for -KTX is built for analytics engineers and data teams who want data agents to work on real analytics systems, not just generate one-off SQL. +KTX is built for analytics engineers and data teams who want data agents to +work on real analytics systems — not just generate one-off SQL. Use KTX when you want agents to: -- Generate SQL from approved measures, dimensions, and joins -- Repair or extend semantic definitions through reviewable git diffs -- Explain where a metric definition came from and what business rules shape it -- Use warehouse scans and relationship evidence instead of guessing join paths -- Work alongside **dbt**, **LookML**, **MetricFlow**, **Looker**, **Metabase**, **Notion**, and BI platforms -- Work with warehouses like **PostgreSQL**, **Snowflake**, **BigQuery**, **ClickHouse**, **MySQL**, or **SQL Server** +- **Generate SQL** from approved measures and joins +- **Repair semantic definitions** through reviewable diffs +- **Explain metric provenance** with warehouse evidence +- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI platforms -If you've ever watched an agent confidently generate a query that joins on the wrong key or invents a metric that doesn't exist, KTX is the fix. +Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server. 
-## What KTX gives agents - -- **A semantic layer they can edit** — plain YAML sources with measures, dimensions, joins, grain, segments, filters, and computed columns -- **Safe query planning** — grain-aware SQL generation, fan-out detection, chasm-trap handling, and dialect transpilation -- **Business context** — Markdown knowledge pages for definitions, rules, exceptions, and data quality notes -- **Schema evidence** — warehouse scans with table metadata, column stats, constraints, and inferred relationships -- **Provenance** — ingest transcripts and replay metadata that explain where context came from and why it changed -- **An agent-facing API** — MCP and CLI tools for reading, writing, validating, searching, and querying context - -## How these docs are organized +## Explore the docs Set up KTX and build your first context in under 10 minutes. - Understand what a context layer is, why agents need one, and how KTX compares to other semantic layers. + Understand what a context layer is and why agents need one. - Hands-on workflows for scanning, ingesting, writing semantic sources, and serving agents. - - - Setup details for every supported database, context source, and agent client. + Hands-on workflows for scanning, ingesting, writing, and serving. - Exhaustive flag and subcommand reference for every KTX command. + Complete flag and subcommand reference for every KTX command. -## Next steps +## Agent usage notes -- **Get hands-on** — follow the [Quickstart](/docs/getting-started/quickstart) to set up KTX with your own database in under 10 minutes. -- **Understand the theory** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why schema access alone breaks on real analytics and how KTX addresses it. 
+| Agent task | Read next | +|------------|-----------| +| Discover machine-readable docs | [AI Resources](/docs/ai-resources) | +| Learn how a coding assistant should approach KTX | [Agent Quickstart](/docs/ai-resources/agent-quickstart) | +| Set up a new KTX project | [Quickstart](/docs/getting-started/quickstart) | +| Explain what problem KTX solves | [The Context Layer](/docs/concepts/the-context-layer) | +| Scan a database and ingest metadata | [Building Context](/docs/guides/building-context) | +| Edit semantic sources or knowledge pages | [Writing Context](/docs/guides/writing-context) | +| Look up exact command flags | [CLI Reference](/docs/cli-reference/ktx-setup) | diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 7df3a611..61abc301 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -5,32 +5,34 @@ description: Set up KTX and build your first context in under 10 minutes. This guide walks you through `ktx setup` — an interactive wizard that configures your LLM provider, connects your database, optionally ingests from your existing tools, builds context, and installs agent integration. -## Prerequisites +If you are a coding assistant trying to decide which KTX docs page to read, start with the [Agent Quickstart](/docs/ai-resources/agent-quickstart). This page is the human setup walkthrough. -- **Node.js 22+** and **pnpm** -- An **Anthropic API key** for LLM-powered enrichment and ingestion -- A **database connection** — PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite -- Optionally, a **dbt project**, **LookML repo**, **Metabase instance**, or other context source +## Workflow summary + +Use this sequence when you are setting up KTX in an analytics project: + +1. `npm install -g @kaelio/ktx` — install the published KTX CLI from npm. +2. 
`ktx setup` — create or resume a KTX project. + +The setup wizard is stateful. If it exits before completion, rerun `ktx setup` in the same project directory to resume from the first incomplete step. ## Install and run setup -KTX is currently used from a local checkout or linked workspace CLI. Build and link the CLI first: +Install the published [`@kaelio/ktx`](https://www.npmjs.com/package/@kaelio/ktx) CLI: ```bash -git clone https://github.com/kaelio/ktx.git -cd ktx -pnpm install -pnpm run setup:dev -pnpm run link:dev +npm install -g @kaelio/ktx ``` -Then run the setup wizard in the directory where you want your KTX project: +Then run the setup wizard: ```bash ktx setup ``` -The wizard walks through six steps. You can go back at any point, and if you exit early, running `ktx setup` again resumes where you left off. +The local checkout flow is only for contributors working on KTX itself. See [Contributing](/docs/community/contributing) for that setup. + +The wizard walks through six steps. You can go back at any point, and if you exit early, rerunning `ktx setup` resumes where you left off. ## Step 1: Configure LLM @@ -70,10 +72,11 @@ KTX uses embeddings for semantic search over sources, wiki content, schema metad **OpenAI embeddings** use `text-embedding-3-small` (1536 dimensions) and require an `OPENAI_API_KEY`. -**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX Python daemon. No API key is needed. If you run the daemon as a long-lived HTTP service, start it with: +**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX managed Python runtime. No API key is needed. 
KTX can install and start the runtime during setup; to prepare it ahead of time, run: ```bash -ktx-daemon serve-http --host 127.0.0.1 --port 8765 +ktx runtime install --feature local-embeddings --yes +ktx runtime start --feature local-embeddings ``` ## Step 3: Connect a database @@ -192,12 +195,29 @@ Then select which agents to install for: │ ◻ Codex │ ◻ Cursor │ ◻ OpenCode +│ ◻ Custom agent (.agents) ``` **CLI mode** writes a skill file (e.g., `.claude/skills/ktx/SKILL.md`) that teaches the agent to call KTX commands directly. **MCP mode** writes an MCP server configuration (e.g., `.mcp.json`) that lets the agent call KTX tools like `sl_query`, `knowledge_search`, and `sl_write_source` over the Model Context Protocol. +**Custom agent** uses the universal `.agents` target for agents that can read project-local skills or MCP configuration. + +## Generated files + +KTX writes project state as plain files so agents can inspect and edit changes in git. + +| Path | Created by | Purpose | +|------|------------|---------| +| `ktx.yaml` | `ktx setup` | Main project configuration: connections, LLM settings, embeddings, and context sources | +| `.ktx/secrets/*` | `ktx setup` when file-backed secrets are selected | Local secret files referenced from `ktx.yaml`; do not commit these | +| `semantic-layer//*.yaml` | context build, ingestion, or `ktx sl write` | Semantic source definitions agents use for SQL generation | +| `knowledge/global/*.md` | ingestion or `ktx wiki write --scope global` | Shared business context and metric definitions | +| `knowledge/user//*.md` | `ktx wiki write --scope user` | User-scoped notes for one agent/user context | +| `.mcp.json`, `.cursor/mcp.json`, `.agents/mcp/ktx.json`, `.opencode/mcp.json` | agent integration setup | MCP server configuration for supported agent clients | +| `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling `ktx agent` commands | + ## Verify it 
worked Check your project status: @@ -217,35 +237,17 @@ KTX context built: yes Agent integration ready: yes (claude-code:project) ``` -List your semantic sources: +## Common errors -```bash -ktx sl list -``` - -Query through the semantic layer: - -```bash -ktx sl query \ - --connection-id postgres-warehouse \ - --measure orders.total_revenue \ - --dimension orders.status \ - --order-by orders.total_revenue:desc \ - --limit 5 \ - --format sql -``` - -This outputs the generated SQL. Add `--execute` to run it against your warehouse: - -```bash -ktx sl query \ - --connection-id postgres-warehouse \ - --measure orders.total_revenue \ - --dimension orders.status \ - --order-by orders.total_revenue:desc \ - --limit 5 \ - --execute --max-rows 10 -``` +| Error or symptom | Likely cause | Recovery | +|------------------|--------------|----------| +| `ktx: command not found` | The KTX package is not installed globally, or the shell cannot find the global binary | Run `npm install -g @kaelio/ktx` and open a new shell | +| LLM health check fails | Missing, invalid, or unauthorized Anthropic API key | Export `ANTHROPIC_API_KEY` or rerun `ktx setup` and choose the file-backed secret option | +| OpenAI embedding check fails | `OPENAI_API_KEY` is missing when OpenAI embeddings are selected | Export `OPENAI_API_KEY`, or rerun setup and choose local sentence-transformers embeddings | +| Local embeddings hang or fail | The managed Python runtime cannot start or the local model runtime is unavailable | Install `uv`, run `ktx runtime doctor`, then run `ktx runtime install --feature local-embeddings --yes` and rerun setup | +| Database connection test fails | Credentials, network access, warehouse, database, or schema value is wrong | Test the same URL with the database's native client, then rerun `ktx connection add ... 
--force` or rerun setup | +| `KTX context built: no` in `ktx status` | Setup saved configuration but did not build context | Run `ktx setup context build` or rerun `ktx setup` and choose to build context now | +| Agent integration is incomplete | Setup skipped the agents step or the target was not installed | Run `ktx setup --agents --target codex --agent-install-mode both --project` using the target you need | ## Next steps diff --git a/docs-site/content/docs/guides/writing-context.mdx b/docs-site/content/docs/guides/writing-context.mdx index d70a5f3c..3f0e3fbd 100644 --- a/docs-site/content/docs/guides/writing-context.mdx +++ b/docs-site/content/docs/guides/writing-context.mdx @@ -5,6 +5,17 @@ description: Write and refine semantic sources and knowledge pages. After building context through scanning and ingestion, you'll want to refine it — edit semantic sources to match your business logic, add knowledge pages that capture tribal knowledge, and query your data through the semantic layer to verify everything works. +## Agent workflow summary + +Agents should refine context in this order: + +1. `ktx sl list --json` — discover available sources and connection ids. +2. `ktx sl read --connection-id ` — inspect the current YAML. +3. Edit the source YAML directly or use `ktx sl write`. +4. `ktx sl validate --connection-id ` — verify columns, joins, and table references. +5. `ktx sl query ... --format sql` — compile a representative query without executing it. +6. `ktx wiki search ...` and `ktx wiki write ...` — add business context that does not belong in schema YAML. + ## Semantic Sources Semantic sources are YAML files that describe your tables, columns, measures, and joins. They're the core of the context layer — the structured definitions that agents use to generate correct SQL. 
@@ -108,6 +119,26 @@ Key fields: | `segments` | No | Named filter conditions | | `inherits_columns_from` | No | Inherit column metadata from a manifest entry | +Source component fields: + +| Component | Field | Required | Description | +|-----------|-------|----------|-------------| +| Column | `name` | Yes | Column identifier as used in SQL expressions | +| Column | `type` | Yes | Agent-facing type: `string`, `number`, `time`, or `boolean` | +| Column | `role` | No | Special role such as `time` for default time dimensions | +| Column | `visibility` | No | `public`, `internal`, or `hidden` | +| Column | `description` | Strongly recommended | Human-readable business meaning | +| Measure | `name` | Yes | Queryable metric name | +| Measure | `expr` | Yes | SQL aggregation expression at the source grain | +| Measure | `filter` | No | SQL predicate applied only to this measure | +| Measure | `description` | Strongly recommended | Definition agents can cite and compare | +| Segment | `name` | Yes | Reusable filter name | +| Segment | `expr` | Yes | SQL predicate for the segment | +| Join | `to` | Yes | Target semantic source name | +| Join | `on` | Yes | SQL join condition using source names or aliases | +| Join | `relationship` | Yes | `many_to_one`, `one_to_many`, or `one_to_one` | +| Join | `alias` | No | Query alias for repeated or clearer joins | + Column visibility controls what agents see: | Visibility | Behavior | @@ -192,6 +223,16 @@ Query flags: The query planner is grain-aware — it understands the cardinality of joins and avoids chasm traps (double-counting caused by many-to-many fan-outs). When you query measures that span multiple sources, KTX generates sub-queries at the correct grain before joining. +### Workflow: edit and validate a source + +1. `ktx sl read orders --connection-id my-postgres > /tmp/orders.yaml` — capture the current definition. +2. Edit `/tmp/orders.yaml` to add columns, measures, joins, or descriptions. +3. 
`ktx sl write orders --connection-id my-postgres --yaml "$(cat /tmp/orders.yaml)"` — write the updated source. +4. `ktx sl validate orders --connection-id my-postgres` — check the definition against the live schema. +5. `ktx sl query --connection-id my-postgres --measure total_revenue --dimension order_date --format sql` — compile a representative query. + +If validation fails, fix the YAML before asking an agent to use the source. Common validation failures are missing columns, invalid join targets, and measure expressions that reference fields outside the source. + ## Knowledge Pages Knowledge pages are Markdown files that capture business context — definitions, rules, gotchas, and anything an agent needs to understand beyond what the schema tells it. @@ -250,6 +291,18 @@ Write flags: | `--ref ` | Reference to external resources (repeatable) | | `--sl-ref ` | Link to a semantic source (repeatable) | +Knowledge page fields: + +| Field | Required | Description | +|-------|----------|-------------| +| Key | Yes | Stable page identifier passed to `ktx wiki read` | +| Summary | Yes | Short text shown in search results | +| Content | Yes | Full Markdown business context | +| Scope | No | `global` for shared context or `user` for user-scoped notes | +| Tags | No | Search and organization labels | +| External refs | No | Links or identifiers for source-of-truth systems | +| Semantic-layer refs | No | Source names the page explains or constrains | + You can also create and edit knowledge pages directly as Markdown files in the `knowledge/` directory. ### Listing pages @@ -271,3 +324,21 @@ ktx wiki search "revenue recognition" ``` Search uses both full-text matching and semantic similarity — it finds relevant pages even when the exact terms don't match. Agents call this automatically when they need business context to answer a question. + +### Workflow: add searchable business context + +1. Search first: `ktx wiki search "order status definitions"`. +2. 
If no page already covers the rule, write a page with `ktx wiki write`. +3. Include a concise `--summary`; agents see this before loading full content. +4. Add `--tag` values for the business area and `--sl-ref` values for related semantic sources. +5. Search again with the user's likely wording to confirm the page is discoverable. + +## Common errors + +| Error or symptom | Likely cause | Recovery | +|------------------|--------------|----------| +| `ktx sl validate` reports a missing column | YAML references a column that is absent from the scanned table | Run a fresh scan or update the YAML to match the warehouse schema | +| Query compilation double-counts a measure | Join relationship or grain is missing or wrong | Add `grain` and explicit `relationship` values, then validate and recompile | +| Agent cannot find a metric | Measure name or description does not match business terminology | Add a measure description and a knowledge page with common synonyms | +| Knowledge search misses a page | Summary and tags do not include likely user wording | Rewrite the summary and add relevant tags, then search again | +| `ktx sl write` changes are hard to review | Large YAML was passed inline | Edit the source file directly or write from a temporary file, then review the git diff | diff --git a/docs-site/content/docs/integrations/context-sources.mdx b/docs-site/content/docs/integrations/context-sources.mdx index 49f387f3..02554e08 100644 --- a/docs-site/content/docs/integrations/context-sources.mdx +++ b/docs-site/content/docs/integrations/context-sources.mdx @@ -7,6 +7,29 @@ Context sources feed your existing analytics tooling into KTX. During ingestion, All context sources are configured in `ktx.yaml` under `connections` with their respective `driver` value. +## Ingestion workflow + +Agents should configure and ingest context sources in this order: + +1. Add the context source connection in `ktx.yaml` or with `ktx setup`. +2. 
Store tokens as `env:NAME` or `file:/path/to/secret`. +3. Run `ktx ingest ` for one source or `ktx ingest --all`. +4. Check progress with `ktx ingest status --json`. +5. Review generated `semantic-layer/` YAML and `knowledge/` Markdown files in git. +6. Validate changed semantic sources with `ktx sl validate`. + +## Shared source fields + +| Field | Required | Description | +|-------|----------|-------------| +| `driver` | Yes | Source adapter: `dbt`, `metricflow`, `lookml`, `metabase`, `looker`, or `notion` | +| `readonly` | Strongly recommended | Marks the source as read-only for KTX | +| `source_dir` | For local file sources | Absolute or project-relative source directory | +| `repo_url` | For Git-hosted sources | Git repository URL | +| `branch` | No | Git branch to read | +| `path` | No | Subdirectory inside a monorepo | +| `auth_token_ref` | For private APIs/repos | `env:NAME` or `file:/path/to/secret` token reference | + ## dbt Ingests schema definitions, model descriptions, column metadata, and test coverage from a dbt project. 
@@ -351,3 +374,13 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in - Notion is knowledge-only — it does not produce semantic layer sources - Rate limits apply; large workspaces may require multiple ingestion runs - `last_successful_cursor` is auto-managed for incremental sync + +## Common errors + +| Error or symptom | Likely cause | Recovery | +|------------------|--------------|----------| +| Adapter cannot read source files | `source_dir`, `repo_url`, `branch`, or `path` is wrong | Verify the path locally or clone the repo manually with the same credentials | +| Private repo/API authentication fails | Token env var or secret file is missing | Export the env var or update `auth_token_ref` to a readable file | +| Ingest creates duplicate context | Existing source names or knowledge pages do not match imported terminology | Review the diff, rename duplicates, and add knowledge pages with canonical names | +| Notion ingest skips pages | Integration lacks access or root ids are missing | Share pages with the Notion integration and set `root_page_ids` or use `all_accessible` carefully | +| Generated semantic sources fail validation | Tool metadata does not match the live warehouse schema | Map BI/source databases to primary warehouse connections and rerun validation | diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index be71cba0..c36260d1 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -11,6 +11,20 @@ All connectors share these conventions: - Connections are read-only — KTX never writes to your database - Schema scanning discovers tables, columns, types, and constraints automatically +## Connection field reference + +Agents should prefer environment or file references over literal secrets. 
+ +| Field | Required | Applies to | Description | +|-------|----------|------------|-------------| +| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `clickhouse`, `mysql`, `sqlserver`, or `sqlite` | +| `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` | +| `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, ClickHouse, SQL Server | Field-by-field connection values | +| `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan | +| `readonly` | Strongly recommended | all primary sources | Marks the connection as read-only in KTX config | +| `historicSql` | No | supported warehouses | Enables query-history ingestion when the warehouse supports it | +| `path` | Yes for path-style SQLite | SQLite | Local SQLite database path or `env:NAME` reference | + ## PostgreSQL The most full-featured connector. Supports schema introspection, foreign key detection, column statistics, and historic SQL via `pg_stat_statements`. @@ -488,3 +502,13 @@ No authentication required — SQLite is file-based. 
The file must be readable b - SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB` - Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON` - In-memory databases supported with `path: ":memory:"` (for testing) + +## Common errors + +| Error or symptom | Likely cause | Recovery | +|------------------|--------------|----------| +| Connection URL appears in git diff | A literal credential URL was written to `ktx.yaml` | Replace it with `env:NAME` or `file:/path/to/secret` and rotate exposed credentials | +| Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | +| Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup | +| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output | +| SQL execution fails through agents | Connection is missing, unreachable, or execution is disabled in the server | Run `ktx connection test ` and check `ktx serve` flags | diff --git a/docs-site/content/docs/meta.json b/docs-site/content/docs/meta.json index 816b18d6..6fbee965 100644 --- a/docs-site/content/docs/meta.json +++ b/docs-site/content/docs/meta.json @@ -6,8 +6,8 @@ "concepts", "guides", "integrations", - "benchmarks", "cli-reference", + "ai-resources", "community" ] } diff --git a/docs-site/lib/llm-docs.ts b/docs-site/lib/llm-docs.ts new file mode 100644 index 00000000..9d9b5c74 --- /dev/null +++ b/docs-site/lib/llm-docs.ts @@ -0,0 +1,159 @@ +import { source } from "@/lib/source"; + +const siteOrigin = "https://docs.kaelio.com/ktx"; + +export type LlmDocsPage = { + title: string; + description?: string; + url: string; + markdownUrl: string; + slug: string[]; + getMarkdown: () => Promise<string>; +}; + 
+export function getLlmDocsPages(): LlmDocsPage[] { + return source.getPages().map(toLlmDocsPage); +} + +export function getLlmDocsPage(slug: string[] | undefined) { + const page = source.getPage(slug); + return page ? toLlmDocsPage(page) : null; +} + +export async function getPageMarkdown(page: LlmDocsPage) { + const description = page.description ? `\n\n> ${page.description}` : ""; + const body = await page.getMarkdown(); + + return normalizeMarkdown(`# ${page.title}${description} + +Canonical URL: ${absoluteUrl(page.url)} +Markdown URL: ${absoluteUrl(page.markdownUrl)} + +${body} +`); +} + +export function buildLlmsTxt() { + const pages = getLlmDocsPages(); + const byUrl = new Map(pages.map((page) => [page.url, page])); + const link = (url: string, label: string, fallbackDescription: string) => { + const page = byUrl.get(url); + const description = page?.description ?? fallbackDescription; + const markdownUrl = page?.markdownUrl ?? `${url}.md`; + return `- [${label}](${absoluteUrl(markdownUrl)}): ${description}`; + }; + + return `# KTX + +> Agent-native context layer for analytics engineering and database agents. + +KTX provides semantic-layer files, warehouse scans, knowledge pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins. 
+ +## Agent Entry Points + +${link("/docs/ai-resources", "AI Resources", "Machine-readable docs, prompt recipes, and agent setup paths")} +${link("/docs/ai-resources/agent-quickstart", "Agent Quickstart", "Task-first route for coding assistants using KTX")} +${link("/docs/ai-resources/markdown-access", "Markdown Access", "Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown")} +${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested instructions for coding assistants that need to read and cite KTX docs")} + +## Start Here + +${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")} +${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")} +${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and knowledge pages")} + +## Machine-Readable Documentation + +- [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response +- [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown +- [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough +- [Agent CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-agent.md")}): Machine-readable agent commands + +## CLI Reference + +${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")} +${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")} +${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")} +${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")} +${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")} + +## Integrations + +${link("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses")} 
+${link("/docs/integrations/context-sources", "Context Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion")} + +## All Documentation + +${buildPageIndex(pages)} +`; +} + +export async function buildLlmsFullTxt() { + const rendered = await Promise.all(getLlmDocsPages().map(getPageMarkdown)); + return [`# KTX Full Documentation`, `Source: ${siteOrigin}`, ...rendered].join( + "\n\n---\n\n", + ); +} + +function toLlmDocsPage(page: ReturnType<typeof source.getPages>[number]) { + return { + title: page.data.title, + description: page.data.description, + url: page.url, + markdownUrl: `${page.url}.md`, + slug: page.slugs, + getMarkdown: async () => normalizeMarkdown(page.data.content), + } satisfies LlmDocsPage; +} + +function normalizeMarkdown(markdown: string) { + return markdown + .trim() + .replace(/^---\n[\s\S]*?\n---\n?/, "") + .trim() + .replace(/\n{3,}/g, "\n\n"); +} + +function buildPageIndex(pages: LlmDocsPage[]) { + const grouped = new Map<string, LlmDocsPage[]>(); + + for (const page of pages) { + const category = page.slug[0] ?? "general"; + grouped.set(category, [...(grouped.get(category) ?? []), page]); + } + + return [...grouped.entries()] + .map(([category, categoryPages]) => { + const links = categoryPages + .map((page) => { + const description = page.description ? 
`: ${page.description}` : ""; + return `- [${page.title}](${absoluteUrl(page.markdownUrl)})${description}`; + }) + .join("\n"); + + return `### ${formatCategoryName(category)} + +${links}`; + }) + .join("\n\n"); +} + +function absoluteUrl(path: string) { + return `${siteOrigin}${path}`; +} + +function formatCategoryName(category: string) { + const labels: Record<string, string> = { + "ai-resources": "AI Resources", + "cli-reference": "CLI Reference", + }; + + if (labels[category]) { + return labels[category]; + } + + return category + .split("-") + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(" "); +} diff --git a/docs-site/middleware.ts b/docs-site/middleware.ts new file mode 100644 index 00000000..670ebd33 --- /dev/null +++ b/docs-site/middleware.ts @@ -0,0 +1,51 @@ +import { NextResponse, type NextRequest } from "next/server"; + +const markdownMimeTypes = new Set([ + "text/markdown", + "text/x-markdown", + "application/markdown", +]); + +export function middleware(request: NextRequest) { + if (!isMarkdownPreferred(request.headers.get("accept"))) { + return NextResponse.next(); + } + + const { pathname } = request.nextUrl; + if (!pathname.startsWith("/docs/") || pathname.endsWith(".md")) { + return NextResponse.next(); + } + + const rewriteUrl = request.nextUrl.clone(); + rewriteUrl.pathname = `/llms.mdx${pathname}`; + + return NextResponse.rewrite(rewriteUrl); +} + +export const config = { + matcher: ["/docs/:path*"], +}; + +function isMarkdownPreferred(acceptHeader: string | null) { + if (!acceptHeader) return false; + + const accepted = acceptHeader + .split(",") + .map((entry, index) => { + const [type = "", ...parameters] = entry.trim().split(";"); + const quality = parameters + .map((parameter) => parameter.trim()) + .find((parameter) => parameter.startsWith("q=")); + + return { + type: type.trim().toLowerCase(), + quality: quality ? 
Number.parseFloat(quality.slice(2)) : 1, + index, + }; + }) + .filter((entry) => Number.isFinite(entry.quality) && entry.quality > 0) + .sort((a, b) => b.quality - a.quality || a.index - b.index); + + const preferred = accepted[0]?.type; + return preferred ? markdownMimeTypes.has(preferred) : false; +} diff --git a/docs-site/next.config.mjs b/docs-site/next.config.mjs index d07746eb..8b28486f 100644 --- a/docs-site/next.config.mjs +++ b/docs-site/next.config.mjs @@ -3,6 +3,15 @@ import { createMDX } from "fumadocs-mdx/next"; const withMDX = createMDX(); /** @type {import('next').NextConfig} */ -const config = {}; +const config = { + async rewrites() { + return [ + { + source: "/docs/:path*.md", + destination: "/llms.mdx/docs/:path*", + }, + ]; + }, +}; export default withMDX(config); diff --git a/docs-site/package.json b/docs-site/package.json index 8ab53b93..d4a88999 100644 --- a/docs-site/package.json +++ b/docs-site/package.json @@ -6,7 +6,8 @@ "scripts": { "dev": "next dev", "build": "next build", - "start": "next start" + "start": "next start", + "test": "node --test tests/*.test.mjs" }, "dependencies": { "fumadocs-core": "15.7.13", diff --git a/docs-site/public/brand/ktx-mascot.png b/docs-site/public/brand/ktx-mascot.png new file mode 100644 index 00000000..9440f1a3 Binary files /dev/null and b/docs-site/public/brand/ktx-mascot.png differ diff --git a/docs-site/tests/docs-index-route.test.mjs b/docs-site/tests/docs-index-route.test.mjs new file mode 100644 index 00000000..859ae54e --- /dev/null +++ b/docs-site/tests/docs-index-route.test.mjs @@ -0,0 +1,14 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +const docsSiteUrl = process.env.DOCS_SITE_URL ?? 
"http://localhost:3000"; + +test("/docs redirects to the docs introduction", async () => { + const response = await fetch(`${docsSiteUrl}/docs`, { redirect: "manual" }); + + assert.equal(response.status, 307); + assert.equal( + response.headers.get("location"), + "/docs/getting-started/introduction", + ); +}); diff --git a/docs/superpowers/plans/2026-05-11-agent-friendly-docs-site.md b/docs/superpowers/plans/2026-05-11-agent-friendly-docs-site.md new file mode 100644 index 00000000..0f2eea70 --- /dev/null +++ b/docs/superpowers/plans/2026-05-11-agent-friendly-docs-site.md @@ -0,0 +1,411 @@ +# Agent-Friendly Docs Site Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `docs-site` discoverable and readable by coding agents through `llms.txt`, bundled markdown, per-page markdown routes, markdown negotiation, and stricter agent-friendly docs content. + +**Architecture:** Keep the existing Next 15 + Fumadocs app. Add a small `lib/llm-docs.ts` module that reads Fumadocs pages and builds machine-readable markdown responses, then expose those responses through route handlers and a markdown negotiation proxy. Rewrite existing MDX pages in place so the rendered UI and machine-readable routes share one source of truth. + +**Tech Stack:** Next.js 15 App Router, Fumadocs, MDX, TypeScript, pnpm, Node 22. 
+ +--- + +### Task 1: Machine-Readable Docs Routes + +**Files:** +- Create: `docs-site/lib/llm-docs.ts` +- Create: `docs-site/app/llms.txt/route.ts` +- Create: `docs-site/app/llms-full.txt/route.ts` +- Create: `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts` +- Modify: `docs-site/next.config.mjs` + +- [ ] **Step 1: Add the LLM docs utility** + +Create `docs-site/lib/llm-docs.ts` with functions that: + +```ts +import { source } from "@/lib/source"; + +const SITE_ORIGIN = "https://ktx.dev"; + +export type LlmDocsPage = { + title: string; + description?: string; + url: string; + markdownUrl: string; + slug: string[]; + getMarkdown: () => Promise<string>; +}; + +export function getLlmDocsPages(): LlmDocsPage[] { + return source.getPages().map((page) => ({ + title: page.data.title, + description: page.data.description, + url: page.url, + markdownUrl: `${page.url}.md`, + slug: page.slugs, + getMarkdown: async () => normalizeMarkdown(await page.data.getText("raw")), + })); +} + +export function getLlmDocsPage(slug: string[] | undefined) { + const page = source.getPage(slug); + if (!page) return null; + + return { + title: page.data.title, + description: page.data.description, + url: page.url, + markdownUrl: `${page.url}.md`, + slug: page.slugs, + getMarkdown: async () => normalizeMarkdown(await page.data.getText("raw")), + } satisfies LlmDocsPage; +} + +export async function getPageMarkdown(page: LlmDocsPage) { + const body = await page.getMarkdown(); + const description = page.description ? `\n\n> ${page.description}` : ""; + + return `# ${page.title}${description}\n\nCanonical URL: ${page.url}\nMarkdown URL: ${page.markdownUrl}\n\n${body}`; +} + +export function buildLlmsTxt() { + const pages = getLlmDocsPages(); + const byUrl = new Map(pages.map((page) => [page.url, page])); + const link = (url: string, label: string, fallbackDescription: string) => { + const page = byUrl.get(url); + const description = page?.description ?? 
fallbackDescription; + return `- [${label}](${url}): ${description}`; + }; + + return `# KTX + +> Agent-native context layer for analytics engineering and database agents. + +KTX provides semantic-layer files, warehouse scans, knowledge pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins. + +## Start Here + +${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")} +${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")} +${link("/docs/guides/serving-agents", "Serving Agents", "Expose KTX context through MCP and CLI tools")} +${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and knowledge pages")} + +## Machine-Readable Documentation + +- [Full documentation](/llms-full.txt): All docs pages in one plain-text markdown response +- [Quickstart markdown](/docs/getting-started/quickstart.md): Raw markdown for the setup guide +- [Agent CLI markdown](/docs/cli-reference/ktx-agent.md): Raw markdown for machine-readable agent commands +- [Serving Agents markdown](/docs/guides/serving-agents.md): Raw markdown for MCP and CLI workflows + +## CLI Reference + +${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")} +${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")} +${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")} +${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")} +${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")} + +## Integrations + +${link("/docs/integrations/agent-clients", "Agent Clients", "Configure Claude Code, Cursor, Codex, and OpenCode")} +${link("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses")} +${link("/docs/integrations/context-sources", "Context 
Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion")} +`; +} + +export async function buildLlmsFullTxt() { + const pages = getLlmDocsPages(); + const rendered = await Promise.all(pages.map(getPageMarkdown)); + return [`# KTX Full Documentation`, `Source: ${SITE_ORIGIN}`, ...rendered].join("\n\n---\n\n"); +} + +function normalizeMarkdown(markdown: string) { + return markdown.trim().replace(/\n{3,}/g, "\n\n"); +} +``` + +- [ ] **Step 2: Add route handlers** + +Create route files: + +```ts +import { buildLlmsTxt } from "@/lib/llm-docs"; + +export const dynamic = "force-static"; + +export function GET() { + return new Response(buildLlmsTxt(), { + headers: { "Content-Type": "text/plain; charset=utf-8" }, + }); +} +``` + +```ts +import { buildLlmsFullTxt } from "@/lib/llm-docs"; + +export const dynamic = "force-static"; + +export async function GET() { + return new Response(await buildLlmsFullTxt(), { + headers: { "Content-Type": "text/plain; charset=utf-8" }, + }); +} +``` + +```ts +import { getLlmDocsPage, getLlmDocsPages, getPageMarkdown } from "@/lib/llm-docs"; +import { notFound } from "next/navigation"; + +export const dynamic = "force-static"; + +export async function GET( + _request: Request, + props: { params: Promise<{ slug?: string[] }> }, +) { + const params = await props.params; + const page = getLlmDocsPage(params.slug); + if (!page) notFound(); + + return new Response(await getPageMarkdown(page), { + headers: { "Content-Type": "text/markdown; charset=utf-8" }, + }); +} + +export function generateStaticParams() { + return getLlmDocsPages().map((page) => ({ slug: page.slug })); +} +``` + +- [ ] **Step 3: Add `.md` rewrite** + +Modify `docs-site/next.config.mjs`: + +```js +import { createMDX } from "fumadocs-mdx/next"; + +const withMDX = createMDX(); + +/** @type {import('next').NextConfig} */ +const config = { + async rewrites() { + return [ + { + source: "/docs/:path*.md", + destination: "/llms.mdx/docs/:path*", + }, + ]; + }, +}; + +export default 
withMDX(config); +``` + +- [ ] **Step 4: Build check** + +Run: `pnpm --filter ktx-docs build` + +Expected: Next build completes and static routes include `llms.txt`, `llms-full.txt`, and the LLM markdown route. + +### Task 2: Markdown Negotiation + +**Files:** +- Create: `docs-site/proxy.ts` + +- [ ] **Step 1: Add markdown negotiation proxy** + +Create `docs-site/proxy.ts`: + +```ts +import { isMarkdownPreferred, rewritePath } from "fumadocs-core/negotiation"; +import { NextResponse, type NextRequest } from "next/server"; + +const { rewrite } = rewritePath("/docs/*path", "/llms.mdx/docs/*path"); + +export function proxy(request: NextRequest) { + if (!isMarkdownPreferred(request)) { + return NextResponse.next(); + } + + const rewrittenPath = rewrite(request.nextUrl.pathname); + if (!rewrittenPath) { + return NextResponse.next(); + } + + return NextResponse.rewrite(new URL(rewrittenPath, request.nextUrl)); +} + +export const config = { + matcher: ["/docs/:path*"], +}; +``` + +- [ ] **Step 2: Verify build** + +Run: `pnpm --filter ktx-docs build` + +Expected: Build passes with the proxy included. + +### Task 3: Agent-Friendly High-Priority Guides + +**Files:** +- Modify: `docs-site/content/docs/getting-started/quickstart.mdx` +- Modify: `docs-site/content/docs/guides/serving-agents.mdx` +- Modify: `docs-site/content/docs/guides/writing-context.mdx` + +- [ ] **Step 1: Rewrite quickstart structure** + +Add sections for: + +- Workflow summary +- Generated files +- Common errors and recovery + +Keep existing setup detail, but make each command block copy-pasteable and each expected output complete enough for agents to recognize success. + +- [ ] **Step 2: Rewrite Serving Agents as API reference** + +Add tables for MCP tool inputs and CLI command inputs. 
Add workflows: + +- Answer an analytics question through MCP +- Answer an analytics question through CLI +- Safely execute SQL with row limits + +- [ ] **Step 3: Rewrite Writing Context with schemas and workflows** + +Add semantic-source field tables, knowledge-page field tables, and workflows: + +- Inspect a source +- Edit and validate a source +- Query through the semantic layer +- Write and search a knowledge page + +- [ ] **Step 4: Build check** + +Run: `pnpm --filter ktx-docs build` + +Expected: MDX compiles without syntax errors. + +### Task 4: CLI Reference Normalization + +**Files:** +- Modify: `docs-site/content/docs/cli-reference/*.mdx` + +- [ ] **Step 1: Normalize every CLI page** + +For each CLI reference page, ensure this structure exists: + +```md +## Command signature + +```bash +ktx [subcommand] [options] +``` + +## Subcommands + +| Subcommand | Description | +|---|---| + +## Options + +| Flag | Type | Required | Description | Default | +|---|---|---|---|---| + +## Examples + +```bash +ktx --real-flag realistic-value +``` + +## Output + +```text +complete expected output shape +``` + +## Common errors + +| Error | Cause | Recovery | +|---|---|---| +``` + +Only add sections that are relevant to the command; do not invent output for commands whose output is intentionally interactive. + +- [ ] **Step 2: Build check** + +Run: `pnpm --filter ktx-docs build` + +Expected: MDX compiles without syntax errors. + +### Task 5: Integration and Concept Page Polish + +**Files:** +- Modify: `docs-site/content/docs/integrations/agent-clients.mdx` +- Modify: `docs-site/content/docs/integrations/primary-sources.mdx` +- Modify: `docs-site/content/docs/integrations/context-sources.mdx` +- Modify: `docs-site/content/docs/concepts/*.mdx` +- Modify: `docs-site/content/docs/benchmarks/link-detection.mdx` + +- [ ] **Step 1: Normalize integrations** + +Add structured sections for supported values, config snippets, authentication, generated files, and recovery notes. 
Keep existing examples aligned with current KTX commands. + +- [ ] **Step 2: Add agent usage notes** + +For concept and benchmark pages, add a compact `## Agent usage notes` section that tells agents when the page is relevant and which concrete page to read next. + +- [ ] **Step 3: Build check** + +Run: `pnpm --filter ktx-docs build` + +Expected: MDX compiles without syntax errors. + +### Task 6: Route Verification and Final Checks + +**Files:** +- No required source changes unless verification finds a bug. + +- [ ] **Step 1: Run production build** + +Run: `pnpm --filter ktx-docs build` + +Expected: Build succeeds. + +- [ ] **Step 2: Run TypeScript check** + +Run: `pnpm --filter ktx-docs exec tsc --noEmit` + +Expected: TypeScript exits successfully. + +- [ ] **Step 3: Start local server** + +Run: `pnpm --filter ktx-docs start` + +Expected: Server starts on an available port. + +- [ ] **Step 4: Verify machine-readable routes** + +Run: + +```bash +curl -i http://localhost:3000/llms.txt +curl -i http://localhost:3000/llms-full.txt +curl -i http://localhost:3000/docs/getting-started/quickstart.md +curl -i -H "Accept: text/markdown" http://localhost:3000/docs/getting-started/quickstart +curl -i http://localhost:3000/docs/not-a-page.md +``` + +Expected: + +- `/llms.txt`: `200`, `Content-Type: text/plain; charset=utf-8` +- `/llms-full.txt`: `200`, `Content-Type: text/plain; charset=utf-8` +- `/docs/getting-started/quickstart.md`: `200`, `Content-Type: text/markdown; charset=utf-8` +- `/docs/getting-started/quickstart` with `Accept: text/markdown`: `200`, `Content-Type: text/markdown; charset=utf-8` +- `/docs/not-a-page.md`: `404` + +- [ ] **Step 5: Inspect final diff** + +Run: `git diff --stat && git diff --check` + +Expected: Diff contains only docs-site and plan changes, with no whitespace errors. 
diff --git a/docs/superpowers/plans/2026-05-11-demo-guided-tour.md b/docs/superpowers/plans/2026-05-11-demo-guided-tour.md new file mode 100644 index 00000000..3204e111 --- /dev/null +++ b/docs/superpowers/plans/2026-05-11-demo-guided-tour.md @@ -0,0 +1,813 @@ +# Demo Guided Tour Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the disconnected "Try KTX with packaged demo data" flow with a guided tour that walks users through the same setup wizard steps using pre-filled, read-only selections, then connects their agent to the populated demo project. + +**Architecture:** A new `setup-demo-tour.ts` module owns the demo tour flow. It renders read-only cards (database, sources), a simulated context build replay using the existing `renderContextBuildView` + `createRepainter` pipeline from `context-build-view.ts`, then hands off to the real `runKtxSetupAgentsStep`. The entry point in `setup.ts` (`runKtxSetupDemoFromEntryMenu`) is rewired to call this new module instead of `runKtxDemo`. + +**Tech Stack:** TypeScript (ESM), Node.js raw stdin for keypress handling, existing `@clack/prompts` visual patterns, vitest for tests. 
+ +--- + +### Task 1: Create `setup-demo-tour.ts` with keypress utility and banner + +**Files:** +- Create: `packages/cli/src/setup-demo-tour.ts` +- Test: `packages/cli/src/setup-demo-tour.test.ts` + +- [ ] **Step 1: Write the failing test for `renderDemoBanner`** + +```typescript +// packages/cli/src/setup-demo-tour.test.ts +import { describe, expect, it } from 'vitest'; +import { renderDemoBanner } from './setup-demo-tour.js'; + +describe('renderDemoBanner', () => { + it('includes demo mode explanation', () => { + const output = renderDemoBanner(); + expect(output).toContain('Demo mode'); + expect(output).toContain('pre-processed'); + expect(output).toContain('read-only'); + }); +}); +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: FAIL — module not found + +- [ ] **Step 3: Implement `renderDemoBanner` and `waitForDemoNavigation`** + +```typescript +// packages/cli/src/setup-demo-tour.ts +import type { KtxCliIo } from './cli-runtime.js'; +import { KtxSetupExitError } from './setup-interrupt.js'; + +const ESC = String.fromCharCode(0x1b); + +function cyan(text: string): string { + return `${ESC}[36m${text}${ESC}[39m`; +} + +function dim(text: string): string { + return `${ESC}[2m${text}${ESC}[22m`; +} + +export function renderDemoBanner(): string { + const lines = [ + '', + `┌ ${cyan('Demo mode')} — data has been pre-processed and KTX context is already built.`, + `│ This walkthrough illustrates the setup steps. 
Selections are pre-filled and read-only.`, + '', + ]; + return lines.join('\n'); +} + +export async function waitForDemoNavigation( + stdin: NodeJS.ReadStream = process.stdin, +): Promise<'forward' | 'back'> { + return new Promise((resolve, reject) => { + const wasRaw = stdin.isRaw; + if (stdin.setRawMode) stdin.setRawMode(true); + stdin.resume(); + + const onData = (data: Buffer) => { + const key = data.toString(); + if (key === '\r' || key === '\n') { + cleanup(); + resolve('forward'); + } else if (key === '\x1b') { + cleanup(); + resolve('back'); + } else if (key === '\x03') { + cleanup(); + reject(new KtxSetupExitError()); + } + }; + + const cleanup = () => { + stdin.off('data', onData); + if (stdin.setRawMode) stdin.setRawMode(wasRaw ?? false); + }; + + stdin.on('data', onData); + }); +} +``` + +- [ ] **Step 4: Run the test to verify it passes** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "feat(cli): add demo tour banner and keypress navigation utility" +``` + +--- + +### Task 2: Add `renderDemoCard` function + +**Files:** +- Modify: `packages/cli/src/setup-demo-tour.ts` +- Modify: `packages/cli/src/setup-demo-tour.test.ts` + +- [ ] **Step 1: Write the failing test for `renderDemoCard`** + +Append to the test file: + +```typescript +import { renderDemoCardContent } from './setup-demo-tour.js'; + +describe('renderDemoCardContent', () => { + it('renders a card with title and selections', () => { + const output = renderDemoCardContent('Database connection', ['PostgreSQL (demo warehouse)']); + expect(output).toContain('Database connection'); + expect(output).toContain('PostgreSQL (demo warehouse)'); + expect(output).toContain('Press Enter to continue'); + expect(output).toContain('Escape to go back'); + }); + + it('renders multiple selections', () => { + const output = 
renderDemoCardContent('Context sources', ['dbt', 'Metabase', 'Notion']); + expect(output).toContain('dbt'); + expect(output).toContain('Metabase'); + expect(output).toContain('Notion'); + }); +}); +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: FAIL — `renderDemoCardContent` not exported + +- [ ] **Step 3: Implement `renderDemoCardContent` and `renderDemoCard`** + +Add to `setup-demo-tour.ts`: + +```typescript +export function renderDemoCardContent(title: string, selections: string[]): string { + const lines = [ + `┌ ${title}`, + '│', + ...selections.map((s) => `│ ${cyan('▸')} ${s}`), + '│', + `│ ${dim('Press Enter to continue, Escape to go back')}`, + '└', + '', + ]; + return lines.join('\n'); +} + +export async function renderDemoCard( + title: string, + selections: string[], + io: KtxCliIo, + stdin?: NodeJS.ReadStream, + waitNav?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>, +): Promise<'forward' | 'back'> { + io.stdout.write(renderDemoBanner()); + io.stdout.write(renderDemoCardContent(title, selections)); + const nav = waitNav ?? 
waitForDemoNavigation; + return nav(stdin); +} +``` + +- [ ] **Step 4: Run the test to verify it passes** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "feat(cli): add demo tour read-only card rendering" +``` + +--- + +### Task 3: Add demo context build replay animation + +**Files:** +- Modify: `packages/cli/src/setup-demo-tour.ts` +- Modify: `packages/cli/src/setup-demo-tour.test.ts` + +- [ ] **Step 1: Write the failing test for demo replay event sequence** + +Append to the test file: + +```typescript +import { buildDemoReplayTimeline, DEMO_REPLAY_TARGETS } from './setup-demo-tour.js'; + +describe('buildDemoReplayTimeline', () => { + it('produces events for all four demo targets', () => { + const events = buildDemoReplayTimeline(); + const connectionIds = new Set(events.map((e) => e.connectionId)); + expect(connectionIds).toEqual(new Set(['demo-warehouse', 'dbt', 'metabase', 'notion'])); + }); + + it('ends with all targets done', () => { + const events = buildDemoReplayTimeline(); + const lastByConnection = new Map(); + for (const e of events) { + lastByConnection.set(e.connectionId, e.status); + } + for (const status of lastByConnection.values()) { + expect(status).toBe('done'); + } + }); + + it('events are sorted by delayMs', () => { + const events = buildDemoReplayTimeline(); + for (let i = 1; i < events.length; i++) { + expect(events[i]!.delayMs).toBeGreaterThanOrEqual(events[i - 1]!.delayMs); + } + }); +}); + +describe('DEMO_REPLAY_TARGETS', () => { + it('has one primary source and three context sources', () => { + expect(DEMO_REPLAY_TARGETS.primarySources).toHaveLength(1); + expect(DEMO_REPLAY_TARGETS.contextSources).toHaveLength(3); + }); +}); +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern 
setup-demo-tour` +Expected: FAIL — exports not found + +- [ ] **Step 3: Implement replay timeline and target definitions** + +Add to `setup-demo-tour.ts`: + +```typescript +import type { KtxPublicIngestPlanTarget } from './public-ingest.js'; +import type { ContextBuildTargetState, ContextBuildViewState } from './context-build-view.js'; + +export interface DemoReplayEvent { + delayMs: number; + connectionId: string; + status: 'running' | 'done'; + detailLine: string | null; + summaryText: string | null; +} + +function createDemoTarget(connectionId: string, operation: 'scan' | 'source-ingest', driver: string): KtxPublicIngestPlanTarget { + return { + connectionId, + driver, + operation, + debugCommand: `ktx ${operation === 'scan' ? 'scan' : 'ingest'} ${connectionId}`, + steps: operation === 'scan' ? ['scan'] : ['source-ingest'], + }; +} + +const primaryTarget = createDemoTarget('demo-warehouse', 'scan', 'postgres'); +const dbtTarget = createDemoTarget('dbt', 'source-ingest', 'dbt'); +const metabaseTarget = createDemoTarget('metabase', 'source-ingest', 'metabase'); +const notionTarget = createDemoTarget('notion', 'source-ingest', 'notion'); + +function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState { + return { + target, + status: 'queued', + detailLine: null, + summaryText: null, + startedAt: null, + elapsedMs: 0, + }; +} + +export const DEMO_REPLAY_TARGETS = { + primarySources: [primaryTarget], + contextSources: [dbtTarget, metabaseTarget, notionTarget], +}; + +export function buildDemoReplayTimeline(): DemoReplayEvent[] { + return [ + { delayMs: 0, connectionId: 'demo-warehouse', status: 'running', detailLine: 'scanning...', summaryText: null }, + { delayMs: 600, connectionId: 'demo-warehouse', status: 'running', detailLine: '[50%] scanning...', summaryText: null }, + { delayMs: 1200, connectionId: 'demo-warehouse', status: 'done', detailLine: null, summaryText: 'completed' }, + { delayMs: 1200, connectionId: 'dbt', status: 'running', 
detailLine: 'ingesting...', summaryText: null }, + { delayMs: 1800, connectionId: 'dbt', status: 'running', detailLine: '[60%] ingesting...', summaryText: null }, + { delayMs: 2200, connectionId: 'dbt', status: 'done', detailLine: null, summaryText: 'completed' }, + { delayMs: 2200, connectionId: 'metabase', status: 'running', detailLine: 'ingesting...', summaryText: null }, + { delayMs: 2800, connectionId: 'metabase', status: 'done', detailLine: null, summaryText: 'completed' }, + { delayMs: 2800, connectionId: 'notion', status: 'running', detailLine: 'ingesting...', summaryText: null }, + { delayMs: 3400, connectionId: 'notion', status: 'done', detailLine: null, summaryText: 'completed' }, + ]; +} +``` + +- [ ] **Step 4: Run the test to verify it passes** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: PASS + +- [ ] **Step 5: Implement `runDemoContextReplay` animation driver** + +Add to `setup-demo-tour.ts`: + +```typescript +import { renderContextBuildView, createRepainter } from './context-build-view.js'; + +export async function runDemoContextReplay( + io: KtxCliIo, + stdin?: NodeJS.ReadStream, +): Promise<'forward' | 'back'> { + const repainter = createRepainter(io); + const timeline = buildDemoReplayTimeline(); + + const state: ContextBuildViewState = { + primarySources: DEMO_REPLAY_TARGETS.primarySources.map((t) => createTargetState(t)), + contextSources: DEMO_REPLAY_TARGETS.contextSources.map((t) => createTargetState(t)), + frame: 0, + startedAt: Date.now(), + totalElapsedMs: 0, + }; + + const allTargets = [...state.primarySources, ...state.contextSources]; + const targetMap = new Map(allTargets.map((t) => [t.target.connectionId, t])); + let eventIndex = 0; + const startTime = Date.now(); + const FRAME_MS = 120; + + await new Promise<void>((resolve) => { + const interval = setInterval(() => { + const elapsed = Date.now() - startTime; + state.frame += 1; + state.totalElapsedMs = elapsed; + + while (eventIndex < 
timeline.length && timeline[eventIndex]!.delayMs <= elapsed) { + const event = timeline[eventIndex]!; + const target = targetMap.get(event.connectionId); + if (target) { + target.status = event.status; + target.detailLine = event.detailLine; + target.summaryText = event.summaryText; + if (event.status === 'running' && target.startedAt === null) { + target.startedAt = Date.now(); + } + if (event.status === 'done') { + target.elapsedMs = target.startedAt ? Date.now() - target.startedAt : 0; + } + } + eventIndex += 1; + } + + for (const t of allTargets) { + if (t.status === 'running' && t.startedAt !== null) { + t.elapsedMs = Date.now() - t.startedAt; + } + } + + repainter.paint(renderContextBuildView(state, { styled: io.stdout.isTTY ?? false, showHint: false })); + + if (eventIndex >= timeline.length && allTargets.every((t) => t.status === 'done')) { + clearInterval(interval); + resolve(); + } + }, FRAME_MS); + }); + + io.stdout.write(renderDemoContextCompletionSummary()); + return waitForDemoNavigation(stdin); +} + +function renderDemoContextCompletionSummary(): string { + const lines = [ + '', + `${cyan('★')} KTX finished ingesting demo data`, + '', + ' Placeholder — final counts will come from pre-packaged demo results.', + '', + ` ${dim('Press Enter to continue, Escape to go back')}`, + '', + ]; + return lines.join('\n'); +} +``` + +Note: `renderDemoContextCompletionSummary` is a placeholder that will be updated when +the user provides the real pre-packaged demo data. The summary counts (business areas, +query definitions, knowledge pages) will be populated from those assets. 
+ +- [ ] **Step 6: Run tests and type-check** + +Run: `pnpm --filter @ktx/cli run type-check && pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: PASS + +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "feat(cli): add demo context build replay animation" +``` + +--- + +### Task 4: Add transition message and completion summary + +**Files:** +- Modify: `packages/cli/src/setup-demo-tour.ts` +- Modify: `packages/cli/src/setup-demo-tour.test.ts` + +- [ ] **Step 1: Write the failing tests** + +Append to test file: + +```typescript +import { renderDemoAgentTransition, renderDemoCompletionSummary } from './setup-demo-tour.js'; + +describe('renderDemoAgentTransition', () => { + it('includes transition message about connecting agent', () => { + const output = renderDemoAgentTransition(); + expect(output).toContain('Demo project is ready'); + expect(output).toContain('connect your agent'); + }); +}); + +describe('renderDemoCompletionSummary', () => { + it('includes project path and temp warning', () => { + const output = renderDemoCompletionSummary('/tmp/ktx-demo-abc123', true); + expect(output).toContain('/tmp/ktx-demo-abc123'); + expect(output).toContain('temporary'); + expect(output).toContain('ktx setup'); + }); + + it('shows manual agent instructions when agent not installed', () => { + const output = renderDemoCompletionSummary('/tmp/ktx-demo-abc123', false); + expect(output).toContain('ktx setup --agents'); + }); + + it('shows success message when agent installed', () => { + const output = renderDemoCompletionSummary('/tmp/ktx-demo-abc123', true); + expect(output).toContain('agent is connected'); + }); +}); +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: FAIL — exports not found + +- [ ] **Step 3: Implement transition and completion rendering** + +Add 
to `setup-demo-tour.ts`: + +```typescript +export function renderDemoAgentTransition(): string { + const lines = [ + '', + `┌ Demo project is ready — let's connect your agent`, + '│', + '│ Your KTX context has been built with demo data.', + '│ Select an agent to start using it.', + '└', + '', + ]; + return lines.join('\n'); +} + +export function renderDemoCompletionSummary(projectDir: string, agentInstalled: boolean): string { + const lines = [ + '', + `${cyan('★')} KTX demo is ready`, + '', + ]; + + if (agentInstalled) { + lines.push(' Your agent is connected to a demo KTX project.'); + } else { + lines.push(' Demo project created. Connect an agent to start using it:'); + lines.push(` $ ktx setup --agents --project-dir ${projectDir}`); + } + + lines.push( + '', + ` ${dim('⚠')} This project is in a temporary directory and will be`, + ` cleaned up by your system. To set up KTX with your own`, + ' data, run: ktx setup', + '', + ` Project: ${projectDir}`, + '', + ); + return lines.join('\n'); +} +``` + +- [ ] **Step 4: Run the test to verify it passes** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "feat(cli): add demo tour transition and completion summary" +``` + +--- + +### Task 5: Implement `runDemoTour` orchestrator + +**Files:** +- Modify: `packages/cli/src/setup-demo-tour.ts` +- Modify: `packages/cli/src/setup-demo-tour.test.ts` + +- [ ] **Step 1: Write the failing test for the orchestrator** + +Append to test file: + +```typescript +import { vi } from 'vitest'; +import type { KtxSetupAgentsResult } from './setup-agents.js'; +import { runDemoTour } from './setup-demo-tour.js'; + +describe('runDemoTour', () => { + function createMockIo() { + const chunks: string[] = []; + return { + io: { + stdout: { isTTY: true, columns: 80, write: (chunk: string) => { chunks.push(chunk); } 
}, + stderr: { write: () => {} }, + }, + chunks, + }; + } + + it('returns 0 on successful tour with agent installed', async () => { + const { io } = createMockIo(); + const mockAgents = vi.fn<() => Promise<KtxSetupAgentsResult>>().mockResolvedValue({ + status: 'ready', + projectDir: '/tmp/test', + installs: [{ target: 'claude-code' as const, scope: 'project' as const, mode: 'both' as const }], + }); + + const navigation = vi.fn<() => Promise<'forward' | 'back'>>().mockResolvedValue('forward'); + + const result = await runDemoTour( + { inputMode: 'auto' }, + io, + { agents: mockAgents, waitForNavigation: navigation, skipReplayAnimation: true }, + ); + expect(result).toBe(0); + expect(mockAgents).toHaveBeenCalled(); + }); + + it('handles back navigation from first step', async () => { + const { io } = createMockIo(); + const navigation = vi.fn<() => Promise<'forward' | 'back'>>().mockResolvedValue('back'); + + const result = await runDemoTour( + { inputMode: 'auto' }, + io, + { waitForNavigation: navigation, skipReplayAnimation: true }, + ); + expect(result).toBe(0); + }); +}); +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: FAIL — `runDemoTour` not exported or wrong signature + +- [ ] **Step 3: Implement `runDemoTour`** + +Add to `setup-demo-tour.ts`: + +```typescript +import { defaultDemoProjectDir, ensureSeededDemoProject } from './demo-assets.js'; +import type { KtxSetupAgentsResult } from './setup-agents.js'; +import { runKtxSetupAgentsStep } from './setup-agents.js'; + +type DemoStep = 'databases' | 'sources' | 'context' | 'agents'; + +const DEMO_STEPS: DemoStep[] = ['databases', 'sources', 'context', 'agents']; + +export interface DemoTourDeps { + agents?: (args: Parameters<typeof runKtxSetupAgentsStep>[0], io: KtxCliIo) => Promise<KtxSetupAgentsResult>; + waitForNavigation?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>; + ensureProject?: typeof ensureSeededDemoProject; + skipReplayAnimation?: boolean; +} + +export async 
function runDemoTour( + args: { inputMode: 'auto' | 'disabled' }, + io: KtxCliIo, + deps: DemoTourDeps = {}, +): Promise<number> { + const waitNav = deps.waitForNavigation ?? waitForDemoNavigation; + const ensureProject = deps.ensureProject ?? ensureSeededDemoProject; + + const projectDir = defaultDemoProjectDir(); + await ensureProject({ projectDir }); + + let stepIndex = 0; + + while (stepIndex < DEMO_STEPS.length) { + const step = DEMO_STEPS[stepIndex]!; + let direction: 'forward' | 'back'; + + if (step === 'databases') { + direction = await renderDemoCard('Database connection', ['PostgreSQL (demo warehouse)'], io, undefined, waitNav); + } else if (step === 'sources') { + direction = await renderDemoCard('Context sources', ['dbt', 'Metabase', 'Notion'], io, undefined, waitNav); + } else if (step === 'context') { + io.stdout.write(renderDemoBanner()); + if (deps.skipReplayAnimation) { + direction = await waitNav(); + } else { + direction = await runDemoContextReplay(io); + } + } else { + io.stdout.write(renderDemoAgentTransition()); + const agentsRunner = deps.agents ?? 
runKtxSetupAgentsStep; + const agentsResult = await agentsRunner( + { + projectDir, + inputMode: args.inputMode, + yes: false, + agents: true, + scope: 'project', + mode: 'both', + skipAgents: false, + }, + io, + ); + const agentInstalled = agentsResult.status === 'ready'; + if (agentsResult.status === 'back') { + direction = 'back'; + } else { + io.stdout.write(renderDemoCompletionSummary(projectDir, agentInstalled)); + return 0; + } + } + + if (direction === 'back') { + if (stepIndex === 0) return 0; + stepIndex -= 1; + } else { + stepIndex += 1; + } + } + + return 0; +} +``` + +- [ ] **Step 4: Run the test to verify it passes** + +Run: `pnpm --filter @ktx/cli run test -- --testPathPattern setup-demo-tour` +Expected: PASS + +- [ ] **Step 5: Run type-check** + +Run: `pnpm --filter @ktx/cli run type-check` +Expected: PASS — all types align with existing interfaces + +- [ ] **Step 6: Commit** + +```bash +git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "feat(cli): add runDemoTour orchestrator with step navigation" +``` + +--- + +### Task 6: Wire up in `setup.ts` + +**Files:** +- Modify: `packages/cli/src/setup.ts` + +- [ ] **Step 1: Read the current `runKtxSetupDemoFromEntryMenu` function** + +Read `packages/cli/src/setup.ts` and locate `runKtxSetupDemoFromEntryMenu` (around lines 218-233). + +Current implementation: +```typescript +async function runKtxSetupDemoFromEntryMenu( + args: Extract, + io: KtxCliIo, + deps: KtxSetupDeps, +): Promise { + const runner = deps.demo ?? 
(await import('./demo.js')).runKtxDemo; + return await runner( + { + command: 'seeded', + projectDir: defaultDemoProjectDir(), + outputMode: 'viz', + inputMode: args.inputMode, + }, + io, + ); +} +``` + +- [ ] **Step 2: Replace with demo tour call** + +Replace the function body to call `runDemoTour`: + +```typescript +async function runKtxSetupDemoFromEntryMenu( + args: Extract, + io: KtxCliIo, + deps: KtxSetupDeps, +): Promise { + const { runDemoTour } = await import('./setup-demo-tour.js'); + return await runDemoTour( + { inputMode: args.inputMode }, + io, + { agents: deps.agents }, + ); +} +``` + +- [ ] **Step 3: Update imports — remove unused `defaultDemoProjectDir` import if no longer needed elsewhere in setup.ts** + +Check if `defaultDemoProjectDir` is used elsewhere in `setup.ts`. If it's only used +in `runKtxSetupDemoFromEntryMenu`, remove the import. If used elsewhere, keep it. + +Also check if the `KtxDemoArgs` import is still needed. If `runKtxSetupDemoFromEntryMenu` +was the only consumer of `deps.demo` with that type, it may now be unused. Keep the +`demo` slot in `KtxSetupDeps` for backwards compatibility but it will no longer be +called from the entry menu path. + +- [ ] **Step 4: Run type-check and tests** + +Run: `pnpm --filter @ktx/cli run type-check && pnpm --filter @ktx/cli run test` +Expected: PASS — existing tests continue to work, demo tour is now wired in + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup.ts +git commit -m "feat(cli): wire demo tour into setup entry menu" +``` + +--- + +### Task 7: End-to-end verification + +**Files:** +- None (verification only) + +- [ ] **Step 1: Run full test suite** + +Run: `pnpm --filter @ktx/cli run test 2>&1 | tee /tmp/ktx-demo-tour-test.log` +Expected: All tests pass. Check the output for any regressions. 
+ +- [ ] **Step 2: Run type-check across workspace** + +Run: `pnpm run type-check` +Expected: PASS + +- [ ] **Step 3: Run pre-commit checks if available** + +Run: `pnpm run check` (if configured) +Expected: PASS + +- [ ] **Step 4: Manual smoke test (if TTY available)** + +Run: `pnpm --filter @ktx/cli run build && node packages/cli/dist/cli.js setup` + +1. Select "Try KTX with packaged demo data" +2. Verify demo banner appears with full explanation text +3. Verify "Database connection" card shows with "PostgreSQL (demo warehouse)" +4. Press Enter → verify "Context sources" card shows with dbt, Metabase, Notion +5. Press Escape → verify you go back to database card +6. Press Enter twice → verify context build replay animation runs +7. Verify completion summary appears after replay +8. Press Enter → verify agents step prompt appears (interactive) +9. Press Escape all the way back → verify you return to entry menu + +- [ ] **Step 5: Final commit if any adjustments needed** + +```bash +git add -A +git commit -m "fix(cli): demo tour adjustments from smoke test" +``` + +--- + +## Open Seams for Demo Data + +When the user provides the real pre-packaged demo results, update these locations: + +1. **`renderDemoContextCompletionSummary()`** in `setup-demo-tour.ts` — replace placeholder text with actual counts (business areas, query definitions, knowledge pages) from the demo data +2. **`buildDemoReplayTimeline()`** in `setup-demo-tour.ts` — adjust timing and progress details to match the real ingestion profile +3. **`demo-assets.ts`** — update `REQUIRED_SEEDED_ASSET_PATHS` and `demoConfig()` if the demo dataset changes from SQLite/Orbit to Postgres/dbt/Metabase/Notion +4. 
**Pre-packaged asset files** in `packages/cli/assets/demo/` — replace with the new demo dataset diff --git a/docs/superpowers/specs/2026-05-11-agent-friendly-docs-site-design.md b/docs/superpowers/specs/2026-05-11-agent-friendly-docs-site-design.md new file mode 100644 index 00000000..69784f5e --- /dev/null +++ b/docs/superpowers/specs/2026-05-11-agent-friendly-docs-site-design.md @@ -0,0 +1,171 @@ +# Agent-Friendly Docs Site Design + +## Goal + +Make `docs-site` easier for coding agents and LLM readers to discover, ingest, +and use. The work applies the Vercel Academy agent-friendly docs patterns to the +KTX documentation site while preserving the current Fumadocs + Next.js +architecture. + +Success means agents can: + +- Discover the documentation from well-known root files. +- Fetch all documentation in one plain-text response. +- Fetch any docs page as markdown without parsing the HTML UI. +- Follow CLI, MCP, setup, integration, and semantic-layer workflows from + structured examples. +- Recover from common setup and command failures using explicit troubleshooting + notes. + +## Current State + +`docs-site` is a Next 15 app using Fumadocs. Source pages live under +`docs-site/content/docs`, and rendered docs are served under `/docs`. + +The site currently has good human-facing MDX pages, but it does not expose: + +- `/llms.txt` +- `/llms-full.txt` +- raw markdown routes such as `/docs/getting-started/quickstart.md` +- markdown content negotiation + +Many docs pages already use tables and code blocks, but the structure is not +consistently optimized for literal agent parsing. CLI and agent-facing pages are +the highest-priority content because agents are most likely to copy commands and +JSON examples directly. + +## Design + +### Machine-readable access + +Add a small LLM docs utility layer inside `docs-site`: + +- `docs-site/lib/llm-docs.ts` + - Converts Fumadocs pages to raw or LLM-readable markdown. 
+ - Builds a stable ordered list of docs pages from `source.getPages()`. + - Produces the `llms.txt` index content. + - Produces the `llms-full.txt` bundled content. + +Add routes: + +- `docs-site/app/llms.txt/route.ts` + - Returns `text/plain; charset=utf-8`. + - Includes `# KTX`, a blockquote summary, a short description, and sections + linking to key docs, markdown docs, CLI reference pages, integration pages, + and `/llms-full.txt`. + +- `docs-site/app/llms-full.txt/route.ts` + - Returns `text/plain; charset=utf-8`. + - Concatenates all docs pages in source order. + - Prefixes each page with a stable heading and canonical `/docs/...` URL. + +- `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts` + - Returns one docs page as `text/markdown; charset=utf-8`. + - Uses the same slug shape as `/docs/[[...slug]]`. + - Returns 404 for unknown pages. + +Add a Next rewrite in `docs-site/next.config.mjs`: + +- `/docs/:path*.md` rewrites to `/llms.mdx/docs/:path*` + +Add a markdown negotiation proxy for `/docs/...` requests: + +- Requests whose `Accept` header prefers markdown are rewritten to the matching + LLM markdown route. +- Normal browser requests continue to render the existing Fumadocs UI. +- The proxy must leave `/llms.txt`, `/llms-full.txt`, assets, and non-docs + routes unchanged. + +### Content rewrite pass + +Rewrite the existing MDX content in a bounded, high-impact pass. The intent is +not to expand every page; it is to make every page more literal and consistent +for agents. + +Apply these patterns across docs: + +- Put command signatures in fenced code blocks. +- Use tables for flags, options, inputs, outputs, supported values, and + environment variables. +- Use realistic values in copy-paste examples. +- Show complete expected command output when output shape matters. +- Add explicit "Common errors" or "Recovery" sections for workflows where a + command can fail for predictable reasons. 
+- Add workflow sections that chain commands in the order an agent should use + them. +- Avoid placeholders that an agent could copy literally, unless the placeholder + is clearly marked as a value to replace. + +Priority pages: + +1. `getting-started/quickstart.mdx` + - Add a compact workflow summary. + - Make prerequisites and generated files explicit. + - Add troubleshooting for missing API keys, failed connection tests, daemon + startup, and unbuilt context. + +2. `guides/serving-agents.mdx` + - Treat MCP tools and `ktx agent` commands as agent-facing API references. + - Add tool/command input tables, output expectations, safety constraints, and + workflows for answering analytics questions. + +3. `guides/writing-context.mdx` + - Add semantic-source schema tables. + - Add workflows for listing, reading, editing, validating, querying, and + writing wiki knowledge. + +4. `cli-reference/*.mdx` + - Normalize every command page to: command signature, subcommands table, + option tables, examples, output modes, common errors, and related workflows + where useful. + +5. `integrations/agent-clients.mdx`, `integrations/primary-sources.mdx`, and + `integrations/context-sources.mdx` + - Normalize integration setup sections into structured config tables, + copy-paste examples, authentication requirements, and recovery notes. + +6. Concept and benchmark pages + - Keep narrative content, but add compact "Agent usage notes" where it helps + agents decide when to read or cite the page. + +### Documentation boundaries + +The first pass should not introduce a separate public docs tree or a generated +API reference system. It should work with the existing MDX source files and +Fumadocs loader. + +Do not add stale compatibility aliases or rename KTX concepts. Keep examples +aligned with commands and files that exist in the standalone KTX repository. 
+ +### Testing + +Verification commands: + +- `pnpm --filter ktx-docs build` +- `pnpm --filter ktx-docs exec tsc --noEmit` after generated Fumadocs source + files exist. +- Route checks against a local docs server: + - `GET /llms.txt` returns 200 and `text/plain`. + - `GET /llms-full.txt` returns 200 and `text/plain`. + - `GET /docs/getting-started/quickstart.md` returns 200 and + `text/markdown`. + - unknown markdown docs paths return 404. + +For content checks, inspect the generated markdown responses to confirm they +contain: + +- realistic command examples, +- tables, +- full output examples where documented, +- workflow sections, +- recovery/error sections. + +## Acceptance Criteria + +- `/llms.txt` gives agents a concise index with links to key KTX docs and + `/llms-full.txt`. +- `/llms-full.txt` returns all docs content in source order as plain text. +- Every Fumadocs page can be fetched through a `.md` URL. +- High-priority docs pages use consistent agent-friendly structure. +- The docs site builds successfully. +- Verification results and any skipped checks are reported clearly. diff --git a/docs/superpowers/specs/2026-05-11-demo-guided-tour-design.md b/docs/superpowers/specs/2026-05-11-demo-guided-tour-design.md new file mode 100644 index 00000000..34585743 --- /dev/null +++ b/docs/superpowers/specs/2026-05-11-demo-guided-tour-design.md @@ -0,0 +1,252 @@ +# Demo Guided Tour — Design Spec + +## Problem + +The "Try KTX with packaged demo data" option in `ktx setup` is completely +disconnected from the real setup wizard. It bypasses all wizard steps, plays +an animated replay in a temp directory, and exits with no bridge to actually +using KTX. Users don't learn the real setup flow and hit a dead end. + +## Solution + +Redesign the demo option as a **guided tour** that walks the user through the +same setup wizard steps with pre-filled, read-only selections. 
The tour ends +with a real interactive agents step so the user can immediately use the demo +project with their coding agent. + +## Design Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| Implementation strategy | Demo mode flag on existing wizard steps | Maximum code reuse; wizard changes automatically apply to demo | +| LLM/embeddings steps | Skipped | Not relevant to pre-packaged demo data | +| Database selection | PostgreSQL (read-only card) | Pre-filled, matches demo dataset | +| Context sources | dbt, Metabase, Notion (read-only card) | Pre-filled, matches demo dataset | +| Context build | Replay through real progress visualization | Same spinners, progress bars, status icons as real build | +| Agents step | Real interactive step | User actually connects their agent | +| Project location | Temp directory (`/tmp/ktx-demo-{hex}`) | Frictionless, no directory prompt | +| Navigation | Enter to advance, Escape to go back | Consistent with rest of wizard | + +## Flow + +``` +Entry menu: "Try KTX with packaged demo data" + │ + ▼ +Create demo project in /tmp/ktx-demo-{hex} +Copy pre-packaged assets (demo DB, replay, context artifacts) + │ + ▼ +┌────────────────────────────────────────────────────────────────┐ +│ Demo banner (persistent, shown on every step) │ +│ │ +│ Demo mode — data has been pre-processed and KTX context is │ +│ already built. This walkthrough illustrates the setup steps. │ +│ Selections are pre-filled and read-only. 
│ +└────────────────────────────────────────────────────────────────┘ + │ + ▼ +Read-only card: Database connection + ▸ PostgreSQL (demo warehouse) + [Enter → next, Escape → back to entry menu] + │ + ▼ +Read-only card: Context sources + ▸ dbt + ▸ Metabase + ▸ Notion + [Enter → next, Escape → back to database card] + │ + ▼ +Context build replay + Same renderContextBuildView() / repainter as real wizard + Sources: demo-warehouse, dbt, metabase, notion + Replay at slightly faster-than-real pace + Completion summary: business areas, query definitions, knowledge pages + [Enter → next, Escape → back to sources card] + │ + ▼ +Transition message: + "Demo project is ready — let's connect your agent" + │ + ▼ +Interactive agents step (real runKtxSetupAgentsStep()) + User selects agent target, scope, install mode + [Normal interactive navigation; Escape goes back to replay summary] + │ + ▼ +Final summary: + ★ KTX demo is ready + Agent connected, project path shown + ⚠ Temp directory warning + Pointer to `ktx setup` for real data +``` + +## Step Details + +### Demo Banner + +Shown at the top of every read-only step. Uses clack box-drawing style: + +``` +┌ Demo mode — data has been pre-processed and KTX context is already built. +│ This walkthrough illustrates the setup steps. Selections are pre-filled and read-only. +``` + +### Read-Only Step Cards + +Rendered by a shared `renderDemoCard()` helper: + +```typescript +async function renderDemoCard( + title: string, + selections: string[], + io: KtxCliIo, +): Promise<'forward' | 'back'> +``` + +- Renders a clack-style box with title, bullet list of pre-filled selections, + and navigation hint ("Press Enter to continue, Escape to go back") +- Listens for raw keypresses: Enter → `'forward'`, Escape → `'back'` +- Uses same box-drawing characters and colors as `@clack/prompts` + +Card format: + +``` +┌ {title} +│ +│ ▸ {selection 1} +│ ▸ {selection 2} +│ ... 
+│ +│ Press Enter to continue, Escape to go back +└ +``` + +### Demo Step Sequence + +The demo reuses the main wizard's step loop with these steps: + +```typescript +const demoSteps = ['databases', 'sources', 'context', 'agents']; +``` + +Steps `databases` and `sources` dispatch to `renderDemoCard()` instead of +their real interactive functions when demo mode is active. Step `context` +dispatches to the replay visualization. Step `agents` runs the real +`runKtxSetupAgentsStep()`. + +Back navigation reuses `previousNavigableStepIndex()`. Escaping from the +first step (databases) returns to the entry menu. + +### Context Build Replay + +Uses the same rendering pipeline as the real context build: + +- `renderContextBuildView()` for the progress display +- `createRepainter()` for terminal repainting +- Same spinner frames, progress bars (`████░░░░`), status icons (`✓`, `⠹`, `○`) +- Same source grouping (Primary sources / Context sources) + +Sources shown: + +``` +Primary sources: + ✓ demo-warehouse completed · Xs + +Context sources: + ✓ dbt completed · Xs + ✓ metabase completed · Xs + ✓ notion completed · Xs +``` + +Replay timing: events from the pre-packaged replay file are played back at +a slightly faster pace than real-time (compressed to feel brisk but not +instant). + +Completion summary uses the existing format: + +``` +★ KTX finished ingesting your data + + ✓ Analyzed X business areas + ✓ Reconciled — 0 conflicts + + KTX created: + 📊 X query definitions + 📝 X knowledge pages + + Press Enter to continue, Escape to go back +``` + +The exact counts and artifact names come from the pre-packaged demo results +(to be provided by the user as improved demo data). + +### Agents Step Transition + +A brief message bridges from the read-only tour to the interactive step: + +``` +┌ Demo project is ready — let's connect your agent +│ +│ Your KTX context has been built with demo data. +│ Select an agent to start using it. 
+└ +``` + +Then `runKtxSetupAgentsStep()` runs with the demo project directory, +normal interactive prompts enabled. + +### Final Summary + +``` +★ KTX demo is ready + + Your agent is connected to a demo KTX project. + + ⚠ This project is in a temporary directory and will be + cleaned up by your system. To set up KTX with your own + data, run: ktx setup + + Project: /tmp/ktx-demo-a1b2c3 +``` + +If the user skips the agents step, replace the first line with manual +agent connection instructions (`ktx setup --agents --project-dir /tmp/...`). + +## Implementation Approach + +Thread a `demoMode` flag through the main setup loop in `setup.ts`. When +active: + +1. Skip `models` and `embeddings` steps entirely +2. Replace `databases` and `sources` step dispatch with `renderDemoCard()` +3. Replace `context` step dispatch with replay visualization +4. Run `agents` step normally +5. Show demo-specific completion summary instead of ready menu + +The `renderDemoCard()` helper is a new function in a new file +(e.g. `setup-demo-cards.ts`) that handles read-only card rendering and +keypress listening. + +The context build replay reuses existing `renderContextBuildView()` and +`createRepainter()` from `context-build-view.ts`, fed with events from +the pre-packaged replay file at an accelerated playback rate. 
+ +## Files Changed + +| File | Change | +|------|--------| +| `packages/cli/src/setup.ts` | Add `demoMode` flag to setup loop; skip models/embeddings; dispatch to demo cards for databases/sources; show demo banner; demo completion summary | +| `packages/cli/src/setup-demo-cards.ts` | New file: `renderDemoCard()` helper, demo banner renderer, demo step definitions | +| `packages/cli/src/setup-context.ts` | Support replay mode for demo: feed pre-packaged events at accelerated pace through existing progress view | +| `packages/cli/src/demo.ts` | Remove or simplify `runKtxSetupDemoFromEntryMenu()` — now dispatches to the main setup loop with `demoMode: true` | +| `packages/cli/src/demo-assets.ts` | Update asset list if new demo data is provided; ensure demo project setup writes valid `ktx.yaml` for agent use | + +## Open Items + +- **Demo data**: User will provide improved pre-packaged results (Postgres, + dbt, Metabase, Notion). Current demo assets may need updating. +- **Replay speed**: Exact acceleration factor TBD — should feel brisk but + give users time to read source names and status transitions. Start with + ~2x real-time and adjust. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/.gitkeep b/packages/cli/assets/demo/orbit/knowledge/global/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/packages/cli/assets/demo/orbit/knowledge/global/activation-policy.md b/packages/cli/assets/demo/orbit/knowledge/global/activation-policy.md deleted file mode 100644 index 186381e5..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/activation-policy.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -summary: Account activation policy changed on January 15, 2026. -tags: - - growth - - activation - - policy -refs: [] -sl_refs: - - orbit_demo.accounts - - orbit_demo.purchase_requests -usage_mode: auto ---- - -Before January 15, 2026, activation meant first requester login. 
- -On and after January 15, 2026, activation requires an approved purchase request and at least three activated requesters. - -Always separate pre-policy and post-policy cohorts when comparing activation rates. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/arr-contract-first.md b/packages/cli/assets/demo/orbit/knowledge/global/arr-contract-first.md deleted file mode 100644 index 8e67ec65..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/arr-contract-first.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -summary: ARR uses contract-first precedence before subscription-derived revenue. -tags: - - finance - - arr - - revenue -refs: [] -sl_refs: - - orbit_demo.contracts - - orbit_demo.arr_movements -usage_mode: auto ---- - -ARR is calculated from active recurring contract ARR before falling back to subscription-derived revenue. - -Do not double-count subscription MRR when an active contract row covers the same account and period. - -Exclude cancelled contracts ending before the metric date, future-starting contracts, internal accounts, and test accounts. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/customer-communication-policy.md b/packages/cli/assets/demo/orbit/knowledge/global/customer-communication-policy.md new file mode 100644 index 00000000..162ade23 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/customer-communication-policy.md @@ -0,0 +1,44 @@ +--- +summary: "Required elements for valid customer updates: what happened, what is being done, who owns next step, and when customer will hear back. Vague status phrases are not acceptable." +usage_mode: auto +sort_order: 0 +tags: + - policy + - customer-success +refs: + - orbit-how-we-work + - sales-ops-cs-handoff-process +--- + +## Customer Update Communication Standard + +**Source:** Notion — People & Operating Norms, last edited 2026-05-07 + +--- + +## Policy + +Every customer update must contain four elements. 
An update that omits any of these is incomplete and must not be sent. + +| # | Required Element | Example | +|---|---|---| +| 1 | **What happened** | "The approval routing failed for the renewal PO because the department budget split was not configured." | +| 2 | **What is being done** | "We are reconfiguring the budget split and re-routing the approval to the correct approver." | +| 3 | **Who owns the next step** | "[Name] on our CS team owns this and is working it now." | +| 4 | **When the customer will hear back** | "You will have an update by 3pm today." | + +## Named Anti-Pattern + +- **Do not send:** "We are looking into it." +- This phrase is only acceptable when the actual blocker is genuinely unknown. If the blocker is known, name it. +- Vague status phrases without a named owner and a time commitment are not acceptable customer updates. + +## When This Applies + +- Any written update to a customer during an active issue, escalation, or implementation delay. +- Applies to email, Slack, and any other written channel. +- Verbal updates in calls should follow the same structure; a written summary must follow the call. + +--- + +See also: [[orbit-how-we-work]], [[sales-ops-cs-handoff-process]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/customer-health-scoring.md b/packages/cli/assets/demo/orbit/knowledge/global/customer-health-scoring.md deleted file mode 100644 index 15e84251..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/customer-health-scoring.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -summary: Customer health combines support severity and procurement activity. -tags: - - customer-success - - health - - churn-risk -refs: - - nrr-retention -sl_refs: - - orbit_demo.support_tickets - - orbit_demo.purchase_requests - - orbit_demo.accounts -usage_mode: auto ---- - -High-risk accounts have multiple recent high-severity tickets or no recent procurement activity on growth and enterprise plans. 
- -Medium risk captures partial support pressure or a material month-over-month decline in procurement activity. - -Internal and test accounts are excluded from customer health scoring. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/discount-expiration.md b/packages/cli/assets/demo/orbit/knowledge/global/discount-expiration.md deleted file mode 100644 index e65039df..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/discount-expiration.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -summary: Discount expirations are tracked separately from organic contraction. -tags: - - finance - - retention -refs: - - arr-contract-first - - nrr-retention -sl_refs: - - orbit_demo.contracts - - orbit_demo.arr_movements -usage_mode: auto ---- - -Discount expiration events identify pricing changes when negotiated discounts end. - -Track these separately from organic contraction so board reporting can split pricing-driven and usage-driven changes. - -Use movement_reason on arr_movements when separating discount expiration from churn or seat-reduction events. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/internal-test-exclusion.md b/packages/cli/assets/demo/orbit/knowledge/global/internal-test-exclusion.md deleted file mode 100644 index 17fce5ea..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/internal-test-exclusion.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -summary: Canonical metrics exclude internal and test accounts and users. -tags: - - data-quality - - governance -refs: [] -sl_refs: - - orbit_demo.accounts -usage_mode: auto ---- - -All canonical customer metrics exclude rows marked as internal or test fixtures. - -This exclusion applies at both account and user grain when joining procurement, support, and revenue activity. - -If a metric unexpectedly increases, check whether new internal or test accounts were created without proper flags. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/new-hire-onboarding-policy.md b/packages/cli/assets/demo/orbit/knowledge/global/new-hire-onboarding-policy.md new file mode 100644 index 00000000..2059329d --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/new-hire-onboarding-policy.md @@ -0,0 +1,47 @@ +--- +summary: "New hire week-one knowledge requirements: four things every new hire must understand by end of week one, with manager as responsible owner." +usage_mode: auto +sort_order: 0 +tags: + - orbit + - policy +refs: + - orbit-company-overview + - orbit-how-we-work +--- + +## New Hire Week-One Onboarding Policy + +**Source:** Notion — People & Operating Norms, last edited 2026-05-07 +**Owner:** Manager (not People Ops) + +--- + +## Policy + +Every new hire must understand **four things by end of week one**. The manager — not People Ops — is responsible for supplying this context. + +## Required Week-One Knowledge + +| # | What the new hire must understand | +|---|---| +| 1 | **What Orbit sells** — the core procurement workflow product and value proposition | +| 2 | **Why procurement workflow gets messy inside a customer** — the pain points that make Orbit necessary | +| 3 | **Which team handles which part of the customer lifecycle** — team lanes and ownership boundaries | +| 4 | **What their first useful project is** — a concrete, scoped piece of work they can contribute to immediately | + +## Ownership + +- The **manager** is responsible for delivering this context, not People Ops or a generic onboarding doc. +- If the manager cannot supply item 4 (first useful project) by day one, they should have it ready by end of day two at the latest. +- Items 1–3 can be covered via existing documentation; the manager should point to the right pages rather than re-explaining from scratch. 
+ +## Suggested Reading for Items 1–3 + +- Item 1 & 2: [[orbit-company-overview]] +- Item 3: [[orbit-company-overview]] (Team Lanes section) +- Operating norms and how decisions are made: [[orbit-how-we-work]] + +--- + +See also: [[orbit-company-overview]], [[orbit-how-we-work]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/nrr-retention.md b/packages/cli/assets/demo/orbit/knowledge/global/nrr-retention.md deleted file mode 100644 index b9b0c07b..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/nrr-retention.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -summary: NRR is calculated at parent-account grain by calendar quarter. -tags: - - analytics - - retention - - nrr -refs: - - arr-contract-first -sl_refs: - - orbit_demo.arr_movements - - orbit_demo.accounts -usage_mode: auto ---- - -Net Revenue Retention uses parent-account rollups by calendar quarter. - -The formula is starting ARR plus expansion minus contraction and churn, divided by starting ARR. - -Exclude parent accounts with zero starting ARR, new business, reactivations, and internal/test accounts from the denominator. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-kpi-glossary.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-kpi-glossary.md new file mode 100644 index 00000000..dc97bdc2 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-kpi-glossary.md @@ -0,0 +1,64 @@ +--- +summary: "Customer activation: email verified + first project + team invite within 14 days of signup. D7/D14 activation rates and Time-to-Activate formulas. Source tables: customer, project, invite." 
+usage_mode: auto +sort_order: 0 +tags: + - activation + - kpi + - growth + - funnel + - metrics +refs: + - orbit-customers-source + - orbit-activation-policy-change-jan-2026 + - orbit-mart-account-activity +tables: + - orbit_analytics.customer + - orbit_analytics.project + - orbit_analytics.invite +--- + +# Activation KPI Glossary + +**Owner team:** Growth +**Source:** Notion — Orbit Demo Home / Data Team - Onboarding / Activation KPI Glossary, last edited 2026-05-07 + +Use this when a question is about signup-to-habit behavior. Orbit uses activation language across Growth, Product, and CS conversations. + +## Activation Definition + +A customer is **activated** when **all three** of the following happen **within 14 days of signup**: + +1. Email is verified +2. First project is created +3. At least one teammate is invited + +## Funnel Stages + +| Stage | Signal | Data source | +|---|---|---| +| 1. Signup | Customer row created | `orbit_analytics.customer` | +| 2. Email Verified | `customer.email_verified_at` is not null | `orbit_analytics.customer` | +| 3. First Project | At least one row in `orbit_analytics.project` for the customer | `orbit_analytics.project` | +| 4. Team Invite | At least one row in `orbit_analytics.invite` for the customer | `orbit_analytics.invite` | +| 5. Activated | All of (2), (3), and (4) within 14 days of (1) | — | + +## Conversion-Rate KPIs + +| KPI | Formula | +|---|---| +| **D7 Activation Rate** | `activated_customers_within_7_days / signups_in_cohort` | +| **D14 Activation Rate** | `activated_customers_within_14_days / signups_in_cohort` | +| **Time-to-Activate** | `median(activated_at - created_at)` in hours | + +Growth conversations typically use D7 and D14 Activation Rate. Product and CS may ask about individual funnel steps — confirm whether they mean the full activation definition or only one stage. + +## Source Notes + +- Use `orbit_analytics.customer` for `created_at` and `email_verified_at`. 
+- For project or invite timing, check `orbit_analytics.project` and `orbit_analytics.invite` before changing the activation definition. +- `created_at` is UTC; confirm timezone expectations before cohort filtering. + +## Relationship to Account-Level Activation + +This glossary defines **customer-level** activation (signup-to-habit). The **account-level** activation workflow (requester login → first approved purchase request → account activated) is a separate concept tracked in `mart_account_activity` and governed by the January 2026 policy change. See `orbit-activation-policy-change-jan-2026` for that definition. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-policy-change-jan-2026.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-policy-change-jan-2026.md new file mode 100644 index 00000000..3216d94d --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-policy-change-jan-2026.md @@ -0,0 +1,46 @@ +--- +summary: "January 2026 activation policy change: policy_version splits events into pre_2026_01_15 and post_2026_01_15 cohorts. mart_account_activity compares activation counts across the boundary." +usage_mode: auto +sort_order: 0 +tags: + - activation + - growth + - policy + - governed-metric + - procurement +sl_refs: + - mart_account_activity +--- + +# Activation Policy Change — January 2026 + +**Governed metric key:** `activated_accounts` +**Owner team:** growth +**Notion:** `notion://notion_page_activation_policy_decision#policy-change` +**Sources:** `mart_account_activity`, `int_activation_policy_windows`, `stg_activation_events` + +## Policy Boundary + +The activation workflow changed on **2026-01-15**. 
All activation events are tagged with `policy_version`: + +- `pre_2026_01_15` — events before the workflow update +- `post_2026_01_15` — events after the workflow update + +## Activation Event Types + +`first_requester_login`, `requester_activated`, `first_approved_purchase_request`, `account_activated` + +## Account Activation Sequence + +1. First requester login → `first_requester_login` +2. Requester activated → `requester_activated` +3. First approved purchase request → `first_approved_purchase_request` +4. Account activated → `account_activated` + +## Exclusions + +Internal and test accounts (lifecycle_status = `internal` or `test` on `stg_accounts`) are excluded from activation counts. Sessions (`stg_sessions`) are used for pre-policy activation and activity exclusions. + +## Dashboard + +Exposed via the **Growth Activation Dashboard** (`https://orbit-demo.example.com/dashboards/activation`), which depends on `mart_account_activity`. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-arr-contract-first-definition.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-arr-contract-first-definition.md new file mode 100644 index 00000000..4cb34b76 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-arr-contract-first-definition.md @@ -0,0 +1,39 @@ +--- +summary: "ARR is calculated contract-first: active contract ARR takes precedence over subscription ARR for any covered period." +usage_mode: auto +sort_order: 0 +tags: + - arr + - governed-metric + - finance + - contracts + - subscriptions +sl_refs: + - mart_arr_daily + - mart_account_segments +--- + +# ARR — Contract-First Definition + +**Governed metric key:** `arr` +**Owner team:** finance +**Notion:** `notion://notion_page_arr_contract_reporting#arr-contract-first` +**Source:** `mart_arr_daily` (grain: `metric_date`) + +## Rule + +ARR is calculated **contract-first**: when an active contract exists for an account and period, `int_active_contract_arr` is used. 
Subscription ARR (`stg_subscriptions`) is only used when no active contract covers the period. + +## Known Assertion + +The dbt test on `mart_arr_daily.arr_cents` asserts the value equals **1,874,200,000 cents ($18,742,000)** as of `metric_date = 2026-03-31`. + +## Intermediate model + +`int_active_contract_arr` — active contract ARR as of 2026-03-31 (grain: `contract_id`). + +## Related + +- `stg_contracts` — contract records (status: draft, active, cancelled, expired) +- `stg_subscriptions` — fallback ARR source (status: active, cancelled, past_due, trialing) +- `mart_arr_daily` — board-prep daily ARR mart diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-company-overview.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-company-overview.md new file mode 100644 index 00000000..83645aeb --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-company-overview.md @@ -0,0 +1,71 @@ +--- +summary: "Orbit: procurement workflow software (requests → approvals → supplier onboarding → POs). Plans: Starter, Growth, Enterprise. Team lanes and open operating questions as of May 2026." +usage_mode: auto +sort_order: 0 +tags: + - company-context + - product + - plans + - team-lanes + - procurement +refs: + - orbit-plan-segment-normalization + - orbit-procurement-qualifying-actions +--- + +# Orbit Company Overview + +**Source:** Notion — Orbit Demo Home / Company Overview + Orbit Demo Home (root), last edited 2026-05-07 + +## What Orbit Sells + +Orbit sells procurement workflow and spend-control software. The core value proposition: route purchase requests, collect approvals, onboard suppliers, and issue purchase orders without turning every exception into a status hunt. + +**Primary buyers:** Finance, Procurement, Business Operations. +**Daily users:** department admins, office managers, IT leads, legal ops partners — anyone who has to get a vendor through the building. + +## Product Workflow + +1. 
Requester submits a purchase request +2. Approval routing collects the right decision +3. Supplier invite and onboarding happen before work starts +4. Purchase order is created from the approved request +5. Renewal handoff keeps the relationship from drifting + +## Plans + +| Plan | Target customer | +|---|---| +| **Starter** | Teams moving out of spreadsheet tracking | +| **Growth** | Default mid-market plan | +| **Enterprise** | Multiple approval policies, parent/child account structures, heavier renewal coordination | + +**Legacy alias:** `pro_plus` in older notes means Growth. Treat as Growth unless Sales Ops says otherwise. See `orbit-plan-segment-normalization` for the data-layer normalization rule. + +## Team Lanes + +| Team | Responsibilities | +|---|---| +| Product | Requester onboarding, supplier onboarding, approval routing, PO workflow quality | +| Growth | Activation, self-serve conversion | +| Sales Ops | Account segmentation, plan mapping, contracts, handoff hygiene | +| Customer Success | Implementation, support escalations, account health, renewal risk | +| Finance | Billing, close, board prep | +| Data | Cross-functional support for all departments | +| Executive | Company priorities, weekly operating review | + +## Open / Unsettled Questions (as of May 2026) + +- Whether supplier onboarding stays fully inside Product or splits more work with CS for larger accounts. +- Whether Growth is still the right default-plan language in sales materials. +- How renewal handoff works when Sales Ops updates account segment late in-quarter. +- Implementation handoff template decision still pending. +- Renewal risk review agenda should not live only in meeting notes. + +## Common Customer Pain Points (Pre-Sale) + +- "We have too many request paths." +- "Approvals happen, but no one can explain the state of the request." +- "Supplier onboarding is split across three teams." +- "Renewals are visible too late." 
+- "People keep asking Finance for status because there is nowhere better to look." diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-health-risk-definition.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-health-risk-definition.md new file mode 100644 index 00000000..56deb3f3 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-health-risk-definition.md @@ -0,0 +1,46 @@ +--- +summary: "Customer health risk definition: risk_level (low/medium/high) derived from open critical support tickets and recent procurement activity. Mart: mart_customer_health, as of 2026-03-31." +usage_mode: auto +sort_order: 0 +tags: + - customer-health + - risk + - customer-success + - governed-metric + - support +sl_refs: + - mart_customer_health +--- + +# Customer Health Risk Definition + +**Governed metric key:** `active_customers` +**Owner team:** customer_success +**Notion:** `notion://notion_page_customer_health_playbook#risk-definition` +**Sources:** `mart_customer_health`, `int_customer_health_signals` + +## Risk Levels + +`low`, `medium`, `high` — derived from two signal types: + +1. **Support ticket signals** (`stg_support_tickets`): open or pending tickets with severity `high` or `critical` increase risk. +2. **Procurement activity signals** (`stg_purchase_requests`, `stg_purchase_orders`): recent qualifying procurement actions reduce risk. + +## Intermediate Model + +`int_customer_health_signals` — combines open critical ticket count and recent procurement action count per account. + +## Mart + +`mart_customer_health` — account-grain risk mart as of **2026-03-31**. + +- `account_id`: dbt not_null, unique +- `risk_level`: dbt accepted_values [low, medium, high] + +## Support Ticket Severities + +`low`, `medium`, `high`, `critical` + +## Account Ownership Context + +`stg_account_owners` provides effective-dated ownership (owner_team: sales_ops, customer_success, finance) for escalation routing. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-stakeholder-needs.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-stakeholder-needs.md new file mode 100644 index 00000000..f7e57215 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-stakeholder-needs.md @@ -0,0 +1,42 @@ +--- +summary: "Recurring customer stakeholder needs by role: Finance, Department leaders, Procurement, Legal, and Customer Success each have distinct priorities that should inform product and positioning decisions." +usage_mode: auto +sort_order: 0 +tags: + - product + - customer-success + - orbit +refs: + - orbit-company-overview + - orbit-product-review-checklist +--- + +## Customer Stakeholder Needs by Role + +**Source:** Notion — Product & Customers, last edited 2026-05-07 + +--- + +## Policy + +These are recurring, role-specific customer needs observed across accounts. Use them to inform product prioritization, positioning, and CS engagement strategies. + +## Stakeholder Map + +| Role | Primary Need | Implication | +|---|---|---| +| **Finance** | Committed spend visibility earlier in the procurement cycle | Surface budget commitments at request approval, not at PO creation | +| **Department leaders** | Request speed — faster time from request to approval | Reduce approval routing friction; minimize back-and-forth | +| **Procurement** | Supplier file complete before the first invoice | Supplier onboarding must be finished before PO is issued, not after | +| **Legal** | Fewer emergency reviews | Route contracts with legal implications earlier; avoid last-minute escalations | +| **Customer Success (internal)** | Renewal risk visible before the account is already annoyed | CS needs leading indicators of dissatisfaction, not lagging ones | + +## Usage Notes + +- These needs are recurring patterns, not one-off requests. They should be treated as standing assumptions until explicitly updated. 
+- When prioritizing roadmap items, map each item to the stakeholder(s) it serves and verify the need is still active. +- When positioning Orbit to a new prospect, use this map to tailor the value proposition to the roles present in the buying committee. + +--- + +See also: [[orbit-company-overview]], [[orbit-product-review-checklist]], [[orbit-known-product-gaps]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md new file mode 100644 index 00000000..e98c1663 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md @@ -0,0 +1,60 @@ +--- +summary: "orbit_analytics.customer: one row per customer. Columns, joins to account/subscription_event, measures (customer_count, paying_customer_count, mrr), and watch-outs." +usage_mode: auto +sort_order: 0 +tags: + - data-source + - customers + - orbit-analytics + - measures +refs: + - orbit-plan-segment-normalization + - orbit-activation-kpi-glossary +tables: + - orbit_analytics.customer + - orbit_analytics.account + - orbit_analytics.subscription_event +--- + +# Orbit Customers Source + +**Table:** `orbit_analytics.customer` +**Grain:** one row per signed-up customer +**Source:** Notion — Orbit Demo Home / Data Team - Onboarding / Orbit Customers Source, last edited 2026-05-07 + +Use this when a question needs customer identity, plan tier, signup timing, recent activity, or the standard customer joins. 
+ +## Columns + +| Column | Type | Notes | +|---|---|---| +| `id` | number | Primary key, surrogate key | +| `email` | string | Login email, unique — **do not use as join key** | +| `name` | string | Display name | +| `country` | string | ISO 3166-1 alpha-2 code | +| `plan_tier` | string | One of `free`, `pro`, `enterprise` | +| `created_at` | time | UTC signup timestamp | +| `last_seen_at` | time | UTC most recent app activity | +| `email_verified_at` | time | UTC email verification timestamp (used in activation funnel) | + +## Joins + +- **one-to-many** → `orbit_analytics.account` on `customer.id = account.customer_id` +- **one-to-many** → `orbit_analytics.subscription_event` on `customer.id = subscription_event.customer_id` + +Always join through `customer.id`. Do not join on `email`. + +## Standard Measures + +| Measure | Formula | +|---|---| +| `customer_count` | `count(distinct id)` | +| `paying_customer_count` | `count(distinct id) where plan_tier in ('pro', 'enterprise')` | +| `mrr` | `sum(subscription_event.amount) where event_type = 'renewed'` | + +## Watch-outs + +- **Join key:** Always use `customer.id`, never `email`. +- **Timezone:** `created_at` and `last_seen_at` are UTC. Confirm whether a question expects UTC or a local business day before filtering. +- **Paying vs. all:** `free` customers must be excluded from paying-customer follow-ups. Use `paying_customer_count`, not `customer_count`. +- **plan_tier values:** `free`, `pro`, `enterprise`. Note: `pro_plus` is a legacy alias for `growth` in the account/contract layer (see `orbit-plan-segment-normalization`), but `plan_tier` on this table uses `pro` not `pro_plus`. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-exposures.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-exposures.md new file mode 100644 index 00000000..05011e48 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-exposures.md @@ -0,0 +1,44 @@ +--- +summary: "dbt exposures declared in models/exposures.yml: three dashboards (Retention Executive, Executive Revenue, Growth Activation) with their upstream mart dependencies and owners." +usage_mode: auto +sort_order: 0 +tags: + - dbt + - exposures + - dashboards + - orbit +sl_refs: + - mart_nrr_quarterly + - mart_retention_movement_breakout + - mart_arr_daily + - mart_revenue_daily + - mart_account_activity +--- + +# Orbit dbt Exposures + +Declared in `models/exposures.yml`. All exposures are type `dashboard` with maturity `high` or `medium`. + +## Retention Executive Dashboard + +- **URL:** https://orbit-demo.example.com/dashboards/retention +- **Maturity:** high +- **Owner:** Analytics (analytics@orbit-demo.example.com) +- **Depends on:** `mart_nrr_quarterly`, `mart_retention_movement_breakout` +- **Description:** Executive retention view covering NRR and movement breakout. + +## Executive Revenue Dashboard + +- **URL:** https://orbit-demo.example.com/dashboards/revenue +- **Maturity:** high +- **Owner:** Finance (finance@orbit-demo.example.com) +- **Depends on:** `mart_arr_daily`, `mart_revenue_daily` +- **Description:** Board reporting view for ARR and gross-to-net revenue. + +## Growth Activation Dashboard + +- **URL:** https://orbit-demo.example.com/dashboards/activation +- **Maturity:** medium +- **Owner:** Growth (growth@orbit-demo.example.com) +- **Depends on:** `mart_account_activity` +- **Description:** Activation policy comparison around the January 2026 workflow update. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-project-overview.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-project-overview.md new file mode 100644 index 00000000..41179bc2 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-project-overview.md @@ -0,0 +1,54 @@ +--- +summary: "Overview of the kaelio_demo dbt project: connection, schema layout, model layers, and governed metrics." +usage_mode: auto +sort_order: 0 +tags: + - dbt + - orbit + - data-model + - governed-metrics +sl_refs: + - stg_accounts + - stg_contracts + - stg_arr_movements + - mart_arr_daily + - mart_nrr_quarterly + - mart_revenue_daily + - mart_account_activity + - mart_procurement_activity + - mart_customer_health + - mart_account_segments +--- + +# Orbit dbt Project Overview + +**Project name:** `kaelio_demo` +**dbt version:** 1.0.0 +**Profile target:** Postgres (`orbit_analytics` schema, `kaelio_demo` database) +**Raw source schema:** `orbit_raw` +**Analytics schema:** `orbit_analytics` (all models materialised as views by default) + +## Model Layers + +| Layer | Prefix | Purpose | +|---|---|---| +| Staging | `stg_` | 1-to-1 with `orbit_raw` tables; adds type-casting, column tests, enum constraints | +| Intermediate | `int_` | Business-logic joins and rollups; not exposed to BI directly | +| Mart | `mart_` | Board/dashboard-ready aggregates; each has a `governed_metric_key` and `owner_team` | + +## Governed Metrics (mart layer) + +| Mart | `governed_metric_key` | Owner | Notion | +|---|---|---|---| +| `mart_arr_daily` | `arr` | finance | `notion_page_arr_contract_reporting` | +| `mart_nrr_quarterly` | `net_revenue_retention` | analytics | `notion_page_retention_policy_current` | +| `mart_retention_movement_breakout` | `net_revenue_retention` | analytics | `notion_page_retention_policy_current` | +| `mart_revenue_daily` | `net_revenue` | finance | `notion_page_revenue_reporting_policy` | +| `mart_account_activity` | 
`activated_accounts` | growth | `notion_page_activation_policy_decision` | +| `mart_procurement_activity` | `weekly_active_requesters` | product | `notion_page_procurement_instrumentation` | +| `mart_customer_health` | `active_customers` | customer_success | `notion_page_customer_health_playbook` | +| `mart_account_segments` | `segment` | sales_ops | `notion_page_sales_ops_segmentation` | + +## Raw Source Tables (`orbit_raw` schema) + +accounts, account_hierarchy, plans, contracts, subscriptions, contract_discount_terms, arr_movements, invoices, invoice_line_items, refunds, plan_segment_mapping, users, activation_events, sessions, purchase_requests, approval_events, suppliers, supplier_onboarding_events, purchase_orders, support_tickets, account_owners. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-how-we-work.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-how-we-work.md new file mode 100644 index 00000000..3640a693 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-how-we-work.md @@ -0,0 +1,75 @@ +--- +summary: "Orbit operating model: remote-first, written-first, weekly rhythm, decision process, escalation policy, and standing operating norms." +usage_mode: auto +sort_order: 0 +tags: + - policy + - orbit +refs: + - orbit-company-overview + - customer-communication-policy +--- + +## How We Work + +**Source:** Notion — Orbit Demo Home / How We Work, last edited 2026-05-07 + +--- + +## Operating Model + +- Orbit is a **mostly remote, mostly written** company. +- Meetings must serve a specific purpose: making a decision, unblocking a handoff, or building shared context that writing alone would be slower to achieve. +- If a meeting does not meet one of those three purposes, default to async written communication. 
+ +--- + +## Weekly Rhythm + +| Day(s) | Focus | +|---|---| +| **Monday** | Commitments and dependency checks | +| **Tuesday – Thursday** | Customer calls, product work, implementation, and building | +| **Friday** | Closing loops — review what shipped, what slipped, and write down any decisions | + +Use this rhythm when scheduling work, meetings, or reviews. Do not schedule decision-making meetings on Fridays; use Friday to record decisions already made. + +--- + +## Decision-Making Process + +1. **The person closest to the work writes the recommendation.** +2. **Stakeholders who will live with the decision get to push back.** +3. **The accountable lead makes the call** when a real tradeoff exists. +4. **The result is written where the work is happening.** Decisions that exist only in Slack or a meeting are not considered durable. + +> A decision that isn't written down didn't happen. + +--- + +## Standing Operating Norms + +These are explicitly codified rules that guard against failure modes Orbit has identified as recurring: + +- **Name the accountable person before work begins.** If no one is named, no one is accountable. +- **Never let a quick sync be the only source of truth.** Write it down after. +- **Bring a customer example when proposing product changes.** Abstract proposals without customer grounding are harder to evaluate. +- **Involve affected teams before a plan is finalized.** Surprises in execution are more expensive than slower planning. +- **Prefer a rough written decision today over a perfect recap that never gets written.** Done and documented beats polished and lost. + +--- + +## Escalation Policy + +- **Escalations are coordination tools, not indicators of individual failure.** Escalating is the correct behavior when a problem exceeds the current team's ability to resolve it alone. +- When escalating, the person escalating must: + 1. Bring in the right people (those with authority or context to unblock). + 2.
Summarize current state clearly — what has been tried, what is blocked, and why. + 3. Name the customer impact explicitly. + 4. Keep updates moving until the risk is resolved or a workaround is established. +- Escalations that stall because no one owns the next update are a process failure, not a customer failure. +- An escalation is closed when the risk is resolved or a documented workaround is in place — not when the immediate noise stops. + +--- + +See also: [[orbit-company-overview]], [[orbit-team-lanes-detail]], [[customer-communication-policy]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-known-product-gaps.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-known-product-gaps.md new file mode 100644 index 00000000..ea0c4a9e --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-known-product-gaps.md @@ -0,0 +1,47 @@ +--- +summary: "Known Orbit product friction: approval routing for non-standard cases (weird supplier setups, split budgets, renewal changes) causes teams to fall back to side channels outside Orbit." +usage_mode: auto +sort_order: 0 +tags: + - product + - orbit + - customer-success +refs: + - orbit-customer-stakeholder-needs + - orbit-product-review-checklist + - orbit-company-overview +--- + +## Known Product Gaps and Friction Points + +**Source:** Notion — Product & Customers (Notes from Recent Customer Calls), last edited 2026-05-07 + +--- + +## Primary Friction: Approval Routing for Exceptions + +The primary source of customer friction is **approval routing around non-standard cases**. When a procurement request does not fit the standard routing rules, teams fall back to side channels (email, Slack, spreadsheets) outside Orbit. 
+ +### Specific Triggers + +| Trigger | Why It Causes Fallback | +|---|---| +| **Weird supplier setups** | Non-standard supplier configurations don't fit the default approval chain | +| **Split department budgets** | Requests that span multiple budget owners require manual coordination not supported in the routing UI | +| **Renewal changes** | Mid-term contract changes (scope, price, term) don't map cleanly to the new-request flow | + +## Impact + +- Teams that fall back to side channels for exceptions create a split record: part of the procurement history is in Orbit, part is not. +- This undermines the supplier file completeness that Procurement requires (see [[orbit-customer-stakeholder-needs]]). +- It also creates renewal risk because CS cannot see the full picture of what was agreed. + +## Status + +- This is a known, unresolved gap as of May 2026. +- Treat as a standing assumption in roadmap and analysis decisions until a fix is shipped and validated. +- Do not design analyses or reports that assume all procurement activity flows through Orbit for accounts with known exception patterns. + +--- + +See also: [[orbit-customer-stakeholder-needs]], [[orbit-product-review-checklist]], [[orbit-company-overview]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-activity.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-activity.md new file mode 100644 index 00000000..74a936f6 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-activity.md @@ -0,0 +1,49 @@ +--- +summary: "mart_account_activity: pre/post policy 30-day activation rates per policy_change_date. policy_change_date = 2026-01-15 is the Jan 2026 boundary. Rates are 0–1 ratios." 
+usage_mode: auto +sort_order: 0 +tags: + - activation + - policy + - mart + - orbit-analytics +sl_refs: + - mart_account_activity +tables: + - orbit_analytics.mart_account_activity +--- + +# mart_account_activity + + + + + + +**Table:** `orbit_analytics.mart_account_activity` +**Grain:** one row per `policy_change_date` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `policy_change_date` | date | The policy boundary date (primary value: `2026-01-15`) | +| `pre_policy_30_day_activation_rate` | decimal | 30-day activation rate before the policy change (0–1 ratio) | +| `post_policy_30_day_activation_rate` | decimal | 30-day activation rate after the policy change (0–1 ratio) | + +## Key measures (SL: `mart_account_activity`) + +- `avg_pre_policy_activation_rate` — `avg(pre_policy_30_day_activation_rate)` +- `avg_post_policy_activation_rate` — `avg(post_policy_30_day_activation_rate)` + +## Common query patterns + +- **Policy comparison:** `WHERE policy_change_date = date '2026-01-15'` +- **As percent:** `round(pre_policy_30_day_activation_rate * 100, 1)` +- **Side-by-side:** UNION of the pre and post rate columns as separate rows, each tagged with a `policy_window` label column + +## Business rules + +- The January 2026 activation policy change (`policy_change_date = 2026-01-15`) is the primary boundary. `policy_version` in upstream events splits into `pre_2026_01_15` and `post_2026_01_15` cohorts. +- Rates are ratios (0–1); multiply by 100 for percentage display. +- See [orbit-activation-policy-change-jan-2026](orbit-activation-policy-change-jan-2026) for full policy context.
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-segments.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-segments.md new file mode 100644 index 00000000..04085359 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-segments.md @@ -0,0 +1,55 @@ +--- +summary: "mart_account_segments: account segmentation with contract ARR, plan codes, size_band, segment (self_serve/commercial/enterprise), and contract_status. One row per account_id." +usage_mode: auto +sort_order: 0 +tags: + - arr + - segmentation + - accounts + - mart + - orbit-analytics +sl_refs: + - mart_account_segments +tables: + - orbit_analytics.mart_account_segments +--- + +# mart_account_segments + + + + +**Table:** `orbit_analytics.mart_account_segments` +**Grain:** one row per `account_id` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `account_id` | text | Primary key | +| `parent_account_id` | text | Parent account for hierarchy rollups | +| `current_plan_code` | text | Raw plan code from billing system | +| `normalized_plan_code` | text | Canonical plan code (`pro_plus` → `growth`) | +| `size_band` | text | Company size band | +| `segment` | text | Reporting segment: `self_serve`, `commercial`, `enterprise` | +| `contract_arr_cents` | bigint | Contract ARR in cents | +| `contract_status` | text | `active`, `churned`, etc. 
| + +## Key measures (SL: `mart_account_segments`) + +- `account_count` — `count(*)` +- `total_contract_arr_cents` — `sum(contract_arr_cents)` +- `active_contract_arr_cents` — `sum(contract_arr_cents)` where `contract_status = 'active'` +- `active_contract_arr_millions` — active ARR in $M + +## Common query patterns + +- **ARR by segment:** `GROUP BY segment WHERE contract_status = 'active'` +- **Top accounts:** `ORDER BY contract_arr_cents DESC` with `is_internal = false AND is_test = false` (join to `orbit_raw.accounts`) +- **Unmapped segment:** `COALESCE(segment, 'unmapped')` + +## Business rules + +- `normalized_plan_code` maps `pro_plus` → `growth`. Always use `normalized_plan_code` for plan-based reporting. See [orbit-plan-segment-normalization](orbit-plan-segment-normalization). +- `segment` is derived from `canonical_plan_code × size_band` via `stg_plan_segment_mapping`. +- `contract_arr_cents` is the contract-first ARR value. See [orbit-arr-contract-first-definition](orbit-arr-contract-first-definition). diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-arr-daily.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-arr-daily.md new file mode 100644 index 00000000..5b3db7dd --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-arr-daily.md @@ -0,0 +1,46 @@ +--- +summary: "mart_arr_daily: daily ARR snapshot with contract-first valuation, arr_cents and display columns, used for ARR trend and EoQ reporting." 
+usage_mode: auto +sort_order: 0 +tags: + - arr + - revenue + - mart + - orbit-analytics +sl_refs: + - mart_arr_daily +tables: + - orbit_analytics.mart_arr_daily +--- + +# mart_arr_daily + + + + +**Table:** `orbit_analytics.mart_arr_daily` +**Grain:** one row per `metric_date` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `metric_date` | date | Snapshot date | +| `arr_cents` | bigint | ARR in cents (contract-first: active contract ARR takes precedence over subscription ARR) | +| `display` | text | Human-readable ARR label (e.g. formatted dollar string) | + +## Key measures (SL: `mart_arr_daily`) + +- `total_arr_cents` — `sum(arr_cents)` +- `arr_millions` — `round(sum(arr_cents) / 100000000.0, 3)` — ARR in $M + +## Common query patterns + +- **Current ARR:** filter `metric_date = current_date` (or latest available date) +- **EoQ ARR:** filter `metric_date = date '2026-03-31'` +- **ARR trend:** group by `metric_date`, plot `arr_cents` + +## Business rules + +- ARR is calculated contract-first: active contract ARR takes precedence over subscription ARR for any covered period. See [orbit-arr-contract-first-definition](orbit-arr-contract-first-definition). +- `display` is a formatted label for UI rendering; use `arr_cents` for all arithmetic. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-nrr-quarterly.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-nrr-quarterly.md new file mode 100644 index 00000000..288c8201 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-nrr-quarterly.md @@ -0,0 +1,55 @@ +--- +summary: "mart_nrr_quarterly: quarterly NRR by segment with net_revenue_retention ratio, expansion/contraction/churn ARR cents, and quarter_label. Enterprise is the primary reporting segment." 
+usage_mode: auto +sort_order: 0 +tags: + - nrr + - retention + - revenue + - mart + - orbit-analytics +sl_refs: + - mart_nrr_quarterly +tables: + - orbit_analytics.mart_nrr_quarterly +--- + +# mart_nrr_quarterly + + + + + +**Table:** `orbit_analytics.mart_nrr_quarterly` +**Grain:** one row per `quarter_label` × `segment` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `quarter_start_date` | date | First day of the quarter | +| `quarter_label` | text | Quarter identifier, e.g. `'2026-Q1'` | +| `segment` | text | Customer segment: `enterprise`, `commercial`, `self_serve` | +| `starting_arr_cents` | bigint | ARR at start of quarter in cents | +| `expansion_arr_cents` | bigint | ARR added from expansions | +| `contraction_arr_cents` | bigint | ARR lost from contractions (includes discount expirations) | +| `churned_arr_cents` | bigint | ARR lost from churn | +| `net_revenue_retention` | decimal | NRR ratio (e.g. `1.12` = 112%) | + +## Key measures (SL: `mart_nrr_quarterly`) + +- `avg_nrr` — `avg(net_revenue_retention)` across all rows +- `avg_nrr_enterprise` — `avg(net_revenue_retention)` filtered to `segment = 'enterprise'` +- `total_expansion_arr_cents`, `total_contraction_arr_cents`, `total_churned_arr_cents` + +## Common query patterns + +- **Q1 enterprise NRR:** `WHERE quarter_label = '2026-Q1' AND segment = 'enterprise'` +- **NRR as percent:** `round(net_revenue_retention * 100, 1)` +- **Trend by quarter:** `ORDER BY quarter_start_date` + +## Business rules + +- `net_revenue_retention` is a ratio, not a percentage. Multiply by 100 for display. +- Contraction includes discount expirations (classified as contraction, not churn). See [orbit-nrr-discount-expiration-treatment](orbit-nrr-discount-expiration-treatment). +- Enterprise is the primary executive reporting segment. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-procurement-activity.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-procurement-activity.md new file mode 100644 index 00000000..ab3de364 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-procurement-activity.md @@ -0,0 +1,47 @@ +--- +summary: "mart_procurement_activity: weekly active requester counts by contract_arr_threshold_cents. Standard threshold is 20000000 cents ($200k ARR). Used for golden-week procurement metrics." +usage_mode: auto +sort_order: 0 +tags: + - procurement + - mart + - orbit-analytics + - active-requesters +sl_refs: + - mart_procurement_activity +tables: + - orbit_analytics.mart_procurement_activity +--- + +# mart_procurement_activity + + + + +**Table:** `orbit_analytics.mart_procurement_activity` +**Grain:** one row per `week_start_date` × `contract_arr_threshold_cents` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `week_start_date` | date | Monday of the reporting week | +| `week_end_date` | date | Sunday of the reporting week | +| `contract_arr_threshold_cents` | bigint | ARR threshold filter applied (e.g. `20000000` = $200k) | +| `active_requesters` | bigint | Count of qualifying active requesters for the week | + +## Key measures (SL: `mart_procurement_activity`) + +- `total_active_requesters` — `sum(active_requesters)` +- `active_requesters_200k_threshold` — `sum(active_requesters)` where `contract_arr_threshold_cents = 20000000` + +## Common query patterns + +- **Golden week (week of 2026-03-23):** `WHERE week_start_date = date '2026-03-23' AND contract_arr_threshold_cents = 20000000` +- **Weekly trend at $200k threshold:** `WHERE contract_arr_threshold_cents = 20000000 ORDER BY week_start_date` + +## Business rules + +- `active_requesters` counts non-internal, non-test requesters on large active contracts. See [orbit-procurement-qualifying-actions](orbit-procurement-qualifying-actions). 
+- The standard threshold is `contract_arr_threshold_cents = 20000000` ($200k ARR). +- Always filter by `contract_arr_threshold_cents` — the table contains rows for multiple threshold values. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-retention-movement-breakout.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-retention-movement-breakout.md new file mode 100644 index 00000000..7c22cf16 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-retention-movement-breakout.md @@ -0,0 +1,55 @@ +--- +summary: "mart_retention_movement_breakout: quarterly ARR movement by segment, movement_type, and movement_reason. NRR waterfall source. Contraction includes discount expirations." +usage_mode: auto +sort_order: 0 +tags: + - nrr + - retention + - arr + - mart + - orbit-analytics +sl_refs: + - mart_retention_movement_breakout +tables: + - orbit_analytics.mart_retention_movement_breakout +--- + +# mart_retention_movement_breakout + + + + +**Table:** `orbit_analytics.mart_retention_movement_breakout` +**Grain:** one row per `quarter_label` × `segment` × `movement_type` × `movement_reason` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `quarter_start_date` | date | First day of the quarter | +| `quarter_label` | text | Quarter identifier, e.g. `'2026-Q1'` | +| `segment` | text | Customer segment: `enterprise`, `commercial`, `self_serve` | +| `movement_type` | text | `expansion`, `contraction`, or `churn` | +| `movement_reason` | text | Specific reason (e.g. 
`discount_expiration`) | +| `parent_account_count` | bigint | Number of parent accounts in this bucket | +| `expansion_arr_cents` | bigint | Expansion ARR in cents | +| `contraction_arr_cents` | bigint | Contraction ARR in cents | +| `churned_arr_cents` | bigint | Churned ARR in cents | + +## Key measures (SL: `mart_retention_movement_breakout`) + +- `total_expansion_arr_cents`, `total_contraction_arr_cents`, `total_churned_arr_cents` +- `expansion_arr_millions`, `contraction_arr_millions`, `churned_arr_millions` +- `parent_account_count` + +## Common query patterns + +- **Q1 enterprise waterfall:** `WHERE quarter_label = '2026-Q1' AND segment = 'enterprise'` +- **Movement summary:** `GROUP BY movement_type ORDER BY movement_type` +- **Discount expiration contraction:** `WHERE movement_reason = 'discount_expiration'` + +## Business rules + +- Contraction includes discount expirations, classified as contraction (not churn), tracked via `movement_reason`. See [orbit-nrr-discount-expiration-treatment](orbit-nrr-discount-expiration-treatment). +- This table is the row-level source for `mart_nrr_quarterly` aggregations. +- Only one of `expansion_arr_cents`, `contraction_arr_cents`, `churned_arr_cents` is non-zero per row. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-revenue-daily.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-revenue-daily.md new file mode 100644 index 00000000..8deb5ffe --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-revenue-daily.md @@ -0,0 +1,56 @@ +--- +summary: "mart_revenue_daily: daily gross-to-net revenue reconciliation with gross_revenue_cents, credits_cents, refunds_cents, net_revenue_cents, and reconciliation_check." 
+usage_mode: auto +sort_order: 0 +tags: + - revenue + - reconciliation + - mart + - orbit-analytics +sl_refs: + - mart_revenue_daily +tables: + - orbit_analytics.mart_revenue_daily +--- + +# mart_revenue_daily + + + + + + +**Table:** `orbit_analytics.mart_revenue_daily` +**Grain:** one row per `revenue_date` + +## Columns + +| Column | Type | Notes | +|---|---|---| +| `revenue_date` | date | Revenue recognition date | +| `gross_revenue_cents` | bigint | Gross invoice revenue in cents | +| `credits_cents` | bigint | Credits applied in cents | +| `refunds_cents` | bigint | Refunds issued in cents | +| `net_revenue_cents` | bigint | Net revenue = gross − credits − refunds | +| `reconciliation_check` | boolean | Must be `true` on every row; flags rows where net ≠ gross − credits − refunds | + +## Key measures (SL: `mart_revenue_daily`) + +- `total_gross_revenue_cents` — `sum(gross_revenue_cents)` +- `total_credits_cents` — `sum(credits_cents)` +- `total_refunds_cents` — `sum(refunds_cents)` +- `total_net_revenue_cents` — `sum(net_revenue_cents)` +- `net_revenue_millions` — `round(sum(net_revenue_cents) / 100000000.0, 3)` +- `gross_revenue_millions` — `round(sum(gross_revenue_cents) / 100000000.0, 3)` + +## Common query patterns + +- **Q1 net revenue:** `WHERE revenue_date BETWEEN '2026-01-01' AND '2026-03-31'` +- **February reconciliation:** `WHERE revenue_date BETWEEN '2026-02-01' AND '2026-02-28'` +- **Monthly trend:** `GROUP BY date_trunc('month', revenue_date)` + +## Business rules + +- `reconciliation_check` must be `true` on every row. Any `false` row indicates a data quality issue. +- Gross-to-net reconciliation: gross revenue − credits − refunds = net revenue. See [orbit-revenue-gross-to-net-reconciliation](orbit-revenue-gross-to-net-reconciliation). +- All amounts are in cents; divide by 100 for USD, by 100,000,000 for $M. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-metabase-sql-library-patterns.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-metabase-sql-library-patterns.md new file mode 100644 index 00000000..28055b9b --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-metabase-sql-library-patterns.md @@ -0,0 +1,71 @@ +--- +summary: "Metabase SQL Library collection (collection 7): reusable query patterns, the account_join snippet, and field-filter conventions used across Orbit Showcase cards." +usage_mode: auto +sort_order: 0 +tags: + - metabase + - sql-patterns + - orbit-showcase +sl_refs: + - mart_account_segments + - mart_procurement_activity + - mart_customer_health + - mart_retention_movement_breakout + - mart_revenue_daily + - mart_nrr_quarterly +--- + +# Orbit Metabase SQL Library — Patterns & Conventions + +Collection **7 "SQL Library"** (parent: Orbit Showcase, collection 5) contains reference queries that demonstrate how to write Metabase native SQL against the Orbit analytics marts. Cards here are intentionally illustrative; several have `dashboardCount: 0` and are not embedded in live dashboards. + +## Reusable snippet: `account_join` + +Card 55 ("Large contract requesters") references `{{snippet: account_join}}`. The resolved SQL shows the canonical pattern for joining `orbit_analytics.mart_account_segments` to `orbit_raw.accounts`: + +```sql +FROM orbit_analytics.mart_account_segments mart +LEFT JOIN orbit_raw.accounts a + ON a.account_id = mart.account_id + AND a.is_internal = false + AND a.is_test = false +``` + +Key points: +- The `is_internal = false AND is_test = false` guard is applied **in the JOIN condition**, not the WHERE clause, so it does not drop rows from `mart_account_segments` that have no matching account row. +- The alias `mart` is used for `mart_account_segments` throughout the snippet. 
+- Card 48 ("Top accounts by contract ARR") applies the same guards in the WHERE clause instead; the two forms are not strictly equivalent, because a WHERE-clause guard on a LEFT JOIN also drops `mart_account_segments` rows that have no matching account row. + +## Field-filter conventions + +Cards in this collection use Metabase dimension field filters (`type: dimension`) for optional narrowing: +- `segment` filter → maps to `mart_account_segments.segment` or `mart_retention_movement_breakout.segment`. +- `date_range` filter → maps to `mart_procurement_activity.week_start_date`. +- `quarter` filter → maps to `mart_nrr_quarterly.quarter_label`. + +These filters are **optional** (`[[ ... ]]` blocks in raw SQL); the resolved SQL drops them, leaving the unfiltered dataset. SL sources derived from these cards should not bake in the filter. + +## Hard-coded date anti-pattern + +Card 54 ("February credits drilldown") is explicitly documented as a **counter-example**: it hard-codes `revenue_date BETWEEN DATE '2026-02-01' AND DATE '2026-02-28'`. This card is not embedded in any dashboard and should not be used as a template. Use `mart_revenue_daily` directly with a runtime date filter instead. + +## Near-duplicate pair: cards 48 and 55 + +Both cards query `mart_account_segments` + `orbit_raw.accounts` and project `account_name`, `contract_arr`, `segment`, `size_band`. Beyond guard placement (WHERE clause in card 48, JOIN condition in card 55), they differ only in: +- Card 48: no ARR floor filter, LIMIT 20, on 1 dashboard. +- Card 55: `contract_arr_cents >= 20,000,000` ($200k floor), LIMIT 25, no dashboard. + +Card 48 is the canonical reference; card 55 is a filtered variant for large-contract analysis.
+ +## Cards and their mart sources + +| Card | Name | Mart | Dashboard count | +|------|------|------|----------------| +| 48 | Top accounts by contract ARR | mart_account_segments | 1 | +| 49 | Procurement actions by week | mart_procurement_activity | 1 | +| 50 | Accounts at risk | mart_customer_health | 1 | +| 51 | ARR movement breakout | mart_retention_movement_breakout | 1 | +| 52 | Revenue refund audit | mart_revenue_daily | 0 | +| 53 | Enterprise NRR quarter breakout | mart_nrr_quarterly | 0 | +| 54 | February credits drilldown | mart_revenue_daily | 0 | +| 55 | Large contract requesters | mart_account_segments | 0 | diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-nrr-discount-expiration-treatment.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-nrr-discount-expiration-treatment.md new file mode 100644 index 00000000..0b966d8c --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-nrr-discount-expiration-treatment.md @@ -0,0 +1,47 @@ +--- +summary: "NRR definition and the Q1 2026 discount-expiration contraction treatment: discount expirations are classified as contraction, not churn, and tracked separately via is_discount_expiration_contraction." +usage_mode: auto +sort_order: 0 +tags: + - nrr + - retention + - governed-metric + - analytics + - discount + - contraction +sl_refs: + - mart_nrr_quarterly +--- + +# NRR — Discount Expiration Treatment + +**Governed metric key:** `net_revenue_retention` +**Owner team:** analytics +**Notion:** `notion://notion_page_retention_policy_current#nrr-definition` and `#discount-expiration-treatment` +**Sources:** `mart_nrr_quarterly`, `mart_retention_movement_breakout` + +## NRR Definition + +Net Revenue Retention (NRR) is calculated quarterly at the **parent-account** grain using `int_parent_account_arr_movements`. The enterprise segment is the primary reporting cut. 
+ +**Known assertions:** +- Enterprise NRR **2026-Q1 = 1.018** (101.8%) +- Enterprise NRR **2025-Q4 = 1.064** (106.4%) + +## Discount Expiration Treatment + +Contraction ARR arising from the expiry of launch/renewal/migration/goodwill discounts is **not classified as churn**. It is tracked via the boolean flag `is_discount_expiration_contraction` on `int_parent_account_arr_movements` and surfaced as `movement_reason = 'discount_expiration'` in `mart_retention_movement_breakout`. + +**Known assertion:** 11 parent accounts had `movement_type = 'contraction'` and `movement_reason = 'discount_expiration'` in Q1 2026. + +## Discount Types (from `stg_contract_discount_terms`) + +`launch`, `renewal`, `migration`, `goodwill` + +## Movement Types + +`new`, `expansion`, `contraction`, `churn`, `reactivation` + +## Why This Matters + +Without the discount-expiration carve-out, Q1 2026 enterprise NRR would appear lower than it is. The Q4 → Q1 drop (1.064 → 1.018) is partly explained by discount expirations, not organic churn. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-plan-segment-normalization.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-plan-segment-normalization.md new file mode 100644 index 00000000..580c1fa1 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-plan-segment-normalization.md @@ -0,0 +1,49 @@ +--- +summary: "Plan code normalization rules: pro_plus maps to growth. Reporting segments (self_serve, commercial, enterprise) are derived from canonical_plan_code × size_band via stg_plan_segment_mapping." 
+usage_mode: auto +sort_order: 0 +tags: + - segmentation + - plans + - sales-ops + - governed-metric + - normalization +sl_refs: + - mart_account_segments +--- + +# Plan & Segment Normalization + +**Governed metric key:** `segment` +**Owner team:** sales_ops +**Notion:** `notion://notion_page_sales_ops_segmentation#growth-plan-normalization` +**Sources:** `mart_account_segments`, `stg_plan_segment_mapping`, `stg_plans` + +## Canonical Plan Codes + +| Raw / Legacy Code | Canonical Code | +|---|---| +| `starter` | `starter` | +| `growth` | `growth` | +| `pro_plus` | **`growth`** (normalized) | +| `enterprise` | `enterprise` | + +The normalization is applied via `stg_plans.canonical_plan_code`. `mart_account_segments.normalized_plan_code` reflects the post-normalization value. + +## Reporting Segments + +Segments are derived from `canonical_plan_code` × `size_band` using the effective-dated lookup `stg_plan_segment_mapping`: + +| Segment | Typical plan + size band | +|---|---| +| `self_serve` | starter / smb | +| `commercial` | growth / mid_market | +| `enterprise` | enterprise / enterprise | + +## Size Bands + +`smb`, `mid_market`, `enterprise` + +## Effective Dating + +`stg_plan_segment_mapping` has `effective_from` / `effective_to` columns, allowing segment rules to change over time without rewriting history. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-procurement-qualifying-actions.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-procurement-qualifying-actions.md new file mode 100644 index 00000000..08126dd5 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-procurement-qualifying-actions.md @@ -0,0 +1,46 @@ +--- +summary: "Qualifying procurement actions for weekly active requester counts: non-internal, non-test requesters on large active contracts. Covers golden week metric and supplier onboarding." 
+usage_mode: auto +sort_order: 0 +tags: + - procurement + - product + - governed-metric + - weekly-active-requesters + - suppliers +sl_refs: + - mart_procurement_activity +--- + +# Procurement — Qualifying Actions & Weekly Active Requesters + +**Governed metric key:** `weekly_active_requesters` +**Owner team:** product +**Notion:** `notion://notion_page_procurement_instrumentation#qualifying-procurement-actions` +**Sources:** `mart_procurement_activity`, `int_procurement_qualifying_actions` + +## Qualifying Action Definition + +A qualifying procurement action is any activity by a **non-internal, non-test** requester on a **large active contract** within the measurement week. Captured in `int_procurement_qualifying_actions`. + +Qualifying action types include: +- Submitting a purchase request (`stg_purchase_requests`, status: submitted/approved) +- Supplier onboarding milestones (`stg_supplier_onboarding_events`, event_type: profile_completed, approved) +- Purchase order creation (`stg_purchase_orders`) + +## Exclusions + +- Accounts with `lifecycle_status IN ('internal', 'test')` on `stg_accounts` +- Requesters without an approved purchase request in the window + +## Supplier Onboarding Milestones + +`invited` → `profile_started` → `profile_completed` → `approved` + +## Approval Decisions (`stg_approval_events`) + +`approved`, `rejected`, `returned` + +## Dashboard + +Exposed via the **Growth Activation Dashboard** (`https://orbit-demo.example.com/dashboards/activation`), which depends on `mart_account_activity`. 
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-design-principles.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-design-principles.md new file mode 100644 index 00000000..f2b72b43 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-design-principles.md @@ -0,0 +1,43 @@ +--- +summary: "Orbit product design policy: new features must make requester or approver experience clearer; complexity for its own sake is not built." +usage_mode: auto +sort_order: 0 +tags: + - product + - policy + - orbit +refs: + - orbit-product-review-checklist + - orbit-company-overview +--- + +## Orbit Product Design Principles + +**Source:** Notion — Product & Customers, last edited 2026-05-07 + +--- + +## Core Policy + +Orbit does not build complexity for its own sake. + +## Feature Complexity Rule + +- When a new feature adds multiple configuration choices, it **must** be evaluated on whether it makes the requester or approver experience clearer. +- If the added configuration does not make the requester or approver experience clearer, the feature should not be built as designed. +- The test: can a first-time requester or approver use the new feature without needing to understand the configuration choices behind it? + +## Design Heuristics + +- **Default to simpler.** If two designs achieve the same outcome, prefer the one with fewer choices exposed to the end user. +- **Configuration is a last resort.** Expose configuration only when different customers have legitimately incompatible needs that cannot be resolved by a sensible default. +- **Requester and approver clarity are the primary UX metrics.** Speed, completeness, and confidence for those two roles are the measures of a good Orbit feature. + +## What This Is Not + +- This principle does not prohibit powerful or flexible features. 
+- It prohibits features where the complexity is internal to Orbit's implementation but leaks into the requester or approver experience without benefit. + +--- + +See also: [[orbit-product-review-checklist]], [[orbit-company-overview]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-review-checklist.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-review-checklist.md new file mode 100644 index 00000000..abf8e747 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-review-checklist.md @@ -0,0 +1,44 @@ +--- +summary: "Five-question checklist to evaluate every Orbit product change: requester clarity, approver context, supplier onboarding ownership, PO accuracy, and CS rollout visibility." +usage_mode: auto +sort_order: 0 +tags: + - product + - policy + - orbit +refs: + - orbit-company-overview + - sales-ops-cs-handoff-process +--- + +## Product Review Checklist + +**Source:** Notion — Product & Customers, last edited 2026-05-07 + +--- + +## Policy + +Every product change must be evaluated against all five questions before shipping. A "no" on any question is a blocker unless explicitly waived by the accountable lead with a written rationale. + +## The Five Questions + +| # | Question | What a "no" means | +|---|---|---| +| 1 | **Does a first-time requester know what to do next?** | The requester experience is unclear or requires prior knowledge not surfaced in the UI. | +| 2 | **Can an approver make a decision without missing context?** | The approver is missing information needed to approve or reject confidently. | +| 3 | **Is supplier onboarding assigned to a named person, not a queue?** | Supplier onboarding has no clear owner and will stall. | +| 4 | **Does the PO reflect the approved request?** | There is a mismatch between what was approved and what the PO captures. 
| +| 5 | **Can Customer Success detect a stuck rollout after week two?** | CS has no signal to identify customers who are not progressing past initial setup. | + +## Usage + +- Use this checklist in product reviews, design critiques, and pre-launch readiness checks. +- Questions 1–2 are requester/approver experience checks. +- Question 3 is a supplier onboarding ownership check. +- Question 4 is a PO accuracy check. +- Question 5 is a post-launch CS visibility check. + +--- + +See also: [[orbit-company-overview]], [[orbit-product-design-principles]], [[sales-ops-cs-handoff-process]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-revenue-gross-to-net-reconciliation.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-revenue-gross-to-net-reconciliation.md new file mode 100644 index 00000000..65004331 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-revenue-gross-to-net-reconciliation.md @@ -0,0 +1,50 @@ +--- +summary: "Gross-to-net revenue reconciliation: mart_revenue_daily reconciles gross invoice revenue, credits, and refunds to net revenue daily. reconciliation_check must be true on every row." +usage_mode: auto +sort_order: 0 +tags: + - revenue + - net-revenue + - governed-metric + - finance + - reconciliation +sl_refs: + - mart_revenue_daily +--- + +# Revenue — Gross-to-Net Reconciliation + +**Governed metric key:** `net_revenue` +**Owner team:** finance +**Notion:** `notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation` +**Source:** `mart_revenue_daily` (grain: `revenue_date`) + +## Formula + +``` +net_revenue = gross_revenue - credits - refunds +``` + +All amounts are in **cents** (USD only — `stg_invoices.currency` is asserted to be `USD`). 
+ +## Components + +| Column | Source | Description | +|---|---|---| +| `gross_revenue_cents` | `stg_invoices` / `stg_invoice_line_items` | Billed amounts before adjustments | +| `credits_cents` | `stg_invoice_line_items` (type=credit) | Credits applied to invoices | +| `refunds_cents` | `stg_refunds` | Refunds reduce net revenue in the refund month | +| `net_revenue_cents` | Derived | gross − credits − refunds | + +## Intermediate model + +`int_revenue_components` — daily gross, credit, refund, and net revenue components. + +## Quality Gates + +- `reconciliation_check` must be `true` on every row of `mart_revenue_daily`. +- `assert_february_2026_net_revenue` — a dbt singular test covering February 2026 net revenue total. + +## Line Item Types (`stg_invoice_line_items`) + +`subscription`, `seat`, `usage`, `addon`, `credit` diff --git a/packages/cli/assets/demo/orbit/knowledge/global/procurement-workflows.md b/packages/cli/assets/demo/orbit/knowledge/global/procurement-workflows.md deleted file mode 100644 index 6495065f..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/procurement-workflows.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -summary: Procurement workflow activity measures active requesters and qualifying actions. -tags: - - product - - procurement -refs: - - activation-policy -sl_refs: - - orbit_demo.purchase_requests -usage_mode: auto ---- - -Weekly active requesters counts distinct non-internal requesters with a qualifying procurement action in the calendar week. - -Qualifying actions include purchase request creation, approval decisions, supplier invites, and purchase-order creation. - -Purchase-request comments and short sessions are excluded from the canonical requester activity metric.
diff --git a/packages/cli/assets/demo/orbit/knowledge/global/revenue-gross-to-net.md b/packages/cli/assets/demo/orbit/knowledge/global/revenue-gross-to-net.md deleted file mode 100644 index 2c23363d..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/revenue-gross-to-net.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -summary: Gross-to-net revenue reconciles paid invoices, credits, and refunds. -tags: - - finance - - revenue -refs: - - arr-contract-first -sl_refs: - - orbit_demo.invoices -usage_mode: auto ---- - -Gross revenue starts from paid invoice activity. Net revenue subtracts credits and successful refunds in the month they are recorded. - -Exclude unpaid, void, draft, failed, internal, and test-account invoice activity from canonical revenue reporting. - -February 2026 has an elevated refund event captured in the source notes and revenue dashboard. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md b/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md new file mode 100644 index 00000000..d547d026 --- /dev/null +++ b/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md @@ -0,0 +1,58 @@ +--- +summary: "Sales Ops → Customer Success implementation handoff: required fields, ownership, enterprise account risk, and policy that CS must not rediscover sales-stage details." +usage_mode: auto +sort_order: 0 +tags: + - policy + - sales-ops + - customer-success +refs: + - orbit-company-overview + - orbit-how-we-work + - orbit-plan-segment-normalization +--- + +## Sales Ops → Customer Success Implementation Handoff + +**Source:** Notion — People & Operating Norms, last edited 2026-05-07 +**Owner:** Sales Ops (sender), Customer Success (receiver) + +--- + +## Policy + +Sales Ops must complete the handoff **before the first implementation call**. Customer Success should not need to rediscover any of the following details. 
+ +## Required Handoff Fields + +| Field | Notes | +|---|---| +| Current plan | Starter / Growth / Enterprise — use canonical plan name, not legacy aliases | +| Account segment | self_serve / commercial / enterprise (see `orbit-plan-segment-normalization`) | +| Contract shape | Term, ARR, any discounts or custom terms | +| Renewal contact | Named person on the customer side responsible for renewal | +| Unusual approval requirements | Any non-standard approval routing the customer has configured or requested | +| Unusual supplier requirements | Any supplier onboarding exceptions or pre-approved vendor lists | + +## Ownership + +- **Sales Ops** is responsible for populating and delivering the handoff before the first implementation call. +- **Customer Success** is responsible for flagging missing fields to Sales Ops before the call, not during or after. +- If a field is unknown at handoff time, Sales Ops must note it explicitly as "unknown — to be resolved by [date]" rather than leaving it blank. + +## Common Failure Mode + +Handoffs that omit contract shape or renewal contact force CS to re-engage Sales Ops mid-implementation, which delays time-to-value and creates duplicate discovery work. This is the primary failure mode this process is designed to prevent. + +--- + +## Enterprise Account Risk: Parent/Child Complexity + +- Enterprise accounts with parent/child account structures require extra care during handoff. +- Small assumptions made during handoff in these accounts tend to produce large downstream problems (billing mismatches, approval routing failures, supplier onboarding gaps). +- When the account has parent/child complexity, Sales Ops must explicitly flag it in the handoff and document the account hierarchy before the first implementation call. +- CS should treat any undocumented parent/child relationship as a blocker — do not proceed with implementation setup until the structure is confirmed. 
+ +--- + +See also: [[orbit-company-overview]], [[orbit-how-we-work]], [[orbit-plan-segment-normalization]] diff --git a/packages/cli/assets/demo/orbit/knowledge/global/segment-classification.md b/packages/cli/assets/demo/orbit/knowledge/global/segment-classification.md deleted file mode 100644 index 901ea1a8..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/segment-classification.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -summary: Account segments derive from plan normalization and effective-dated mapping. -tags: - - sales-ops - - segmentation -refs: [] -sl_refs: - - orbit_demo.accounts - - orbit_demo.contracts -usage_mode: auto ---- - -Account segment labels combine plan_code, canonical_plan_code, and size_band fields. - -Historical plan code pro_plus maps to growth for current segment analysis. - -Use the mapping active at the metric date when segment definitions change over time. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/support-escalation.md b/packages/cli/assets/demo/orbit/knowledge/global/support-escalation.md deleted file mode 100644 index 569f842a..00000000 --- a/packages/cli/assets/demo/orbit/knowledge/global/support-escalation.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -summary: Support escalation tiers map ticket severity to SLA targets. -tags: - - support - - sla -refs: - - customer-health-scoring -sl_refs: - - orbit_demo.support_tickets -usage_mode: auto ---- - -Critical support tickets require immediate response and on-call escalation. - -High severity tickets should receive first response within four business hours. - -Resolution time is measured from created_at to resolved_at and only applies to resolved tickets. 
diff --git a/packages/cli/assets/demo/orbit/manifest.json b/packages/cli/assets/demo/orbit/manifest.json index 72cad508..1fcb3bef 100644 --- a/packages/cli/assets/demo/orbit/manifest.json +++ b/packages/cli/assets/demo/orbit/manifest.json @@ -43,12 +43,12 @@ }, "generated": { "semanticLayer": { - "path": "semantic-layer/orbit_demo", - "sourceCount": 6 + "path": "semantic-layer", + "sourceCount": 46 }, "knowledge": { "path": "knowledge/global", - "pageCount": 10 + "pageCount": 28 }, "links": { "path": "links", diff --git a/packages/cli/assets/demo/orbit/semantic-layer/.gitkeep b/packages/cli/assets/demo/orbit/semantic-layer/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_activation_policy_windows.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_activation_policy_windows.yaml new file mode 100644 index 00000000..ddceca13 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_activation_policy_windows.yaml @@ -0,0 +1,27 @@ +name: int_activation_policy_windows +table: orbit_analytics.int_activation_policy_windows +grain: + - policy_version +columns: + - name: policy_version + type: string + descriptions: + user: pre_2026_01_15 or post_2026_01_15 + - name: activated_account_count + type: number + descriptions: + ktx: Column activated account count from int_activation_policy_windows. + - name: window_start + type: time + descriptions: + ktx: Column window start from int_activation_policy_windows. + - name: window_end + type: time + descriptions: + ktx: Column window end from int_activation_policy_windows. +joins: [] +measures: + - name: total_activated_accounts + expr: sum(activated_account_count) +descriptions: + user: Activation cohort counts around the January 2026 policy change. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_active_contract_arr.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_active_contract_arr.yaml new file mode 100644 index 00000000..00360734 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_active_contract_arr.yaml @@ -0,0 +1,24 @@ +name: int_active_contract_arr +table: orbit_analytics.int_active_contract_arr +grain: + - contract_id +columns: + - name: contract_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on int_active_contract_arr. + - name: arr_cents + type: number + descriptions: + ktx: Column arr cents from int_active_contract_arr. +joins: [] +measures: + - name: total_arr_cents + expr: sum(arr_cents) + description: Total active contract ARR in cents as of 2026-03-31. +descriptions: + user: Active contract ARR as of 2026-03-31. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_customer_health_signals.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_customer_health_signals.yaml new file mode 100644 index 00000000..5e5af811 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_customer_health_signals.yaml @@ -0,0 +1,25 @@ +name: int_customer_health_signals +table: orbit_analytics.int_customer_health_signals +grain: + - account_id +columns: + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on int_customer_health_signals. + - name: open_critical_ticket_count + type: number + descriptions: + ktx: Column open critical ticket count from int_customer_health_signals. + - name: recent_procurement_action_count + type: number + descriptions: + ktx: Column recent procurement action count from int_customer_health_signals. 
+ - name: risk_level + type: string + descriptions: + user: "Derived risk level: low, medium, high" +joins: [] +measures: [] +descriptions: + user: Support-ticket and recent-procurement signals for customer health risk. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_parent_account_arr_movements.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_parent_account_arr_movements.yaml new file mode 100644 index 00000000..e053ab6e --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_parent_account_arr_movements.yaml @@ -0,0 +1,49 @@ +name: int_parent_account_arr_movements +table: orbit_analytics.int_parent_account_arr_movements +grain: + - arr_movement_id +columns: + - name: arr_movement_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: parent_account_id + type: string + descriptions: + ktx: Identifier for the related parent account on int_parent_account_arr_movements. + - name: movement_type + type: string + descriptions: + user: "dbt: accepted_values [new, expansion, contraction, churn, reactivation]" + - name: is_discount_expiration_contraction + type: boolean + descriptions: + user: Discount expiration contraction flag used to keep discount movement separate from churn. + - name: movement_date + type: time + descriptions: + ktx: Date or time value for movement date on int_parent_account_arr_movements. + - name: arr_cents + type: number + descriptions: + ktx: Column arr cents from int_parent_account_arr_movements. +joins: [] +measures: + - name: expansion_arr_cents + expr: sum(arr_cents) + filter: movement_type = 'expansion' + description: Sum of expansion ARR movements in cents. + - name: contraction_arr_cents + expr: sum(arr_cents) + filter: movement_type = 'contraction' + description: Sum of contraction ARR movements in cents. + - name: churn_arr_cents + expr: sum(arr_cents) + filter: movement_type = 'churn' + description: Sum of churn ARR movements in cents. 
+ - name: discount_expiration_contraction_arr_cents + expr: sum(arr_cents) + filter: is_discount_expiration_contraction = true + description: Contraction ARR from discount expirations — kept separate from churn in NRR calculation. +descriptions: + user: Parent-account movement rollups for retention metrics. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_procurement_qualifying_actions.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_procurement_qualifying_actions.yaml new file mode 100644 index 00000000..03fdc513 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_procurement_qualifying_actions.yaml @@ -0,0 +1,27 @@ +name: int_procurement_qualifying_actions +table: orbit_analytics.int_procurement_qualifying_actions +grain: + - purchase_request_id +columns: + - name: purchase_request_id + type: string + descriptions: + ktx: Identifier for the related purchase request on int_procurement_qualifying_actions. + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on int_procurement_qualifying_actions. + - name: requester_user_id + type: string + descriptions: + ktx: Identifier for the related requester user on int_procurement_qualifying_actions. + - name: action_week + type: time + descriptions: + ktx: Column action week from int_procurement_qualifying_actions. +joins: [] +measures: + - name: qualifying_action_count + expr: count(purchase_request_id) +descriptions: + user: Non-internal, non-test requester activity for large active contracts in the golden week. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_revenue_components.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_revenue_components.yaml new file mode 100644 index 00000000..3bc25415 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/int_revenue_components.yaml @@ -0,0 +1,37 @@ +name: int_revenue_components +table: orbit_analytics.int_revenue_components +grain: + - revenue_date +columns: + - name: revenue_date + type: time + descriptions: + ktx: Date or time value for revenue date on int_revenue_components. + - name: gross_revenue_cents + type: number + descriptions: + ktx: Column gross revenue cents from int_revenue_components. + - name: credit_cents + type: number + descriptions: + ktx: Column credit cents from int_revenue_components. + - name: refund_cents + type: number + descriptions: + ktx: Column refund cents from int_revenue_components. + - name: net_revenue_cents + type: number + descriptions: + ktx: Column net revenue cents from int_revenue_components. +joins: [] +measures: + - name: total_gross_revenue_cents + expr: sum(gross_revenue_cents) + - name: total_credit_cents + expr: sum(credit_cents) + - name: total_refund_cents + expr: sum(refund_cents) + - name: total_net_revenue_cents + expr: sum(net_revenue_cents) +descriptions: + user: Daily gross, credit, refund, and net revenue components. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_account_activity.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_account_activity.yaml new file mode 100644 index 00000000..973a5ecb --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_account_activity.yaml @@ -0,0 +1,23 @@ +name: mart_account_activity +table: orbit_analytics.mart_account_activity +grain: + - policy_version +columns: + - name: policy_version + type: string + descriptions: + user: pre_2026_01_15 or post_2026_01_15 + - name: activated_account_count + type: number + descriptions: + ktx: Column activated account count from mart_account_activity. + - name: window_label + type: string + descriptions: + ktx: Column window label from mart_account_activity. +joins: [] +measures: + - name: total_activated_accounts + expr: sum(activated_account_count) +descriptions: + user: "Activation policy comparison values. Governed metric: activated_accounts. Owner: growth. See notion://notion_page_activation_policy_decision#policy-change." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_account_segments.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_account_segments.yaml new file mode 100644 index 00000000..cc08bbf7 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_account_segments.yaml @@ -0,0 +1,27 @@ +name: mart_account_segments +table: orbit_analytics.mart_account_segments +grain: + - account_id +columns: + - name: account_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: normalized_plan_code + type: string + descriptions: + user: pro_plus is normalized to growth through plans.canonical_plan_code. + - name: size_band + type: string + descriptions: + ktx: Column size band from mart_account_segments. 
+ - name: segment + type: string + descriptions: + user: "Reporting segment: self_serve, commercial, enterprise" +joins: [] +measures: + - name: account_count + expr: count(account_id) +descriptions: + user: "Current plan, size band, and reporting segment for accounts. Governed metric: segment. Owner: sales_ops. See notion://notion_page_sales_ops_segmentation#growth-plan-normalization." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_arr_daily.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_arr_daily.yaml new file mode 100644 index 00000000..17a0e0e5 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_arr_daily.yaml @@ -0,0 +1,20 @@ +name: mart_arr_daily +table: orbit_analytics.mart_arr_daily +grain: + - metric_date +columns: + - name: metric_date + type: time + descriptions: + user: "dbt: not_null, unique" + - name: arr_cents + type: number + descriptions: + user: "ARR in cents. dbt assertion: expected value 1874200000 (i.e. $18,742,000) as of 2026-03-31." +joins: [] +measures: + - name: arr_cents + expr: sum(arr_cents) + description: Total ARR in cents across metric dates. +descriptions: + user: "Board-prep ARR as of the metric date. Governed metric: arr. Owner: finance. Contract-first ARR calculation — see notion://notion_page_arr_contract_reporting#arr-contract-first." 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_customer_health.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_customer_health.yaml new file mode 100644 index 00000000..b21744c0 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_customer_health.yaml @@ -0,0 +1,30 @@ +name: mart_customer_health +table: orbit_analytics.mart_customer_health +grain: + - account_id +columns: + - name: account_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: risk_level + type: string + descriptions: + user: "dbt: accepted_values [low, medium, high]" + - name: open_critical_ticket_count + type: number + descriptions: + ktx: Column open critical ticket count from mart_customer_health. + - name: recent_procurement_action_count + type: number + descriptions: + ktx: Column recent procurement action count from mart_customer_health. +joins: [] +measures: + - name: account_count + expr: count(account_id) + - name: high_risk_account_count + expr: count(account_id) + filter: risk_level = 'high' +descriptions: + user: "Customer-health risk mart as of 2026-03-31. Governed metric: active_customers. Owner: customer_success. See notion://notion_page_customer_health_playbook#risk-definition." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_nrr_quarterly.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_nrr_quarterly.yaml new file mode 100644 index 00000000..fc61d756 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_nrr_quarterly.yaml @@ -0,0 +1,22 @@ +name: mart_nrr_quarterly +table: orbit_analytics.mart_nrr_quarterly +grain: + - quarter_label + - segment +columns: + - name: quarter_label + type: string + descriptions: + user: "dbt: not_null. Format: YYYY-QN (e.g. 2026-Q1)." + - name: segment + type: string + descriptions: + user: Reporting segment (self_serve, commercial, enterprise). 
+ - name: net_revenue_retention + type: number + descriptions: + user: "NRR ratio. dbt assertions: enterprise 2026-Q1 = 1.018; enterprise 2025-Q4 = 1.064." +joins: [] +measures: [] +descriptions: + user: "Enterprise quarterly net revenue retention. Governed metric: net_revenue_retention. Owner: analytics. See notion://notion_page_retention_policy_current#nrr-definition." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_procurement_activity.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_procurement_activity.yaml new file mode 100644 index 00000000..4579d473 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_procurement_activity.yaml @@ -0,0 +1,24 @@ +name: mart_procurement_activity +table: orbit_analytics.mart_procurement_activity +grain: + - week_start + - account_id +columns: + - name: week_start + type: time + descriptions: + ktx: Date or time value for week start on mart_procurement_activity. + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on mart_procurement_activity. + - name: active_requesters + type: number + descriptions: + user: Weekly active requesters for large active contracts. +joins: [] +measures: + - name: total_active_requesters + expr: sum(active_requesters) +descriptions: + user: "Weekly active requester counts for large active contracts. Governed metric: weekly_active_requesters. Owner: product. See notion://notion_page_procurement_instrumentation#qualifying-procurement-actions." 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_retention_movement_breakout.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_retention_movement_breakout.yaml new file mode 100644 index 00000000..7ef31a27 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_retention_movement_breakout.yaml @@ -0,0 +1,31 @@ +name: mart_retention_movement_breakout +table: orbit_analytics.mart_retention_movement_breakout +grain: + - movement_type + - movement_reason +columns: + - name: movement_type + type: string + descriptions: + user: "dbt: accepted_values [expansion, contraction, churn]" + - name: movement_reason + type: string + descriptions: + user: Includes discount_expiration contraction, which is not churn. + - name: parent_account_count + type: number + descriptions: + user: "dbt assertion: 11 parent accounts where movement_type='contraction' and movement_reason='discount_expiration'." + - name: expansion_arr_cents + type: number + descriptions: + user: Expansion ARR cents for Q1 enterprise movement rows. +joins: [] +measures: + - name: total_parent_account_count + expr: sum(parent_account_count) + - name: total_expansion_arr_cents + expr: sum(expansion_arr_cents) + filter: movement_type = 'expansion' +descriptions: + user: "Q1 2026 enterprise retention movement breakout. Governed metric: net_revenue_retention. Owner: analytics. See notion://notion_page_retention_policy_current#discount-expiration-treatment." 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_revenue_daily.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_revenue_daily.yaml new file mode 100644 index 00000000..ad876f78 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/mart_revenue_daily.yaml @@ -0,0 +1,37 @@ +name: mart_revenue_daily +table: orbit_analytics.mart_revenue_daily +grain: + - revenue_date +columns: + - name: revenue_date + type: time + descriptions: + user: "dbt: not_null, unique" + - name: reconciliation_check + type: boolean + descriptions: + user: "dbt assertion: must be true on every row." + - name: net_revenue_cents + type: number + descriptions: + user: Daily net revenue in cents. February 2026 total covered by assert_february_2026_net_revenue. + - name: gross_revenue_cents + type: number + descriptions: + ktx: Column gross revenue cents from mart_revenue_daily. + - name: credit_cents + type: number + descriptions: + ktx: Column credit cents from mart_revenue_daily. + - name: refund_cents + type: number + descriptions: + ktx: Column refund cents from mart_revenue_daily. +joins: [] +measures: + - name: total_net_revenue_cents + expr: sum(net_revenue_cents) + - name: total_gross_revenue_cents + expr: sum(gross_revenue_cents) +descriptions: + user: "Daily revenue mart reconciling gross, credits, refunds, and net revenue. Governed metric: net_revenue. Owner: finance. See notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation." 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_account_hierarchy.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_account_hierarchy.yaml new file mode 100644 index 00000000..ff2011f0 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_account_hierarchy.yaml @@ -0,0 +1,25 @@ +name: stg_account_hierarchy +table: orbit_analytics.stg_account_hierarchy +grain: + - account_hierarchy_id +columns: + - name: account_hierarchy_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: parent_account_id + type: string + descriptions: + ktx: Identifier for the related parent account on stg_account_hierarchy. + - name: child_account_id + type: string + descriptions: + ktx: Identifier for the related child account on stg_account_hierarchy. + - name: relationship_type + type: string + descriptions: + user: "dbt: accepted_values [subsidiary, division, billing_group]" +joins: [] +measures: [] +descriptions: + user: Parent-child account relationships used for enterprise retention grain. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_account_owners.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_account_owners.yaml new file mode 100644 index 00000000..86e6ca22 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_account_owners.yaml @@ -0,0 +1,33 @@ +name: stg_account_owners +table: orbit_analytics.stg_account_owners +grain: + - account_owner_id +columns: + - name: account_owner_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_account_owners. + - name: owner_user_id + type: string + descriptions: + ktx: Identifier for the related owner user on stg_account_owners. 
+ - name: owner_team + type: string + descriptions: + user: "dbt: accepted_values [sales_ops, customer_success, finance]" + - name: effective_from + type: time + descriptions: + ktx: Column effective from from stg_account_owners. + - name: effective_to + type: time + descriptions: + ktx: Column effective to from stg_account_owners. +joins: [] +measures: [] +descriptions: + user: Effective-dated ownership assignments for account health, renewals, and escalation context. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_accounts.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_accounts.yaml new file mode 100644 index 00000000..24bc3c92 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_accounts.yaml @@ -0,0 +1,25 @@ +name: stg_accounts +table: orbit_analytics.stg_accounts +grain: + - account_id +columns: + - name: account_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: sales_region + type: string + descriptions: + user: "dbt: accepted_values [na, emea, apac]" + - name: size_band + type: string + descriptions: + user: "dbt: accepted_values [smb, mid_market, enterprise]" + - name: lifecycle_status + type: string + descriptions: + user: "dbt: accepted_values [prospect, active, churned, internal, test]" +joins: [] +measures: [] +descriptions: + user: Customer and internal/test account records for Orbit. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_activation_events.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_activation_events.yaml new file mode 100644 index 00000000..10247052 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_activation_events.yaml @@ -0,0 +1,33 @@ +name: stg_activation_events +table: orbit_analytics.stg_activation_events +grain: + - activation_event_id +columns: + - name: activation_event_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_activation_events. + - name: user_id + type: string + descriptions: + ktx: Identifier for the related user on stg_activation_events. + - name: event_type + type: string + descriptions: + user: "dbt: accepted_values [first_requester_login, requester_activated, first_approved_purchase_request, account_activated]" + - name: policy_version + type: string + descriptions: + user: "dbt: accepted_values [pre_2026_01_15, post_2026_01_15]" + - name: event_at + type: time + descriptions: + ktx: Column event at from stg_activation_events. +joins: [] +measures: [] +descriptions: + user: Account and requester activation events across the January policy change. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_approval_events.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_approval_events.yaml new file mode 100644 index 00000000..8e9bd5e8 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_approval_events.yaml @@ -0,0 +1,25 @@ +name: stg_approval_events +table: orbit_analytics.stg_approval_events +grain: + - approval_event_id +columns: + - name: approval_event_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: purchase_request_id + type: string + descriptions: + ktx: Identifier for the related purchase request on stg_approval_events. 
+ - name: decision + type: string + descriptions: + user: "dbt: accepted_values [approved, rejected, returned]" + - name: decided_at + type: time + descriptions: + ktx: Column decided at from stg_approval_events. +joins: [] +measures: [] +descriptions: + user: Approval decisions tied to procurement requests. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_arr_movements.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_arr_movements.yaml new file mode 100644 index 00000000..1e625a9e --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_arr_movements.yaml @@ -0,0 +1,29 @@ +name: stg_arr_movements +table: orbit_analytics.stg_arr_movements +grain: + - arr_movement_id +columns: + - name: arr_movement_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_arr_movements. + - name: movement_type + type: string + descriptions: + user: "dbt: accepted_values [new, expansion, contraction, churn, reactivation]" + - name: movement_date + type: time + descriptions: + ktx: Date or time value for movement date on stg_arr_movements. + - name: arr_cents + type: number + descriptions: + ktx: Column arr cents from stg_arr_movements. +joins: [] +measures: [] +descriptions: + user: ARR movement ledger used by retention and expansion marts. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_contract_discount_terms.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_contract_discount_terms.yaml new file mode 100644 index 00000000..90332ff8 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_contract_discount_terms.yaml @@ -0,0 +1,25 @@ +name: stg_contract_discount_terms +table: orbit_analytics.stg_contract_discount_terms +grain: + - discount_term_id +columns: + - name: discount_term_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: contract_id + type: string + descriptions: + ktx: Identifier for the related contract on stg_contract_discount_terms. + - name: discount_type + type: string + descriptions: + user: "dbt: accepted_values [launch, renewal, migration, goodwill]" + - name: expiry_date + type: time + descriptions: + ktx: Date or time value for expiry date on stg_contract_discount_terms. +joins: [] +measures: [] +descriptions: + user: Contract discount terms that explain Q1 2026 enterprise contraction movement. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_contracts.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_contracts.yaml new file mode 100644 index 00000000..6ea74539 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_contracts.yaml @@ -0,0 +1,29 @@ +name: stg_contracts +table: orbit_analytics.stg_contracts +grain: + - contract_id +columns: + - name: contract_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_contracts. 
+ - name: status + type: string + descriptions: + user: "dbt: accepted_values [draft, active, cancelled, expired]" + - name: renewal_type + type: string + descriptions: + user: "dbt: accepted_values [new, renewal, expansion, downgrade]" + - name: arr_cents + type: number + descriptions: + ktx: Column arr cents from stg_contracts. +joins: [] +measures: [] +descriptions: + user: Contract records that provide contract-first ARR for active accounts. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_invoice_line_items.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_invoice_line_items.yaml new file mode 100644 index 00000000..f67bf4e1 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_invoice_line_items.yaml @@ -0,0 +1,25 @@ +name: stg_invoice_line_items +table: orbit_analytics.stg_invoice_line_items +grain: + - invoice_line_item_id +columns: + - name: invoice_line_item_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: invoice_id + type: string + descriptions: + ktx: Identifier for the related invoice on stg_invoice_line_items. + - name: line_item_type + type: string + descriptions: + user: "dbt: accepted_values [subscription, seat, usage, addon, credit]" + - name: amount_cents + type: number + descriptions: + ktx: Column amount cents from stg_invoice_line_items. +joins: [] +measures: [] +descriptions: + user: Invoice line items used to split gross revenue, credits, seats, usage, and addons. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_invoices.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_invoices.yaml new file mode 100644 index 00000000..7a74c203 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_invoices.yaml @@ -0,0 +1,33 @@ +name: stg_invoices +table: orbit_analytics.stg_invoices +grain: + - invoice_id +columns: + - name: invoice_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_invoices. + - name: status + type: string + descriptions: + user: "dbt: accepted_values [draft, open, paid, void, failed]" + - name: currency + type: string + descriptions: + user: "dbt: accepted_values [USD] — USD only" + - name: invoice_date + type: time + descriptions: + ktx: Date or time value for invoice date on stg_invoices. + - name: gross_amount_cents + type: number + descriptions: + ktx: Column gross amount cents from stg_invoices. +joins: [] +measures: [] +descriptions: + user: Billing invoices that anchor gross revenue recognition dates. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_plan_segment_mapping.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_plan_segment_mapping.yaml new file mode 100644 index 00000000..c09a06cb --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_plan_segment_mapping.yaml @@ -0,0 +1,33 @@ +name: stg_plan_segment_mapping +table: orbit_analytics.stg_plan_segment_mapping +grain: + - plan_segment_mapping_id +columns: + - name: plan_segment_mapping_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: canonical_plan_code + type: string + descriptions: + user: "dbt: accepted_values [starter, growth, enterprise]" + - name: size_band + type: string + descriptions: + user: "dbt: accepted_values [smb, mid_market, enterprise]" + - name: segment + type: string + descriptions: + user: "dbt: accepted_values [self_serve, commercial, enterprise]" + - name: effective_from + type: time + descriptions: + ktx: Column effective from from stg_plan_segment_mapping. + - name: effective_to + type: time + descriptions: + ktx: Column effective to from stg_plan_segment_mapping. +joins: [] +measures: [] +descriptions: + user: Effective-dated mapping from canonical plans and size bands to reporting segments. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_plans.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_plans.yaml new file mode 100644 index 00000000..225b91f7 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_plans.yaml @@ -0,0 +1,17 @@ +name: stg_plans +table: orbit_analytics.stg_plans +grain: + - plan_id +columns: + - name: plan_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: canonical_plan_code + type: string + descriptions: + user: "dbt: accepted_values [starter, growth, enterprise]. Note: pro_plus is normalized to growth." 
+joins: [] +measures: [] +descriptions: + user: Canonical and historical Orbit pricing plans. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_purchase_orders.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_purchase_orders.yaml new file mode 100644 index 00000000..eb037363 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_purchase_orders.yaml @@ -0,0 +1,29 @@ +name: stg_purchase_orders +table: orbit_analytics.stg_purchase_orders +grain: + - purchase_order_id +columns: + - name: purchase_order_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: purchase_request_id + type: string + descriptions: + ktx: Identifier for the related purchase request on stg_purchase_orders. + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_purchase_orders. + - name: status + type: string + descriptions: + user: "dbt: accepted_values [created, sent, fulfilled, cancelled]" + - name: created_at + type: time + descriptions: + ktx: Date or time value for created at on stg_purchase_orders. +joins: [] +measures: [] +descriptions: + user: Purchase orders generated from approved procurement requests. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_purchase_requests.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_purchase_requests.yaml new file mode 100644 index 00000000..36666900 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_purchase_requests.yaml @@ -0,0 +1,29 @@ +name: stg_purchase_requests +table: orbit_analytics.stg_purchase_requests +grain: + - purchase_request_id +columns: + - name: purchase_request_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_purchase_requests. 
+ - name: requester_user_id + type: string + descriptions: + ktx: Identifier for the related requester user on stg_purchase_requests. + - name: status + type: string + descriptions: + user: "dbt: accepted_values [draft, submitted, approved, rejected, cancelled]" + - name: submitted_at + type: time + descriptions: + ktx: Column submitted at from stg_purchase_requests. +joins: [] +measures: [] +descriptions: + user: Procurement request records used for activation, requester activity, and health signals. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_refunds.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_refunds.yaml new file mode 100644 index 00000000..a972f580 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_refunds.yaml @@ -0,0 +1,29 @@ +name: stg_refunds +table: orbit_analytics.stg_refunds +grain: + - refund_id +columns: + - name: refund_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: invoice_id + type: string + descriptions: + ktx: Identifier for the related invoice on stg_refunds. + - name: status + type: string + descriptions: + user: "dbt: accepted_values [pending, succeeded, failed, cancelled]" + - name: refund_date + type: time + descriptions: + ktx: Date or time value for refund date on stg_refunds. + - name: amount_cents + type: number + descriptions: + ktx: Column amount cents from stg_refunds. +joins: [] +measures: [] +descriptions: + user: Refund events that reduce net revenue in the refund month. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_sessions.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_sessions.yaml new file mode 100644 index 00000000..a7595052 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_sessions.yaml @@ -0,0 +1,25 @@ +name: stg_sessions +table: orbit_analytics.stg_sessions +grain: + - session_id +columns: + - name: session_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_sessions. + - name: user_id + type: string + descriptions: + ktx: Identifier for the related user on stg_sessions. + - name: started_at + type: time + descriptions: + ktx: Column started at from stg_sessions. +joins: [] +measures: [] +descriptions: + user: Product sessions used for pre-policy activation and activity exclusions. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_subscriptions.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_subscriptions.yaml new file mode 100644 index 00000000..d2612489 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_subscriptions.yaml @@ -0,0 +1,25 @@ +name: stg_subscriptions +table: orbit_analytics.stg_subscriptions +grain: + - subscription_id +columns: + - name: subscription_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_subscriptions. + - name: status + type: string + descriptions: + user: "dbt: accepted_values [active, cancelled, past_due, trialing]" + - name: arr_cents + type: number + descriptions: + ktx: Column arr cents from stg_subscriptions. +joins: [] +measures: [] +descriptions: + user: Subscription rows used when active contract ARR is not present for a covered period. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_supplier_onboarding_events.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_supplier_onboarding_events.yaml new file mode 100644 index 00000000..853265db --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_supplier_onboarding_events.yaml @@ -0,0 +1,29 @@ +name: stg_supplier_onboarding_events +table: orbit_analytics.stg_supplier_onboarding_events +grain: + - supplier_onboarding_event_id +columns: + - name: supplier_onboarding_event_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: supplier_id + type: string + descriptions: + ktx: Identifier for the related supplier on stg_supplier_onboarding_events. + - name: event_type + type: string + descriptions: + user: "dbt: accepted_values [invited, profile_started, profile_completed, approved]" + - name: status + type: string + descriptions: + user: "dbt: accepted_values [pending, completed, blocked]" + - name: occurred_at + type: time + descriptions: + ktx: Column occurred at from stg_supplier_onboarding_events. +joins: [] +measures: [] +descriptions: + user: Supplier onboarding milestones that qualify as procurement workflow activity. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_suppliers.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_suppliers.yaml new file mode 100644 index 00000000..655787e8 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_suppliers.yaml @@ -0,0 +1,21 @@ +name: stg_suppliers +table: orbit_analytics.stg_suppliers +grain: + - supplier_id +columns: + - name: supplier_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: status + type: string + descriptions: + user: "dbt: accepted_values [invited, onboarding, active, inactive]" + - name: name + type: string + descriptions: + ktx: Column name from stg_suppliers. 
+joins: [] +measures: [] +descriptions: + user: Supplier directory records associated with procurement workflow events. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_support_tickets.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_support_tickets.yaml new file mode 100644 index 00000000..f5cc4287 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_support_tickets.yaml @@ -0,0 +1,29 @@ +name: stg_support_tickets +table: orbit_analytics.stg_support_tickets +grain: + - support_ticket_id +columns: + - name: support_ticket_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_support_tickets. + - name: severity + type: string + descriptions: + user: "dbt: accepted_values [low, medium, high, critical]" + - name: status + type: string + descriptions: + user: "dbt: accepted_values [open, pending, solved, closed]" + - name: created_at + type: time + descriptions: + ktx: Date or time value for created at on stg_support_tickets. +joins: [] +measures: [] +descriptions: + user: Customer support tickets that inform account health and risk. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_users.yaml b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_users.yaml new file mode 100644 index 00000000..159c6298 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/dbt-main/stg_users.yaml @@ -0,0 +1,21 @@ +name: stg_users +table: orbit_analytics.stg_users +grain: + - user_id +columns: + - name: user_id + type: string + descriptions: + user: "dbt: not_null, unique" + - name: account_id + type: string + descriptions: + ktx: Identifier for the related account on stg_users. + - name: email + type: string + descriptions: + ktx: Column email from stg_users. 
+joins: [] +measures: [] +descriptions: + user: Orbit user identities shared across warehouse, Slack, Looker, Notion, and Drive artifacts. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml deleted file mode 100644 index a9dc698f..00000000 --- a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml +++ /dev/null @@ -1,44 +0,0 @@ -name: accounts -table: accounts -description: Customer accounts with industry, region, lifecycle, and internal/test flags. -grain: - - account_id -columns: - - name: account_id - type: string - - name: parent_account_id - type: string - - name: account_name - type: string - - name: domain - type: string - - name: industry - type: string - - name: sales_region - type: string - - name: size_band - type: string - - name: lifecycle_status - type: string - - name: is_internal - type: boolean - - name: is_test - type: boolean - - name: created_at - type: time -joins: - - to: contracts - "on": "account_id = contracts.account_id" - relationship: one_to_many - - to: purchase_requests - "on": "account_id = purchase_requests.account_id" - relationship: one_to_many -measures: - - name: account_count - expr: "count(distinct account_id)" - - name: enterprise_count - expr: "count(distinct account_id)" - filter: "size_band = 'enterprise'" -segments: - - name: external_only - expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml deleted file mode 100644 index cfe4d7fb..00000000 --- a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: arr_movements -table: arr_movements -description: ARR movement ledger for expansion, contraction, churn, and reactivation analysis. 
-grain: - - arr_movement_id -columns: - - name: arr_movement_id - type: string - - name: account_id - type: string - - name: parent_account_id - type: string - - name: contract_id - type: string - - name: movement_date - type: time - - name: movement_type - type: string - - name: movement_reason - type: string - - name: arr_delta_cents - type: number - - name: starting_arr_cents - type: number - - name: ending_arr_cents - type: number -joins: - - to: accounts - "on": "account_id = accounts.account_id" - relationship: many_to_one -measures: - - name: movement_count - expr: "count(*)" - - name: net_arr_delta - expr: "sum(arr_delta_cents) / 100.0" -segments: - - name: external_only - expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml deleted file mode 100644 index cf6c4c7c..00000000 --- a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: contracts -table: contracts -description: Subscription contracts with ARR, plan, renewal, and status details. 
-grain: - - contract_id -columns: - - name: contract_id - type: string - - name: account_id - type: string - - name: parent_account_id - type: string - - name: plan_id - type: string - - name: contract_arr_cents - type: number - - name: booked_arr_cents - type: number - - name: start_date - type: time - - name: end_date - type: time - - name: status - type: string - - name: renewal_type - type: string -joins: - - to: accounts - "on": "account_id = accounts.account_id" - relationship: many_to_one -measures: - - name: contract_count - expr: "count(distinct contract_id)" - - name: total_arr - expr: "sum(contract_arr_cents) / 100.0" - filter: "status = 'active'" -segments: - - name: external_only - expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml deleted file mode 100644 index 178c6bad..00000000 --- a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: invoices -table: invoices -description: Billing invoices with payment status and revenue-recognition dates. 
-grain: - - invoice_id -columns: - - name: invoice_id - type: string - - name: account_id - type: string - - name: subscription_id - type: string - - name: invoice_date - type: time - - name: paid_at - type: time - - name: status - type: string - - name: currency - type: string -joins: - - to: accounts - "on": "account_id = accounts.account_id" - relationship: many_to_one -measures: - - name: invoice_count - expr: "count(*)" - - name: paid_invoice_count - expr: "count(*)" - filter: "status = 'paid'" -segments: - - name: external_only - expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml deleted file mode 100644 index db9df059..00000000 --- a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: purchase_requests -table: purchase_requests -description: Procurement workflow requests with requester, status, supplier, and spend fields. 
-grain: - - purchase_request_id -columns: - - name: purchase_request_id - type: string - - name: account_id - type: string - - name: requester_user_id - type: string - - name: created_at - type: time - - name: status - type: string - - name: amount_cents - type: number - - name: supplier_id - type: string -joins: - - to: accounts - "on": "account_id = accounts.account_id" - relationship: many_to_one -measures: - - name: request_count - expr: "count(*)" - - name: approved_spend - expr: "sum(amount_cents) / 100.0" - filter: "status = 'approved'" -segments: - - name: external_only - expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml deleted file mode 100644 index ddbc97e7..00000000 --- a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: support_tickets -table: support_tickets -description: Customer support tickets with severity, category, status, and resolution tracking. 
-grain: - - support_ticket_id -columns: - - name: support_ticket_id - type: string - - name: account_id - type: string - - name: requester_user_id - type: string - - name: severity - type: string - - name: category - type: string - - name: status - type: string - - name: created_at - type: time - - name: resolved_at - type: time - - name: owner_user_id - type: string -joins: - - to: accounts - "on": "account_id = accounts.account_id" - relationship: many_to_one -measures: - - name: ticket_count - expr: "count(*)" - - name: open_ticket_count - expr: "count(*)" - filter: "status != 'resolved'" -segments: - - name: external_only - expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml new file mode 100644 index 00000000..f7787164 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml @@ -0,0 +1,1400 @@ +tables: + int_activation_policy_windows: + table: orbit_analytics.int_activation_policy_windows + columns: + - name: policy_window + type: string + descriptions: + ai: Categorical indicator distinguishing time periods before and after a policy implementation. + - name: cohort_users + type: number + descriptions: + ai: Total number of users in each cohort being tracked for activation analysis. + - name: activated_users + type: number + descriptions: + ai: Count of users who completed activation within each policy window period. + - name: activation_rate + type: number + descriptions: + ai: Proportion of cohort users who completed activation, expressed as a decimal ratio. + descriptions: + ai: Compares user activation rates before and after a policy change, measuring its impact on converting cohort users into activated users. 
+ int_active_contract_arr: + table: orbit_analytics.int_active_contract_arr + columns: + - name: contract_id + type: string + descriptions: + ai: Unique identifiers for active contracts, linking revenue records to specific customer agreements. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer accounts associated with active contracts and their ARR. + - name: parent_account_id + type: string + descriptions: + ai: Hierarchical identifier linking contracts to their top-level or parent organizational account. + - name: plan_id + type: string + descriptions: + ai: Identifier referencing the subscription or pricing plan associated with an active contract. + - name: contract_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) for active contracts, expressed in cents (USD). + descriptions: + ai: Active contract revenue data tracking Annual Recurring Revenue (ARR) in cents across accounts, parent accounts, and subscription plans. + joins: + - to: accounts + "on": int_active_contract_arr.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: contracts + "on": int_active_contract_arr.contract_id = contracts.contract_id + relationship: many_to_one + source: inferred + - to: plans + "on": int_active_contract_arr.plan_id = plans.plan_id + relationship: many_to_one + source: inferred + int_customer_health_signals: + table: orbit_analytics.int_customer_health_signals + columns: + - name: account_id + type: string + descriptions: + ai: Unique identifier for each customer account, used to track health signals. + - name: parent_account_id + type: string + descriptions: + ai: Identifier linking a child account to its parent organization in a hierarchical account structure. + - name: account_name + type: string + descriptions: + ai: Display names of customer accounts used to identify organizations in health monitoring. 
+ - name: is_active_customer + type: boolean + descriptions: + ai: Boolean flag indicating whether the account is currently an active paying customer. + - name: has_unresolved_high_ticket + type: boolean + descriptions: + ai: Boolean flag indicating whether the customer has at least one open, high-priority support ticket. + - name: has_recent_procurement_activity + type: boolean + descriptions: + ai: Boolean flag indicating whether the customer has shown recent purchasing or procurement activity. + - name: risk_level + type: string + descriptions: + ai: Categorical assessment of customer churn or account health risk, indicating low, medium, or high exposure. + descriptions: + ai: Customer health monitoring data tracking account activity, support ticket status, procurement behavior, and risk levels to assess overall customer relationship health. + joins: + - to: accounts + "on": int_customer_health_signals.account_id = accounts.account_id + relationship: many_to_one + source: inferred + int_parent_account_arr_movements: + table: orbit_analytics.int_parent_account_arr_movements + columns: + - name: arr_movement_id + type: string + descriptions: + ai: Unique identifier for each ARR movement event, formatted as a sequential alphanumeric code. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the individual customer account associated with each ARR movement record. + - name: parent_account_id + type: string + descriptions: + ai: Unique identifier for the parent/hierarchical account grouping child accounts beneath it. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking ARR movements to specific customer contracts driving revenue changes. + - name: movement_date + type: time + descriptions: + ai: Timestamp of when an ARR movement event occurred for a parent account. + - name: quarter_start_date + type: time + descriptions: + ai: Start date of the fiscal quarter in which the ARR movement occurred. 
+ - name: quarter_label + type: string + descriptions: + ai: Fiscal quarter identifier in "YYYY-QN" format, used for grouping and reporting ARR movements. + - name: segment + type: string + descriptions: + ai: Customer tier or market classification, such as enterprise, mid-market, or SMB. + - name: movement_type + type: string + descriptions: + ai: Categorizes ARR changes by type, such as expansion, contraction, churn, or reactivation. + - name: movement_reason + type: string + descriptions: + ai: Categorizes the underlying cause driving an ARR change, such as seat growth or contraction. + - name: arr_delta_cents + type: number + descriptions: + ai: Monetary change in Annual Recurring Revenue for a movement event, stored in cents. + - name: starting_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) in cents at the beginning of a movement period. + - name: ending_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue balance in cents after applying the recorded ARR movement. + - name: expansion_arr_cents + type: number + descriptions: + ai: Monetary value of ARR growth from expansions, stored in cents at the parent account level. + - name: contraction_arr_cents + type: number + descriptions: + ai: Monetary value (in cents) of ARR decreases due to contraction events; zero indicates no contraction. + - name: churned_arr_cents + type: number + descriptions: + ai: Monetary value in cents of ARR lost due to customer churn; zero indicates no churn. + - name: is_discount_expiration_contraction + type: boolean + descriptions: + ai: Boolean flag indicating whether a contraction in ARR resulted from a discount expiring. + - name: is_reactivation + type: boolean + descriptions: + ai: Boolean flag indicating whether an ARR movement represents a previously churned account returning as a customer. 
+ descriptions: + ai: Tracks ARR (Annual Recurring Revenue) movements aggregated at the parent account level, capturing subscription revenue changes like expansions across billing periods. + joins: + - to: arr_movements + "on": int_parent_account_arr_movements.arr_movement_id = arr_movements.arr_movement_id + relationship: many_to_one + source: inferred + int_procurement_qualifying_actions: + table: orbit_analytics.int_procurement_qualifying_actions + columns: + - name: action_id + type: string + descriptions: + ai: Unique identifiers for procurement approval actions, formatted as sequential approval reference codes. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the account associated with each qualifying procurement action. + - name: user_id + type: string + descriptions: + ai: Unique identifiers for users who performed qualifying procurement actions. + - name: action_date + type: time + descriptions: + ai: Timestamps recording when qualifying procurement actions occurred, stored in Pacific timezone. + - name: action_type + type: string + descriptions: + ai: Categorizes qualifying procurement actions, such as approval events, within the procurement workflow process. + descriptions: + ai: Records of procurement approval events tied to accounts and users, used to track qualifying actions within a procurement workflow. + joins: + - to: stg_users + "on": int_procurement_qualifying_actions.user_id = stg_users.user_id + relationship: many_to_one + source: inferred + int_revenue_components: + table: orbit_analytics.int_revenue_components + columns: + - name: revenue_date + type: time + descriptions: + ai: Daily timestamps representing when revenue transactions were recorded, used for time-series financial reporting. + - name: gross_revenue_cents + type: number + descriptions: + ai: Total revenue earned before deductions, stored in cents (e.g., 3888000 = $38,880). 
+ - name: credits_cents + type: number + descriptions: + ai: Monetary credits applied against gross revenue, stored in cents; appears consistently zero in sample. + - name: refunds_cents + type: number + descriptions: + ai: Monetary value of customer refunds issued, stored in cents, for daily revenue reconciliation. + - name: net_revenue_cents + type: number + descriptions: + ai: Calculated revenue in cents after deducting credits and refunds from gross revenue. + descriptions: + ai: Daily revenue tracking data capturing gross earnings, credits, and refunds to calculate net revenue for financial reporting and analysis. + mart_account_activity: + table: orbit_analytics.mart_account_activity + columns: + - name: policy_change_date + type: time + descriptions: + ai: Date when a policy change took effect, used to compare pre/post activation rates. + - name: pre_policy_30_day_activation_rate + type: number + descriptions: + ai: Account activation rate within 30 days before a policy change was implemented. + - name: post_policy_30_day_activation_rate + type: number + descriptions: + ai: The 30-day account activation rate measured after a policy change was implemented. + descriptions: + ai: Tracks the impact of policy changes on user activation rates by comparing 30-day engagement metrics before and after implementation. + mart_account_segments: + table: orbit_analytics.mart_account_segments + columns: + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer accounts, formatted with an "acct_" prefix and numeric suffix. + - name: parent_account_id + type: string + descriptions: + ai: Identifiers linking child accounts to their parent accounts in a hierarchical account structure. + - name: current_plan_code + type: string + descriptions: + ai: Subscription plan tier currently assigned to the account (e.g., starter, pro). 
+ - name: normalized_plan_code + type: string + descriptions: + ai: Standardized or simplified version of the subscription plan code for consistent segmentation and reporting. + - name: size_band + type: string + descriptions: + ai: Categorization of accounts by company size, indicating small-to-medium business (SMB) classification. + - name: segment + type: string + descriptions: + ai: Business classification indicating how an account is managed or acquired, e.g., self-serve vs. sales-assisted. + - name: contract_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) for a contract, stored in cents for precision. + - name: contract_status + type: string + descriptions: + ai: Current state of a customer's contract, such as expired or cancelled. + descriptions: + ai: Customer account segmentation data used for classifying accounts by plan type, company size, sales segment, and contract value for business analytics. + joins: + - to: accounts + "on": mart_account_segments.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: plans + "on": mart_account_segments.current_plan_code = plans.plan_code + relationship: many_to_one + source: inferred + - to: plans + "on": mart_account_segments.normalized_plan_code = plans.plan_code + relationship: many_to_one + source: inferred + mart_arr_daily: + table: orbit_analytics.mart_arr_daily + columns: + - name: metric_date + type: time + descriptions: + ai: Daily timestamp marking when ARR metrics were recorded, used for time-series financial tracking. + - name: arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) stored in cents for precise financial calculations, avoiding floating-point errors. + - name: display + type: string + descriptions: + ai: Human-readable, formatted ARR value with currency symbol and abbreviated magnitude (e.g., millions). 
+ descriptions: + ai: Daily snapshot of Annual Recurring Revenue (ARR) metrics, tracking subscription revenue trends over time for financial reporting and business performance monitoring. + mart_customer_health: + table: orbit_analytics.mart_customer_health + columns: + - name: as_of_date + type: time + descriptions: + ai: Snapshot date indicating when the customer health metrics were last calculated or refreshed. + - name: account_id + type: string + descriptions: + ai: Unique identifier for each customer account tracked in the health monitoring mart. + - name: parent_account_id + type: string + descriptions: + ai: Unique identifier linking child accounts to their parent organization in a hierarchical account structure. + - name: account_name + type: string + descriptions: + ai: Descriptive labels identifying customer organizations tracked for health and risk monitoring purposes. + - name: is_active_customer + type: boolean + descriptions: + ai: Boolean flag indicating whether the account is currently an active paying customer. + - name: has_unresolved_high_ticket + type: boolean + descriptions: + ai: Boolean flag indicating whether the customer account has at least one unresolved high-priority support ticket. + - name: has_recent_procurement_activity + type: boolean + descriptions: + ai: Boolean flag indicating whether the account has had recent purchasing or procurement transactions. + - name: risk_level + type: string + descriptions: + ai: Categorical assessment of customer churn or account health risk, segmented into tiers (e.g., low, medium, high). + descriptions: + ai: A snapshot of customer health metrics as of a specific date, used to assess account risk levels and engagement status for customer success management. 
+ joins: + - to: accounts + "on": mart_customer_health.account_id = accounts.account_id + relationship: many_to_one + source: inferred + mart_nrr_quarterly: + table: orbit_analytics.mart_nrr_quarterly + columns: + - name: quarter_start_date + type: time + descriptions: + ai: Start date of each fiscal quarter used to track NRR metrics over time. + - name: quarter_label + type: string + descriptions: + ai: Human-readable fiscal quarter identifier combining year and quarter number (e.g., "2025-Q4"). + - name: segment + type: string + descriptions: + ai: Customer tier or market segment classification, such as enterprise, mid-market, or SMB. + - name: starting_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue at the beginning of each quarter, stored in cents. + - name: expansion_arr_cents + type: number + descriptions: + ai: Revenue growth in cents from existing customers upgrading or expanding their subscriptions quarterly. + - name: contraction_arr_cents + type: number + descriptions: + ai: Reduction in recurring revenue from existing customers who downgraded, measured in cents. + - name: churned_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue lost due to customer cancellations in a quarter, measured in cents. + - name: net_revenue_retention + type: number + descriptions: + ai: Ratio of retained and expanded revenue to starting ARR, typically expressed as a decimal multiplier. + descriptions: + ai: Quarterly Net Revenue Retention metrics tracking ARR movements (expansion, contraction, churn) by customer segment to measure revenue growth and customer health. + mart_procurement_activity: + table: orbit_analytics.mart_procurement_activity + columns: + - name: week_start_date + type: time + descriptions: + ai: Start date of a weekly procurement activity reporting period. + - name: week_end_date + type: time + descriptions: + ai: End date of a weekly procurement activity reporting period. 
+ - name: contract_arr_threshold_cents + type: number + descriptions: + ai: Minimum annual recurring revenue threshold (in cents) for contract procurement eligibility, here $200,000. + - name: active_requesters + type: number + descriptions: + ai: Count of unique users who submitted procurement requests during the given week. + descriptions: + ai: Weekly procurement activity metrics tracking active requesters and contract value thresholds, used for monitoring purchasing behavior and procurement pipeline management. + mart_retention_movement_breakout: + table: orbit_analytics.mart_retention_movement_breakout + columns: + - name: quarter_start_date + type: time + descriptions: + ai: Start date of the fiscal quarter used to group retention movement data. + - name: quarter_label + type: string + descriptions: + ai: Human-readable fiscal quarter identifier combining year and quarter number (e.g., "2026-Q1"). + - name: segment + type: string + descriptions: + ai: Customer tier or market segment classification, such as enterprise, mid-market, or SMB. + - name: movement_type + type: string + descriptions: + ai: Categorizes ARR changes as growth (expansion), reduction (contraction), or full cancellation (churn). + - name: movement_reason + type: string + descriptions: + ai: Categorical reasons driving customer retention movements such as budget loss, discounts, or seat changes. + - name: parent_account_count + type: number + descriptions: + ai: Count of distinct parent accounts experiencing a specific retention movement type within a quarter. + - name: expansion_arr_cents + type: number + descriptions: + ai: Monetary value of ARR growth from account expansions, stored in cents. + - name: contraction_arr_cents + type: number + descriptions: + ai: Monetary value of ARR decreases from existing accounts, stored in cents. 
+ - name: churned_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue lost due to customer churn, expressed in cents, per segment and quarter. + descriptions: + ai: Tracks quarterly ARR movement (churn, contraction, expansion) by customer segment and reason, enabling retention analysis and revenue trend monitoring across enterprise accounts. + mart_revenue_daily: + table: orbit_analytics.mart_revenue_daily + columns: + - name: revenue_date + type: time + descriptions: + ai: Daily timestamp representing the specific calendar date for revenue aggregation and reporting. + - name: gross_revenue_cents + type: number + descriptions: + ai: Total revenue earned before deductions, stored in cents, for a given daily period. + - name: credits_cents + type: number + descriptions: + ai: Monetary credits applied against revenue, stored in cents; appears consistently zero in sample data. + - name: refunds_cents + type: number + descriptions: + ai: Total refund amounts issued on a given day, stored in cents for precision. + - name: net_revenue_cents + type: number + descriptions: + ai: Daily net revenue in cents after deducting credits and refunds from gross revenue. + - name: reconciliation_check + type: boolean + descriptions: + ai: Boolean flag verifying that gross revenue minus credits and refunds equals net revenue. + descriptions: + ai: Daily revenue tracking for financial reporting, capturing gross sales, credits, refunds, and net revenue with reconciliation validation. + stg_account_hierarchy: + table: orbit_analytics.stg_account_hierarchy + columns: + - name: account_hierarchy_id + type: string + descriptions: + ai: Unique identifiers for parent-child account relationship records within the organizational hierarchy. + - name: parent_account_id + type: string + descriptions: + ai: Identifiers for the higher-level accounts in a parent-child account relationship hierarchy. 
+ - name: child_account_id + type: string + descriptions: + ai: Unique identifiers for subordinate accounts within a parent-child account hierarchy relationship. + - name: relationship_type + type: string + descriptions: + ai: Categorizes the structural or organizational relationship between parent and child accounts. + - name: effective_start_date + type: time + descriptions: + ai: Start date when a parent-child account relationship becomes active or valid. + - name: effective_end_date + type: time + descriptions: + ai: End date when the parent-child account relationship expires or becomes inactive. + descriptions: + ai: Staging data capturing parent-child account relationships, including hierarchy types and date-bounded validity periods, used for organizational account structure management. + joins: + - to: account_hierarchy + "on": stg_account_hierarchy.account_hierarchy_id = account_hierarchy.account_hierarchy_id + relationship: many_to_one + source: inferred + stg_account_owners: + table: orbit_analytics.stg_account_owners + columns: + - name: account_owner_id + type: string + descriptions: + ai: Unique identifiers for account ownership records, linking accounts to their assigned owners. + - name: account_id + type: string + descriptions: + ai: Unique identifiers linking ownership records to specific customer or business accounts. + - name: owner_user_id + type: string + descriptions: + ai: Unique identifier for the user assigned as an account owner. + - name: owner_team + type: string + descriptions: + ai: Team responsible for owning or managing the account, such as sales or customer success. + - name: role + type: string + descriptions: + ai: Business function or responsibility of the account owner, such as sales, success, or finance. + - name: effective_start_date + type: time + descriptions: + ai: Start date when an account owner's responsibility or role becomes active. 
+ - name: effective_end_date + type: time + descriptions: + ai: End date marking when an account ownership assignment expires or becomes inactive. + descriptions: + ai: Tracks ownership assignments of accounts to users and teams, including their roles and the time periods during which those assignments are active. + joins: + - to: accounts + "on": stg_account_owners.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: account_owners + "on": stg_account_owners.account_owner_id = account_owners.account_owner_id + relationship: many_to_one + source: inferred + stg_accounts: + table: orbit_analytics.stg_accounts + columns: + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer or business accounts, prefixed with "acct_" followed by a numeric code. + - name: parent_account_id + type: string + descriptions: + ai: Identifier linking a child account to its parent in a hierarchical account structure. + - name: account_name + type: string + descriptions: + ai: Display name of the customer or business account entity. + - name: domain + type: string + descriptions: + ai: Unique web domain identifiers associated with each customer account, used for account identification. + - name: industry + type: string + descriptions: + ai: Business sector or vertical market classification of the account organization. + - name: sales_region + type: string + descriptions: + ai: Geographic sales territory classification (North America, EMEA, Asia-Pacific) for account-based revenue segmentation. + - name: size_band + type: string + descriptions: + ai: Categorical classification of account size, segmenting customers by organizational scale (e.g., enterprise, SMB). + - name: lifecycle_status + type: string + descriptions: + ai: Current stage of an account in its business relationship lifecycle (e.g., active, churned). 
+ - name: is_internal + type: boolean + descriptions: + ai: Boolean flag identifying whether the account belongs to the company internally, rather than an external customer. + - name: is_test + type: boolean + descriptions: + ai: Boolean flag identifying whether the account is a test/dummy record to be excluded from analysis. + - name: created_at + type: time + descriptions: + ai: Timestamp recording when each account record was first created in the system. + descriptions: + ai: Staged customer account records used for CRM and sales operations, capturing organizational profiles across industries, regions, and lifecycle stages. + joins: + - to: accounts + "on": stg_accounts.account_id = accounts.account_id + relationship: many_to_one + source: inferred + stg_activation_events: + table: orbit_analytics.stg_activation_events + columns: + - name: activation_event_id + type: string + descriptions: + ai: Unique identifiers for individual activation events, formatted with a sequential numeric suffix. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer or business accounts associated with activation events. + - name: user_id + type: string + descriptions: + ai: Unique identifiers for users associated with activation events, formatted as "user_XXXXXX". + - name: event_type + type: string + descriptions: + ai: Categorical label identifying the specific activation milestone or action completed by a user. + - name: event_at + type: time + descriptions: + ai: Timestamp recording when an activation event occurred, stored in Pacific Standard Time. + - name: policy_version + type: string + descriptions: + ai: Version identifier of the policy in effect at the time of the activation event. + descriptions: + ai: Tracks milestone activation events for user accounts, capturing when users first engage with the platform under specific policy versions.
+ joins: + - to: activation_events + "on": stg_activation_events.activation_event_id = activation_events.activation_event_id + relationship: many_to_one + source: inferred + stg_approval_events: + table: orbit_analytics.stg_approval_events + columns: + - name: approval_event_id + type: string + descriptions: + ai: Unique identifiers for individual approval events, formatted as sequential alphanumeric strings. + - name: purchase_request_id + type: string + descriptions: + ai: Unique identifiers linking approval events to their associated purchase requests. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the account associated with each approval event. + - name: approver_user_id + type: string + descriptions: + ai: Unique identifiers for users who reviewed and made approval decisions on purchase requests. + - name: decision + type: string + descriptions: + ai: Outcome of an approver's review on a purchase request, such as approved or rejected. + - name: decided_at + type: time + descriptions: + ai: Timestamp recording when an approver made their decision on a purchase request. + descriptions: + ai: Staging data capturing purchase request approval decisions, tracking which approvers acted on requests and when decisions were made. + joins: + - to: approval_events + "on": stg_approval_events.approval_event_id = approval_events.approval_event_id + relationship: many_to_one + source: inferred + - to: purchase_requests + "on": stg_approval_events.purchase_request_id = purchase_requests.purchase_request_id + relationship: many_to_one + source: inferred + stg_arr_movements: + table: orbit_analytics.stg_arr_movements + columns: + - name: arr_movement_id + type: string + descriptions: + ai: Unique identifiers for individual ARR movement records, prefixed with "arr_move" and sequentially numbered. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the customer account associated with each ARR movement record. 
+ - name: parent_account_id + type: string + descriptions: + ai: Identifier linking a child account to its parent in a hierarchical account structure. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking ARR movements to specific customer contracts or subscription agreements. + - name: movement_date + type: time + descriptions: + ai: Timestamp recording when an ARR movement event occurred, used for tracking revenue changes over time. + - name: movement_type + type: string + descriptions: + ai: Categorical classification of ARR change events, such as expansion, contraction, churn, or new business. + - name: movement_reason + type: string + descriptions: + ai: Categorizes the specific cause driving an ARR change, such as seat expansion or contraction. + - name: arr_delta_cents + type: number + descriptions: + ai: Monetary change in Annual Recurring Revenue for a given movement, stored in cents. + - name: starting_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue value in cents before the movement was applied. + - name: ending_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) balance in cents after applying the movement transaction. + descriptions: + ai: Tracks changes in Annual Recurring Revenue (ARR) across customer accounts, capturing expansion, contraction, and churn movements for subscription revenue analysis. + joins: + - to: arr_movements + "on": stg_arr_movements.arr_movement_id = arr_movements.arr_movement_id + relationship: many_to_one + source: inferred + stg_contract_discount_terms: + table: orbit_analytics.stg_contract_discount_terms + columns: + - name: discount_term_id + type: string + descriptions: + ai: Unique identifiers for individual discount terms associated with contracts, formatted as sequential codes. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking discount terms to their associated contracts in the system. 
+ - name: discount_type + type: string + descriptions: + ai: Categorizes the reason or occasion for applying a contract discount, such as launch or renewal. + - name: discount_cents + type: number + descriptions: + ai: Fixed discount amount in cents applied to a contract, representing a monetary reduction. + - name: discount_percent + type: number + descriptions: + ai: Percentage-based discount rate applied to a contract, expressed as a decimal (e.g., 0.10 = 10%). + - name: starts_on + type: time + descriptions: + ai: Effective start date when a contract discount term becomes active. + - name: expires_on + type: time + descriptions: + ai: End date when a contract discount term becomes inactive or no longer valid. + - name: reason + type: string + descriptions: + ai: Categorical label indicating the rationale or trigger behind a contract discount term. + descriptions: + ai: Staging data capturing time-bound discount terms applied to contracts, including discount type, value, and validity period for pricing management. + stg_contracts: + table: orbit_analytics.stg_contracts + columns: + - name: contract_id + type: string + descriptions: + ai: Unique identifiers for individual contracts, used to track and reference specific contract records. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the customer account associated with each contract. + - name: parent_account_id + type: string + descriptions: + ai: Identifier linking a contract to its parent/hierarchical account in a multi-tier account structure. + - name: plan_id + type: string + descriptions: + ai: Identifier referencing the subscription or pricing plan associated with a contract. + - name: contract_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) value of the contract, stored in cents. + - name: booked_arr_cents + type: number + descriptions: + ai: Committed Annual Recurring Revenue at booking time, stored in cents (e.g., $250,000). 
+ - name: start_date + type: time + descriptions: + ai: Date when the contract becomes active and billing or service terms begin. + - name: end_date + type: time + descriptions: + ai: Expiration date marking when a contract term officially concludes, used for renewal tracking. + - name: status + type: string + descriptions: + ai: Current lifecycle state of a contract, such as active, expired, or cancelled. + - name: renewal_type + type: string + descriptions: + ai: Classification of contract motion type, indicating whether it's new business, a renewal, expansion, or downgrade. + descriptions: + ai: Staging data for customer subscription contracts, tracking annual recurring revenue, contract terms, account relationships, and renewal lifecycle stages for revenue management. + joins: + - to: accounts + "on": stg_contracts.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: contracts + "on": stg_contracts.contract_id = contracts.contract_id + relationship: many_to_one + source: inferred + - to: plans + "on": stg_contracts.plan_id = plans.plan_id + relationship: many_to_one + source: inferred + stg_invoice_line_items: + table: orbit_analytics.stg_invoice_line_items + columns: + - name: invoice_line_item_id + type: string + descriptions: + ai: Unique identifiers for individual line items within invoices, formatted as sequential alphanumeric codes. + - name: invoice_id + type: string + descriptions: + ai: Unique identifier linking line items to their parent invoice record. + - name: line_item_type + type: string + descriptions: + ai: Categorizes invoice charges by billing type, such as subscription, usage, seat, or addon fees. + - name: amount_cents + type: number + descriptions: + ai: Monetary value of invoice line items stored in cents (e.g., $20,000.00). + - name: recognized_at + type: time + descriptions: + ai: Timestamp indicating when an invoice line item's revenue was officially recognized.
+ descriptions: + ai: Staging data capturing individual line items within invoices, detailing billing charges by type (subscription, seat, usage) with associated amounts and recognition dates. + joins: + - to: invoice_line_items + "on": stg_invoice_line_items.invoice_line_item_id = invoice_line_items.invoice_line_item_id + relationship: many_to_one + source: inferred + stg_invoices: + table: orbit_analytics.stg_invoices + columns: + - name: invoice_id + type: string + descriptions: + ai: Unique identifiers for individual invoices, formatted with an "inv_" prefix and sequential numbering. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer accounts associated with each invoice record. + - name: subscription_id + type: string + descriptions: + ai: Unique identifier linking each invoice to its associated subscription plan or service. + - name: invoice_date + type: time + descriptions: + ai: Date when the invoice was issued to the customer or account. + - name: paid_at + type: time + descriptions: + ai: Timestamp recording when an invoice was successfully paid, in Pacific Standard Time. + - name: status + type: string + descriptions: + ai: Current payment state of an invoice, such as paid, pending, or failed. + - name: currency + type: string + descriptions: + ai: Three-letter ISO currency code used for billing and payment processing on invoices. + descriptions: + ai: Staging data for customer invoices tracking billing activity, payment status, and subscription-level charges across accounts in USD currency. 
+ joins: + - to: accounts + "on": stg_invoices.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: invoices + "on": stg_invoices.invoice_id = invoices.invoice_id + relationship: many_to_one + source: inferred + - to: stg_subscriptions + "on": stg_invoices.subscription_id = stg_subscriptions.subscription_id + relationship: many_to_one + source: inferred + stg_plan_segment_mapping: + table: orbit_analytics.stg_plan_segment_mapping + columns: + - name: plan_segment_mapping_id + type: string + descriptions: + ai: Unique identifiers for plan-to-segment mapping records, linking canonical plans with size bands and segments. + - name: canonical_plan_code + type: string + descriptions: + ai: Standardized plan tier labels (e.g., "starter," "growth") used to categorize subscription or pricing plans. + - name: size_band + type: string + descriptions: + ai: "Business size classification of plan segments: small-medium business, mid-market, or enterprise tiers." + - name: segment + type: string + descriptions: + ai: Business tier or market category assigned to a plan, such as self-serve, commercial, or enterprise. + - name: effective_start_date + type: time + descriptions: + ai: Start date when a plan-segment mapping record becomes active and valid for use. + - name: effective_end_date + type: time + descriptions: + ai: End date when a plan-to-segment mapping record expires or becomes inactive. + descriptions: + ai: Maps subscription plans to customer segments (SMB, mid-market, enterprise) with time-bound validity periods, enabling segment-specific plan routing and business rules. 
+ joins: + - to: plans + "on": stg_plan_segment_mapping.canonical_plan_code = plans.plan_code + relationship: many_to_one + source: inferred + - to: plan_segment_mapping + "on": stg_plan_segment_mapping.plan_segment_mapping_id = plan_segment_mapping.plan_segment_mapping_id + relationship: many_to_one + source: inferred + stg_plans: + table: orbit_analytics.stg_plans + columns: + - name: plan_id + type: string + descriptions: + ai: Unique identifiers for individual subscription or service plans within the system. + - name: plan_code + type: string + descriptions: + ai: Short identifier codes representing subscription tier levels (e.g., starter, growth, enterprise). + - name: plan_name + type: string + descriptions: + ai: Human-readable label for a subscription tier offered to customers. + - name: canonical_plan_code + type: string + descriptions: + ai: Standardized plan tier categories used to group or normalize various plan variants. + - name: is_retired + type: boolean + descriptions: + ai: Boolean flag indicating whether a plan has been deactivated or discontinued from active use. + - name: retired_at + type: time + descriptions: + ai: Timestamp indicating when a plan was or will be retired; far-future dates suggest active plans. + descriptions: + ai: Staging data for subscription plan tiers (Starter, Growth, Enterprise), tracking active plan configurations and retirement status for product offerings. 
+ joins: + - to: plans + "on": stg_plans.canonical_plan_code = plans.plan_code + relationship: many_to_one + source: inferred + - to: plans + "on": stg_plans.plan_code = plans.plan_code + relationship: many_to_one + source: inferred + - to: plans + "on": stg_plans.plan_id = plans.plan_id + relationship: many_to_one + source: inferred + stg_purchase_orders: + table: orbit_analytics.stg_purchase_orders + columns: + - name: purchase_order_id + type: string + descriptions: + ai: Unique identifier for each purchase order, formatted with a "po_" prefix and sequential number. + - name: purchase_request_id + type: string + descriptions: + ai: Unique identifier linking a purchase order to its originating purchase request. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the account associated with each purchase order transaction. + - name: supplier_id + type: string + descriptions: + ai: Unique identifier referencing the supplier fulfilling the purchase order. + - name: created_at + type: time + descriptions: + ai: Timestamp recording when each purchase order was created, stored in Pacific Time. + - name: status + type: string + descriptions: + ai: Current state of the purchase order in the procurement workflow (e.g., sent, fulfilled). + - name: amount_cents + type: number + descriptions: + ai: Monetary value of purchase orders stored in cents (e.g., $251.00–$255.00). + descriptions: + ai: Staging data capturing purchase orders issued to suppliers, tracking procurement requests, associated accounts, order amounts, and fulfillment status for purchasing workflow management. 
+ joins: + - to: purchase_orders + "on": stg_purchase_orders.purchase_order_id = purchase_orders.purchase_order_id + relationship: many_to_one + source: inferred + - to: purchase_requests + "on": stg_purchase_orders.purchase_request_id = purchase_requests.purchase_request_id + relationship: many_to_one + source: inferred + - to: stg_suppliers + "on": stg_purchase_orders.supplier_id = stg_suppliers.supplier_id + relationship: many_to_one + source: inferred + stg_purchase_requests: + table: orbit_analytics.stg_purchase_requests + columns: + - name: purchase_request_id + type: string + descriptions: + ai: Unique identifiers for purchase requests, prefixed with "pr_" followed by a sequential number. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the account associated with each purchase request. + - name: requester_user_id + type: string + descriptions: + ai: Unique identifier of the user who submitted the purchase request. + - name: created_at + type: time + descriptions: + ai: Timestamp indicating when a purchase request was submitted, stored in UTC with timezone offset. + - name: status + type: string + descriptions: + ai: Current approval state of a purchase request in its workflow lifecycle. + - name: amount_cents + type: number + descriptions: + ai: Monetary value of purchase requests stored in cents (e.g., 25100 = $251.00). + - name: supplier_id + type: string + descriptions: + ai: Unique identifiers referencing the supplier associated with each purchase request. + descriptions: + ai: Staging data capturing employee-initiated purchase requests submitted to suppliers for procurement approval workflows, tracking request status and associated monetary amounts. 
+ joins: + - to: purchase_requests + "on": stg_purchase_requests.purchase_request_id = purchase_requests.purchase_request_id + relationship: many_to_one + source: inferred + - to: stg_users + "on": stg_purchase_requests.requester_user_id = stg_users.user_id + relationship: many_to_one + source: inferred + - to: stg_suppliers + "on": stg_purchase_requests.supplier_id = stg_suppliers.supplier_id + relationship: many_to_one + source: inferred + stg_refunds: + table: orbit_analytics.stg_refunds + columns: + - name: refund_id + type: string + descriptions: + ai: Unique identifiers for individual refund transactions, formatted with a sequential numeric suffix. + - name: invoice_id + type: string + descriptions: + ai: Unique identifier linking each refund to its originating invoice record. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking refunds to the associated customer or business account. + - name: amount_cents + type: number + descriptions: + ai: Monetary value of refunds in cents; sample suggests a fixed refund amount of $10,000. + - name: status + type: string + descriptions: + ai: Current state of the refund transaction (e.g., succeeded, pending, failed). + - name: refunded_at + type: time + descriptions: + ai: Timestamp indicating when a refund was processed, recorded in Pacific Standard Time. + - name: reason + type: string + descriptions: + ai: Categorical explanation for why a refund was issued, such as board reconciliation credits. + descriptions: + ai: Staging data capturing processed refund transactions issued to accounts against invoices, primarily for board reconciliation credits and financial auditing purposes. 
+ joins: + - to: accounts + "on": stg_refunds.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: invoices + "on": stg_refunds.invoice_id = invoices.invoice_id + relationship: many_to_one + source: inferred + - to: refunds + "on": stg_refunds.refund_id = refunds.refund_id + relationship: many_to_one + source: inferred + stg_sessions: + table: orbit_analytics.stg_sessions + columns: + - name: session_id + type: string + descriptions: + ai: Unique sequential identifiers for individual user sessions, formatted with a zero-padded numeric suffix. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the customer or business account associated with each session. + - name: user_id + type: string + descriptions: + ai: Unique identifiers for individual users associated with each recorded session. + - name: started_at + type: time + descriptions: + ai: Timestamp marking when a user session began, stored in Pacific Time zone. + - name: duration_seconds + type: number + descriptions: + ai: Length of a user session measured in seconds, indicating engagement duration. + - name: is_internal + type: boolean + descriptions: + ai: Boolean flag indicating whether the session originated from internal company users or staff. + - name: is_test + type: boolean + descriptions: + ai: Boolean flag identifying whether the session was generated for testing purposes. + descriptions: + ai: Staging data capturing user session activity, tracking engagement duration and filtering out internal or test traffic for analytics purposes. + joins: + - to: sessions + "on": stg_sessions.session_id = sessions.session_id + relationship: many_to_one + source: inferred + stg_subscriptions: + table: orbit_analytics.stg_subscriptions + columns: + - name: subscription_id + type: string + descriptions: + ai: Unique identifiers for individual subscription records, prefixed with "sub_" followed by a sequential number. 
+ - name: account_id + type: string + descriptions: + ai: Unique identifier linking subscriptions to their associated customer accounts. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking subscriptions to their associated contractual agreements or sales contracts. + - name: plan_id + type: string + descriptions: + ai: Identifier referencing the specific subscription plan or pricing tier associated with each subscription record. + - name: mrr_cents + type: number + descriptions: + ai: Monthly Recurring Revenue in cents for each subscription, used for billing and revenue tracking. + - name: status + type: string + descriptions: + ai: Current state of the subscription lifecycle, such as active, cancelled, or expired. + - name: started_at + type: time + descriptions: + ai: Timestamp marking when a subscription became active or the billing period began. + - name: ended_at + type: time + descriptions: + ai: Timestamp marking when a subscription period concludes or expires, indicating the scheduled end date. + - name: cancelled_at + type: time + descriptions: + ai: Timestamp indicating when a subscription was cancelled; far-future dates suggest active, non-cancelled subscriptions. + descriptions: + ai: Staging data tracking customer subscription lifecycle details, including billing amounts, plan assignments, and contract durations for recurring revenue management. + joins: + - to: stg_invoices + "on": stg_subscriptions.subscription_id = stg_invoices.subscription_id + relationship: one_to_many + source: inferred + - to: invoices + "on": stg_subscriptions.subscription_id = invoices.subscription_id + relationship: one_to_many + source: inferred + stg_supplier_onboarding_events: + table: orbit_analytics.stg_supplier_onboarding_events + columns: + - name: supplier_onboarding_event_id + type: string + descriptions: + ai: Unique identifiers for individual supplier onboarding events, formatted with sequential numeric suffixes. 
+ - name: supplier_id + type: string + descriptions: + ai: Unique identifiers for suppliers being tracked through the onboarding process. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for accounts associated with supplier onboarding events, formatted as "acct_XXXX". + - name: event_type + type: string + descriptions: + ai: Stages in the supplier onboarding lifecycle, tracking progression from invitation to approval. + - name: event_at + type: time + descriptions: + ai: Timestamps recording when supplier onboarding events occurred, stored in Pacific Time zone. + - name: status + type: string + descriptions: + ai: Current state of a supplier onboarding event, such as pending, completed, or blocked. + descriptions: + ai: Tracks supplier onboarding lifecycle events, capturing progress milestones and statuses as suppliers move through the onboarding process. + stg_suppliers: + table: orbit_analytics.stg_suppliers + columns: + - name: supplier_id + type: string + descriptions: + ai: Unique identifiers for suppliers, formatted with a zero-padded sequential numeric suffix. + - name: account_id + type: string + descriptions: + ai: Unique account identifiers linking suppliers to their associated business accounts in the system. + - name: supplier_name + type: string + descriptions: + ai: Official business name or label assigned to each supplier in the system. + - name: status + type: string + descriptions: + ai: Current lifecycle stage of a supplier's relationship with the organization, from onboarding to active or inactive. + - name: created_at + type: time + descriptions: + ai: Timestamp recording when each supplier record was initially created in the system. + descriptions: + ai: Staging data tracking supplier onboarding lifecycle, capturing vendor registration status and progression from invitation through active account activation. 
+ joins: + - to: stg_purchase_orders + "on": stg_suppliers.supplier_id = stg_purchase_orders.supplier_id + relationship: one_to_many + source: inferred + - to: stg_purchase_requests + "on": stg_suppliers.supplier_id = stg_purchase_requests.supplier_id + relationship: one_to_many + source: inferred + - to: purchase_orders + "on": stg_suppliers.supplier_id = purchase_orders.supplier_id + relationship: one_to_many + source: inferred + - to: purchase_requests + "on": stg_suppliers.supplier_id = purchase_requests.supplier_id + relationship: one_to_many + source: inferred + stg_support_tickets: + table: orbit_analytics.stg_support_tickets + columns: + - name: support_ticket_id + type: string + descriptions: + ai: Unique identifiers for individual customer support tickets, formatted as sequential alphanumeric codes. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking support tickets to specific customer or business accounts. + - name: requester_user_id + type: string + descriptions: + ai: Unique identifier of the user who submitted or initiated the support ticket request. + - name: severity + type: string + descriptions: + ai: Priority level of support tickets, indicating urgency (e.g., critical, high). + - name: category + type: string + descriptions: + ai: Classification of the support issue type, such as approval routing workflows or processes. + - name: status + type: string + descriptions: + ai: Current state of a support ticket in its resolution workflow (e.g., open, closed). + - name: created_at + type: time + descriptions: + ai: Timestamp recording when a support ticket was initially submitted by the requester. + - name: resolved_at + type: time + descriptions: + ai: Timestamp indicating when a support ticket was resolved; future date (2099) likely signals unresolved tickets. + - name: owner_user_id + type: string + descriptions: + ai: Unique identifier of the support agent or employee assigned to handle the ticket. 
+ descriptions: + ai: Staging data for customer support tickets tracking issue severity, status, and ownership for resolution management across accounts. + stg_users: + table: orbit_analytics.stg_users + columns: + - name: user_id + type: string + descriptions: + ai: Unique identifiers for individual users, formatted with a zero-padded sequential numeric suffix. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking users to their associated organizational account, formatted as "acct_XXXX". + - name: email + type: string + descriptions: + ai: Email addresses of users, formatted with anonymized user and customer identifiers for staging/testing purposes. + - name: role + type: string + descriptions: + ai: Functional designation of a user within the system, such as requester, approver, admin, or finance. + - name: is_requester + type: boolean + descriptions: + ai: Boolean flag indicating whether the user has submitted or can submit requests. + - name: is_internal + type: boolean + descriptions: + ai: Boolean flag indicating whether a user belongs to the internal organization or team. + - name: is_test + type: boolean + descriptions: + ai: Boolean flag identifying whether the user account is a test or dummy entry. + - name: created_at + type: time + descriptions: + ai: Timestamp recording when a user account was first created in the system. + - name: slack_user_id + type: string + descriptions: + ai: Unique identifiers linking users to their corresponding Slack workspace accounts. + - name: looker_user_id + type: string + descriptions: + ai: Unique identifiers linking users to their corresponding Looker analytics platform accounts. + - name: notion_user_id + type: string + descriptions: + ai: Unique identifiers linking users to their corresponding Notion workspace accounts. + - name: drive_owner_id + type: string + descriptions: + ai: Unique identifiers linking users to their associated Google Drive ownership accounts. 
+ descriptions: + ai: Staging data for external user accounts, capturing identity, roles, and system integrations (Slack, Looker) to support access management and authentication workflows. + joins: + - to: int_procurement_qualifying_actions + "on": stg_users.user_id = int_procurement_qualifying_actions.user_id + relationship: one_to_many + source: inferred + - to: stg_purchase_requests + "on": stg_users.user_id = stg_purchase_requests.requester_user_id + relationship: one_to_many + source: inferred + - to: purchase_requests + "on": stg_users.user_id = purchase_requests.requester_user_id + relationship: one_to_many + source: inferred + - to: sessions + "on": stg_users.user_id = sessions.user_id + relationship: one_to_many + source: inferred diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/_schema/orbit_raw.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/_schema/orbit_raw.yaml new file mode 100644 index 00000000..2011f93d --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/_schema/orbit_raw.yaml @@ -0,0 +1,989 @@ +tables: + account_hierarchy: + table: orbit_raw.account_hierarchy + columns: + - name: account_hierarchy_id + type: string + descriptions: + ai: Unique identifiers for account parent-child relationship records within an organizational hierarchy. + - name: parent_account_id + type: string + descriptions: + ai: Identifiers for the higher-level accounts in a parent-child account relationship hierarchy. + - name: child_account_id + type: string + descriptions: + ai: Unique identifiers for subordinate accounts nested within a parent-child account relationship structure. + - name: relationship_type + type: string + descriptions: + ai: Categorizes the structural or financial relationship between parent and child accounts in a hierarchy. + - name: effective_start_date + type: time + descriptions: + ai: Start date when a parent-child account relationship becomes active and valid. 
+ - name: effective_end_date + type: time + descriptions: + ai: End date when the parent-child account relationship expires or becomes inactive. + descriptions: + ai: Defines parent-child relationships between accounts, supporting organizational structures like subsidiaries, divisions, and billing groups with time-bound validity periods. + joins: + - to: stg_account_hierarchy + "on": account_hierarchy.account_hierarchy_id = stg_account_hierarchy.account_hierarchy_id + relationship: one_to_many + source: inferred + account_owners: + table: orbit_raw.account_owners + columns: + - name: account_owner_id + type: string + descriptions: + ai: Unique identifiers for account ownership records, linking accounts to their designated owners. + - name: account_id + type: string + descriptions: + ai: Unique identifiers linking ownership records to specific customer or business accounts. + - name: owner_user_id + type: string + descriptions: + ai: Unique identifiers for individual users assigned as owners of specific accounts. + - name: owner_team + type: string + descriptions: + ai: Organizational team responsible for managing or owning the account, such as sales or finance. + - name: role + type: string + descriptions: + ai: Business function or responsibility of the owner associated with the account relationship. + - name: effective_start_date + type: time + descriptions: + ai: Start date when an account owner's role or assignment becomes active. + - name: effective_end_date + type: time + descriptions: + ai: End date when an account owner's assigned role or responsibility expires or becomes inactive. + descriptions: + ai: Tracks ownership assignments of accounts to users and teams, defining roles and time-bound responsibilities for managing customer relationships. 
+ joins: + - to: stg_account_owners + "on": account_owners.account_owner_id = stg_account_owners.account_owner_id + relationship: one_to_many + source: inferred + - to: accounts + "on": account_owners.account_id = accounts.account_id + relationship: many_to_one + source: inferred + accounts: + table: orbit_raw.accounts + columns: + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer or business accounts, prefixed with "acct_" followed by a numeric sequence. + - name: parent_account_id + type: string + descriptions: + ai: Identifier linking a child account to its parent in a hierarchical account structure. + - name: account_name + type: string + descriptions: + ai: Display name or label assigned to each customer or business account. + - name: domain + type: string + descriptions: + ai: Unique web domain associated with each customer account for identification and access purposes. + - name: industry + type: string + descriptions: + ai: Business sector or vertical market classification of the account organization. + - name: sales_region + type: string + descriptions: + ai: Geographic sales territory classification (e.g., North America, EMEA, Asia-Pacific) for account segmentation. + - name: size_band + type: string + descriptions: + ai: Categorical classification of account size, segmenting customers by organizational scale (e.g., enterprise, SMB). + - name: lifecycle_status + type: string + descriptions: + ai: Current stage of an account in the business relationship lifecycle (e.g., active, churned). + - name: is_internal + type: boolean + descriptions: + ai: Boolean flag identifying whether the account belongs to the company internally, rather than an external customer. + - name: is_test + type: boolean + descriptions: + ai: Boolean flag identifying whether an account is a test/sandbox entry, excluded from production reporting. 
+ - name: created_at + type: time + descriptions: + ai: Timestamp recording when each account record was first created in the system. + descriptions: + ai: Customer account records for a SaaS platform, tracking organizational clients across industries, regions, and lifecycle stages for account management purposes. + joins: + - to: int_active_contract_arr + "on": accounts.account_id = int_active_contract_arr.account_id + relationship: one_to_many + source: inferred + - to: int_customer_health_signals + "on": accounts.account_id = int_customer_health_signals.account_id + relationship: one_to_many + source: inferred + - to: mart_account_segments + "on": accounts.account_id = mart_account_segments.account_id + relationship: one_to_many + source: inferred + - to: mart_customer_health + "on": accounts.account_id = mart_customer_health.account_id + relationship: one_to_many + source: inferred + - to: stg_account_owners + "on": accounts.account_id = stg_account_owners.account_id + relationship: one_to_many + source: inferred + - to: stg_accounts + "on": accounts.account_id = stg_accounts.account_id + relationship: one_to_many + source: inferred + - to: stg_contracts + "on": accounts.account_id = stg_contracts.account_id + relationship: one_to_many + source: inferred + - to: stg_invoices + "on": accounts.account_id = stg_invoices.account_id + relationship: one_to_many + source: inferred + - to: stg_refunds + "on": accounts.account_id = stg_refunds.account_id + relationship: one_to_many + source: inferred + - to: account_owners + "on": accounts.account_id = account_owners.account_id + relationship: one_to_many + source: inferred + - to: contracts + "on": accounts.account_id = contracts.account_id + relationship: one_to_many + source: inferred + - to: invoices + "on": accounts.account_id = invoices.account_id + relationship: one_to_many + source: inferred + - to: refunds + "on": accounts.account_id = refunds.account_id + relationship: one_to_many + source: inferred + 
activation_events: + table: orbit_raw.activation_events + columns: + - name: activation_event_id + type: string + descriptions: + ai: Unique sequential identifiers for individual activation events, formatted with a prefixed numeric string. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer or business accounts associated with activation events. + - name: user_id + type: string + descriptions: + ai: Unique identifiers for users associated with activation events, formatted as sequential user codes. + - name: event_type + type: string + descriptions: + ai: Categorizes key activation milestones, such as a user's first login as a requester. + - name: event_at + type: time + descriptions: + ai: Timestamp recording when an activation event occurred, stored in UTC with timezone offset. + - name: policy_version + type: string + descriptions: + ai: Version identifier of the policy in effect at the time of the activation event. + descriptions: + ai: Tracks milestone activation events when users first access accounts, capturing policy version context for compliance and onboarding analytics. + joins: + - to: stg_activation_events + "on": activation_events.activation_event_id = stg_activation_events.activation_event_id + relationship: one_to_many + source: inferred + approval_events: + table: orbit_raw.approval_events + columns: + - name: approval_event_id + type: string + descriptions: + ai: Unique identifiers for individual approval events, formatted with a sequential numeric suffix. + - name: purchase_request_id + type: string + descriptions: + ai: Unique identifiers linking approval events to their associated purchase requests. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for accounts associated with purchase request approval events. + - name: approver_user_id + type: string + descriptions: + ai: Unique identifiers for users who reviewed and made approval decisions on purchase requests. 
+ - name: decision + type: string + descriptions: + ai: Outcome of an approval action on a purchase request, such as "approved" or "rejected." + - name: decided_at + type: time + descriptions: + ai: Timestamp recording when an approver made their decision on a purchase request. + descriptions: + ai: Records of approval decisions made by designated approvers on purchase requests, tracking who approved what and when for procurement governance. + joins: + - to: stg_approval_events + "on": approval_events.approval_event_id = stg_approval_events.approval_event_id + relationship: one_to_many + source: inferred + - to: purchase_requests + "on": approval_events.purchase_request_id = purchase_requests.purchase_request_id + relationship: many_to_one + source: inferred + arr_movements: + table: orbit_raw.arr_movements + columns: + - name: arr_movement_id + type: string + descriptions: + ai: Unique identifiers for individual ARR movement records, using a sequential prefixed format. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the customer account associated with each ARR movement record. + - name: parent_account_id + type: string + descriptions: + ai: Identifier linking a child account to its parent in a hierarchical account structure. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking ARR movements to their associated customer contracts. + - name: movement_date + type: time + descriptions: + ai: Date when an ARR movement or change event occurred, used for revenue tracking. + - name: movement_type + type: string + descriptions: + ai: Categorical classification of ARR change events, such as expansion, contraction, churn, or new business. + - name: movement_reason + type: string + descriptions: + ai: Categorizes the specific cause driving an ARR change, such as seat expansion or contraction. 
+ - name: arr_delta_cents + type: number + descriptions: + ai: Monetary change in Annual Recurring Revenue for a given movement, stored in cents. + - name: starting_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) value in cents before the movement was applied. + - name: ending_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) balance in cents after applying the recorded movement. + descriptions: + ai: Tracks changes in Annual Recurring Revenue (ARR) for customer accounts, capturing expansion, contraction, and churn movements to monitor subscription revenue growth. + joins: + - to: int_parent_account_arr_movements + "on": arr_movements.arr_movement_id = int_parent_account_arr_movements.arr_movement_id + relationship: one_to_many + source: inferred + - to: stg_arr_movements + "on": arr_movements.arr_movement_id = stg_arr_movements.arr_movement_id + relationship: one_to_many + source: inferred + contract_discount_terms: + table: orbit_raw.contract_discount_terms + columns: + - name: discount_term_id + type: string + descriptions: + ai: Unique identifiers for individual discount terms associated with contracts, formatted as sequential codes. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking discount terms to their associated contracts in the system. + - name: discount_type + type: string + descriptions: + ai: Categorizes the business reason or occasion for applying a contract discount. + - name: discount_cents + type: number + descriptions: + ai: Fixed discount amount in cents applied to a contract, representing a flat monetary reduction. + - name: discount_percent + type: number + descriptions: + ai: Percentage-based discount rate applied to a contract, expressed as a decimal (e.g., 0.10 = 10%). + - name: starts_on + type: time + descriptions: + ai: Effective start date for when a contract discount term becomes active. 
+ - name: expires_on + type: time + descriptions: + ai: End date when a contract discount term becomes inactive or no longer valid. + - name: reason + type: string + descriptions: + ai: Categorical label indicating the rationale or trigger for a contract discount, such as expiration events. + descriptions: + ai: Tracks discount terms applied to contracts, capturing promotional pricing details including discount amounts, types, validity periods, and expiration reasons for contract management. + contracts: + table: orbit_raw.contracts + columns: + - name: contract_id + type: string + descriptions: + ai: Unique identifiers for individual contracts, formatted as sequential alphanumeric codes. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking each contract to a specific customer account. + - name: parent_account_id + type: string + descriptions: + ai: Identifier linking contracts to a parent/umbrella account, supporting hierarchical account structures. + - name: plan_id + type: string + descriptions: + ai: Identifier referencing the subscription or pricing plan associated with each contract. + - name: contract_arr_cents + type: number + descriptions: + ai: Annual Recurring Revenue (ARR) value of the contract, stored in cents. + - name: booked_arr_cents + type: number + descriptions: + ai: Committed annual recurring revenue at time of booking, stored in cents. + - name: start_date + type: time + descriptions: + ai: Date when a contract becomes active and billing or service obligations begin. + - name: end_date + type: time + descriptions: + ai: Expiration date marking when a contract term concludes, used for renewal tracking. + - name: status + type: string + descriptions: + ai: Current lifecycle state of a contract, such as active, expired, or cancelled. + - name: renewal_type + type: string + descriptions: + ai: Classification of contract motion type, indicating whether it's new business, a renewal, expansion, or downgrade. 
+ descriptions: + ai: Tracks customer subscription contracts, capturing revenue commitments, contract terms, and lifecycle stages (new, renewal, expansion) for recurring revenue management. + joins: + - to: int_active_contract_arr + "on": contracts.contract_id = int_active_contract_arr.contract_id + relationship: one_to_many + source: inferred + - to: stg_contracts + "on": contracts.contract_id = stg_contracts.contract_id + relationship: one_to_many + source: inferred + - to: accounts + "on": contracts.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: plans + "on": contracts.plan_id = plans.plan_id + relationship: many_to_one + source: inferred + invoice_line_items: + table: orbit_raw.invoice_line_items + columns: + - name: invoice_line_item_id + type: string + descriptions: + ai: Unique identifiers for individual line items within invoices, formatted as sequential alphanumeric codes. + - name: invoice_id + type: string + descriptions: + ai: Foreign key referencing the parent invoice, linking line items to their corresponding invoice record. + - name: line_item_type + type: string + descriptions: + ai: Categorizes invoice charges by billing type, such as subscriptions, seats, usage, or add-ons. + - name: amount_cents + type: number + descriptions: + ai: Monetary value of individual invoice line items, stored in cents (e.g., $20,000). + - name: recognized_at + type: time + descriptions: + ai: Timestamp indicating when a line item's revenue was formally recognized for accounting purposes. + descriptions: + ai: Individual line items within invoices, capturing billing details for subscriptions, seat licenses, and usage-based charges with revenue recognition timestamps. 
+ joins: + - to: stg_invoice_line_items + "on": invoice_line_items.invoice_line_item_id = stg_invoice_line_items.invoice_line_item_id + relationship: one_to_many + source: inferred + invoices: + table: orbit_raw.invoices + columns: + - name: invoice_id + type: string + descriptions: + ai: Unique sequential identifiers for individual invoice records, prefixed with "inv_". + - name: account_id + type: string + descriptions: + ai: Unique identifiers linking invoices to specific customer accounts in the billing system. + - name: subscription_id + type: string + descriptions: + ai: Unique identifiers linking invoices to their associated subscription plans or agreements. + - name: invoice_date + type: time + descriptions: + ai: Timestamp indicating when an invoice was issued to the account or customer. + - name: paid_at + type: time + descriptions: + ai: Timestamp recording when an invoice was successfully paid, stored in UTC. + - name: status + type: string + descriptions: + ai: Current payment state of an invoice, such as paid, pending, or overdue. + - name: currency + type: string + descriptions: + ai: Three-letter ISO currency code used for billing and payment processing on invoices. + descriptions: + ai: Billing records tracking subscription invoices, payment timestamps, and statuses for customer accounts in a subscription-based revenue management system. 
+ joins: + - to: stg_invoices + "on": invoices.invoice_id = stg_invoices.invoice_id + relationship: one_to_many + source: inferred + - to: stg_refunds + "on": invoices.invoice_id = stg_refunds.invoice_id + relationship: one_to_many + source: inferred + - to: accounts + "on": invoices.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: stg_subscriptions + "on": invoices.subscription_id = stg_subscriptions.subscription_id + relationship: many_to_one + source: inferred + - to: refunds + "on": invoices.invoice_id = refunds.invoice_id + relationship: one_to_many + source: inferred + plan_segment_mapping: + table: orbit_raw.plan_segment_mapping + columns: + - name: plan_segment_mapping_id + type: string + descriptions: + ai: Unique identifiers for plan-to-segment mapping records, linking insurance plans to specific customer segments. + - name: canonical_plan_code + type: string + descriptions: + ai: Standardized plan tier identifiers (e.g., "starter," "growth") used to categorize subscription or pricing plans. + - name: size_band + type: string + descriptions: + ai: Business size classification of customers, categorizing them as SMB, mid-market, or enterprise segments. + - name: segment + type: string + descriptions: + ai: Business or market segment classification (e.g., self-serve, commercial, enterprise) for plan targeting. + - name: effective_start_date + type: time + descriptions: + ai: Start date when a plan-segment mapping record becomes active and valid for use. + - name: effective_end_date + type: time + descriptions: + ai: End date when a plan-segment mapping record expires or becomes inactive. + descriptions: + ai: Maps subscription plans to customer segments and size bands with time-bound effective dates, enabling segment-specific plan routing and pricing logic. 
+ joins: + - to: stg_plan_segment_mapping + "on": plan_segment_mapping.plan_segment_mapping_id = stg_plan_segment_mapping.plan_segment_mapping_id + relationship: one_to_many + source: inferred + - to: plans + "on": plan_segment_mapping.canonical_plan_code = plans.plan_code + relationship: many_to_one + source: inferred + plans: + table: orbit_raw.plans + columns: + - name: plan_id + type: string + descriptions: + ai: Unique identifiers for subscription or service plans within the system. + - name: plan_code + type: string + descriptions: + ai: Short identifier codes for subscription tiers, used to programmatically reference available plans. + - name: plan_name + type: string + descriptions: + ai: Human-readable label for a subscription tier offered to customers. + - name: canonical_plan_code + type: string + descriptions: + ai: Standardized plan tier identifiers grouping related plans into core business categories (starter, growth, enterprise). + - name: is_retired + type: boolean + descriptions: + ai: Boolean flag indicating whether a plan has been decommissioned or is no longer active. + - name: retired_at + type: time + descriptions: + ai: Timestamp indicating when a plan was or will be retired/deactivated. + descriptions: + ai: Subscription tier configurations defining available service plans (Starter, Growth, Enterprise) for customer pricing and product packaging decisions. 
+ joins: + - to: int_active_contract_arr + "on": plans.plan_id = int_active_contract_arr.plan_id + relationship: one_to_many + source: inferred + - to: mart_account_segments + "on": plans.plan_code = mart_account_segments.current_plan_code + relationship: one_to_many + source: inferred + - to: mart_account_segments + "on": plans.plan_code = mart_account_segments.normalized_plan_code + relationship: one_to_many + source: inferred + - to: stg_contracts + "on": plans.plan_id = stg_contracts.plan_id + relationship: one_to_many + source: inferred + - to: stg_plan_segment_mapping + "on": plans.plan_code = stg_plan_segment_mapping.canonical_plan_code + relationship: one_to_many + source: inferred + - to: stg_plans + "on": plans.plan_code = stg_plans.canonical_plan_code + relationship: one_to_many + source: inferred + - to: stg_plans + "on": plans.plan_code = stg_plans.plan_code + relationship: one_to_many + source: inferred + - to: stg_plans + "on": plans.plan_id = stg_plans.plan_id + relationship: one_to_many + source: inferred + - to: contracts + "on": plans.plan_id = contracts.plan_id + relationship: one_to_many + source: inferred + - to: plan_segment_mapping + "on": plans.plan_code = plan_segment_mapping.canonical_plan_code + relationship: one_to_many + source: inferred + - to: plans + "on": plans.canonical_plan_code = plans.plan_code + relationship: many_to_one + source: inferred + - to: plans + "on": plans.plan_code = plans.canonical_plan_code + relationship: one_to_many + source: inferred + purchase_orders: + table: orbit_raw.purchase_orders + columns: + - name: purchase_order_id + type: string + descriptions: + ai: Unique identifiers for purchase orders, formatted with a "po_" prefix and sequential numbering. + - name: purchase_request_id + type: string + descriptions: + ai: Unique identifier linking a purchase order to its originating purchase request. 
+ - name: account_id + type: string + descriptions: + ai: Unique identifier for the account associated with each purchase order transaction. + - name: supplier_id + type: string + descriptions: + ai: Unique identifiers referencing the supplier fulfilling each purchase order. + - name: created_at + type: time + descriptions: + ai: Timestamp recording when each purchase order was created or submitted in the system. + - name: status + type: string + descriptions: + ai: Current state of a purchase order in its fulfillment lifecycle (e.g., sent, fulfilled). + - name: amount_cents + type: number + descriptions: + ai: Monetary value of purchase orders stored in cents (e.g., 25100 = $251.00). + descriptions: + ai: Records of formal purchase orders issued to suppliers, tracking procurement transactions from approved purchase requests through fulfillment for financial and supply chain management. + joins: + - to: stg_purchase_orders + "on": purchase_orders.purchase_order_id = stg_purchase_orders.purchase_order_id + relationship: one_to_many + source: inferred + - to: purchase_requests + "on": purchase_orders.purchase_request_id = purchase_requests.purchase_request_id + relationship: many_to_one + source: inferred + - to: stg_suppliers + "on": purchase_orders.supplier_id = stg_suppliers.supplier_id + relationship: many_to_one + source: inferred + purchase_requests: + table: orbit_raw.purchase_requests + columns: + - name: purchase_request_id + type: string + descriptions: + ai: Unique identifiers for purchase requests, formatted with a "pr_" prefix and sequential numbering. + - name: account_id + type: string + descriptions: + ai: Unique identifier for the account associated with each purchase request. + - name: requester_user_id + type: string + descriptions: + ai: Unique identifier of the user who submitted the purchase request. 
+ - name: created_at + type: time + descriptions: + ai: Timestamp recording when each purchase request was submitted, stored in UTC with timezone offset. + - name: status + type: string + descriptions: + ai: Current approval state of a purchase request, such as "submitted" or "approved." + - name: amount_cents + type: number + descriptions: + ai: Monetary value of purchase requests, stored in cents (e.g., $251–$255). + - name: supplier_id + type: string + descriptions: + ai: Unique identifier referencing the vendor or supplier associated with a purchase request. + descriptions: + ai: Tracks employee or user-submitted purchase requests, capturing approval status, requested amounts, and associated suppliers for procurement workflow management. + joins: + - to: stg_approval_events + "on": purchase_requests.purchase_request_id = stg_approval_events.purchase_request_id + relationship: one_to_many + source: inferred + - to: stg_purchase_orders + "on": purchase_requests.purchase_request_id = stg_purchase_orders.purchase_request_id + relationship: one_to_many + source: inferred + - to: stg_purchase_requests + "on": purchase_requests.purchase_request_id = stg_purchase_requests.purchase_request_id + relationship: one_to_many + source: inferred + - to: approval_events + "on": purchase_requests.purchase_request_id = approval_events.purchase_request_id + relationship: one_to_many + source: inferred + - to: purchase_orders + "on": purchase_requests.purchase_request_id = purchase_orders.purchase_request_id + relationship: one_to_many + source: inferred + - to: stg_users + "on": purchase_requests.requester_user_id = stg_users.user_id + relationship: many_to_one + source: inferred + - to: stg_suppliers + "on": purchase_requests.supplier_id = stg_suppliers.supplier_id + relationship: many_to_one + source: inferred + refunds: + table: orbit_raw.refunds + columns: + - name: refund_id + type: string + descriptions: + ai: Unique identifiers for individual refund transactions, formatted 
with a sequential numeric suffix. + - name: invoice_id + type: string + descriptions: + ai: Unique identifier linking each refund to its originating invoice record. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking refunds to the associated customer or business account. + - name: amount_cents + type: number + descriptions: + ai: Monetary value of refunds in cents; all samples equal $10,000. + - name: status + type: string + descriptions: + ai: Current state of the refund process, indicating whether the refund was successfully completed. + - name: refunded_at + type: time + descriptions: + ai: Timestamp recording when a refund was officially processed and completed. + - name: reason + type: string + descriptions: + ai: Categorical explanation for why a refund was issued, such as board reconciliation credits. + descriptions: + ai: Records of processed financial refunds issued to accounts against invoices, tracking amounts, outcomes, and reasons for reconciliation purposes. + joins: + - to: stg_refunds + "on": refunds.refund_id = stg_refunds.refund_id + relationship: one_to_many + source: inferred + - to: accounts + "on": refunds.account_id = accounts.account_id + relationship: many_to_one + source: inferred + - to: invoices + "on": refunds.invoice_id = invoices.invoice_id + relationship: many_to_one + source: inferred + sessions: + table: orbit_raw.sessions + columns: + - name: session_id + type: string + descriptions: + ai: Unique sequential identifiers for individual user sessions, formatted with zero-padded numeric suffixes. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for customer or business accounts associated with each session. + - name: user_id + type: string + descriptions: + ai: Unique identifiers for individual users associated with each session record. + - name: started_at + type: time + descriptions: + ai: Timestamp marking when a user session began, stored in Pacific Time. 
+ - name: duration_seconds + type: number + descriptions: + ai: Length of a user session measured in seconds, used for engagement analysis. + - name: is_internal + type: boolean + descriptions: + ai: Boolean flag indicating whether the session originated from internal company users or staff. + - name: is_test + type: boolean + descriptions: + ai: Boolean flag identifying whether the session was generated for testing purposes. + descriptions: + ai: Tracks user session activity across accounts, capturing login events, session duration, and timestamps for monitoring platform engagement and usage patterns. + joins: + - to: stg_sessions + "on": sessions.session_id = stg_sessions.session_id + relationship: one_to_many + source: inferred + - to: stg_users + "on": sessions.user_id = stg_users.user_id + relationship: many_to_one + source: inferred + subscriptions: + table: orbit_raw.subscriptions + columns: + - name: subscription_id + type: string + descriptions: + ai: Unique identifiers for individual subscription records, prefixed with "sub_" followed by a sequential number. + - name: account_id + type: string + descriptions: + ai: Unique identifiers linking subscriptions to their associated customer accounts. + - name: contract_id + type: string + descriptions: + ai: Unique identifier linking subscriptions to their associated contractual agreements with customers. + - name: plan_id + type: string + descriptions: + ai: Identifier referencing the pricing plan associated with a subscription, linking to a plans table. + - name: mrr_cents + type: number + descriptions: + ai: Monthly recurring revenue in cents for each subscription, used for billing and revenue tracking. + - name: status + type: string + descriptions: + ai: Current state of a subscription, such as active, cancelled, or expired. + - name: started_at + type: time + descriptions: + ai: Timestamp marking when a subscription became active or billing period began. 
+ - name: ended_at + type: time + descriptions: + ai: Timestamp marking when a subscription period expires or terminates, often a future contract end date. + - name: cancelled_at + type: time + descriptions: + ai: Timestamp marking when a subscription was cancelled; a far-future date (2099) indicates active, non-cancelled subscriptions. + descriptions: + ai: Tracks customer subscription lifecycle data, linking accounts to contracts and pricing plans, with billing amounts and active/inactive status periods. + supplier_onboarding_events: + table: orbit_raw.supplier_onboarding_events + columns: + - name: supplier_onboarding_event_id + type: string + descriptions: + ai: Unique identifiers for individual supplier onboarding events, formatted with sequential numeric suffixes. + - name: supplier_id + type: string + descriptions: + ai: Unique identifiers for suppliers being tracked through the onboarding process. + - name: account_id + type: string + descriptions: + ai: Unique identifiers for accounts associated with supplier onboarding events, formatted as "acct_XXXX". + - name: event_type + type: string + descriptions: + ai: Stages in the supplier onboarding lifecycle, tracking progression from invitation to approval. + - name: event_at + type: time + descriptions: + ai: Timestamps recording when supplier onboarding events occurred, stored in Pacific Time zone. + - name: status + type: string + descriptions: + ai: Current state of a supplier onboarding event, such as pending, completed, or blocked. + descriptions: + ai: Tracks milestone events during supplier onboarding workflows, capturing progression stages, statuses, and timestamps for managing supplier account activation processes. + suppliers: + table: orbit_raw.suppliers + columns: + - name: supplier_id + type: string + descriptions: + ai: Unique identifiers for suppliers, formatted with a sequential numeric suffix. 
+ - name: account_id + type: string + descriptions: + ai: Unique account identifiers linking suppliers to their associated accounts, formatted with an "acct_" prefix. + - name: supplier_name + type: string + descriptions: + ai: Official business name or label assigned to identify each supplier entity. + - name: status + type: string + descriptions: + ai: Current lifecycle stage of a supplier relationship, tracking progression from invitation to active or inactive. + - name: created_at + type: time + descriptions: + ai: Timestamps recording when supplier records were created, stored in Pacific Time zone. + descriptions: + ai: Tracks vendor/supplier onboarding lifecycle, managing their registration status and account associations from initial invitation through active engagement. + support_tickets: + table: orbit_raw.support_tickets + columns: + - name: support_ticket_id + type: string + descriptions: + ai: Unique identifiers for customer support tickets, formatted sequentially with a "ticket_" prefix. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking each support ticket to a specific customer account. + - name: requester_user_id + type: string + descriptions: + ai: Unique identifiers of users who submitted or initiated the support ticket requests. + - name: severity + type: string + descriptions: + ai: Priority level of a support ticket, indicating urgency (e.g., critical, high). + - name: category + type: string + descriptions: + ai: Classification of the support issue type, such as approval routing workflows or processes. + - name: status + type: string + descriptions: + ai: Current state of a support ticket in its resolution workflow (e.g., open, closed). + - name: created_at + type: time + descriptions: + ai: Timestamp recording when a support ticket was initially submitted or opened. 
+ - name: resolved_at + type: time + descriptions: + ai: Timestamp indicating when a ticket was resolved; future sentinel date (2099) likely represents unresolved tickets. + - name: owner_user_id + type: string + descriptions: + ai: Unique identifier of the support agent or staff member assigned to handle the ticket. + descriptions: + ai: Tracks customer support requests, their severity, status, and ownership for managing issue resolution workflows across accounts. + users: + table: orbit_raw.users + columns: + - name: user_id + type: string + descriptions: + ai: Unique identifiers for individual user records, formatted with a sequential numeric suffix. + - name: account_id + type: string + descriptions: + ai: Unique identifier linking users to their associated organizational accounts, formatted with an "acct_" prefix. + - name: email + type: string + descriptions: + ai: Email addresses of users, formatted with user and customer identifiers for multi-tenant accounts. + - name: role + type: string + descriptions: + ai: Functional access level or permission type assigned to each user within the system. + - name: is_requester + type: boolean + descriptions: + ai: Boolean flag indicating whether the user has the role of a requester within the system. + - name: is_internal + type: boolean + descriptions: + ai: Boolean flag indicating whether a user belongs to the internal organization or team. + - name: is_test + type: boolean + descriptions: + ai: Boolean flag identifying whether the user account is a test or dummy entry. + - name: created_at + type: time + descriptions: + ai: Timestamp recording when a user account was first created in the system. + - name: slack_user_id + type: string + descriptions: + ai: Unique identifiers linking users to their corresponding Slack workspace accounts for integration purposes. 
+ - name: looker_user_id + type: string + descriptions: + ai: Unique identifiers linking users to their corresponding accounts in the Looker analytics platform. + - name: notion_user_id + type: string + descriptions: + ai: Unique identifiers linking users to their corresponding Notion workspace accounts. + - name: drive_owner_id + type: string + descriptions: + ai: Unique identifiers linking users to their associated Google Drive owner accounts. + descriptions: + ai: External customer user accounts with role-based access controls, integrated across Slack and Looker platforms for workflow and analytics management. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/large_contract_requesters.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/large_contract_requesters.yaml new file mode 100644 index 00000000..faf02e14 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/large_contract_requesters.yaml @@ -0,0 +1,44 @@ +name: large_contract_requesters +source_type: sql +sql: |- + select account.account_name, + requester.email as requester_email, + activity.action_type, + activity.action_date, + round(segment.contract_arr_cents / 100.0, 0) as contract_arr_usd + from orbit_analytics.int_procurement_qualifying_actions activity + join orbit_raw.accounts account on account.account_id = activity.account_id + join orbit_raw.users requester on requester.user_id = activity.user_id + left join orbit_analytics.mart_account_segments segment on segment.account_id = activity.account_id + order by activity.action_date desc, segment.contract_arr_cents desc nulls last + limit 25 +grain: + - action_date + - requester_email + - action_type +columns: + - name: account_name + type: string + descriptions: + user: Name of the account + - name: requester_email + type: string + descriptions: + user: Email of the requester + - name: action_type + type: string + descriptions: + user: Type of qualifying procurement 
action + - name: action_date + type: time + role: time + descriptions: + user: Date the action occurred + - name: contract_arr_usd + type: number + descriptions: + user: Contract ARR in USD +joins: [] +measures: [] +descriptions: + user: Recent procurement actions by requesters on large contracts. Joins qualifying procurement actions to account names, requester emails, and contract ARR. Used to identify active requesters on high-value accounts. diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_account_activity.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_account_activity.yaml new file mode 100644 index 00000000..6fab01b3 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_account_activity.yaml @@ -0,0 +1,10 @@ +name: mart_account_activity +measures: + - name: avg_pre_policy_activation_rate + expr: avg(pre_policy_30_day_activation_rate) + description: Average 30-day activation rate for the pre-policy cohort (before 2026-01-15). + - name: avg_post_policy_activation_rate + expr: avg(post_policy_30_day_activation_rate) + description: Average 30-day activation rate for the post-policy cohort (on or after 2026-01-15). +descriptions: + user: "Pre/post-policy 30-day activation rates. Source: dbt mart_account_activity. Compares activation counts across the Jan 2026 policy boundary." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_account_segments.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_account_segments.yaml new file mode 100644 index 00000000..d8f98b29 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_account_segments.yaml @@ -0,0 +1,18 @@ +name: mart_account_segments +measures: + - name: total_contract_arr_cents + expr: sum(contract_arr_cents) + description: Total active-contract ARR in cents across all accounts. 
+ - name: active_contract_arr_cents + expr: sum(contract_arr_cents) + filter: contract_status = 'active' + description: Active-contract ARR in cents, filtered to accounts with an active contract status. + - name: count_accounts + expr: count(distinct account_id) + description: Distinct count of accounts in the segment. + - name: count_active_contract_accounts + expr: count(distinct account_id) + filter: contract_status = 'active' + description: Distinct count of accounts with an active contract. +descriptions: + user: "Per-account segment and active-contract ARR. Source: dbt mart_account_segments." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_arr_daily.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_arr_daily.yaml new file mode 100644 index 00000000..543b66c4 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_arr_daily.yaml @@ -0,0 +1,10 @@ +name: mart_arr_daily +measures: + - name: total_arr_cents + expr: sum(arr_cents) + description: Sum of ARR in cents across all snapshot dates (use with a date filter to get point-in-time ARR). + - name: latest_arr_cents + expr: max(arr_cents) + description: Most recent ARR value in cents (max across dates in the filtered window). +descriptions: + user: "Daily ARR snapshot. Source: dbt mart_arr_daily. One row per metric_date with global ARR in cents." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_customer_health.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_customer_health.yaml new file mode 100644 index 00000000..c76b0912 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_customer_health.yaml @@ -0,0 +1,27 @@ +name: mart_customer_health +measures: + - name: count_accounts + expr: count(distinct account_id) + description: Total distinct accounts in the health snapshot. 
+ - name: count_active_customers + expr: count(distinct account_id) + filter: is_active_customer = true + description: Distinct count of accounts flagged as active customers. + - name: count_high_risk_accounts + expr: count(distinct account_id) + filter: risk_level = 'high' + description: Distinct count of accounts with high risk level (open critical tickets and/or recent procurement activity). + - name: count_medium_risk_accounts + expr: count(distinct account_id) + filter: risk_level = 'medium' + description: Distinct count of accounts with medium risk level. + - name: count_accounts_with_high_ticket + expr: count(distinct account_id) + filter: has_unresolved_high_ticket = true + description: Distinct count of accounts with at least one unresolved high-severity support ticket. + - name: count_accounts_with_recent_procurement + expr: count(distinct account_id) + filter: has_recent_procurement_activity = true + description: Distinct count of accounts with recent procurement activity. +descriptions: + user: "Per-account risk signals as of a snapshot date. Source: dbt mart_customer_health." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_nrr_quarterly.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_nrr_quarterly.yaml new file mode 100644 index 00000000..925b4f75 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_nrr_quarterly.yaml @@ -0,0 +1,19 @@ +name: mart_nrr_quarterly +measures: + - name: avg_net_revenue_retention + expr: avg(net_revenue_retention) + description: Average NRR (net revenue retention) across quarters and segments. + - name: total_expansion_arr_cents + expr: sum(expansion_arr_cents) + description: Total expansion ARR in cents across all quarters and segments. + - name: total_contraction_arr_cents + expr: sum(contraction_arr_cents) + description: Total contraction ARR in cents across all quarters and segments. 
+ - name: total_churned_arr_cents + expr: sum(churned_arr_cents) + description: Total churned ARR in cents across all quarters and segments. + - name: total_starting_arr_cents + expr: sum(starting_arr_cents) + description: Total starting ARR in cents at the beginning of each quarter. +descriptions: + user: "Quarterly NRR per segment. Source: dbt mart_nrr_quarterly. Enterprise-focused in current dataset." diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_procurement_activity.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_procurement_activity.yaml new file mode 100644 index 00000000..2e9cfcc9 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_procurement_activity.yaml @@ -0,0 +1,13 @@ +name: mart_procurement_activity +measures: + - name: total_active_requesters + expr: sum(active_requesters) + description: Total count of active requesters summed across all weeks. + - name: avg_weekly_active_requesters + expr: avg(active_requesters) + description: Average number of active requesters per week (golden week metric). + - name: avg_contract_arr_threshold_cents + expr: avg(contract_arr_threshold_cents) + description: Average contract ARR threshold in cents used to qualify large active contracts. +descriptions: + user: "Weekly procurement KPI. Source: dbt mart_procurement_activity. Tracks active requesters on large active contracts." 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_retention_movement_breakout.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_retention_movement_breakout.yaml new file mode 100644 index 00000000..ac3756d7 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_retention_movement_breakout.yaml @@ -0,0 +1,25 @@ +name: mart_retention_movement_breakout +measures: + - name: total_expansion_arr_cents + expr: sum(expansion_arr_cents) + description: Total expansion ARR in cents + - name: total_contraction_arr_cents + expr: sum(contraction_arr_cents) + description: Total contraction ARR in cents (includes discount expiration contraction) + - name: total_churned_arr_cents + expr: sum(churned_arr_cents) + description: Total churned ARR in cents + - name: parent_account_count + expr: sum(parent_account_count) + description: Total number of parent accounts affected by the movement + - name: expansion_arr_millions + expr: round(sum(expansion_arr_cents) / 100000000.0, 3) + description: Expansion ARR in millions of dollars + - name: contraction_arr_millions + expr: round(sum(contraction_arr_cents) / 100000000.0, 3) + description: Contraction ARR in millions of dollars + - name: churned_arr_millions + expr: round(sum(churned_arr_cents) / 100000000.0, 3) + description: Churned ARR in millions of dollars +descriptions: + user: Quarterly ARR movement breakout by segment, movement_type (expansion/contraction/churn), and movement_reason. One row per quarter × segment × movement_type × movement_reason. Contraction includes discount expirations (is_discount_expiration_contraction). Used for NRR waterfall analysis. 
diff --git a/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_revenue_daily.yaml b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_revenue_daily.yaml new file mode 100644 index 00000000..3bbb59d9 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/postgres-warehouse/mart_revenue_daily.yaml @@ -0,0 +1,20 @@ +name: mart_revenue_daily +measures: + - name: total_gross_revenue_cents + expr: sum(gross_revenue_cents) + description: Total gross invoice revenue in cents across all days. + - name: total_credits_cents + expr: sum(credits_cents) + description: Total credits applied in cents across all days. + - name: total_refunds_cents + expr: sum(refunds_cents) + description: Total refunds issued in cents across all days. + - name: total_net_revenue_cents + expr: sum(net_revenue_cents) + description: Total net revenue in cents (gross minus credits and refunds) across all days. + - name: count_reconciled_days + expr: count(*) + filter: reconciliation_check = true + description: Number of days where reconciliation_check passed (gross - credits - refunds = net). +descriptions: + user: "Daily gross-to-net revenue. Source: dbt mart_revenue_daily. Aggregate-only — no account dimension." 
diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts index a54280be..ae593805 100644 --- a/packages/cli/src/connection.test.ts +++ b/packages/cli/src/connection.test.ts @@ -1,7 +1,8 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { initKtxProject, parseKtxProjectConfig } from '@ktx/context/project'; +import type { MetabaseRuntimeClient } from '@ktx/context/ingest'; +import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project'; import type { KtxConnectionDriver, KtxScanConnector, KtxSchemaSnapshot } from '@ktx/context/scan'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxConnection } from './connection.js'; @@ -598,6 +599,61 @@ describe('runKtxConnection', () => { expect(io.stdout()).toContain('Tables: 2'); }); + it('tests a configured Metabase connection through the Metabase runtime client', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + const projectConfig = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...projectConfig, + connections: { + ...projectConfig.connections, + prod_metabase: { + driver: 'metabase', + api_url: 'http://metabase.example.test', + api_key: 'mb_test', + }, + }, + }), + 'utf-8', + ); + const testConnection = vi.fn(async () => ({ success: true as const })); + const getDatabases = vi.fn(async () => [ + { id: 1, name: 'Analytics', engine: 'postgres', details: {}, is_sample: false }, + { id: 2, name: 'Sample Database', engine: 'h2', details: {}, is_sample: true }, + ]); + const cleanup = vi.fn(async () => undefined); + const createMetabaseClient = vi.fn( + async (): Promise> => ({ + testConnection, + getDatabases, + cleanup, + }), + 
); + const createScanConnector = vi.fn(async () => { + throw new Error('native scanner should not be used for Metabase'); + }); + const io = makeIo(); + + await expect( + runKtxConnection({ command: 'test', projectDir, connectionId: 'prod_metabase' }, io.io, { + createScanConnector, + createMetabaseClient, + }), + ).resolves.toBe(0); + + expect(createScanConnector).not.toHaveBeenCalled(); + expect(createMetabaseClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'prod_metabase'); + expect(testConnection).toHaveBeenCalledTimes(1); + expect(getDatabases).toHaveBeenCalledTimes(1); + expect(cleanup).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('Connection test passed: prod_metabase'); + expect(io.stdout()).toContain('Driver: metabase'); + expect(io.stdout()).toContain('Databases: 1'); + expect(io.stderr()).toBe(''); + }); + it('cleans up the native scan connector when connection testing fails', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index aa6de7c2..1dde60ac 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -1,4 +1,10 @@ import { cancel, confirm, isCancel } from '@clack/prompts'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultMetabaseConnectionClientFactory, + type MetabaseRuntimeClient, + metabaseRuntimeConfigFromLocalConnection, +} from '@ktx/context/ingest'; import { type KtxLocalProject, loadKtxProject, serializeKtxProjectConfig } from '@ktx/context/project'; import type { KtxScanConnector } from '@ktx/context/scan'; import type { KtxConnectionMappingArgs } from './commands/connection-mapping.js'; @@ -61,6 +67,7 @@ interface KtxConnectionIo extends KtxCliIo { interface KtxConnectionDeps { createScanConnector?: typeof createKtxCliScanConnector; + createMetabaseClient?: typeof createDefaultMetabaseClient; runMapping?: (argv: string[], io: 
KtxCliIo) => Promise; prompts?: KtxConnectionPromptAdapter; } @@ -104,6 +111,12 @@ async function cleanupConnector(connector: KtxScanConnector | null): Promise> { + const factory = new DefaultMetabaseConnectionClientFactory( + (metabaseConnectionId) => + metabaseRuntimeConfigFromLocalConnection( + metabaseConnectionId, + project.config.connections[metabaseConnectionId], + ), + DEFAULT_METABASE_CLIENT_CONFIG, + ); + return factory.createClient(connectionId); +} + +async function testMetabaseConnection( + project: KtxLocalProject, + connectionId: string, + createMetabaseClient: typeof createDefaultMetabaseClient, +): Promise<{ driver: 'metabase'; databaseCount: number }> { + let client: Pick | null = null; + try { + client = await createMetabaseClient(project, connectionId); + const testResult = await client.testConnection(); + if (!testResult.success) { + throw new Error( + `Metabase connection test failed: ${testResult.error ?? testResult.message ?? 'unknown error'}`, + ); + } + + const databases = await client.getDatabases(); + const databaseCount = databases.filter((database) => database.is_sample !== true).length; + if (databaseCount === 0) { + throw new Error('Metabase auth worked but no usable databases were returned'); + } + + return { driver: 'metabase', databaseCount }; + } finally { + await client?.cleanup(); + } +} + interface BufferedIo extends KtxCliIo { stdoutText(): string; stderrText(): string; @@ -399,6 +454,18 @@ export async function runKtxConnection( return 0; } + if (normalizedConnectionDriver(project, args.connectionId) === 'metabase') { + const result = await testMetabaseConnection( + project, + args.connectionId, + deps.createMetabaseClient ?? 
createDefaultMetabaseClient, + ); + io.stdout.write(`Connection test passed: ${args.connectionId}\n`); + io.stdout.write(`Driver: ${result.driver}\n`); + io.stdout.write(`Databases: ${result.databaseCount}\n`); + return 0; + } + const result = await testNativeConnection( project, args.connectionId, diff --git a/packages/cli/src/demo-assets.test.ts b/packages/cli/src/demo-assets.test.ts index d8307c66..7ef89296 100644 --- a/packages/cli/src/demo-assets.test.ts +++ b/packages/cli/src/demo-assets.test.ts @@ -91,22 +91,17 @@ describe('demo assets', () => { expect(manifest.sources.bi.explores).toBeGreaterThanOrEqual(2); expect(manifest.sources.bi.dashboards).toBeGreaterThanOrEqual(2); expect(manifest.sources.notion.pages).toBeGreaterThanOrEqual(5); - expect(manifest.generated.semanticLayer.sourceCount).toBeGreaterThanOrEqual(5); - expect(manifest.generated.knowledge.pageCount).toBeGreaterThanOrEqual(10); + expect(manifest.generated.semanticLayer.sourceCount).toBeGreaterThanOrEqual(40); + expect(manifest.generated.knowledge.pageCount).toBeGreaterThanOrEqual(20); expect(manifest.generated.links.linkCount).toBeGreaterThanOrEqual(10); const dbStat = await stat(packagedDemoAssetPath('demo.db')); expect(dbStat.size).toBeGreaterThan(0); expect(dbStat.size).toBeLessThan(10 * 1024 * 1024); - await expect(access(packagedDemoAssetPath('raw-sources/warehouse/accounts.csv'))).resolves.toBeUndefined(); - await expect(access(packagedDemoAssetPath('raw-sources/dbt/schema.yml'))).resolves.toBeUndefined(); - await expect(access(packagedDemoAssetPath('raw-sources/bi/revenue_exec.dashboard.lookml'))).resolves.toBeUndefined(); - await expect(access(packagedDemoAssetPath('raw-sources/notion/revenue-reporting-policy.md'))).resolves.toBeUndefined(); - expect(manifest.generated.semanticLayer.path).toBe('semantic-layer/orbit_demo'); - - await expect(access(packagedDemoAssetPath('semantic-layer/orbit_demo/accounts.yaml'))).resolves.toBeUndefined(); - await 
expect(access(packagedDemoAssetPath('knowledge/global/arr-contract-first.md'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('semantic-layer/dbt-main/mart_arr_daily.yaml'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('semantic-layer/postgres-warehouse/mart_account_activity.yaml'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('knowledge/global/orbit-company-overview.md'))).resolves.toBeUndefined(); await expect(access(packagedDemoAssetPath('links/provenance.json'))).resolves.toBeUndefined(); await expect(access(packagedDemoAssetPath('reports/seeded-demo-report.json'))).resolves.toBeUndefined(); }); diff --git a/packages/cli/src/demo-assets.ts b/packages/cli/src/demo-assets.ts index c0127d41..6754164a 100644 --- a/packages/cli/src/demo-assets.ts +++ b/packages/cli/src/demo-assets.ts @@ -45,14 +45,9 @@ const REQUIRED_SEEDED_ASSET_PATHS = [ 'demo.db', 'manifest.json', DEMO_REPLAY_FILE, - join('raw-sources', 'warehouse', 'accounts.csv'), - join('raw-sources', 'dbt', 'schema.yml'), - join('raw-sources', 'bi', 'revenue_exec.dashboard.lookml'), - join('raw-sources', 'notion', 'revenue-reporting-policy.md'), - join('semantic-layer', 'orbit_demo', 'accounts.yaml'), - join('knowledge', 'global', 'arr-contract-first.md'), - join('links', 'provenance.json'), - join('reports', 'seeded-demo-report.json'), + join('semantic-layer', 'dbt-main', 'mart_arr_daily.yaml'), + join('semantic-layer', 'postgres-warehouse', 'mart_account_activity.yaml'), + join('knowledge', 'global', 'orbit-company-overview.md'), ] as const; function assetDir(): string { diff --git a/packages/cli/src/demo-seeded-inspect.test.ts b/packages/cli/src/demo-seeded-inspect.test.ts index 35c76861..a45415bd 100644 --- a/packages/cli/src/demo-seeded-inspect.test.ts +++ b/packages/cli/src/demo-seeded-inspect.test.ts @@ -53,8 +53,8 @@ describe('seeded demo inspect contract', () => { notion: { label: 'Notion', path: 'raw-sources/notion', 
pageCount: 8 }, }, generatedOutputs: { - semanticLayer: { path: 'semantic-layer/orbit_demo', manifestSourceCount: 6, fileCount: 6 }, - knowledge: { path: 'knowledge/global', manifestPageCount: 10, fileCount: 10 }, + semanticLayer: { path: 'semantic-layer', manifestSourceCount: 46, fileCount: 46 }, + knowledge: { path: 'knowledge/global', manifestPageCount: 28, fileCount: 28 }, links: { path: 'links/provenance.json', manifestLinkCount: 23, linkCount: 23 }, reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 }, replays: { primaryPath: 'replays/replay.memory-flow.v1.json', latestPath: 'replays/latest.memory-flow.v1.json' }, @@ -83,8 +83,8 @@ describe('seeded demo inspect contract', () => { expect(output).toContain('dbt: 3 models, 8 source tables'); expect(output).toContain('BI: 5 explores, 2 dashboards'); expect(output).toContain('Notion: 8 pages'); - expect(output).toContain('Semantic-layer sources: 6 manifest, 6 files'); - expect(output).toContain('Knowledge pages: 10 manifest, 10 files'); + expect(output).toContain('Semantic-layer sources: 46 manifest, 46 files'); + expect(output).toContain('Knowledge pages: 28 manifest, 28 files'); expect(output).toContain('Evidence links: 23 manifest, 23 links'); expect(output).toContain('Report: reports/seeded-demo-report.json'); expect(output).toContain('Replay: replays/replay.memory-flow.v1.json'); diff --git a/packages/cli/src/demo-seeded-inspect.ts b/packages/cli/src/demo-seeded-inspect.ts index 0081e4b8..13320890 100644 --- a/packages/cli/src/demo-seeded-inspect.ts +++ b/packages/cli/src/demo-seeded-inspect.ts @@ -71,12 +71,9 @@ const REQUIRED_SEEDED_PROJECT_PATHS = [ 'state.sqlite', 'manifest.json', join('replays', 'replay.memory-flow.v1.json'), - join('raw-sources', 'warehouse', 'accounts.csv'), - join('raw-sources', 'dbt', 'schema.yml'), - join('raw-sources', 'bi', 'revenue_exec.dashboard.lookml'), - join('raw-sources', 'notion', 'revenue-reporting-policy.md'), - join('semantic-layer', 'orbit_demo', 
'accounts.yaml'), - join('knowledge', 'global', 'arr-contract-first.md'), + join('semantic-layer', 'dbt-main', 'mart_arr_daily.yaml'), + join('semantic-layer', 'postgres-warehouse', 'mart_account_activity.yaml'), + join('knowledge', 'global', 'orbit-company-overview.md'), join('links', 'provenance.json'), join('reports', 'seeded-demo-report.json'), ] as const; diff --git a/packages/cli/src/demo-seeded.test.ts b/packages/cli/src/demo-seeded.test.ts index c6065c07..95bf0a5a 100644 --- a/packages/cli/src/demo-seeded.test.ts +++ b/packages/cli/src/demo-seeded.test.ts @@ -19,11 +19,9 @@ describe('demo seeded mode', () => { await expect(access(join(projectDir, 'demo.db'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'ktx.yaml'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'manifest.json'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'semantic-layer/orbit_demo/accounts.yaml'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'knowledge/global/arr-contract-first.md'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'raw-sources/dbt/schema.yml'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'raw-sources/bi/revenue_exec.dashboard.lookml'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'raw-sources/notion/revenue-reporting-policy.md'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'semantic-layer/dbt-main/mart_arr_daily.yaml'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'semantic-layer/postgres-warehouse/mart_account_activity.yaml'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'knowledge/global/orbit-company-overview.md'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'links/provenance.json'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'reports/seeded-demo-report.json'))).resolves.toBeUndefined(); }); @@ -88,8 +86,8 @@ describe('demo 
seeded mode', () => { it('SL YAML validates correctly', async () => { await ensureSeededDemoProject({ projectDir, force: false }); - const slYaml = await readFile(join(projectDir, 'semantic-layer/orbit_demo/accounts.yaml'), 'utf-8'); - expect(slYaml).toContain('name: accounts'); + const slYaml = await readFile(join(projectDir, 'semantic-layer/dbt-main/mart_arr_daily.yaml'), 'utf-8'); + expect(slYaml).toContain('name: mart_arr_daily'); expect(slYaml).toContain('grain:'); expect(slYaml).toContain('columns:'); expect(slYaml).toContain('measures:'); @@ -98,11 +96,11 @@ describe('demo seeded mode', () => { it('wiki pages have valid frontmatter', async () => { await ensureSeededDemoProject({ projectDir, force: false }); - const wiki = await readFile(join(projectDir, 'knowledge/global/arr-contract-first.md'), 'utf-8'); + const wiki = await readFile(join(projectDir, 'knowledge/global/orbit-company-overview.md'), 'utf-8'); expect(wiki).toContain('---'); expect(wiki).toContain('summary:'); expect(wiki).toContain('tags:'); - expect(wiki).toContain('sl_refs:'); + expect(wiki).toContain('refs:'); expect(wiki).toContain('usage_mode: auto'); }); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 9fc4dc82..59df5e86 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -32,6 +32,7 @@ import { writeWarehouseConfig, } from './ingest.test-utils.js'; import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; +import { runKtxSetup } from './setup.js'; describe('runKtxIngest', () => { let tempDir: string; @@ -105,6 +106,75 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); + it('prints provider setup guidance when a skip-llm setup project runs dev ingest', async () => { + const projectDir = join(tempDir, 'project'); + const setupIo = makeIo(); + await expect( + runKtxSetup( + { + command: 'run', + projectDir, + mode: 'new', + agents: false, + agentScope: 'project', + agentInstallMode: 
'cli', + skipAgents: true, + inputMode: 'disabled', + yes: true, + cliVersion: '0.0.0-test', + skipLlm: true, + skipEmbeddings: true, + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:WAREHOUSE_URL', + databaseSchemas: [], + enableHistoricSql: true, + skipDatabases: false, + skipSources: true, + }, + setupIo.io, + { + databasesDeps: { + testConnection: async (_projectDir, _connectionId, io) => { + io.stdout.write('Driver: postgres\nTables: 1\n'); + return 0; + }, + scanConnection: async () => 0, + historicSqlProbe: async () => ({ ok: true, lines: ['PASS Historic SQL probe skipped in test'] }), + }, + context: async () => ({ status: 'skipped', projectDir }), + }, + ), + ).resolves.toBe(0); + + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runIo = makeIo(); + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + sourceDir, + outputMode: 'plain', + }, + runIo.io, + ), + ).resolves.toBe(1); + + expect(runIo.stdout()).toBe(''); + expect(runIo.stderr()).toContain( + 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + ); + expect(runIo.stderr()).toContain( + `ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, + ); + }); + it('routes metabase scheduled pulls to the fan-out runner and prints child summaries', async () => { const projectDir = join(tempDir, 'project'); await writeMetabaseConfig(projectDir); @@ -918,6 +988,97 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); }); + it('prints plain WorkUnit step progress during long-running local ingest', async () => { + const projectDir = join(tempDir, 'historic-sql-step-progress-project'); + await 
mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: historic-sql-step-progress-project', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' minExecutions: 2', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + 'utf-8', + ); + const createdAdapters: SourceAdapter[] = [ + { source: 'historic-sql', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'historic-sql-table-public-orders', + rawFiles: ['tables/public/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + { + unitKey: 'historic-sql-table-public-customers', + rawFiles: ['tables/public/customers.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }); + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: 'historic-sql-table-public-orders', + skills: ['historic_sql_table_digest'], + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'work_unit_step', + unitKey: 'historic-sql-table-public-orders', + stepIndex: 7, + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'work_unit_finished', + unitKey: 'historic-sql-table-public-orders', + status: 'success', + }); + input.memoryFlow?.finish('done'); + return completedLocalBundleRun(input, input.jobId ?? 
'historic-step-progress-job'); + }); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + outputMode: 'plain', + }, + io.io, + { + env: interactiveEnv(), + createAdapters: vi.fn(() => createdAdapters as never), + runLocalIngest: runLocal, + jobIdFactory: () => 'historic-step-progress-job', + }, + ), + ).resolves.toBe(0); + + const stdout = io.stdout(); + expect(stdout).toContain('[45%] Planned 2 work units'); + expect(stdout).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); + expect(stdout).toContain('[58%] Processing 1/2 work units: historic-sql-table-public-orders step 7/40'); + expect(stdout).toContain('[68%] Processed 1/2 work units'); + }); + it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index a580b3d5..39bf21bb 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -168,13 +168,37 @@ function formatDiffProgress(event: Extract event.type === 'work_unit_finished').length; +function workUnitEventsThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): MemoryFlowEvent[] { + return snapshot.events.slice(0, eventIndex + 1); +} + +function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { + return workUnitEventsThrough(snapshot, eventIndex).filter((event) => event.type === 'work_unit_finished').length; +} + +function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { + if (snapshot.plannedWorkUnits.length > 0) { + return snapshot.plannedWorkUnits.length; + } + const planEvent = workUnitEventsThrough(snapshot, eventIndex) + .filter((event) => event.type === 'chunks_planned') + .at(-1); + 
return planEvent?.workUnitCount ?? completedWorkUnitCountThrough(snapshot, eventIndex); +} + +function workUnitOrdinalThrough(snapshot: MemoryFlowReplayInput, eventIndex: number, unitKey: string): number { + const events = workUnitEventsThrough(snapshot, eventIndex); + const startedIndex = events.findIndex((event) => event.type === 'work_unit_started' && event.unitKey === unitKey); + if (startedIndex === -1) { + return completedWorkUnitCountThrough(snapshot, eventIndex) + 1; + } + return events.slice(0, startedIndex + 1).filter((event) => event.type === 'work_unit_started').length; } function plainIngestEventProgress( event: MemoryFlowEvent, snapshot: MemoryFlowReplayInput, + eventIndex: number, ): { percent: number; message: string } | null { switch (event.type) { case 'source_acquired': @@ -196,11 +220,27 @@ function plainIngestEventProgress( }; case 'stage_skipped': return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` }; - case 'work_unit_started': - return { percent: 55, message: `Processing ${event.unitKey}` }; + case 'work_unit_started': { + const total = plannedWorkUnitCountThrough(snapshot, eventIndex); + const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); + const progress = total > 0 ? `${ordinal}/${total} work units: ` : ''; + return { percent: 55, message: `Processing ${progress}${event.unitKey}` }; + } + case 'work_unit_step': { + const total = plannedWorkUnitCountThrough(snapshot, eventIndex); + const completed = completedWorkUnitCountThrough(snapshot, eventIndex); + const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); + const stepFraction = event.stepBudget > 0 ? Math.min(1, event.stepIndex / event.stepBudget) : 0; + const percent = total > 0 ? 55 + Math.ceil(((completed + stepFraction) / total) * 25) : 55; + const progress = total > 0 ? 
`${ordinal}/${total} work units: ` : ''; + return { + percent, + message: `Processing ${progress}${event.unitKey} step ${event.stepIndex}/${event.stepBudget}`, + }; + } case 'work_unit_finished': { - const total = snapshot.plannedWorkUnits.length || completedWorkUnitCount(snapshot); - const completed = completedWorkUnitCount(snapshot); + const total = plannedWorkUnitCountThrough(snapshot, eventIndex); + const completed = completedWorkUnitCountThrough(snapshot, eventIndex); const percent = total > 0 ? 55 + Math.round((completed / total) * 25) : 80; return { percent, @@ -225,7 +265,6 @@ function plainIngestEventProgress( case 'report_created': return { percent: 98, message: `Created ingest report ${event.reportPath ?? event.runId}` }; case 'scope_detected': - case 'work_unit_step': case 'candidate_action': return null; } @@ -259,11 +298,12 @@ function createPlainIngestProgressRenderer( }, update(snapshot) { while (printedEvents < snapshot.events.length) { + const eventIndex = printedEvents; const event = snapshot.events[printedEvents++]; if (!event) { continue; } - const progress = plainIngestEventProgress(event, snapshot); + const progress = plainIngestEventProgress(event, snapshot, eventIndex); if (progress) { write(progress.percent, progress.message); } diff --git a/packages/cli/src/managed-python-daemon.test.ts b/packages/cli/src/managed-python-daemon.test.ts index 24df2a78..4e7af22c 100644 --- a/packages/cli/src/managed-python-daemon.test.ts +++ b/packages/cli/src/managed-python-daemon.test.ts @@ -170,6 +170,41 @@ describe('managed Python daemon lifecycle', () => { }); }); + it('makes a final health probe before reporting startup failure', async () => { + const spawnDaemon = makeSpawn(5556); + const installRuntime = vi.fn(async () => installResult(tempDir)); + const fetch = vi + .fn() + .mockRejectedValueOnce(new Error('fetch failed')) + .mockResolvedValueOnce({ + ok: true, + status: 200, + json: async () => ({ status: 'healthy', version: '0.2.0' }), + text: 
async () => '', + }); + + const result = await startManagedPythonDaemon({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + features: ['core'], + installRuntime, + spawnDaemon, + fetch, + allocatePort: vi.fn(async () => 61234), + now: () => new Date('2026-05-11T00:00:00.000Z'), + startupTimeoutMs: 5, + pollIntervalMs: 20, + }); + + expect(result.status).toBe('started'); + expect(fetch).toHaveBeenCalledTimes(2); + expect(JSON.parse(await readFile(layout(tempDir).daemonStatePath, 'utf8'))).toMatchObject({ + pid: 5556, + port: 61234, + version: '0.2.0', + }); + }); + it('reuses a healthy daemon with the requested feature set', async () => { await mkdir(layout(tempDir).versionDir, { recursive: true }); await writeFile(layout(tempDir).daemonStatePath, `${JSON.stringify(runningState(tempDir), null, 2)}\n`); diff --git a/packages/cli/src/managed-python-daemon.ts b/packages/cli/src/managed-python-daemon.ts index 4b128c63..2caf9182 100644 --- a/packages/cli/src/managed-python-daemon.ts +++ b/packages/cli/src/managed-python-daemon.ts @@ -273,6 +273,15 @@ async function waitForHealth(input: { lastDetail = health.detail; await delay(input.pollIntervalMs); } + const finalHealth = await healthOk({ + state: input.state, + cliVersion: input.cliVersion, + fetch: input.fetch, + }); + if (finalHealth.ok) { + return; + } + lastDetail = finalHealth.detail; throw new Error(`KTX Python daemon failed to start: ${lastDetail}. 
stderr: ${input.state.stderrLog}`); } diff --git a/packages/cli/src/managed-python-runtime.test.ts b/packages/cli/src/managed-python-runtime.test.ts index d100e409..fa14f480 100644 --- a/packages/cli/src/managed-python-runtime.test.ts +++ b/packages/cli/src/managed-python-runtime.test.ts @@ -161,6 +161,14 @@ describe('verifyRuntimeAsset', () => { await expect(verifyRuntimeAsset({ assetDir })).rejects.toThrow(/Unsafe runtime wheel filename/); }); + + it('reports the source-checkout artifact command when the bundled manifest is missing', async () => { + const assetDir = join(tempDir, 'packages', 'cli', 'assets', 'python'); + + await expect(verifyRuntimeAsset({ assetDir })).rejects.toThrow( + /Missing bundled Python runtime manifest.*pnpm run artifacts:build/s, + ); + }); }); describe('installManagedPythonRuntime', () => { @@ -210,6 +218,30 @@ describe('installManagedPythonRuntime', () => { expect(manifest.python.daemonExecutable).toBe(result.layout.daemonPath); }); + it('disables repo uv config for managed runtime uv commands', async () => { + const { assetDir } = await writeAsset(tempDir, 'core-wheel'); + const commands: Array<{ command: string; args: string[]; env?: NodeJS.ProcessEnv }> = []; + const exec: ManagedPythonRuntimeExec = vi.fn(async (command, args, options) => { + commands.push({ command, args, env: options?.env }); + return { stdout: command === 'uv' && args[0] === '--version' ? 
'uv 0.11.13\n' : '', stderr: '' }; + }); + + await installManagedPythonRuntime({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + assetDir, + env: { PATH: '/opt/homebrew/bin', UV_NO_CONFIG: '0' }, + features: ['core'], + exec, + }); + + expect(commands.map((call) => [call.command, call.args[0], call.env?.UV_NO_CONFIG, call.env?.PATH])).toEqual([ + ['uv', '--version', '1', '/opt/homebrew/bin'], + ['uv', 'venv', '1', '/opt/homebrew/bin'], + ['uv', 'pip', '1', '/opt/homebrew/bin'], + ]); + }); + it('installs the local-embeddings extra when requested', async () => { const { assetDir } = await writeAsset(tempDir, 'embedding-wheel'); const commands: Array<{ command: string; args: string[] }> = []; diff --git a/packages/cli/src/managed-python-runtime.ts b/packages/cli/src/managed-python-runtime.ts index 2b715b69..bb1a71d8 100644 --- a/packages/cli/src/managed-python-runtime.ts +++ b/packages/cli/src/managed-python-runtime.ts @@ -186,9 +186,28 @@ async function readJsonFile(path: string): Promise { return JSON.parse(await readFile(path, 'utf8')) as unknown; } +function isErrnoException(error: unknown, code: string): boolean { + return typeof error === 'object' && error !== null && 'code' in error && error.code === code; +} + export async function verifyRuntimeAsset(input: { assetDir: string }): Promise { const manifestPath = join(input.assetDir, 'manifest.json'); - const manifest = runtimeAssetManifestSchema.parse(await readJsonFile(manifestPath)); + let manifestData: unknown; + try { + manifestData = await readJsonFile(manifestPath); + } catch (error) { + if (isErrnoException(error, 'ENOENT')) { + throw new Error( + [ + `Missing bundled Python runtime manifest: ${manifestPath}`, + 'In a source checkout, build the local runtime assets with: pnpm run artifacts:build', + 'Then retry the runtime-backed KTX command.', + ].join('\n'), + ); + } + throw error; + } + const manifest = runtimeAssetManifestSchema.parse(manifestData); 
assertSafeWheelFilename(manifest.wheel.file); const wheelPath = join(input.assetDir, manifest.wheel.file); const wheel = await readFile(wheelPath); @@ -243,10 +262,11 @@ async function runLogged(input: { command: string; args: string[]; cwd?: string; + env?: NodeJS.ProcessEnv; }): Promise<{ stdout: string; stderr: string }> { await appendFile(input.logPath, `$ ${input.command} ${input.args.join(' ')}\n`); try { - const result = await input.exec(input.command, input.args, { cwd: input.cwd }); + const result = await input.exec(input.command, input.args, { cwd: input.cwd, env: input.env }); if (result.stdout) { await appendFile(input.logPath, result.stdout.endsWith('\n') ? result.stdout : `${result.stdout}\n`); } @@ -266,9 +286,13 @@ async function runLogged(input: { } } -async function ensureUv(exec: ManagedPythonRuntimeExec): Promise { +function managedRuntimeUvEnv(baseEnv: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + return { ...baseEnv, UV_NO_CONFIG: '1' }; +} + +async function ensureUv(exec: ManagedPythonRuntimeExec, env?: NodeJS.ProcessEnv): Promise { try { - const result = await exec('uv', ['--version']); + const result = await exec('uv', ['--version'], { env }); return result.stdout.trim() || 'uv available'; } catch { throw new Error(MISSING_UV_RUNTIME_INSTALL_MESSAGE); @@ -282,6 +306,7 @@ export async function installManagedPythonRuntime( const exec = options.exec ?? defaultExec; const features = normalizeFeatures(options.features); const asset = await verifyRuntimeAsset({ assetDir: layout.assetDir }); + const uvEnv = managedRuntimeUvEnv(options.env ?? 
process.env); const existing = await readInstalledManifest(layout.manifestPath); if ( options.force !== true && @@ -298,14 +323,21 @@ export async function installManagedPythonRuntime( await rm(layout.versionDir, { recursive: true, force: true }); await mkdir(layout.versionDir, { recursive: true }); await writeFile(layout.installLogPath, ''); - await ensureUv(exec); - await runLogged({ exec, logPath: layout.installLogPath, command: 'uv', args: ['venv', layout.venvDir] }); + await ensureUv(exec, uvEnv); + await runLogged({ + exec, + logPath: layout.installLogPath, + command: 'uv', + args: ['venv', layout.venvDir], + env: uvEnv, + }); const wheelSpec = features.includes('local-embeddings') ? `${asset.wheelPath}[local-embeddings]` : asset.wheelPath; await runLogged({ exec, logPath: layout.installLogPath, command: 'uv', args: ['pip', 'install', '--python', layout.pythonPath, wheelSpec], + env: uvEnv, }); const manifest: InstalledKtxRuntimeManifest = { @@ -371,7 +403,7 @@ export async function doctorManagedPythonRuntime( const exec = options.exec ?? defaultExec; const checks: ManagedPythonRuntimeDoctorCheck[] = []; try { - const version = await ensureUv(exec); + const version = await ensureUv(exec, managedRuntimeUvEnv(options.env ?? 
process.env)); checks.push(check('pass', { id: 'uv', label: 'uv', detail: version })); } catch (error) { checks.push( diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 09b9d29f..a20df910 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -1295,6 +1295,7 @@ describe('setup databases step', () => { expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns'); expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); + expect(config.ingest.workUnits.maxConcurrency).toBe(6); expect(io.stdout()).toContain('Historic SQL probe...'); expect(io.stdout()).toContain('pg_stat_statements ready'); }); diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index bd554590..3d49f75b 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -14,6 +14,8 @@ import { runKtxScan } from './scan.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; +const HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY = 6; + export type KtxSetupDatabaseDriver = | 'sqlite' | 'postgres' @@ -843,7 +845,7 @@ async function writeConnectionConfig(input: { ? 
(input.connection.historicSql as Record) : null; if (historicSql?.enabled === true) { - await ensureHistoricSqlAdapterEnabled(input.projectDir); + await ensureHistoricSqlIngestDefaults(input.projectDir); } } @@ -954,9 +956,19 @@ async function maybeConfigurePostgresSchemas(input: { return true; } -async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise { +async function ensureHistoricSqlIngestDefaults(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - if (project.config.ingest.adapters.includes('historic-sql')) { + const adapters = project.config.ingest.adapters.includes('historic-sql') + ? project.config.ingest.adapters + : [...project.config.ingest.adapters, 'historic-sql']; + const maxConcurrency = Math.max( + project.config.ingest.workUnits.maxConcurrency, + HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY, + ); + if ( + adapters === project.config.ingest.adapters && + maxConcurrency === project.config.ingest.workUnits.maxConcurrency + ) { return; } await writeFile( @@ -965,7 +977,11 @@ async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise { + it('contains "Demo mode"', () => { + const plain = stripAnsi(renderDemoBanner()); + expect(plain).toContain('Demo mode'); + }); + + it('mentions pre-processed data', () => { + const plain = stripAnsi(renderDemoBanner()); + expect(plain).toContain('pre-processed'); + }); + + it('mentions read-only', () => { + const plain = stripAnsi(renderDemoBanner()); + expect(plain).toContain('read-only'); + }); +}); + +describe('renderDemoCardContent', () => { + it('contains the title', () => { + const plain = stripAnsi(renderDemoCardContent('Database connection', ['Postgres'])); + expect(plain).toContain('Database connection'); + }); + + it('contains each selection', () => { + const plain = stripAnsi(renderDemoCardContent('Sources', ['dbt', 'metabase'])); + expect(plain).toContain('dbt'); + expect(plain).toContain('metabase'); + }); + + it('contains navigation hints', 
() => { + const plain = stripAnsi(renderDemoCardContent('Title', ['a'])); + expect(plain).toContain('Press Enter to continue'); + expect(plain).toContain('Escape to go back'); + }); + + it('works with multiple selections', () => { + const result = renderDemoCardContent('Pick', ['one', 'two', 'three']); + const plain = stripAnsi(result); + expect(plain).toContain('one'); + expect(plain).toContain('two'); + expect(plain).toContain('three'); + // Each selection gets a ▸ bullet + const bullets = (plain.match(/▸/g) ?? []).length; + expect(bullets).toBe(3); + }); +}); + +describe('renderDemoAgentTransition', () => { + it('contains "Demo project is ready"', () => { + const plain = stripAnsi(renderDemoAgentTransition()); + expect(plain).toContain('Demo project is ready'); + }); + + it('mentions connecting an agent', () => { + const plain = stripAnsi(renderDemoAgentTransition()); + expect(plain).toContain('connect your agent'); + }); +}); + +describe('renderDemoCompletionSummary', () => { + const projectDir = '/tmp/ktx-demo-123'; + + it('includes the project path', () => { + const plain = stripAnsi(renderDemoCompletionSummary(projectDir, true)); + expect(plain).toContain(projectDir); + }); + + it('includes a temp directory warning', () => { + const plain = stripAnsi(renderDemoCompletionSummary(projectDir, true)); + expect(plain).toContain('temporary directory'); + }); + + it('points to ktx setup for real data', () => { + const plain = stripAnsi(renderDemoCompletionSummary(projectDir, true)); + expect(plain).toContain('ktx setup'); + }); + + it('shows agent-connected message when installed', () => { + const plain = stripAnsi(renderDemoCompletionSummary(projectDir, true)); + expect(plain).toContain('agent is connected'); + }); + + it('includes star headline', () => { + const plain = stripAnsi(renderDemoCompletionSummary(projectDir, true)); + expect(plain).toContain('★ KTX demo is ready'); + }); + + it('shows manual instructions when agent not installed', () => { + const plain 
= stripAnsi(renderDemoCompletionSummary(projectDir, false)); + expect(plain).toContain('--agents'); + expect(plain).toContain(`--project-dir ${projectDir}`); + }); +}); + +describe('buildDemoReplayTimeline', () => { + const timeline = buildDemoReplayTimeline(); + const connectionIds = new Set(timeline.map((e) => e.connectionId)); + + it('produces events for all 4 targets', () => { + expect(connectionIds.size).toBe(4); + expect(connectionIds).toContain('postgres-warehouse'); + expect(connectionIds).toContain('dbt-main'); + expect(connectionIds).toContain('metabase-main'); + expect(connectionIds).toContain('notion-main'); + }); + + it('all targets end as done', () => { + for (const id of connectionIds) { + const events = timeline.filter((e) => e.connectionId === id); + const last = events[events.length - 1]; + expect(last.status).toBe('done'); + } + }); + + it('events are sorted by delayMs', () => { + for (let i = 1; i < timeline.length; i++) { + expect(timeline[i].delayMs).toBeGreaterThanOrEqual(timeline[i - 1].delayMs); + } + }); +}); + +describe('DEMO_REPLAY_TARGETS', () => { + it('has 1 primary source', () => { + expect(DEMO_REPLAY_TARGETS.primarySources).toHaveLength(1); + }); + + it('has 3 context sources', () => { + expect(DEMO_REPLAY_TARGETS.contextSources).toHaveLength(3); + }); + + it('primary source is a scan operation', () => { + expect(DEMO_REPLAY_TARGETS.primarySources[0].operation).toBe('scan'); + }); + + it('context sources are source-ingest operations', () => { + for (const source of DEMO_REPLAY_TARGETS.contextSources) { + expect(source.operation).toBe('source-ingest'); + } + }); +}); + +describe('runDemoTour', () => { + function createMockIo() { + const chunks: string[] = []; + return { + io: { + stdout: { isTTY: true, columns: 80, write: (chunk: string) => { chunks.push(chunk); } }, + stderr: { write: () => {} }, + }, + chunks, + }; + } + + it('returns 0 on successful tour with agent installed', async () => { + const { io, chunks } = 
createMockIo(); + const mockAgents = vi.fn().mockResolvedValue({ + status: 'ready', + projectDir: '/tmp/test', + installs: [{ target: 'claude-code', scope: 'project', mode: 'both' }], + } satisfies KtxSetupAgentsResult); + + const navigation = vi.fn().mockResolvedValue('forward'); + + const result = await runDemoTour( + { inputMode: 'auto' }, + io, + { + agents: mockAgents, + waitForNavigation: navigation, + skipReplayAnimation: true, + ensureProject: vi.fn().mockResolvedValue({ projectDir: '/tmp/test' }), + }, + ); + expect(result).toBe(0); + expect(mockAgents).toHaveBeenCalled(); + // Should have rendered completion summary + const allOutput = chunks.join(''); + expect(allOutput).toContain('agent is connected'); + }); + + it('handles back navigation from first step by exiting', async () => { + const { io } = createMockIo(); + const navigation = vi.fn().mockResolvedValue('back'); + + const result = await runDemoTour( + { inputMode: 'auto' }, + io, + { + waitForNavigation: navigation, + skipReplayAnimation: true, + ensureProject: vi.fn().mockResolvedValue({ projectDir: '/tmp/test' }), + }, + ); + expect(result).toBe(0); + // Navigation called once for databases step, then exits + expect(navigation).toHaveBeenCalledTimes(1); + }); + + it('goes back from sources to databases', async () => { + const { io } = createMockIo(); + let callCount = 0; + const navigation = vi.fn().mockImplementation(() => { + callCount++; + // First call (databases): forward + // Second call (sources): back + // Third call (databases again): back (exit) + if (callCount === 1) return Promise.resolve('forward'); + return Promise.resolve('back'); + }); + + const result = await runDemoTour( + { inputMode: 'auto' }, + io, + { + waitForNavigation: navigation, + skipReplayAnimation: true, + ensureProject: vi.fn().mockResolvedValue({ projectDir: '/tmp/test' }), + }, + ); + expect(result).toBe(0); + expect(navigation).toHaveBeenCalledTimes(3); + }); + + it('handles agent step returning back', async () 
=> { + const { io } = createMockIo(); + let navCount = 0; + const navigation = vi.fn().mockImplementation(() => { + navCount++; + // Forward through databases, sources, context + // Then back from context (after agents returns back) + // Then back from sources, then back from databases (exit) + if (navCount <= 3) return Promise.resolve('forward'); + return Promise.resolve('back'); + }); + + const mockAgents = vi.fn().mockResolvedValue({ + status: 'back', + projectDir: '/tmp/test', + } satisfies KtxSetupAgentsResult); + + const result = await runDemoTour( + { inputMode: 'auto' }, + io, + { + agents: mockAgents, + waitForNavigation: navigation, + skipReplayAnimation: true, + ensureProject: vi.fn().mockResolvedValue({ projectDir: '/tmp/test' }), + }, + ); + expect(result).toBe(0); + }); +}); diff --git a/packages/cli/src/setup-demo-tour.ts b/packages/cli/src/setup-demo-tour.ts new file mode 100644 index 00000000..557a52cb --- /dev/null +++ b/packages/cli/src/setup-demo-tour.ts @@ -0,0 +1,390 @@ +import type { KtxCliIo } from './cli-runtime.js'; +import type { + ContextBuildTargetState, + ContextBuildViewState, +} from './context-build-view.js'; +import { createRepainter, renderContextBuildView } from './context-build-view.js'; +import { defaultDemoProjectDir, ensureSeededDemoProject } from './demo-assets.js'; +import type { KtxPublicIngestPlanTarget } from './public-ingest.js'; +import type { KtxSetupAgentsResult } from './setup-agents.js'; +import { runKtxSetupAgentsStep } from './setup-agents.js'; +import { KtxSetupExitError } from './setup-interrupt.js'; + +// --------------------------------------------------------------------------- +// ANSI helpers (internal) +// --------------------------------------------------------------------------- + +const ESC = String.fromCharCode(0x1b); + +function cyan(text: string): string { + return `${ESC}[36m${text}${ESC}[39m`; +} + +function dim(text: string): string { + return `${ESC}[2m${text}${ESC}[22m`; +} + +// 
--------------------------------------------------------------------------- +// Demo target helpers (internal) +// --------------------------------------------------------------------------- + +function createDemoTarget( + connectionId: string, + operation: 'scan' | 'source-ingest', + driver: string, +): KtxPublicIngestPlanTarget { + const adapter = operation === 'source-ingest' ? driver : undefined; + return { + connectionId, + driver, + operation, + ...(adapter ? { adapter } : {}), + debugCommand: `ktx setup context build --target ${connectionId}`, + steps: operation === 'scan' + ? ['scan', 'enrich', 'memory-update'] + : ['source-ingest', 'enrich', 'memory-update'], + }; +} + +function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState { + return { + target, + status: 'queued', + detailLine: null, + summaryText: null, + startedAt: null, + elapsedMs: 0, + }; +} + +// --------------------------------------------------------------------------- +// Pure rendering functions +// --------------------------------------------------------------------------- + +export function renderDemoBanner(): string { + const lines = [ + '', + `┌ ${cyan('Demo mode')} — data has been pre-processed and KTX context is already built.`, + '│ This walkthrough illustrates the setup steps. 
Selections are pre-filled and read-only.', + ]; + return lines.join('\n'); +} + +export function renderDemoCardContent(title: string, selections: string[]): string { + const lines = [ + `┌ ${title}`, + '│', + ...selections.map((s) => `│ ${cyan('▸')} ${s}`), + '│', + `│ ${dim('Press Enter to continue, Escape to go back')}`, + '└', + ]; + return lines.join('\n'); +} + +export function renderDemoAgentTransition(): string { + const lines = [ + '┌ Demo project is ready — let\'s connect your agent', + '│', + '│ Your KTX context has been built with demo data.', + '│ Select an agent to start using it.', + '└', + ]; + return lines.join('\n'); +} + +export function renderDemoCompletionSummary(projectDir: string, agentInstalled: boolean): string { + const lines: string[] = [ + '', + `${cyan('★')} KTX demo is ready`, + '', + ]; + + if (agentInstalled) { + lines.push(' Your agent is connected to a demo KTX project.'); + } else { + lines.push(' Demo project created. Connect an agent to start using it:'); + lines.push(` $ ${cyan(`ktx setup --agents --project-dir ${projectDir}`)}`); + } + + lines.push( + '', + ` ${dim('⚠')} This project is in a temporary directory and will be`, + ' cleaned up by your system. To set up KTX with your own', + ' data, run: ktx setup', + '', + ` Project: ${projectDir}`, + ); + + return lines.join('\n'); +} + +// --------------------------------------------------------------------------- +// Keypress navigation +// --------------------------------------------------------------------------- + +export async function waitForDemoNavigation( + stdin?: NodeJS.ReadStream, +): Promise<'forward' | 'back'> { + const input = stdin ?? process.stdin; + const hadRawMode = input.isRaw ?? 
false; + + return new Promise<'forward' | 'back'>((resolve, reject) => { + if (typeof input.setRawMode === 'function') { + input.setRawMode(true); + } + input.resume(); + + const cleanup = () => { + input.off('data', onData); + if (typeof input.setRawMode === 'function') { + input.setRawMode(hadRawMode); + } + }; + + const onData = (data: Buffer) => { + const char = data.toString(); + if (char === '\r' || char === '\n') { + cleanup(); + resolve('forward'); + } else if (char === '\x1b') { + cleanup(); + resolve('back'); + } else if (char === '\x03') { + cleanup(); + reject(new KtxSetupExitError()); + } + }; + + input.on('data', onData); + }); +} + +// --------------------------------------------------------------------------- +// Interactive card +// --------------------------------------------------------------------------- + +export async function renderDemoCard( + title: string, + selections: string[], + io: KtxCliIo, + stdin?: NodeJS.ReadStream, + waitNav: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'> = waitForDemoNavigation, +): Promise<'forward' | 'back'> { + io.stdout.write(renderDemoBanner() + '\n\n'); + io.stdout.write(renderDemoCardContent(title, selections) + '\n'); + return waitNav(stdin); +} + +// --------------------------------------------------------------------------- +// Context build replay +// --------------------------------------------------------------------------- + +export interface DemoReplayEvent { + delayMs: number; + connectionId: string; + status: 'running' | 'done'; + detailLine: string | null; + summaryText: string | null; +} + +export const DEMO_REPLAY_TARGETS = { + primarySources: [ + createDemoTarget('postgres-warehouse', 'scan', 'postgres'), + ], + contextSources: [ + createDemoTarget('dbt-main', 'source-ingest', 'dbt'), + createDemoTarget('metabase-main', 'source-ingest', 'metabase'), + createDemoTarget('notion-main', 'source-ingest', 'notion'), + ], +} as const; + +export function buildDemoReplayTimeline(): 
DemoReplayEvent[] { + return [ + // postgres-warehouse: scan + { delayMs: 0, connectionId: 'postgres-warehouse', status: 'running', detailLine: null, summaryText: null }, + { delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] scanning tables...', summaryText: null }, + { delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables scanned' }, + // dbt-main + { delayMs: 2400, connectionId: 'dbt-main', status: 'running', detailLine: null, summaryText: null }, + { delayMs: 3600, connectionId: 'dbt-main', status: 'running', detailLine: '[60%] ingesting models...', summaryText: null }, + { delayMs: 4400, connectionId: 'dbt-main', status: 'done', detailLine: null, summaryText: '34 models ingested' }, + // metabase-main + { delayMs: 4400, connectionId: 'metabase-main', status: 'running', detailLine: null, summaryText: null }, + { delayMs: 5600, connectionId: 'metabase-main', status: 'done', detailLine: null, summaryText: '80 cards ingested' }, + // notion-main + { delayMs: 5600, connectionId: 'notion-main', status: 'running', detailLine: null, summaryText: null }, + { delayMs: 6800, connectionId: 'notion-main', status: 'done', detailLine: null, summaryText: '9 pages ingested' }, + ]; +} + +function renderDemoContextCompletionSummary(): string { + const lines = [ + '', + `${cyan('★')} KTX finished building context`, + '', + ' KTX created:', + ` ${cyan('📊')} 46 semantic layer definitions`, + ` ${cyan('📝')} 28 knowledge pages`, + '', + ` ${dim('Press Enter to continue, Escape to go back')}`, + '', + ]; + return lines.join('\n'); +} + +export async function runDemoContextReplay( + io: KtxCliIo, + stdin?: NodeJS.ReadStream, +): Promise<'forward' | 'back'> { + const allPrimary = DEMO_REPLAY_TARGETS.primarySources.map(createTargetState); + const allContext = DEMO_REPLAY_TARGETS.contextSources.map(createTargetState); + + const state: ContextBuildViewState = { + primarySources: allPrimary, + 
contextSources: allContext, + frame: 0, + startedAt: Date.now(), + totalElapsedMs: 0, + }; + + const allTargets = [...allPrimary, ...allContext]; + const timeline = buildDemoReplayTimeline(); + + const repainter = createRepainter(io); + const paint = () => repainter.paint(renderContextBuildView(state, { styled: true })); + + paint(); + + let eventIndex = 0; + const startTime = Date.now(); + + await new Promise((resolve) => { + const frameInterval = setInterval(() => { + const elapsed = Date.now() - startTime; + state.frame++; + state.totalElapsedMs = elapsed; + + // Apply all events up to the current elapsed time + while (eventIndex < timeline.length && timeline[eventIndex].delayMs <= elapsed) { + const event = timeline[eventIndex]; + const target = allTargets.find((t) => t.target.connectionId === event.connectionId); + if (target) { + target.status = event.status; + target.detailLine = event.detailLine; + if (event.summaryText !== null) { + target.summaryText = event.summaryText; + } + if (event.status === 'running' && target.startedAt === null) { + target.startedAt = Date.now(); + } + if (event.status === 'done') { + target.elapsedMs = target.startedAt !== null ? 
Date.now() - target.startedAt : 0; + } + } + eventIndex++; + } + + // Update running target elapsed times + for (const t of allTargets) { + if (t.status === 'running' && t.startedAt !== null) { + t.elapsedMs = Date.now() - t.startedAt; + } + } + + paint(); + + // Check if all events have been applied + if (eventIndex >= timeline.length) { + clearInterval(frameInterval); + resolve(); + } + }, 120); + }); + + // Final paint with all done + paint(); + + // Show completion summary and wait for navigation + io.stdout.write(renderDemoContextCompletionSummary() + '\n'); + return waitForDemoNavigation(stdin); +} + +// --------------------------------------------------------------------------- +// Demo tour orchestrator +// --------------------------------------------------------------------------- + +type DemoStep = 'databases' | 'sources' | 'context' | 'agents'; + +const DEMO_STEPS: DemoStep[] = ['databases', 'sources', 'context', 'agents']; + +export interface DemoTourDeps { + agents?: (args: Parameters[0], io: KtxCliIo) => Promise; + waitForNavigation?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>; + ensureProject?: typeof ensureSeededDemoProject; + skipReplayAnimation?: boolean; +} + +export async function runDemoTour( + args: { inputMode: 'auto' | 'disabled' }, + io: KtxCliIo, + deps: DemoTourDeps = {}, +): Promise { + const waitNav = deps.waitForNavigation ?? waitForDemoNavigation; + const ensureProject = deps.ensureProject ?? 
ensureSeededDemoProject; + + const projectDir = defaultDemoProjectDir(); + await ensureProject({ projectDir, force: false }); + + let stepIndex = 0; + + while (stepIndex < DEMO_STEPS.length) { + const step = DEMO_STEPS[stepIndex]!; + let direction: 'forward' | 'back'; + + if (step === 'databases') { + direction = await renderDemoCard('Database connection', ['PostgreSQL — Orbit Analytics (56 tables, 2 schemas)'], io, undefined, waitNav); + } else if (step === 'sources') { + direction = await renderDemoCard('Context sources', ['dbt — 34 transformation models', 'Metabase — 80 dashboard cards', 'Notion — 9 knowledge pages'], io, undefined, waitNav); + } else if (step === 'context') { + io.stdout.write(renderDemoBanner() + '\n\n'); + if (deps.skipReplayAnimation) { + direction = await waitNav(); + } else { + direction = await runDemoContextReplay(io); + } + } else { + // agents step — real interactive + io.stdout.write(renderDemoAgentTransition() + '\n'); + const agentsRunner = deps.agents ?? 
runKtxSetupAgentsStep; + const agentsResult = await agentsRunner( + { + projectDir, + inputMode: args.inputMode, + yes: false, + agents: true, + scope: 'project', + mode: 'both', + skipAgents: false, + }, + io, + ); + const agentInstalled = agentsResult.status === 'ready'; + if (agentsResult.status === 'back') { + direction = 'back'; + } else { + io.stdout.write(renderDemoCompletionSummary(projectDir, agentInstalled) + '\n'); + return 0; + } + } + + if (direction === 'back') { + if (stepIndex === 0) return 0; + stepIndex -= 1; + } else { + stepIndex += 1; + } + } + + return 0; +} diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts index 81c8b361..96092b25 100644 --- a/packages/cli/src/setup-models.test.ts +++ b/packages/cli/src/setup-models.test.ts @@ -676,4 +676,53 @@ describe('setup Anthropic model step', () => { ).resolves.toMatchObject({ status: 'ready' }); expect(healthCheck).not.toHaveBeenCalled(); }); + + it.each([ + { + backend: 'vertex', + providerLines: [' backend: vertex', ' vertex:', ' project: kaelio-dev', ' location: us-east5'], + model: 'claude-sonnet-4-6', + }, + { + backend: 'gateway', + providerLines: [' backend: gateway', ' gateway:', ' api_key: env:AI_GATEWAY_API_KEY'], + model: 'anthropic/claude-sonnet-4-6', + }, + ])('preserves already configured $backend llm setup without asking for Anthropic credentials', async (fixture) => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'setup:', + ' database_connection_ids: []', + ' completed_steps:', + ' - project', + ' - llm', + 'connections: {}', + 'llm:', + ' provider:', + ...fixture.providerLines, + ' models:', + ` default: ${fixture.model}`, + 'ingest:', + ' embeddings:', + ' backend: deterministic', + ' model: deterministic', + ' dimensions: 8', + ].join('\n'), + 'utf-8', + ); + + const healthCheck = vi.fn(async () => ({ ok: true as const })); + const io = makeIo(); + await expect( + runKtxSetupAnthropicModelStep({ projectDir: 
tempDir, inputMode: 'disabled', skipLlm: false }, io.io, { + healthCheck, + }), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(healthCheck).not.toHaveBeenCalled(); + expect(io.stdout()).toContain(`LLM ready: yes (${fixture.model})`); + expect(io.stderr()).not.toContain('Anthropic'); + }); }); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 28908849..5b0dea18 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -1,5 +1,6 @@ import { writeFile } from 'node:fs/promises'; import { cancel, isCancel, password, select, text } from '@clack/prompts'; +import { resolveLocalKtxLlmConfig } from '@ktx/context'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { type KtxProjectConfig, @@ -170,13 +171,26 @@ export async function fetchAnthropicModels( return models.map((item, index) => ({ ...item, recommended: index === Math.max(recommendedIndex, 0) })); } -function hasCompletedLlm(config: KtxProjectConfig): boolean { - return ( - config.setup?.completed_steps.includes('llm') === true && - config.llm.provider.backend === 'anthropic' && - typeof config.llm.models.default === 'string' && - config.llm.models.default.length > 0 - ); +export function isKtxSetupLlmConfigReady(config: KtxProjectLlmConfig): boolean { + let resolved: KtxLlmConfig | null; + try { + resolved = resolveLocalKtxLlmConfig(config, process.env); + } catch { + return false; + } + if (!resolved) { + return false; + } + + if (resolved.backend === 'vertex') { + return typeof resolved.vertex?.location === 'string' && resolved.vertex.location.trim().length > 0; + } + + return resolved.backend === 'anthropic' || resolved.backend === 'gateway'; +} + +function hasUsableConfiguredLlm(config: KtxProjectConfig): boolean { + return isKtxSetupLlmConfigReady(config.llm); } function buildProjectLlmConfig( @@ -386,7 +400,7 @@ export async function runKtxSetupAnthropicModelStep( const project = await loadKtxProject({ 
projectDir: args.projectDir }); if ( args.forcePrompt !== true && - hasCompletedLlm(project.config) && + hasUsableConfiguredLlm(project.config) && !args.anthropicApiKeyEnv && !args.anthropicApiKeyFile && !args.anthropicModel diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 44fc8c7d..58efc506 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -5,8 +5,13 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js'; import { contextBuildCommands, writeKtxSetupContextState } from './setup-context.js'; +import { runDemoTour } from './setup-demo-tour.js'; import { readKtxSetupStatus, runKtxSetup } from './setup.js'; +vi.mock('./setup-demo-tour.js', () => ({ + runDemoTour: vi.fn(async () => 0), +})); + function makeIo() { let stdout = ''; let stderr = ''; @@ -83,6 +88,38 @@ describe('setup status', () => { }); }); + it.each([ + { + backend: 'vertex', + providerLines: [' backend: vertex', ' vertex:', ' project: kaelio-dev', ' location: us-east5'], + model: 'claude-sonnet-4-6', + }, + { + backend: 'gateway', + providerLines: [' backend: gateway', ' gateway:', ' api_key: env:AI_GATEWAY_API_KEY'], + model: 'anthropic/claude-sonnet-4-6', + }, + ])('reports configured $backend llm backends as setup-ready', async (fixture) => { + await mkdir(tempDir, { recursive: true }); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'llm:', + ' provider:', + ...fixture.providerLines, + ' models:', + ` default: ${fixture.model}`, + 'connections: {}', + ].join('\n'), + 'utf-8', + ); + + await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ + llm: { backend: fixture.backend, ready: true, model: fixture.model }, + }); + }); + it('uses setup database connection ids when present', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -404,10 +441,10 @@ describe('setup status', () => 
{ expect(labels).toEqual([ 'Set up KTX for my data', 'Check setup status', - 'Try KTX with packaged demo data', + 'Explore a pre-built KTX project', 'Exit', ]); - expect(labels.indexOf('Try KTX with packaged demo data')).toBe(labels.length - 2); + expect(labels.indexOf('Explore a pre-built KTX project')).toBe(labels.length - 2); return 'exit'; }); const cancel = vi.fn(); @@ -453,7 +490,7 @@ describe('setup status', () => { 'Create a new KTX project', 'Connect a coding agent to KTX', 'Check setup status', - 'Try KTX with packaged demo data', + 'Explore a pre-built KTX project', 'Exit', ]); return 'exit'; @@ -748,9 +785,8 @@ describe('setup status', () => { ); }); - it('runs the seeded demo when the first setup intent menu chooses packaged demo data', async () => { + it('runs the demo tour when the first setup intent menu chooses demo', async () => { const testIo = makeIo(); - const demo = vi.fn(async (_args: { projectDir: string }, _io: unknown) => 0); await expect( runKtxSetup( @@ -771,19 +807,15 @@ describe('setup status', () => { showEntryMenu: true, }, testIo.io, - { entryMenuDeps: { prompts: { select: vi.fn(async () => 'demo'), cancel: vi.fn() } }, demo }, + { entryMenuDeps: { prompts: { select: vi.fn(async () => 'demo'), cancel: vi.fn() } } }, ), ).resolves.toBe(0); - expect(demo).toHaveBeenCalledWith( - expect.objectContaining({ - command: 'seeded', - outputMode: 'viz', - inputMode: 'auto', - }), + expect(runDemoTour).toHaveBeenCalledWith( + { inputMode: 'auto' }, testIo.io, + expect.objectContaining({}), ); - expect(demo.mock.calls[0]?.[0].projectDir).toMatch(/ktx-demo-/); }); it('creates a project through run mode when --new is selected', async () => { @@ -1231,6 +1263,77 @@ describe('setup status', () => { expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']); }); + it.each([ + { + backend: 'vertex', + providerLines: [' backend: vertex', ' vertex:', ' project: kaelio-dev', ' location: us-east5'], + model: 'claude-sonnet-4-6', + }, + { + 
backend: 'gateway', + providerLines: [' backend: gateway', ' gateway:', ' api_key: env:AI_GATEWAY_API_KEY'], + model: 'anthropic/claude-sonnet-4-6', + }, + ])('adds a dbt source in non-interactive setup with existing $backend llm config', async (fixture) => { + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + ' - databases', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_URL', + 'llm:', + ' provider:', + ...fixture.providerLines, + ' models:', + ` default: ${fixture.model}`, + ].join('\n'), + 'utf-8', + ); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: true, + cliVersion: '0.2.0', + skipLlm: false, + skipEmbeddings: true, + skipDatabases: true, + source: 'dbt', + sourceConnectionId: 'dbt-main', + sourceGitUrl: 'https://github.com/Kaelio/klo-dbt-demo', + sourceBranch: 'main', + sourceProjectName: 'orbit_analytics', + sourceWarehouseConnectionId: 'warehouse', + skipSources: false, + databaseSchemas: [], + }, + io.io, + { + sourcesDeps: { validateDbt: vi.fn(async () => ({ ok: true as const, detail: 'dbt project valid' })) }, + context: vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-test' })), + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).not.toContain('Anthropic'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).toContain('dbt-main:'); + }); + it('does not fail context build when prerequisites were explicitly skipped and agents are skipped', async () => { const calls: string[] = []; const io = makeIo(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 0b0c400d..a4f081d0 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -4,8 +4,6 @@ import { cancel, isCancel, 
select } from '@clack/prompts'; import { getLatestLocalIngestStatus, savedMemoryCountsForReport } from '@ktx/context/ingest'; import { ktxLocalStateDbPath, loadKtxProject, type KtxLocalProject } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; -import type { KtxDemoArgs } from './demo.js'; -import { defaultDemoProjectDir } from './demo-assets.js'; import { formatSetupNextStepLines } from './next-steps.js'; import { isKtxSetupExitError, withSetupInterruptConfirmation } from './setup-interrupt.js'; import { @@ -23,7 +21,7 @@ import { runKtxSetupDatabasesStep, } from './setup-databases.js'; import { type KtxSetupEmbeddingsDeps, runKtxSetupEmbeddingsStep } from './setup-embeddings.js'; -import { type KtxSetupModelDeps, runKtxSetupAnthropicModelStep } from './setup-models.js'; +import { type KtxSetupModelDeps, isKtxSetupLlmConfigReady, runKtxSetupAnthropicModelStep } from './setup-models.js'; import { type KtxSetupProjectDeps, runKtxSetupProjectStep } from './setup-project.js'; import { isKtxPreAgentSetupReady, @@ -149,11 +147,9 @@ export interface KtxSetupDeps { removeAgents?: typeof removeKtxAgentInstall; readyMenuDeps?: KtxSetupReadyMenuDeps; entryMenuDeps?: KtxSetupEntryMenuDeps; - demo?: (args: KtxDemoArgs, io: KtxCliIo) => Promise; } const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']); -const READY_LLM_BACKENDS = new Set(['anthropic', 'vertex', 'gateway']); type KtxSetupEntryAction = 'setup' | 'new-project' | 'agents' | 'status' | 'demo' | 'exit'; type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents'; @@ -202,13 +198,13 @@ async function runKtxSetupEntryMenu( { value: 'new-project', label: 'Create a new KTX project' }, { value: 'agents', label: 'Connect a coding agent to KTX' }, { value: 'status', label: 'Check setup status' }, - { value: 'demo', label: 'Try KTX with packaged demo data' }, + { value: 'demo', label: 'Explore a pre-built KTX project' 
}, { value: 'exit', label: 'Exit' }, ] : [ { value: 'setup', label: 'Set up KTX for my data' }, { value: 'status', label: 'Check setup status' }, - { value: 'demo', label: 'Try KTX with packaged demo data' }, + { value: 'demo', label: 'Explore a pre-built KTX project' }, { value: 'exit', label: 'Exit' }, ]; const action = (await prompts.select({ @@ -223,24 +219,11 @@ async function runKtxSetupDemoFromEntryMenu( io: KtxCliIo, deps: KtxSetupDeps, ): Promise { - const runner = deps.demo ?? (await import('./demo.js')).runKtxDemo; - return await runner( - { - command: 'seeded', - projectDir: defaultDemoProjectDir(), - outputMode: 'viz', - inputMode: args.inputMode, - }, + const { runDemoTour } = await import('./setup-demo-tour.js'); + return await runDemoTour( + { inputMode: args.inputMode }, io, - ); -} - -function llmReady(status: KtxSetupStatus['llm']): boolean { - return ( - status.backend !== undefined && - READY_LLM_BACKENDS.has(status.backend) && - typeof status.model === 'string' && - status.model.length > 0 + { agents: deps.agents }, ); } @@ -308,10 +291,9 @@ export async function readKtxSetupStatus(projectDir: string): Promise database.ready) && status.sources.every((source) => source.ready) diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts index 779c2cc3..d8cd3907 100644 --- a/packages/context/src/ingest/local-bundle-runtime.test.ts +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -53,7 +53,13 @@ describe('createLocalBundleIngestRuntime', () => { project, adapters: [new FakeSourceAdapter()], }), - ).toThrow('ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner'); + ).toThrow( + [ + 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + `Configure an Anthropic provider, then rerun ingest:`, + ` ktx setup --project-dir ${project.projectDir} 
--anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, + ].join('\n'), + ); }); it('builds runner deps with local SQLite stores and context tools enabled', async () => { diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 43d0247b..afcb2525 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -536,6 +536,14 @@ function nextLocalJobId(): string { return `local-${Date.now().toString(36)}`; } +function localIngestLlmProviderGuardMessage(projectDir: string): string { + return [ + 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'Configure an Anthropic provider, then rerun ingest:', + ` ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, + ].join('\n'); +} + function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { agentRunner: AgentRunnerService; llmProvider?: KtxLlmProvider; @@ -548,9 +556,7 @@ function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { } if (!llmProvider) { - throw new Error( - 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', - ); + throw new Error(localIngestLlmProviderGuardMessage(options.project.projectDir)); } return { diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 3adde085..c89cbc28 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -38,6 +38,31 @@ const baseTable: SemanticLayerSource = { measures: [], }; +describe('listConnectionIdsWithNames', () => { + it('discovers local KTX connection ids from semantic-layer directories', async () => { + const configService = { 
+ listFiles: vi.fn().mockResolvedValue({ + files: [ + 'semantic-layer/warehouse/_schema/public.yaml', + 'semantic-layer/dbt-main/orders.yaml', + 'semantic-layer/.gitkeep', + ], + }), + }; + const catalog = connectionCatalog(); + catalog.listEnabledConnections.mockImplementation(async (ids: string[]) => + ids.map((id) => ({ id, name: id, connectionType: id === 'warehouse' ? 'postgres' : 'dbt' })), + ); + const service = new SemanticLayerService(configService as never, catalog, pythonPort); + + await expect(service.listConnectionIdsWithNames()).resolves.toEqual([ + { id: 'dbt-main', name: 'dbt-main', connectionType: 'dbt' }, + { id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }, + ]); + expect(catalog.listEnabledConnections).toHaveBeenCalledWith(['dbt-main', 'warehouse']); + }); +}); + describe('composeOverlay', () => { it('carries top-level segments from overlay into the composed source', () => { const overlay = { diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index ffae0b12..938763fe 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -12,6 +12,7 @@ interface WriteSourceOptions { } const SL_DIR_PREFIX = 'semantic-layer'; +const CONNECTION_ID_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/; function formatPortError(error: unknown, fallback: string): string { if (typeof error === 'string') { @@ -61,11 +62,12 @@ export class SemanticLayerService { async listConnectionIds(): Promise { try { const result = await this.configService.listFiles(SL_DIR_PREFIX); - // Directories under semantic-layer/ are connectionIds (UUIDs) - const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + // Directories under semantic-layer/ are connectionIds. Local KTX projects use + // readable ids like "warehouse" and "dbt-main", not only UUIDs. 
return result.files .map((f) => f.replace(`${SL_DIR_PREFIX}/`, '').split('/')[0]) - .filter((name, i, arr) => uuidPattern.test(name) && arr.indexOf(name) === i); + .filter((name, i, arr) => CONNECTION_ID_PATTERN.test(name) && arr.indexOf(name) === i) + .sort(); } catch { return []; } diff --git a/packages/context/src/sl/tools/sl-discover.tool.test.ts b/packages/context/src/sl/tools/sl-discover.tool.test.ts new file mode 100644 index 00000000..3277d45d --- /dev/null +++ b/packages/context/src/sl/tools/sl-discover.tool.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ToolContext, ToolSession } from '../../tools/index.js'; +import { createTouchedSlSources } from '../../tools/index.js'; +import type { SemanticLayerSource } from '../types.js'; +import { SlDiscoverTool } from './sl-discover.tool.js'; + +function makeTool() { + const semanticLayerService = { + listConnectionIdsWithNames: vi.fn(async () => [] as Array<{ id: string; name: string; connectionType: string }>), + loadAllSources: vi.fn(async () => [] as SemanticLayerSource[]), + }; + const slSearchService = { + search: vi.fn(async () => []), + }; + const tool = new SlDiscoverTool( + { + semanticLayerService: semanticLayerService as never, + slSearchService: slSearchService as never, + authorResolver: { resolve: vi.fn() }, + }, + { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }, + ); + return { tool, semanticLayerService, slSearchService }; +} + +function makeContext(overrides: Partial = {}): ToolContext { + return { + sourceId: 'src', + messageId: 'msg', + userId: 'user', + ...overrides, + }; +} + +function makeSession(semanticLayerService: Record): ToolSession { + return { + connectionId: 'dbt-main', + isWorktreeScoped: true, + preHead: 'base', + touchedSlSources: createTouchedSlSources(), + actions: [], + semanticLayerService: semanticLayerService as never, + wikiService: {} as never, + configService: {} as never, + gitService: {} as never, + }; +} + 
+describe('SlDiscoverTool - session-scoped reads', () => { + it('discovers sources through context.session.semanticLayerService when a session is present', async () => { + const { tool, semanticLayerService } = makeTool(); + const sessionSemanticLayerService = { + listConnectionIdsWithNames: vi.fn().mockResolvedValue([ + { id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }, + ]), + loadAllSources: vi.fn().mockResolvedValue([ + { + name: 'orders', + table: 'public.orders', + grain: ['order_id'], + columns: [{ name: 'order_id', type: 'string' }], + measures: [], + joins: [], + }, + ]), + }; + + const result = await tool.call({}, makeContext({ session: makeSession(sessionSemanticLayerService) })); + + expect(result.structured.totalSources).toBe(1); + expect(result.structured.sources[0]).toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + columnCount: 1, + }); + expect(sessionSemanticLayerService.listConnectionIdsWithNames).toHaveBeenCalled(); + expect(sessionSemanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse'); + expect(semanticLayerService.listConnectionIdsWithNames).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/context/src/sl/tools/sl-discover.tool.ts b/packages/context/src/sl/tools/sl-discover.tool.ts index ed7c1854..97426b40 100644 --- a/packages/context/src/sl/tools/sl-discover.tool.ts +++ b/packages/context/src/sl/tools/sl-discover.tool.ts @@ -1,5 +1,6 @@ import { z } from 'zod'; import { DEFAULT_PRIORITY, resolveDescription } from '../descriptions.js'; +import type { SemanticLayerService } from '../semantic-layer.service.js'; import type { SemanticLayerSource } from '../types.js'; import type { ToolContext, ToolOutput } from '../../tools/index.js'; import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js'; @@ -66,13 +67,14 @@ Use this to understand what data is available before writing a semantic_query. 
return slDiscoverInputSchema; } - async call(input: SlDiscoverInput, _context: ToolContext): Promise> { + async call(input: SlDiscoverInput, context: ToolContext): Promise> { const { query, sourceName } = input; + const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService; // Resolve connectionId: use provided value, or auto-detect let connectionId = input.connectionId; if (!connectionId) { - const connections = await this.semanticLayerService.listConnectionIdsWithNames(); + const connections = await semanticLayerService.listConnectionIdsWithNames(); if (connections.length === 0) { return { markdown: 'No semantic layer sources found. Run a schema scan first.', @@ -92,14 +94,14 @@ Use this to understand what data is available before writing a semantic_query. structured: { sources: [], totalSources: 0 }, }; } - return this.discoverAcrossConnections(connections, query); + return this.discoverAcrossConnections(semanticLayerService, connections, query); } } // If inspecting a specific source — show the SL interface (columns, measures, joins) // without the raw SQL. Use `sl_read_source` to see the full YAML including SQL. if (sourceName) { - const sources = await this.semanticLayerService.loadAllSources(connectionId); + const sources = await semanticLayerService.loadAllSources(connectionId); const source = sources.find((s) => s.name === sourceName); if (!source) { return { @@ -136,19 +138,20 @@ Use this to understand what data is available before writing a semantic_query. } // Single connection: list all sources - const connections = await this.semanticLayerService.listConnectionIdsWithNames(); + const connections = await semanticLayerService.listConnectionIdsWithNames(); const connInfo = connections.find((c) => c.id === connectionId); - return this.discoverForConnection(connectionId, connInfo?.name ?? connectionId, query); + return this.discoverForConnection(semanticLayerService, connectionId, connInfo?.name ?? 
connectionId, query); } private async discoverAcrossConnections( + semanticLayerService: SemanticLayerService, connections: Array<{ id: string; name: string; connectionType: string }>, query?: string, ): Promise> { // Load sources from all connections in parallel const results = await Promise.all( connections.map(async (conn) => { - const sources = await this.semanticLayerService.loadAllSources(conn.id); + const sources = await semanticLayerService.loadAllSources(conn.id); let filtered = sources; if (query) { filtered = await this.filterByQuery(conn.id, sources, query); @@ -205,11 +208,12 @@ Use this to understand what data is available before writing a semantic_query. } private async discoverForConnection( + semanticLayerService: SemanticLayerService, connectionId: string, connectionName: string, query?: string, ): Promise> { - const sources = await this.semanticLayerService.loadAllSources(connectionId); + const sources = await semanticLayerService.loadAllSources(connectionId); if (sources.length === 0) { return { diff --git a/scripts/check-boundaries.mjs b/scripts/check-boundaries.mjs index 86b98712..53455abd 100644 --- a/scripts/check-boundaries.mjs +++ b/scripts/check-boundaries.mjs @@ -95,7 +95,7 @@ function scansForContextProductionLlmBoundaries(relativePath) { } function scansForForbiddenIdentifiers(relativePath) { - return isCodeSource(relativePath) || isRuntimeAsset(relativePath); + return (isCodeSource(relativePath) && !isTestSource(relativePath)) || isRuntimeAsset(relativePath); } function skipsIdentifierScan(relativePath) { diff --git a/scripts/check-boundaries.test.mjs b/scripts/check-boundaries.test.mjs index 8d7fabdd..db8afafe 100644 --- a/scripts/check-boundaries.test.mjs +++ b/scripts/check-boundaries.test.mjs @@ -65,6 +65,13 @@ describe('scanFileContent', () => { assert.equal(scanFileContent('python/ktx-sl/openspec/specs/semantic-layer/spec.md', name).length, 0); }); + it('allows product identifiers in test fixtures', () => { + const name = 
lowerProductName(); + + assert.equal(scanFileContent('packages/cli/src/setup.test.ts', `project: ${name}-dev`).length, 0); + assert.equal(scanFileContent('packages/context/src/ingest/importer.test.ts', `email: system@${name}.dev`).length, 0); + }); + it('allows public package identifiers in release packaging and managed runtime source', () => { const name = lowerProductName(); diff --git a/scripts/ci-artifact-upload.test.mjs b/scripts/ci-artifact-upload.test.mjs index 3fecdfbc..2c931cd0 100644 --- a/scripts/ci-artifact-upload.test.mjs +++ b/scripts/ci-artifact-upload.test.mjs @@ -31,7 +31,7 @@ describe('KTX CI artifact upload contract', () => { workflow, /name: Build and verify package artifacts\s+run: pnpm run artifacts:check\s+- name: Upload package artifacts/s, ); - assert.match(workflow, /uses: actions\/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f/); + assert.match(workflow, /uses: actions\/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a/); assert.match(workflow, /name: ktx-package-artifacts-\$\{\{ github\.sha \}\}/); assert.match(workflow, /dist\/artifacts\/manifest\.json/); assert.match(workflow, /dist\/artifacts\/npm\/\*\.tgz/); diff --git a/scripts/standalone-ci-workflow.test.mjs b/scripts/standalone-ci-workflow.test.mjs index 230b9e2f..195fce53 100644 --- a/scripts/standalone-ci-workflow.test.mjs +++ b/scripts/standalone-ci-workflow.test.mjs @@ -13,25 +13,39 @@ function assertIncludesAll(text, values) { } describe('standalone KTX CI workflow', () => { - it('runs the package checks from a filtered repository root', async () => { + it('runs package checks in parallel jobs from the repository root', async () => { const workflow = await readText('.github/workflows/ci.yml'); assert.match(workflow, /^name: KTX CI/m); assertIncludesAll(workflow, [ 'permissions:', 'contents: read', + 'typescript-checks:', + 'name: TypeScript checks', + 'slow-context-tests:', + 'name: Slow context tests', + 'slow-cli-tests:', + 'name: Slow CLI tests', + 
'cli-smoke-tests:', + 'name: CLI smoke tests', + 'python-checks:', + 'name: Python checks', + 'artifact-checks:', + 'name: Artifact checks', 'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd', - 'pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061', - 'actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238', + 'pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0', + 'actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e', 'node-version: "24"', 'cache-dependency-path: "pnpm-lock.yaml"', 'pnpm install --frozen-lockfile', 'pnpm run check', - 'pnpm run test:slow', + 'pnpm run build', + 'pnpm --filter @ktx/context run test:slow', + 'pnpm --filter @ktx/cli run test:slow', 'pnpm run smoke', 'actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405', 'python-version: "3.13"', - 'astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b', + 'astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b', 'cache-dependency-glob: "uv.lock"', 'uv sync --all-packages', 'uv run pytest', @@ -42,13 +56,14 @@ describe('standalone KTX CI workflow', () => { assert.doesNotMatch(workflow, /cd ktx/); assert.doesNotMatch(workflow, /ktx\/pnpm-lock\.yaml/); assert.doesNotMatch(workflow, /ktx\/uv\.lock/); + assert.doesNotMatch(workflow, /run: pnpm run test:slow/); }); it('uploads verified artifacts from root-relative paths', async () => { const workflow = await readText('.github/workflows/ci.yml'); assertIncludesAll(workflow, [ - 'actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f', + 'actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a', 'name: ktx-package-artifacts-${{ github.sha }}', 'dist/artifacts/manifest.json', 'dist/artifacts/npm/*.tgz',