Merge origin/main into fix-metabase-readiness

This commit is contained in:
Andrey Avtomonov 2026-05-12 10:28:35 +02:00
commit 60b29bb1e6
173 changed files with 9803 additions and 1140 deletions

View file

@ -15,19 +15,20 @@ concurrency:
cancel-in-progress: true
jobs:
check:
typescript-checks:
name: TypeScript checks
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup pnpm
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7
with:
run_install: false
- name: Setup Node.js
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "24"
cache: "pnpm"
@ -39,19 +40,101 @@ jobs:
- name: Run TypeScript checks
run: pnpm run check
- name: Run slow TypeScript tests
run: pnpm run test:slow
slow-context-tests:
name: Slow context tests
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup pnpm
uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7
with:
run_install: false
- name: Setup Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "24"
cache: "pnpm"
cache-dependency-path: "pnpm-lock.yaml"
- name: Install TypeScript dependencies
run: pnpm install --frozen-lockfile
- name: Build TypeScript packages
run: pnpm run build
- name: Run slow context tests
run: pnpm --filter @ktx/context run test:slow
slow-cli-tests:
name: Slow CLI tests
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup pnpm
uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7
with:
run_install: false
- name: Setup Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "24"
cache: "pnpm"
cache-dependency-path: "pnpm-lock.yaml"
- name: Install TypeScript dependencies
run: pnpm install --frozen-lockfile
- name: Build TypeScript packages
run: pnpm run build
- name: Run slow CLI tests
run: pnpm --filter @ktx/cli run test:slow
cli-smoke-tests:
name: CLI smoke tests
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup pnpm
uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7
with:
run_install: false
- name: Setup Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "24"
cache: "pnpm"
cache-dependency-path: "pnpm-lock.yaml"
- name: Install TypeScript dependencies
run: pnpm install --frozen-lockfile
- name: Run CLI smoke tests
run: pnpm run smoke
python-checks:
name: Python checks
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.13"
- name: Setup uv
uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
with:
enable-cache: true
cache-dependency-glob: "uv.lock"
@ -62,11 +145,47 @@ jobs:
- name: Run Python checks
run: uv run pytest
artifact-checks:
name: Artifact checks
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup pnpm
uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7
with:
run_install: false
- name: Setup Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "24"
cache: "pnpm"
cache-dependency-path: "pnpm-lock.yaml"
- name: Install TypeScript dependencies
run: pnpm install --frozen-lockfile
- name: Setup Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.13"
- name: Setup uv
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
with:
enable-cache: true
cache-dependency-glob: "uv.lock"
- name: Install Python dependencies
run: uv sync --all-packages
- name: Build and verify package artifacts
run: pnpm run artifacts:check
- name: Upload package artifacts
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: ktx-package-artifacts-${{ github.sha }}
path: |

View file

@ -24,12 +24,12 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup pnpm
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7
with:
run_install: false
- name: Setup Node.js
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "24"
cache: "pnpm"
@ -44,7 +44,7 @@ jobs:
python-version: "3.13"
- name: Setup uv
uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

377
README.md
View file

@ -1,312 +1,167 @@
# KTX
<h1 align="center">
<img src="assets/ktx-readme-header.png" alt="KTX" width="472" />
</h1>
KTX is a workspace-first context layer for database agents. It stores warehouse
memory in a project directory, generates and validates semantic-layer YAML,
indexes knowledge, scans database schemas, and exposes the result through a CLI
and MCP server.
<p align="center">
<strong>The context layer for analytics agents</strong>
</p>
KTX projects are plain files: YAML, Markdown, SQLite state, and generated
artifacts. You can inspect them, commit them, and serve them to any MCP client.
<p align="center">
<a href="https://www.npmjs.com/package/@kaelio/ktx"><img src="https://img.shields.io/npm/v/@kaelio/ktx?style=flat-square&color=f97316" alt="npm version" /></a>
<a href="https://github.com/Kaelio/ktx/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-blue?style=flat-square" alt="License" /></a>
<a href="https://github.com/Kaelio/ktx"><img src="https://img.shields.io/github/stars/Kaelio/ktx?style=flat-square" alt="GitHub stars" /></a>
</p>
## What KTX provides
---
- Durable warehouse memory with semantic-layer sources and knowledge pages.
- Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server,
BigQuery, and Snowflake.
- Agentic ingest with provenance links, tool transcripts, and replay metadata.
- Local semantic-layer query planning and optional query execution.
- A stdio MCP server with tools for connections, knowledge, semantic-layer
sources, ingest reports, and replay.
KTX turns warehouse metadata, semantic definitions, and business knowledge into
reviewable project files that agents can use while planning, querying, and
updating analytics work.
A KTX project is a directory of plain files — YAML semantic sources, Markdown
knowledge pages, and SQLite state — that you commit to git and review in PRs,
just like dbt models.
## Who KTX is for
KTX is built for analytics engineers and data teams who want data agents to
work on real analytics systems — not just generate one-off SQL.
Use KTX when you want agents to:
- **Generate SQL** from approved measures and joins
- **Repair semantic definitions** through reviewable diffs
- **Explain metric provenance** with warehouse evidence
- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI
platforms
Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and
SQLite.
## Quick start
Run the pre-seeded demo through the public npm package:
Install the CLI and run the setup wizard:
```bash
npx @kaelio/ktx setup demo --no-input
npx @kaelio/ktx setup demo inspect
```
The default demo uses packaged sample data and prebuilt context. It does not
require API keys, network access, or an LLM provider.
To replay the packaged ingest run, use:
```bash
npx @kaelio/ktx setup demo --mode replay --no-input
```
To run the full agentic demo with an LLM provider, set a provider key for the
current process:
```bash
ANTHROPIC_API_KEY=$YOUR_ANTHROPIC_API_KEY \
npx @kaelio/ktx setup demo --mode full --no-input
```
Interactive full-demo setup can prompt for a provider key without writing the
key to `ktx.yaml`.
You can also install the CLI in a project or globally:
```bash
npm install @kaelio/ktx
npx ktx --help
npm install -g @kaelio/ktx
ktx --help
ktx setup
```
## Build a local project
The wizard walks through six steps: configuring your LLM provider, setting up
embeddings, connecting your database, adding context sources (dbt, LookML,
Metabase, Looker, Notion), building context, and installing agent integration.
Create a project from a local workspace:
If it exits before completion, rerun `ktx setup` to resume where you left off.
Check your project status:
```bash
npm install @kaelio/ktx
PROJECT_DIR="$(mktemp -d)/ktx-demo"
npx ktx init "$PROJECT_DIR" --name ktx-demo
ktx status
```
Create a SQLite warehouse:
```
KTX project: /home/user/analytics
Project ready: yes
LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Primary sources configured: yes (postgres-warehouse)
Context sources configured: yes (dbt-main)
KTX context built: yes
Agent integration ready: yes (claude-code:project)
```
## What's in a project
```
my-project/
├── ktx.yaml # Project configuration
├── semantic-layer/
│ └── warehouse/
│ ├── orders.yaml # Semantic source definitions
│ ├── customers.yaml
│ └── order_items.yaml
├── knowledge/
│ ├── global/
│ │ ├── revenue.md # Business definitions and rules
│ │ └── segment-classification.md
│ └── user/
│ └── local/
├── raw-sources/
│ └── warehouse/
│ └── live-database/ # Scan artifacts and reports
└── .ktx/
└── db.sqlite # Local state (git-ignored)
```
Semantic sources and knowledge pages are committed to git. The `.ktx/` directory
holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the
next run.
## Serve agents
KTX integrates with coding agents through CLI skills, an MCP server, or both.
The setup wizard configures this automatically — here's what each mode looks
like.
**CLI skills** — the agent calls `ktx` commands directly through a skill file
installed in your agent's config (e.g., `.claude/skills/ktx/SKILL.md`):
```bash
python - "$PROJECT_DIR/demo.db" <<'PY'
import sqlite3
import sys
conn = sqlite3.connect(sys.argv[1])
conn.executescript("""
DROP TABLE IF EXISTS accounts;
CREATE TABLE accounts (
account_id INTEGER PRIMARY KEY,
account_name TEXT NOT NULL,
segment TEXT NOT NULL,
region TEXT NOT NULL
);
INSERT INTO accounts VALUES
(1, 'Acme Analytics', 'Mid-Market', 'NA'),
(2, 'Beacon Bank', 'Enterprise', 'EMEA'),
(3, 'Cobalt Coffee', 'SMB', 'NA'),
(4, 'Delta Devices', 'Mid-Market', 'APAC'),
(5, 'Evergreen Energy', 'Enterprise', 'NA');
""")
conn.close()
PY
ktx sl query --measure orders.revenue --dimension orders.status --format sql
ktx wiki search "revenue definition"
ktx sl validate orders
```
Replace the generated `ktx.yaml`:
**MCP server** — the agent calls KTX tools over the Model Context Protocol:
```bash
cat > "$PROJECT_DIR/ktx.yaml" <<YAML
project: ktx-demo
connections:
warehouse:
driver: sqlite
path: $PROJECT_DIR/demo.db
readonly: true
storage:
state: sqlite
search: sqlite-fts5
git:
auto_commit: true
author: "ktx <ktx@example.com>"
memory:
auto_commit: true
YAML
```
Write and validate a semantic-layer source:
```bash
npx ktx sl write accounts --project-dir "$PROJECT_DIR" \
--connection-id warehouse --yaml 'name: accounts
table: accounts
description: CRM accounts with segmentation attributes.
grain:
- account_id
columns:
- name: account_id
type: number
- name: account_name
type: string
- name: segment
type: string
- name: region
type: string
measures:
- name: account_count
expr: count(account_id)
joins: []
'
npx ktx sl validate accounts --project-dir "$PROJECT_DIR" \
--connection-id warehouse
```
Generate SQL and execute the query:
```bash
npx ktx sl query --project-dir "$PROJECT_DIR" \
--connection-id warehouse \
--measure accounts.account_count \
--dimension accounts.segment \
--order-by accounts.account_count:desc \
--limit 5 \
--format sql
npx ktx sl query --project-dir "$PROJECT_DIR" \
--connection-id warehouse \
--measure accounts.account_count \
--dimension accounts.segment \
--order-by accounts.account_count:desc \
--limit 5 \
--execute \
--max-rows 5
```
List and test the warehouse connection:
```bash
npx ktx connection list --project-dir "$PROJECT_DIR"
npx ktx connection test warehouse --project-dir "$PROJECT_DIR"
```
The connection test prints the configured driver and discovered table count:
```text
Driver: sqlite
Tables: 1
```
### Scan the demo warehouse
Scan artifacts are written under
`raw-sources/warehouse/live-database/<syncId>/` in the project directory.
```bash
SCAN_OUTPUT="$(npx ktx scan warehouse --project-dir "$PROJECT_DIR")"
printf '%s\n' "$SCAN_OUTPUT"
SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')"
npx ktx scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID"
npx ktx scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID"
```
For non-SQLite drivers, prefer credential references such as `--url env:NAME`
or `--url file:PATH` over literal credential URLs.
## Managed Python runtime
KTX installs its Python runtime only when a Python-backed command needs it.
The runtime lives outside the npm cache, is versioned by the installed CLI
version, and is managed by `ktx runtime` commands.
KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with
your system package manager or the official installer before running Python-
backed KTX commands. KTX doesn't download `uv` automatically; run
`ktx runtime doctor` if runtime installation fails:
```bash
npx ktx runtime install --yes
npx ktx runtime status
npx ktx runtime doctor
npx ktx runtime start
npx ktx runtime stop
npx ktx runtime prune --dry-run
npx ktx runtime prune --yes
```
Use `runtime prune --dry-run` to preview stale runtime directories from older
CLI versions. Add `--yes` to remove those stale directories after daemon
processes are stopped.
Commands such as `npx @kaelio/ktx sl query ... --yes` can install the core
runtime lazily from the bundled wheel. Local embeddings remain lazy; prepare
them only when you select local `sentence-transformers` embeddings:
```bash
npx ktx runtime install --feature local-embeddings --yes
npx ktx runtime start --feature local-embeddings
```
## Serve MCP
Start the stdio MCP server from the project directory:
```bash
npx ktx serve --mcp stdio --project-dir "$PROJECT_DIR" \
ktx serve --mcp stdio \
--user-id local \
--semantic-compute \
--execute-queries \
--yes
```
The `--semantic-compute` flag uses the managed Python runtime when no explicit
semantic compute URL is provided. KTX starts or reuses the managed runtime as
needed.
This exposes tools for connections, knowledge search, semantic-layer sources,
validation, queries, ingestion, and replay. The `--semantic-compute` flag starts
the managed Python runtime for query planning automatically.
The MCP server exposes `connection_list`, `knowledge_search`,
`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`,
`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`,
`ingest_status`, `ingest_report`, and `ingest_replay`.
Supported agents: Claude Code, Codex, Cursor, OpenCode, and any agent that
reads `.agents/` skills or MCP configuration.
## Workspace packages
- `packages/context`: core TypeScript context library.
- `packages/cli`: CLI wrapper over the context package.
- `packages/llm`: LLM and embedding provider helpers.
- `packages/connector-bigquery`: BigQuery scan connector.
- `packages/connector-clickhouse`: ClickHouse scan connector.
- `packages/connector-mysql`: MySQL scan connector.
- `packages/connector-postgres`: Postgres scan connector.
- `packages/connector-snowflake`: Snowflake scan connector.
- `packages/connector-sqlite`: SQLite scan connector.
- `packages/connector-sqlserver`: SQL Server scan connector.
- `python/ktx-sl`: semantic-layer engine.
- `python/ktx-daemon`: portable compute service for semantic-layer operations.
| Package | Purpose |
|---------|---------|
| `packages/cli` | CLI entry point |
| `packages/context` | Core context engine |
| `packages/llm` | LLM and embedding providers |
| `packages/connector-*` | Database connectors (Postgres, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, SQLite) |
| `python/ktx-sl` | Semantic-layer query planning |
| `python/ktx-daemon` | Portable compute service |
## Development
Install dependencies and run checks:
```bash
git clone https://github.com/kaelio/ktx.git
cd ktx
pnpm install
uv sync --all-groups
pnpm run build
pnpm run check
uv sync --all-packages
source .venv/bin/activate
uv run pytest
```
Use the optional development binary when you want a local `ktx-dev` command:
Use the development CLI for local testing:
```bash
pnpm run setup:dev
pnpm run link:dev
ktx-dev --help
```
The repository uses `pnpm` for TypeScript packages and `uv` for Python
packages.
## Release status
This repository builds one public npm artifact named `@kaelio/ktx`. The release
artifact manifest contains the public npm tarball and the bundled `kaelio-ktx`
runtime wheel. The first public npm handoff is policy-gated through
`release-policy.json`, which keeps Python package publishing disabled because
KTX-owned Python code ships inside the npm package as a bundled wheel. The
`python/ktx-sl` and `python/ktx-daemon` directories remain source packages for
development, not public release artifacts.
Build local package artifacts and verify the guarded dry-run publish path with:
```bash
source .venv/bin/activate
pnpm run artifacts:check
pnpm run release:readiness
pnpm run release:npm-publish
```
Run the live npm publish only from the manual `KTX Release` workflow with the
`publish_live` input enabled after the `NPM_TOKEN` secret is configured.
packages. See [Contributing](docs-site/content/docs/community/contributing.mdx)
for full development setup, testing, and PR guidelines.
## License

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View file

@ -5,23 +5,51 @@ import {
DocsTitle,
DocsDescription,
} from "fumadocs-ui/page";
import { notFound } from "next/navigation";
import { notFound, redirect } from "next/navigation";
import defaultMdxComponents from "fumadocs-ui/mdx";
import { CodeBlock } from "@/components/code-block";
import { DocsPageActions } from "@/components/docs-page-actions";
const docsIndexPath = "/docs/getting-started/introduction";
const docsIndexSlug = ["getting-started", "introduction"] as const;
/**
 * Reports whether the catch-all `slug` points at the docs root rather than a
 * concrete page: no slug at all, an empty segment list, or segments that join
 * to the empty string (for example `[""]`).
 */
function isDocsIndex(slug: string[] | undefined) {
  if (slug === undefined) return true;
  // An empty array also joins to "", so one comparison covers both remaining cases.
  return slug.join("/") === "";
}
/**
 * Reports whether `slug` resolves to the introduction page
 * (`getting-started/introduction`). A missing slug is never the hero page.
 */
function isHeroPage(slug: string[] | undefined) {
  if (slug === undefined) return false;
  const joinedPath = slug.join("/");
  return joinedPath === "getting-started/introduction";
}
export default async function Page(props: {
params: Promise<{ slug?: string[] }>;
}) {
const params = await props.params;
if (isDocsIndex(params.slug)) {
redirect(docsIndexPath);
}
const page = source.getPage(params.slug);
if (!page) notFound();
const MDX = page.data.body;
const hero = isHeroPage(params.slug);
return (
<DocsPage toc={page.data.toc}>
<DocsTitle>{page.data.title}</DocsTitle>
<DocsDescription>{page.data.description}</DocsDescription>
{!hero && (
<>
<div className="flex items-start justify-between gap-4">
<DocsTitle>{page.data.title}</DocsTitle>
<DocsPageActions
markdownUrl={`${page.url}.md`}
mdxSource={page.data.content}
/>
</div>
<DocsDescription>{page.data.description}</DocsDescription>
</>
)}
<DocsBody>
<MDX components={{ ...defaultMdxComponents, pre: CodeBlock }} />
</DocsBody>
@ -30,14 +58,16 @@ export default async function Page(props: {
}
export function generateStaticParams() {
return source.generateParams();
return [{ slug: [""] }, ...source.generateParams()];
}
export async function generateMetadata(props: {
params: Promise<{ slug?: string[] }>;
}) {
const params = await props.params;
const page = source.getPage(params.slug);
const page = source.getPage(
isDocsIndex(params.slug) ? [...docsIndexSlug] : params.slug,
);
if (!page) notFound();
return {

View file

@ -188,6 +188,24 @@ pre {
border-radius: 0 !important;
}
/* Code element inside a KTX code wrapper: laid out as a grid that is at least
   as wide as its widest content, with padding, border, radius and background
   reset and typography inherited from the surrounding block. The !important
   flags presumably override the theme's inline-code styles (confirm). */
.ktx-code code {
display: grid;
min-width: max-content;
padding: 0 !important;
border: 0 !important;
border-radius: 0 !important;
background: transparent !important;
font-size: inherit !important;
line-height: inherit !important;
color: inherit;
}
/* Each highlighted line is a block with a minimum height of 1.7em and no
   inline padding. The min-height presumably keeps blank lines from
   collapsing (confirm against the highlighter's markup). */
.ktx-code .line {
display: block;
min-height: 1.7em;
padding-inline: 0 !important;
}
/* Neutralize the outer figure styling that our wrapper now owns */
figure:has(> .ktx-code),
figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
@ -244,6 +262,74 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
color: #c8c3bc !important;
}
/* ── Mode D: Output preview (wizard prompts, status output) ── */
/* Output-preview container: muted background, 1px border, and a 3px left
   accent mixed 50/50 from the primary and border colors. Positioned relative
   so the absolutely positioned label and copy button anchor to it. */
.ktx-code-output {
background: var(--color-fd-muted);
border: 1px solid var(--color-fd-border);
border-left: 3px solid color-mix(in oklch, var(--color-fd-primary) 50%, var(--color-fd-border));
position: relative;
box-shadow: 0 1px 2px rgba(27, 27, 24, 0.02);
}
/* Dark theme: fixed dark background with low-alpha white borders and a
   translucent cyan left accent. */
.dark .ktx-code-output {
background: #111a1e;
border-color: rgba(255, 255, 255, 0.05);
border-left-color: rgba(34, 211, 238, 0.25);
}
/* Hover (light theme): tint the border toward the primary color and make the
   left accent fully primary. */
.ktx-code-output:hover {
border-color: color-mix(in oklch, var(--color-fd-primary) 25%, var(--color-fd-border));
border-left-color: var(--color-fd-primary);
}
/* Hover (dark theme): raise the alpha of the border and of the cyan accent. */
.dark .ktx-code-output:hover {
border-color: rgba(255, 255, 255, 0.08);
border-left-color: rgba(34, 211, 238, 0.45);
}
/* Small uppercase label pinned to the top-right corner of the output block.
   It is faint (opacity 0.4), ignores pointer events so it cannot block clicks,
   and sits at z-index 1, below the copy button (z-index 2). */
.ktx-code-output-label {
position: absolute;
top: 8px;
right: 14px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--color-fd-muted-foreground);
font-family: var(--font-display), var(--font-sans), sans-serif;
opacity: 0.4;
pointer-events: none;
z-index: 1;
}
/* Copy button pinned to the top-right corner, above the label (z-index 2).
   Its resting state is fully transparent and shifted up 4px; opacity and
   transform are transitioned so a revealing rule can fade and slide it in.
   The !important flags presumably override the button component's own
   positioning (confirm). */
.ktx-code-output-copy {
position: absolute !important;
top: 6px !important;
right: 6px !important;
opacity: 0;
transform: translateY(-4px);
transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease);
z-index: 2;
}
/* Reveal the copy button on pointer hover, and also whenever keyboard focus is
   inside the block. The button rests at opacity 0, so without :focus-within a
   user tabbing to it would land on an invisible control. */
.ktx-code-output:hover .ktx-code-output-copy,
.ktx-code-output:focus-within .ktx-code-output-copy {
  opacity: 0.5;
  transform: translateY(0);
}

/* Hide the "output" label under the same conditions, so it never shows
   through behind the revealed copy button. */
.ktx-code-output:hover .ktx-code-output-label,
.ktx-code-output:focus-within .ktx-code-output-label {
  opacity: 0;
}
/* Body of an output block: transparent so the container background shows
   through, with softer text than regular code. */
.ktx-code-body-output {
background: transparent !important;
color: var(--ktx-ink-soft) !important;
}
/* Dark theme: fixed muted blue-grey text color. */
.dark .ktx-code-body-output {
color: #8a9da6 !important;
}
/* ── Mode B: VS Code tab (filename) ───────── */
.ktx-code-tab {
background: var(--color-fd-card);
@ -477,14 +563,20 @@ th {
opacity: 0.7;
}
/* Hide the vertical indicator lines in sidebar sections */
#nd-sidebar div[data-state]::before,
#nd-sidebar a[data-active]::before {
content: none !important;
display: none !important;
}
/* Page link items */
#nd-sidebar a[data-active] {
font-size: 14px;
padding: 6px 12px;
border-radius: 6px;
margin-left: 0;
border-left: 2px solid transparent;
transition: background 0.15s ease, color 0.15s ease, border-color 0.15s ease;
transition: background 0.15s ease, color 0.15s ease;
}
#nd-sidebar a[data-active="false"]:hover {
@ -494,7 +586,6 @@ th {
#nd-sidebar a[data-active="true"] {
background: color-mix(in oklch, var(--color-fd-primary) 8%, transparent) !important;
border-left-color: var(--color-fd-primary) !important;
color: var(--color-fd-primary) !important;
font-weight: 500;
}

View file

@ -0,0 +1,11 @@
import { buildLlmsFullTxt } from "@/lib/llm-docs";
export const dynamic = "force-static";
/**
 * Route handler that returns the text produced by `buildLlmsFullTxt()` as a
 * UTF-8 plain-text response.
 */
export async function GET() {
  const body = await buildLlmsFullTxt();
  const headers = { "Content-Type": "text/plain; charset=utf-8" };
  return new Response(body, { headers });
}

View file

@ -0,0 +1,33 @@
import {
getLlmDocsPage,
getLlmDocsPages,
getPageMarkdown,
} from "@/lib/llm-docs";
export const dynamic = "force-static";
/**
 * Route handler for per-page Markdown.
 *
 * Looks up the docs page for the requested slug. A known page is answered with
 * its Markdown (`text/markdown`); an unknown slug gets a plain-text 404.
 */
export async function GET(
  _request: Request,
  props: { params: Promise<{ slug?: string[] }> },
) {
  const { slug } = await props.params;
  const page = getLlmDocsPage(slug);

  if (page) {
    const markdown = await getPageMarkdown(page);
    return new Response(markdown, {
      headers: { "Content-Type": "text/markdown; charset=utf-8" },
    });
  }

  return new Response("Documentation page not found.\n", {
    status: 404,
    headers: { "Content-Type": "text/plain; charset=utf-8" },
  });
}
/**
 * Lists one `{ slug }` params entry per page returned by `getLlmDocsPages()`.
 */
export function generateStaticParams() {
  const pages = getLlmDocsPages();
  return pages.map(({ slug }) => ({ slug }));
}

View file

@ -0,0 +1,11 @@
import { buildLlmsTxt } from "@/lib/llm-docs";
export const dynamic = "force-static";
/**
 * Route handler that returns the text produced by `buildLlmsTxt()` as a UTF-8
 * plain-text response.
 */
export function GET() {
  const body = buildLlmsTxt();
  const headers = { "Content-Type": "text/plain; charset=utf-8" };
  return new Response(body, { headers });
}

View file

@ -52,15 +52,14 @@ export function CodeBlock(props: Props) {
const language = detectLanguage(props, children);
const codeText = extractText(children);
const isTerminal =
(language !== null && TERMINAL_LANGS.has(language)) ||
WIZARD_GLYPHS.test(codeText);
const isTerminal = language !== null && TERMINAL_LANGS.has(language);
const isOutput = !isTerminal && WIZARD_GLYPHS.test(codeText);
const hasTitle = typeof title === "string" && title.length > 0;
// Mode A — Terminal
// Mode A — Terminal (commands the user types)
if (isTerminal) {
return (
<div className="ktx-code ktx-code-terminal group">
<div className="not-prose ktx-code ktx-code-terminal group">
<div className="ktx-code-terminal-head">
<span className="ktx-tl-dot" style={{ background: "#ff5f57" }} />
<span className="ktx-tl-dot" style={{ background: "#febc2e" }} />
@ -80,10 +79,23 @@ export function CodeBlock(props: Props) {
);
}
// Mode D — Output preview (wizard prompts, terminal output)
if (isOutput) {
return (
<div className="not-prose ktx-code ktx-code-output group relative">
<span className="ktx-code-output-label">output</span>
<CopyButton text={codeText} className="ktx-code-output-copy" />
<pre {...rest} className="ktx-code-body ktx-code-body-output">
{children}
</pre>
</div>
);
}
// Mode B — VS Code tab (filename present)
if (hasTitle) {
return (
<div className="ktx-code ktx-code-tab group">
<div className="not-prose ktx-code ktx-code-tab group">
<div className="ktx-code-tab-head">
<span className="ktx-file-glyph" data-lang={language ?? ""} />
<span className="ktx-code-tab-filename">{title}</span>
@ -99,7 +111,7 @@ export function CodeBlock(props: Props) {
// Mode C — Minimal default
return (
<div className="ktx-code ktx-code-minimal group relative">
<div className="not-prose ktx-code ktx-code-minimal group relative">
{language && <span className="ktx-code-minimal-lang">{language}</span>}
<CopyButton text={codeText} className="ktx-code-minimal-copy" />
<pre {...rest} className="ktx-code-body ktx-code-body-minimal">

View file

@ -0,0 +1,110 @@
"use client";
import { useState } from "react";
type CopyState = "idle" | "copied" | "error";
type Props = {
markdownUrl: string;
mdxSource: string;
};
/**
 * Action row for a docs page: a button that copies the Markdown served at
 * `markdownUrl`, a link that opens that URL, and a button that copies the raw
 * `mdxSource` text.
 */
export function DocsPageActions({ markdownUrl, mdxSource }: Props) {
  const viewLinkClasses =
    "inline-flex h-8 items-center rounded-md border border-fd-border bg-fd-background px-3 font-medium text-fd-muted-foreground transition-colors hover:border-fd-primary/40 hover:text-fd-foreground";

  return (
    <div className="not-prose flex flex-wrap items-center gap-2 text-xs">
      <CopyMarkdownButton markdownUrl={markdownUrl} />
      <a href={markdownUrl} className={viewLinkClasses}>
        View MD
      </a>
      <CopyTextButton label="Copy MDX" text={mdxSource} />
    </div>
  );
}
/**
 * Button that fetches `markdownUrl` (requesting `text/markdown`) and writes
 * the response text to the clipboard. A non-OK response or any thrown error
 * flashes the "error" state; success flashes "copied".
 */
function CopyMarkdownButton({ markdownUrl }: { markdownUrl: string }) {
  const [state, setState] = useState<CopyState>("idle");

  async function copyMarkdown() {
    try {
      const requestInit = { headers: { Accept: "text/markdown" } };
      const response = await fetch(markdownUrl, requestInit);
      if (!response.ok) {
        throw new Error(`Failed to fetch ${markdownUrl}`);
      }
      const markdown = await response.text();
      await navigator.clipboard.writeText(markdown);
      flash(setState, "copied");
    } catch {
      flash(setState, "error");
    }
  }

  const buttonLabel = labelForState(state, "Copy MD");
  return <ActionButton label={buttonLabel} onClick={copyMarkdown} state={state} />;
}
/**
 * Button that writes the given `text` to the clipboard, flashing "copied" on
 * success and "error" if the clipboard write throws.
 */
function CopyTextButton({ label, text }: { label: string; text: string }) {
  const [state, setState] = useState<CopyState>("idle");

  async function copyText() {
    try {
      await navigator.clipboard.writeText(text);
      flash(setState, "copied");
    } catch {
      flash(setState, "error");
    }
  }

  const buttonLabel = labelForState(state, label);
  return <ActionButton label={buttonLabel} onClick={copyText} state={state} />;
}
type ActionButtonProps = {
  label: string;
  onClick: () => void;
  state: CopyState;
};

/**
 * Shared button used by the copy actions. The current copy state is exposed
 * as `data-state`, which the `data-[state=…]` class variants use to recolor
 * the button for the "copied" and "error" states.
 */
function ActionButton(props: ActionButtonProps) {
  const { label, onClick, state } = props;
  const buttonClasses =
    "inline-flex h-8 items-center rounded-md border border-fd-border bg-fd-background px-3 font-medium text-fd-muted-foreground transition-colors hover:border-fd-primary/40 hover:text-fd-foreground data-[state=copied]:border-emerald-500/40 data-[state=copied]:text-emerald-600 data-[state=error]:border-red-500/40 data-[state=error]:text-red-600";

  return (
    <button
      type="button"
      onClick={onClick}
      className={buttonClasses}
      data-state={state}
    >
      {label}
    </button>
  );
}
/**
 * Picks the button text for a copy state: fixed feedback text for "copied"
 * and "error", otherwise the caller's resting `label`.
 */
function labelForState(state: CopyState, label: string) {
  switch (state) {
    case "copied":
      return "Copied";
    case "error":
      return "Copy failed";
    default:
      return label;
  }
}
// Pending "back to idle" timers, keyed by the state setter that scheduled
// them. Callers pass the setter returned by `useState`, whose identity is
// stable per component instance, so each button gets its own entry.
const pendingFlashResets = new WeakMap<(state: CopyState) => void, number>();

/**
 * Shows a transient feedback state, then returns to "idle" after 1.5 seconds.
 *
 * A reset still pending for the same setter is cancelled first. Otherwise a
 * second click shortly after the first would have its feedback cut short when
 * the first click's timer fired.
 *
 * @param setState State setter for the button's copy state.
 * @param state Feedback state to show ("copied" or "error").
 */
function flash(
  setState: (state: CopyState) => void,
  state: Exclude<CopyState, "idle">,
) {
  const pending = pendingFlashResets.get(setState);
  if (pending !== undefined) {
    window.clearTimeout(pending);
  }

  setState(state);

  const timer = window.setTimeout(() => {
    pendingFlashResets.delete(setState);
    setState("idle");
  }, 1500);
  pendingFlashResets.set(setState, timer);
}

View file

@ -2,42 +2,12 @@ export function Logo() {
return (
<div className="flex items-center gap-2 group">
<div className="relative flex items-center justify-center transition-transform duration-300 ease-out group-hover:rotate-[-4deg]">
<svg
width="22"
height="22"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
<img
src="/brand/ktx-mascot.png"
alt=""
aria-hidden="true"
>
<defs>
<linearGradient id="ktx-grad-a" x1="0" y1="0" x2="24" y2="24" gradientUnits="userSpaceOnUse">
<stop offset="0%" stopColor="var(--color-fd-primary)" />
<stop offset="100%" stopColor="var(--color-fd-primary)" stopOpacity="0.55" />
</linearGradient>
<linearGradient id="ktx-grad-b" x1="0" y1="0" x2="24" y2="24" gradientUnits="userSpaceOnUse">
<stop offset="0%" stopColor="var(--color-fd-primary)" stopOpacity="0.85" />
<stop offset="100%" stopColor="var(--color-fd-primary)" stopOpacity="0.4" />
</linearGradient>
</defs>
{/* Bottom layer */}
<path
d="M3 17 L12 21.5 L21 17 L12 12.5 Z"
fill="url(#ktx-grad-a)"
opacity="0.4"
/>
{/* Middle layer */}
<path
d="M3 12 L12 16.5 L21 12 L12 7.5 Z"
fill="url(#ktx-grad-b)"
opacity="0.7"
/>
{/* Top layer */}
<path
d="M3 7 L12 11.5 L21 7 L12 2.5 Z"
fill="var(--color-fd-primary)"
/>
</svg>
className="h-8 w-8 object-contain"
/>
</div>
<span
className="text-[15px] font-semibold text-fd-foreground tracking-tight"

View file

@ -0,0 +1,40 @@
---
title: Agent Instructions
description: Suggested instructions for coding assistants that need to read and cite KTX docs.
---
Use these instructions when a coding assistant needs to answer questions from the KTX documentation.
```text
When answering KTX docs questions:
1. Start with https://docs.kaelio.com/ktx/llms.txt.
2. Fetch the smallest relevant Markdown page from the index.
3. Prefer /docs/<path>.md over rendered HTML.
4. Use https://docs.kaelio.com/ktx/llms-full.txt only when the task needs broad docs context.
5. Quote commands exactly from docs pages.
6. If the docs and local repository behavior disagree, say what differs and prefer locally verified output for code changes.
```
## What this is for
This page is for documentation consumption only:
- answering questions about KTX
- finding the right docs page
- citing setup or CLI guidance
- helping an assistant avoid stale or invented commands
It does not describe local tool configuration.
## Minimal project prompt
```text
You are helping with KTX. Read https://docs.kaelio.com/ktx/llms.txt first, then fetch only the Markdown pages needed for the task. Do not scrape the rendered docs site when a .md route exists.
```
## Repository prompt
```text
Before editing KTX docs, read /llms.txt and the affected .md docs pages. Keep AI Resources focused on docs consumption. After editing, verify /llms.txt, /llms-full.txt, and any changed .md routes.
```

View file

@ -0,0 +1,50 @@
---
title: Agent Quickstart
description: A task-first route for coding agents that need to understand KTX docs.
---
This page is for coding assistants reading or citing the KTX docs. It is intentionally limited to documentation lookup, docs navigation, and safe command discovery.
## First read
Agents should start with the smallest source that answers the task:
1. [`/llms.txt`](/llms.txt) - discover the docs and preferred entry points.
2. The relevant per-page Markdown URL, for example `/docs/getting-started/quickstart.md`.
3. [`/llms-full.txt`](/llms-full.txt) - use only when the task needs broad context across many pages.
## Task router
| User asks the agent to explain... | Read first | Then read |
|------------------------------------|------------|-----------|
| What KTX does | [Introduction](/docs/getting-started/introduction) | [The Context Layer](/docs/concepts/the-context-layer) |
| How to start from a checkout | [Quickstart](/docs/getting-started/quickstart) | [ktx setup](/docs/cli-reference/ktx-setup) |
| How to check project readiness | [ktx status](/docs/cli-reference/ktx-status) | [Quickstart](/docs/getting-started/quickstart) |
| How context gets built | [Building Context](/docs/guides/building-context) | [ktx ingest](/docs/cli-reference/ktx-ingest) |
| How semantic YAML works | [Writing Context](/docs/guides/writing-context) | [ktx sl](/docs/cli-reference/ktx-sl) |
| How machine-readable CLI output is shaped | [ktx agent](/docs/cli-reference/ktx-agent) | [Markdown Access](/docs/ai-resources/markdown-access) |
## Operating workflow
Use this workflow when the user asks an assistant to answer a KTX docs question:
1. Read [`/llms.txt`](/llms.txt).
2. Pick the smallest relevant `.md` page.
3. Use [`/llms-full.txt`](/llms-full.txt) only if the answer needs multiple sections of the docs.
4. Quote commands exactly from the docs page.
5. If a command affects a local project, ask the user before assuming credentials or live services are available.
## Docs lookup from a shell
```bash
curl https://docs.kaelio.com/ktx/llms.txt
curl https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
```
## Guardrails
- Do not invent CLI flags. Fetch the relevant CLI reference page.
- Do not scrape rendered HTML when a `.md` route exists.
- Do not assume docs lookup requires agent-client configuration.
- Do not include credentials or secrets in prompts, URLs, or copied docs snippets.
- When docs and local CLI behavior disagree, prefer the local CLI output and mention the mismatch.

View file

@ -0,0 +1,38 @@
---
title: AI Resources
description: Machine-readable docs and prompt recipes for coding assistants reading KTX documentation.
---
Use this section when a coding assistant, IDE agent, or automation system needs to understand the KTX documentation.
> **Documentation index**
>
> Start with [`/llms.txt`](/llms.txt) to discover the available docs. Use [`/llms-full.txt`](/llms-full.txt) when the assistant needs the complete docs corpus in one Markdown response.
## Choose the right path
| Goal | Use this page |
|------|---------------|
| Tell a coding assistant how to approach KTX docs | [Agent Quickstart](/docs/ai-resources/agent-quickstart) |
| Fetch docs as Markdown instead of HTML | [Markdown Access](/docs/ai-resources/markdown-access) |
| Add lightweight instructions to an assistant prompt | [Agent Instructions](/docs/ai-resources/agent-instructions) |
| Copy prompts for common agent workflows | [Prompt Recipes](/docs/ai-resources/prompt-recipes) |
## Available resources
| Resource | What it gives agents |
|----------|----------------------|
| [`/llms.txt`](/llms.txt) | Curated index of high-value KTX docs and Markdown endpoints |
| [`/llms-full.txt`](/llms-full.txt) | Complete docs corpus in one plain-text Markdown response |
| `/docs/<path>.md` | Per-page Markdown for any docs page |
| Page-level actions | Copy Markdown, view Markdown, or copy MDX from rendered docs pages |
| Prompt recipes | Reusable prompts for docs lookup, setup help, and docs editing |
## Agent usage notes
When an assistant is unsure where to begin, use this order:
1. Read [`/llms.txt`](/llms.txt).
2. Fetch the specific Markdown page for the task.
3. Use [Agent Quickstart](/docs/ai-resources/agent-quickstart) to choose the next command or page.
4. Use page-level copy actions when the user wants the exact Markdown or MDX source.

View file

@ -0,0 +1,75 @@
---
title: Markdown Access
description: Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown.
---
KTX docs are available as plain Markdown so assistants do not need to parse the rendered HTML site.
## Index
Fetch the curated index:
```text
https://docs.kaelio.com/ktx/llms.txt
```
Use this file to discover high-value pages, task-specific entry points, and Markdown URLs.
## Full corpus
Fetch the complete docs corpus:
```text
https://docs.kaelio.com/ktx/llms-full.txt
```
Use this when an assistant needs broad context across setup, concepts, CLI reference, integrations, and troubleshooting. Prefer the smaller per-page Markdown route for narrow tasks.
## Per-page Markdown
Every docs page has a Markdown route:
```text
https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-agent.md
https://docs.kaelio.com/ktx/docs/guides/building-context.md
```
Clients can also request Markdown from the normal docs URL by sending an `Accept: text/markdown` header:
```bash
curl -H "Accept: text/markdown" https://docs.kaelio.com/ktx/docs/getting-started/quickstart
```
## Recommended retrieval order
1. Fetch `/llms.txt`.
2. Select one or two relevant page Markdown URLs.
3. Fetch `/llms-full.txt` only when page-level docs are not enough.
## Output contract
Markdown responses are designed for agent consumption:
- Frontmatter is removed.
- Each page includes a title, description, canonical URL, and Markdown URL.
- Code blocks stay as code blocks.
- Tables stay as Markdown tables.
- Missing docs pages return a plain-text `404` instead of silently falling back to HTML.
## Page actions
Rendered docs pages include page-level actions near the title:
- **Copy MD** copies the generated Markdown for the current page.
- **View MD** opens the generated Markdown route.
- **Copy MDX** copies the source MDX for the current page.
## Common mistakes
| Mistake | Better path |
|---------|-------------|
| Scraping the HTML page for a docs answer | Fetch the `.md` route instead |
| Loading `/llms-full.txt` for a single CLI flag lookup | Fetch the relevant CLI reference page |
| Treating `/llms.txt` as complete documentation | Use it as an index, then fetch linked pages |
| Copying rendered text by hand | Use **Copy MD** or **Copy MDX** from the page actions |

View file

@ -0,0 +1,11 @@
{
"title": "AI Resources",
"defaultOpen": true,
"pages": [
"index",
"agent-quickstart",
"markdown-access",
"agent-instructions",
"prompt-recipes"
]
}

View file

@ -0,0 +1,54 @@
---
title: Prompt Recipes
description: Copyable prompts for common KTX agent workflows.
---
Use these prompts when asking a coding assistant to work with KTX. Replace project names, connection ids, and business terms with your own values.
## Learn the docs
```text
Read https://docs.kaelio.com/ktx/llms.txt first. Then fetch only the KTX Markdown pages needed for this task. Do not scrape rendered HTML unless no Markdown route exists.
```
## Set up a project
```text
Set up KTX in this repository. Start by reading /docs/ai-resources/agent-quickstart.md and /docs/getting-started/quickstart.md. Use pnpm, not npm. After setup, run ktx status and summarize which steps are complete, which files changed, and what still needs credentials or user input.
```
## Find a command
```text
Find the correct KTX command for this task: <task>. Start with /llms.txt, then fetch the smallest relevant CLI reference .md page. Quote the exact command and flags from the docs.
```
## Explain setup
```text
Explain how to set up KTX for this repo. Read /docs/getting-started/quickstart.md and the relevant CLI reference pages. Summarize prerequisites, commands, generated files, and any credentials the user must provide manually.
```
## Compare concepts
```text
Explain the difference between these KTX concepts: <concepts>. Start from /llms.txt, fetch the relevant concept and guide pages as Markdown, and answer with links to the source pages.
```
## Review semantic changes
```text
Review the KTX semantic-layer and knowledge changes in this branch. Check that measures have clear definitions, joins use valid keys, hidden/internal columns are not exposed to agents, and validation passes. List concrete issues with file and line references first.
```
## Copy exact docs source
```text
Open the relevant KTX docs page and use the page action to copy the generated Markdown or source MDX. Preserve code fences and tables exactly.
```
## Update docs
```text
Update the KTX docs for agent readability. Keep AI Resources focused on docs consumption. After editing, verify /llms.txt, /llms-full.txt, and the affected .md routes.
```

View file

@ -1,152 +0,0 @@
---
title: Link Detection
description: How KTX's relationship detection performs on real-world schemas.
---
KTX infers foreign key relationships between tables even when the database declares no primary keys or foreign key constraints. This is critical for analytics warehouses, where constraints are rarely enforced. This page documents the methodology, scoring pipeline, and a reproducible benchmark you can run yourself.
## What this measures
Most analytics warehouses — Snowflake, BigQuery, Redshift — don't enforce referential integrity constraints. Tables like `fct_product_events` reference `dim_accounts` by convention (`account_id` → `id`), but nothing in the schema says so.
KTX's relationship detection discovers these links automatically. The benchmark measures how accurately it recovers known foreign key relationships from a schema with **all declared constraints removed** — the hardest operating mode.
Metrics tracked:
- **Accepted** — relationships scored above the accept threshold (default 0.85) and written to the project manifest
- **Review** — relationships scored between the review threshold (0.55) and accept threshold, flagged for human review
- **Rejected** — relationships scored below the review threshold
- **Skipped** — relationships not evaluated (e.g., filtered by candidate limits)
## Methodology
### Detection pipeline
Relationship detection runs as a multi-stage pipeline during `ktx dev scan`:
1. **Candidate generation** — scans the schema for potential FK relationships using multiple heuristics: exact column name matches, normalized table name matching, name inflection (singular/plural), column suffix patterns (`_id`, `_key`, `_code`, `_uuid`), self-references (`parent_id`, `manager_id`), and optionally embedding similarity and LLM proposals.
2. **Column profiling** — samples up to 10,000 rows per column (configurable via `profile_sample_rows`) to collect statistics: row counts, null rates, distinct value counts, uniqueness ratios, sample values, and text length ranges.
3. **Validation** — tests each candidate relationship against actual data by measuring target uniqueness, source coverage, violation ratio, and value overlap between child and parent columns.
4. **Scoring** — combines 7 weighted signals into a confidence score:
| Signal | Weight | What it captures |
|--------|--------|-----------------|
| Name similarity | 0.24 | How closely column/table names match FK conventions |
| Value overlap | 0.22 | What percentage of FK values exist in the PK column |
| Profile uniqueness | 0.22 | How unique the target column values are |
| Type compatibility | 0.10 | Whether data types are compatible (hard gate — score is 0 if incompatible) |
| Embedding similarity | 0.10 | Semantic similarity between column names |
| Profile null rate | 0.08 | Presence of non-null values |
| Structural prior | 0.04 | Baseline structural hints from schema conventions |
Each signal is normalized to \[0, 1\], multiplied by its weight, and summed. The final confidence is `0.56 + (weighted_sum × 0.65)`, clamped to \[0, 1\].
5. **Graph resolution** — resolves conflicts when multiple candidates target the same column, detects primary keys (by name pattern and validation), and classifies each relationship into `accepted`, `review`, or `rejected` based on thresholds.
### Threshold configuration
```yaml
scan:
relationships:
accept_threshold: 0.85
review_threshold: 0.55
```
Relationships scoring above `accept_threshold` are automatically accepted into the project manifest. Those between `review_threshold` and `accept_threshold` are flagged for analyst review. Below `review_threshold`, they're rejected.
### Test fixture
The benchmark uses the **Orbit-style product warehouse** — a synthetic schema modeled after a real SaaS analytics warehouse with all declared constraints removed. The fixture is a SQLite database with 6 tables:
| Table | Role | Estimated rows |
|-------|------|---------------|
| `dim_accounts` | Dimension | 3 |
| `dim_users` | Dimension | 4 |
| `dim_workspaces` | Dimension | 4 |
| `fct_product_events` | Fact | 5 |
| `fct_invoices` | Fact | 3 |
| `support_tickets` | Fact | 4 |
**Ground truth:** 6 primary keys (one `id` column per table) and 9 foreign key relationships, all `many_to_one`:
| Source column | Target |
|--------------|--------|
| `dim_users.account_id` | `dim_accounts.id` |
| `dim_workspaces.account_id` | `dim_accounts.id` |
| `dim_workspaces.user_id` | `dim_users.id` |
| `fct_product_events.account_id` | `dim_accounts.id` |
| `fct_product_events.user_id` | `dim_users.id` |
| `fct_product_events.workspace_id` | `dim_workspaces.id` |
| `fct_invoices.account_id` | `dim_accounts.id` |
| `support_tickets.account_id` | `dim_accounts.id` |
| `support_tickets.user_id` | `dim_users.id` |
The fixture runs in multiple modes to isolate the contribution of each pipeline stage: with LLM disabled, profiling disabled, validation disabled, and embeddings disabled.
## Results
Results for the default configuration will be added after the benchmark run is finalized.
## Reproducing the benchmark
### Prerequisites
- Node.js 22+
- pnpm
- The KTX repository cloned and dependencies installed (`pnpm install`)
### Running
From the repository root:
```bash
pnpm run relationships:verify-orbit
```
This runs `ktx dev scan` against the bundled SQLite fixture with enrichment disabled, then generates a verification report at:
```text
examples/orbit-relationship-verification/reports/orbit-verification.md
```
The report includes the full relationship summary, enrichment details, artifact paths, and any warnings.
### Custom project
To run verification against your own database (e.g., a local Orbit project):
```bash
KTX_ORBIT_PROJECT_DIR=/path/to/your-project pnpm run relationships:verify-orbit
```
### Configuration
The benchmark project configuration lives at `examples/orbit-relationship-verification/ktx.yaml`:
```yaml
scan:
enrichment:
backend: none
relationships:
enabled: true
llm_proposals: false
accept_threshold: 0.85
review_threshold: 0.55
profile_sample_rows: 10000
validation_concurrency: 4
```
Adjust `accept_threshold` and `review_threshold` to see how threshold changes affect the accepted/review/rejected distribution. Lower thresholds accept more relationships (higher recall, lower precision); higher thresholds are more conservative.
## Broader benchmark suite
Beyond the Orbit fixture, KTX includes a full benchmark corpus at `packages/context/test/fixtures/relationship-benchmarks/` with fixtures across multiple tiers:
- **Unit** — minimal schemas testing individual heuristics
- **Row-bearing** — small schemas with data for validation testing
- **Product** — full warehouse schemas like the Orbit fixture
Fixtures from public datasets (Chinook, Sakila, AdventureWorks, Northwind) supplement the synthetic fixtures. The benchmark runner measures precision, recall, and F1 for both primary key and foreign key detection across all fixtures and modes.

View file

@ -1,5 +0,0 @@
{
"title": "Benchmarks",
"defaultOpen": true,
"pages": ["link-detection"]
}

View file

@ -7,7 +7,7 @@ Hidden commands that provide machine-readable JSON output for coding agents. The
All `ktx agent` subcommands require `--json` and produce structured JSON output on stdout.
## Usage
## Command signature
```bash
ktx agent <subcommand> --json [options]
@ -124,3 +124,25 @@ ktx agent sql execute --json \
--sql-file /tmp/query.sql \
--max-rows 500
```
## Output
Every `ktx agent` command writes JSON to stdout and diagnostic text to stderr. Agents should parse stdout as JSON and treat a non-zero exit code as a failed tool call.
```json
{
"ok": true,
"data": {
"type": "agent-response"
}
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Missing JSON output | `--json` was omitted | Re-run the same subcommand with `--json` |
| Unknown connection id | The requested connection is not configured in `ktx.yaml` | Call `ktx agent context --json` or `ktx connection list` to discover valid ids |
| Query file cannot be read | `--query-file` points to a missing or invalid JSON file | Write the query payload to a real file and pass its absolute path |
| SQL execution rejected | SQL is not read-only or `--max-rows` is missing | Use semantic-layer queries first; for direct SQL, pass read-only SQL and an explicit row limit |

View file

@ -5,7 +5,7 @@ description: "Add, list, test, and map data sources."
Manage database and source connections in your KTX project. Connections define how KTX reaches your data warehouse, BI tools, and context sources.
## Usage
## Command signature
```bash
ktx connection <subcommand> [options]
@ -147,3 +147,28 @@ ktx connection mapping refresh metabase-prod --auto-accept
# Pick Notion root pages interactively
ktx connection notion pick my-notion
```
## Output
Interactive commands render prompts and status text. Commands with `--json` return machine-readable JSON suitable for scripts and agents.
```json
{
"connections": [
{
"id": "my-warehouse",
"driver": "postgres",
"readonly": false
}
]
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Connection test fails | Credentials, network access, database, warehouse, or schema is invalid | Verify the same URL with the database's native client, then rerun `ktx connection add ... --force` |
| Literal credentials rejected | KTX avoids writing raw secrets to `ktx.yaml` by default | Use `env:NAME` or `file:/path/to/secret`; use `--allow-literal-credentials` only for local throwaway projects |
| Mapping validation fails | BI database mappings do not point at valid warehouse connections | Run `ktx connection mapping refresh <connectionId> --auto-accept`, then explicitly set any mappings that remain invalid |
| Notion pick cannot run non-interactively | `--no-input` was used without root page or database ids | Pass `--root-page-id`, `--root-database-id`, or `--root-data-source-id` with `--no-input` |

View file

@ -5,7 +5,7 @@ description: "Low-level diagnostics, scans, adapter commands, and mapping tools.
Hidden commands for low-level project management, diagnostics, direct adapter control, and shell completion. Most users interact with these through higher-level commands like [`ktx ingest`](/docs/cli-reference/ktx-ingest) and [`ktx setup`](/docs/cli-reference/ktx-setup), but `ktx dev` provides direct access when you need fine-grained control.
## Usage
## Command signature
```bash
ktx dev <subcommand> [options]
@ -145,3 +145,22 @@ ktx dev completion zsh
# Install zsh completions
ktx dev completion zsh --install
```
## Output
`ktx dev` commands are diagnostic and may print plain text, JSON, or visual reports depending on the selected flags.
| Mode | How to request it | Use case |
|------|-------------------|----------|
| Plain text | `--plain` or default diagnostic output | Human-readable terminal inspection |
| JSON | `--json` | Agent parsing and automation |
| Visual report | `--viz` | Interactive memory-flow and ingest debugging |
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Doctor reports missing runtime pieces | Packages, Python environment, or linked CLI are not ready | Run `pnpm install`, `pnpm run setup:dev`, and `uv sync --all-groups` |
| Ingest run cannot find adapter | `--adapter` does not match a supported source adapter | Use configured source names from `ktx.yaml` or run higher-level `ktx ingest` |
| Replay/report file cannot be read | The report path is wrong or the run id is not stored locally | Run `ktx dev ingest status --json` to discover stored run ids and report files |
| Visual output fails in CI | TUI rendering requires an interactive terminal | Use `--plain --no-input` or `--json --no-input` |

View file

@ -5,7 +5,7 @@ description: "Build and refresh context from configured sources."
Ingest context from your configured sources — dbt, Looker, Metabase, MetricFlow, LookML, or Notion. The ingest process extracts metadata from your tools, then uses an LLM agent to reconcile it with existing context, writing semantic sources and knowledge pages to your project.
## Usage
## Command signature
```bash
ktx ingest [connectionId] [options]
@ -68,3 +68,28 @@ ktx ingest status --json
## Low-level ingest commands
For adapter-level control, use `ktx dev ingest`. See [`ktx dev`](/docs/cli-reference/ktx-dev) for the full low-level ingest surface including `run`, `status`, `watch`, and `replay` with output mode options (`--plain`, `--json`, `--viz`).
## Output
Ingest run commands print progress and create a stored ingest report. `ktx ingest status --json` returns the run state, adapter, connection, and summary information.
```json
{
"runId": "ingest-local-abc123",
"status": "completed",
"connectionId": "dbt-main",
"summary": {
"semanticSourcesChanged": 4,
"knowledgePagesChanged": 2
}
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| No eligible sources | `ktx.yaml` has no configured context source for ingest | Add a source with `ktx setup` or `ktx connection add`, then rerun ingest |
| Ingest needs credentials | The source adapter requires API or git access | Configure the referenced environment variable or secret file |
| Latest run not found | No ingest run has been started in this project | Run `ktx ingest <connectionId>` or `ktx ingest --all` first |
| Report watch fails in a non-interactive shell | Visual report needs a terminal | Use `ktx ingest status --json` for agent and CI workflows |

View file

@ -7,7 +7,7 @@ Discover your database schema — tables, columns, types, constraints, and relat
Scan commands live under `ktx dev scan`. See also the [Building Context](/docs/guides/building-context) guide for a walkthrough.
## Usage
## Command signature
```bash
ktx dev scan <connectionId> [options]
@ -143,3 +143,27 @@ ktx dev scan relationship-calibration --accept-threshold 0.9 --review-threshold
# Get threshold advice based on review decisions
ktx dev scan relationship-thresholds
```
## Output
Scan commands write scan artifacts under the KTX project directory and print status or report summaries. Use `--json` on report and relationship commands when an agent needs structured output.
```json
{
"runId": "scan-local-abc123",
"status": "completed",
"mode": "structural",
"changes": {
"tablesAdded": 42
}
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Scan cannot connect | Connection credentials or network access are invalid | Run `ktx connection test <connectionId>` and update the connection before scanning |
| Enriched scan cannot describe columns | LLM credentials are missing or invalid | Complete LLM setup with `ktx setup` before enriched scans |
| Relationship apply writes nothing | No accepted candidates match the provided run id or candidate ids | Run `ktx dev scan relationships <runId> --status accepted` first to confirm which candidates are accepted |
| Calibration is not ready | Too few reviewed relationship labels exist | Review and accept/reject more candidates, then rerun calibration |

View file

@ -5,7 +5,7 @@ description: "Run the MCP stdio server."
Start a Model Context Protocol (MCP) server that exposes your KTX project's context to coding agents. The server runs over stdio and provides tools for querying semantic sources, searching knowledge, managing connections, and running ingests.
## Usage
## Command signature
```bash
ktx serve --mcp stdio [options]
@ -49,3 +49,26 @@ ktx serve --mcp stdio --project-dir /path/to/my-project
## Agent integration
The MCP server is typically configured through `ktx setup --agents` rather than started manually. See the [Serving Agents](/docs/guides/serving-agents) guide and [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool configuration.
## Output
`ktx serve --mcp stdio` communicates through MCP messages on stdio. It is meant to be launched by an agent client; its output is not intended to be read directly in a terminal.
```json
{
"command": "ktx",
"args": ["serve", "--mcp", "stdio", "--semantic-compute", "--execute-queries"],
"env": {
"KTX_PROJECT_DIR": "/home/user/analytics"
}
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Agent cannot start server | The agent config cannot find the `ktx` binary | Install `@kaelio/ktx` globally with `npm install -g @kaelio/ktx` or use an absolute command path in the agent config |
| Semantic tools are unavailable | Server was started without `--semantic-compute` | Add `--semantic-compute` or `--semantic-compute-url` to the server args |
| Query execution is denied | Server was started without `--execute-queries` | Add `--execute-queries` only for trusted projects where read-only execution is intended |
| Context resolves to wrong project | `KTX_PROJECT_DIR` is missing or points elsewhere | Set `KTX_PROJECT_DIR` to the project containing the intended `ktx.yaml` |

View file

@ -5,7 +5,7 @@ description: "Set up or resume a local KTX project."
Interactive wizard that walks you through configuring LLM credentials, embeddings, database connections, context sources, and agent integrations. When run without flags in a directory that has no `ktx.yaml`, it launches the full guided flow. When run in an existing project, it resumes from the first incomplete step.
## Usage
## Command signature
```bash
ktx setup [options]
@ -173,3 +173,27 @@ ktx setup context watch
# Run the packaged demo
ktx setup demo
```
## Output
Interactive setup renders prompts and progress messages. `ktx setup status` is the best command for agents because it summarizes readiness in one response.
```text
KTX project: /home/user/analytics
Project ready: yes
LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Primary sources configured: yes (postgres-warehouse)
Context sources configured: yes (dbt-main)
KTX context built: yes
Agent integration ready: yes (codex:project)
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Setup resumes an unexpected project | `KTX_PROJECT_DIR` or the nearest `ktx.yaml` points to another directory | Pass `--project-dir <path>` explicitly |
| Health check for model fails | Provider key or model id is invalid | Set the correct environment variable or secret file and rerun setup |
| Setup cannot run in CI | Interactive prompts need a TTY | Use `--yes --no-input` with explicit flags for required values |
| Agent integration missing | Setup skipped the agents step | Run `ktx setup --agents --target <target> --agent-install-mode both` |

View file

@ -5,7 +5,7 @@ description: "List, read, validate, query, or write semantic-layer sources."
Interact with your project's semantic layer. Semantic sources are YAML definitions that describe your tables, columns, measures, joins, and grain — the vocabulary agents use to generate correct SQL.
## Usage
## Command signature
```bash
ktx sl <subcommand> [options]
@ -120,3 +120,28 @@ ktx sl query \
--execute \
--max-rows 1000
```
## Output
Semantic-layer commands return human-readable output by default. Use `--json` or `--format json` when an agent needs structured output; use `--format sql` to inspect generated SQL before execution.
```json
{
"sql": "SELECT orders.status, SUM(orders.total_amount) AS total_revenue FROM public.orders GROUP BY orders.status",
"rows": [
{
"orders.status": "completed",
"total_revenue": 125000
}
]
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Source not found | Source name or connection id is wrong | Run `ktx sl list --json` and retry with an exact source name and connection id |
| Validation fails | YAML references missing columns, invalid joins, or invalid SQL expressions | Fix the source YAML and rerun `ktx sl validate` |
| Query compile fails | Measure, dimension, filter, or segment name is invalid | Read the source with `ktx sl read`, then retry using declared fields |
| Execution returns too many rows | `--max-rows` is missing or too high | Add `--max-rows` with a bounded value before executing |

View file

@ -5,7 +5,7 @@ description: "Show current project status."
Print the current setup status of your KTX project — which steps are complete, which need attention, and whether the project is ready for agents.
## Usage
## Command signature
```bash
ktx status [options]
@ -26,3 +26,26 @@ ktx status
# Get status as JSON (useful for scripting)
ktx status --json
```
## Output
`ktx status` prints readiness for each setup area. Agents should use `ktx status --json` when they need to branch on readiness state.
```json
{
"projectReady": true,
"llmReady": true,
"embeddingsReady": true,
"primarySourcesConfigured": true,
"contextBuilt": true,
"agentIntegrationReady": true
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| No KTX project found | Current directory has no `ktx.yaml` and `KTX_PROJECT_DIR` is unset | Run from a KTX project or set `KTX_PROJECT_DIR` |
| Project ready is false | One or more setup steps are incomplete | Run `ktx setup` to resume setup |
| Agent integration ready is false | No agent target has been installed | Run `ktx setup --agents --target <target>` |

View file

@ -5,7 +5,7 @@ description: "List, read, search, or write knowledge pages."
Manage knowledge pages in your KTX project. Knowledge pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data.
## Usage
## Command signature
```bash
ktx wiki <subcommand> [options]
@ -90,3 +90,28 @@ ktx wiki write data-freshness \
--content "The orders table refreshes every 15 minutes..." \
--ref "https://wiki.example.com/data-pipelines"
```
## Output
Wiki commands print local knowledge pages and search results. Agents should search first, then read the most relevant page by key.
```json
{
"results": [
{
"key": "revenue-definitions",
"summary": "Canonical revenue metric definitions",
"score": 0.92
}
]
}
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing |
| Read fails for a key | The page key is wrong or scoped to a different user | Run `ktx wiki list` or search again to get the exact key |
| Write fails due to missing fields | `--summary` or `--content` was omitted | Pass both fields, and keep the summary short enough for search results |
| Agent writes duplicate pages | The agent did not search existing pages first | Always run `ktx wiki search` before `ktx wiki write` |

View file

@ -7,6 +7,11 @@ KTX is an open-source project and welcomes contributions — bug fixes, new conn
## Development setup
This page is for contributors working on the KTX repository. To use KTX in an
analytics project, install the published
[`@kaelio/ktx`](https://www.npmjs.com/package/@kaelio/ktx) package as described in the
[Quickstart](/docs/getting-started/quickstart).
### Prerequisites
- **Node.js 22+** and **pnpm** — for the TypeScript workspace
@ -44,7 +49,9 @@ pnpm run setup:dev
pnpm run link:dev
```
This makes the `ktx` command available globally, pointing at your local build.
This makes the `ktx-dev` command available globally, pointing at your local
build. Use this development binary when you need to test unpublished repository
changes.
## Repository structure
@ -220,3 +227,17 @@ Before submitting a pull request:
5. **Don't commit artifacts** — `node_modules/`, `.venv/`, `dist/`, coverage output, and local databases should not be committed.
For larger features or architectural changes, open an issue first to discuss the approach.
## Agent usage notes
Use this page when an agent is modifying the KTX repository itself rather than using KTX in an analytics project.
| Agent task | Command or section |
|------------|--------------------|
| Prepare the workspace | `pnpm install`, `pnpm run setup:dev`, `uv sync --all-groups` |
| Verify TypeScript changes | `pnpm run type-check`, `pnpm run test`, or package-filtered equivalents |
| Verify Python changes | `uv run pytest -q` and `uv run pre-commit run --files <files>` |
| Add a connector | Adding a connector |
| Check style expectations | Code conventions |
Common recovery path: if a check fails because generated files or local runtimes are missing, run the setup commands first. If a check fails because of a real type, lint, or test error, fix the source file and rerun the smallest failing check before broadening verification.

View file

@ -29,43 +29,51 @@ This reconciliation step is what separates auto-ingestion from a simple sync. A
Auto-ingestion is designed to plug into a PR-based workflow. Run ingestion on a branch, review the changed YAML and Markdown files, and merge them the same way you merge dbt models or application code.
```
dbt / Looker / Metabase KTX project repo
┌──────────────┐ ┌──────────────────────┐
│ Metadata │───ingestion──▶│ Branch: ingest/... │
│ changes │ │ │
└──────────────┘ │ + 3 new sources │
│ ~ 2 updated joins │
│ + 1 knowledge page │
│ │
│ ──── Open PR ──── │
│ │
│ Review semantic diff │
│ Approve & merge │
└──────────────────────┘
Agents see updated
context immediately
```text
dbt / Looker / Metabase / Notion
|
v
metadata changes
|
v
nightly cron or CI ingest
|
v
branch: ingest/nightly
|
| + 3 new sources
| ~ 2 updated joins
| + 1 knowledge page
v
open PR
|
v
review semantic diff
|
v
approve & merge
|
v
agents see updated context
```
A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge.
Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest --all --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning.
Once merged, agents querying through KTX's MCP server or CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request.
This workflow gives you the same review guarantees you have for dbt models. No semantic source reaches production without a human approving it. But unlike maintaining context manually, the heavy lifting — discovering new tables, drafting source definitions, extracting business rules from documentation — is done by the ingestion agent. You review and approve. You don't write from scratch.
## Feedback loops
Context improves over time through three feedback channels.
Context improves over time through two feedback channels.
**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a knowledge page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest.
**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, source definitions can be tightened with better filters or grain declarations, and relationship thresholds can be adjusted.
**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, and source definitions can be tightened with better filters, join paths, or grain declarations.
**Relationship calibration.** KTX infers foreign key relationships between tables automatically, even when the database has no declared constraints. It does this by analyzing column names, types, value distributions, and asking the LLM for proposals. Each inferred relationship gets a confidence score. You control two thresholds: `acceptThreshold` (relationships above this score are accepted automatically, default 0.85) and `reviewThreshold` (relationships between review and accept are flagged for human review, default 0.55). As you accept or reject proposals, the system learns which patterns match your schema conventions.
Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Relationship calibration tunes the discovery process to your warehouse's conventions. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration.
Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration.
## Deterministic replay
@ -80,3 +88,14 @@ This matters for three reasons.
**Reproducibility.** Because ingestion sessions are recorded as structured transcripts (tool calls and responses, not just logs), they can be replayed for testing and validation. If you change your ingestion configuration or upgrade the LLM, you can replay previous sessions to see how the output would differ. This gives you a safety net for changes that affect how context is generated.
The transcript is stored with local ingest run state and can be reviewed or replayed when you need to audit a decision. Commit the resulting YAML and Markdown changes; commit reports or transcripts only when they are part of your team's review workflow.
## Agent usage notes
Use this page when an agent needs to explain review workflows, ingestion diffs, replayability, or why KTX writes YAML and Markdown instead of hiding context in a hosted service.
| Agent task | Relevant section | Next page |
|------------|------------------|-----------|
| Explain how generated context should be reviewed | The git workflow | [Building Context](/docs/guides/building-context) |
| Diagnose why ingestion changed a semantic source | Auto-ingestion and Deterministic replay | [ktx ingest](/docs/cli-reference/ktx-ingest) |
| Explain how context improves over time | Feedback loops | [Building Context](/docs/guides/building-context) |
| Tell a user what to commit | The git workflow | [Writing Context](/docs/guides/writing-context) |

View file

@ -9,7 +9,7 @@ Give an agent access to your database and it will generate SQL. It might even pr
The agent doesn't know that `orders.amount` includes refunds and needs a status filter. It doesn't know that `customers` should join to `orders` on `customer_id`, not `id`. It doesn't know that your team stopped using `legacy_segments` six months ago, or that "enterprise" means contracts over $100k, not just big logos. It sees column names and types. It doesn't see your business.
This isn't a model capability problem. GPT-4, Claude, and Gemini can all write correct SQL — when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision.
This isn't a model capability problem. Claude Code, Codex, and your BI agents can write correct SQL when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision.
Analytics engineers already know this pain. It's the same reason you write dbt tests, maintain a data dictionary, and spend half of standup explaining why someone's dashboard number doesn't match the board deck. The difference is that agents make decisions at machine speed, so the wrong context propagates faster than a human can catch it.
@ -19,9 +19,9 @@ The industry has moved through three distinct approaches to getting AI and data
**Wave one: database access.** Connect an LLM to a database, let it generate SQL. This works for simple lookups — "how many orders last week?" — but breaks on anything that requires business knowledge. The agent guesses at joins, invents metrics, and hallucinates table relationships. Every query is a coin flip.
**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but it's still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter.
**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but building and maintaining that structure by hand is time-consuming, and the result is still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter.
**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, data quality gotchas, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer.
**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, known data quality issues, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer.
## What a context layer is
@ -29,6 +29,13 @@ A context layer is the infrastructure that gives agents the business knowledge t
KTX organizes context into four pillars:
- Semantic sources
- Knowledge pages
- Scan artifacts
- Provenance
Each pillar covers a different kind of context agents need before they can safely write SQL, update semantic definitions, or explain an analytics result.
**Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives.
```yaml
@ -60,7 +67,7 @@ measures:
expr: count(id)
```
**Knowledge pages** are Markdown documents that capture business definitions, rules, and gotchas — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it.
**Knowledge pages** are Markdown documents that capture business definitions, rules, and operating context — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it.
```markdown
---
@ -90,13 +97,12 @@ Together, these four pillars give agents enough context to produce analytics art
## How KTX compares
KTX is a context layer, and its structured core is an agent-native semantic layer. That matters. MetricFlow, Cube, and Malloy all give teams ways to model metrics, dimensions, joins, and generated SQL. KTX covers that same semantic-layer job, then adds the surrounding context agents need to use it well: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools.
KTX is a context layer with an agent-native semantic layer at its core. MetricFlow, Cube, and Malloy model metrics, dimensions, joins, and generated SQL. KTX covers that semantic-layer work, then adds the context agents need to use and maintain it: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools.
The primary user is different. MetricFlow is centered on dbt-style metric definitions. Cube is centered on a governed semantic runtime for BI, applications, and agents. Malloy is centered on an expressive modeling and query language. KTX is centered on agents that need to read a semantic model, change it, validate it, inspect the generated SQL, and leave a reviewable git diff.
The workflow is the difference. Traditional semantic layers are powerful, but they are usually built and maintained through manual modeling work, product-specific runtimes, or language-specific workflows. They are not agent-native by default, which makes them harder for agents to inspect, edit, validate, and review in a tight loop. KTX is designed for agents that need to read context, change semantic files, inspect generated SQL, and leave a reviewable git diff.
| | KTX semantic layer | MetricFlow | Cube | Malloy |
|---|---|---|---|---|
| **Design center** | Agent-native semantic modeling inside a broader context layer | Metric definitions and dbt semantic models | Governed serving layer for BI, embedded analytics, APIs, and agents | Semantic modeling and analytical query language |
| **Model surface** | Plain YAML sources plus Markdown knowledge pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations |
| **What it models** | Sources, columns, measures, segments, joins, grain, filters, default time dimensions, and context references | Semantic models, entities, dimensions, measures, metrics, time grains, and metric types | Cubes, views, measures, dimensions, segments, joins, hierarchies, policies, and rollups | Sources, joins, dimensions, measures, calculations, nested results, and query pipelines |
| **Agent edit loop** | First-class. Agents can patch small files, save imperfect drafts, run validation, query through MCP, inspect SQL, and refine in the same workflow | Possible, but the interface is a dbt/metric workflow rather than an agent context workflow | Possible through code-first models and platform APIs, but changes are tied to runtime deployment and governance concerns | Possible, but agents must operate in Malloy's language and compiler model |
@ -105,15 +111,7 @@ The primary user is different. MetricFlow is centered on dbt-style metric defini
| **Context around semantics** | Built in: wiki pages, scan artifacts, relationship inference, ingest transcripts, replay, and agent-facing MCP tools | Primarily metric and dbt project context | Descriptions and `meta.ai_context` inside the semantic model, plus platform agent features | Annotations/tags can carry metadata; surrounding context depends on the application |
| **Best fit** | Agents maintaining analytics code, metrics, joins, SQL, docs, and semantic definitions | Teams standardizing metrics inside dbt workflows | Production semantic APIs, BI integrations, access control, caching, and concurrency | Expressive modeling and exploratory analysis above SQL |
**Agent-native by design.** KTX's advantage is not just that the files are YAML. The whole loop is shaped for agents: sources are small, overlays can add measures or computed columns without copying entire generated schemas, writes are permissive so an agent can save a draft, and validation/query tools give immediate feedback. An agent can move from "this metric is wrong" to "here is the semantic diff, generated SQL, and supporting context" without leaving the project.
**A semantic layer plus the context to use it.** Traditional semantic layers define what to calculate. KTX also stores why the definition exists, where it came from, what schema evidence supports it, and what an agent did when it changed. A measure can live next to a knowledge page about exclusions, a scan artifact that proves the join path, and an ingest transcript that explains the source of the definition. That is the difference between giving an agent a metric catalog and giving it operational memory.
**Fan-out handling is explicit and reviewable.** KTX asks model authors and agents to declare grain and relationship direction. The planner uses that metadata to avoid silent row multiplication: it detects `one_to_many` fan-out paths, separates independent fact measures into aggregate-locality CTEs, and refuses filters that would be unsafe to apply after pre-aggregation. Cube, MetricFlow, and Malloy all have strong approaches to this class of problem, but KTX's approach is deliberately inspectable in the files and in the generated plan.
**Where other systems are stronger.** KTX draws a clear product boundary around agent-native context and semantic modeling. Cube is stronger when you need a production semantic API with access policies, pre-aggregations, refresh workers, and high-concurrency serving. MetricFlow is stronger when your primary workflow is dbt-native metric standardization. Malloy is stronger when you want a full analytical language with nested query shapes. KTX is strongest when the semantic layer is the substrate agents will read, edit, validate, and extend as part of day-to-day analytics engineering.
**When KTX replaces your semantic layer vs. works beside it.** If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow, LookML, Looker, Metabase, dbt, or Notion, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against.
If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow or LookML, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against.
## The plain-files philosophy
@ -145,3 +143,14 @@ my-project/
Semantic sources and knowledge pages are committed to git. The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run.
This means your analytics context travels with your code. You can fork it, branch it, review it in a PR, and merge it with the same tools you use for dbt models. There's no sync problem between a remote server and your local state. There's no migration to run. The files are the source of truth.
## Agent usage notes
Use this page when an agent needs to explain why KTX exists, why schema-only database access is not enough, or how KTX differs from MetricFlow, Cube, Malloy, and traditional semantic layers.
| Agent task | Relevant section | Next page |
|------------|------------------|-----------|
| Explain why a database agent made a plausible but wrong query | The problem | [Writing Context](/docs/guides/writing-context) |
| Decide whether a metric belongs in YAML or Markdown | What a context layer is | [Writing Context](/docs/guides/writing-context) |
| Compare KTX to another semantic layer | How KTX compares | [Primary Sources](/docs/integrations/primary-sources) |
| Explain reviewability and source of truth | The plain-files philosophy | [Context as Code](/docs/concepts/context-as-code) |

View file

@ -1,59 +1,92 @@
---
title: Introduction
description: What KTX is and who it's for.
description: How KTX gives analytics agents trusted context for warehouse work.
---
Data agents can write SQL. The hard part is making sure they write the SQL your analytics team would have written.
KTX is the agent-native context layer for analytics engineering. At its core is a semantic layer: YAML sources that define tables, columns, measures, joins, grain, filters, segments, and computed fields. Around that core, KTX adds the context analytics agents need to work safely: warehouse scans, knowledge pages, ingestion from existing tools, provenance, validation, and MCP access.
KTX projects are plain files — YAML, Markdown, and SQLite — that you commit to git and review in PRs, just like dbt models. Agents can read them, edit them, validate them, query through them, and leave behind a diff your team can review.
<div className="not-prose mb-14">
<div className="mb-8">
<h1
className="text-4xl font-extrabold tracking-tight lg:text-5xl"
style={{
fontFamily: 'var(--font-display)',
background: 'linear-gradient(180deg, var(--color-fd-foreground) 0%, color-mix(in oklch, var(--color-fd-foreground) 75%, var(--color-fd-primary)) 100%)',
WebkitBackgroundClip: 'text',
backgroundClip: 'text',
color: 'transparent',
WebkitTextFillColor: 'transparent',
lineHeight: '1.1',
letterSpacing: '0',
}}
>
Make analytics context{'\n'}usable by agents
</h1>
<p className="mt-4 text-lg text-fd-muted-foreground max-w-2xl" style={{ lineHeight: '1.7' }}>
KTX turns warehouse metadata, semantic definitions, and business knowledge
into reviewable project files that agents can use while planning, querying,
and updating analytics work.
</p>
</div>
<div className="flex flex-wrap gap-3">
<a
href="/docs/getting-started/quickstart"
className="inline-flex h-10 items-center rounded-lg bg-fd-primary px-5 text-sm font-medium text-fd-primary-foreground transition-colors hover:opacity-90"
>
Get Started
</a>
<a
href="/docs/concepts/the-context-layer"
className="inline-flex h-10 items-center rounded-lg border border-fd-border bg-fd-background px-5 text-sm font-medium text-fd-foreground transition-colors hover:bg-fd-muted"
>
The Context Layer
</a>
<a
href="/docs/guides/building-context"
className="inline-flex h-10 items-center rounded-lg border border-fd-border bg-fd-background px-5 text-sm font-medium text-fd-foreground transition-colors hover:bg-fd-muted"
>
Building Context
</a>
</div>
</div>
## Who KTX is for
KTX is built for analytics engineers and data teams who want data agents to work on real analytics systems, not just generate one-off SQL.
KTX is built for analytics engineers and data teams who want data agents to
work on real analytics systems — not just generate one-off SQL.
Use KTX when you want agents to:
- Generate SQL from approved measures, dimensions, and joins
- Repair or extend semantic definitions through reviewable git diffs
- Explain where a metric definition came from and what business rules shape it
- Use warehouse scans and relationship evidence instead of guessing join paths
- Work alongside **dbt**, **LookML**, **MetricFlow**, **Looker**, **Metabase**, **Notion**, and BI platforms
- Work with warehouses like **PostgreSQL**, **Snowflake**, **BigQuery**, **ClickHouse**, **MySQL**, or **SQL Server**
- **Generate SQL** from approved measures and joins
- **Repair semantic definitions** through reviewable diffs
- **Explain metric provenance** with warehouse evidence
- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI platforms
If you've ever watched an agent confidently generate a query that joins on the wrong key or invents a metric that doesn't exist, KTX is the fix.
Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server.
## What KTX gives agents
- **A semantic layer they can edit** — plain YAML sources with measures, dimensions, joins, grain, segments, filters, and computed columns
- **Safe query planning** — grain-aware SQL generation, fan-out detection, chasm-trap handling, and dialect transpilation
- **Business context** — Markdown knowledge pages for definitions, rules, exceptions, and data quality notes
- **Schema evidence** — warehouse scans with table metadata, column stats, constraints, and inferred relationships
- **Provenance** — ingest transcripts and replay metadata that explain where context came from and why it changed
- **An agent-facing API** — MCP and CLI tools for reading, writing, validating, searching, and querying context
## How these docs are organized
## Explore the docs
<Cards>
<Card title="Quickstart" href="/docs/getting-started/quickstart">
Set up KTX and build your first context in under 10 minutes.
</Card>
<Card title="Concepts" href="/docs/concepts/the-context-layer">
Understand what a context layer is, why agents need one, and how KTX compares to other semantic layers.
Understand what a context layer is and why agents need one.
</Card>
<Card title="Guides" href="/docs/guides/building-context">
Hands-on workflows for scanning, ingesting, writing semantic sources, and serving agents.
</Card>
<Card title="Integrations" href="/docs/integrations/primary-sources">
Setup details for every supported database, context source, and agent client.
Hands-on workflows for scanning, ingesting, writing, and serving.
</Card>
<Card title="CLI Reference" href="/docs/cli-reference/ktx-setup">
Exhaustive flag and subcommand reference for every KTX command.
Complete flag and subcommand reference for every KTX command.
</Card>
</Cards>
## Next steps
## Agent usage notes
- **Get hands-on** — follow the [Quickstart](/docs/getting-started/quickstart) to set up KTX with your own database in under 10 minutes.
- **Understand the theory** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why schema access alone breaks on real analytics and how KTX addresses it.
| Agent task | Read next |
|------------|-----------|
| Discover machine-readable docs | [AI Resources](/docs/ai-resources) |
| Learn how a coding assistant should approach KTX | [Agent Quickstart](/docs/ai-resources/agent-quickstart) |
| Set up a new KTX project | [Quickstart](/docs/getting-started/quickstart) |
| Explain what problem KTX solves | [The Context Layer](/docs/concepts/the-context-layer) |
| Scan a database and ingest metadata | [Building Context](/docs/guides/building-context) |
| Edit semantic sources or knowledge pages | [Writing Context](/docs/guides/writing-context) |
| Look up exact command flags | [CLI Reference](/docs/cli-reference/ktx-setup) |

View file

@ -5,32 +5,34 @@ description: Set up KTX and build your first context in under 10 minutes.
This guide walks you through `ktx setup` — an interactive wizard that configures your LLM provider, connects your database, optionally ingests from your existing tools, builds context, and installs agent integration.
## Prerequisites
If you are a coding assistant trying to decide which KTX docs page to read, start with the [Agent Quickstart](/docs/ai-resources/agent-quickstart). This page is the human setup walkthrough.
- **Node.js 22+** and **pnpm**
- An **Anthropic API key** for LLM-powered enrichment and ingestion
- A **database connection** — PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite
- Optionally, a **dbt project**, **LookML repo**, **Metabase instance**, or other context source
## Workflow summary
Use this sequence when you are setting up KTX in an analytics project:
1. `npm install -g @kaelio/ktx` — install the published KTX CLI from npm.
2. `ktx setup` — create or resume a KTX project.
The setup wizard is stateful. If it exits before completion, rerun `ktx setup` in the same project directory to resume from the first incomplete step.
## Install and run setup
KTX is currently used from a local checkout or linked workspace CLI. Build and link the CLI first:
Install the published [`@kaelio/ktx`](https://www.npmjs.com/package/@kaelio/ktx) CLI:
```bash
git clone https://github.com/kaelio/ktx.git
cd ktx
pnpm install
pnpm run setup:dev
pnpm run link:dev
npm install -g @kaelio/ktx
```
Then run the setup wizard in the directory where you want your KTX project:
Then run the setup wizard:
```bash
ktx setup
```
The wizard walks through six steps. You can go back at any point, and if you exit early, running `ktx setup` again resumes where you left off.
The local checkout flow is only for contributors working on KTX itself. See [Contributing](/docs/community/contributing) for that setup.
The wizard walks through six steps. You can go back at any point, and if you exit early, rerunning `ktx setup` resumes where you left off.
## Step 1: Configure LLM
@ -70,10 +72,11 @@ KTX uses embeddings for semantic search over sources, wiki content, schema metad
**OpenAI embeddings** use `text-embedding-3-small` (1536 dimensions) and require an `OPENAI_API_KEY`.
**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX Python daemon. No API key is needed. If you run the daemon as a long-lived HTTP service, start it with:
**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX-managed Python runtime. No API key is needed. KTX can install and start the runtime during setup; to prepare it ahead of time, run:
```bash
ktx-daemon serve-http --host 127.0.0.1 --port 8765
ktx runtime install --feature local-embeddings --yes
ktx runtime start --feature local-embeddings
```
## Step 3: Connect a database
@ -192,12 +195,29 @@ Then select which agents to install for:
│ ◻ Codex
│ ◻ Cursor
│ ◻ OpenCode
│ ◻ Custom agent (.agents)
```
**CLI mode** writes a skill file (e.g., `.claude/skills/ktx/SKILL.md`) that teaches the agent to call KTX commands directly.
**MCP mode** writes an MCP server configuration (e.g., `.mcp.json`) that lets the agent call KTX tools like `sl_query`, `knowledge_search`, and `sl_write_source` over the Model Context Protocol.
**Custom agent** uses the universal `.agents` target for agents that can read project-local skills or MCP configuration.
## Generated files
KTX writes project state as plain files so agents can inspect and edit changes in git.
| Path | Created by | Purpose |
|------|------------|---------|
| `ktx.yaml` | `ktx setup` | Main project configuration: connections, LLM settings, embeddings, and context sources |
| `.ktx/secrets/*` | `ktx setup` when file-backed secrets are selected | Local secret files referenced from `ktx.yaml`; do not commit these |
| `semantic-layer/<connection-id>/*.yaml` | context build, ingestion, or `ktx sl write` | Semantic source definitions agents use for SQL generation |
| `knowledge/global/*.md` | ingestion or `ktx wiki write --scope global` | Shared business context and metric definitions |
| `knowledge/user/<user-id>/*.md` | `ktx wiki write --scope user` | User-scoped notes for one agent/user context |
| `.mcp.json`, `.cursor/mcp.json`, `.agents/mcp/ktx.json`, `.opencode/mcp.json` | agent integration setup | MCP server configuration for supported agent clients |
| `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling `ktx agent` commands |
## Verify it worked
Check your project status:
@ -217,35 +237,17 @@ KTX context built: yes
Agent integration ready: yes (claude-code:project)
```
List your semantic sources:
## Common errors
```bash
ktx sl list
```
Query through the semantic layer:
```bash
ktx sl query \
--connection-id postgres-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--order-by orders.total_revenue:desc \
--limit 5 \
--format sql
```
This outputs the generated SQL. Add `--execute` to run it against your warehouse:
```bash
ktx sl query \
--connection-id postgres-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--order-by orders.total_revenue:desc \
--limit 5 \
--execute --max-rows 10
```
| Error or symptom | Likely cause | Recovery |
|------------------|--------------|----------|
| `ktx: command not found` | The KTX package is not installed globally, or the shell cannot find the global binary | Run `npm install -g @kaelio/ktx` and open a new shell |
| LLM health check fails | Missing, invalid, or unauthorized Anthropic API key | Export `ANTHROPIC_API_KEY` or rerun `ktx setup` and choose the file-backed secret option |
| OpenAI embedding check fails | `OPENAI_API_KEY` is missing when OpenAI embeddings are selected | Export `OPENAI_API_KEY`, or rerun setup and choose local sentence-transformers embeddings |
| Local embeddings hang or fail | The managed Python runtime cannot start or the local model runtime is unavailable | Install `uv`, run `ktx runtime doctor`, then run `ktx runtime install --feature local-embeddings --yes` and rerun setup |
| Database connection test fails | Credentials, network access, warehouse, database, or schema value is wrong | Test the same URL with the database's native client, then rerun `ktx connection add ... --force` or rerun setup |
| `KTX context built: no` in `ktx status` | Setup saved configuration but did not build context | Run `ktx setup context build` or rerun `ktx setup` and choose to build context now |
| Agent integration is incomplete | Setup skipped the agents step or the target was not installed | Run `ktx setup --agents --target codex --agent-install-mode both --project` using the target you need |
## Next steps

View file

@ -5,6 +5,17 @@ description: Write and refine semantic sources and knowledge pages.
After building context through scanning and ingestion, you'll want to refine it — edit semantic sources to match your business logic, add knowledge pages that capture tribal knowledge, and query your data through the semantic layer to verify everything works.
## Agent workflow summary
Agents should refine context in this order:
1. `ktx sl list --json` — discover available sources and connection ids.
2. `ktx sl read <source> --connection-id <id>` — inspect the current YAML.
3. Edit the source YAML directly or use `ktx sl write`.
4. `ktx sl validate <source> --connection-id <id>` — verify columns, joins, and table references.
5. `ktx sl query ... --format sql` — compile a representative query without executing it.
6. `ktx wiki search ...` and `ktx wiki write ...` — add business context that does not belong in schema YAML.
## Semantic Sources
Semantic sources are YAML files that describe your tables, columns, measures, and joins. They're the core of the context layer — the structured definitions that agents use to generate correct SQL.
@ -108,6 +119,26 @@ Key fields:
| `segments` | No | Named filter conditions |
| `inherits_columns_from` | No | Inherit column metadata from a manifest entry |
Source component fields:
| Component | Field | Required | Description |
|-----------|-------|----------|-------------|
| Column | `name` | Yes | Column identifier as used in SQL expressions |
| Column | `type` | Yes | Agent-facing type: `string`, `number`, `time`, or `boolean` |
| Column | `role` | No | Special role such as `time` for default time dimensions |
| Column | `visibility` | No | `public`, `internal`, or `hidden` |
| Column | `description` | Strongly recommended | Human-readable business meaning |
| Measure | `name` | Yes | Queryable metric name |
| Measure | `expr` | Yes | SQL aggregation expression at the source grain |
| Measure | `filter` | No | SQL predicate applied only to this measure |
| Measure | `description` | Strongly recommended | Definition agents can cite and compare |
| Segment | `name` | Yes | Reusable filter name |
| Segment | `expr` | Yes | SQL predicate for the segment |
| Join | `to` | Yes | Target semantic source name |
| Join | `on` | Yes | SQL join condition using source names or aliases |
| Join | `relationship` | Yes | `many_to_one`, `one_to_many`, or `one_to_one` |
| Join | `alias` | No | Query alias for repeated or clearer joins |
Column visibility controls what agents see:
| Visibility | Behavior |
@ -192,6 +223,16 @@ Query flags:
The query planner is grain-aware — it understands the cardinality of joins and avoids chasm traps (double-counting caused by many-to-many fan-outs). When you query measures that span multiple sources, KTX generates sub-queries at the correct grain before joining.
### Workflow: edit and validate a source
1. `ktx sl read orders --connection-id my-postgres > /tmp/orders.yaml` — capture the current definition.
2. Edit `/tmp/orders.yaml` to add columns, measures, joins, or descriptions.
3. `ktx sl write orders --connection-id my-postgres --yaml "$(cat /tmp/orders.yaml)"` — write the updated source.
4. `ktx sl validate orders --connection-id my-postgres` — check the definition against the live schema.
5. `ktx sl query --connection-id my-postgres --measure total_revenue --dimension order_date --format sql` — compile a representative query.
If validation fails, fix the YAML before asking an agent to use the source. Common validation failures are missing columns, invalid join targets, and measure expressions that reference fields outside the source.
## Knowledge Pages
Knowledge pages are Markdown files that capture business context — definitions, rules, gotchas, and anything an agent needs to understand beyond what the schema tells it.
@ -250,6 +291,18 @@ Write flags:
| `--ref <ref>` | Reference to external resources (repeatable) |
| `--sl-ref <ref>` | Link to a semantic source (repeatable) |
Knowledge page fields:
| Field | Required | Description |
|-------|----------|-------------|
| Key | Yes | Stable page identifier passed to `ktx wiki read` |
| Summary | Yes | Short text shown in search results |
| Content | Yes | Full Markdown business context |
| Scope | No | `global` for shared context or `user` for user-scoped notes |
| Tags | No | Search and organization labels |
| External refs | No | Links or identifiers for source-of-truth systems |
| Semantic-layer refs | No | Source names the page explains or constrains |
You can also create and edit knowledge pages directly as Markdown files in the `knowledge/` directory.
### Listing pages
@ -271,3 +324,21 @@ ktx wiki search "revenue recognition"
```
Search uses both full-text matching and semantic similarity — it finds relevant pages even when the exact terms don't match. Agents call this automatically when they need business context to answer a question.
### Workflow: add searchable business context
1. Search first: `ktx wiki search "order status definitions"`.
2. If no page already covers the rule, write a page with `ktx wiki write`.
3. Include a concise `--summary`; agents see this before loading full content.
4. Add `--tag` values for the business area and `--sl-ref` values for related semantic sources.
5. Search again with the user's likely wording to confirm the page is discoverable.
## Common errors
| Error or symptom | Likely cause | Recovery |
|------------------|--------------|----------|
| `ktx sl validate` reports a missing column | YAML references a column that is absent from the scanned table | Run a fresh scan or update the YAML to match the warehouse schema |
| Query compilation double-counts a measure | Join relationship or grain is missing or wrong | Add `grain` and explicit `relationship` values, then validate and recompile |
| Agent cannot find a metric | Measure name or description does not match business terminology | Add a measure description and a knowledge page with common synonyms |
| Knowledge search misses a page | Summary and tags do not include likely user wording | Rewrite the summary and add relevant tags, then search again |
| `ktx sl write` changes are hard to review | Large YAML was passed inline | Edit the source file directly or write from a temporary file, then review the git diff |

View file

@ -7,6 +7,29 @@ Context sources feed your existing analytics tooling into KTX. During ingestion,
All context sources are configured in `ktx.yaml` under `connections` with their respective `driver` value.
## Ingestion workflow
Agents should configure and ingest context sources in this order:
1. Add the context source connection in `ktx.yaml` or with `ktx setup`.
2. Reference tokens as `env:NAME` or `file:/path/to/secret` instead of writing literal token values into `ktx.yaml`.
3. Run `ktx ingest <connectionId>` for one source or `ktx ingest --all`.
4. Check progress with `ktx ingest status --json`.
5. Review generated `semantic-layer/` YAML and `knowledge/` Markdown files in git.
6. Validate changed semantic sources with `ktx sl validate`.
## Shared source fields
| Field | Required | Description |
|-------|----------|-------------|
| `driver` | Yes | Source adapter: `dbt`, `metricflow`, `lookml`, `metabase`, `looker`, or `notion` |
| `readonly` | Strongly recommended | Marks the source as read-only for KTX |
| `source_dir` | For local file sources | Absolute or project-relative source directory |
| `repo_url` | For Git-hosted sources | Git repository URL |
| `branch` | No | Git branch to read |
| `path` | No | Subdirectory inside a monorepo |
| `auth_token_ref` | For private APIs/repos | `env:NAME` or `file:/path/to/secret` token reference |
## dbt
Ingests schema definitions, model descriptions, column metadata, and test coverage from a dbt project.
@ -351,3 +374,13 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in
- Notion is knowledge-only — it does not produce semantic layer sources
- Rate limits apply; large workspaces may require multiple ingestion runs
- `last_successful_cursor` is auto-managed for incremental sync
## Common errors
| Error or symptom | Likely cause | Recovery |
|------------------|--------------|----------|
| Adapter cannot read source files | `source_dir`, `repo_url`, `branch`, or `path` is wrong | Verify the path locally or clone the repo manually with the same credentials |
| Private repo/API authentication fails | Token env var or secret file is missing | Export the env var or update `auth_token_ref` to a readable file |
| Ingest creates duplicate context | Existing source names or knowledge pages do not match imported terminology | Review the diff, rename duplicates, and add knowledge pages with canonical names |
| Notion ingest skips pages | Integration lacks access or root ids are missing | Share pages with the Notion integration and set `root_page_ids` or use `all_accessible` carefully |
| Generated semantic sources fail validation | Tool metadata does not match the live warehouse schema | Map BI/source databases to primary warehouse connections and rerun validation |

View file

@ -11,6 +11,20 @@ All connectors share these conventions:
- Connections are read-only — KTX never writes to your database
- Schema scanning discovers tables, columns, types, and constraints automatically
## Connection field reference
Agents should prefer environment or file references over literal secrets.
| Field | Required | Applies to | Description |
|-------|----------|------------|-------------|
| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `clickhouse`, `mysql`, `sqlserver`, or `sqlite` |
| `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` |
| `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, ClickHouse, SQL Server | Field-by-field connection values |
| `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan |
| `readonly` | Strongly recommended | all primary sources | Marks the connection as read-only in KTX config |
| `historicSql` | No | supported warehouses | Enables query-history ingestion when the warehouse supports it |
| `path` | Yes for path-style SQLite | SQLite | Local SQLite database path or `env:NAME` reference |
## PostgreSQL
The most full-featured connector. Supports schema introspection, foreign key detection, column statistics, and historic SQL via `pg_stat_statements`.
@ -488,3 +502,13 @@ No authentication required — SQLite is file-based. The file must be readable b
- SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB`
- Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON`
- In-memory databases supported with `path: ":memory:"` (for testing)
## Common errors
| Error or symptom | Likely cause | Recovery |
|------------------|--------------|----------|
| Connection URL appears in git diff | A literal credential URL was written to `ktx.yaml` | Replace it with `env:NAME` or `file:/path/to/secret` and rotate exposed credentials |
| Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
| Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup |
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output |
| SQL execution fails through agents | Connection is missing, unreachable, or execution is disabled in the server | Run `ktx connection test <id>` and check `ktx serve` flags |

View file

@ -6,8 +6,8 @@
"concepts",
"guides",
"integrations",
"benchmarks",
"cli-reference",
"ai-resources",
"community"
]
}

159
docs-site/lib/llm-docs.ts Normal file
View file

@ -0,0 +1,159 @@
import { source } from "@/lib/source";
const siteOrigin = "https://docs.kaelio.com/ktx";
/**
 * A docs page reduced to the fields needed to emit machine-readable Markdown.
 */
export type LlmDocsPage = {
  /** Page title; rendered as the top-level heading of the page's Markdown. */
  title: string;
  /** Optional description; rendered as a blockquote and in link lists. */
  description?: string;
  /** Site-relative page URL; `absoluteUrl` prefixes the site origin on output. */
  url: string;
  /** Site-relative URL of the Markdown rendition (`url` plus a `.md` suffix). */
  markdownUrl: string;
  /** Slug segments; the first segment is used as the page's index category. */
  slug: string[];
  /** Resolves to the page's normalized Markdown body. */
  getMarkdown: () => Promise<string>;
};
/** Returns every page known to the docs source, converted to `LlmDocsPage`. */
export function getLlmDocsPages(): LlmDocsPage[] {
  const allPages = source.getPages();
  return allPages.map(toLlmDocsPage);
}
/**
 * Looks up a single page by slug segments.
 *
 * @param slug Slug segments as received from the route params, if any.
 * @returns The converted page, or `null` when the source has no such page.
 */
export function getLlmDocsPage(slug: string[] | undefined) {
  const match = source.getPage(slug);
  if (!match) {
    return null;
  }
  return toLlmDocsPage(match);
}
/**
 * Renders one page as standalone Markdown: an H1 title, the optional
 * description as a blockquote, absolute canonical and Markdown URLs, then the
 * page body. The assembled text is passed through `normalizeMarkdown`.
 */
export async function getPageMarkdown(page: LlmDocsPage) {
  // The description is emitted only when present, so the heading is never
  // followed by an empty blockquote.
  const description = page.description ? `\n\n> ${page.description}` : "";
  const body = await page.getMarkdown();
  return normalizeMarkdown(`# ${page.title}${description}
Canonical URL: ${absoluteUrl(page.url)}
Markdown URL: ${absoluteUrl(page.markdownUrl)}
${body}
`);
}
/**
 * Builds the body of the `llms.txt` index: a short product summary, curated
 * link sections, and a generated index of every docs page.
 *
 * Curated links point at each page's Markdown URL. When a URL is not present
 * in the docs source, the link falls back to `<url>.md` and to the hard-coded
 * description passed to `link`.
 */
export function buildLlmsTxt() {
  const pages = getLlmDocsPages();
  // Index pages by site-relative URL so curated links can reuse page metadata.
  const byUrl = new Map(pages.map((page) => [page.url, page]));
  // Renders one Markdown list item, preferring the page's own description.
  const link = (url: string, label: string, fallbackDescription: string) => {
    const page = byUrl.get(url);
    const description = page?.description ?? fallbackDescription;
    const markdownUrl = page?.markdownUrl ?? `${url}.md`;
    return `- [${label}](${absoluteUrl(markdownUrl)}): ${description}`;
  };
  return `# KTX
> Agent-native context layer for analytics engineering and database agents.
KTX provides semantic-layer files, warehouse scans, knowledge pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins.
## Agent Entry Points
${link("/docs/ai-resources", "AI Resources", "Machine-readable docs, prompt recipes, and agent setup paths")}
${link("/docs/ai-resources/agent-quickstart", "Agent Quickstart", "Task-first route for coding assistants using KTX")}
${link("/docs/ai-resources/markdown-access", "Markdown Access", "Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown")}
${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested instructions for coding assistants that need to read and cite KTX docs")}
## Start Here
${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")}
${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")}
${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and knowledge pages")}
## Machine-Readable Documentation
- [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response
- [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown
- [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough
- [Agent CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-agent.md")}): Machine-readable agent commands
## CLI Reference
${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")}
${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")}
${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")}
${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")}
${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")}
## Integrations
${link("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses")}
${link("/docs/integrations/context-sources", "Context Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion")}
## All Documentation
${buildPageIndex(pages)}
`;
}
/**
 * Builds the `llms-full.txt` body: a title, the site origin, and every docs
 * page rendered as Markdown, with sections separated by horizontal rules.
 */
export async function buildLlmsFullTxt() {
  const pages = getLlmDocsPages();
  const sections = await Promise.all(pages.map(getPageMarkdown));
  const separator = "\n\n---\n\n";
  const header = ["# KTX Full Documentation", `Source: ${siteOrigin}`];
  return [...header, ...sections].join(separator);
}
/**
 * Converts a page from the docs source into the `LlmDocsPage` shape.
 * The Markdown body is read and normalized lazily, when `getMarkdown` is called.
 */
function toLlmDocsPage(page: ReturnType<typeof source.getPages>[number]) {
  const { title, description } = page.data;
  const pageUrl = page.url;
  const readMarkdown = async () => normalizeMarkdown(page.data.content);
  return {
    title,
    description,
    url: pageUrl,
    markdownUrl: `${pageUrl}.md`,
    slug: page.slugs,
    getMarkdown: readMarkdown,
  } satisfies LlmDocsPage;
}
/**
 * Normalizes Markdown for machine-readable output.
 *
 * Steps, in order:
 * 1. Convert CRLF / lone CR line endings to LF so later patterns match
 *    regardless of how the source file was saved.
 * 2. Trim surrounding whitespace.
 * 3. Strip one leading YAML frontmatter block (`---` … `---`), including an
 *    empty one; the closing fence must be on its own line.
 * 4. Trim again and collapse runs of three or more newlines to one blank line.
 *
 * @param markdown Raw Markdown text, with or without frontmatter.
 * @returns The normalized Markdown, using LF line endings.
 */
function normalizeMarkdown(markdown: string) {
  return markdown
    .replace(/\r\n?/g, "\n")
    .trim()
    // The frontmatter body is optional so that `---\n---` is stripped too.
    .replace(/^---[ \t]*\n(?:[\s\S]*?\n)?---[ \t]*(?:\n|$)/, "")
    .trim()
    .replace(/\n{3,}/g, "\n\n");
}
/**
 * Builds the generated page index used under "All Documentation": pages are
 * grouped by their first slug segment, and each group is rendered as an H3
 * heading followed by a Markdown link list.
 */
function buildPageIndex(pages: LlmDocsPage[]) {
  const grouped = new Map<string, LlmDocsPage[]>();
  for (const page of pages) {
    // Pages without a slug segment are filed under "general".
    const category = page.slug[0] ?? "general";
    grouped.set(category, [...(grouped.get(category) ?? []), page]);
  }
  // Map iteration follows insertion order, so groups appear in the order in
  // which their first page was encountered.
  return [...grouped.entries()]
    .map(([category, categoryPages]) => {
      const links = categoryPages
        .map((page) => {
          // The ": description" suffix is omitted when a page has no description.
          const description = page.description ? `: ${page.description}` : "";
          return `- [${page.title}](${absoluteUrl(page.markdownUrl)})${description}`;
        })
        .join("\n");
      return `### ${formatCategoryName(category)}
${links}`;
    })
    .join("\n\n");
}
/** Prefixes a site-relative path with the docs site origin. */
function absoluteUrl(path: string) {
  return siteOrigin + path;
}
/**
 * Converts a slug category such as `getting-started` into a display label.
 *
 * Categories with a fixed label (acronyms that simple capitalization would get
 * wrong) are looked up first; everything else is split on `-` and each word is
 * capitalized.
 *
 * @param category First slug segment of a docs page.
 * @returns Human-readable category label.
 */
function formatCategoryName(category: string) {
  // A Map is used instead of a plain object so that categories that collide
  // with inherited object keys ("constructor", "toString", …) are not resolved
  // to prototype members.
  const labels = new Map<string, string>([
    ["ai-resources", "AI Resources"],
    ["cli-reference", "CLI Reference"],
  ]);
  const fixedLabel = labels.get(category);
  if (fixedLabel !== undefined) {
    return fixedLabel;
  }
  return category
    .split("-")
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ");
}

51
docs-site/middleware.ts Normal file
View file

@ -0,0 +1,51 @@
import { NextResponse, type NextRequest } from "next/server";
// Media types that count as a request for Markdown during Accept negotiation.
// Entries are lowercase because parsed Accept types are lowercased before lookup.
const markdownMimeTypes = new Set([
  "text/markdown",
  "text/x-markdown",
  "application/markdown",
]);
/**
 * Serves the Markdown rendition of a docs page when the client's Accept header
 * prefers a Markdown media type.
 *
 * Requests that do not prefer Markdown, are not under `/docs/`, or already end
 * in `.md` pass through untouched; everything else is rewritten to the
 * `/llms.mdx` route for the same path.
 */
export function middleware(request: NextRequest) {
  const wantsMarkdown = isMarkdownPreferred(request.headers.get("accept"));
  if (!wantsMarkdown) {
    return NextResponse.next();
  }

  const docsPath = request.nextUrl.pathname;
  const isRewritable = docsPath.startsWith("/docs/") && !docsPath.endsWith(".md");
  if (!isRewritable) {
    return NextResponse.next();
  }

  const target = request.nextUrl.clone();
  target.pathname = `/llms.mdx${docsPath}`;
  return NextResponse.rewrite(target);
}
// Restrict the middleware to docs routes.
export const config = {
  matcher: ["/docs/:path*"],
};
/**
 * Reports whether the most-preferred media type in an Accept header is one of
 * the Markdown types.
 *
 * Entries are ranked by their `q` weight (default 1), with ties broken by
 * header order. Entries with a non-numeric or non-positive weight are ignored,
 * as are empty list elements (for example a stray leading or trailing comma).
 * Wildcards such as `*` + `/` + `*` are never treated as a Markdown preference.
 *
 * @param acceptHeader Raw Accept header value, or `null` when absent.
 * @returns `true` only when the top-ranked type is a Markdown media type.
 */
function isMarkdownPreferred(acceptHeader: string | null) {
  if (!acceptHeader) return false;
  const accepted = acceptHeader
    .split(",")
    .map((entry, index) => {
      const [type = "", ...parameters] = entry.trim().split(";");
      // Parameter names are case-insensitive, so `Q=0` must be honoured too;
      // otherwise an explicitly rejected type would default to quality 1.
      const weight = parameters
        .map((parameter) => parameter.trim())
        .find((parameter) => /^q=/i.test(parameter));
      return {
        type: type.trim().toLowerCase(),
        quality: weight ? Number.parseFloat(weight.slice(2)) : 1,
        index,
      };
    })
    // Drop empty list elements so they cannot outrank a real media type.
    .filter(
      (entry) =>
        entry.type !== "" &&
        Number.isFinite(entry.quality) &&
        entry.quality > 0,
    )
    .sort((a, b) => b.quality - a.quality || a.index - b.index);
  const preferred = accepted[0]?.type;
  return preferred !== undefined && markdownMimeTypes.has(preferred);
}

View file

@ -3,6 +3,15 @@ import { createMDX } from "fumadocs-mdx/next";
const withMDX = createMDX();
/** @type {import('next').NextConfig} */
const config = {};
const config = {
async rewrites() {
return [
{
source: "/docs/:path*.md",
destination: "/llms.mdx/docs/:path*",
},
];
},
};
export default withMDX(config);

View file

@ -6,7 +6,8 @@
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start"
"start": "next start",
"test": "node --test tests/*.test.mjs"
},
"dependencies": {
"fumadocs-core": "15.7.13",

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 KiB

View file

@ -0,0 +1,14 @@
import assert from "node:assert/strict";
import test from "node:test";
// Base URL of the running docs site; override with DOCS_SITE_URL to target a
// server other than the local default.
const docsSiteUrl = process.env.DOCS_SITE_URL ?? "http://localhost:3000";
test("/docs redirects to the docs introduction", async () => {
  // `redirect: "manual"` stops fetch from following the redirect so that the
  // status code and Location header of the redirect response can be asserted.
  const response = await fetch(`${docsSiteUrl}/docs`, { redirect: "manual" });
  assert.equal(response.status, 307);
  assert.equal(
    response.headers.get("location"),
    "/docs/getting-started/introduction",
  );
});

View file

@ -0,0 +1,411 @@
# Agent-Friendly Docs Site Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make `docs-site` discoverable and readable by coding agents through `llms.txt`, bundled markdown, per-page markdown routes, markdown negotiation, and stricter agent-friendly docs content.
**Architecture:** Keep the existing Next 15 + Fumadocs app. Add a small `lib/llm-docs.ts` module that reads Fumadocs pages and builds machine-readable markdown responses, then expose those responses through route handlers and a markdown negotiation proxy. Rewrite existing MDX pages in place so the rendered UI and machine-readable routes share one source of truth.
**Tech Stack:** Next.js 15 App Router, Fumadocs, MDX, TypeScript, pnpm, Node 22.
---
### Task 1: Machine-Readable Docs Routes
**Files:**
- Create: `docs-site/lib/llm-docs.ts`
- Create: `docs-site/app/llms.txt/route.ts`
- Create: `docs-site/app/llms-full.txt/route.ts`
- Create: `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts`
- Modify: `docs-site/next.config.mjs`
- [ ] **Step 1: Add the LLM docs utility**
Create `docs-site/lib/llm-docs.ts` with the following contents:
```ts
import { source } from "@/lib/source";
const SITE_ORIGIN = "https://ktx.dev";
export type LlmDocsPage = {
title: string;
description?: string;
url: string;
markdownUrl: string;
slug: string[];
getMarkdown: () => Promise<string>;
};
export function getLlmDocsPages(): LlmDocsPage[] {
return source.getPages().map((page) => ({
title: page.data.title,
description: page.data.description,
url: page.url,
markdownUrl: `${page.url}.md`,
slug: page.slugs,
getMarkdown: async () => normalizeMarkdown(await page.data.getText("raw")),
}));
}
export function getLlmDocsPage(slug: string[] | undefined) {
const page = source.getPage(slug);
if (!page) return null;
return {
title: page.data.title,
description: page.data.description,
url: page.url,
markdownUrl: `${page.url}.md`,
slug: page.slugs,
getMarkdown: async () => normalizeMarkdown(await page.data.getText("raw")),
} satisfies LlmDocsPage;
}
export async function getPageMarkdown(page: LlmDocsPage) {
const body = await page.getMarkdown();
const description = page.description ? `\n\n> ${page.description}` : "";
return `# ${page.title}${description}\n\nCanonical URL: ${page.url}\nMarkdown URL: ${page.markdownUrl}\n\n${body}`;
}
export function buildLlmsTxt() {
const pages = getLlmDocsPages();
const byUrl = new Map(pages.map((page) => [page.url, page]));
const link = (url: string, label: string, fallbackDescription: string) => {
const page = byUrl.get(url);
const description = page?.description ?? fallbackDescription;
return `- [${label}](${url}): ${description}`;
};
return `# KTX
> Agent-native context layer for analytics engineering and database agents.
KTX provides semantic-layer files, warehouse scans, knowledge pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins.
## Start Here
${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")}
${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")}
${link("/docs/guides/serving-agents", "Serving Agents", "Expose KTX context through MCP and CLI tools")}
${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and knowledge pages")}
## Machine-Readable Documentation
- [Full documentation](/llms-full.txt): All docs pages in one plain-text markdown response
- [Quickstart markdown](/docs/getting-started/quickstart.md): Raw markdown for the setup guide
- [Agent CLI markdown](/docs/cli-reference/ktx-agent.md): Raw markdown for machine-readable agent commands
- [Serving Agents markdown](/docs/guides/serving-agents.md): Raw markdown for MCP and CLI workflows
## CLI Reference
${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")}
${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")}
${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")}
${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")}
${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")}
## Integrations
${link("/docs/integrations/agent-clients", "Agent Clients", "Configure Claude Code, Cursor, Codex, and OpenCode")}
${link("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses")}
${link("/docs/integrations/context-sources", "Context Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion")}
`;
}
export async function buildLlmsFullTxt() {
const pages = getLlmDocsPages();
const rendered = await Promise.all(pages.map(getPageMarkdown));
return [`# KTX Full Documentation`, `Source: ${SITE_ORIGIN}`, ...rendered].join("\n\n---\n\n");
}
function normalizeMarkdown(markdown: string) {
return markdown.trim().replace(/\n{3,}/g, "\n\n");
}
```
- [ ] **Step 2: Add route handlers**
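Create the three route files, in this order: `docs-site/app/llms.txt/route.ts`, `docs-site/app/llms-full.txt/route.ts`, and `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts`: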
```ts
import { buildLlmsTxt } from "@/lib/llm-docs";
export const dynamic = "force-static";
export function GET() {
return new Response(buildLlmsTxt(), {
headers: { "Content-Type": "text/plain; charset=utf-8" },
});
}
```
```ts
import { buildLlmsFullTxt } from "@/lib/llm-docs";
export const dynamic = "force-static";
export async function GET() {
return new Response(await buildLlmsFullTxt(), {
headers: { "Content-Type": "text/plain; charset=utf-8" },
});
}
```
```ts
import { getLlmDocsPage, getLlmDocsPages, getPageMarkdown } from "@/lib/llm-docs";
import { notFound } from "next/navigation";
export const dynamic = "force-static";
export async function GET(
_request: Request,
props: { params: Promise<{ slug?: string[] }> },
) {
const params = await props.params;
const page = getLlmDocsPage(params.slug);
if (!page) notFound();
return new Response(await getPageMarkdown(page), {
headers: { "Content-Type": "text/markdown; charset=utf-8" },
});
}
export function generateStaticParams() {
return getLlmDocsPages().map((page) => ({ slug: page.slug }));
}
```
- [ ] **Step 3: Add `.md` rewrite**
Modify `docs-site/next.config.mjs`:
```js
import { createMDX } from "fumadocs-mdx/next";
const withMDX = createMDX();
/** @type {import('next').NextConfig} */
const config = {
async rewrites() {
return [
{
source: "/docs/:path*.md",
destination: "/llms.mdx/docs/:path*",
},
];
},
};
export default withMDX(config);
```
- [ ] **Step 4: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: Next build completes and static routes include `llms.txt`, `llms-full.txt`, and the LLM markdown route.
### Task 2: Markdown Negotiation
**Files:**
- Create: `docs-site/proxy.ts`
- [ ] **Step 1: Add markdown negotiation proxy**
Create `docs-site/proxy.ts`:
```ts
import { isMarkdownPreferred, rewritePath } from "fumadocs-core/negotiation";
import { NextResponse, type NextRequest } from "next/server";
const { rewrite } = rewritePath("/docs/*path", "/llms.mdx/docs/*path");
export function proxy(request: NextRequest) {
if (!isMarkdownPreferred(request)) {
return NextResponse.next();
}
const rewrittenPath = rewrite(request.nextUrl.pathname);
if (!rewrittenPath) {
return NextResponse.next();
}
return NextResponse.rewrite(new URL(rewrittenPath, request.nextUrl));
}
export const config = {
matcher: ["/docs/:path*"],
};
```
- [ ] **Step 2: Verify build**
Run: `pnpm --filter ktx-docs build`
Expected: Build passes with the proxy included.
### Task 3: Agent-Friendly High-Priority Guides
**Files:**
- Modify: `docs-site/content/docs/getting-started/quickstart.mdx`
- Modify: `docs-site/content/docs/guides/serving-agents.mdx`
- Modify: `docs-site/content/docs/guides/writing-context.mdx`
- [ ] **Step 1: Rewrite quickstart structure**
Add sections for:
- Workflow summary
- Generated files
- Common errors and recovery
Keep existing setup detail, but make each command block copy-pasteable and each expected output complete enough for agents to recognize success.
- [ ] **Step 2: Rewrite Serving Agents as API reference**
Add tables for MCP tool inputs and CLI command inputs. Add workflows:
- Answer an analytics question through MCP
- Answer an analytics question through CLI
- Safely execute SQL with row limits
- [ ] **Step 3: Rewrite Writing Context with schemas and workflows**
Add semantic-source field tables, knowledge-page field tables, and workflows:
- Inspect a source
- Edit and validate a source
- Query through the semantic layer
- Write and search a knowledge page
- [ ] **Step 4: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: MDX compiles without syntax errors.
### Task 4: CLI Reference Normalization
**Files:**
- Modify: `docs-site/content/docs/cli-reference/*.mdx`
- [ ] **Step 1: Normalize every CLI page**
For each CLI reference page, ensure this structure exists:
```md
## Command signature
```bash
ktx <command> [subcommand] [options]
```
## Subcommands
| Subcommand | Description |
|---|---|
## Options
| Flag | Type | Required | Description | Default |
|---|---|---|---|---|
## Examples
```bash
ktx <real-command> --real-flag realistic-value
```
## Output
```text
complete expected output shape
```
## Common errors
| Error | Cause | Recovery |
|---|---|---|
```
Only add sections that are relevant to the command; do not invent output for commands whose output is intentionally interactive.
- [ ] **Step 2: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: MDX compiles without syntax errors.
### Task 5: Integration and Concept Page Polish
**Files:**
- Modify: `docs-site/content/docs/integrations/agent-clients.mdx`
- Modify: `docs-site/content/docs/integrations/primary-sources.mdx`
- Modify: `docs-site/content/docs/integrations/context-sources.mdx`
- Modify: `docs-site/content/docs/concepts/*.mdx`
- Modify: `docs-site/content/docs/benchmarks/link-detection.mdx`
- [ ] **Step 1: Normalize integrations**
Add structured sections for supported values, config snippets, authentication, generated files, and recovery notes. Keep existing examples aligned with current KTX commands.
- [ ] **Step 2: Add agent usage notes**
For concept and benchmark pages, add a compact `## Agent usage notes` section that tells agents when the page is relevant and which concrete page to read next.
- [ ] **Step 3: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: MDX compiles without syntax errors.
### Task 6: Route Verification and Final Checks
**Files:**
- No required source changes unless verification finds a bug.
- [ ] **Step 1: Run production build**
Run: `pnpm --filter ktx-docs build`
Expected: Build succeeds.
- [ ] **Step 2: Run TypeScript check**
Run: `pnpm --filter ktx-docs exec tsc --noEmit`
Expected: TypeScript exits successfully.
- [ ] **Step 3: Start local server**
Run: `pnpm --filter ktx-docs start`
Expected: Server starts on an available port.
- [ ] **Step 4: Verify machine-readable routes**
Run:
```bash
curl -i http://localhost:3000/llms.txt
curl -i http://localhost:3000/llms-full.txt
curl -i http://localhost:3000/docs/getting-started/quickstart.md
curl -i -H "Accept: text/markdown" http://localhost:3000/docs/getting-started/quickstart
curl -i http://localhost:3000/docs/not-a-page.md
```
Expected:
- `/llms.txt`: `200`, `Content-Type: text/plain; charset=utf-8`
- `/llms-full.txt`: `200`, `Content-Type: text/plain; charset=utf-8`
- `/docs/getting-started/quickstart.md`: `200`, `Content-Type: text/markdown; charset=utf-8`
- `/docs/getting-started/quickstart` with `Accept: text/markdown`: `200`, `Content-Type: text/markdown; charset=utf-8`
- `/docs/not-a-page.md`: `404`
- [ ] **Step 5: Inspect final diff**
Run: `git diff --stat && git diff --check`
Expected: Diff contains only docs-site and plan changes, with no whitespace errors.

View file

@ -0,0 +1,813 @@
# Demo Guided Tour Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace the disconnected "Try KTX with packaged demo data" flow with a guided tour that walks users through the same setup wizard steps using pre-filled, read-only selections, then connects their agent to the populated demo project.
**Architecture:** A new `setup-demo-tour.ts` module owns the demo tour flow. It renders read-only cards (database, sources), a simulated context build replay using the existing `renderContextBuildView` + `createRepainter` pipeline from `context-build-view.ts`, then hands off to the real `runKtxSetupAgentsStep`. The entry point in `setup.ts` (`runKtxSetupDemoFromEntryMenu`) is rewired to call this new module instead of `runKtxDemo`.
**Tech Stack:** TypeScript (ESM), Node.js raw stdin for keypress handling, existing `@clack/prompts` visual patterns, vitest for tests.
---
### Task 1: Create `setup-demo-tour.ts` with keypress utility and banner
**Files:**
- Create: `packages/cli/src/setup-demo-tour.ts`
- Test: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for `renderDemoBanner`**
```typescript
// packages/cli/src/setup-demo-tour.test.ts
import { describe, expect, it } from 'vitest';
import { renderDemoBanner } from './setup-demo-tour.js';
// The banner must always explain that the tour is a pre-built, read-only demo.
describe('renderDemoBanner', () => {
  it('includes demo mode explanation', () => {
    const banner = renderDemoBanner();
    for (const phrase of ['Demo mode', 'pre-processed', 'read-only']) {
      expect(banner).toContain(phrase);
    }
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: FAIL — module not found
- [ ] **Step 3: Implement `renderDemoBanner` and `waitForDemoNavigation`**
```typescript
// packages/cli/src/setup-demo-tour.ts
import type { KtxCliIo } from './cli-runtime.js';
import { KtxSetupExitError } from './setup-interrupt.js';
// ANSI SGR helpers. The escape character is built from its code point so the
// source file contains no raw control bytes.
const ESC = String.fromCharCode(0x1b);

/** Wraps `text` in the cyan foreground code (36) followed by the default-foreground code (39). */
function cyan(text: string): string {
  return [ESC, '[36m', text, ESC, '[39m'].join('');
}

/** Wraps `text` in the dim intensity code (2) followed by the normal-intensity code (22). */
function dim(text: string): string {
  return [ESC, '[2m', text, ESC, '[22m'].join('');
}
/**
 * Builds the demo-mode banner text.
 *
 * @returns Two banner lines, preceded and followed by a newline.
 */
export function renderDemoBanner(): string {
  const heading = `┌ ${cyan('Demo mode')} — data has been pre-processed and KTX context is already built.`;
  const body = `│ This walkthrough illustrates the setup steps. Selections are pre-filled and read-only.`;
  return `\n${heading}\n${body}\n`;
}
/**
 * Waits for a single navigation keypress on `stdin`.
 *
 * Enter (`\r` or `\n`) resolves with `'forward'`, a lone Escape byte resolves
 * with `'back'`, and Ctrl+C (`\x03`) rejects with `KtxSetupExitError`. Any
 * other input is ignored and the function keeps waiting.
 *
 * While waiting, the stream is switched to raw mode (when supported) and
 * resumed. Before the promise settles, the raw-mode flag is restored and, if
 * the stream was not flowing on entry, it is paused again so that this helper
 * does not leave stdin holding the event loop open.
 *
 * @param stdin Stream to read keypresses from; defaults to `process.stdin`.
 * @returns The navigation direction chosen by the user.
 * @throws KtxSetupExitError (as a rejection) when Ctrl+C is pressed.
 */
export async function waitForDemoNavigation(
  stdin: NodeJS.ReadStream = process.stdin,
): Promise<'forward' | 'back'> {
  return new Promise((resolve, reject) => {
    const wasRaw = stdin.isRaw;
    // `readableFlowing` is null (never started) or false (paused) unless
    // something else is already consuming the stream.
    const wasFlowing = stdin.readableFlowing === true;
    if (stdin.setRawMode) stdin.setRawMode(true);
    stdin.resume();

    const cleanup = () => {
      stdin.off('data', onData);
      if (stdin.setRawMode) stdin.setRawMode(wasRaw ?? false);
      // Undo resume() unless another consumer already had the stream flowing.
      if (!wasFlowing) stdin.pause();
    };

    const onData = (data: Buffer) => {
      const key = data.toString();
      if (key === '\r' || key === '\n') {
        cleanup();
        resolve('forward');
      } else if (key === '\x1b') {
        cleanup();
        resolve('back');
      } else if (key === '\x03') {
        cleanup();
        reject(new KtxSetupExitError());
      }
    };

    stdin.on('data', onData);
  });
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo tour banner and keypress navigation utility"
```
---
### Task 2: Add `renderDemoCard` function
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for `renderDemoCardContent`**
Append to the test file:
```typescript
import { renderDemoCardContent } from './setup-demo-tour.js';
// Card content must show the title, every selection, and the navigation hint.
describe('renderDemoCardContent', () => {
  it('renders a card with title and selections', () => {
    const card = renderDemoCardContent('Database connection', ['PostgreSQL (demo warehouse)']);
    const expectedFragments = [
      'Database connection',
      'PostgreSQL (demo warehouse)',
      'Press Enter to continue',
      'Escape to go back',
    ];
    for (const fragment of expectedFragments) {
      expect(card).toContain(fragment);
    }
  });

  it('renders multiple selections', () => {
    const sources = ['dbt', 'Metabase', 'Notion'];
    const card = renderDemoCardContent('Context sources', sources);
    for (const source of sources) {
      expect(card).toContain(source);
    }
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: FAIL — `renderDemoCardContent` not exported
- [ ] **Step 3: Implement `renderDemoCardContent` and `renderDemoCard`**
Add to `setup-demo-tour.ts`:
```typescript
/**
 * Renders the body of a read-only demo card.
 *
 * @param title Heading placed on the card's top border line.
 * @param selections Pre-filled values, rendered one per line with a cyan marker.
 * @returns The card text, including the navigation hint and a trailing newline.
 */
export function renderDemoCardContent(title: string, selections: string[]): string {
  const rows: string[] = [`┌ ${title}`, '│'];
  for (const selection of selections) {
    rows.push(`│ ${cyan('▸')} ${selection}`);
  }
  rows.push('│', `│ ${dim('Press Enter to continue, Escape to go back')}`, '└', '');
  return rows.join('\n');
}
/**
 * Writes the demo banner and a read-only card to stdout, then waits for navigation.
 *
 * @param title Card heading.
 * @param selections Pre-filled values shown on the card.
 * @param io CLI io whose stdout receives the banner and the card.
 * @param stdin Optional stream forwarded to the navigation function.
 * @param waitNav Optional navigation function; `waitForDemoNavigation` is used when omitted.
 * @returns The direction reported by the navigation function.
 */
export async function renderDemoCard(
  title: string,
  selections: string[],
  io: KtxCliIo,
  stdin?: NodeJS.ReadStream,
  waitNav?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>,
): Promise<'forward' | 'back'> {
  const { stdout } = io;
  stdout.write(renderDemoBanner());
  stdout.write(renderDemoCardContent(title, selections));
  return (waitNav ?? waitForDemoNavigation)(stdin);
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo tour read-only card rendering"
```
---
### Task 3: Add demo context build replay animation
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for demo replay event sequence**
Append to the test file:
```typescript
import { buildDemoReplayTimeline, DEMO_REPLAY_TARGETS } from './setup-demo-tour.js';
// The scripted timeline must cover every demo target, finish each one, and be time-ordered.
describe('buildDemoReplayTimeline', () => {
  it('produces events for all four demo targets', () => {
    const seen = new Set<string>();
    for (const event of buildDemoReplayTimeline()) {
      seen.add(event.connectionId);
    }
    expect(seen).toEqual(new Set(['demo-warehouse', 'dbt', 'metabase', 'notion']));
  });

  it('ends with all targets done', () => {
    const finalStatus = new Map<string, string>();
    buildDemoReplayTimeline().forEach((event) => {
      finalStatus.set(event.connectionId, event.status);
    });
    [...finalStatus.values()].forEach((status) => {
      expect(status).toBe('done');
    });
  });

  it('events are sorted by delayMs', () => {
    const delays = buildDemoReplayTimeline().map((event) => event.delayMs);
    // Compare each delay with its predecessor.
    delays.slice(1).forEach((delay, index) => {
      expect(delay).toBeGreaterThanOrEqual(delays[index]!);
    });
  });
});
// Guards the shape of the target groups fed into the build view.
describe('DEMO_REPLAY_TARGETS', () => {
  it('has one primary source and three context sources', () => {
    const { primarySources, contextSources } = DEMO_REPLAY_TARGETS;
    expect(primarySources).toHaveLength(1);
    expect(contextSources).toHaveLength(3);
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: FAIL — exports not found
- [ ] **Step 3: Implement replay timeline and target definitions**
Add to `setup-demo-tour.ts`:
```typescript
import type { KtxPublicIngestPlanTarget } from './public-ingest.js';
import type { ContextBuildTargetState, ContextBuildViewState } from './context-build-view.js';
// One scripted state change in the simulated context build.
export interface DemoReplayEvent {
// Milliseconds after the replay starts at which this event is applied.
delayMs: number;
// Connection id of the target whose state this event updates.
connectionId: string;
// Status copied onto the matching target when the event is applied.
status: 'running' | 'done';
// Detail text copied onto the target; null clears it.
detailLine: string | null;
// Summary text copied onto the target; null clears it.
summaryText: string | null;
}
/**
 * Builds the ingest-plan target descriptor for one demo connection.
 *
 * @param connectionId Id used for the target and inside its debug command.
 * @param operation `'scan'` or `'source-ingest'`; selects the command verb and the single step.
 * @param driver Driver name stored on the target.
 * @returns A target whose `steps` contains exactly the given operation.
 */
function createDemoTarget(connectionId: string, operation: 'scan' | 'source-ingest', driver: string): KtxPublicIngestPlanTarget {
  const isScan = operation === 'scan';
  const cliVerb = isScan ? 'scan' : 'ingest';
  return {
    connectionId,
    driver,
    operation,
    debugCommand: `ktx ${cliVerb} ${connectionId}`,
    steps: isScan ? ['scan'] : ['source-ingest'],
  };
}
// Targets replayed by the tour: one warehouse scan followed by three source ingests.
const primaryTarget = createDemoTarget('demo-warehouse', 'scan', 'postgres');
const dbtTarget = createDemoTarget('dbt', 'source-ingest', 'dbt');
const metabaseTarget = createDemoTarget('metabase', 'source-ingest', 'metabase');
const notionTarget = createDemoTarget('notion', 'source-ingest', 'notion');
/**
 * Creates the initial view state for a target: queued, with no detail or
 * summary text, no start time, and zero elapsed time.
 *
 * @param target Target the state belongs to.
 * @returns A fresh state object referencing `target`.
 */
function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState {
  const initial: ContextBuildTargetState = {
    target,
    status: 'queued',
    detailLine: null,
    summaryText: null,
    startedAt: null,
    elapsedMs: 0,
  };
  return initial;
}
// Targets shown in the replay, split into the primary source and the context sources.
export const DEMO_REPLAY_TARGETS = {
primarySources: [primaryTarget],
contextSources: [dbtTarget, metabaseTarget, notionTarget],
};
/**
 * Returns the scripted event sequence for the demo replay.
 *
 * Events are listed in non-decreasing `delayMs` order; each target starts
 * running at the moment the previous one is marked done.
 *
 * @returns A new array of ten events on every call.
 */
export function buildDemoReplayTimeline(): DemoReplayEvent[] {
  // A target that is in progress: carries a detail line, no summary.
  const running = (delayMs: number, connectionId: string, detailLine: string): DemoReplayEvent => ({
    delayMs,
    connectionId,
    status: 'running',
    detailLine,
    summaryText: null,
  });
  // A finished target: detail cleared, summary set to 'completed'.
  const done = (delayMs: number, connectionId: string): DemoReplayEvent => ({
    delayMs,
    connectionId,
    status: 'done',
    detailLine: null,
    summaryText: 'completed',
  });

  return [
    running(0, 'demo-warehouse', 'scanning...'),
    running(600, 'demo-warehouse', '[50%] scanning...'),
    done(1200, 'demo-warehouse'),
    running(1200, 'dbt', 'ingesting...'),
    running(1800, 'dbt', '[60%] ingesting...'),
    done(2200, 'dbt'),
    running(2200, 'metabase', 'ingesting...'),
    done(2800, 'metabase'),
    running(2800, 'notion', 'ingesting...'),
    done(3400, 'notion'),
  ];
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Implement `runDemoContextReplay` animation driver**
Add to `setup-demo-tour.ts`:
```typescript
import { renderContextBuildView, createRepainter } from './context-build-view.js';
/**
 * Replays the scripted demo build through the context build view, then waits
 * for the user to continue or go back.
 *
 * Every `FRAME_MS` the timeline events whose `delayMs` has passed are applied
 * to the matching targets and the view is repainted. Once the timeline is
 * exhausted and every target is `done`, the completion summary is written and
 * navigation is awaited.
 *
 * @param io CLI io whose stdout receives the frames and the summary.
 * @param stdin Optional stream forwarded to the navigation function.
 * @param waitNav Navigation function; defaults to `waitForDemoNavigation`, so
 *   existing two-argument callers behave as before.
 * @returns The direction reported by the navigation function.
 * @throws Rejects with any error raised while applying or painting a frame;
 *   the frame timer is stopped first.
 */
export async function runDemoContextReplay(
  io: KtxCliIo,
  stdin?: NodeJS.ReadStream,
  waitNav: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'> = waitForDemoNavigation,
): Promise<'forward' | 'back'> {
  const repainter = createRepainter(io);
  const timeline = buildDemoReplayTimeline();
  const startTime = Date.now();
  const state: ContextBuildViewState = {
    primarySources: DEMO_REPLAY_TARGETS.primarySources.map((t) => createTargetState(t)),
    contextSources: DEMO_REPLAY_TARGETS.contextSources.map((t) => createTargetState(t)),
    frame: 0,
    startedAt: startTime,
    totalElapsedMs: 0,
  };
  const allTargets = [...state.primarySources, ...state.contextSources];
  const targetMap = new Map(allTargets.map((t) => [t.target.connectionId, t]));
  const FRAME_MS = 120;
  let eventIndex = 0;

  await new Promise<void>((resolve, reject) => {
    const interval = setInterval(() => {
      try {
        // One clock reading per frame keeps all timings in a frame consistent.
        const now = Date.now();
        const elapsed = now - startTime;
        state.frame += 1;
        state.totalElapsedMs = elapsed;

        // Apply every event that has become due since the previous frame.
        while (eventIndex < timeline.length && timeline[eventIndex]!.delayMs <= elapsed) {
          const event = timeline[eventIndex]!;
          const target = targetMap.get(event.connectionId);
          if (target) {
            target.status = event.status;
            target.detailLine = event.detailLine;
            target.summaryText = event.summaryText;
            if (event.status === 'running' && target.startedAt === null) {
              target.startedAt = now;
            }
            if (event.status === 'done') {
              target.elapsedMs = target.startedAt !== null ? now - target.startedAt : 0;
            }
          }
          eventIndex += 1;
        }

        // Keep the elapsed time of in-progress targets ticking.
        for (const t of allTargets) {
          if (t.status === 'running' && t.startedAt !== null) {
            t.elapsedMs = now - t.startedAt;
          }
        }

        repainter.paint(renderContextBuildView(state, { styled: io.stdout.isTTY ?? false, showHint: false }));

        if (eventIndex >= timeline.length && allTargets.every((t) => t.status === 'done')) {
          clearInterval(interval);
          resolve();
        }
      } catch (error) {
        // Stop the timer and surface the failure to the caller instead of
        // letting it escape from the timer callback.
        clearInterval(interval);
        reject(error);
      }
    }, FRAME_MS);
  });

  io.stdout.write(renderDemoContextCompletionSummary());
  return waitNav(stdin);
}
/**
 * Renders the text written after the replay finishes: a headline, a
 * placeholder line for the final counts, and the navigation hint.
 *
 * @returns The summary text, starting and ending with a newline.
 */
function renderDemoContextCompletionSummary(): string {
  const headline = `${cyan('★')} KTX finished ingesting demo data`;
  const placeholder = ' Placeholder — final counts will come from pre-packaged demo results.';
  const hint = ` ${dim('Press Enter to continue, Escape to go back')}`;
  return ['', headline, '', placeholder, '', hint, ''].join('\n');
}
```
Note: `renderDemoContextCompletionSummary` is a placeholder that will be updated when
the user provides the real pre-packaged demo data. The summary counts (business areas,
query definitions, knowledge pages) will be populated from those assets.
- [ ] **Step 6: Run tests and type-check**
Run: `pnpm --filter @ktx/cli run type-check && pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: PASS
- [ ] **Step 7: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo context build replay animation"
```
---
### Task 4: Add transition message and completion summary
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing tests**
Append to test file:
```typescript
import { renderDemoAgentTransition, renderDemoCompletionSummary } from './setup-demo-tour.js';
// The transition text must announce readiness and prompt for an agent.
describe('renderDemoAgentTransition', () => {
  it('includes transition message about connecting agent', () => {
    const transition = renderDemoAgentTransition();
    for (const phrase of ['Demo project is ready', 'connect your agent']) {
      expect(transition).toContain(phrase);
    }
  });
});
// Covers both summary variants: agent installed and agent not installed.
describe('renderDemoCompletionSummary', () => {
  const demoDir = '/tmp/ktx-demo-abc123';

  it('includes project path and temp warning', () => {
    const summary = renderDemoCompletionSummary(demoDir, true);
    for (const fragment of [demoDir, 'temporary', 'ktx setup']) {
      expect(summary).toContain(fragment);
    }
  });

  it('shows manual agent instructions when agent not installed', () => {
    expect(renderDemoCompletionSummary(demoDir, false)).toContain('ktx setup --agents');
  });

  it('shows success message when agent installed', () => {
    expect(renderDemoCompletionSummary(demoDir, true)).toContain('agent is connected');
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: FAIL — exports not found
- [ ] **Step 3: Implement transition and completion rendering**
Add to `setup-demo-tour.ts`:
```typescript
/**
 * Renders the boxed message written before the agents step.
 *
 * @returns The message text, starting and ending with a newline.
 */
export function renderDemoAgentTransition(): string {
  const box = [
    `┌ Demo project is ready — let's connect your agent`,
    '│',
    '│ Your KTX context has been built with demo data.',
    '│ Select an agent to start using it.',
    '└',
  ];
  return `\n${box.join('\n')}\n`;
}
/**
 * Renders the final tour summary.
 *
 * @param projectDir Demo project path, shown on the `Project:` line and in the manual command.
 * @param agentInstalled When true a success line is shown; otherwise the
 *   `ktx setup --agents` command for connecting an agent is shown.
 * @returns The summary text, starting and ending with a newline.
 */
export function renderDemoCompletionSummary(projectDir: string, agentInstalled: boolean): string {
  const agentLines = agentInstalled
    ? [' Your agent is connected to a demo KTX project.']
    : [
        ' Demo project created. Connect an agent to start using it:',
        ` $ ktx setup --agents --project-dir ${projectDir}`,
      ];

  return [
    '',
    `${cyan('★')} KTX demo is ready`,
    '',
    ...agentLines,
    '',
    ` ${dim('⚠')} This project is in a temporary directory and will be`,
    ` cleaned up by your system. To set up KTX with your own`,
    ' data, run: ktx setup',
    '',
    ` Project: ${projectDir}`,
    '',
  ].join('\n');
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo tour transition and completion summary"
```
---
### Task 5: Implement `runDemoTour` orchestrator
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for the orchestrator**
Append to test file:
```typescript
import { vi } from 'vitest';
import type { KtxSetupAgentsResult } from './setup-agents.js';
import { runDemoTour } from './setup-demo-tour.js';
// Orchestrator tests. Every collaborator that touches the outside world
// (project seeding, key reading, agents step) is injected so the tests stay
// hermetic.
describe('runDemoTour', () => {
  function createMockIo() {
    const chunks: string[] = [];
    return {
      io: {
        stdout: { isTTY: true, columns: 80, write: (chunk: string) => { chunks.push(chunk); } },
        stderr: { write: () => {} },
      },
      chunks,
    };
  }

  // Stand-in for the default project seeding so no demo project is created on disk.
  function createEnsureProjectStub() {
    return vi.fn().mockResolvedValue(undefined);
  }

  it('returns 0 on successful tour with agent installed', async () => {
    const { io } = createMockIo();
    const ensureProject = createEnsureProjectStub();
    const mockAgents = vi.fn<() => Promise<KtxSetupAgentsResult>>().mockResolvedValue({
      status: 'ready',
      projectDir: '/tmp/test',
      installs: [{ target: 'claude-code' as const, scope: 'project' as const, mode: 'both' as const }],
    });
    const navigation = vi.fn<() => Promise<'forward' | 'back'>>().mockResolvedValue('forward');
    const result = await runDemoTour(
      { inputMode: 'auto' },
      io,
      { agents: mockAgents, waitForNavigation: navigation, ensureProject, skipReplayAnimation: true },
    );
    expect(result).toBe(0);
    expect(ensureProject).toHaveBeenCalledTimes(1);
    expect(mockAgents).toHaveBeenCalled();
  });

  it('handles back navigation from first step', async () => {
    const { io } = createMockIo();
    const ensureProject = createEnsureProjectStub();
    const navigation = vi.fn<() => Promise<'forward' | 'back'>>().mockResolvedValue('back');
    const result = await runDemoTour(
      { inputMode: 'auto' },
      io,
      { waitForNavigation: navigation, ensureProject, skipReplayAnimation: true },
    );
    expect(result).toBe(0);
    // Going back from the first card ends the tour immediately.
    expect(navigation).toHaveBeenCalledTimes(1);
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: FAIL — `runDemoTour` not exported or wrong signature
- [ ] **Step 3: Implement `runDemoTour`**
Add to `setup-demo-tour.ts`:
```typescript
import { defaultDemoProjectDir, ensureSeededDemoProject } from './demo-assets.js';
import type { KtxSetupAgentsResult } from './setup-agents.js';
import { runKtxSetupAgentsStep } from './setup-agents.js';
// Tour steps, listed in the order they are visited.
type DemoStep = 'databases' | 'sources' | 'context' | 'agents';
const DEMO_STEPS: DemoStep[] = ['databases', 'sources', 'context', 'agents'];
// Optional overrides for the functions the tour calls.
export interface DemoTourDeps {
// Runs the agents step; `runKtxSetupAgentsStep` is used when omitted.
agents?: (args: Parameters<typeof runKtxSetupAgentsStep>[0], io: KtxCliIo) => Promise<KtxSetupAgentsResult>;
// Reports the navigation direction for a step; `waitForDemoNavigation` is used when omitted.
waitForNavigation?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>;
// Called with the demo project directory before the first step; `ensureSeededDemoProject` is used when omitted.
ensureProject?: typeof ensureSeededDemoProject;
// When true, the context step only waits for navigation instead of running the replay.
skipReplayAnimation?: boolean;
}
/**
 * Runs the guided demo tour: database card, sources card, context build
 * replay, then the agents step.
 *
 * The demo project is ensured before the first step. Each step reports a
 * direction: `'forward'` advances, `'back'` returns to the previous step, and
 * `'back'` on the first step ends the tour. The agents step never advances;
 * it either goes back or writes the completion summary and ends the tour.
 *
 * @param args Input mode forwarded to the agents step.
 * @param io CLI io used for all output.
 * @param deps Optional overrides for the functions the tour calls.
 * @returns Always 0.
 */
export async function runDemoTour(
  args: { inputMode: 'auto' | 'disabled' },
  io: KtxCliIo,
  deps: DemoTourDeps = {},
): Promise<number> {
  const waitNav = deps.waitForNavigation ?? waitForDemoNavigation;
  const ensureProject = deps.ensureProject ?? ensureSeededDemoProject;
  const projectDir = defaultDemoProjectDir();
  await ensureProject({ projectDir });

  for (let stepIndex = 0; stepIndex < DEMO_STEPS.length; ) {
    let direction: 'forward' | 'back';

    switch (DEMO_STEPS[stepIndex]!) {
      case 'databases':
        direction = await renderDemoCard('Database connection', ['PostgreSQL (demo warehouse)'], io, undefined, waitNav);
        break;
      case 'sources':
        direction = await renderDemoCard('Context sources', ['dbt', 'Metabase', 'Notion'], io, undefined, waitNav);
        break;
      case 'context':
        io.stdout.write(renderDemoBanner());
        direction = deps.skipReplayAnimation ? await waitNav() : await runDemoContextReplay(io);
        break;
      default: {
        // 'agents': the only interactive step of the tour.
        io.stdout.write(renderDemoAgentTransition());
        const runAgents = deps.agents ?? runKtxSetupAgentsStep;
        const agentsResult = await runAgents(
          {
            projectDir,
            inputMode: args.inputMode,
            yes: false,
            agents: true,
            scope: 'project',
            mode: 'both',
            skipAgents: false,
          },
          io,
        );
        if (agentsResult.status !== 'back') {
          io.stdout.write(renderDemoCompletionSummary(projectDir, agentsResult.status === 'ready'));
          return 0;
        }
        direction = 'back';
      }
    }

    if (direction === 'forward') {
      stepIndex += 1;
      continue;
    }
    // Going back from the first step leaves the tour.
    if (stepIndex === 0) return 0;
    stepIndex -= 1;
  }
  return 0;
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Run type-check**
Run: `pnpm --filter @ktx/cli run type-check`
Expected: PASS — all types align with existing interfaces
- [ ] **Step 6: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add runDemoTour orchestrator with step navigation"
```
---
### Task 6: Wire up in `setup.ts`
**Files:**
- Modify: `packages/cli/src/setup.ts`
- [ ] **Step 1: Read the current `runKtxSetupDemoFromEntryMenu` function**
Read `packages/cli/src/setup.ts` and locate `runKtxSetupDemoFromEntryMenu` (around lines 218-233).
Current implementation:
```typescript
async function runKtxSetupDemoFromEntryMenu(
args: Extract<KtxSetupArgs, { command: 'run' }>,
io: KtxCliIo,
deps: KtxSetupDeps,
): Promise<number> {
const runner = deps.demo ?? (await import('./demo.js')).runKtxDemo;
return await runner(
{
command: 'seeded',
projectDir: defaultDemoProjectDir(),
outputMode: 'viz',
inputMode: args.inputMode,
},
io,
);
}
```
- [ ] **Step 2: Replace with demo tour call**
Replace the function body to call `runDemoTour`:
```typescript
/**
 * Entry-menu handler for the demo option: loads the demo tour module on
 * demand and runs the tour with the caller's input mode and agents runner.
 *
 * @returns The exit code produced by `runDemoTour`.
 */
async function runKtxSetupDemoFromEntryMenu(
  args: Extract<KtxSetupArgs, { command: 'run' }>,
  io: KtxCliIo,
  deps: KtxSetupDeps,
): Promise<number> {
  const demoTour = await import('./setup-demo-tour.js');
  const { inputMode } = args;
  const { agents } = deps;
  return await demoTour.runDemoTour({ inputMode }, io, { agents });
}
```
- [ ] **Step 3: Update imports — remove unused `defaultDemoProjectDir` import if no longer needed elsewhere in setup.ts**
Check if `defaultDemoProjectDir` is used elsewhere in `setup.ts`. If it's only used
in `runKtxSetupDemoFromEntryMenu`, remove the import. If used elsewhere, keep it.
Also check whether the `KtxDemoArgs` import is still needed. Keep the `demo` slot in
`KtxSetupDeps` for backwards compatibility, even though the entry menu path no longer
calls it; if that slot's type references `KtxDemoArgs`, the import must stay. Remove the
import only if nothing else in `setup.ts` uses it.
- [ ] **Step 4: Run type-check and tests**
Run: `pnpm --filter @ktx/cli run type-check && pnpm --filter @ktx/cli run test`
Expected: PASS — existing tests continue to work, demo tour is now wired in
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup.ts
git commit -m "feat(cli): wire demo tour into setup entry menu"
```
---
### Task 7: End-to-end verification
**Files:**
- None (verification only)
- [ ] **Step 1: Run full test suite**
Run: `pnpm --filter @ktx/cli run test 2>&1 | tee /tmp/ktx-demo-tour-test.log`
Expected: All tests pass. Check the output for any regressions.
- [ ] **Step 2: Run type-check across workspace**
Run: `pnpm run type-check`
Expected: PASS
- [ ] **Step 3: Run pre-commit checks if available**
Run: `pnpm run check` (if configured)
Expected: PASS
- [ ] **Step 4: Manual smoke test (if TTY available)**
Run: `pnpm --filter @ktx/cli run build && node packages/cli/dist/cli.js setup`
1. Select "Try KTX with packaged demo data"
2. Verify demo banner appears with full explanation text
3. Verify "Database connection" card shows with "PostgreSQL (demo warehouse)"
4. Press Enter → verify "Context sources" card shows with dbt, Metabase, Notion
5. Press Escape → verify you go back to database card
6. Press Enter twice → verify context build replay animation runs
7. Verify completion summary appears after replay
8. Press Enter → verify agents step prompt appears (interactive)
9. Press Escape all the way back → verify you return to entry menu
- [ ] **Step 5: Final commit if any adjustments needed**
```bash
git add -A
git commit -m "fix(cli): demo tour adjustments from smoke test"
```
---
## Open Seams for Demo Data
When the user provides the real pre-packaged demo results, update these locations:
1. **`renderDemoContextCompletionSummary()`** in `setup-demo-tour.ts` — replace placeholder text with actual counts (business areas, query definitions, knowledge pages) from the demo data
2. **`buildDemoReplayTimeline()`** in `setup-demo-tour.ts` — adjust timing and progress details to match the real ingestion profile
3. **`demo-assets.ts`** — update `REQUIRED_SEEDED_ASSET_PATHS` and `demoConfig()` if the demo dataset changes from SQLite/Orbit to Postgres/dbt/Metabase/Notion
4. **Pre-packaged asset files** in `packages/cli/assets/demo/` — replace with the new demo dataset

View file

@ -0,0 +1,171 @@
# Agent-Friendly Docs Site Design
## Goal
Make `docs-site` easier for coding agents and LLM readers to discover, ingest,
and use. The work applies the Vercel Academy agent-friendly docs patterns to the
KTX documentation site while preserving the current Fumadocs + Next.js
architecture.
Success means agents can:
- Discover the documentation from well-known root files.
- Fetch all documentation in one plain-text response.
- Fetch any docs page as markdown without parsing the HTML UI.
- Follow CLI, MCP, setup, integration, and semantic-layer workflows from
structured examples.
- Recover from common setup and command failures using explicit troubleshooting
notes.
## Current State
`docs-site` is a Next.js 15 app using Fumadocs. Source pages live under
`docs-site/content/docs`, and rendered docs are served under `/docs`.
The site currently has good human-facing MDX pages, but it does not expose:
- `/llms.txt`
- `/llms-full.txt`
- raw markdown routes such as `/docs/getting-started/quickstart.md`
- markdown content negotiation
Many docs pages already use tables and code blocks, but the structure is not
consistently optimized for literal agent parsing. CLI and agent-facing pages are
the highest-priority content because agents are most likely to copy commands and
JSON examples directly.
## Design
### Machine-readable access
Add a small LLM docs utility layer inside `docs-site`:
- `docs-site/lib/llm-docs.ts`
- Converts Fumadocs pages to raw or LLM-readable markdown.
- Builds a stable ordered list of docs pages from `source.getPages()`.
- Produces the `llms.txt` index content.
- Produces the `llms-full.txt` bundled content.
Add routes:
- `docs-site/app/llms.txt/route.ts`
- Returns `text/plain; charset=utf-8`.
- Includes `# KTX`, a blockquote summary, a short description, and sections
linking to key docs, markdown docs, CLI reference pages, integration pages,
and `/llms-full.txt`.
- `docs-site/app/llms-full.txt/route.ts`
- Returns `text/plain; charset=utf-8`.
- Concatenates all docs pages in source order.
- Prefixes each page with a stable heading and canonical `/docs/...` URL.
- `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts`
- Returns one docs page as `text/markdown; charset=utf-8`.
- Uses the same slug shape as `/docs/[[...slug]]`.
- Returns 404 for unknown pages.
Add a Next rewrite in `docs-site/next.config.mjs`:
- `/docs/:path*.md` rewrites to `/llms.mdx/docs/:path*`
Add a markdown negotiation proxy for `/docs/...` requests:
- Requests whose `Accept` header prefers markdown are rewritten to the matching
LLM markdown route.
- Normal browser requests continue to render the existing Fumadocs UI.
- The proxy must leave `/llms.txt`, `/llms-full.txt`, assets, and non-docs
routes unchanged.
### Content rewrite pass
Rewrite the existing MDX content in a bounded, high-impact pass. The intent is
not to expand every page; it is to make every page more literal and consistent
for agents.
Apply these patterns across docs:
- Put command signatures in fenced code blocks.
- Use tables for flags, options, inputs, outputs, supported values, and
environment variables.
- Use realistic values in copy-paste examples.
- Show complete expected command output when output shape matters.
- Add explicit "Common errors" or "Recovery" sections for workflows where a
command can fail for predictable reasons.
- Add workflow sections that chain commands in the order an agent should use
them.
- Avoid placeholders that an agent could copy literally, unless the placeholder
is clearly marked as a value to replace.
Priority pages:
1. `getting-started/quickstart.mdx`
- Add a compact workflow summary.
- Make prerequisites and generated files explicit.
- Add troubleshooting for missing API keys, failed connection tests, daemon
startup, and unbuilt context.
2. `guides/serving-agents.mdx`
- Treat MCP tools and `ktx agent` commands as agent-facing API references.
- Add tool/command input tables, output expectations, safety constraints, and
workflows for answering analytics questions.
3. `guides/writing-context.mdx`
- Add semantic-source schema tables.
- Add workflows for listing, reading, editing, validating, querying, and
writing wiki knowledge.
4. `cli-reference/*.mdx`
- Normalize every command page to: command signature, subcommands table,
option tables, examples, output modes, common errors, and related workflows
where useful.
5. `integrations/agent-clients.mdx`, `integrations/primary-sources.mdx`, and
`integrations/context-sources.mdx`
- Normalize integration setup sections into structured config tables,
copy-paste examples, authentication requirements, and recovery notes.
6. Concept and benchmark pages
- Keep narrative content, but add compact "Agent usage notes" where it helps
agents decide when to read or cite the page.
### Documentation boundaries
The first pass should not introduce a separate public docs tree or a generated
API reference system. It should work with the existing MDX source files and
Fumadocs loader.
Do not add stale compatibility aliases or rename KTX concepts. Keep examples
aligned with commands and files that exist in the standalone KTX repository.
### Testing
Verification commands:
- `pnpm --filter ktx-docs build`
- `pnpm --filter ktx-docs exec tsc --noEmit` after generated Fumadocs source
files exist.
- Route checks against a local docs server:
- `GET /llms.txt` returns 200 and `text/plain`.
- `GET /llms-full.txt` returns 200 and `text/plain`.
- `GET /docs/getting-started/quickstart.md` returns 200 and
`text/markdown`.
- unknown markdown docs paths return 404.
For content checks, inspect the generated markdown responses to confirm they
contain:
- realistic command examples,
- tables,
- full output examples where documented,
- workflow sections,
- recovery/error sections.
## Acceptance Criteria
- `/llms.txt` gives agents a concise index with links to key KTX docs and
`/llms-full.txt`.
- `/llms-full.txt` returns all docs content in source order as plain text.
- Every Fumadocs page can be fetched through a `.md` URL.
- High-priority docs pages use consistent agent-friendly structure.
- The docs site builds successfully.
- Verification results and any skipped checks are reported clearly.

View file

@ -0,0 +1,252 @@
# Demo Guided Tour — Design Spec
## Problem
The "Try KTX with packaged demo data" option in `ktx setup` is completely
disconnected from the real setup wizard. It bypasses all wizard steps, plays
an animated replay in a temp directory, and exits with no bridge to actually
using KTX. Users don't learn the real setup flow and hit a dead end.
## Solution
Redesign the demo option as a **guided tour** that walks the user through the
same setup wizard steps with pre-filled, read-only selections. The tour ends
with a real interactive agents step so the user can immediately use the demo
project with their coding agent.
## Design Decisions
| Decision | Choice | Rationale |
|----------|--------|-----------|
| Implementation strategy | Demo mode flag on existing wizard steps | Maximum code reuse; wizard changes automatically apply to demo |
| LLM/embeddings steps | Skipped | Not relevant to pre-packaged demo data |
| Database selection | PostgreSQL (read-only card) | Pre-filled, matches demo dataset |
| Context sources | dbt, Metabase, Notion (read-only card) | Pre-filled, matches demo dataset |
| Context build | Replay through real progress visualization | Same spinners, progress bars, status icons as real build |
| Agents step | Real interactive step | User actually connects their agent |
| Project location | Temp directory (`/tmp/ktx-demo-{hex}`) | Frictionless, no directory prompt |
| Navigation | Enter to advance, Escape to go back | Consistent with rest of wizard |
## Flow
```
Entry menu: "Try KTX with packaged demo data"
Create demo project in /tmp/ktx-demo-{hex}
Copy pre-packaged assets (demo DB, replay, context artifacts)
┌────────────────────────────────────────────────────────────────┐
│ Demo banner (persistent, shown on every step) │
│ │
│ Demo mode — data has been pre-processed and KTX context is │
│ already built. This walkthrough illustrates the setup steps. │
│ Selections are pre-filled and read-only. │
└────────────────────────────────────────────────────────────────┘
Read-only card: Database connection
▸ PostgreSQL (demo warehouse)
[Enter → next, Escape → back to entry menu]
Read-only card: Context sources
▸ dbt
▸ Metabase
▸ Notion
[Enter → next, Escape → back to database card]
Context build replay
Same renderContextBuildView() / repainter as real wizard
Sources: demo-warehouse, dbt, metabase, notion
Replay at slightly faster-than-real pace
Completion summary: business areas, query definitions, knowledge pages
[Enter → next, Escape → back to sources card]
Transition message:
"Demo project is ready — let's connect your agent"
Interactive agents step (real runKtxSetupAgentsStep())
User selects agent target, scope, install mode
[Normal interactive navigation; Escape goes back to replay summary]
Final summary:
★ KTX demo is ready
Agent connected, project path shown
⚠ Temp directory warning
Pointer to `ktx setup` for real data
```
## Step Details
### Demo Banner
Shown at the top of every read-only step. Uses clack box-drawing style:
```
┌ Demo mode — data has been pre-processed and KTX context is already built.
│ This walkthrough illustrates the setup steps. Selections are pre-filled and read-only.
```
### Read-Only Step Cards
Rendered by a shared `renderDemoCard()` helper:
```typescript
async function renderDemoCard(
title: string,
selections: string[],
io: KtxCliIo,
): Promise<'forward' | 'back'>
```
- Renders a clack-style box with title, bullet list of pre-filled selections,
and navigation hint ("Press Enter to continue, Escape to go back")
- Listens for raw keypresses: Enter → `'forward'`, Escape → `'back'`
- Uses same box-drawing characters and colors as `@clack/prompts`
Card format:
```
┌ {title}
│ ▸ {selection 1}
│ ▸ {selection 2}
│ ...
│ Press Enter to continue, Escape to go back
```
### Demo Step Sequence
The demo reuses the main wizard's step loop with these steps:
```typescript
const demoSteps = ['databases', 'sources', 'context', 'agents'];
```
Steps `databases` and `sources` dispatch to `renderDemoCard()` instead of
their real interactive functions when demo mode is active. Step `context`
dispatches to the replay visualization. Step `agents` runs the real
`runKtxSetupAgentsStep()`.
Back navigation reuses `previousNavigableStepIndex()`. Escaping from the
first step (databases) returns to the entry menu.
### Context Build Replay
Uses the same rendering pipeline as the real context build:
- `renderContextBuildView()` for the progress display
- `createRepainter()` for terminal repainting
- Same spinner frames, progress bars (`████░░░░`), status icons (`✓`, `⠹`, `○`)
- Same source grouping (Primary sources / Context sources)
Sources shown:
```
Primary sources:
✓ demo-warehouse completed · Xs
Context sources:
✓ dbt completed · Xs
✓ metabase completed · Xs
✓ notion completed · Xs
```
Replay timing: events from the pre-packaged replay file are played back at
a slightly faster pace than real-time (compressed to feel brisk but not
instant).
Completion summary uses the existing format:
```
★ KTX finished ingesting your data
✓ Analyzed X business areas
✓ Reconciled — 0 conflicts
KTX created:
📊 X query definitions
📝 X knowledge pages
Press Enter to continue, Escape to go back
```
The exact counts and artifact names come from the pre-packaged demo results
(to be provided by the user as improved demo data).
### Agents Step Transition
A brief message bridges from the read-only tour to the interactive step:
```
┌ Demo project is ready — let's connect your agent
│ Your KTX context has been built with demo data.
│ Select an agent to start using it.
```
Then `runKtxSetupAgentsStep()` runs with the demo project directory,
normal interactive prompts enabled.
### Final Summary
```
★ KTX demo is ready
Your agent is connected to a demo KTX project.
⚠ This project is in a temporary directory and will be
cleaned up by your system. To set up KTX with your own
data, run: ktx setup
Project: /tmp/ktx-demo-a1b2c3
```
If the user skips the agents step, replace the "Your agent is connected to a
demo KTX project." line with manual agent connection instructions
(`ktx setup --agents --project-dir /tmp/...`).
## Implementation Approach
Thread a `demoMode` flag through the main setup loop in `setup.ts`. When
active:
1. Skip `models` and `embeddings` steps entirely
2. Replace `databases` and `sources` step dispatch with `renderDemoCard()`
3. Replace `context` step dispatch with replay visualization
4. Run `agents` step normally
5. Show demo-specific completion summary instead of ready menu
The `renderDemoCard()` helper is a new function in a new file
(e.g. `setup-demo-cards.ts`) that handles read-only card rendering and
keypress listening.
The context build replay reuses existing `renderContextBuildView()` and
`createRepainter()` from `context-build-view.ts`, fed with events from
the pre-packaged replay file at an accelerated playback rate.
## Files Changed
| File | Change |
|------|--------|
| `packages/cli/src/setup.ts` | Add `demoMode` flag to setup loop; skip models/embeddings; dispatch to demo cards for databases/sources; show demo banner; demo completion summary |
| `packages/cli/src/setup-demo-cards.ts` | New file: `renderDemoCard()` helper, demo banner renderer, demo step definitions |
| `packages/cli/src/setup-context.ts` | Support replay mode for demo: feed pre-packaged events at accelerated pace through existing progress view |
| `packages/cli/src/demo.ts` | Remove or simplify `runKtxSetupDemoFromEntryMenu()` — now dispatches to the main setup loop with `demoMode: true` |
| `packages/cli/src/demo-assets.ts` | Update asset list if new demo data is provided; ensure demo project setup writes valid `ktx.yaml` for agent use |
## Open Items
- **Demo data**: User will provide improved pre-packaged results (Postgres,
dbt, Metabase, Notion). Current demo assets may need updating.
- **Replay speed**: Exact acceleration factor TBD — should feel brisk but
give users time to read source names and status transitions. Start with
~2x real-time and adjust.

View file

@ -1,18 +0,0 @@
---
summary: Account activation policy changed on January 15, 2026.
tags:
- growth
- activation
- policy
refs: []
sl_refs:
- orbit_demo.accounts
- orbit_demo.purchase_requests
usage_mode: auto
---
Before January 15, 2026, activation meant first requester login.
On and after January 15, 2026, activation requires an approved purchase request and at least three activated requesters.
Always separate pre-policy and post-policy cohorts when comparing activation rates.

View file

@ -1,18 +0,0 @@
---
summary: ARR uses contract-first precedence before subscription-derived revenue.
tags:
- finance
- arr
- revenue
refs: []
sl_refs:
- orbit_demo.contracts
- orbit_demo.arr_movements
usage_mode: auto
---
ARR is calculated from active recurring contract ARR before falling back to subscription-derived revenue.
Do not double-count subscription MRR when an active contract row covers the same account and period.
Exclude cancelled contracts ending before the metric date, future-starting contracts, internal accounts, and test accounts.

View file

@ -0,0 +1,44 @@
---
summary: "Required elements for valid customer updates: what happened, what is being done, who owns next step, and when customer will hear back. Vague status phrases are not acceptable."
usage_mode: auto
sort_order: 0
tags:
- policy
- customer-success
refs:
- orbit-how-we-work
- sales-ops-cs-handoff-process
---
## Customer Update Communication Standard
**Source:** Notion — People & Operating Norms, last edited 2026-05-07
---
## Policy
Every customer update must contain four elements. An update that omits any of these is incomplete and must not be sent.
| # | Required Element | Example |
|---|---|---|
| 1 | **What happened** | "The approval routing failed for the renewal PO because the department budget split was not configured." |
| 2 | **What is being done** | "We are reconfiguring the budget split and re-routing the approval to the correct approver." |
| 3 | **Who owns the next step** | "[Name] on our CS team owns this and is working it now." |
| 4 | **When the customer will hear back** | "You will have an update by 3pm today." |
## Named Anti-Pattern
- **Do not send:** "We are looking into it."
- This phrase is only acceptable when the actual blocker is genuinely unknown. If the blocker is known, name it.
- Vague status phrases without a named owner and a time commitment are not acceptable customer updates.
## When This Applies
- Any written update to a customer during an active issue, escalation, or implementation delay.
- Applies to email, Slack, and any other written channel.
- Verbal updates in calls should follow the same structure; a written summary must follow the call.
---
See also: [[orbit-how-we-work]], [[sales-ops-cs-handoff-process]]

View file

@ -1,20 +0,0 @@
---
summary: Customer health combines support severity and procurement activity.
tags:
- customer-success
- health
- churn-risk
refs:
- nrr-retention
sl_refs:
- orbit_demo.support_tickets
- orbit_demo.purchase_requests
- orbit_demo.accounts
usage_mode: auto
---
High-risk accounts have multiple recent high-severity tickets or no recent procurement activity on growth and enterprise plans.
Medium risk captures partial support pressure or a material month-over-month decline in procurement activity.
Internal and test accounts are excluded from customer health scoring.

View file

@ -1,19 +0,0 @@
---
summary: Discount expirations are tracked separately from organic contraction.
tags:
- finance
- retention
refs:
- arr-contract-first
- nrr-retention
sl_refs:
- orbit_demo.contracts
- orbit_demo.arr_movements
usage_mode: auto
---
Discount expiration events identify pricing changes when negotiated discounts end.
Track these separately from organic contraction so board reporting can split pricing-driven and usage-driven changes.
Use movement_reason on arr_movements when separating discount expiration from churn or seat-reduction events.

View file

@ -1,16 +0,0 @@
---
summary: Canonical metrics exclude internal and test accounts and users.
tags:
- data-quality
- governance
refs: []
sl_refs:
- orbit_demo.accounts
usage_mode: auto
---
All canonical customer metrics exclude rows marked as internal or test fixtures.
This exclusion applies at both account and user grain when joining procurement, support, and revenue activity.
If a metric unexpectedly increases, check whether new internal or test accounts were created without proper flags.

View file

@ -0,0 +1,47 @@
---
summary: "New hire week-one knowledge requirements: four things every new hire must understand by end of week one, with manager as responsible owner."
usage_mode: auto
sort_order: 0
tags:
- orbit
- policy
refs:
- orbit-company-overview
- orbit-how-we-work
---
## New Hire Week-One Onboarding Policy
**Source:** Notion — People & Operating Norms, last edited 2026-05-07
**Owner:** Manager (not People Ops)
---
## Policy
Every new hire must understand **four things by end of week one**. The manager — not People Ops — is responsible for supplying this context.
## Required Week-One Knowledge
| # | What the new hire must understand |
|---|---|
| 1 | **What Orbit sells** — the core procurement workflow product and value proposition |
| 2 | **Why procurement workflow gets messy inside a customer** — the pain points that make Orbit necessary |
| 3 | **Which team handles which part of the customer lifecycle** — team lanes and ownership boundaries |
| 4 | **What their first useful project is** — a concrete, scoped piece of work they can contribute to immediately |
## Ownership
- The **manager** is responsible for delivering this context, not People Ops or a generic onboarding doc.
- If the manager cannot supply item 4 (first useful project) by day one, they should have it ready by end of day two at the latest.
- Items 1–3 can be covered via existing documentation; the manager should point to the right pages rather than re-explaining from scratch.
## Suggested Reading for Items 1–3
- Items 1 & 2: [[orbit-company-overview]]
- Item 3: [[orbit-company-overview]] (Team Lanes section)
- Operating norms and how decisions are made: [[orbit-how-we-work]]
---
See also: [[orbit-company-overview]], [[orbit-how-we-work]]

View file

@ -1,19 +0,0 @@
---
summary: NRR is calculated at parent-account grain by calendar quarter.
tags:
- analytics
- retention
- nrr
refs:
- arr-contract-first
sl_refs:
- orbit_demo.arr_movements
- orbit_demo.accounts
usage_mode: auto
---
Net Revenue Retention uses parent-account rollups by calendar quarter.
The formula is starting ARR plus expansion minus contraction and churn, divided by starting ARR.
Exclude parent accounts with zero starting ARR, new business, reactivations, and internal/test accounts from the denominator.

View file

@ -0,0 +1,64 @@
---
summary: "Customer activation: email verified + first project + team invite within 14 days of signup. D7/D14 activation rates and Time-to-Activate formulas. Source tables: customer, project, invite."
usage_mode: auto
sort_order: 0
tags:
- activation
- kpi
- growth
- funnel
- metrics
refs:
- orbit-customers-source
- orbit-activation-policy-change-jan-2026
- orbit-mart-account-activity
tables:
- orbit_analytics.customer
- orbit_analytics.project
- orbit_analytics.invite
---
# Activation KPI Glossary
**Owner team:** Growth
**Source:** Notion — Orbit Demo Home / Data Team - Onboarding / Activation KPI Glossary, last edited 2026-05-07
Use this when a question is about signup-to-habit behavior. Orbit uses activation language across Growth, Product, and CS conversations.
## Activation Definition
A customer is **activated** when **all three** of the following happen **within 14 days of signup**:
1. Email is verified
2. First project is created
3. At least one teammate is invited
## Funnel Stages
| Stage | Signal | Data source |
|---|---|---|
| 1. Signup | Customer row created | `orbit_analytics.customer` |
| 2. Email Verified | `customer.email_verified_at` is not null | `orbit_analytics.customer` |
| 3. First Project | At least one row in `orbit_analytics.project` for the customer | `orbit_analytics.project` |
| 4. Team Invite | At least one row in `orbit_analytics.invite` for the customer | `orbit_analytics.invite` |
| 5. Activated | All of (2), (3), and (4) within 14 days of (1) | — |
## Conversion-Rate KPIs
| KPI | Formula |
|---|---|
| **D7 Activation Rate** | `activated_customers_within_7_days / signups_in_cohort` |
| **D14 Activation Rate** | `activated_customers_within_14_days / signups_in_cohort` |
| **Time-to-Activate** | `median(activated_at - created_at)` in hours |
Growth conversations typically use D7 and D14 Activation Rate. Product and CS may ask about individual funnel steps — confirm whether they mean the full activation definition or only one stage.
## Source Notes
- Use `orbit_analytics.customer` for `created_at` and `email_verified_at`.
- For project or invite timing, check `orbit_analytics.project` and `orbit_analytics.invite` before changing the activation definition.
- `created_at` is UTC; confirm timezone expectations before cohort filtering.
## Relationship to Account-Level Activation
This glossary defines **customer-level** activation (signup-to-habit). The **account-level** activation workflow (requester login → first approved purchase request → account activated) is a separate concept tracked in `mart_account_activity` and governed by the January 2026 policy change. See `orbit-activation-policy-change-jan-2026` for that definition.

View file

@ -0,0 +1,46 @@
---
summary: "January 2026 activation policy change: policy_version splits events into pre_2026_01_15 and post_2026_01_15 cohorts. mart_account_activity compares activation counts across the boundary."
usage_mode: auto
sort_order: 0
tags:
- activation
- growth
- policy
- governed-metric
- procurement
sl_refs:
- mart_account_activity
---
# Activation Policy Change — January 2026
**Governed metric key:** `activated_accounts`
**Owner team:** growth
**Notion:** `notion://notion_page_activation_policy_decision#policy-change`
**Sources:** `mart_account_activity`, `int_activation_policy_windows`, `stg_activation_events`
## Policy Boundary
The activation workflow changed on **2026-01-15**. All activation events are tagged with `policy_version`:
- `pre_2026_01_15` — events before the workflow update
- `post_2026_01_15` — events after the workflow update
## Activation Event Types
`first_requester_login`, `requester_activated`, `first_approved_purchase_request`, `account_activated`
## Account Activation Sequence
1. First requester login → `first_requester_login`
2. Requester activated → `requester_activated`
3. First approved purchase request → `first_approved_purchase_request`
4. Account activated → `account_activated`
## Exclusions
Internal and test accounts (lifecycle_status = `internal` or `test` on `stg_accounts`) are excluded from activation counts. Sessions (`stg_sessions`) are used for pre-policy activation and activity exclusions.
## Dashboard
Exposed via the **Growth Activation Dashboard** (`https://orbit-demo.example.com/dashboards/activation`), which depends on `mart_account_activity`.

View file

@ -0,0 +1,39 @@
---
summary: "ARR is calculated contract-first: active contract ARR takes precedence over subscription ARR for any covered period."
usage_mode: auto
sort_order: 0
tags:
- arr
- governed-metric
- finance
- contracts
- subscriptions
sl_refs:
- mart_arr_daily
- mart_account_segments
---
# ARR — Contract-First Definition
**Governed metric key:** `arr`
**Owner team:** finance
**Notion:** `notion://notion_page_arr_contract_reporting#arr-contract-first`
**Source:** `mart_arr_daily` (grain: `metric_date`)
## Rule
ARR is calculated **contract-first**: when an active contract exists for an account and period, `int_active_contract_arr` is used. Subscription ARR (`stg_subscriptions`) is only used when no active contract covers the period.
## Known Assertion
The dbt test on `mart_arr_daily.arr_cents` asserts the value equals **1,874,200,000 cents ($18,742,000)** as of `metric_date = 2026-03-31`.
## Intermediate model
`int_active_contract_arr` — active contract ARR as of 2026-03-31 (grain: `contract_id`).
## Related
- `stg_contracts` — contract records (status: draft, active, cancelled, expired)
- `stg_subscriptions` — fallback ARR source (status: active, cancelled, past_due, trialing)
- `mart_arr_daily` — board-prep daily ARR mart

View file

@ -0,0 +1,71 @@
---
summary: "Orbit: procurement workflow software (requests → approvals → supplier onboarding → POs). Plans: Starter, Growth, Enterprise. Team lanes and open operating questions as of May 2026."
usage_mode: auto
sort_order: 0
tags:
- company-context
- product
- plans
- team-lanes
- procurement
refs:
- orbit-plan-segment-normalization
- orbit-procurement-qualifying-actions
---
# Orbit Company Overview
**Source:** Notion — Orbit Demo Home / Company Overview + Orbit Demo Home (root), last edited 2026-05-07
## What Orbit Sells
Orbit sells procurement workflow and spend-control software. The core value proposition: route purchase requests, collect approvals, onboard suppliers, and issue purchase orders without turning every exception into a status hunt.
**Primary buyers:** Finance, Procurement, Business Operations.
**Daily users:** department admins, office managers, IT leads, legal ops partners — anyone who has to get a vendor through the building.
## Product Workflow
1. Requester submits a purchase request
2. Approval routing collects the right decision
3. Supplier invite and onboarding happen before work starts
4. Purchase order is created from the approved request
5. Renewal handoff keeps the relationship from drifting
## Plans
| Plan | Target customer |
|---|---|
| **Starter** | Teams moving out of spreadsheet tracking |
| **Growth** | Default mid-market plan |
| **Enterprise** | Multiple approval policies, parent/child account structures, heavier renewal coordination |
**Legacy alias:** `pro_plus` in older notes means Growth. Treat as Growth unless Sales Ops says otherwise. See `orbit-plan-segment-normalization` for the data-layer normalization rule.
## Team Lanes
| Team | Responsibilities |
|---|---|
| Product | Requester onboarding, supplier onboarding, approval routing, PO workflow quality |
| Growth | Activation, self-serve conversion |
| Sales Ops | Account segmentation, plan mapping, contracts, handoff hygiene |
| Customer Success | Implementation, support escalations, account health, renewal risk |
| Finance | Billing, close, board prep |
| Data | Cross-functional support for all departments |
| Executive | Company priorities, weekly operating review |
## Open / Unsettled Questions (as of May 2026)
- Whether supplier onboarding stays fully inside Product or splits more work with CS for larger accounts.
- Whether Growth is still the right default-plan language in sales materials.
- How renewal handoff works when Sales Ops updates account segment late in-quarter.
- Implementation handoff template decision still pending.
- Renewal risk review agenda should not live only in meeting notes.
## Common Customer Pain Points (Pre-Sale)
- "We have too many request paths."
- "Approvals happen, but no one can explain the state of the request."
- "Supplier onboarding is split across three teams."
- "Renewals are visible too late."
- "People keep asking Finance for status because there is nowhere better to look."

View file

@ -0,0 +1,46 @@
---
summary: "Customer health risk definition: risk_level (low/medium/high) derived from open critical support tickets and recent procurement activity. Mart: mart_customer_health, as of 2026-03-31."
usage_mode: auto
sort_order: 0
tags:
- customer-health
- risk
- customer-success
- governed-metric
- support
sl_refs:
- mart_customer_health
---
# Customer Health Risk Definition
**Governed metric key:** `active_customers`
**Owner team:** customer_success
**Notion:** `notion://notion_page_customer_health_playbook#risk-definition`
**Sources:** `mart_customer_health`, `int_customer_health_signals`
## Risk Levels
`low`, `medium`, `high` — derived from two signal types:
1. **Support ticket signals** (`stg_support_tickets`): open or pending tickets with severity `high` or `critical` increase risk.
2. **Procurement activity signals** (`stg_purchase_requests`, `stg_purchase_orders`): recent qualifying procurement actions reduce risk.
## Intermediate Model
`int_customer_health_signals` — combines open critical ticket count and recent procurement action count per account.
## Mart
`mart_customer_health` — account-grain risk mart as of **2026-03-31**.
- `account_id`: dbt not_null, unique
- `risk_level`: dbt accepted_values [low, medium, high]
## Support Ticket Severities
`low`, `medium`, `high`, `critical`
## Account Ownership Context
`stg_account_owners` provides effective-dated ownership (owner_team: sales_ops, customer_success, finance) for escalation routing.

View file

@ -0,0 +1,42 @@
---
summary: "Recurring customer stakeholder needs by role: Finance, Department leaders, Procurement, Legal, and Customer Success each have distinct priorities that should inform product and positioning decisions."
usage_mode: auto
sort_order: 0
tags:
- product
- customer-success
- orbit
refs:
- orbit-company-overview
- orbit-product-review-checklist
---
## Customer Stakeholder Needs by Role
**Source:** Notion — Product & Customers, last edited 2026-05-07
---
## Policy
These are recurring, role-specific customer needs observed across accounts. Use them to inform product prioritization, positioning, and CS engagement strategies.
## Stakeholder Map
| Role | Primary Need | Implication |
|---|---|---|
| **Finance** | Committed spend visibility earlier in the procurement cycle | Surface budget commitments at request approval, not at PO creation |
| **Department leaders** | Request speed — faster time from request to approval | Reduce approval routing friction; minimize back-and-forth |
| **Procurement** | Supplier file complete before the first invoice | Supplier onboarding must be finished before PO is issued, not after |
| **Legal** | Fewer emergency reviews | Route contracts with legal implications earlier; avoid last-minute escalations |
| **Customer Success (internal)** | Renewal risk visible before the account is already annoyed | CS needs leading indicators of dissatisfaction, not lagging ones |
## Usage Notes
- These needs are recurring patterns, not one-off requests. They should be treated as standing assumptions until explicitly updated.
- When prioritizing roadmap items, map each item to the stakeholder(s) it serves and verify the need is still active.
- When positioning Orbit to a new prospect, use this map to tailor the value proposition to the roles present in the buying committee.
---
See also: [[orbit-company-overview]], [[orbit-product-review-checklist]], [[orbit-known-product-gaps]]

View file

@ -0,0 +1,60 @@
---
summary: "orbit_analytics.customer: one row per customer. Columns, joins to account/subscription_event, measures (customer_count, paying_customer_count, mrr), and watch-outs."
usage_mode: auto
sort_order: 0
tags:
- data-source
- customers
- orbit-analytics
- measures
refs:
- orbit-plan-segment-normalization
- orbit-activation-kpi-glossary
tables:
- orbit_analytics.customer
- orbit_analytics.account
- orbit_analytics.subscription_event
---
# Orbit Customers Source
**Table:** `orbit_analytics.customer`
**Grain:** one row per signed-up customer
**Source:** Notion — Orbit Demo Home / Data Team - Onboarding / Orbit Customers Source, last edited 2026-05-07
Use this when a question needs customer identity, plan tier, signup timing, recent activity, or the standard customer joins.
## Columns
| Column | Type | Notes |
|---|---|---|
| `id` | number | Primary key, surrogate key |
| `email` | string | Login email, unique — **do not use as join key** |
| `name` | string | Display name |
| `country` | string | ISO 3166-1 alpha-2 code |
| `plan_tier` | string | One of `free`, `pro`, `enterprise` |
| `created_at` | time | UTC signup timestamp |
| `last_seen_at` | time | UTC most recent app activity |
| `email_verified_at` | time | UTC email verification timestamp (used in activation funnel) |
## Joins
- **one-to-many** – `orbit_analytics.account` on `customer.id = account.customer_id`
- **one-to-many** – `orbit_analytics.subscription_event` on `customer.id = subscription_event.customer_id`
Always join through `customer.id`. Do not join on `email`.
## Standard Measures
| Measure | Formula |
|---|---|
| `customer_count` | `count(distinct id)` |
| `paying_customer_count` | `count(distinct id) where plan_tier in ('pro', 'enterprise')` |
| `mrr` | `sum(subscription_event.amount) where event_type = 'renewed'` |
## Watch-outs
- **Join key:** Always use `customer.id`, never `email`.
- **Timezone:** `created_at` and `last_seen_at` are UTC. Confirm whether a question expects UTC or a local business day before filtering.
- **Paying vs. all:** `free` customers must be excluded from paying-customer follow-ups. Use `paying_customer_count`, not `customer_count`.
- **plan_tier values:** `free`, `pro`, `enterprise`. Note: `pro_plus` is a legacy alias for `growth` in the account/contract layer (see `orbit-plan-segment-normalization`), but `plan_tier` on this table uses `pro` not `pro_plus`.

View file

@ -0,0 +1,44 @@
---
summary: "dbt exposures declared in models/exposures.yml: three dashboards (Retention Executive, Executive Revenue, Growth Activation) with their upstream mart dependencies and owners."
usage_mode: auto
sort_order: 0
tags:
- dbt
- exposures
- dashboards
- orbit
sl_refs:
- mart_nrr_quarterly
- mart_retention_movement_breakout
- mart_arr_daily
- mart_revenue_daily
- mart_account_activity
---
# Orbit dbt Exposures
Declared in `models/exposures.yml`. All exposures are type `dashboard` with maturity `high` or `medium`.
## Retention Executive Dashboard
- **URL:** https://orbit-demo.example.com/dashboards/retention
- **Maturity:** high
- **Owner:** Analytics (analytics@orbit-demo.example.com)
- **Depends on:** `mart_nrr_quarterly`, `mart_retention_movement_breakout`
- **Description:** Executive retention view covering NRR and movement breakout.
## Executive Revenue Dashboard
- **URL:** https://orbit-demo.example.com/dashboards/revenue
- **Maturity:** high
- **Owner:** Finance (finance@orbit-demo.example.com)
- **Depends on:** `mart_arr_daily`, `mart_revenue_daily`
- **Description:** Board reporting view for ARR and gross-to-net revenue.
## Growth Activation Dashboard
- **URL:** https://orbit-demo.example.com/dashboards/activation
- **Maturity:** medium
- **Owner:** Growth (growth@orbit-demo.example.com)
- **Depends on:** `mart_account_activity`
- **Description:** Activation policy comparison around the January 2026 workflow update.

View file

@ -0,0 +1,54 @@
---
summary: "Overview of the kaelio_demo dbt project: connection, schema layout, model layers, and governed metrics."
usage_mode: auto
sort_order: 0
tags:
- dbt
- orbit
- data-model
- governed-metrics
sl_refs:
- stg_accounts
- stg_contracts
- stg_arr_movements
- mart_arr_daily
- mart_nrr_quarterly
- mart_revenue_daily
- mart_account_activity
- mart_procurement_activity
- mart_customer_health
- mart_account_segments
---
# Orbit dbt Project Overview
**Project name:** `kaelio_demo`
**dbt version:** 1.0.0
**Profile target:** Postgres (`orbit_analytics` schema, `kaelio_demo` database)
**Raw source schema:** `orbit_raw`
**Analytics schema:** `orbit_analytics` (all models materialised as views by default)
## Model Layers
| Layer | Prefix | Purpose |
|---|---|---|
| Staging | `stg_` | 1-to-1 with `orbit_raw` tables; adds type-casting, column tests, enum constraints |
| Intermediate | `int_` | Business-logic joins and rollups; not exposed to BI directly |
| Mart | `mart_` | Board/dashboard-ready aggregates; each has a `governed_metric_key` and `owner_team` |
## Governed Metrics (mart layer)
| Mart | `governed_metric_key` | Owner | Notion |
|---|---|---|---|
| `mart_arr_daily` | `arr` | finance | `notion_page_arr_contract_reporting` |
| `mart_nrr_quarterly` | `net_revenue_retention` | analytics | `notion_page_retention_policy_current` |
| `mart_retention_movement_breakout` | `net_revenue_retention` | analytics | `notion_page_retention_policy_current` |
| `mart_revenue_daily` | `net_revenue` | finance | `notion_page_revenue_reporting_policy` |
| `mart_account_activity` | `activated_accounts` | growth | `notion_page_activation_policy_decision` |
| `mart_procurement_activity` | `weekly_active_requesters` | product | `notion_page_procurement_instrumentation` |
| `mart_customer_health` | `active_customers` | customer_success | `notion_page_customer_health_playbook` |
| `mart_account_segments` | `segment` | sales_ops | `notion_page_sales_ops_segmentation` |
## Raw Source Tables (`orbit_raw` schema)
accounts, account_hierarchy, plans, contracts, subscriptions, contract_discount_terms, arr_movements, invoices, invoice_line_items, refunds, plan_segment_mapping, users, activation_events, sessions, purchase_requests, approval_events, suppliers, supplier_onboarding_events, purchase_orders, support_tickets, account_owners.

View file

@ -0,0 +1,75 @@
---
summary: "Orbit operating model: remote-first, written-first, weekly rhythm, decision process, escalation policy, and standing operating norms."
usage_mode: auto
sort_order: 0
tags:
- policy
- orbit
refs:
- orbit-company-overview
- customer-communication-policy
---
## How We Work
**Source:** Notion — Orbit Demo Home / How We Work, last edited 2026-05-07
---
## Operating Model
- Orbit is a **mostly remote, mostly written** company.
- Meetings must serve a specific purpose: making a decision, unblocking a handoff, or building shared context that writing alone would be slower to achieve.
- If a meeting does not meet one of those three purposes, default to async written communication.
---
## Weekly Rhythm
| Day(s) | Focus |
|---|---|
| **Monday** | Commitments and dependency checks |
| **Tuesday–Thursday** | Customer calls, product work, implementation, and building |
| **Friday** | Closing loops — review what shipped, what slipped, and write down any decisions |
Use this rhythm when scheduling work, meetings, or reviews. Do not schedule decision-making meetings on Fridays; use Friday to record decisions already made.
---
## Decision-Making Process
1. **The person closest to the work writes the recommendation.**
2. **Stakeholders who will live with the decision get to push back.**
3. **The accountable lead makes the call** when a real tradeoff exists.
4. **The result is written where the work is happening.** Decisions that exist only in Slack or a meeting are not considered durable.
> A decision that isn't written down didn't happen.
---
## Standing Operating Norms
These are explicitly codified rules Orbit has identified as recurring failure modes:
- **Name the accountable person before work begins.** If no one is named, no one is accountable.
- **Never let a quick sync be the only source of truth.** Write it down after.
- **Bring a customer example when proposing product changes.** Abstract proposals without customer grounding are harder to evaluate.
- **Involve affected teams before a plan is finalized.** Surprises in execution are more expensive than slower planning.
- **Prefer a rough written decision today over a perfect recap that never gets written.** Done and documented beats polished and lost.
---
## Escalation Policy
- **Escalations are coordination tools, not indicators of individual failure.** Escalating is the correct behavior when a problem exceeds the current team's ability to resolve it alone.
- When escalating, the person escalating must:
1. Bring in the right people (those with authority or context to unblock).
2. Summarize current state clearly — what has been tried, what is blocked, and why.
3. Name the customer impact explicitly.
4. Keep updates moving until the risk is resolved or a workaround is established.
- Escalations that stall because no one owns the next update are a process failure, not a customer failure.
- An escalation is closed when the risk is resolved or a documented workaround is in place — not when the immediate noise stops.
---
See also: [[orbit-company-overview]], [[orbit-team-lanes-detail]], [[customer-communication-policy]]

View file

@ -0,0 +1,47 @@
---
summary: "Known Orbit product friction: approval routing for non-standard cases (weird supplier setups, split budgets, renewal changes) causes teams to fall back to side channels outside Orbit."
usage_mode: auto
sort_order: 0
tags:
- product
- orbit
- customer-success
refs:
- orbit-customer-stakeholder-needs
- orbit-product-review-checklist
- orbit-company-overview
---
## Known Product Gaps and Friction Points
**Source:** Notion — Product & Customers (Notes from Recent Customer Calls), last edited 2026-05-07
---
## Primary Friction: Approval Routing for Exceptions
The primary source of customer friction is **approval routing around non-standard cases**. When a procurement request does not fit the standard routing rules, teams fall back to side channels (email, Slack, spreadsheets) outside Orbit.
### Specific Triggers
| Trigger | Why It Causes Fallback |
|---|---|
| **Weird supplier setups** | Non-standard supplier configurations don't fit the default approval chain |
| **Split department budgets** | Requests that span multiple budget owners require manual coordination not supported in the routing UI |
| **Renewal changes** | Mid-term contract changes (scope, price, term) don't map cleanly to the new-request flow |
## Impact
- Teams that fall back to side channels for exceptions create a split record: part of the procurement history is in Orbit, part is not.
- This undermines the supplier file completeness that Procurement requires (see [[orbit-customer-stakeholder-needs]]).
- It also creates renewal risk because CS cannot see the full picture of what was agreed.
## Status
- This is a known, unresolved gap as of May 2026.
- Treat as a standing assumption in roadmap and analysis decisions until a fix is shipped and validated.
- Do not design analyses or reports that assume all procurement activity flows through Orbit for accounts with known exception patterns.
---
See also: [[orbit-customer-stakeholder-needs]], [[orbit-product-review-checklist]], [[orbit-company-overview]]

View file

@ -0,0 +1,49 @@
---
summary: "mart_account_activity: pre/post policy 30-day activation rates per policy_change_date. policy_change_date = 2026-01-15 is the Jan 2026 boundary. Rates are 0–1 ratios."
usage_mode: auto
sort_order: 0
tags:
- activation
- policy
- mart
- orbit-analytics
sl_refs:
- mart_account_activity
tables:
- orbit_analytics.mart_account_activity
---
# mart_account_activity
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/63.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/101.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/106.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/107.json -->
**Table:** `orbit_analytics.mart_account_activity`
**Grain:** one row per `policy_change_date`
## Columns
| Column | Type | Notes |
|---|---|---|
| `policy_change_date` | date | The policy boundary date (primary value: `2026-01-15`) |
| `pre_policy_30_day_activation_rate` | decimal | 30-day activation rate before the policy change (0–1 ratio) |
| `post_policy_30_day_activation_rate` | decimal | 30-day activation rate after the policy change (0–1 ratio) |
## Key measures (SL: `mart_account_activity`)
- `avg_pre_policy_activation_rate` = `avg(pre_policy_30_day_activation_rate)`
- `avg_post_policy_activation_rate` = `avg(post_policy_30_day_activation_rate)`
## Common query patterns
- **Policy comparison:** `WHERE policy_change_date = date '2026-01-15'`
- **As percent:** `round(pre_policy_30_day_activation_rate * 100, 1)`
- **Side-by-side:** UNION of pre and post rows with a `policy_window` label column
## Business rules
- The January 2026 activation policy change (`policy_change_date = 2026-01-15`) is the primary boundary. `policy_version` in upstream events splits into `pre_2026_01_15` and `post_2026_01_15` cohorts.
- Rates are ratios (0–1); multiply by 100 for percentage display.
- See [orbit-activation-policy-change-jan-2026](orbit-activation-policy-change-jan-2026) for full policy context.

View file

@ -0,0 +1,55 @@
---
summary: "mart_account_segments: account segmentation with contract ARR, plan codes, size_band, segment (self_serve/commercial/enterprise), and contract_status. One row per account_id."
usage_mode: auto
sort_order: 0
tags:
- arr
- segmentation
- accounts
- mart
- orbit-analytics
sl_refs:
- mart_account_segments
tables:
- orbit_analytics.mart_account_segments
---
# mart_account_segments
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/69.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/100.json -->
**Table:** `orbit_analytics.mart_account_segments`
**Grain:** one row per `account_id`
## Columns
| Column | Type | Notes |
|---|---|---|
| `account_id` | text | Primary key |
| `parent_account_id` | text | Parent account for hierarchy rollups |
| `current_plan_code` | text | Raw plan code from billing system |
| `normalized_plan_code` | text | Canonical plan code (`pro_plus` → `growth`) |
| `size_band` | text | Company size band |
| `segment` | text | Reporting segment: `self_serve`, `commercial`, `enterprise` |
| `contract_arr_cents` | bigint | Contract ARR in cents |
| `contract_status` | text | `active`, `churned`, etc. |
## Key measures (SL: `mart_account_segments`)
- `account_count` = `count(*)`
- `total_contract_arr_cents` = `sum(contract_arr_cents)`
- `active_contract_arr_cents` = `sum(contract_arr_cents)` where `contract_status = 'active'`
- `active_contract_arr_millions` — active ARR in $M
## Common query patterns
- **ARR by segment:** `GROUP BY segment WHERE contract_status = 'active'`
- **Top accounts:** `ORDER BY contract_arr_cents DESC` with `is_internal = false AND is_test = false` (join to `orbit_raw.accounts`)
- **Unmapped segment:** `COALESCE(segment, 'unmapped')`
## Business rules
- `normalized_plan_code` maps `pro_plus` → `growth`. Always use `normalized_plan_code` for plan-based reporting. See [orbit-plan-segment-normalization](orbit-plan-segment-normalization).
- `segment` is derived from `canonical_plan_code × size_band` via `stg_plan_segment_mapping`.
- `contract_arr_cents` is the contract-first ARR value. See [orbit-arr-contract-first-definition](orbit-arr-contract-first-definition).

View file

@ -0,0 +1,46 @@
---
summary: "mart_arr_daily: daily ARR snapshot with contract-first valuation, arr_cents and display columns, used for ARR trend and EoQ reporting."
usage_mode: auto
sort_order: 0
tags:
- arr
- revenue
- mart
- orbit-analytics
sl_refs:
- mart_arr_daily
tables:
- orbit_analytics.mart_arr_daily
---
# mart_arr_daily
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/56.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/96.json -->
**Table:** `orbit_analytics.mart_arr_daily`
**Grain:** one row per `metric_date`
## Columns
| Column | Type | Notes |
|---|---|---|
| `metric_date` | date | Snapshot date |
| `arr_cents` | bigint | ARR in cents (contract-first: active contract ARR takes precedence over subscription ARR) |
| `display` | text | Human-readable ARR label (e.g. formatted dollar string) |
## Key measures (SL: `mart_arr_daily`)
- `total_arr_cents` = `sum(arr_cents)`
- `arr_millions` = `round(sum(arr_cents) / 100000000.0, 3)` — ARR in $M
## Common query patterns
- **Current ARR:** filter `metric_date = current_date` (or latest available date)
- **EoQ ARR:** filter `metric_date = date '2026-03-31'`
- **ARR trend:** group by `metric_date`, plot `arr_cents`
## Business rules
- ARR is calculated contract-first: active contract ARR takes precedence over subscription ARR for any covered period. See [orbit-arr-contract-first-definition](orbit-arr-contract-first-definition).
- `display` is a formatted label for UI rendering; use `arr_cents` for all arithmetic.

View file

@ -0,0 +1,55 @@
---
summary: "mart_nrr_quarterly: quarterly NRR by segment with net_revenue_retention ratio, expansion/contraction/churn ARR cents, and quarter_label. Enterprise is the primary reporting segment."
usage_mode: auto
sort_order: 0
tags:
- nrr
- retention
- revenue
- mart
- orbit-analytics
sl_refs:
- mart_nrr_quarterly
tables:
- orbit_analytics.mart_nrr_quarterly
---
# mart_nrr_quarterly
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/58.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/98.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/103.json -->
**Table:** `orbit_analytics.mart_nrr_quarterly`
**Grain:** one row per `quarter_label` × `segment`
## Columns
| Column | Type | Notes |
|---|---|---|
| `quarter_start_date` | date | First day of the quarter |
| `quarter_label` | text | Quarter identifier, e.g. `'2026-Q1'` |
| `segment` | text | Customer segment: `enterprise`, `commercial`, `self_serve` |
| `starting_arr_cents` | bigint | ARR at start of quarter in cents |
| `expansion_arr_cents` | bigint | ARR added from expansions |
| `contraction_arr_cents` | bigint | ARR lost from contractions (includes discount expirations) |
| `churned_arr_cents` | bigint | ARR lost from churn |
| `net_revenue_retention` | decimal | NRR ratio (e.g. `1.12` = 112%) |
## Key measures (SL: `mart_nrr_quarterly`)
- `avg_nrr` = `avg(net_revenue_retention)` across all rows
- `avg_nrr_enterprise` = `avg(net_revenue_retention)` filtered to `segment = 'enterprise'`
- `total_expansion_arr_cents`, `total_contraction_arr_cents`, `total_churned_arr_cents`
## Common query patterns
- **Q1 enterprise NRR:** `WHERE quarter_label = '2026-Q1' AND segment = 'enterprise'`
- **NRR as percent:** `round(net_revenue_retention * 100, 1)`
- **Trend by quarter:** `ORDER BY quarter_start_date`
## Business rules
- `net_revenue_retention` is a ratio, not a percentage. Multiply by 100 for display.
- Contraction includes discount expirations (classified as contraction, not churn). See [orbit-nrr-discount-expiration-treatment](orbit-nrr-discount-expiration-treatment).
- Enterprise is the primary executive reporting segment.

View file

@ -0,0 +1,47 @@
---
summary: "mart_procurement_activity: weekly active requester counts by contract_arr_threshold_cents. Standard threshold is 20000000 cents ($200k ARR). Used for golden-week procurement metrics."
usage_mode: auto
sort_order: 0
tags:
- procurement
- mart
- orbit-analytics
- active-requesters
sl_refs:
- mart_procurement_activity
tables:
- orbit_analytics.mart_procurement_activity
---
# mart_procurement_activity
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/88.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/108.json -->
**Table:** `orbit_analytics.mart_procurement_activity`
**Grain:** one row per `week_start_date` × `contract_arr_threshold_cents`
## Columns
| Column | Type | Notes |
|---|---|---|
| `week_start_date` | date | Monday of the reporting week |
| `week_end_date` | date | Sunday of the reporting week |
| `contract_arr_threshold_cents` | bigint | ARR threshold filter applied (e.g. `20000000` = $200k) |
| `active_requesters` | bigint | Count of qualifying active requesters for the week |
## Key measures (SL: `mart_procurement_activity`)
- `total_active_requesters` = `sum(active_requesters)`
- `active_requesters_200k_threshold` = `sum(active_requesters)` where `contract_arr_threshold_cents = 20000000`
## Common query patterns
- **Golden week (week of 2026-03-23):** `WHERE week_start_date = date '2026-03-23' AND contract_arr_threshold_cents = 20000000`
- **Weekly trend at $200k threshold:** `WHERE contract_arr_threshold_cents = 20000000 ORDER BY week_start_date`
## Business rules
- `active_requesters` counts non-internal, non-test requesters on large active contracts. See [orbit-procurement-qualifying-actions](orbit-procurement-qualifying-actions).
- The standard threshold is `contract_arr_threshold_cents = 20000000` ($200k ARR).
- Always filter by `contract_arr_threshold_cents` — the table contains rows for multiple threshold values.

View file

@ -0,0 +1,55 @@
---
summary: "mart_retention_movement_breakout: quarterly ARR movement by segment, movement_type, and movement_reason. NRR waterfall source. Contraction includes discount expirations."
usage_mode: auto
sort_order: 0
tags:
- nrr
- retention
- arr
- mart
- orbit-analytics
sl_refs:
- mart_retention_movement_breakout
tables:
- orbit_analytics.mart_retention_movement_breakout
---
# mart_retention_movement_breakout
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/105.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/115.json -->
**Table:** `orbit_analytics.mart_retention_movement_breakout`
**Grain:** one row per `quarter_label` × `segment` × `movement_type` × `movement_reason`
## Columns
| Column | Type | Notes |
|---|---|---|
| `quarter_start_date` | date | First day of the quarter |
| `quarter_label` | text | Quarter identifier, e.g. `'2026-Q1'` |
| `segment` | text | Customer segment: `enterprise`, `commercial`, `self_serve` |
| `movement_type` | text | `expansion`, `contraction`, or `churn` |
| `movement_reason` | text | Specific reason (e.g. `discount_expiration`) |
| `parent_account_count` | bigint | Number of parent accounts in this bucket |
| `expansion_arr_cents` | bigint | Expansion ARR in cents |
| `contraction_arr_cents` | bigint | Contraction ARR in cents |
| `churned_arr_cents` | bigint | Churned ARR in cents |
## Key measures (SL: `mart_retention_movement_breakout`)
- `total_expansion_arr_cents`, `total_contraction_arr_cents`, `total_churned_arr_cents`
- `expansion_arr_millions`, `contraction_arr_millions`, `churned_arr_millions`
- `parent_account_count`
## Common query patterns
- **Q1 enterprise waterfall:** `WHERE quarter_label = '2026-Q1' AND segment = 'enterprise'`
- **Movement summary:** `GROUP BY movement_type ORDER BY movement_type`
- **Discount expiration contraction:** `WHERE movement_reason = 'discount_expiration'`
## Business rules
- Contraction includes discount expirations, classified as contraction (not churn), tracked via `movement_reason`. See [orbit-nrr-discount-expiration-treatment](orbit-nrr-discount-expiration-treatment).
- This table is the row-level source for `mart_nrr_quarterly` aggregations.
- Only one of `expansion_arr_cents`, `contraction_arr_cents`, `churned_arr_cents` is non-zero per row.

View file

@ -0,0 +1,56 @@
---
summary: "mart_revenue_daily: daily gross-to-net revenue reconciliation with gross_revenue_cents, credits_cents, refunds_cents, net_revenue_cents, and reconciliation_check."
usage_mode: auto
sort_order: 0
tags:
- revenue
- reconciliation
- mart
- orbit-analytics
sl_refs:
- mart_revenue_daily
tables:
- orbit_analytics.mart_revenue_daily
---
# mart_revenue_daily
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/57.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/97.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/102.json -->
<!-- from: raw-sources/postgres-warehouse/metabase/2026-05-12-035303-local-metabase-3-114d957b-f564-4f46-8d4c-2770720a95be/cards/104.json -->
**Table:** `orbit_analytics.mart_revenue_daily`
**Grain:** one row per `revenue_date`
## Columns
| Column | Type | Notes |
|---|---|---|
| `revenue_date` | date | Revenue recognition date |
| `gross_revenue_cents` | bigint | Gross invoice revenue in cents |
| `credits_cents` | bigint | Credits applied in cents |
| `refunds_cents` | bigint | Refunds issued in cents |
| `net_revenue_cents` | bigint | Net revenue = gross - credits - refunds |
| `reconciliation_check` | boolean | Must be `true` on every row; flags rows where net ≠ gross - credits - refunds |
## Key measures (SL: `mart_revenue_daily`)
- `total_gross_revenue_cents` = `sum(gross_revenue_cents)`
- `total_credits_cents` = `sum(credits_cents)`
- `total_refunds_cents` = `sum(refunds_cents)`
- `total_net_revenue_cents` = `sum(net_revenue_cents)`
- `net_revenue_millions` = `round(sum(net_revenue_cents) / 100000000.0, 3)`
- `gross_revenue_millions` = `round(sum(gross_revenue_cents) / 100000000.0, 3)`
## Common query patterns
- **Q1 net revenue:** `WHERE revenue_date BETWEEN '2026-01-01' AND '2026-03-31'`
- **February reconciliation:** `WHERE revenue_date BETWEEN '2026-02-01' AND '2026-02-28'`
- **Monthly trend:** `GROUP BY date_trunc('month', revenue_date)`
## Business rules
- `reconciliation_check` must be `true` on every row. Any `false` row indicates a data quality issue.
- Gross-to-net reconciliation: gross revenue - credits - refunds = net revenue. See [orbit-revenue-gross-to-net-reconciliation](orbit-revenue-gross-to-net-reconciliation).
- All amounts are in cents; divide by 100 for USD, by 100,000,000 for $M.

View file

@ -0,0 +1,71 @@
---
summary: "Metabase SQL Library collection (collection 7): reusable query patterns, the account_join snippet, and field-filter conventions used across Orbit Showcase cards."
usage_mode: auto
sort_order: 0
tags:
- metabase
- sql-patterns
- orbit-showcase
sl_refs:
- mart_account_segments
- mart_procurement_activity
- mart_customer_health
- mart_retention_movement_breakout
- mart_revenue_daily
- mart_nrr_quarterly
---
# Orbit Metabase SQL Library — Patterns & Conventions
Collection **7 "SQL Library"** (parent: Orbit Showcase, collection 5) contains reference queries that demonstrate how to write Metabase native SQL against the Orbit analytics marts. Cards here are intentionally illustrative; several have `dashboardCount: 0` and are not embedded in live dashboards.
## Reusable snippet: `account_join`
Card 55 ("Large contract requesters") references `{{snippet: account_join}}`. The resolved SQL shows the canonical pattern for joining `orbit_analytics.mart_account_segments` to `orbit_raw.accounts`:
```sql
FROM orbit_analytics.mart_account_segments mart
LEFT JOIN orbit_raw.accounts a
ON a.account_id = mart.account_id
AND a.is_internal = false
AND a.is_test = false
```
Key points:
- The `is_internal = false AND is_test = false` guard is applied **in the JOIN condition**, not the WHERE clause, so it does not drop rows from `mart_account_segments` that have no matching account row.
- The alias `mart` is used for `mart_account_segments` throughout the snippet.
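- This pattern is closely related to the filter used in card 48 ("Top accounts by contract ARR"), which applies the same guards in the WHERE clause instead. The two are not strictly equivalent: guards in the WHERE clause also remove mart rows whose account is internal, test, or unmatched, whereas guards in the JOIN condition keep those rows with NULL account columns.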
## Field-filter conventions
Cards in this collection use Metabase dimension field filters (`type: dimension`) for optional narrowing:
- `segment` filter → maps to `mart_account_segments.segment` or `mart_retention_movement_breakout.segment`.
- `date_range` filter → maps to `mart_procurement_activity.week_start_date`.
- `quarter` filter → maps to `mart_nrr_quarterly.quarter_label`.
These filters are **optional** (`[[ ... ]]` blocks in raw SQL); the resolved SQL drops them, leaving the unfiltered dataset. SL sources derived from these cards should not bake in the filter.
## Hard-coded date anti-pattern
Card 54 ("February credits drilldown") is explicitly documented as a **counter-example**: it hard-codes `revenue_date BETWEEN DATE '2026-02-01' AND DATE '2026-02-28'`. This card is not embedded in any dashboard and should not be used as a template. Use `mart_revenue_daily` directly with a runtime date filter instead.
## Near-duplicate pair: cards 48 and 55
Both cards query `mart_account_segments` + `orbit_raw.accounts` and project `account_name`, `contract_arr`, `segment`, `size_band`. They differ only in:
- Card 48: no ARR floor filter, LIMIT 20, on 1 dashboard.
- Card 55: `contract_arr_cents >= 20000000` ($200k floor), LIMIT 25, no dashboard.
Card 48 is the canonical reference; card 55 is a filtered variant for large-contract analysis.
## Cards and their mart sources
| Card | Name | Mart | Dashboard count |
|------|------|------|----------------|
| 48 | Top accounts by contract ARR | mart_account_segments | 1 |
| 49 | Procurement actions by week | mart_procurement_activity | 1 |
| 50 | Accounts at risk | mart_customer_health | 1 |
| 51 | ARR movement breakout | mart_retention_movement_breakout | 1 |
| 52 | Revenue refund audit | mart_revenue_daily | 0 |
| 53 | Enterprise NRR quarter breakout | mart_nrr_quarterly | 0 |
| 54 | February credits drilldown | mart_revenue_daily | 0 |
| 55 | Large contract requesters | mart_account_segments | 0 |

View file

@ -0,0 +1,47 @@
---
summary: "NRR definition and the Q1 2026 discount-expiration contraction treatment: discount expirations are classified as contraction, not churn, and tracked separately via is_discount_expiration_contraction."
usage_mode: auto
sort_order: 0
tags:
- nrr
- retention
- governed-metric
- analytics
- discount
- contraction
sl_refs:
- mart_nrr_quarterly
---
# NRR — Discount Expiration Treatment
**Governed metric key:** `net_revenue_retention`
**Owner team:** analytics
**Notion:** `notion://notion_page_retention_policy_current#nrr-definition` and `#discount-expiration-treatment`
**Sources:** `mart_nrr_quarterly`, `mart_retention_movement_breakout`
## NRR Definition
Net Revenue Retention (NRR) is calculated quarterly at the **parent-account** grain using `int_parent_account_arr_movements`. The enterprise segment is the primary reporting cut.
**Known assertions:**
- Enterprise NRR **2026-Q1 = 1.018** (101.8%)
- Enterprise NRR **2025-Q4 = 1.064** (106.4%)
## Discount Expiration Treatment
Contraction ARR arising from the expiry of launch/renewal/migration/goodwill discounts is **not classified as churn**. It is tracked via the boolean flag `is_discount_expiration_contraction` on `int_parent_account_arr_movements` and surfaced as `movement_reason = 'discount_expiration'` in `mart_retention_movement_breakout`.
**Known assertion:** 11 parent accounts had `movement_type = 'contraction'` and `movement_reason = 'discount_expiration'` in Q1 2026.
## Discount Types (from `stg_contract_discount_terms`)
`launch`, `renewal`, `migration`, `goodwill`
## Movement Types
`new`, `expansion`, `contraction`, `churn`, `reactivation`
## Why This Matters
Without the discount-expiration carve-out, Q1 2026 enterprise NRR would appear lower than it is. The Q4 → Q1 drop (1.064 → 1.018) is partly explained by discount expirations, not organic churn.

View file

@ -0,0 +1,49 @@
---
summary: "Plan code normalization rules: pro_plus maps to growth. Reporting segments (self_serve, commercial, enterprise) are derived from canonical_plan_code × size_band via stg_plan_segment_mapping."
usage_mode: auto
sort_order: 0
tags:
- segmentation
- plans
- sales-ops
- governed-metric
- normalization
sl_refs:
- mart_account_segments
---
# Plan & Segment Normalization
**Governed metric key:** `segment`
**Owner team:** sales_ops
**Notion:** `notion://notion_page_sales_ops_segmentation#growth-plan-normalization`
**Sources:** `mart_account_segments`, `stg_plan_segment_mapping`, `stg_plans`
## Canonical Plan Codes
| Raw / Legacy Code | Canonical Code |
|---|---|
| `starter` | `starter` |
| `growth` | `growth` |
| `pro_plus` | **`growth`** (normalized) |
| `enterprise` | `enterprise` |
The normalization is applied via `stg_plans.canonical_plan_code`. `mart_account_segments.normalized_plan_code` reflects the post-normalization value.
## Reporting Segments
Segments are derived from `canonical_plan_code` × `size_band` using the effective-dated lookup `stg_plan_segment_mapping`:
| Segment | Typical plan + size band |
|---|---|
| `self_serve` | starter / smb |
| `commercial` | growth / mid_market |
| `enterprise` | enterprise / enterprise |
## Size Bands
`smb`, `mid_market`, `enterprise`
## Effective Dating
`stg_plan_segment_mapping` has `effective_from` / `effective_to` columns, allowing segment rules to change over time without rewriting history.

View file

@ -0,0 +1,46 @@
---
summary: "Qualifying procurement actions for weekly active requester counts: non-internal, non-test requesters on large active contracts. Covers golden week metric and supplier onboarding."
usage_mode: auto
sort_order: 0
tags:
- procurement
- product
- governed-metric
- weekly-active-requesters
- suppliers
sl_refs:
- mart_procurement_activity
---
# Procurement — Qualifying Actions & Weekly Active Requesters
**Governed metric key:** `weekly_active_requesters`
**Owner team:** product
**Notion:** `notion://notion_page_procurement_instrumentation#qualifying-procurement-actions`
**Sources:** `mart_procurement_activity`, `int_procurement_qualifying_actions`
## Qualifying Action Definition
A qualifying procurement action is any activity by a **non-internal, non-test** requester on a **large active contract** within the measurement week. Captured in `int_procurement_qualifying_actions`.
Qualifying action types include:
- Submitting a purchase request (`stg_purchase_requests`, status: submitted/approved)
- Supplier onboarding milestones (`stg_supplier_onboarding_events`, event_type: profile_completed, approved)
- Purchase order creation (`stg_purchase_orders`)
## Exclusions
- Accounts with `lifecycle_status IN ('internal', 'test')` on `stg_accounts`
- Requesters without an approved purchase request in the window
## Supplier Onboarding Milestones
`invited` → `profile_started` → `profile_completed` → `approved`
## Approval Decisions (`stg_approval_events`)
`approved`, `rejected`, `returned`
## Dashboard
Exposed via the **Growth Activation Dashboard** (`https://orbit-demo.example.com/dashboards/activation`), which depends on `mart_account_activity`.

View file

@ -0,0 +1,43 @@
---
summary: "Orbit product design policy: new features must make requester or approver experience clearer; complexity for its own sake is not built."
usage_mode: auto
sort_order: 0
tags:
- product
- policy
- orbit
refs:
- orbit-product-review-checklist
- orbit-company-overview
---
## Orbit Product Design Principles
**Source:** Notion — Product & Customers, last edited 2026-05-07
---
## Core Policy
Orbit does not build complexity for its own sake.
## Feature Complexity Rule
- When a new feature adds multiple configuration choices, it **must** be evaluated on whether it makes the requester or approver experience clearer.
- If the added configuration does not make the requester or approver experience clearer, the feature should not be built as designed.
- The test: can a first-time requester or approver use the new feature without needing to understand the configuration choices behind it?
## Design Heuristics
- **Default to simpler.** If two designs achieve the same outcome, prefer the one with fewer choices exposed to the end user.
- **Configuration is a last resort.** Expose configuration only when different customers have legitimately incompatible needs that cannot be resolved by a sensible default.
- **Requester and approver clarity are the primary UX metrics.** Speed, completeness, and confidence for those two roles are the measures of a good Orbit feature.
## What This Is Not
- This principle does not prohibit powerful or flexible features.
- It prohibits features where the complexity is internal to Orbit's implementation but leaks into the requester or approver experience without benefit.
---
See also: [[orbit-product-review-checklist]], [[orbit-company-overview]]

View file

@ -0,0 +1,44 @@
---
summary: "Five-question checklist to evaluate every Orbit product change: requester clarity, approver context, supplier onboarding ownership, PO accuracy, and CS rollout visibility."
usage_mode: auto
sort_order: 0
tags:
- product
- policy
- orbit
refs:
- orbit-company-overview
- sales-ops-cs-handoff-process
---
## Product Review Checklist
**Source:** Notion — Product & Customers, last edited 2026-05-07
---
## Policy
Every product change must be evaluated against all five questions before shipping. A "no" on any question is a blocker unless explicitly waived by the accountable lead with a written rationale.
## The Five Questions
| # | Question | What a "no" means |
|---|---|---|
| 1 | **Does a first-time requester know what to do next?** | The requester experience is unclear or requires prior knowledge not surfaced in the UI. |
| 2 | **Can an approver make a decision without missing context?** | The approver is missing information needed to approve or reject confidently. |
| 3 | **Is supplier onboarding assigned to a named person, not a queue?** | Supplier onboarding has no clear owner and will stall. |
| 4 | **Does the PO reflect the approved request?** | There is a mismatch between what was approved and what the PO captures. |
| 5 | **Can Customer Success detect a stuck rollout after week two?** | CS has no signal to identify customers who are not progressing past initial setup. |
## Usage
- Use this checklist in product reviews, design critiques, and pre-launch readiness checks.
- Questions 1–2 are requester/approver experience checks.
- Question 3 is a supplier onboarding ownership check.
- Question 4 is a PO accuracy check.
- Question 5 is a post-launch CS visibility check.
---
See also: [[orbit-company-overview]], [[orbit-product-design-principles]], [[sales-ops-cs-handoff-process]]

View file

@ -0,0 +1,50 @@
---
summary: "Gross-to-net revenue reconciliation: mart_revenue_daily reconciles gross invoice revenue, credits, and refunds to net revenue daily. reconciliation_check must be true on every row."
usage_mode: auto
sort_order: 0
tags:
- revenue
- net-revenue
- governed-metric
- finance
- reconciliation
sl_refs:
- mart_revenue_daily
---
# Revenue — Gross-to-Net Reconciliation
**Governed metric key:** `net_revenue`
**Owner team:** finance
**Notion:** `notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation`
**Source:** `mart_revenue_daily` (grain: `revenue_date`)
## Formula
```
net_revenue = gross_revenue - credits - refunds
```
All amounts are in **cents** (USD only — `stg_invoices.currency` is asserted to be `USD`).
## Components
| Column | Source | Description |
|---|---|---|
| `gross_revenue_cents` | `stg_invoices` / `stg_invoice_line_items` | Billed amounts before adjustments |
| `credit_cents` | `stg_invoice_line_items` (type=credit) | Credits applied to invoices |
| `refund_cents` | `stg_refunds` | Refunds reduce net revenue in the refund month |
| `net_revenue_cents` | Derived | gross − credits − refunds |
## Intermediate model
`int_revenue_components` — daily gross, credit, refund, and net revenue components.
## Quality Gates
- `reconciliation_check` must be `true` on every row of `mart_revenue_daily`.
- `assert_february_2026_net_revenue` — a dbt singular test covering February 2026 net revenue total.
## Line Item Types (`stg_invoice_line_items`)
`subscription`, `seat`, `usage`, `addon`, `credit`

View file

@ -1,17 +0,0 @@
---
summary: Procurement workflow activity measures active requesters and qualifying actions.
tags:
- product
- procurement
refs:
- activation-policy
sl_refs:
- orbit_demo.purchase_requests
usage_mode: auto
---
Weekly active requesters counts distinct non-internal requesters with a qualifying procurement action in the calendar week.
Qualifying actions include purchase request creation, approval decisions, supplier invites, and purchase-order creation.
Purchase-request comments and short sessions are excluded from the canonical requester activity metric.

View file

@ -1,17 +0,0 @@
---
summary: Gross-to-net revenue reconciles paid invoices, credits, and refunds.
tags:
- finance
- revenue
refs:
- arr-contract-first
sl_refs:
- orbit_demo.invoices
usage_mode: auto
---
Gross revenue starts from paid invoice activity. Net revenue subtracts credits and successful refunds in the month they are recorded.
Exclude unpaid, void, draft, failed, internal, and test-account invoice activity from canonical revenue reporting.
February 2026 has an elevated refund event captured in the source notes and revenue dashboard.

View file

@ -0,0 +1,58 @@
---
summary: "Sales Ops → Customer Success implementation handoff: required fields, ownership, enterprise account risk, and policy that CS must not rediscover sales-stage details."
usage_mode: auto
sort_order: 0
tags:
- policy
- sales-ops
- customer-success
refs:
- orbit-company-overview
- orbit-how-we-work
- orbit-plan-segment-normalization
---
## Sales Ops → Customer Success Implementation Handoff
**Source:** Notion — People & Operating Norms, last edited 2026-05-07
**Owner:** Sales Ops (sender), Customer Success (receiver)
---
## Policy
Sales Ops must complete the handoff **before the first implementation call**. Customer Success should not need to rediscover any of the following details.
## Required Handoff Fields
| Field | Notes |
|---|---|
| Current plan | Starter / Growth / Enterprise — use canonical plan name, not legacy aliases |
| Account segment | self_serve / commercial / enterprise (see `orbit-plan-segment-normalization`) |
| Contract shape | Term, ARR, any discounts or custom terms |
| Renewal contact | Named person on the customer side responsible for renewal |
| Unusual approval requirements | Any non-standard approval routing the customer has configured or requested |
| Unusual supplier requirements | Any supplier onboarding exceptions or pre-approved vendor lists |
## Ownership
- **Sales Ops** is responsible for populating and delivering the handoff before the first implementation call.
- **Customer Success** is responsible for flagging missing fields to Sales Ops before the call, not during or after.
- If a field is unknown at handoff time, Sales Ops must note it explicitly as "unknown — to be resolved by [date]" rather than leaving it blank.
## Common Failure Mode
Handoffs that omit contract shape or renewal contact force CS to re-engage Sales Ops mid-implementation, which delays time-to-value and creates duplicate discovery work. This is the primary failure mode this process is designed to prevent.
---
## Enterprise Account Risk: Parent/Child Complexity
- Enterprise accounts with parent/child account structures require extra care during handoff.
- Small assumptions made during handoff in these accounts tend to produce large downstream problems (billing mismatches, approval routing failures, supplier onboarding gaps).
- When the account has parent/child complexity, Sales Ops must explicitly flag it in the handoff and document the account hierarchy before the first implementation call.
- CS should treat any undocumented parent/child relationship as a blocker — do not proceed with implementation setup until the structure is confirmed.
---
See also: [[orbit-company-overview]], [[orbit-how-we-work]], [[orbit-plan-segment-normalization]]

View file

@ -1,17 +0,0 @@
---
summary: Account segments derive from plan normalization and effective-dated mapping.
tags:
- sales-ops
- segmentation
refs: []
sl_refs:
- orbit_demo.accounts
- orbit_demo.contracts
usage_mode: auto
---
Account segment labels combine plan_code, canonical_plan_code, and size_band fields.
Historical plan code pro_plus maps to growth for current segment analysis.
Use the mapping active at the metric date when segment definitions change over time.

View file

@ -1,17 +0,0 @@
---
summary: Support escalation tiers map ticket severity to SLA targets.
tags:
- support
- sla
refs:
- customer-health-scoring
sl_refs:
- orbit_demo.support_tickets
usage_mode: auto
---
Critical support tickets require immediate response and on-call escalation.
High severity tickets should receive first response within four business hours.
Resolution time is measured from created_at to resolved_at and only applies to resolved tickets.

View file

@ -43,12 +43,12 @@
},
"generated": {
"semanticLayer": {
"path": "semantic-layer/orbit_demo",
"sourceCount": 6
"path": "semantic-layer",
"sourceCount": 46
},
"knowledge": {
"path": "knowledge/global",
"pageCount": 10
"pageCount": 28
},
"links": {
"path": "links",

View file

@ -0,0 +1,27 @@
name: int_activation_policy_windows
table: orbit_analytics.int_activation_policy_windows
grain:
- policy_version
columns:
- name: policy_version
type: string
descriptions:
user: pre_2026_01_15 or post_2026_01_15
- name: activated_account_count
type: number
descriptions:
ktx: Column activated account count from int_activation_policy_windows.
- name: window_start
type: time
descriptions:
ktx: Column window start from int_activation_policy_windows.
- name: window_end
type: time
descriptions:
ktx: Column window end from int_activation_policy_windows.
joins: []
measures:
- name: total_activated_accounts
expr: sum(activated_account_count)
descriptions:
user: Activation cohort counts around the January 2026 policy change.

View file

@ -0,0 +1,24 @@
name: int_active_contract_arr
table: orbit_analytics.int_active_contract_arr
grain:
- contract_id
columns:
- name: contract_id
type: string
descriptions:
user: "dbt: not_null, unique"
- name: account_id
type: string
descriptions:
ktx: Identifier for the related account on int_active_contract_arr.
- name: arr_cents
type: number
descriptions:
ktx: Column arr cents from int_active_contract_arr.
joins: []
measures:
- name: total_arr_cents
expr: sum(arr_cents)
description: Total active contract ARR in cents as of 2026-03-31.
descriptions:
user: Active contract ARR as of 2026-03-31.

View file

@ -0,0 +1,25 @@
name: int_customer_health_signals
table: orbit_analytics.int_customer_health_signals
grain:
- account_id
columns:
- name: account_id
type: string
descriptions:
ktx: Identifier for the related account on int_customer_health_signals.
- name: open_critical_ticket_count
type: number
descriptions:
ktx: Column open critical ticket count from int_customer_health_signals.
- name: recent_procurement_action_count
type: number
descriptions:
ktx: Column recent procurement action count from int_customer_health_signals.
- name: risk_level
type: string
descriptions:
user: "Derived risk level: low, medium, high"
joins: []
measures: []
descriptions:
user: Support-ticket and recent-procurement signals for customer health risk.

View file

@ -0,0 +1,49 @@
name: int_parent_account_arr_movements
table: orbit_analytics.int_parent_account_arr_movements
grain:
- arr_movement_id
columns:
- name: arr_movement_id
type: string
descriptions:
user: "dbt: not_null, unique"
- name: parent_account_id
type: string
descriptions:
ktx: Identifier for the related parent account on int_parent_account_arr_movements.
- name: movement_type
type: string
descriptions:
user: "dbt: accepted_values [new, expansion, contraction, churn, reactivation]"
- name: is_discount_expiration_contraction
type: boolean
descriptions:
user: Discount expiration contraction flag used to keep discount movement separate from churn.
- name: movement_date
type: time
descriptions:
ktx: Date or time value for movement date on int_parent_account_arr_movements.
- name: arr_cents
type: number
descriptions:
ktx: Column arr cents from int_parent_account_arr_movements.
joins: []
measures:
- name: expansion_arr_cents
expr: sum(arr_cents)
filter: movement_type = 'expansion'
description: Sum of expansion ARR movements in cents.
- name: contraction_arr_cents
expr: sum(arr_cents)
filter: movement_type = 'contraction'
description: Sum of contraction ARR movements in cents.
- name: churn_arr_cents
expr: sum(arr_cents)
filter: movement_type = 'churn'
description: Sum of churn ARR movements in cents.
- name: discount_expiration_contraction_arr_cents
expr: sum(arr_cents)
filter: is_discount_expiration_contraction = true
description: Contraction ARR from discount expirations — kept separate from churn in NRR calculation.
descriptions:
user: Parent-account movement rollups for retention metrics.

View file

@ -0,0 +1,27 @@
name: int_procurement_qualifying_actions
table: orbit_analytics.int_procurement_qualifying_actions
grain:
- purchase_request_id
columns:
- name: purchase_request_id
type: string
descriptions:
ktx: Identifier for the related purchase request on int_procurement_qualifying_actions.
- name: account_id
type: string
descriptions:
ktx: Identifier for the related account on int_procurement_qualifying_actions.
- name: requester_user_id
type: string
descriptions:
ktx: Identifier for the related requester user on int_procurement_qualifying_actions.
- name: action_week
type: time
descriptions:
ktx: Column action week from int_procurement_qualifying_actions.
joins: []
measures:
- name: qualifying_action_count
expr: count(purchase_request_id)
descriptions:
user: Non-internal, non-test requester activity for large active contracts in the golden week.

View file

@ -0,0 +1,37 @@
name: int_revenue_components
table: orbit_analytics.int_revenue_components
grain:
- revenue_date
columns:
- name: revenue_date
type: time
descriptions:
ktx: Date or time value for revenue date on int_revenue_components.
- name: gross_revenue_cents
type: number
descriptions:
ktx: Column gross revenue cents from int_revenue_components.
- name: credit_cents
type: number
descriptions:
ktx: Column credit cents from int_revenue_components.
- name: refund_cents
type: number
descriptions:
ktx: Column refund cents from int_revenue_components.
- name: net_revenue_cents
type: number
descriptions:
ktx: Column net revenue cents from int_revenue_components.
joins: []
measures:
- name: total_gross_revenue_cents
expr: sum(gross_revenue_cents)
- name: total_credit_cents
expr: sum(credit_cents)
- name: total_refund_cents
expr: sum(refund_cents)
- name: total_net_revenue_cents
expr: sum(net_revenue_cents)
descriptions:
user: Daily gross, credit, refund, and net revenue components.

View file

@ -0,0 +1,23 @@
name: mart_account_activity
table: orbit_analytics.mart_account_activity
grain:
- policy_version
columns:
- name: policy_version
type: string
descriptions:
user: pre_2026_01_15 or post_2026_01_15
- name: activated_account_count
type: number
descriptions:
ktx: Column activated account count from mart_account_activity.
- name: window_label
type: string
descriptions:
ktx: Column window label from mart_account_activity.
joins: []
measures:
- name: total_activated_accounts
expr: sum(activated_account_count)
descriptions:
user: "Activation policy comparison values. Governed metric: activated_accounts. Owner: growth. See notion://notion_page_activation_policy_decision#policy-change."

View file

@ -0,0 +1,27 @@
name: mart_account_segments
table: orbit_analytics.mart_account_segments
grain:
- account_id
columns:
- name: account_id
type: string
descriptions:
user: "dbt: not_null, unique"
- name: normalized_plan_code
type: string
descriptions:
user: pro_plus is normalized to growth through plans.canonical_plan_code.
- name: size_band
type: string
descriptions:
ktx: Column size band from mart_account_segments.
- name: segment
type: string
descriptions:
user: "Reporting segment: self_serve, commercial, enterprise"
joins: []
measures:
- name: account_count
expr: count(account_id)
descriptions:
user: "Current plan, size band, and reporting segment for accounts. Governed metric: segment. Owner: sales_ops. See notion://notion_page_sales_ops_segmentation#growth-plan-normalization."

View file

@ -0,0 +1,20 @@
name: mart_arr_daily
table: orbit_analytics.mart_arr_daily
grain:
- metric_date
columns:
- name: metric_date
type: time
descriptions:
user: "dbt: not_null, unique"
- name: arr_cents
type: number
descriptions:
user: "ARR in cents. dbt assertion: expected value 1874200000 (i.e. $18,742,000) as of 2026-03-31."
joins: []
measures:
- name: arr_cents
expr: sum(arr_cents)
description: Total ARR in cents across metric dates.
descriptions:
user: "Board-prep ARR as of the metric date. Governed metric: arr. Owner: finance. Contract-first ARR calculation — see notion://notion_page_arr_contract_reporting#arr-contract-first."

View file

@ -0,0 +1,30 @@
name: mart_customer_health
table: orbit_analytics.mart_customer_health
grain:
- account_id
columns:
- name: account_id
type: string
descriptions:
user: "dbt: not_null, unique"
- name: risk_level
type: string
descriptions:
user: "dbt: accepted_values [low, medium, high]"
- name: open_critical_ticket_count
type: number
descriptions:
ktx: Column open critical ticket count from mart_customer_health.
- name: recent_procurement_action_count
type: number
descriptions:
ktx: Column recent procurement action count from mart_customer_health.
joins: []
measures:
- name: account_count
expr: count(account_id)
- name: high_risk_account_count
expr: count(account_id)
filter: risk_level = 'high'
descriptions:
user: "Customer-health risk mart as of 2026-03-31. Governed metric: active_customers. Owner: customer_success. See notion://notion_page_customer_health_playbook#risk-definition."

Some files were not shown because too many files have changed in this diff Show more