Merge origin/main into remove-ingest-fallbacks

This commit is contained in:
Andrey Avtomonov 2026-05-19 23:46:23 +02:00
commit 7101424a12
215 changed files with 6272 additions and 82916 deletions

View file

@ -191,9 +191,8 @@ jobs:
coverage:
name: Coverage
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
env:
CODECOV_TOKEN_CONFIGURED: ${{ secrets.CODECOV_TOKEN != '' }}
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@ -232,25 +231,39 @@ jobs:
run: pnpm run test:coverage:ts
- name: Upload TypeScript coverage
if: env.CODECOV_TOKEN_CONFIGURED == 'true'
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
with:
use_oidc: true
token: ${{ secrets.CODECOV_TOKEN }}
slug: Kaelio/ktx
files: ./packages/cli/coverage/lcov.info,./packages/connector-bigquery/coverage/lcov.info,./packages/connector-clickhouse/coverage/lcov.info,./packages/connector-mysql/coverage/lcov.info,./packages/connector-postgres/coverage/lcov.info,./packages/connector-snowflake/coverage/lcov.info,./packages/connector-sqlite/coverage/lcov.info,./packages/connector-sqlserver/coverage/lcov.info,./packages/context/coverage/lcov.info,./packages/llm/coverage/lcov.info
flags: typescript
name: typescript
fail_ci_if_error: false
disable_search: true
fail_ci_if_error: true
- name: Warn when Codecov token is missing for TypeScript
if: env.CODECOV_TOKEN_CONFIGURED != 'true'
run: echo "::warning::CODECOV_TOKEN is not configured; skipping TypeScript coverage upload"
- name: Generate Python coverage
run: pnpm run test:coverage:py
- name: Upload Python coverage
if: env.CODECOV_TOKEN_CONFIGURED == 'true'
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
with:
use_oidc: true
token: ${{ secrets.CODECOV_TOKEN }}
slug: Kaelio/ktx
files: ./coverage/python.xml
flags: python
name: python
fail_ci_if_error: false
disable_search: true
fail_ci_if_error: true
- name: Warn when Codecov token is missing for Python
if: env.CODECOV_TOKEN_CONFIGURED != 'true'
run: echo "::warning::CODECOV_TOKEN is not configured; skipping Python coverage upload"
artifact-checks:
name: Artifact checks

View file

@ -7,10 +7,10 @@ on:
description: "Release kind: rc publishes to next, stable publishes to latest"
required: true
type: choice
default: "rc"
default: "stable"
options:
- rc
- stable
- rc
force_release:
description: "Force a patch release even if semantic-release finds no releasable commits"
required: false
@ -20,7 +20,7 @@ on:
description: "Create the release and publish @kaelio/ktx to npm instead of running a dry-run"
required: true
type: boolean
default: false
default: true
permissions:
contents: write
@ -90,6 +90,26 @@ jobs:
env:
KTX_PRERELEASE_BRANCH: next
- name: Prepare npm package root for release verification
run: |
set -euo pipefail
mkdir -p dist/public-npm-package
node --input-type=module <<'EOF'
import { writeFile } from 'node:fs/promises';
const packageJson = {
name: '@kaelio/ktx',
version: '0.0.0',
private: false
};
await writeFile(
'dist/public-npm-package/package.json',
`${JSON.stringify(packageJson, null, 2)}\n`
);
EOF
- name: Dry-run semantic release
if: ${{ !inputs.publish_live }}
run: |

1
.gitignore vendored
View file

@ -57,6 +57,7 @@ yarn-error.log*
.agents
.claude
.superpowers
docs/superpowers
# Editors and OS files
.idea/

View file

@ -72,8 +72,9 @@ KTX is a pnpm + uv workspace.
- Python daemon: `python/ktx-daemon`
- Examples and fixtures: `examples/`
- Workspace scripts: `scripts/`
- Local agent skills are private overlays. Do not commit `.agents/` or
`.claude/` to this public repository.
- Local agent skills and internal planning docs are private overlays. Do not
commit `.agents/`, `.claude/`, or `docs/superpowers/` to this public
repository.
Some package names still contain `ktx` during the split. Do not mass-rename
symbols, package names, paths, or docs to `ktx` unless the task asks for that

113
README.md
View file

@ -6,35 +6,68 @@
The context layer for analytics agents
</h1>
<p align="center">by Kaelio</p>
<p align="center">
<a href="https://www.npmjs.com/package/@kaelio/ktx"><img src="https://img.shields.io/npm/v/@kaelio/ktx?style=flat-square&color=f97316" alt="npm version" /></a>
<a href="https://codecov.io/gh/Kaelio/ktx"><img src="https://codecov.io/gh/Kaelio/ktx/branch/main/graph/badge.svg" alt="Codecov" /></a>
<a href="https://codecov.io/gh/Kaelio/ktx"><img src="https://codecov.io/gh/Kaelio/ktx/graph/badge.svg?branch=main" alt="Codecov" /></a>
<a href="https://github.com/Kaelio/ktx/actions/workflows/ci.yml?query=branch%3Amain"><img src="https://img.shields.io/github/actions/workflow/status/Kaelio/ktx/ci.yml?branch=main&label=tests&style=flat-square" alt="Tests" /></a>
<a href="https://docs.kaelio.com/ktx/docs/"><img src="https://img.shields.io/badge/docs-KTX-22c55e?style=flat-square" alt="Documentation" /></a>
<a href="https://github.com/Kaelio/ktx/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-blue?style=flat-square" alt="License" /></a>
<a href="https://github.com/Kaelio/ktx"><img src="https://img.shields.io/github/stars/Kaelio/ktx?style=flat-square" alt="GitHub stars" /></a>
</p>
---
KTX turns warehouse metadata, semantic definitions, and business knowledge into
reviewable project files that agents can use to plan, query, and update
analytics work.
KTX is a self-improving context layer that teaches agents how to query your
warehouse accurately - from approved metric definitions, joinable columns, and
business knowledge it builds and maintains for you.
Use KTX when you want agents to:
Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and
SQLite. Integrates with dbt, MetricFlow, LookML, Looker, Metabase, and Notion.
- Generate SQL from approved measures and joins
- Repair semantic definitions through reviewable diffs
- Explain metric provenance with warehouse evidence
- Work alongside dbt, MetricFlow, LookML, Looker, Metabase, and Notion
## Why KTX
Supports PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and
SQLite.
General-purpose agents struggle on data tasks. They re-explore your warehouse
on every question, invent their own metric logic, and return numbers that
don't match approved definitions.
Traditional semantic layers don't fix this. They demand constant manual
upkeep and don't absorb the rest of your company's knowledge.
KTX does both, automatically:
- **Learns from company knowledge.** Ingests wiki content, organizes it,
removes duplicates, and flags contradictions for human review.
- **Maps the data stack.** Samples tables, captures metadata and usage
patterns, detects joinable columns, and annotates sources so agents write
better queries.
- **Builds a semantic layer.** Combines raw tables and high-level metrics
through a join graph that automatically resolves chasm and fan traps, so
agents fetch metrics declaratively instead of rewriting canonical SQL each
time.
- **Serves agents at execution.** Exposes CLI and MCP tools with combined
full-text and semantic search across wiki and semantic-layer entities.
Agents can run raw SQL when they need it, or compose semantic-layer queries
when they want approved metrics with reliable joins.
<p align="center">
<img src="docs-site/public/images/ingestion-flow-transparent.svg" alt="KTX ingestion flow from source systems through validation to wiki and semantic-layer outputs" width="900" />
</p>
## Agent Setup
Ask an agent such as Claude Code, Codex, Cursor, or OpenCode to install and
configure KTX from your project directory:
```text
Follow instructions from
https://docs.kaelio.com/ktx/docs/agents-setup.md
to install and configure ktx
```
## Quick Start
```bash
pnpm add --global @kaelio/ktx
npm install -g @kaelio/ktx
ktx setup
ktx status
```
@ -49,8 +82,8 @@ KTX project: /home/user/analytics
Project ready: yes
LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Databases configured: yes (postgres-warehouse)
Context sources configured: yes (dbt-main)
Databases configured: yes (warehouse)
Context sources configured: yes (dbt_main)
KTX context built: yes
Agent integration ready: yes (codex:project)
```
@ -65,7 +98,7 @@ Agent integration ready: yes (codex:project)
| `ktx connection test <id>` | Test one connection |
| `ktx ingest <id>` | Build context for one connection |
| `ktx ingest --all` | Build context for every configured connection |
| `ktx ingest text <file>` | Capture free-form notes into memory |
| `ktx ingest text <file> --connection-id <connectionId>` | Capture free-form notes into memory |
| `ktx sl list` | List semantic-layer sources |
| `ktx sl search "revenue"` | Search semantic-layer sources |
| `ktx sl validate <source> --connection-id <id>` | Validate a semantic source |
@ -93,14 +126,17 @@ Commit `ktx.yaml`, `semantic-layer/`, and `wiki/`. Keep `.ktx/` local.
## Agent Usage
Setup can install KTX instructions for Claude Code, Codex, Cursor, OpenCode,
and universal `.agents` clients:
Install KTX integration for Claude Code, Claude Desktop, Codex, Cursor,
OpenCode, and generic `.agents` clients:
```bash
ktx setup --agents --target codex
ktx setup --agents
```
Agent-facing workflows typically start with:
Pass `--target <target>` to install or repair one specific integration.
A typical agent workflow combines wiki and semantic-layer search before
querying:
```bash
ktx sl search "revenue" --json
@ -108,31 +144,14 @@ ktx wiki search "refund policy" --json
ktx sl query --connection-id warehouse --measure orders.revenue --format sql
```
During agent setup, choose **MCP tools + analytics skill** for client agents.
Choose **MCP tools + analytics skill + admin CLI skill** only when a developer
or operator agent also needs pinned `ktx` admin commands.
During setup, choose **Ask data questions with KTX MCP** for client agents.
Choose **Ask data questions + manage KTX with CLI commands** when an operator
agent also needs pinned `ktx` admin commands.
The analytics skill teaches client agents the MCP workflow: discover data,
prefer semantic-layer measures, inspect entity details before raw SQL, and
capture durable learnings. Admin CLI skills call `ktx` commands directly
through a skill file installed in your agent's config:
```bash
ktx sl query --measure orders.revenue --dimension orders.status --format sql
ktx wiki search "revenue definition"
ktx sl validate orders
```
Supported client agents: Claude Code, Claude Desktop, Codex, Cursor, OpenCode,
and clients that can use the printed MCP endpoint or `.agents` admin skills.
Claude Desktop setup registers a local `ktx mcp stdio` server in Claude
Desktop's config and generates `.ktx/agents/claude/ktx-plugin.zip` with the
analytics skill.
The release artifact manifest contains the public npm tarball and the bundled
`kaelio-ktx` runtime wheel. The `python/ktx-sl` and `python/ktx-daemon`
directories remain source packages for development, not public release
artifacts.
After setup, KTX prints **Required before using agents** with the exact
commands to run. If the output includes `ktx mcp start --project-dir ...`, run
it before opening your agent. Claude Desktop uses its own launcher and prints
separate skill upload steps under `.ktx/agents/claude/`.
## Workspace packages
@ -189,7 +208,7 @@ uv run pytest -q
## Docs
- [Quickstart](docs-site/content/docs/getting-started/quickstart.mdx)
- [CLI Reference](docs-site/content/docs/cli-reference/index.mdx)
- [CLI Reference](docs-site/content/docs/cli-reference/ktx.mdx)
- [Building Context](docs-site/content/docs/guides/building-context.mdx)
- [Contributing](docs-site/content/docs/community/contributing.mdx)

109
codecov.yml Normal file
View file

@ -0,0 +1,109 @@
# Codecov configuration. Nesting follows the Codecov YAML schema.

# Global behaviour: report against main, wait for CI, and hold notifications
# until both uploads (typescript + python) have arrived.
codecov:
  branch: main
  require_ci_to_pass: true
  notify:
    after_n_builds: 2

# Coverage display and commit-status checks.
coverage:
  precision: 2
  round: down
  range: "70...100"
  status:
    project:
      default:
        target: auto
        threshold: 1%
        if_ci_failed: error
      typescript:
        target: auto
        threshold: 1%
        flags:
          - typescript
        if_ci_failed: error
      python:
        target: auto
        threshold: 1%
        flags:
          - python
        if_ci_failed: error
    patch:
      default:
        target: 75%
        threshold: 5%
        if_ci_failed: error
        informational: true

# Pull-request comment layout.
comment:
  layout: "header, diff, flags, components, files"
  behavior: default
  require_changes: false
  require_base: false
  require_head: true

# One flag per upload performed by the CI coverage job.
flags:
  typescript:
    paths:
      - packages/
    carryforward: false
  python:
    paths:
      - python/
    carryforward: false

# Per-package breakdown shown in the Codecov UI and PR comment.
component_management:
  individual_components:
    - component_id: pkg_cli
      name: CLI
      paths:
        - packages/cli/src/**
    - component_id: pkg_context
      name: Context engine
      paths:
        - packages/context/src/**
    - component_id: pkg_llm
      name: LLM
      paths:
        - packages/llm/src/**
    - component_id: connector_bigquery
      name: BigQuery connector
      paths:
        - packages/connector-bigquery/src/**
    - component_id: connector_clickhouse
      name: ClickHouse connector
      paths:
        - packages/connector-clickhouse/src/**
    - component_id: connector_mysql
      name: MySQL connector
      paths:
        - packages/connector-mysql/src/**
    - component_id: connector_postgres
      name: Postgres connector
      paths:
        - packages/connector-postgres/src/**
    - component_id: connector_snowflake
      name: Snowflake connector
      paths:
        - packages/connector-snowflake/src/**
    - component_id: connector_sqlite
      name: SQLite connector
      paths:
        - packages/connector-sqlite/src/**
    - component_id: connector_sqlserver
      name: SQL Server connector
      paths:
        - packages/connector-sqlserver/src/**
    - component_id: py_semantic_layer
      name: Python semantic layer
      paths:
        - python/ktx-sl/semantic_layer/**
    - component_id: py_daemon
      name: Python daemon
      paths:
        - python/ktx-daemon/src/ktx_daemon/**

# Paths excluded from coverage reports.
ignore:
  - docs-site/**
  - examples/**
  - packages/*/coverage/**
  - packages/*/dist/**
  - python/ktx-sl/demos/**

View file

@ -0,0 +1,4 @@
import { source } from "@/lib/source";
import { createFromSource } from "fumadocs-core/search/server";
export const { GET } = createFromSource(source);

View file

@ -51,10 +51,7 @@ export default async function Page(props: {
<>
<div className="flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between sm:gap-4">
<DocsTitle>{page.data.title}</DocsTitle>
<DocsPageActions
markdownUrl={`${page.url}.md`}
mdxSource={mdxSource}
/>
<DocsPageActions mdxSource={mdxSource} />
</div>
<DocsDescription className="wrap-anywhere">
{page.data.description}

View file

@ -69,7 +69,11 @@
--color-fd-muted-foreground: #7a8d96;
}
html, body {
/* Keep html overflow at the default `visible` so body's overflow
propagates to the viewport (per CSS Overflow spec). That lets
`react-remove-scroll-bar` lock viewport scroll via body alone while
leaving the sticky sidebar placeholder anchored to the viewport. */
body {
overflow-x: clip;
}
@ -161,6 +165,17 @@ pre {
line-height: 1.7 !important;
}
/* Disable monospace ligatures so `--flag` keeps a visible space and double
dashes don't fuse into an em-dash glyph. */
code,
pre,
pre code,
.ktx-code,
.ktx-code code {
font-variant-ligatures: none !important;
font-feature-settings: "liga" 0, "calt" 0 !important;
}
.dark pre {
background: transparent !important;
}
@ -206,6 +221,72 @@ pre {
padding-inline: 0 !important;
}
.ktx-code .ktx-token-key {
color: #0f766e;
}
.ktx-code .ktx-token-keyword {
color: #0e7490;
font-weight: 650;
}
.ktx-code .ktx-token-function {
color: #7c3aed;
font-weight: 650;
}
.ktx-code .ktx-token-flag {
color: #0369a1;
}
.ktx-code .ktx-token-string {
color: #b45309;
}
.ktx-code .ktx-token-number,
.ktx-code .ktx-token-constant {
color: #be123c;
}
.ktx-code .ktx-token-comment {
color: #64748b;
font-style: italic;
}
.ktx-code .ktx-token-punctuation {
color: #64748b;
}
.dark .ktx-code .ktx-token-key {
color: #5eead4;
}
.dark .ktx-code .ktx-token-keyword {
color: #67e8f9;
}
.dark .ktx-code .ktx-token-function {
color: #c4b5fd;
}
.dark .ktx-code .ktx-token-flag {
color: #7dd3fc;
}
.dark .ktx-code .ktx-token-string {
color: #fbbf24;
}
.dark .ktx-code .ktx-token-number,
.dark .ktx-code .ktx-token-constant {
color: #fb7185;
}
.dark .ktx-code .ktx-token-comment,
.dark .ktx-code .ktx-token-punctuation {
color: #94a3b8;
}
/* Neutralize the outer figure styling that our wrapper now owns */
figure:has(> .ktx-code),
figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
@ -216,57 +297,10 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
margin: 0;
}
/* ── Mode A: Terminal ─────────────────────── */
.ktx-code-terminal {
background: #0c1417;
border: 1px solid rgba(255, 255, 255, 0.08);
color: #c8c3bc;
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 12px 32px -16px rgba(0, 0, 0, 0.3);
}
.ktx-code-terminal:hover {
border-color: rgba(34, 211, 238, 0.2);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 14px 32px -12px rgba(34, 211, 238, 0.18);
}
.ktx-code-terminal-head {
display: flex;
align-items: center;
gap: 6px;
padding: 10px 12px;
border-bottom: 1px solid rgba(255, 255, 255, 0.06);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.03), transparent);
}
.ktx-tl-dot {
width: 11px;
height: 11px;
border-radius: 999px;
flex-shrink: 0;
}
.ktx-code-terminal-label {
margin-left: 8px;
font-size: 11px;
font-weight: 500;
letter-spacing: 0.02em;
color: rgba(255, 255, 255, 0.4);
}
.ktx-code-body-terminal {
background: transparent !important;
color: #c8c3bc !important;
}
/* ── Mode D: Output preview (wizard prompts, status output) ── */
.ktx-code-output {
background: var(--color-fd-muted);
border: 1px solid var(--color-fd-border);
border-left: 3px solid color-mix(in oklch, var(--color-fd-primary) 50%, var(--color-fd-border));
position: relative;
box-shadow: 0 1px 2px rgba(27, 27, 24, 0.02);
}
@ -274,17 +308,14 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
.dark .ktx-code-output {
background: #111a1e;
border-color: rgba(255, 255, 255, 0.05);
border-left-color: rgba(34, 211, 238, 0.25);
}
.ktx-code-output:hover {
border-color: color-mix(in oklch, var(--color-fd-primary) 25%, var(--color-fd-border));
border-left-color: var(--color-fd-primary);
}
.dark .ktx-code-output:hover {
border-color: rgba(255, 255, 255, 0.08);
border-left-color: rgba(34, 211, 238, 0.45);
}
.ktx-code-output-label {
@ -304,8 +335,8 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
.ktx-code-output-copy {
position: absolute !important;
top: 6px !important;
right: 6px !important;
top: 7px !important;
right: 8px !important;
opacity: 0;
transform: translateY(-4px);
transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease);
@ -362,55 +393,32 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
display: flex;
align-items: center;
gap: 8px;
padding: 8px 10px 8px 14px;
padding: 5px 8px 5px 12px;
border-bottom: 1px solid var(--color-fd-border);
background: linear-gradient(180deg, var(--color-fd-muted), transparent);
background: rgba(0, 0, 0, 0.025);
}
.dark .ktx-code-tab-head {
border-bottom-color: rgba(255, 255, 255, 0.05);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.02), transparent);
background: rgba(255, 255, 255, 0.02);
}
.ktx-file-glyph {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 999px;
background: var(--color-fd-muted-foreground);
flex-shrink: 0;
}
.ktx-file-glyph[data-lang="yaml"],
.ktx-file-glyph[data-lang="yml"] { background: #fbbf24; }
.ktx-file-glyph[data-lang="ts"],
.ktx-file-glyph[data-lang="tsx"],
.ktx-file-glyph[data-lang="typescript"] { background: #3b82f6; }
.ktx-file-glyph[data-lang="js"],
.ktx-file-glyph[data-lang="jsx"],
.ktx-file-glyph[data-lang="javascript"] { background: #facc15; }
.ktx-file-glyph[data-lang="json"] { background: #84cc16; }
.ktx-file-glyph[data-lang="md"],
.ktx-file-glyph[data-lang="mdx"] { background: #a3a3a3; }
.ktx-file-glyph[data-lang="sql"] { background: #f97316; }
.ktx-file-glyph[data-lang="py"],
.ktx-file-glyph[data-lang="python"] { background: #22d3ee; }
.ktx-code-tab-filename {
font-family: var(--font-mono), ui-monospace, monospace;
font-size: 12.5px;
color: var(--color-fd-foreground);
font-size: 11.5px;
color: #6b7280;
}
.ktx-lang-pill {
margin-left: 4px;
padding: 1px 6px;
font-size: 10px;
font-weight: 600;
margin-right: 4px;
padding: 0 7px;
font-size: 9px;
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.04em;
color: var(--color-fd-muted-foreground);
letter-spacing: 0.06em;
color: #9ca3af;
border: 1px solid var(--color-fd-border);
border-radius: 4px;
border-radius: 3px;
background: var(--color-fd-card);
font-family: var(--font-display), var(--font-sans), sans-serif;
}
@ -445,30 +453,10 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
border-color: rgba(34, 211, 238, 0.2);
}
.ktx-code-minimal-lang {
position: absolute;
top: 8px;
left: 14px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--color-fd-muted-foreground);
font-family: var(--font-display), var(--font-sans), sans-serif;
opacity: 0;
transition: opacity 0.2s var(--ktx-ease);
pointer-events: none;
z-index: 1;
}
.ktx-code-minimal:hover .ktx-code-minimal-lang {
opacity: 0.5;
}
.ktx-code-minimal-copy {
position: absolute !important;
top: 6px !important;
right: 6px !important;
top: 7px !important;
right: 8px !important;
opacity: 0;
transform: translateY(-4px);
transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease);
@ -778,8 +766,8 @@ body::after {
mix-blend-mode: overlay;
}
/* Make sure content stays above background */
body > * {
/* Make sure page content stays above the decorative background. */
.ktx-site-shell {
position: relative;
z-index: 2;
}
@ -1058,8 +1046,7 @@ body > * {
.pill-badge .pill-dot { animation: none; }
.card-lift { transition: none; }
.ktx-code,
.ktx-code-minimal-copy,
.ktx-code-minimal-lang {
.ktx-code-minimal-copy {
transition: none;
}
#nd-sidebar div[data-state]:not([class]) > button[data-state] svg {

View file

@ -28,8 +28,8 @@ export const metadata: Metadata = {
description:
"Open-source context infrastructure that makes agentic analytics reliable.",
icons: {
icon: "/brand/ktx-mascot.svg",
shortcut: "/brand/ktx-mascot.svg",
icon: "/ktx/brand/ktx-mascot.svg",
shortcut: "/ktx/brand/ktx-mascot.svg",
},
};
@ -41,7 +41,9 @@ export default function RootLayout({ children }: { children: ReactNode }) {
suppressHydrationWarning
>
<body>
<RootProvider>{children}</RootProvider>
<RootProvider search={{ options: { api: "/ktx/api/search" } }}>
<div className="ktx-site-shell">{children}</div>
</RootProvider>
</body>
</html>
);

View file

@ -3,6 +3,11 @@ import {
getLlmDocsPages,
getPageMarkdown,
} from "@/lib/llm-docs";
import {
agentSetupSlug,
isAgentSetupSlug,
readAgentSetupMarkdown,
} from "@/lib/agent-setup-markdown";
export const dynamic = "force-static";
@ -11,6 +16,14 @@ export async function GET(
props: { params: Promise<{ slug?: string[] }> },
) {
const params = await props.params;
if (isAgentSetupSlug(params.slug)) {
return new Response(await readAgentSetupMarkdown(), {
headers: {
"Content-Type": "text/markdown; charset=utf-8",
},
});
}
const page = getLlmDocsPage(params.slug);
if (!page) {
return new Response("Documentation page not found.\n", {
@ -29,5 +42,8 @@ export async function GET(
}
export function generateStaticParams() {
return getLlmDocsPages().map((page) => ({ slug: page.slug }));
return [
...getLlmDocsPages().map((page) => ({ slug: page.slug })),
{ slug: [...agentSetupSlug] },
];
}

View file

@ -1,5 +1,3 @@
"use client";
import {
type ComponentPropsWithoutRef,
type ReactNode,
@ -13,8 +11,57 @@ type Props = ComponentPropsWithoutRef<"pre"> & {
"data-language"?: string;
};
const TERMINAL_LANGS = new Set(["bash", "sh", "shell", "zsh"]);
const OUTPUT_LANGS = new Set(["text", "plain", "plaintext", "console", "output"]);
const WIZARD_GLYPHS = /^\s*[◆◇◯◐○●]/;
const JSON_TOKEN_PATTERN =
/"(?:\\.|[^"\\])*"|-?\b\d+(?:\.\d+)?\b|\b(?:true|false|null)\b|[{}[\],:]/g;
const SQL_TOKEN_PATTERN =
/--[^\n]*|'(?:''|[^'])*'|\b\d+(?:\.\d+)?\b|\b(?:select|from|join|left|right|inner|outer|on|where|group|by|order|limit|as|sum|avg|min|max|count|coalesce|date_trunc|case|when|then|else|end|and|or|is|not|null|false|true|with|having|over|partition|insert|update|delete|create|alter|drop|table|view)\b|[(),.;=*<>+-]/gi;
// Tokenizer for shell / JS / TS / Python snippets. Alternatives are tried in
// order: line comment, block comment, hash comment, template/double/single
// quoted strings, numbers, keywords and well-known commands, CLI flags, and
// finally punctuation.
// The `(?<!:)` lookbehind keeps the `//` of a URL scheme (`https://host/path`)
// from being read as the start of a line comment, which would otherwise style
// the rest of the URL as a comment.
const CODE_LIKE_TOKEN_PATTERN =
  /(?<!:)\/\/[^\n]*|\/\*[\s\S]*?\*\/|#(?![{\w-]+:)[^\n]*|`(?:\\.|[^`\\])*`|"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|-?\b\d+(?:\.\d+)?\b|\b(?:const|let|var|function|return|import|export|from|type|interface|extends|async|await|if|else|for|while|switch|case|break|continue|try|catch|throw|new|class|public|private|protected|readonly|true|false|null|undefined|pnpm|uv|ktx|node|npx|curl|git)\b|--?[\w-]+|[{}[\](),.;:=*<>|&+-]/g;
const SQL_FUNCTIONS = new Set([
"sum",
"avg",
"min",
"max",
"count",
"coalesce",
"date_trunc",
]);
const CODE_KEYWORDS = new Set([
"const",
"let",
"var",
"function",
"return",
"import",
"export",
"from",
"type",
"interface",
"extends",
"async",
"await",
"if",
"else",
"for",
"while",
"switch",
"case",
"break",
"continue",
"try",
"catch",
"throw",
"new",
"class",
"public",
"private",
"protected",
"readonly",
]);
const COMMAND_KEYWORDS = new Set(["pnpm", "uv", "ktx", "node", "npx", "curl", "git"]);
const CODE_CONSTANTS = new Set(["true", "false", "null", "undefined"]);
function extractText(node: ReactNode): string {
if (typeof node === "string") return node;
@ -27,6 +74,33 @@ function extractText(node: ReactNode): string {
return "";
}
/**
 * Depth-first search for a language hint on a React element and its
 * descendants.
 *
 * For each element the non-empty `data-language` string prop is checked first,
 * then a `language-xxx` token in `className`; only after both miss are the
 * children visited in order.
 *
 * @returns the first hint found, or `null` when `node` is not a React element
 *          or no hint exists in the subtree.
 */
function findLanguageInNode(node: ReactNode): string | null {
  if (!isValidElement(node)) return null;

  const {
    className,
    children,
    "data-language": explicitLanguage,
  } = (
    node as ReactElement<{
      className?: string;
      "data-language"?: string;
      children?: ReactNode;
    }>
  ).props;

  if (typeof explicitLanguage === "string" && explicitLanguage) {
    return explicitLanguage;
  }

  const classMatch =
    typeof className === "string" ? className.match(/language-([\w-]+)/) : null;
  if (classMatch) return classMatch[1];

  // A single (non-array) child is searched directly; falsy children end the search.
  if (!Array.isArray(children)) {
    return children ? findLanguageInNode(children) : null;
  }

  for (const child of children) {
    const nested = findLanguageInNode(child);
    if (nested) return nested;
  }
  return null;
}
function detectLanguage(props: Props, children: ReactNode): string | null {
const dataLang = props["data-language"];
if (typeof dataLang === "string" && dataLang) return dataLang;
@ -35,54 +109,288 @@ function detectLanguage(props: Props, children: ReactNode): string | null {
const m = className.match(/language-([\w-]+)/);
if (m) return m[1];
if (isValidElement(children)) {
const childProps = (children as ReactElement<{ className?: string }>).props;
const childClass = typeof childProps.className === "string" ? childProps.className : "";
const cm = childClass.match(/language-([\w-]+)/);
if (cm) return cm[1];
return findLanguageInNode(children);
}
/**
 * Drops exactly one leading "\n" from `text`, if present.
 * Any further leading newlines are kept.
 */
function stripOneLeadingBlankLine(text: string) {
  if (text.charAt(0) !== "\n") return text;
  return text.substring(1);
}
/**
 * Looks for a "path comment" on the first line of a code sample and, when
 * found, splits it off as a header.
 *
 * The first line qualifies when, ignoring leading whitespace, it starts with
 * the line-comment prefix chosen for the language (`--` for sql, `//` for the
 * JavaScript/TypeScript family, `#` for everything else) and the text after
 * the prefix contains a `/` and ends with a dot-extension.
 *
 * @returns `{ header, code }` - `header` is the path text (or `null`), `code`
 *          is the sample without the header line and without one leading
 *          blank line; when no header is detected `code` is returned as-is.
 */
function extractCodeHeader(language: string | null, code: string) {
const normalized = normalizeLanguage(language);
// Split the sample into its first line and everything after the first newline.
const firstLineEnd = code.indexOf("\n");
const firstLine = firstLineEnd === -1 ? code : code.slice(0, firstLineEnd);
const rest = firstLineEnd === -1 ? "" : code.slice(firstLineEnd + 1);
// Pick the line-comment marker for the (lower-cased) language name.
const commentPrefix =
normalized === "sql"
? "--"
: normalized === "javascript" ||
normalized === "js" ||
normalized === "jsx" ||
normalized === "typescript" ||
normalized === "ts" ||
normalized === "tsx"
? "//"
: "#";
if (!firstLine.trimStart().startsWith(commentPrefix)) {
return { header: null, code };
}
// NOTE(review): this `return null;` looks like a leftover line from the diff
// rendering (it conflicts with the object return type used everywhere else in
// this function) - confirm against the real file.
return null;
// Text of the comment with the prefix and surrounding whitespace removed.
const candidate = firstLine
.trim()
.slice(commentPrefix.length)
.trim();
// A path must contain a slash and end in ".ext", optionally followed by
// closing quote / backtick / parenthesis characters.
const looksLikePath =
candidate.includes("/") &&
/\.[A-Za-z0-9]+(?:["'`)]*)?$/.test(candidate);
if (!looksLikePath) return { header: null, code };
return {
header: candidate,
code: stripOneLeadingBlankLine(rest),
};
}
/**
 * Returns the language name in lower case, or an empty string when no
 * language is given.
 */
function normalizeLanguage(language: string | null) {
  if (language == null) return "";
  return language.toLowerCase();
}
/**
 * Appends `token` to `parts` wrapped in a `<span>` carrying the given CSS
 * class and React key.
 */
function pushMatchedToken(
  parts: ReactNode[],
  token: string,
  className: string,
  key: string,
) {
  const tokenElement = (
    <span key={key} className={className}>
      {token}
    </span>
  );
  parts.push(tokenElement);
}
/**
 * Splits JSON source into plain-text runs and styled token spans.
 *
 * Tokens come from `JSON_TOKEN_PATTERN`. A double-quoted string directly
 * followed (after optional whitespace) by `:` is styled as a key; other
 * strings, numbers, `true`/`false`/`null` and punctuation get their own
 * classes. Text between tokens is passed through unchanged.
 */
function highlightJson(code: string) {
  // `tokenEnd` is the offset just past the token, used to peek for a colon.
  const classify = (token: string, tokenEnd: number): string => {
    if (token.startsWith('"')) {
      return /^\s*:/.test(code.slice(tokenEnd))
        ? "ktx-token-key"
        : "ktx-token-string";
    }
    if (/^-?\d/.test(token)) return "ktx-token-number";
    if (/^(true|false|null)$/.test(token)) return "ktx-token-constant";
    return "ktx-token-punctuation";
  };

  const rendered: ReactNode[] = [];
  let cursor = 0;

  Array.from(code.matchAll(JSON_TOKEN_PATTERN)).forEach((match, position) => {
    const token = match[0];
    const start = match.index ?? 0;
    const end = start + token.length;

    // Emit the untokenized text between the previous token and this one.
    if (start > cursor) rendered.push(code.slice(cursor, start));
    pushMatchedToken(rendered, token, classify(token, end), `json-${position}`);
    cursor = end;
  });

  if (cursor < code.length) rendered.push(code.slice(cursor));
  return rendered;
}
/**
 * Returns the index of the `#` that starts a YAML comment in `line`, or -1.
 *
 * A `#` only starts a comment when it sits at the start of the line or after
 * whitespace, and is not inside a quoted scalar. A quote opens a scalar only
 * at the start of the line, after whitespace, or after one of `[{,:`, so an
 * apostrophe inside a plain word (e.g. `don't`) is not treated as a quote.
 */
function findYamlCommentStart(line: string): number {
  let openQuote = "";
  for (let i = 0; i < line.length; i += 1) {
    const ch = line[i];

    if (openQuote === '"') {
      // Backslash escapes the next character inside double quotes.
      if (ch === "\\") i += 1;
      else if (ch === '"') openQuote = "";
      continue;
    }
    if (openQuote === "'") {
      if (ch === "'") {
        // `''` is an escaped quote inside single quotes.
        if (line[i + 1] === "'") i += 1;
        else openQuote = "";
      }
      continue;
    }

    const prev = i === 0 ? "" : line[i - 1];
    const afterGap = prev === "" || /\s/.test(prev);
    if (ch === "#" && afterGap) return i;
    if ((ch === '"' || ch === "'") && (afterGap || "[{,:".includes(prev))) {
      openQuote = ch;
    }
  }
  return -1;
}

/**
 * Splits YAML source into plain-text runs and styled token spans, one line at
 * a time.
 *
 * Per line: a leading `key:` (optionally after `- `) is styled as key plus
 * punctuation, the remaining content goes through `highlightInlineValue`, and
 * a trailing or full-line `#` comment is styled as a comment.
 *
 * Unlike a plain `/\s#/` search, comment detection skips `#` characters inside
 * quoted scalars (`title: "Issue #5"`), and a key is only recognised when its
 * colon is followed by whitespace or the end of the content, so a list item
 * such as `- https://example.com` is not split into key `https`.
 */
function highlightYaml(code: string) {
  const parts: ReactNode[] = [];
  // The capture group keeps the "\n" separators as their own entries.
  const lines = code.split(/(\n)/);
  let tokenIndex = 0;

  for (const line of lines) {
    if (line === "\n") {
      parts.push(line);
      continue;
    }

    const commentStart = findYamlCommentStart(line);
    // A comment preceded only by whitespace owns the whole line, indent included.
    const fullLineComment =
      commentStart !== -1 && line.slice(0, commentStart).trim() === "";
    const content = fullLineComment
      ? ""
      : commentStart === -1
        ? line
        : line.slice(0, commentStart);
    const comment = fullLineComment
      ? line
      : commentStart === -1
        ? ""
        : line.slice(commentStart);

    const keyMatch = content.match(
      /^(\s*(?:-\s*)?)([A-Za-z_][\w.-]*)(\s*:)(?=\s|$)/,
    );
    if (keyMatch) {
      parts.push(keyMatch[1]);
      pushMatchedToken(parts, keyMatch[2], "ktx-token-key", `yaml-key-${tokenIndex}`);
      pushMatchedToken(
        parts,
        keyMatch[3],
        "ktx-token-punctuation",
        `yaml-colon-${tokenIndex}`,
      );
      const rest = content.slice(keyMatch[0].length);
      if (rest) parts.push(...highlightInlineValue(rest, `yaml-${tokenIndex}`));
    } else if (content) {
      parts.push(...highlightInlineValue(content, `yaml-${tokenIndex}`));
    }

    if (comment) {
      pushMatchedToken(parts, comment, "ktx-token-comment", `yaml-comment-${tokenIndex}`);
    }
    tokenIndex += 1;
  }

  return parts;
}
/**
 * Tokenizes a single-line value fragment into plain-text runs and styled
 * spans for quoted strings, numbers, `true`/`false`/`null` and punctuation.
 *
 * @param value      the text to tokenize
 * @param keyPrefix  prefix used to build the React key of each span
 */
function highlightInlineValue(value: string, keyPrefix: string) {
  const tokenPattern = /'(?:''|[^'])*'|"(?:\\.|[^"\\])*"|-?\b\d+(?:\.\d+)?\b|\b(?:true|false|null)\b|[()[\]{},:=!<>+-]/g;

  const classify = (token: string): string => {
    if (token.startsWith("'") || token.startsWith('"')) return "ktx-token-string";
    if (/^-?\d/.test(token)) return "ktx-token-number";
    if (/^(true|false|null)$/.test(token)) return "ktx-token-constant";
    return "ktx-token-punctuation";
  };

  const rendered: ReactNode[] = [];
  let cursor = 0;

  Array.from(value.matchAll(tokenPattern)).forEach((match, position) => {
    const token = match[0];
    const start = match.index ?? 0;

    // Emit the untokenized text between the previous token and this one.
    if (start > cursor) rendered.push(value.slice(cursor, start));
    pushMatchedToken(
      rendered,
      token,
      classify(token),
      `${keyPrefix}-value-${position}`,
    );
    cursor = start + token.length;
  });

  if (cursor < value.length) rendered.push(value.slice(cursor));
  return rendered;
}
/**
 * Splits SQL source into plain-text runs and styled token spans.
 *
 * Tokens come from `SQL_TOKEN_PATTERN`. `--` comments, single-quoted strings
 * and numbers are classified first; a word listed in `SQL_FUNCTIONS`
 * (compared in lower case) is styled as a function, any other
 * letters/underscore word as a keyword, and everything else as punctuation.
 */
function highlightSql(code: string) {
  const classify = (token: string): string => {
    if (token.startsWith("--")) return "ktx-token-comment";
    if (token.startsWith("'")) return "ktx-token-string";
    if (/^\d/.test(token)) return "ktx-token-number";
    if (SQL_FUNCTIONS.has(token.toLowerCase())) return "ktx-token-function";
    if (/^[a-z_]+$/i.test(token)) return "ktx-token-keyword";
    return "ktx-token-punctuation";
  };

  const rendered: ReactNode[] = [];
  let cursor = 0;

  Array.from(code.matchAll(SQL_TOKEN_PATTERN)).forEach((match, position) => {
    const token = match[0];
    const start = match.index ?? 0;

    // Emit the untokenized text between the previous token and this one.
    if (start > cursor) rendered.push(code.slice(cursor, start));
    pushMatchedToken(rendered, token, classify(token), `sql-${position}`);
    cursor = start + token.length;
  });

  if (cursor < code.length) rendered.push(code.slice(cursor));
  return rendered;
}
/**
 * Splits shell / JS / TS / Python source into plain-text runs and styled
 * token spans using `CODE_LIKE_TOKEN_PATTERN`.
 *
 * Classification order: comments, quoted strings, numbers, constants,
 * language keywords, well-known commands (styled as functions), `-`/`--`
 * flags, then punctuation. Word lookups are done in lower case.
 */
function highlightCodeLike(code: string) {
  const classify = (token: string): string => {
    if (token.startsWith("//") || token.startsWith("/*") || token.startsWith("#")) {
      return "ktx-token-comment";
    }
    if (token.startsWith("'") || token.startsWith('"') || token.startsWith("`")) {
      return "ktx-token-string";
    }
    if (/^-?\d/.test(token)) return "ktx-token-number";

    const word = token.toLowerCase();
    if (CODE_CONSTANTS.has(word)) return "ktx-token-constant";
    if (CODE_KEYWORDS.has(word)) return "ktx-token-keyword";
    if (COMMAND_KEYWORDS.has(word)) return "ktx-token-function";
    if (token.startsWith("-")) return "ktx-token-flag";
    return "ktx-token-punctuation";
  };

  const rendered: ReactNode[] = [];
  let cursor = 0;

  Array.from(code.matchAll(CODE_LIKE_TOKEN_PATTERN)).forEach((match, position) => {
    const token = match[0];
    const start = match.index ?? 0;

    // Emit the untokenized text between the previous token and this one.
    if (start > cursor) rendered.push(code.slice(cursor, start));
    pushMatchedToken(rendered, token, classify(token), `code-${position}`);
    cursor = start + token.length;
  });

  if (cursor < code.length) rendered.push(code.slice(cursor));
  return rendered;
}
/**
 * Picks the highlighter for a fenced code block based on its language tag.
 *
 * The tag is first passed through `normalizeLanguage`. JSON, YAML and SQL
 * have dedicated highlighters; shell, JavaScript/TypeScript and Python
 * variants share `highlightCodeLike`. Any other language returns the code
 * string untouched.
 *
 * @param language The language tag of the block, or `null` when absent.
 * @param code     The raw code text.
 * @returns Highlighted nodes, or the original string when the language is
 *          not handled.
 */
function highlightCode(language: string | null, code: string) {
  const lang = normalizeLanguage(language);

  switch (lang) {
    case "json":
    case "jsonc":
      return highlightJson(code);

    case "yaml":
    case "yml":
      return highlightYaml(code);

    case "sql":
      return highlightSql(code);

    case "bash":
    case "sh":
    case "shell":
    case "zsh":
    case "javascript":
    case "js":
    case "jsx":
    case "typescript":
    case "ts":
    case "tsx":
    case "python":
    case "py":
      return highlightCodeLike(code);

    default:
      // Unknown language: render the text without token markup.
      return code;
  }
}
export function CodeBlock(props: Props) {
const { children, title, className: _ignored, ...rest } = props;
const language = detectLanguage(props, children);
const codeText = extractText(children);
const rawCodeText = extractText(children);
const extractedHeader = extractCodeHeader(language, rawCodeText);
const codeText = extractedHeader.code;
const headerTitle =
typeof title === "string" && title.length > 0
? title
: extractedHeader.header;
const highlightedCode = highlightCode(language, codeText);
const isTerminal = language !== null && TERMINAL_LANGS.has(language);
const isOutput = !isTerminal && WIZARD_GLYPHS.test(codeText);
const hasTitle = typeof title === "string" && title.length > 0;
// Mode A - Terminal (commands the user types)
if (isTerminal) {
return (
<div className="not-prose ktx-code ktx-code-terminal group">
<div className="ktx-code-terminal-head">
<span className="ktx-tl-dot" style={{ background: "#ff5f57" }} />
<span className="ktx-tl-dot" style={{ background: "#febc2e" }} />
<span className="ktx-tl-dot" style={{ background: "#28c840" }} />
<span className="ktx-code-terminal-label">
{hasTitle ? title : "zsh"}
</span>
<CopyButton
text={codeText}
className="ml-auto text-white/80"
/>
</div>
<pre {...rest} className="ktx-code-body ktx-code-body-terminal">
{children}
</pre>
</div>
);
}
const hasHeader = typeof headerTitle === "string" && headerTitle.length > 0;
const isOutput =
!hasHeader &&
(WIZARD_GLYPHS.test(rawCodeText) ||
(language !== null && OUTPUT_LANGS.has(language)));
// Mode D - Output preview (wizard prompts, terminal output)
if (isOutput) {
return (
<div className="not-prose ktx-code ktx-code-output group relative">
<span className="ktx-code-output-label">output</span>
<CopyButton text={codeText} className="ktx-code-output-copy" />
<CopyButton text={rawCodeText} className="ktx-code-output-copy" />
<pre {...rest} className="ktx-code-body ktx-code-body-output">
{children}
</pre>
@ -90,18 +398,17 @@ export function CodeBlock(props: Props) {
);
}
// Mode B - VS Code tab (filename present)
if (hasTitle) {
// Mode B - Header (filename present)
if (hasHeader) {
return (
<div className="not-prose ktx-code ktx-code-tab group">
<div className="ktx-code-tab-head">
<span className="ktx-file-glyph" data-lang={language ?? ""} />
<span className="ktx-code-tab-filename">{title}</span>
{language && <span className="ktx-lang-pill">{language}</span>}
<span className="ktx-code-tab-filename">{headerTitle}</span>
<CopyButton text={codeText} className="ml-auto" />
</div>
<pre {...rest} className="ktx-code-body ktx-code-body-tab">
{children}
{highlightedCode}
</pre>
</div>
);
@ -110,10 +417,9 @@ export function CodeBlock(props: Props) {
// Mode C - Minimal default
return (
<div className="not-prose ktx-code ktx-code-minimal group relative">
{language && <span className="ktx-code-minimal-lang">{language}</span>}
<CopyButton text={codeText} className="ktx-code-minimal-copy" />
<pre {...rest} className="ktx-code-body ktx-code-body-minimal">
{children}
{highlightedCode}
</pre>
</div>
);

View file

@ -25,12 +25,12 @@ export function CopyButton({ text, className = "" }: Props) {
type="button"
onClick={onClick}
aria-label={copied ? "Copied" : "Copy code"}
className={`inline-flex items-center justify-center w-7 h-7 rounded-md transition-all hover:bg-white/5 ${className}`}
className={`inline-flex items-center justify-center w-9 h-9 rounded-md transition-all hover:bg-fd-muted ${className}`}
>
{copied ? (
<svg
width="14"
height="14"
width="18"
height="18"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
@ -44,8 +44,8 @@ export function CopyButton({ text, className = "" }: Props) {
</svg>
) : (
<svg
width="13"
height="13"
width="17"
height="17"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"

View file

@ -2,109 +2,37 @@
import { useState } from "react";
type CopyState = "idle" | "copied" | "error";
type Props = {
markdownUrl: string;
mdxSource: string;
};
export function DocsPageActions({ markdownUrl, mdxSource }: Props) {
function stripFrontmatter(source: string) {
return source.trim().replace(/^---\n[\s\S]*?\n---\n?/, "").trim();
}
export function DocsPageActions({ mdxSource }: Props) {
const [copied, setCopied] = useState(false);
const onCopy = async () => {
try {
await navigator.clipboard.writeText(stripFrontmatter(mdxSource));
setCopied(true);
setTimeout(() => setCopied(false), 1500);
} catch {
// Clipboard denied - fail silently
}
};
return (
<div className="not-prose flex flex-wrap items-center gap-2 text-xs">
<CopyMarkdownButton markdownUrl={markdownUrl} />
<a
href={markdownUrl}
className="inline-flex h-8 items-center rounded-md border border-fd-border bg-fd-background px-3 font-medium text-fd-muted-foreground transition-colors hover:border-fd-primary/40 hover:text-fd-foreground"
<button
type="button"
onClick={onCopy}
className="inline-flex h-8 items-center rounded-md border border-fd-border bg-fd-background px-3 font-medium text-fd-muted-foreground transition-colors hover:border-fd-primary/40 hover:text-fd-foreground data-[state=copied]:border-emerald-500/40 data-[state=copied]:text-emerald-600"
data-state={copied ? "copied" : "idle"}
>
View MD
</a>
<CopyTextButton label="Copy MDX" text={mdxSource} />
{copied ? "Copied" : "Copy as Markdown"}
</button>
</div>
);
}
function CopyMarkdownButton({ markdownUrl }: { markdownUrl: string }) {
const [state, setState] = useState<CopyState>("idle");
const onClick = async () => {
try {
const response = await fetch(markdownUrl, {
headers: { Accept: "text/markdown" },
});
if (!response.ok) {
throw new Error(`Failed to fetch ${markdownUrl}`);
}
await navigator.clipboard.writeText(await response.text());
flash(setState, "copied");
} catch {
flash(setState, "error");
}
};
return (
<ActionButton
label={labelForState(state, "Copy MD")}
onClick={onClick}
state={state}
/>
);
}
function CopyTextButton({ label, text }: { label: string; text: string }) {
const [state, setState] = useState<CopyState>("idle");
const onClick = async () => {
try {
await navigator.clipboard.writeText(text);
flash(setState, "copied");
} catch {
flash(setState, "error");
}
};
return (
<ActionButton
label={labelForState(state, label)}
onClick={onClick}
state={state}
/>
);
}
function ActionButton({
label,
onClick,
state,
}: {
label: string;
onClick: () => void;
state: CopyState;
}) {
return (
<button
type="button"
onClick={onClick}
className="inline-flex h-8 items-center rounded-md border border-fd-border bg-fd-background px-3 font-medium text-fd-muted-foreground transition-colors hover:border-fd-primary/40 hover:text-fd-foreground data-[state=copied]:border-emerald-500/40 data-[state=copied]:text-emerald-600 data-[state=error]:border-red-500/40 data-[state=error]:text-red-600"
data-state={state}
>
{label}
</button>
);
}
function labelForState(state: CopyState, label: string) {
if (state === "copied") return "Copied";
if (state === "error") return "Copy failed";
return label;
}
function flash(
setState: (state: CopyState) => void,
state: Exclude<CopyState, "idle">,
) {
setState(state);
window.setTimeout(() => setState("idle"), 1500);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,201 @@
# Goal
Set up KTX from scratch end-to-end as a fully autonomous, agent-driven replacement for the interactive `ktx setup` wizard. Detect the environment, install missing prerequisites, ask the user only for information you genuinely need (which connections to add, credentials), write a valid configuration, verify it works, and run a fast schema ingest. Keep the user updated throughout.
# Operating principles
- **Be autonomous.** Detect, decide, and act. Only ask the user when you need information that only they can provide: project location, which databases/sources to connect, credentials, and similar choices.
- **Stream short status updates.** Before each major phase ("Checking prerequisites…", "Installing uv…", "Configuring warehouse connection…", "Running fast ingest…") print a one-line update. Not chatty — just enough that the user can see what's happening.
- **Verify against docs, never guess.** CLI flags, config keys, and command names must come from the docs or from `ktx <command> --help`. If something looks wrong or missing, say so explicitly.
- **Print every command you run and its exit code.** Terse, not silent.
- **Fail loudly with cause + fix.** When a command fails: capture the exact error, identify the cause, change something, retry. Never retry an unchanged command. Exceptions for *known soft-failures* are listed in Phase 4 — handle those without retrying.
- **No LLM-based ingestion in this flow.** Only `--fast` ingest (schema-only). The user can run `--deep` later.
- **Platform-agnostic.** Detect the host OS first and pick the right install commands / path syntax. Anything path- or shell-specific must branch on OS.
# Authoritative docs
KTX docs are served at `https://docs.kaelio.com/ktx/`. **Start by fetching `https://docs.kaelio.com/ktx/llms.txt`** to discover the docs map. Scan it for a "troubleshooting" entry — if one exists, read it **before** running install/setup so you can apply known fixes preemptively rather than after failing. If no troubleshooting page is listed (current state of the docs), proceed. Then fetch any other `.md` pages you need (setup, ingest, status, connection types). **Never invent CLI flags or config keys** — verify against the docs or `ktx --help` / `ktx <subcommand> --help`.
> **Note on the `ktx status` JSON example in the docs.** The docs page for `ktx status` shows an example shaped like `{"title": "...", "checks": [...]}`. That example is outdated. The real CLI output uses a top-level `verdict` field plus a `connections[]` array — see Phase 5 for the canonical success criteria. Trust the shape in this prompt over the docs example.
# Workflow
## Phase 1 — Detect environment
Determine the host OS (e.g. via `uname -s`, `process.platform`, or `$env:OS`). Use the right install commands per OS for the rest of this flow.
| Tool | macOS / Linux | Windows (PowerShell) |
|------|---------------|----------------------|
| `uv` | `curl -LsSf https://astral.sh/uv/install.sh \| sh` then re-source shell env | `irm https://astral.sh/uv/install.ps1 \| iex` |
| Node.js | use system / fnm / nvm — **do not** auto-install | use system / nvm-windows — **do not** auto-install |
| KTX CLI | `npm install -g …` (see Phase 2) | `npm install -g …` (see Phase 2) |
If Node.js is missing, **stop and ask the user** to install it (https://nodejs.org/). Do not attempt to auto-install Node.
## Phase 2 — Verify and install prerequisites
Check each tool in order; install only if missing.
1. **Node.js** — run `node --version`. Require >= 22. If missing or older, stop and instruct the user.
2. **`uv`** — run `uv --version`. If missing, run the OS-appropriate install command, then re-source the shell environment (`export PATH="$HOME/.local/bin:$PATH"` on Linux/macOS) so `uv` is on `PATH`.
3. **KTX CLI**
- Run `ktx --version`. If the command is missing, install the CLI with `npm install -g @kaelio/ktx`.
- Verify with `ktx --version`.
Print one status line per tool ("✓ uv 0.11.15 found", "Installing uv…", "✓ ktx 0.x.y installed").
## Phase 3 — Gather user choices
Ask the user (grouped if your harness supports it; otherwise sequentially):
1. **Project directory.** Default: current working directory. Confirm before continuing.
2. **LLM provider.** Default: `claude-code` with model `sonnet` (the user is already inside Claude Code; no extra API key needed). Offer `anthropic` (paste API key, stored as `env:` or `file:` ref) and `vertex` (GCP project + location) as alternatives. Skip if defaults are accepted.
3. **Embeddings backend.** Default: `sentence-transformers` (local, no API key, managed Python runtime). Offer `openai` only if the user has a key.
4. **Database connections.** Ask how many to add, then loop. For each, collect:
- Connection name (e.g. `warehouse`, `analytics`).
- Driver: one of `sqlite`, `postgres`, `mysql`, `sqlserver`, `bigquery`, `snowflake`.
- Connection URL/DSN (or service-account file for BigQuery). Accept `env:VAR_NAME` or `file:/abs/path` to avoid pasting raw secrets.
- **Heads-up for the user**: even if they paste a literal URL, KTX will silently relocate it into `<project>/.ktx/secrets/<connection>-url` and rewrite `ktx.yaml` to `url: file:…` — this is correct, secure behavior and not a bug.
- Schemas / datasets to include (postgres / sqlserver / snowflake / bigquery only).
- Optional `enabled_tables` allowlist if the user wants to scope ingest to specific tables.
5. **BI / metadata sources** (dbt, Metabase, Looker, LookML, MetricFlow, Notion). Default: none. Ask only if the user mentions them.
## Phase 4 — Configure the project
Drive the existing wizard non-interactively. Verify exact flag names against the `ktx setup` docs page: the automation flags below are accepted by the CLI but hidden from `ktx setup --help`, so `--help` alone will not list them:
```
ktx setup \
--project-dir <path> \
--no-input --yes \
--llm-backend <claude-code|anthropic|vertex> --llm-model <model> \
[--anthropic-api-key-env ANTHROPIC_API_KEY | --anthropic-api-key-file <path>] \
[--vertex-project <p> --vertex-location <loc>] \
--embedding-backend <sentence-transformers|openai> \
[--embedding-api-key-env OPENAI_API_KEY] \
--skip-sources \
--database <driver> --database-connection-id <name> --database-url <url|env:VAR|file:/path> \
[--database-schema <schema> …]
```
Notes on the flags above:
- **Project creation is automatic with `--no-input --yes`.** When
`ktx.yaml` exists, setup resumes it. When it doesn't exist, setup creates it
at `--project-dir`.
- **`--database-connection-id` is dual-purpose.** With `--database` or
`--database-url`, it names the new connection. Without those flags, it
selects an existing connection id.
- **Configure one new database connection per setup command.** If the user
wants multiple new connections, run setup again for each connection.
- **You don't need `--skip-agents` in this flow.** The agent integration step
is opt-in: setup leaves it alone unless you pass `--agents --target
<target>`.
- **`--skip-sources`** is correct and is the documented way to leave BI/metadata sources unconfigured.
### Known soft-failure: `ktx setup` exits 1 after a successful fast build
When you select a configuration that only does fast (schema-only) ingest, `ktx setup`'s final readiness verification fails with:
```
KTX context build did not pass agent-readiness verification.
<connection>: deep database context has not completed.
```
This is **expected** and **does not mean setup failed**. Treat the exit code as a soft-failure **only if all of the following hold**:
- The build log shows the fast ingest reached `[100%] Scan completed` for every configured connection.
- `ktx connection test <name>` (run next) exits 0 for every connection.
- `ktx status --json --no-input` reports `verdict: "ready"`.
If those three conditions hold, proceed to Phase 5 without retrying setup, and **do not** switch to `--deep` to "fix" the readiness gate — deep ingest is explicitly out of scope. Mention this in the final report under "Docs / CLI gaps" so the user is aware.
If any of those three conditions do not hold, this is a real failure — capture the error, fetch the relevant docs page, fix the cause, retry.
After `ktx setup` writes `ktx.yaml`, edit it directly for anything flags don't cover:
- Per-connection `enabled_tables` allowlist (snake_case, under `connections.<name>.enabled_tables`).
- Any advanced settings the user requested.
Use a YAML-aware editor (e.g. `uv run --with pyyaml python -c "import yaml; …"` — PyYAML is not in the standard library, so it must be requested with `--with`) — do not hand-edit blindly.
## Phase 5 — Verify
`ktx setup` already runs a fast schema ingest of every database connection it configures, so you do not need to re-ingest by default. For each configured connection:
```
ktx connection test <connection-name> # must exit 0
```
Only re-run ingest if setup's build log did **not** reach 100% for that connection:
```
ktx ingest <connection-name> --fast --no-input
```
**Mutex warning on `ktx ingest`**: passing both `--yes` and `--no-input` fails with `Choose only one runtime install mode: --yes or --no-input`. Setup already installed the managed Python runtime, so pass **only `--no-input`** to `ktx ingest`. (`--yes` is only needed when an ingest invocation has to install the runtime itself, which is not the case here.)
Then run the global health check:
```
ktx status --json --no-input
```
Success requires (canonical shape — supersedes the example in the docs):
- `verdict: "ready"` at the top of the JSON.
- Every `connections[].status === "ok"`.
- `ktx connection test <name>` exited 0 for every connection.
Do **not** run `--deep` ingest in this flow — that requires LLM time and is out of scope.
### Optional: directly probe the embeddings daemon
If the user asks for stronger verification that `sentence-transformers` is actually serving (not just that setup said "ok"), do all of:
1. `ktx dev runtime status --json` → expect `"kind": "ready"` and `"features": [..., "local-embeddings"]`.
2. `pgrep -fa ktx-daemon` → expect a process running `ktx-daemon serve-http`.
3. `curl -sS http://127.0.0.1:<port>/health` → expect HTTP 200 with `{"status":"healthy",…}`.
4. `curl -sS -X POST http://127.0.0.1:<port>/embeddings/compute -H 'content-type: application/json' -d '{"text":"hello"}'` → expect `{"embedding": [...384 floats...]}`.
Discover the port from setup's log line `Started KTX local embeddings daemon: http://127.0.0.1:<port>` or from the daemon's OpenAPI at `GET /openapi.json`. Note: the routes are `/health` and `/embeddings/compute` — not `/healthz` or `/embeddings`.
## Phase 6 — Final report
Print a structured report:
```
KTX SETUP COMPLETE
Project: <path>
LLM: <backend> / <model>
Embeddings: <backend> / <model>
Runtime: managed Python ✓ (if sentence-transformers daemon was started)
Connections:
- <name> (<driver>) status=ok schemas=[…] tables=<N>
- …
Sources: <list or "none">
Verdict: ready
```
Then **Next steps** (copy-pasteable):
1. Enrich with AI descriptions and embeddings: `ktx ingest <connection> --deep` (several minutes per connection).
2. Add more connections later by rerunning this setup or via `ktx setup --database … --database-connection-id …`.
3. Configure BI sources (dbt, Metabase, Looker, LookML, MetricFlow, Notion) — see `ktx setup --help` for `--source …` flags.
4. Install agent integration: `ktx setup --agents --target <claude-code|claude-desktop|codex|cursor|opencode|universal>` (with optional `--global` for `claude-code`/`codex`).
5. Connect the agent / MCP: see docs at `https://docs.kaelio.com/ktx/`.
Under **Docs / CLI gaps to flag** include any of these that applied during your run:
- `ktx setup` exits non-zero after a successful fast build (deep-readiness gate); status reports ready.
- `ktx ingest` rejects `--yes` and `--no-input` together; docs don't note the conflict.
- `ktx status --json` real shape (`verdict`, `connections[]`) doesn't match the example in the docs page.
- The pasted DB URL was moved to `.ktx/secrets/<name>-url` automatically.
End with a single line: `RESULT: PASS` or `RESULT: FAIL — <one-line reason>`.
# Operating rules (recap)
- Print every command you run and its exit code. Status updates may be terse, but never silent.
- On failure: capture the error, fetch the relevant docs page, fix the cause, retry. Never retry an unchanged command.
- Known soft-failures (the `ktx setup` exit code described in Phase 4) and known CLI quirks (the `--yes` / `--no-input` conflict described in Phase 5) are not real failures — handle them as documented; do not retry or escalate.
- If you find a docs/CLI gap ("docs say X but CLI does Y"), call it out in the final report.
- Never commit credentials — KTX accepts `env:` and `file:` references; prefer those. KTX will also auto-relocate literal URLs into `.ktx/secrets/`, but that does not protect anyone who pasted the URL into chat history.

View file

@ -5,6 +5,10 @@ description: A task-first route for coding agents that need to understand KTX do
This page is for coding assistants reading or citing the KTX docs. It is intentionally limited to documentation lookup, docs navigation, and safe command discovery.
For Markdown endpoints, use [Markdown Access](/docs/ai-resources/markdown-access).
For reusable task prompts, use [Prompt Recipes](/docs/ai-resources/prompt-recipes).
To install KTX into an agent client, use [Agent Clients](/docs/integrations/agent-clients).
## First read
Agents should start with the smallest source that answers the task:

View file

@ -1,64 +0,0 @@
---
title: AI Resources
description: Machine-readable docs, retrieval paths, and prompt recipes for coding assistants using KTX documentation.
---
Use this section when a coding assistant, IDE agent, or automation system needs
to read, cite, or update KTX documentation. These resources are optimized for
retrieval: agents can fetch small Markdown pages, use the full corpus only when
needed, and copy prompts that point them at current setup and CLI behavior.
> **Documentation index**
>
> Start with [`/llms.txt`](/llms.txt) to discover the available docs. Use
> [`/llms-full.txt`](/llms-full.txt) when the assistant needs the complete docs
> corpus in one Markdown response.
## What agents can do
| Need | Recommended path |
|------|------------------|
| Find the right setup or CLI page | Fetch [`/llms.txt`](/llms.txt), then read the smallest matching `.md` page |
| Answer a setup question | Read [Agent Quickstart](/docs/ai-resources/agent-quickstart), then [Quickstart](/docs/getting-started/quickstart) or [ktx setup](/docs/cli-reference/ktx-setup) |
| Quote a command or flag | Read the matching [CLI Reference](/docs/cli-reference) page as Markdown |
| Update docs in this repo | Use [Agent Instructions](/docs/ai-resources/agent-instructions) and verify generated Markdown routes after editing |
| Reuse a prompt | Copy from [Prompt Recipes](/docs/ai-resources/prompt-recipes) |
## Section map
| Goal | Use this page |
|------|---------------|
| Give an assistant a task-first route through the docs | [Agent Quickstart](/docs/ai-resources/agent-quickstart) |
| Fetch docs as Markdown instead of rendered HTML | [Markdown Access](/docs/ai-resources/markdown-access) |
| Add lightweight KTX docs guidance to a system prompt | [Agent Instructions](/docs/ai-resources/agent-instructions) |
| Copy prompts for setup, command lookup, and docs editing | [Prompt Recipes](/docs/ai-resources/prompt-recipes) |
## Available resources
| Resource | What it gives agents |
|----------|----------------------|
| [`/llms.txt`](/llms.txt) | Curated index of high-value KTX docs and Markdown endpoints |
| [`/llms-full.txt`](/llms-full.txt) | Complete docs corpus in one plain-text Markdown response |
| `/docs/<path>.md` | Per-page Markdown for any docs page |
| Page-level actions | Copy Markdown, view Markdown, or copy MDX from rendered docs pages |
| Prompt recipes | Reusable prompts for docs lookup, setup help, command discovery, and docs editing |
## Agent usage notes
When an assistant is unsure where to begin, use this retrieval order:
1. Read [`/llms.txt`](/llms.txt).
2. Fetch one or two specific Markdown pages for the task.
3. Use [Agent Quickstart](/docs/ai-resources/agent-quickstart) to choose the
next command, guide, or CLI reference page.
4. Use [`/llms-full.txt`](/llms-full.txt) only when the answer requires broad
context across setup, integrations, concepts, and CLI reference.
5. Use page-level copy actions when the user wants exact generated Markdown or
source MDX.
## Boundaries
AI Resources explain how agents consume the docs. To install KTX into an
agent client, use [Agent Clients](/docs/integrations/agent-clients). To set up a
project, use [Quickstart](/docs/getting-started/quickstart) or
[`ktx setup`](/docs/cli-reference/ktx-setup).

View file

@ -2,7 +2,6 @@
"title": "AI Resources",
"defaultOpen": true,
"pages": [
"index",
"agent-quickstart",
"markdown-access",
"agent-instructions",

View file

@ -14,7 +14,7 @@ Read https://docs.kaelio.com/ktx/llms.txt first. Then fetch only the KTX Markdow
## Set up a project
```text
Set up KTX in this repository. Start by reading /docs/ai-resources/agent-quickstart.md and /docs/getting-started/quickstart.md. Use pnpm, not npm. After setup, run ktx status and summarize which steps are complete, which files changed, and what still needs credentials or user input.
Set up KTX in this repository. Start by reading /docs/ai-resources/agent-quickstart.md and /docs/getting-started/quickstart.md. Install the published CLI with npm; use pnpm only when working from a KTX source checkout. After setup, run ktx status and summarize which steps are complete, which files changed, and what still needs credentials or user input.
```
## Find a command

View file

@ -36,7 +36,7 @@ directory. Use it from any directory to generate editor or agent schema files.
| Flag | Description | Default |
|------|-------------|---------|
| `--output <file>` | Write the schema to a file instead of stdout | |
| `--output <file>` | Write the schema to a file instead of stdout | - |
## `dev runtime` Subcommands

View file

@ -33,7 +33,7 @@ connections when you use `--all`.
| `--plain` | Print plain text output | `true` |
| `--json` | Print JSON output | `false` |
| `--yes` | Install required managed runtime features without prompting | `false` |
| `--no-input` | Disable interactive terminal input | |
| `--no-input` | Disable interactive terminal input | - |
`--fast` and `--deep` are mutually exclusive. Depth flags apply only to
database connections. Query-history flags apply only to database connections
@ -60,7 +60,7 @@ read one item from stdin.
| Flag | Description | Default |
|------|-------------|---------|
| `--text <content>` | Text content to ingest; repeat for a batch | `[]` |
| `--connection-id <connectionId>` | Optional KTX connection id for semantic-layer capture | |
| `--connection-id <connectionId>` | Optional KTX connection id for semantic-layer capture | - |
| `--user-id <id>` | Memory user id for capture attribution | `local-cli` |
| `--json` | Print JSON output | `false` |
| `--fail-fast` | Stop after the first failed text item | `false` |

View file

@ -26,10 +26,10 @@ below.
| Flag | Description | Default |
|------|-------------|---------|
| `--agents` | Install agent integration only | `false` |
| `--target <target>` | Agent target: `claude-code`, `codex`, `cursor`, `opencode`, or `universal` | - |
| `--agents` | Install agent configuration and rules only | `false` |
| `--target <target>` | Agent target: `claude-code`, `claude-desktop`, `codex`, `cursor`, `opencode`, or `universal` | - |
| `--global` | Install agent integration into the global target scope for `claude-code` or `codex` | `false` |
| `--yes` | Accept safe defaults in non-interactive setup | `false` |
| `--yes` | Accept project creation and runtime install defaults where setup asks for confirmation | `false` |
| `--no-input` | Disable interactive terminal input | - |
Use the global `--project-dir <path>` option when setup should target a
@ -40,12 +40,12 @@ specific directory.
These flags are useful for repeatable setup in examples, tests, CI fixtures, and
scripted project creation. They are not shown in `ktx setup --help`.
### Project Mode
### Project Creation
| Flag | Description | Default |
|------|-------------|---------|
| `--new` | Create a new KTX project before setup | `false` |
| `--existing` | Use an existing KTX project | `false` |
Setup resumes an existing `ktx.yaml` when one is present. When no project
exists, interactive setup prompts for where to create it. In scripts, pass
`--project-dir <dir> --no-input --yes` to create the target directory without
prompts.
### LLM Provider
@ -56,7 +56,6 @@ scripted project creation. They are not shown in `ktx setup --help`.
| `--llm-model <model>` | LLM model ID or backend model alias to validate and save |
| `--anthropic-api-key-env <name>` | Environment variable containing the Anthropic API key |
| `--anthropic-api-key-file <path>` | File containing the Anthropic API key |
| `--anthropic-model <model>` | Legacy alias for `--llm-model` |
| `--vertex-project <project>` | Vertex AI project ID, `env:NAME`, or `file:/path` reference |
| `--vertex-location <location>` | Vertex AI location, `env:NAME`, or `file:/path` reference |
| `--skip-llm` | Leave LLM setup incomplete |
@ -82,15 +81,19 @@ embedding credential source.
### Runtime
Setup prepares the managed Python runtime when your selected configuration
needs it. The runtime step runs after database and source setup and before the
initial context build.
needs it. In the full setup flow, the runtime step runs after database and
source setup and before the initial context build.
KTX prepares the `core` runtime feature when agent integration, query-history
ingest, Looker source ingest, or daemon-backed context build paths need it. KTX
prepares the `local-embeddings` runtime feature when you choose managed local
`sentence-transformers` embeddings. Existing external daemon URLs, such as
`KTX_DAEMON_URL` or `KTX_SQL_ANALYSIS_URL`, satisfy the matching dependency and
skip managed runtime installation for that dependency.
KTX prepares the `core` runtime feature when query-history ingest, Looker
source ingest, database introspection fallback, or daemon-backed context build
paths need it. KTX prepares the `local-embeddings` runtime feature when you
choose managed local `sentence-transformers` embeddings. Existing external
daemon URLs, such as `KTX_DAEMON_URL` or `KTX_SQL_ANALYSIS_URL`, satisfy the
matching dependency and skip managed runtime installation for that dependency.
`ktx setup --agents` doesn't prepare runtime features or build context. It only
installs agent configuration and rules. Start MCP with `ktx mcp start` before
using HTTP-based agents; MCP startup prepares the runtime it needs.
Interactive setup prompts before installing runtime features. Use `--yes` to
install them without prompting. Use `--no-input` to fail fast when required
@ -100,9 +103,8 @@ runtime features are missing.
| Flag | Description |
|------|-------------|
| `--database <driver>` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake` |
| `--database-connection-id <id>` | Existing selected connection id; repeatable |
| `--new-database-connection-id <id>` | Connection id for one new database connection |
| `--database <driver>` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `sqlserver`, `bigquery`, `snowflake` |
| `--database-connection-id <id>` | Existing selected connection id; repeatable. With `--database` or `--database-url`, connection id for the new connection. |
| `--database-url <url>` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection; also used as the SQLite path |
| `--database-schema <schema>` | Database schema or dataset to include; repeatable |
| `--skip-databases` | Leave database setup incomplete |
@ -173,10 +175,11 @@ ktx setup \
ktx setup \
--project-dir ./analytics \
--no-input \
--yes \
--skip-llm \
--skip-embeddings \
--database postgres \
--new-database-connection-id warehouse \
--database-connection-id warehouse \
--database-url env:DATABASE_URL \
--database-schema public
@ -184,7 +187,7 @@ ktx setup \
ktx setup \
--project-dir ./analytics \
--database postgres \
--new-database-connection-id warehouse \
--database-connection-id warehouse \
--database-url env:DATABASE_URL \
--enable-query-history \
--query-history-min-executions 5
@ -232,4 +235,5 @@ Use `ktx status` for repeatable readiness checks after setup exits.
| `--enable-query-history` is rejected | The selected database driver does not support query history | Use Postgres, BigQuery, or Snowflake, or rerun without query-history flags |
| Source setup rejects location flags | Both `--source-path` and `--source-git-url` were supplied | Choose the local path or the Git URL, not both |
| Agent integration missing | Setup skipped the agents step | Run `ktx setup --agents --target <target>` |
| Agent setup cannot prompt for a target | Non-TTY `ktx setup --agents` needs a target | Run `ktx setup --agents --target <target>` or rerun in a TTY |
| Global agent install is rejected | `--global` was used with a target other than `claude-code` or `codex` | Omit `--global`, or choose `--target claude-code` or `--target codex` |

View file

@ -20,7 +20,7 @@ ktx sl <subcommand> [options]
| `list` | List semantic-layer sources |
| `search <query>` | Search semantic-layer sources |
| `validate <sourceName>` | Validate a semantic-layer source against the database schema |
| `query` | Compile or execute a semantic-layer query |
| `query` | Compile or execute a Semantic Query |
## Options
@ -52,7 +52,7 @@ ktx sl <subcommand> [options]
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id | - |
| `--query-file <path>` | JSON semantic-layer query file | - |
| `--query-file <path>` | JSON Semantic Query file | - |
| `--measure <measure>` | Measure to query; repeatable (at least one required) | - |
| `--dimension <dimension>` | Dimension to include; repeatable | - |
| `--filter <filter>` | Filter expression; repeatable | - |
@ -67,7 +67,7 @@ ktx sl <subcommand> [options]
| `--max-rows <n>` | Maximum rows to return when executing | - |
`sl query` requires at least one `--measure` unless `--query-file` is set.
`--query-file` should point to a JSON semantic-layer query object.
`--query-file` should point to a JSON Semantic Query object.
## Examples

View file

@ -1,13 +1,29 @@
---
title: "Overview"
description: "Command map and shared options for the KTX CLI."
title: "ktx"
description: "Root command map, global options, and project resolution for the KTX CLI."
---
The `ktx` CLI sets up local projects, builds agent-ready context, checks
connections, queries semantic-layer sources, searches wiki pages, runs the MCP
server, and manages the bundled Python runtime.
## Command Map
## Command signature
```bash
ktx [global-options] <command>
```
When you run bare `ktx` in an interactive terminal outside any KTX project, the
CLI starts the same guided setup flow as `ktx setup`. Inside an existing
project, use command-specific help:
```bash
ktx --help
ktx setup --help
ktx ingest --help
```
## Command map
```text
ktx
@ -45,7 +61,7 @@ ktx
The public context-build entrypoint is `ktx ingest [connectionId]` or
`ktx ingest --all`.
## Global Options
## Global options
| Flag | Description |
|------|-------------|
@ -54,14 +70,14 @@ The public context-build entrypoint is `ktx ingest [connectionId]` or
| `-v`, `--version` | Show the CLI package name and version. |
| `-h`, `--help` | Show help for the current command. |
## Project Resolution
## Project resolution
Most commands are project-aware. Pass `--project-dir <path>` when scripting or
when you are outside the project directory. If you omit it, KTX checks
`KTX_PROJECT_DIR`, then walks upward for the nearest `ktx.yaml`, then falls back
to the current directory.
## Common Workflows
## Common workflows
```bash
# Start or resume setup

View file

@ -2,7 +2,7 @@
"title": "CLI Reference",
"defaultOpen": true,
"pages": [
"index",
"ktx",
"ktx-setup",
"ktx-connection",
"ktx-ingest",

View file

@ -91,7 +91,6 @@ packages/
connector-postgres/ # PostgreSQL connector
connector-snowflake/ # Snowflake connector
connector-bigquery/ # BigQuery connector
connector-clickhouse/ # ClickHouse connector
connector-mysql/ # MySQL connector
connector-sqlserver/ # SQL Server connector
connector-sqlite/ # SQLite connector

View file

@ -1,141 +1,115 @@
---
title: Context-Aware SQL
description: How KTX turns reviewed context, grain, and relationship evidence into safe SQL for agents.
title: Semantic Querying
description: How KTX compiles a short Semantic Query into safe, dialect-correct SQL using a reviewed join graph.
---
## Why query planning needs context
import { SemanticLayerFlow } from "@/components/semantic-layer-flow";
Agents can generate SQL from schema alone, but safe analytics SQL needs more
than table names. KTX uses reviewed context to understand grain, joins, measures,
filters, and where aggregation must happen.
KTX's semantic layer is a compiler that turns intent into SQL. The agent
declares _what_ it wants — measures, dimensions, filters — in a small
Semantic Query. KTX figures out the _how_: which tables to join, what
grain to aggregate at, how to keep fan-out from inflating measures, and
what dialect the warehouse speaks.
Read this page as four mechanics:
This page covers four mechanics:
- context files feed the semantic engine;
- evidence becomes a join graph with grain and relationship metadata;
- review keeps the graph current;
- query planning avoids fan-out and ambiguous joins.
- The Semantic Query contract agents send to the compiler.
- The planner steps that turn a Semantic Query into SQL.
- The join graph that backs those steps, and how it's built.
- The fan-out failure mode the compiler is designed to prevent.
## Where the semantic layer fits
## Imperative SQL vs declarative Semantic Querying
This planner is one subsystem inside KTX's broader context layer. It uses source
YAML, wiki context, scan evidence, and provenance to make context actionable for
SQL generation.
Writing analytics SQL is imperative work. Every question forces the
agent to hold two things in mind at once: _what_ it wants — a measure, a
slice, a filter — and _how_ to compute it: which tables to join, which
key links them, what grain to aggregate at, how to keep one fact from
inflating another, and what dialect the warehouse speaks. Plumbing on
top of intent, every query.
<div
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
aria-label="How context inputs flow through the semantic layer into agent workflows"
>
<div className="grid gap-0 lg:grid-cols-[1fr_2rem_1.12fr_2rem_1fr]">
<section className="bg-fd-background p-4">
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Context inputs"}
</p>
<div className="grid gap-2 text-sm">
<div className="border-l-2 border-fd-primary bg-fd-card px-3 py-2">
<p className="font-mono text-xs text-fd-foreground">semantic-layer/</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"source YAML, measures, joins, grain"}
</p>
</div>
<div className="border-l-2 border-amber-500 bg-fd-card px-3 py-2">
<p className="font-mono text-xs text-fd-foreground">wiki/</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"business rules, definitions, caveats"}
</p>
</div>
<div className="border-l-2 border-orange-500 bg-fd-card px-3 py-2">
<p className="font-mono text-xs text-fd-foreground">raw-sources/</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"schema scans, keys, imported metadata"}
</p>
</div>
<div className="border-l-2 border-slate-500 bg-fd-card px-3 py-2 dark:border-cyan-200">
<p className="font-mono text-xs text-fd-foreground">provenance</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"ingest decisions and review history"}
</p>
</div>
</div>
</section>
KTX's semantic layer separates those concerns:
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
<span className="h-px w-full bg-fd-border" />
</div>
- **You and KTX maintain the how.** Sources, joins, grain, measures, and
segments live in reviewable YAML — the analytical contract the team
agrees on, version-controlled.
- **The agent declares the what.** It sends a Semantic Query and trusts
the compiler to produce safe SQL.
<section className="relative bg-[#102226] p-5 text-white dark:bg-[#0b181b]">
<div className="absolute inset-y-0 left-0 w-1 bg-fd-primary" />
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-cyan-200">
{"Semantic layer engine"}
</p>
<div className="grid gap-2 sm:grid-cols-2">
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="text-sm font-semibold">Join graph</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"sources as nodes, joins as typed edges"}
</p>
</div>
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="text-sm font-semibold">Grain</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"row identity before aggregation"}
</p>
</div>
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="text-sm font-semibold">Measures</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"verified formulas and filters"}
</p>
</div>
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="whitespace-nowrap break-normal text-sm font-semibold">Relationships</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"many_to_one, one_to_many, one_to_one"}
</p>
</div>
</div>
<div className="mt-3 rounded-md border border-cyan-100/20 bg-cyan-50/10 px-3 py-2 text-sm">
{"Safe query planning before SQL is generated."}
</div>
</section>
The agent stops reasoning about plumbing. It states intent. KTX turns
that into SQL the warehouse can run.
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
<span className="h-px w-full bg-fd-border" />
</div>
<SemanticLayerFlow />
<section className="bg-fd-muted/35 p-4">
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Agent workflows"}
</p>
<div className="space-y-2 text-sm">
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Search sources and wiki pages"}
</div>
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Compile trusted SQL"}
</div>
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Explain metrics and provenance"}
</div>
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Patch files and validate review"}
</div>
</div>
</section>
</div>
</div>
## The Semantic Query contract
## Join graph
A Semantic Query is the JSON payload the agent sends. Every field is optional
except `measures`, and column references are fully qualified
(`source.column`) so the compiler never has to guess where a name came
from.
A semantic source is a node. A join is a typed edge. KTX uses the graph to
choose valid paths and detect row-multiplying joins before SQL is generated.
Notice what's _not_ in the payload: no `FROM`, no `JOIN`, no `GROUP BY`,
no `WITH`. The agent states what it wants. KTX picks the join path, the
grain, the SQL shape, and the dialect.
| Field | Purpose |
|-------|---------|
| `measures` | Names of pre-defined measures, or inline expressions like `sum(orders.amount)` |
| `dimensions` | Columns to group by, optionally with a `granularity` for time fields |
| `filters` | Predicates, classified as row-level (`WHERE`) or aggregate-level (`HAVING`) at planning time |
| `segments` | Named filter sets defined on a source, applied as additional predicates |
| `order_by` | Sort fields with optional direction |
| `limit` | Row cap on the result |
A typical agent call looks like this:
```json
{
"measures": ["orders.revenue", "tickets.ticket_count"],
"dimensions": ["customers.segment"],
"filters": ["orders.created_at >= '2025-01-01'"],
"limit": 1000
}
```
That payload is enough for KTX to plan and compile. The agent never
authors a join, a CTE, or a dialect-specific cast.
## What the planner does
The planner is a deterministic pipeline. Each Semantic Query runs through the
same ordered steps before any SQL is emitted.
1. **Resolve refs.** Qualify bare column names, look up pre-defined
measure expressions, and classify each measure as raw or derived.
2. **Pick an anchor and build the join tree.** Choose the largest measure
source as the root, then run a shortest-path search across the typed
join graph to reach every required source.
3. **Detect fan-out.** Group measures by their owning source. If more
than one group exists, the planner marks the query as a chasm trap
and switches to aggregate-locality compilation.
4. **Classify filters.** Split predicates into row-level (`WHERE`) and
aggregate-level (`HAVING`) based on whether they reference a measure.
5. **Generate SQL.** Emit Postgres-dialect SQL in the shape the plan calls
for: single-source aggregation when the query is safe, per-source CTEs
when fan-out is present.
6. **Transpile to the target dialect.** Run the result through `sqlglot`
so the warehouse receives syntax it understands.
The output is the SQL string, the resolved plan, and any warnings
surfaced during planning.
## The join graph
A semantic source is a node. A declared join is a typed edge. The graph
is bidirectional: every forward edge has a reverse with the relationship
inverted, so the planner can traverse from any anchor.
| Relationship | Planning impact |
|--------------|-----------------|
| `many_to_one` | Usually safe for adding dimensions |
| `one_to_many` | Can multiply measures and trigger fan-out handling |
| `one_to_one` | Usually safe when keys are correct |
| Equal-cost paths | Ambiguous unless aliases or explicit joins disambiguate |
| `many_to_one` | Safe direction for adding dimensions |
| `one_to_many` | Multiplies measures and triggers fan-out handling |
| `one_to_one` | Safe in either direction when keys match |
| Equal-cost paths | Treated as ambiguous; aliases or explicit joins resolve them |
<figure
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card p-4 shadow-sm"
@ -143,43 +117,60 @@ choose valid paths and detect row-multiplying joins before SQL is generated.
>
<div className="grid gap-3 md:grid-cols-[1fr_1fr_1fr]">
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
<p className="text-sm font-semibold text-fd-foreground">customers</p>
<p className="mt-1 text-xs text-fd-muted-foreground">grain: customer_id</p>
<p className="text-sm font-semibold text-fd-foreground">{"customers"}</p>
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: customer_id"}</p>
</div>
<div className="rounded-md border-2 border-fd-primary bg-fd-background px-4 py-3">
<p className="text-sm font-semibold text-fd-foreground">orders</p>
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id</p>
<p className="text-sm font-semibold text-fd-foreground">{"orders"}</p>
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: order_id"}</p>
</div>
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
<p className="text-sm font-semibold text-fd-foreground">order_items</p>
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id, line_id</p>
<p className="text-sm font-semibold text-fd-foreground">{"order_items"}</p>
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: order_id, line_id"}</p>
</div>
</div>
<div className="my-3 grid gap-2 text-center text-xs font-medium text-fd-muted-foreground md:grid-cols-[1fr_1fr]">
<div>orders -> customers: many_to_one</div>
<div>orders -> order_items: one_to_many</div>
<div>{"orders -> customers: many_to_one"}</div>
<div>{"orders -> order_items: one_to_many"}</div>
</div>
<figcaption className="mt-4 border-t border-fd-border pt-3 text-left text-xs leading-5 text-fd-muted-foreground">
<span className="font-medium text-fd-foreground">{"Example: "}</span>
{"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it can duplicate order-level measures."}
{"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it duplicates order-level measures."}
</figcaption>
</figure>
The graph is bidirectional for planning. If `orders -> customers` is
`many_to_one`, the reverse path is `one_to_many`.
Edges and grain come from your YAML. The compiler treats them as fact,
not a guess.
```yaml
# semantic-layer/warehouse/orders.yaml
name: orders
table: public.orders
grain: [order_id]
joins:
  - to: customers
    on: customer_id = customers.id
    relationship: many_to_one
  - to: order_items
    on: id = order_items.order_id
    relationship: one_to_many
measures:
  - name: revenue
    expr: sum(case when status != 'refunded' then amount end)
```
## Building and maintaining the graph
KTX starts from evidence, writes reviewable source YAML, and treats the merged
diff as the accepted graph.
KTX builds the graph from evidence and accepted edits, not from runtime
inference. Each input contributes a different kind of authority.
| Evidence | What it contributes |
|----------|---------------------|
| Declared primary keys | Initial row grain |
| Declared foreign keys | Formal join candidates |
| Inferred relationships | Edges when warehouses lack constraints |
| Inferred relationships | Edges when the warehouse lacks constraints |
| dbt, MetricFlow, and LookML imports | Existing metrics, dimensions, explores, and joins |
| Query history | Real join and filter patterns |
| Query history | Real join and filter patterns from analyst SQL |
| Analyst review | Final authority before context is merged |
<div
@ -295,105 +286,55 @@ diff as the accepted graph.
</div>
</div>
## Modeling problems
## Fan-out and aggregate locality
Fan-out is the classic failure mode: an order-level measure joins to line-item
rows before aggregation, so one order becomes many rows.
Fan-out is the classic analytics failure mode. Two fact tables join to a
shared dimension. A naive query joins them all together first, so each
row from one fact is multiplied by the matching rows from the other.
Measures duplicate, numbers go wrong, and the agent doesn't notice.
| Problem | What happens | How KTX handles it |
|---------|--------------|--------------------|
| Order measure joins to `order_items` | `orders.revenue` repeats once per item | Detect `one_to_many` and pre-aggregate |
| Two fact sources share `customers` | Measures multiply across the shared dimension | Treat as a chasm trap and plan each fact locally |
| Filter crosses `one_to_many` | Filtering changes measure grain | Reject or localize the filter |
| Equal-cost paths connect sources | Join choice is ambiguous | Prefer safer paths or require aliases |
## Execution planning
The planner resolves sources, chooses a join tree, checks relationship paths,
and picks a simple or aggregate-locality SQL shape.
KTX's planner detects the shape by grouping measures by their owning
source. If more than one source contributes raw measures, the generator
switches to aggregate locality: each fact is pre-aggregated at its own
grain inside a CTE, and the CTEs are joined back to the dimension at the
end.
| Naive SQL shape | Semantic-layer SQL shape |
|-----------------|--------------------------|
| Join facts and dimensions first, then aggregate | Aggregate each fact source at its own grain, then join results |
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source when locality is needed |
| Trust the shortest textual join path | Prefer safe relationship paths and reject disconnected sources |
| Let dimension grain differ across facts | Raise when asymmetric dimensions would fan out another measure |
| Join facts and dimensions first, then aggregate | Aggregate each fact at its own grain, then join |
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source |
| Trust the shortest textual join path | Prefer typed safe paths, reject disconnected sources |
| Let dimension grain differ across facts | Raise when an asymmetric dimension would fan out another measure |
<div
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
aria-label="Fan-out safe execution shape"
>
<div className="border-b border-fd-border bg-fd-muted/35 px-4 py-3">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Fan-out handling"}
</p>
<p className="mt-1 text-sm leading-6 text-fd-muted-foreground">
{"The same question planned before and after KTX preserves the measure grain."}
</p>
</div>
<div className="grid gap-3 bg-fd-background p-4 md:grid-cols-[0.92fr_1.08fr]">
<section className="flex min-h-full flex-col rounded-md border border-fd-border bg-fd-card">
<div className="border-b border-fd-border px-4 py-3">
<p className="text-[11px] font-semibold uppercase tracking-wide text-red-600 dark:text-red-300">
{"Unsafe shape"}
</p>
<p className="mt-1 text-sm font-semibold text-fd-foreground">
{"Join first, aggregate later"}
</p>
</div>
<pre className="m-0 min-h-[13rem] flex-1 overflow-x-auto bg-transparent px-4 py-3 text-xs leading-5 text-fd-foreground">
{`orders
-> join order_items
-> join customers
The result is the answer an analyst would expect, computed with the join
shape an analyst would have written by hand.
group by
customer_segment
## Where the context comes from
measure
sum(orders.amount)`}
</pre>
<div className="border-t border-fd-border bg-red-50/60 px-4 py-3 text-sm leading-6 text-red-950 dark:bg-red-950/20 dark:text-red-100">
{"Order-level revenue is exposed to line-item fan-out before aggregation."}
</div>
</section>
<section className="flex min-h-full flex-col rounded-md border border-fd-primary/40 bg-fd-card shadow-[inset_4px_0_0_var(--color-fd-primary)]">
<div className="border-b border-fd-border px-4 py-3">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-primary">
{"KTX shape"}
</p>
<p className="mt-1 text-sm font-semibold text-fd-foreground">
{"Aggregate locally, then join"}
</p>
</div>
<pre className="m-0 min-h-[13rem] flex-1 overflow-x-auto bg-transparent px-4 py-3 text-xs leading-5 text-fd-foreground">
{`orders_agg as (
select customer_id, sum(amount) revenue
from orders
group by customer_id
)
select customers.segment, sum(revenue)
from orders_agg
join customers`}
</pre>
<div className="border-t border-fd-border bg-fd-primary/10 px-4 py-3 text-sm leading-6 text-fd-foreground">
{"The measure is pre-aggregated at order grain before dimensions are joined."}
</div>
</section>
</div>
</div>
The planner is only as good as the YAML it reads. KTX builds and
maintains that YAML for you.
The result is structured planning: validated sources, typed relationships,
graph search, fan-out detection, aggregate locality, and dialect transpilation.
- `raw-sources/<connection>/` holds scan evidence from your warehouse:
schemas, columns, keys, samples, and observed usage patterns.
- `wiki/` holds business language, definitions, and caveats. The
planner doesn't read the wiki at compile time, but the agent does, so
measure names and dimensions stay anchored to terms the team uses.
- `semantic-layer/<connection>/` holds the structured sources, joins,
grain, measures, and segments the planner actually compiles against.
Every accepted edit flows back into the next ingest, so the graph stays
current as the warehouse changes.
## Agent usage notes
Use this page when an agent needs to explain how KTX turns reviewed semantic
context into SQL, why relationship metadata matters, or why a query was rejected
as unsafe.
Point an agent at this page when it needs to explain why KTX asks for
grain, why a query was rejected as unsafe, or why the compiled SQL looks
different from what the agent first proposed.
| Agent task | Relevant section | Next page |
|------------|------------------|-----------|
| Explain why KTX asks for `grain` and relationship types | Join graph | [Writing Context](/docs/guides/writing-context) |
| Diagnose duplicated measures after a join | Modeling problems | [ktx sl](/docs/cli-reference/ktx-sl) |
| Explain safe SQL generation | Execution planning | [ktx sl](/docs/cli-reference/ktx-sl) |
| Describe how semantic context stays current | Building and maintaining the graph | [Context as Code](/docs/concepts/context-as-code) |
| Explain the Semantic Query shape | The Semantic Query contract | [ktx sl](/docs/cli-reference/ktx-sl) |
| Describe what the planner does between query and SQL | What the planner does | [ktx sl](/docs/cli-reference/ktx-sl) |
| Explain why KTX asks for grain and relationship types | The join graph | [Writing context](/docs/guides/writing-context) |
| Diagnose duplicated measures after a join | Fan-out and aggregate locality | [ktx sl](/docs/cli-reference/ktx-sl) |
| Describe how semantic context stays current | Building and maintaining the graph | [Context as code](/docs/concepts/context-as-code) |

View file

@ -74,7 +74,7 @@ measures:
```
For join graphs, fan-out handling, and execution mechanics, read
[Context-Aware SQL](/docs/concepts/semantic-layer-internals).
[Semantic Querying](/docs/concepts/semantic-layer-internals).
## Wiki pages

View file

@ -1,6 +1,6 @@
---
title: Introduction
description: What KTX is, how it works, and where to start.
description: KTX is an open-source, self-improving context layer for data agents.
---
import { ProductMechanics } from "@/components/product-mechanics";
@ -23,54 +23,75 @@ import { ProductMechanics } from "@/components/product-mechanics";
Make analytics context usable by agents
</h1>
<p className="mt-4 max-w-2xl text-lg text-fd-muted-foreground" style={{ lineHeight: '1.7' }}>
{'KTX turns warehouse metadata, semantic definitions, BI usage, and team knowledge into local files and runtime tools that database agents can trust.'}
{'KTX is an open-source context layer for database agents. It turns warehouse metadata, BI models, query history, docs, and approved metric definitions into reviewable files agents can search and execute.'}
</p>
</div>
</div>
## Why KTX
## Why KTX helps
- Schemas show columns, not business rules.
- Agents need trusted metrics, joins, filters, caveats, and provenance.
- KTX captures that context before agents write SQL, docs, or semantic edits.
KTX gives agents a shared context workspace before they write SQL, answer a
question, or update analytics definitions.
## What KTX creates
- **Context as code.** KTX writes wiki pages and semantic-layer definitions as
git-based files you can review, diff, and merge.
- **Self-improving ingest.** KTX reads warehouses, BI tools, modeling code,
query history, and notes, then reconciles new evidence with accepted context.
- **Executable semantics.** Agents can use approved measures, joins, filters,
dimensions, and segments instead of rebuilding canonical SQL from scratch.
- **Agent-native access.** CLI and MCP tools let agents search context, compile
semantic queries, run read-only SQL, and propose updates.
| Path | What it gives agents |
|------|----------------------|
| `semantic-layer/` | Measures, dimensions, joins, grain, filters, segments |
| `wiki/` | Business definitions, caveats, policies, analyst notes |
| `raw-sources/` | Extracted metadata, scan output, relationship evidence |
| `.ktx/` | Local indexes, embeddings, setup state, runtime data |
KTX complements existing semantic layers by pairing metric definitions with the
surrounding business knowledge, caveats, provenance, and review workflow agents
need for data work.
## How KTX works
KTX has two connected sides: it builds and maintains the context layer, then
serves that context to agents at runtime.
| Side | What KTX does |
|------|---------------|
| **Ingest and auto-maintain knowledge** | Reads your data stack and company knowledge, reconciles new evidence with accepted context, and automatically keeps changes to `semantic-layer/` and `wiki/` as version-controlled diffs. |
| **Serve agents at runtime** | Helps agents find the right wiki pages and semantic-layer entities, then compile or execute semantic queries through CLI and MCP tools. |
<ProductMechanics />
## Use it for
- **Generate SQL** from approved measures, dimensions, joins, and filters
- **Explain provenance** with wiki context and warehouse evidence
- **Repair context** through reviewable YAML and Markdown diffs
- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and warehouses
Use KTX when agents need more than raw database access. Agents can search wiki
context, find semantic-layer entities, compile trusted semantic queries, run
read-only SQL, and use the same tools through MCP.
Databases: SQLite, PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL
Server.
- Generate SQL from approved metrics, joins, filters, and dimensions.
- Explain metric provenance with wiki context and source evidence.
- Repair context through reviewable YAML and Markdown diffs.
- Work alongside dbt, MetricFlow, LookML, Looker, Metabase, Notion, and
supported databases.
## Start here
Choose the route that matches what you want to do next. The quickstart is the
best first step for users; contributor setup lives in the community docs.
<Cards>
<Card title="Quickstart" href="/docs/getting-started/quickstart">
Set up KTX and build your first context in under 10 minutes.
Install KTX, run setup, build context, and connect an agent.
</Card>
<Card title="Guides" href="/docs/guides/building-context">
Hands-on workflows for scanning, ingesting, writing, and serving.
<Card title="The Context Layer" href="/docs/concepts/the-context-layer">
Understand why agents need more than schema access and raw SQL.
</Card>
<Card title="Building Context" href="/docs/guides/building-context">
Refresh context from databases, BI tools, query history, and documents.
</Card>
<Card title="Writing Context" href="/docs/guides/writing-context">
Edit semantic-layer YAML and wiki Markdown safely.
</Card>
<Card title="CLI Reference" href="/docs/cli-reference/ktx-setup">
<Card title="CLI Reference" href="/docs/cli-reference/ktx">
Complete flag and subcommand reference for every KTX command.
</Card>
<Card title="AI Resources" href="/docs/ai-resources">
<Card title="Agent Quickstart" href="/docs/ai-resources/agent-quickstart">
Machine-readable docs and agent-facing setup notes.
</Card>
</Cards>

View file

@ -1,135 +1,137 @@
---
title: Quickstart
description: Set up KTX, build local context, and connect your coding agent.
description: Install KTX, run setup, and connect your coding agent.
---
This guide gets a local analytics project ready for KTX. You will install the
CLI, run the setup wizard, connect a database, build context, and install agent
rules that teach your coding assistant which KTX commands to run.
import { CopyButton } from "@/components/copy-button";
If you are a coding assistant choosing a docs route, start with the
[Agent Quickstart](/docs/ai-resources/agent-quickstart). This page is the
human setup walkthrough.
This guide takes a local analytics project from empty to agent-ready. You'll
install the CLI, run one guided setup command, and hand the context to a
coding assistant.
## What setup does
If you're a coding assistant choosing a docs route, start with the
[Agent Quickstart](/docs/ai-resources/agent-quickstart) instead.
`ktx setup` is the main project workflow. It can create or resume `ktx.yaml`,
configure model and embedding providers, add database connections, add optional
context sources, build the first context artifacts, and install agent
integration.
<div
className="not-prose my-8 rounded-xl border p-5 sm:p-6"
style={{
borderColor: 'color-mix(in oklch, #ff8a4d 35%, transparent)',
background: 'color-mix(in oklch, #ff8a4d 8%, transparent)',
}}
>
<div
className="text-xs font-semibold uppercase tracking-wider"
style={{ color: '#ff8a4d' }}
>
Need a warehouse to play with?
</div>
<div className="mt-2 text-base leading-relaxed text-fd-foreground">
Try KTX against a real data stack - Postgres, dbt, Metabase, and Notion
pre-loaded with the Orbit demo corpus. The page lists demo credentials
you can paste straight into `ktx setup`.
</div>
<a
href="https://kaelio.com/start"
className="mt-4 inline-flex items-center gap-1 text-base font-semibold no-underline hover:underline"
style={{
color: '#ff8a4d',
textDecorationColor: '#ff8a4d',
}}
>
Get demo credentials at kaelio.com/start →
</a>
</div>
When you run bare `ktx` in an interactive terminal outside a KTX project, the
CLI opens the same setup experience. Inside an existing project, `ktx setup`
resumes incomplete work or opens a menu for changing setup, connecting an
agent, checking status, or exploring a demo project.
<div
className="not-prose my-6 rounded-lg border p-4"
style={{
borderColor: 'color-mix(in oklch, var(--color-fd-primary) 35%, transparent)',
background: 'color-mix(in oklch, var(--color-fd-primary) 8%, transparent)',
}}
>
<div className="text-sm font-semibold text-fd-foreground">
Run setup from an agent
</div>
<div className="mt-2 text-sm leading-6 text-fd-muted-foreground">
You can ask an agent such as Claude Code, Codex, Cursor, or OpenCode to
install and configure KTX for you. The{' '}
<a href="/ktx/docs/agents-setup.md" className="font-medium underline">
agent setup Markdown prompt
</a>{' '}
tells the agent how to check prerequisites, ask only for credentials or
connection choices, run <code>ktx setup</code>, verify connections, and
report the result.
</div>
<div className="mt-3 text-sm leading-6 text-fd-muted-foreground">
Use a prompt like this from the project you want to configure:
</div>
<div className="mt-3 max-w-full overflow-hidden rounded-md border bg-fd-background">
<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
<span className="text-xs font-semibold uppercase tracking-wide text-fd-muted-foreground">
Prompt
</span>
<CopyButton
text={`Follow instructions from
https://docs.kaelio.com/ktx/docs/agents-setup.md
to install and configure ktx`}
className="-my-1"
/>
</div>
<div className="p-3 font-mono text-sm leading-6 text-fd-foreground">
<div>Follow instructions from</div>
<div className="break-all">https://docs.kaelio.com/ktx/docs/agents-setup.md</div>
<div>to install and configure ktx</div>
</div>
</div>
</div>
## Install the CLI
Install the published `@kaelio/ktx` package:
Install the published package globally:
```bash
npm install -g @kaelio/ktx
```
Then run setup from the analytics project directory:
KTX is open source. If you'd like to hack on it or run from a local checkout,
the source lives at [github.com/kaelio/ktx](https://github.com/kaelio/ktx) -
see [Contributing](/docs/community/contributing) to get set up.
## Run setup
From your project directory, run:
```bash
ktx setup
```
The local checkout workflow is only for KTX contributors. See
[Contributing](/docs/community/contributing) for that path.
The wizard walks you through everything KTX needs in one pass:
## Step 1: Choose the project
1. **Project** - creates or resumes `ktx.yaml` in the current directory.
2. **LLM** - picks a Claude backend. The default uses your local Claude Code
session, so no API key is required. You can also use an Anthropic API key
or Vertex AI.
3. **Embeddings** - picks an embeddings backend. Choose OpenAI for hosted
embeddings or `sentence-transformers` to run locally without an API key.
4. **Database** - adds at least one primary connection. Supported drivers:
SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake.
5. **Context sources** - optionally adds dbt, MetricFlow, LookML, Looker,
Metabase, or Notion. You can skip and add them later.
6. **Build** - runs the first ingest so semantic-layer sources and wiki pages
are ready for agents.
7. **Agent integration** - installs project-local rules for Claude Code,
Codex, Cursor, OpenCode, or universal `.agents`.
In an interactive terminal, setup can create a new KTX project or resume the
nearest existing project. The main project file is `ktx.yaml`.
For scripted setup, pass the project directory explicitly:
```bash
ktx setup --project-dir ./analytics
```
If setup exits early, rerun `ktx setup` in the same directory. KTX keeps local
setup progress under `.ktx/setup/` and resumes from the remaining work.
## Step 2: Configure the LLM
KTX uses a Claude model for ingest agents that turn schemas, SQL, BI metadata,
and documents into semantic-layer sources and wiki context.
Setup supports three LLM provider paths:
| Provider | Use when | Credential model |
|----------|----------|------------------|
| Claude subscription (Pro/Max) | You want KTX to use your local Claude Code session | Claude Code local authentication |
| Anthropic API key | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
| Google Vertex AI for Anthropic Claude | Your organization runs Claude through Google Cloud | Application Default Credentials plus Vertex project and location |
For Anthropic API, setup can read the key from the environment or save a pasted
key to `.ktx/secrets/anthropic-api-key`. `ktx.yaml` stores an `env:` or `file:`
reference, not the raw key.
For Vertex AI, setup uses Google Application Default Credentials. It can read
your active `gcloud` project, list visible projects, or accept explicit
`--vertex-project` and `--vertex-location` values.
To use your local Claude Code session instead of an API key, set:
```yaml
llm:
provider:
backend: claude-code
models:
default: sonnet
triage: haiku
candidateExtraction: sonnet
curator: sonnet
reconcile: sonnet
repair: sonnet
```
`claude-code` uses the Claude Code authentication already configured on your
machine. It doesn't use `ANTHROPIC_API_KEY`, Vertex credentials, AI Gateway
tokens, or Bedrock credentials. In non-interactive setup, pass
`--llm-model opus`, `--llm-model sonnet`, `--llm-model haiku`, or a full Claude
model ID to select the Claude Code model.
Setup checks the selected model before saving. Anthropic API setup fetches live
Claude model choices when possible and falls back to bundled defaults if model
discovery is unavailable.
## Step 3: Configure embeddings
KTX uses embeddings for semantic search over semantic-layer sources, wiki
context, schema metadata, and relationship evidence.
| Backend | Default model | Notes |
|---------|---------------|-------|
| OpenAI | `text-embedding-3-small` | Recommended for hosted embeddings. Requires an OpenAI API key. |
| Local sentence-transformers | `all-MiniLM-L6-v2` | Runs through the KTX-managed Python runtime. No hosted embedding key is required. |
OpenAI setup reads `OPENAI_API_KEY` or saves a local secret file. Local
sentence-transformers setup can install and start the managed runtime during
setup. To prepare that runtime before setup, run:
If you choose local `sentence-transformers` embeddings, KTX uses the managed
Python runtime. To prepare it before setup, run:
```bash
ktx dev runtime install --feature local-embeddings --yes
ktx dev runtime start --feature local-embeddings
```
## Step 4: Add a database
KTX needs at least one primary database connection before it can build database
context. The wizard supports SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server,
BigQuery, and Snowflake.
You can usually enter connection fields interactively or provide a URL. Secret
URLs can be stored as local files under `.ktx/secrets/` or referenced with
`env:NAME` in `ktx.yaml`.
After saving a connection, setup tests it and builds fast schema context:
During the database step, setup tests the saved connection and builds initial
schema context:
```text
Testing warehouse
@ -137,114 +139,24 @@ Testing warehouse
Building schema context for warehouse
Running fast database ingest
Database ready
warehouse - PostgreSQL - schema context complete
```
PostgreSQL, BigQuery, and Snowflake can also enable query-history ingest. Query
history helps KTX learn common query patterns, joins, service-account filters,
and warehouse-specific usage. BigQuery and Snowflake support a lookback window;
Postgres reads the current `pg_stat_statements` aggregate data instead.
If setup exits early, rerun `ktx setup` in the same directory. KTX keeps
progress under `.ktx/setup/` and resumes from the remaining work.
## Step 5: Add context sources
> **Note:** Running bare `ktx` in an interactive terminal outside a KTX
> project opens the same wizard. Inside a project, it opens a menu for
> resuming setup, connecting an agent, checking status, or exploring a
> pre-built demo project.
Context sources are optional, but they make the first context layer much richer.
Setup can add:
## Verify
| Source | Typical input | What KTX learns |
|--------|---------------|-----------------|
| dbt | Local project or Git repo | Models, columns, tests, descriptions, tags |
| MetricFlow | Local project or Git repo | Semantic models, metrics, dimensions, entities |
| LookML | Local files or Git repo | Views, explores, dimensions, measures, joins |
| Looker | API URL and credentials | Explores, looks, dashboards, model metadata |
| Metabase | API URL and key | Questions, dashboards, BI database mappings |
| Notion | Integration token and crawl settings | Business docs and knowledge pages |
Setup maps BI and source metadata back to your primary warehouse connection so
generated context points at the right tables.
You can skip this step and add sources later by rerunning `ktx setup`.
## Step 6: Build context
The context build turns configured databases and sources into local artifacts
agents can read. It runs database ingest first, then source ingest and memory
updates.
Fast database ingest records deterministic schema grounding. Deep ingest adds
AI-enriched descriptions, embeddings, relationship evidence, and query-history
context when configured.
When the build finishes, setup verifies that agent-ready context exists:
```text
KTX context is ready for agents.
Databases:
warehouse: deep context complete
Context sources:
dbt_main: memory update complete
Verification:
Agent context: ready
Semantic search: ready
```
If a foreground build is interrupted, rerun `ktx setup` or build the same target
with `ktx ingest <connectionId>`.
## Step 7: Install agent integration
The final setup step installs project-local rules for your coding assistant.
Supported targets are Claude Code, Codex, Cursor, OpenCode, and universal
`.agents`.
You can also run this step later:
```bash
ktx setup --agents --target codex
```
Claude Code and Codex also support global installs:
```bash
ktx setup --agents --target codex --global
```
Agent rules are CLI-based. They point agents at the KTX CLI path that created
the file, so agents do not need a separate `ktx` binary in `PATH`. If the CLI
path changes after reinstalling or moving a checkout, rerun `ktx setup --agents`.
## Generated files
KTX writes plain files so people and agents can inspect changes in git.
| Path | Purpose |
|------|---------|
| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and query-history settings |
| `.ktx/secrets/*` | Local secret files referenced from `ktx.yaml`; do not commit these |
| `.ktx/setup/*` | Local setup and context-build state |
| `.ktx/agents/install-manifest.json` | Manifest used to manage installed agent files |
| `semantic-layer/<connection-id>/*.yaml` | Semantic source definitions used for SQL generation |
| `wiki/global/*.md` | Shared business context and metric definitions |
| `wiki/user/<user-id>/*.md` | User-scoped notes and local context |
| `.claude/skills/ktx/SKILL.md` | Claude Code project skill |
| `.agents/skills/ktx/SKILL.md` | Codex or universal project skill |
| `.cursor/rules/ktx.mdc` | Cursor project rule |
| `.opencode/commands/ktx.md` | OpenCode project command |
## Verify setup
Run:
When setup finishes, check readiness:
```bash
ktx status
```
Example output:
```text
KTX project: /home/user/analytics
Project ready: yes
@ -256,24 +168,59 @@ KTX context built: yes
Agent integration ready: yes (codex:project)
```
Use JSON when an agent or script needs a structured readiness check:
For a structured check inside scripts, use `ktx status --json`.
```bash
ktx status --json
When setup builds deep context, its final context check looks like:
```text
KTX context is ready for agents.
Databases:
warehouse: deep context complete
Context sources:
dbt_main: memory update complete
```
## Scripted setup example
## Connect a coding agent
Use non-interactive setup when creating repeatable fixtures or automation:
The setup wizard installs project-local agent rules in the last step. To
install or change targets later:
```bash
ktx setup --agents
```
Claude Code and Codex also support global installs with `--global`. Agent
rules point at the KTX CLI path that created them, so agents don't need a
separate `ktx` binary on `PATH`. If the CLI path changes, rerun
`ktx setup --agents`.
## What setup writes
KTX writes plain files so people and agents can review changes in git.
| Path | Purpose |
|------|---------|
| `ktx.yaml` | Project configuration |
| `.ktx/secrets/*` | Local secret files referenced from `ktx.yaml` - do not commit |
| `semantic-layer/<connection-id>/*.yaml` | Semantic sources for SQL generation |
| `wiki/global/*.md` | Shared business context and metric definitions |
| `.claude/skills/ktx/`, `.agents/skills/ktx/`, `.cursor/rules/ktx.mdc`, `.opencode/commands/ktx.md` | Installed agent rules |
## Scripted setup
For repeatable fixtures and automation, skip prompts with flags:
```bash
ktx setup \
--project-dir ./analytics \
--no-input \
--yes \
--skip-llm \
--skip-embeddings \
--database postgres \
--new-database-connection-id warehouse \
--database-connection-id warehouse \
--database-url env:DATABASE_URL \
--database-schema public
```
@ -287,23 +234,21 @@ ktx ingest warehouse --fast
See [ktx setup](/docs/cli-reference/ktx-setup) for the full automation flag
surface.
## Common errors
## Common issues
| Symptom | Likely cause | Recovery |
|---------|--------------|----------|
| `ktx: command not found` | The global package is not installed or your shell cannot find it | Reinstall `@kaelio/ktx` and open a new shell |
| Setup resumes the wrong project | `KTX_PROJECT_DIR` or the nearest `ktx.yaml` points somewhere else | Pass `--project-dir <path>` |
| Anthropic health check fails | API key, model id, or access is invalid | Fix `ANTHROPIC_API_KEY` or rerun setup with a different key or model |
| Vertex AI health check fails | Vertex API, Claude access, project, location, or IAM permissions are missing | Check the project, location, Application Default Credentials, and Vertex AI permissions |
| OpenAI embeddings fail | `OPENAI_API_KEY` is missing or invalid | Export the key or choose local sentence-transformers embeddings |
| Local embeddings fail | Managed Python runtime cannot install or start | Run `ktx dev runtime status`, then install the local embeddings runtime |
| Database test fails | Credentials, network access, database, warehouse, or schema is wrong | Test the same values with the database's native client, then rerun setup |
| Context is not built | Setup saved configuration but skipped or interrupted the build | Run `ktx setup` or `ktx ingest --all` |
| Agent integration is incomplete | Setup skipped the agents step or installed a different target | Run `ktx setup --agents --target <target>` |
| Symptom | Fix |
|---------|-----|
| `ktx: command not found` | Reinstall `@kaelio/ktx` and open a new shell |
| Setup resumes the wrong project | Pass `--project-dir <path>` |
| LLM or embeddings health check fails | Rerun setup and pick a different credential, model, or backend |
| Database test fails | Verify the same connection with the database's native client, then rerun setup |
| Agent integration is incomplete | Run `ktx setup --agents --target <target>` |
## Next steps
- Build and refresh context with [Building Context](/docs/guides/building-context).
- Edit semantic sources and wiki pages with [Writing Context](/docs/guides/writing-context).
- Refresh context with [Building Context](/docs/guides/building-context).
- Edit semantic sources and wiki pages with
[Writing Context](/docs/guides/writing-context).
- Connect more tools with [Agent Clients](/docs/integrations/agent-clients).
- Read [The Context Layer](/docs/concepts/the-context-layer) to understand the architecture.
- Read [The Context Layer](/docs/concepts/the-context-layer) to understand
the architecture.

View file

@ -3,12 +3,9 @@ title: Building Context
description: Build and refresh KTX context from databases, source tools, query history, and text.
---
Building context turns configured connections into local semantic-layer sources
and wiki pages. Agents use those files to understand your schema, business
definitions, metric logic, joins, and known caveats before they write SQL.
Use this guide after `ktx setup` has created `ktx.yaml` and at least one
database or context-source connection.
Build context after `ktx setup` creates `ktx.yaml` and at least one database or
context-source connection. KTX writes local semantic-layer sources and wiki
pages for agents to use before writing SQL.
## The build loop
@ -22,15 +19,12 @@ Most projects use this loop:
5. Validate and query representative sources before handing the context to an
agent.
`ktx ingest --all` runs database connections first, then context-source
connections. That order lets dbt, BI, Notion, and text ingest attach context to
known warehouse tables.
`ktx ingest --all` runs databases first, then context-source connections, so
external metadata can attach to known warehouse tables.
## Database ingest
Database ingest connects to a configured warehouse and records local schema
context. It gives agents table, column, type, constraint, and row-count
grounding without requiring them to inspect the database directly.
Database ingest records table, column, type, constraint, and row-count context.
```bash
# Build one configured database connection
@ -55,20 +49,16 @@ ktx ingest warehouse --deep
ktx ingest --all --deep
```
Deep ingest needs LLM and embedding readiness. If those providers are not
configured, run `ktx setup` or use `--fast`.
Deep ingest needs LLM and embedding readiness. If those providers are not
configured, run `ktx setup` or use `--fast`.
When you use `claude-code`, KTX still controls the tool surface for ingest and
memory capture. Claude Code built-in tools, discovered MCP servers, plugins,
skills, agents, and slash commands are not invokable by KTX agent loops unless
they are exact KTX MCP tools for the current run.
With `claude-code`, KTX agent loops can invoke only the KTX MCP tools for the
current run.
## Query history
PostgreSQL, BigQuery, and Snowflake can add query-history context. This helps
KTX learn common joins, filters, service-account patterns, redaction rules, and
usage-heavy query templates. BigQuery and Snowflake support a lookback window;
Postgres reads the current `pg_stat_statements` aggregate data instead.
PostgreSQL, BigQuery, and Snowflake can add query-history context: common joins,
filters, service-account patterns, redaction rules, and high-usage templates.
Enable it during setup, store it under `connections.<id>.context.queryHistory`,
or request it for one run:
@ -84,19 +74,13 @@ for one run.
## Relationship evidence
Many databases do not declare all foreign keys. KTX can score relationship
candidates using signals such as name similarity, type compatibility, value
overlap, embedding similarity, uniqueness, null rate, and structural priors.
The public CLI does not expose separate relationship review subcommands.
Relationship evidence is built as part of deep database ingest when the
connector and readiness checks support it.
KTX scores relationship candidates during supported deep database ingest. The
public CLI does not expose separate relationship review subcommands.
## Context-source ingest
Context-source connections pull business metadata from tools your team already
uses. The current public `ktx ingest` command is connection-centric: pass one
configured connection id, or pass `--all`.
Context-source connections pull metadata from dbt, BI tools, Notion, and other
configured systems. Pass one connection id or `--all`.
```bash
# Build one source connection
@ -117,14 +101,13 @@ Supported source types:
| `metabase` | Metabase API | Questions, dashboards, table metadata, and mappings |
| `notion` | Notion API | Wiki pages and business knowledge |
Source ingest extracts metadata, reconciles it with existing local context, and
writes semantic-layer YAML plus wiki Markdown. It merges rather than blindly
overwriting local edits.
Source ingest writes semantic-layer YAML and wiki Markdown, merging with local
edits instead of overwriting them.
## Text ingest
Use `ktx ingest text` for notes, Markdown files, runbooks, Slack exports, or
other free-form knowledge that should become searchable KTX memory.
Use `ktx ingest text` for notes, Markdown, runbooks, Slack exports, or other
free-form knowledge that should become searchable memory.
```bash
# Capture a Markdown file
@ -146,14 +129,12 @@ Useful flags:
| `--json` | Print structured output |
| `--fail-fast` | Stop after the first failed text item |
Text ingest is a good fit for small, high-signal documents. For system-specific
connectors such as Notion, dbt, or Metabase, prefer configured source ingest so
KTX can preserve source metadata.
Use text ingest for small, high-signal documents. Prefer configured source
ingest for Notion, dbt, Metabase, and similar systems.
## Output and artifacts
Every ingest run prints a summary. Use `--json` when an agent or script needs a
structured plan and per-target results.
Every ingest run prints a summary. Use `--json` for scripts and agents.
```bash
ktx ingest --all --json
@ -168,9 +149,7 @@ Typical generated files:
| `wiki/user/<user-id>/*.md` | Text and memory ingest | User-scoped context |
| `.ktx/setup/context-build.json` | Setup context build | Resume and readiness state for setup |
Ingest sessions also record transcripts with tool calls, LLM responses, and
write decisions. Inspect them when you need to debug why a source or wiki page
was written a certain way.
Ingest transcripts include tool calls, LLM responses, and write decisions.
## Example: first full refresh

View file

@ -3,8 +3,8 @@ title: LLM configuration
description: Configure KTX LLM providers, model roles, and prompt caching.
---
KTX uses the top-level `llm` block in `ktx.yaml` for text generation,
structured extraction, and ingest or memory agent loops.
Configure text generation, structured extraction, and ingest or memory loops in
the top-level `llm` block.
## Backends
@ -15,9 +15,7 @@ Set `llm.provider.backend` to one of these values:
- `vertex`: Use Vertex AI Anthropic models through Google Cloud credentials.
- `gateway`: Use AI Gateway-compatible Anthropic model ids.
- `claude-code`: Use your local Claude Code session through the Claude Agent
SDK. KTX removes provider-routing environment variables from Claude Code
child processes, so this backend doesn't silently fall back to
`ANTHROPIC_API_KEY`, Vertex, Gateway, or Bedrock credentials.
SDK. KTX strips provider-routing environment variables from child processes.
## Claude Code
@ -36,26 +34,20 @@ llm:
repair: sonnet
```
During setup, choose the Claude Code backend interactively or pass the model in
automation:
During setup, choose the backend interactively or pass the model in automation:
```bash
ktx setup --llm-backend claude-code --llm-model opus --no-input
```
For Claude Code, `sonnet`, `opus`, and `haiku` map to the current KTX defaults.
You can also pass a full Claude model ID, such as `claude-opus-4-7`.
For Claude Code, `sonnet`, `opus`, and `haiku` map to KTX defaults. Full Claude
model IDs are also accepted.
`claude-code` keeps KTX tool boundaries intact. KTX exposes only the MCP tools
needed for the current KTX agent loop, disables Claude Code built-in tools,
keeps plugins empty, and denies every non-KTX tool request through
`canUseTool`. The Claude Agent SDK may still report host-discovered slash
commands, skills, and subagent names in init metadata; that metadata is not an
execution grant for KTX agent loops.
`claude-code` exposes only KTX MCP tools for the current agent loop. SDK init
metadata may still list host slash commands, skills, and subagents; KTX does not
grant execution access to them.
## Prompt caching
`llm.promptCaching` has partial parity on `claude-code`. KTX doesn't pass
Anthropic cache-control markers to the Claude Agent SDK. Status and doctor warn
when you configure prompt-cache TTL, tool, or history fields that the Claude
Agent SDK backend ignores.
`llm.promptCaching` has partial parity on `claude-code`. Status and doctor warn
when the Claude Agent SDK backend ignores configured cache fields.

View file

@ -3,9 +3,8 @@ title: Serving Agents
description: Expose KTX context to Claude Code, Codex, Cursor, OpenCode, and custom agents.
---
KTX serves agents through the public CLI and project-local instruction files.
Agents do not need a separate server. They read the generated rules, call KTX
commands, inspect local context files, and use JSON output when they need
KTX serves agents through the CLI and project-local instruction files. Agents
read generated rules, call KTX commands, inspect context files, and use JSON for
structured results.
## Recommended setup
@ -39,14 +38,13 @@ ktx setup --agents --target claude-code --global
ktx setup --agents --target codex --global
```
KTX records installed files in `.ktx/agents/install-manifest.json`. Rerun
`ktx setup --agents` after moving a checkout or reinstalling the CLI so the
generated instructions point at the current CLI path.
Installed files are recorded in `.ktx/agents/install-manifest.json`. Rerun
`ktx setup --agents` after moving a checkout or reinstalling the CLI.
## Agent command set
All supported agent clients use the same command surface. Use `--project-dir`
when the agent is running outside the KTX project directory.
All supported clients use the same command surface. Use `--project-dir` when
the agent runs outside the KTX project directory.
### Readiness
@ -54,9 +52,8 @@ when the agent is running outside the KTX project directory.
ktx status --json
```
Agents should run this before relying on context. It reports project, LLM,
embedding, database, context-source, context-build, and agent-integration
readiness.
Run this before relying on context. It reports project, provider, connection,
context-build, and agent-integration readiness.
### Semantic layer discovery
@ -66,8 +63,8 @@ ktx sl list --connection-id warehouse --json
ktx sl search "revenue" --json --limit 10
```
Agents use these commands to discover source names, connection ids, measures,
dimensions, and likely files to inspect.
Use these commands to find source names, connection ids, measures, dimensions,
and files to inspect.
### Semantic-layer validation and queries
@ -106,9 +103,8 @@ ktx wiki list --json
ktx wiki search "revenue recognition" --json --limit 10
```
Agents should search wiki context when a question depends on business
definitions, metric caveats, process rules, or terms that are not obvious from
schema names.
Search wiki context for business definitions, metric caveats, process rules, and
non-obvious terms.
### Context refresh
@ -120,8 +116,7 @@ ktx ingest --all
ktx ingest text docs/revenue-notes.md --connection-id warehouse
```
Use `--deep` only when LLM and embedding setup is ready and the user expects an
AI-enriched refresh.
Use `--deep` only when LLM and embedding setup is ready.
## Good agent behavior
@ -135,14 +130,12 @@ Agents should:
- Validate edited semantic sources with `ktx sl validate`.
- Keep generated context changes reviewable in git.
Agents should not assume a background server, ORPC route, frontend app, or
external migration system exists. KTX is a local context layer with a CLI and
plain project files.
KTX is a local context layer with a CLI and plain project files. Do not assume
a background server, ORPC route, frontend app, or external migration system
exists.
## Manual setup
Manual setup is useful for custom agents that can read project-local
instructions but are not yet a named target.
Use manual setup for custom agents that can read project-local instructions.
1. Install the universal target:

View file

@ -3,12 +3,8 @@ title: Writing Context
description: Edit semantic sources and wiki pages so agents use your business logic.
---
KTX context is meant to be edited. Ingest gives you a grounded first draft, then
you refine source YAML and wiki Markdown until agents can answer data questions
with the same definitions your team uses.
Use this guide when you are adding measures, fixing joins, documenting business
rules, or reviewing context changes made by an agent.
Ingest creates the first draft. Edit source YAML and wiki Markdown when you need
sharper metrics, joins, or business rules.
## Editing workflow
@ -45,10 +41,8 @@ Use this order for most context changes:
## Semantic sources
Semantic sources are YAML files that describe queryable entities. A source is
usually a table, but it can also point at a custom SQL expression. Sources
define the vocabulary agents use for measures, dimensions, segments, joins, and
grain-aware query planning.
Semantic sources are YAML files for queryable tables or custom SQL. They define
agent-facing measures, dimensions, segments, joins, and grain.
Source files live at:
@ -198,8 +192,8 @@ joins:
## Measures
Good measures have precise names, SQL expressions at the correct grain, and
descriptions that say what is included and excluded.
Good measures have precise names, correct-grain SQL, and descriptions that name
key inclusions and exclusions.
```yaml
measures:
@ -209,14 +203,13 @@ measures:
description: Completed order revenue after refunds, excluding cancelled orders.
```
Prefer one canonical measure plus wiki synonyms over several nearly identical
measures. If your team uses multiple definitions, document the distinction in a
wiki page and link it with `sl_refs`.
Prefer one canonical measure plus wiki synonyms. Put competing definitions in a
wiki page and link it with `sl_refs`.
## Joins and grain
`grain` and `relationship` prevent agents from producing double-counted SQL.
State the row grain even when it seems obvious.
`grain` and `relationship` prevent double-counted SQL. State the row grain even
when it seems obvious.
```yaml
grain:
@ -228,8 +221,7 @@ joins:
```
Use `many_to_one` for dimensions such as customer, account, product, or plan.
Use `one_to_many` only when the target can fan out the source rows, such as
orders to order items.
Use `one_to_many` only when the target can fan out rows.
## Validate and query
@ -239,8 +231,7 @@ Validation checks source YAML against the live database schema:
ktx sl validate orders --connection-id warehouse
```
It catches missing columns, invalid join targets, and table-reference problems
before an agent relies on the source.
It catches missing columns, invalid joins, and table-reference problems.
Compile a query to inspect generated SQL:
@ -268,9 +259,8 @@ ktx sl query \
## Wiki pages
Wiki pages capture business context that does not belong in a single source
file: metric policies, dashboard caveats, company vocabulary, data freshness,
known issues, and source-of-truth notes.
Wiki pages hold context that does not belong in one source file: policies,
caveats, vocabulary, freshness, known issues, and source-of-truth notes.
Wiki files live under:
@ -280,8 +270,7 @@ wiki/
user/<user-id>/
```
Use global pages for shared business rules. Use user-scoped pages for local
notes, personal conventions, or context that should not be shared broadly.
Use global pages for shared rules and user-scoped pages for local notes.
### Wiki page example
@ -338,8 +327,7 @@ ktx sl search "revenue" --json
ktx wiki search "revenue recognition" --json --limit 10
```
Check that definitions are specific, hidden columns stay hidden, joins have
explicit relationships, and measures compile into the expected SQL.
Check definitions, hidden columns, join relationships, and generated SQL.
## Common errors

View file

@ -7,32 +7,32 @@ KTX exposes context to end-user agents through MCP tools. The CLI remains the
admin surface for setup, ingest, status, daemon lifecycle, and debugging.
Run `ktx setup` and select your client agent targets, or configure manually
using the snippets below. Choose **MCP tools + analytics skill** for client
agents. Choose **MCP tools + analytics skill + admin CLI skill** only when a
developer or operator agent also needs pinned `ktx` admin commands.
using the snippets below. Choose **Ask data questions with KTX MCP** for client
agents. Choose **Ask data questions + manage KTX with CLI commands** only when
a developer or operator agent also needs pinned `ktx` admin commands.
## Install with setup
Start the MCP server before connecting an end-user agent:
```bash
ktx mcp start
```
Then install client integration:
Install client integration first:
```bash
ktx setup --agents
```
Then start the MCP server before using HTTP-based clients:
```bash
ktx mcp start
```
Use `--target` for one target:
```bash
ktx setup --agents --target codex
```
Use `--global` only with `claude-code` or `codex`. Claude Desktop always
generates a project-local plugin ZIP:
Claude Desktop always writes to the global Claude Desktop config and generates
project-local skill ZIPs. Use `--global` only with `claude-code` or `codex`:
```bash
ktx setup --agents --target claude-code --global
@ -46,9 +46,9 @@ remove only files KTX installed.
The interactive command asks two questions:
```txt
How should client agents connect to this KTX project?
│ ○ MCP tools + analytics skill
│ ○ MCP tools + analytics skill + admin CLI skill
What should agents be allowed to do with this KTX project?
│ ○ Ask data questions with KTX MCP
│ ○ Ask data questions + manage KTX with CLI commands
◆ Which agent targets should KTX install?
@ -66,21 +66,31 @@ also asks where to install supported agent config:
```txt
◆ Where should KTX install supported agent config?
│ ○ Project
│ ○ Global
│ KTX project: /path/to/your/ktx-project
│ ○ Project scope (KTX project directory)
│ ○ Global scope (user config)
```
## Generated files
KTX writes MCP client configuration and an analytics skill by default. It writes
admin CLI skills only when you choose **MCP tools + analytics skill + admin CLI
skill**.
KTX writes MCP client configuration and analytics guidance by default. It writes
admin CLI guidance only when you choose **Ask data questions + manage KTX with
CLI commands**.
| Target | MCP tools + analytics skill | Adds with admin CLI skill |
After setup, KTX prints **Required before using agents**. Complete those steps
before opening the configured agent. If it shows `ktx mcp start --project-dir ...`,
run that command before using Claude Code, Codex, Cursor, OpenCode, or generic
MCP clients. The same output also prints the matching `ktx mcp stop` command
for when you want to stop MCP later. Claude Desktop uses its own launcher and
prints separate skill upload steps.
| Target | Ask data questions with KTX MCP | Adds when agents can manage KTX with CLI |
|--------|------------------------------|---------------------------|
| Claude Code | `.mcp.json`, `.claude/skills/ktx-analytics/SKILL.md` | `.claude/skills/ktx/SKILL.md`, `.claude/rules/ktx.md` |
| Claude Desktop | `~/Library/Application Support/Claude/claude_desktop_config.json` stdio entry + `.ktx/agents/claude/ktx-plugin.zip` with analytics skill | Adds `skills/ktx/SKILL.md` inside the plugin ZIP |
| Claude Desktop | `~/Library/Application Support/Claude/claude_desktop_config.json` stdio entry + `.ktx/agents/claude/ktx-analytics.zip` upload | Adds `.ktx/agents/claude/ktx.zip` upload |
| Codex | Printed snippet for `~/.codex/config.toml`, `.agents/skills/ktx-analytics/SKILL.md` | `.agents/skills/ktx/SKILL.md`, `.codex/instructions/ktx.md` |
| Cursor | `.cursor/mcp.json`, `.cursor/rules/ktx-analytics.mdc` | `.cursor/rules/ktx.mdc` |
| OpenCode | Printed snippet for `opencode.json`, `.opencode/commands/ktx-analytics.md` | `.opencode/commands/ktx.md` |
@ -144,7 +154,7 @@ During setup, select **Cursor** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| MCP tools + analytics skill | `.cursor/mcp.json`, `.cursor/rules/ktx-analytics.mdc` |
| Ask data questions with KTX MCP | `.cursor/mcp.json`, `.cursor/rules/ktx-analytics.mdc` |
| Admin CLI rules | `.cursor/rules/ktx.mdc` |
Cursor supports project-scoped installation only.
@ -168,8 +178,8 @@ same markdown command definitions.
## Claude Desktop
During setup, select **Claude Desktop** from the agent targets. KTX writes the
MCP server entry directly into Claude Desktop's config and generates a separate
plugin ZIP for the analytics skill:
MCP server entry directly into Claude Desktop's config and prepares uploadable
Claude Desktop skill packages for the KTX workflows:
- `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or
`%AppData%/Claude/claude_desktop_config.json` (Windows) gets an
@ -177,14 +187,22 @@ plugin ZIP for the analytics skill:
launcher shim at `.ktx/agents/claude/ktx-plugin-runner.sh`. The shim locates
a usable Node.js (Volta, NVM, Homebrew, system) so Claude Desktop can spawn
the server without needing `node` in PATH.
- `.ktx/agents/claude/ktx-plugin.zip` contains the `ktx-analytics` skill (and
the admin `ktx` skill if you choose **MCP tools + analytics skill + admin
CLI skill**). Install the ZIP from Claude Desktop's plugin UI to load the
skill.
- `.ktx/agents/claude/ktx-analytics.zip` contains the `ktx-analytics` skill.
If you choose **Ask data questions + manage KTX with CLI commands**, KTX also
generates `.ktx/agents/claude/ktx.zip` with the admin `ktx` skill. Claude
Desktop requires each uploaded ZIP to contain exactly one skill folder.
After `ktx setup`, restart Claude Desktop so it picks up the new MCP server
entry, then install the plugin ZIP. No daemon needs to be running — Claude
Desktop spawns the MCP server itself per session.
entry. No daemon needs to be running -- Claude Desktop spawns the MCP server
itself per session.
Upload each generated skill ZIP from Claude Desktop:
1. Open **Customize** > **Skills**.
2. Click **+** > **Create skill** > **Upload a skill**.
3. Upload `.ktx/agents/claude/ktx-analytics.zip`.
4. If generated, upload `.ktx/agents/claude/ktx.zip`.
5. Toggle the uploaded KTX skills on.
Claude Desktop does not introspect local stdio MCP servers, so the per-tool
"Connector"-style UI is not rendered for KTX. The tools are still callable
@ -192,7 +210,8 @@ from any Claude Desktop chat.
If you move the KTX checkout or project directory, rerun `ktx setup --agents`
to refresh the absolute paths in `claude_desktop_config.json` and the launcher
shim, then reinstall the regenerated plugin ZIP.
shim and to regenerate the skill ZIPs. Then restart Claude Desktop and upload
the new ZIPs.
---
@ -235,7 +254,7 @@ During setup, select **OpenCode** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| MCP tools + analytics skill | Snippet for `opencode.json`, `.opencode/commands/ktx-analytics.md` |
| Ask data questions with KTX MCP | Snippet for `opencode.json`, `.opencode/commands/ktx-analytics.md` |
| Admin CLI commands | `.opencode/commands/ktx.md` |
OpenCode supports project-scoped installation only.
@ -266,7 +285,7 @@ Admin CLI skills call the same KTX CLI commands:
| `ktx sl list --json` | List semantic-layer sources |
| `ktx sl search <query> --json` | Search semantic-layer sources |
| `ktx sl validate <source> --connection-id <id>` | Validate semantic source definitions |
| `ktx sl query --format json` | Execute a semantic-layer query when semantic compute is configured |
| `ktx sl query --format json` | Execute a Semantic Query when semantic compute is configured |
### Security constraints
@ -280,8 +299,8 @@ Admin CLI skills call the same KTX CLI commands:
| | Claude Code | Claude Desktop | Cursor | Codex | OpenCode |
|---|---|---|---|---|---|
| MCP tools | Yes | Local stdio via `claude_desktop_config.json` | Yes | Snippet | Snippet |
| Analytics skill | `.claude/skills/ktx-analytics/SKILL.md` | Included in plugin ZIP | `.cursor/rules/ktx-analytics.mdc` | `.agents/skills/ktx-analytics/SKILL.md` | `.opencode/commands/ktx-analytics.md` |
| Admin CLI skills | Optional | Optional in plugin ZIP | Optional (.mdc) | Optional | Optional |
| Global install | Yes | Project-local ZIP | No | Yes | No |
| Rule or instruction file | `.claude/rules/ktx.md` | Plugin `SETUP.md` | `.cursor/rules/ktx.mdc` | `.codex/instructions/ktx.md` | `.opencode/commands/ktx.md` |
| Skill file | `.claude/skills/ktx/SKILL.md` | `skills/ktx/SKILL.md` in plugin ZIP | Not separate | `.agents/skills/ktx/SKILL.md` | Not separate |
| Analytics skill | `.claude/skills/ktx-analytics/SKILL.md` | Upload `.ktx/agents/claude/ktx-analytics.zip` | `.cursor/rules/ktx-analytics.mdc` | `.agents/skills/ktx-analytics/SKILL.md` | `.opencode/commands/ktx-analytics.md` |
| Admin CLI skills | Optional | Optional `.ktx/agents/claude/ktx.zip` upload | Optional (.mdc) | Optional | Optional |
| Global install | Yes | Claude Desktop config | No | Yes | No |
| Rule or instruction file | `.claude/rules/ktx.md` | Not separate | `.cursor/rules/ktx.mdc` | `.codex/instructions/ktx.md` | `.opencode/commands/ktx.md` |
| Skill file | `.claude/skills/ktx/SKILL.md` | `ktx/SKILL.md` inside `ktx.zip` | Not separate | `.agents/skills/ktx/SKILL.md` | Not separate |

View file

@ -1,71 +0,0 @@
---
title: Integrations
description: Connect KTX to warehouses, analytics tools, and coding agents.
---
KTX integrations bring trusted context into an analytics project and make that
context available to coding agents through the CLI. Start with `ktx setup` when
you want the guided flow, then use the integration reference pages for exact
configuration fields, generated files, and manual setup.
## Integration types
| Type | What it connects | Start here |
|------|------------------|------------|
| Primary sources | Warehouses and databases that KTX scans for schemas, constraints, row counts, and optional query history | [Primary Sources](/docs/integrations/primary-sources) |
| Context sources | Existing analytics and knowledge tools such as dbt, MetricFlow, LookML, Metabase, Looker, and Notion | [Context Sources](/docs/integrations/context-sources) |
| Agent clients | Claude Code, Codex, Cursor, OpenCode, and universal `.agents` consumers | [Agent Clients](/docs/integrations/agent-clients) |
## Recommended setup flow
Use this order for a new project:
1. Run `ktx setup` from the analytics project directory.
2. Configure an LLM backend and embeddings so KTX can enrich and search context.
3. Add at least one primary source connection.
4. Add optional context sources that describe the same warehouse or business domain.
5. Build context during setup, or run `ktx ingest <connectionId>` later.
6. Install agent integration with `ktx setup --agents` when the context is ready.
For repeatable setup, pass `--project-dir`, `--no-input`, and the relevant
automation flags documented in [`ktx setup`](/docs/cli-reference/ktx-setup).
## What setup writes
| Path | Purpose |
|------|---------|
| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, and query history |
| `.ktx/secrets/*` | Local file-backed secrets when you choose file references during setup |
| `.ktx/setup/*` | Local setup progress and context-build state |
| `semantic-layer/<connection-id>/` | YAML semantic sources generated by database and source ingestion |
| `wiki/` | Markdown business context, definitions, and ingested knowledge |
| `.ktx/agents/install-manifest.json` | Manifest of agent integration files installed by `ktx setup --agents` |
| Agent client files | Skills, rules, or commands that teach agents when and how to call KTX |
## Common commands
```bash
# Start or resume the guided flow
ktx setup
# Add or refresh every configured integration
ktx ingest --all
# Refresh one configured warehouse, source, or knowledge integration
ktx ingest warehouse
# Install one project-scoped agent target
ktx setup --agents --target codex
# Check whether integrations are ready
ktx status
```
## Choosing docs
Read [Primary Sources](/docs/integrations/primary-sources) when you need
database driver fields, authentication formats, query history support, or
warehouse-specific notes. Read [Context Sources](/docs/integrations/context-sources)
when you need source adapter fields, repository authentication, BI tool mapping,
or Notion crawl options. Read [Agent Clients](/docs/integrations/agent-clients)
when you need generated file locations or manual agent configuration.

View file

@ -1,5 +1,5 @@
{
"title": "Integrations",
"defaultOpen": true,
"pages": ["index", "primary-sources", "context-sources", "agent-clients"]
"pages": ["primary-sources", "context-sources", "agent-clients"]
}

View file

@ -1,12 +1,17 @@
---
title: Primary Sources
description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite.
description: Connect KTX to PostgreSQL, Snowflake, BigQuery, MySQL, SQL Server, or SQLite.
---
KTX connects to your data warehouse or database to build schema context,
discover relationships, and execute semantic layer queries. Each connection is
defined in `ktx.yaml` under the `connections` key.
For analytics tools and knowledge systems such as dbt, MetricFlow, LookML,
Metabase, Looker, and Notion, use [Context Sources](/docs/integrations/context-sources).
For Claude Code, Codex, Cursor, OpenCode, and other agent clients, use
[Agent Clients](/docs/integrations/agent-clients).
All connectors share these conventions:
- Sensitive values support `env:VAR_NAME` (read from environment) and
@ -21,9 +26,9 @@ Agents should prefer environment or file references over literal secrets.
| Field | Required | Applies to | Description |
|-------|----------|------------|-------------|
| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `clickhouse`, `mysql`, `sqlserver`, or `sqlite` |
| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `mysql`, `sqlserver`, or `sqlite` |
| `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` |
| `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, ClickHouse, SQL Server | Field-by-field connection values |
| `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, SQL Server | Field-by-field connection values |
| `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan |
| `context.queryHistory` | No | PostgreSQL, Snowflake, BigQuery | Enables query-history ingestion when the warehouse supports it |
| `path` | Yes for path-style SQLite | SQLite | Local SQLite database path or `env:NAME` reference |
@ -264,63 +269,6 @@ staged artifact shape as Postgres and Snowflake.
---
## ClickHouse
Connects over HTTP (port 8123) or HTTPS (port 8443). Supports the ClickHouse native type system including `Nullable`, `LowCardinality`, and `Array` wrappers.
### Connection config
```yaml title="ktx.yaml"
connections:
my-clickhouse:
driver: clickhouse
url: http://localhost:8123/analytics
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-clickhouse:
driver: clickhouse
host: clickhouse.internal
port: 8123
database: analytics
username: default
password: env:CH_PASSWORD
ssl: false
```
### Authentication
| Method | Config |
|--------|--------|
| Basic auth | `username` + `password` (HTTP basic auth) |
| No auth | Default user `default` with no password |
| HTTPS | Set `ssl: true` (uses port 8443 by default) |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `system.tables`, engine-based detection |
| Primary keys | Yes | Via `system.columns` |
| Foreign keys | No | Not a ClickHouse concept |
| Row count estimates | Yes | Via `system.parts` aggregation |
| Column statistics | No | - |
| Query history | No | - |
| Table sampling | Yes | - |
### Dialect notes
- Parameter binding uses `{param:Type}` syntax (e.g., `{database:String}`)
- Detects views vs. tables by engine name (`View`, `MaterializedView`)
- Handles `Nullable(T)` and `LowCardinality(Nullable(T))` type wrappers
- Dictionary tables are excluded from scanning
- Results returned in JSONCompact or JSONEachRow format
---
## MySQL
Standard MySQL/MariaDB connector with full foreign key support and schema introspection.
@ -510,4 +458,4 @@ No authentication required - SQLite is file-based. The file must be readable by
| Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
| Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest <connectionId> --query-history` or `ktx setup` |
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context |
| Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |
| Semantic Query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |

View file

@ -0,0 +1,12 @@
import { readFile } from "node:fs/promises";
import { join } from "node:path";
export const agentSetupSlug = ["agents-setup"] as const;
/**
 * Reports whether a route slug addresses the agent setup page.
 *
 * Returns true only when `slug` is an array with exactly one segment and that
 * segment equals the single entry of `agentSetupSlug`; a missing slug, an
 * empty slug, or a multi-segment slug all yield false.
 */
export function isAgentSetupSlug(slug: string[] | undefined) {
  if (!slug || slug.length !== 1) {
    return false;
  }

  const [onlySegment] = slug;
  return onlySegment === agentSetupSlug[0];
}
/**
 * Reads the agent setup Markdown document as UTF-8 text.
 *
 * The path is resolved against the current working directory of the process,
 * so the returned promise rejects if `content/agents-setup.md` is not present
 * relative to wherever the process was started.
 */
export function readAgentSetupMarkdown() {
  const markdownPath = join(process.cwd(), "content/agents-setup.md");
  return readFile(markdownPath, "utf8");
}

View file

@ -52,8 +52,8 @@ KTX provides semantic-layer files, warehouse scans, wiki pages, provenance, and
## Agent Entry Points
${link("/docs/ai-resources", "AI Resources", "Machine-readable docs, prompt recipes, and agent setup paths")}
${link("/docs/ai-resources/agent-quickstart", "Agent Quickstart", "Task-first route for coding assistants using KTX")}
${link("/docs/agents-setup", "Agent Setup", "Copy-pasteable prompt for agents installing and configuring KTX")}
${link("/docs/ai-resources/markdown-access", "Markdown Access", "Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown")}
${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested instructions for coding assistants that need to read and cite KTX docs")}
@ -73,6 +73,7 @@ ${link("/docs/guides/writing-context", "Writing Context", "Write semantic source
## CLI Reference
${link("/docs/cli-reference/ktx", "ktx", "Root command map and global options")}
${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")}
${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")}
${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Wiki page commands")}

View file

@ -1,6 +1,6 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
import "./.next/types/routes.d.ts";
import "./.next/dev/types/routes.d.ts";
// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.

View file

@ -15,6 +15,12 @@ const config = {
},
async redirects() {
return [
{
source: "/",
destination: "/ktx/docs/getting-started/introduction",
permanent: false,
basePath: false,
},
{
source: "/docs",
destination: "/docs/getting-started/introduction",

View file

@ -10,6 +10,7 @@
"test": "node --test tests/*.test.mjs"
},
"dependencies": {
"@xyflow/react": "^12.10.2",
"fumadocs-core": "16.8.10",
"fumadocs-mdx": "15.0.4",
"fumadocs-ui": "16.8.10",
@ -18,11 +19,11 @@
"react-dom": "19.2.6"
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/node": "^25.7.0",
"@types/react": "^19",
"@types/react-dom": "^19",
"typescript": "^6.0",
"@tailwindcss/postcss": "^4",
"tailwindcss": "^4"
"tailwindcss": "^4",
"typescript": "^6.0"
}
}

View file

@ -6,7 +6,7 @@ const markdownMimeTypes = new Set([
"application/markdown",
]);
export function middleware(request: NextRequest) {
export function proxy(request: NextRequest) {
if (!isMarkdownPreferred(request.headers.get("accept"))) {
return NextResponse.next();
}

View file

@ -0,0 +1,211 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1346" height="1710" viewBox="0 0 1346 1710" role="img" aria-labelledby="title desc">
<title id="title">KTX ingestion flow</title>
<desc id="desc">Source systems flow through source adapters, context builder, reconciliation, and validation to create wiki Markdown and semantic-layer YAML outputs.</desc>
<defs>
<filter id="card-shadow" x="-12%" y="-12%" width="124%" height="124%" color-interpolation-filters="sRGB">
<feDropShadow dx="0" dy="2" stdDeviation="2" flood-color="#0f172a" flood-opacity="0.14"/>
</filter>
<filter id="dark-shadow" x="-12%" y="-12%" width="124%" height="124%" color-interpolation-filters="sRGB">
<feDropShadow dx="0" dy="2" stdDeviation="2" flood-color="#020617" flood-opacity="0.22"/>
</filter>
<filter id="glow-blue" x="-160%" y="-160%" width="420%" height="420%">
<feGaussianBlur stdDeviation="7" result="blur"/>
<feMerge>
<feMergeNode in="blur"/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<marker id="arrow" viewBox="0 0 10 10" refX="8.5" refY="5" markerWidth="9" markerHeight="9" orient="auto-start-reverse">
<path d="M 0 0 L 10 5 L 0 10 z" fill="#94a3b8"/>
</marker>
<style>
.card { fill: #ffffff; stroke: #e2e8f0; stroke-width: 1.4; filter: url(#card-shadow); }
.stage { fill: #0b1f23; stroke: #17343a; stroke-width: 1.2; filter: url(#dark-shadow); }
.title { fill: #24272d; font: 700 20px Inter, Arial, sans-serif; }
.body { fill: #666b73; font: 500 16px Inter, Arial, sans-serif; }
.tag { fill: #6b7280; font: 500 14px Inter, Arial, sans-serif; }
.mono { font: 700 16px "SFMono-Regular", Consolas, monospace; }
.stage-title { fill: #f8fafc; font: 700 20px Inter, Arial, sans-serif; }
.stage-body { fill: #b8c6ca; font: 500 16px Inter, Arial, sans-serif; }
.index { fill: #07313a; font: 700 18px Inter, Arial, sans-serif; text-anchor: middle; dominant-baseline: middle; }
.edge { fill: none; stroke: #94a3b8; stroke-width: 2; stroke-linecap: round; stroke-linejoin: round; }
.dash { fill: none; stroke: #64748b; stroke-width: 1.8; stroke-dasharray: 5 8; stroke-linecap: round; }
</style>
</defs>
<g id="source-cards">
<g transform="translate(24 39)">
<rect class="card" x="0" y="0" width="298" height="285" rx="4"/>
<rect x="0" y="0" width="298" height="4" rx="2" fill="#3b82f6"/>
<text class="title" x="22" y="45">Databases</text>
<text class="body" x="22" y="82">Schemas, columns, keys,</text>
<text class="body" x="22" y="112">row counts, and query</text>
<text class="body" x="22" y="142">history.</text>
<g transform="translate(22 174)">
<rect x="0" y="0" width="116" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="23">PostgreSQL</text>
<rect x="124" y="0" width="104" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="136" y="23">Snowflake</text>
<rect x="0" y="46" width="94" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="69">BigQuery</text>
<rect x="102" y="46" width="72" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="114" y="69">SQLite</text>
</g>
</g>
<g transform="translate(358 39)">
<rect class="card" x="0" y="0" width="298" height="285" rx="4"/>
<rect x="0" y="0" width="298" height="4" rx="2" fill="#f97316"/>
<text class="title" x="22" y="45">BI tools</text>
<text class="body" x="22" y="82">Dashboards, questions,</text>
<text class="body" x="22" y="112">explores, usage, and trusted</text>
<text class="body" x="22" y="142">examples.</text>
<g transform="translate(22 174)">
<rect x="0" y="0" width="99" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="23">Metabase</text>
<rect x="109" y="0" width="75" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="121" y="23">Looker</text>
</g>
</g>
<g transform="translate(692 39)">
<rect class="card" x="0" y="0" width="298" height="285" rx="4"/>
<rect x="0" y="0" width="298" height="4" rx="2" fill="#f59e0b"/>
<text class="title" x="22" y="45">Modeling code</text>
<text class="body" x="22" y="82">Existing metrics, dimensions,</text>
<text class="body" x="22" y="112">models, joins, and entities.</text>
<g transform="translate(22 146)">
<rect x="0" y="0" width="47" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="23">dbt</text>
<rect x="57" y="0" width="83" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="69" y="23">LookML</text>
<rect x="0" y="46" width="109" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="69">MetricFlow</text>
</g>
</g>
<g transform="translate(1026 39)">
<rect class="card" x="0" y="0" width="298" height="285" rx="4"/>
<rect x="0" y="0" width="298" height="4" rx="2" fill="#10b981"/>
<text class="title" x="22" y="45">Docs and notes</text>
<text class="body" x="22" y="82">Policies, caveats, team</text>
<text class="body" x="22" y="112">definitions, and analyst</text>
<text class="body" x="22" y="142">context.</text>
<g transform="translate(22 174)">
<rect x="0" y="0" width="72" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="23">Notion</text>
<rect x="82" y="0" width="87" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="94" y="23">Any text</text>
</g>
</g>
</g>
<g id="edges">
<path class="edge" d="M172 324 V380 Q172 394 186 394 H507 Q507 394 507 380 V324"/>
<path class="edge" d="M841 324 V380 Q841 394 827 394 H507"/>
<path class="edge" d="M1175 324 V380 Q1175 394 1161 394 H673 Q673 394 673 408 V433" marker-end="url(#arrow)"/>
<path class="edge" d="M507 394 H673"/>
<path class="edge" d="M673 607 V651" marker-end="url(#arrow)"/>
<path class="edge" d="M673 823 V866" marker-end="url(#arrow)"/>
<path class="edge" d="M673 1038 V1081" marker-end="url(#arrow)"/>
<path class="edge" d="M673 1254 V1305 Q673 1322 656 1322 H305 Q291 1322 291 1336 V1364" marker-end="url(#arrow)"/>
<path class="edge" d="M673 1254 V1305 Q673 1322 690 1322 H1043 Q1057 1322 1057 1336 V1364" marker-end="url(#arrow)"/>
<path class="dash" d="M546 1523 H800"/>
<path d="M546 1523 l9 -6 v12 z" fill="#64748b"/>
<path d="M800 1523 l-9 -6 v12 z" fill="#64748b"/>
</g>
<g id="particles">
<circle cx="256" cy="394" r="18" fill="#3b82f6" opacity="0.18" filter="url(#glow-blue)"/>
<circle cx="256" cy="394" r="6" fill="#3b82f6" opacity="0.9"/>
<circle cx="632" cy="394" r="18" fill="#f97316" opacity="0.18" filter="url(#glow-blue)"/>
<circle cx="632" cy="394" r="6" fill="#f97316" opacity="0.9"/>
<circle cx="830" cy="394" r="18" fill="#10b981" opacity="0.18" filter="url(#glow-blue)"/>
<circle cx="830" cy="394" r="6" fill="#10b981" opacity="0.9"/>
<circle cx="673" cy="625" r="17" fill="#10b981" opacity="0.18" filter="url(#glow-blue)"/>
<circle cx="673" cy="625" r="6" fill="#10b981" opacity="0.9"/>
<circle cx="673" cy="1054" r="17" fill="#f59e0b" opacity="0.18" filter="url(#glow-blue)"/>
<circle cx="673" cy="1054" r="6" fill="#f59e0b" opacity="0.9"/>
<circle cx="573" cy="1322" r="17" fill="#3b82f6" opacity="0.18" filter="url(#glow-blue)"/>
<circle cx="573" cy="1322" r="6" fill="#3b82f6" opacity="0.9"/>
</g>
<g id="stages">
<g transform="translate(474 438)">
<rect class="stage" x="0" y="0" width="400" height="169" rx="4"/>
<circle cx="48" cy="84" r="23" fill="#55dced"/>
<text class="index" x="48" y="84">1</text>
<text class="stage-title" x="90" y="64">Source adapters</text>
<text class="stage-body" x="90" y="100">Read each configured system in</text>
<text class="stage-body" x="90" y="130">its native shape.</text>
</g>
<g transform="translate(474 653)">
<rect class="stage" x="0" y="0" width="400" height="169" rx="4"/>
<circle cx="48" cy="84" r="23" fill="#55dced"/>
<text class="index" x="48" y="84">2</text>
<text class="stage-title" x="90" y="64">Context builder</text>
<text class="stage-body" x="90" y="100">Turn source evidence into</text>
<text class="stage-body" x="90" y="130">proposed context updates.</text>
</g>
<g transform="translate(474 868)">
<rect class="stage" x="0" y="0" width="400" height="169" rx="4"/>
<circle cx="48" cy="84" r="23" fill="#55dced"/>
<text class="index" x="48" y="84">3</text>
<text class="stage-title" x="90" y="64">Reconciliation</text>
<text class="stage-body" x="90" y="100">Merge new evidence with the</text>
<text class="stage-body" x="90" y="130">context that already exists.</text>
</g>
<g transform="translate(474 1082)">
<rect class="stage" x="0" y="0" width="400" height="172" rx="4"/>
<circle cx="48" cy="86" r="23" fill="#55dced"/>
<text class="index" x="48" y="86">4</text>
<text class="stage-title" x="90" y="63">Validation</text>
<text class="stage-body" x="90" y="99">Check references and</text>
<text class="stage-body" x="90" y="129">semantics before agents rely on</text>
<text class="stage-body" x="90" y="159">them.</text>
</g>
</g>
<g id="outputs">
<g transform="translate(60 1373)">
<rect class="card" x="0" y="0" width="485" height="329" rx="4"/>
<rect x="0" y="0" width="485" height="4" rx="2" fill="#10b981"/>
<text class="mono" x="24" y="47" fill="#10b981">wiki/*.md</text>
<text class="title" x="24" y="92">Wiki</text>
<g transform="translate(24 116)">
<rect x="0" y="0" width="92" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="23">free-form</text>
<rect x="102" y="0" width="150" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="114" y="23">auto-maintained</text>
</g>
<text class="body" x="24" y="184">Definitions, caveats, policies, analyst notes, and</text>
<text class="body" x="24" y="214">business language that agents can search.</text>
</g>
<g transform="translate(803 1373)">
<rect class="card" x="0" y="0" width="485" height="329" rx="4"/>
<rect x="0" y="0" width="485" height="4" rx="2" fill="#3b82f6"/>
<text class="mono" x="24" y="47" fill="#3b82f6">semantic-layer/*.yaml</text>
<text class="title" x="24" y="92">Semantic layer</text>
<g transform="translate(24 116)">
<rect x="0" y="0" width="100" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="12" y="23">structured</text>
<rect x="110" y="0" width="108" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="122" y="23">executable</text>
<rect x="228" y="0" width="150" height="36" rx="4" fill="#fbfaf8" stroke="#e5e1dc"/>
<text class="tag" x="240" y="23">auto-maintained</text>
</g>
<text class="body" x="24" y="184">Metrics, joins, tables, dimensions, filters, and</text>
<text class="body" x="24" y="214">segments that KTX can validate and compile into</text>
<text class="body" x="24" y="244">SQL.</text>
</g>
<g transform="translate(618 1505)">
<rect x="0" y="0" width="111" height="36" rx="4" fill="#ffffff" stroke="#e5e1dc"/>
<text class="tag" x="15" y="23">references</text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 137 KiB

View file

@ -5,5 +5,13 @@ export const docs = defineDocs({
});
// Global MDX configuration for the docs site.
// There is exactly one `mdxOptions` entry: an earlier empty `mdxOptions: {}`
// in the same object literal was a duplicate key that this one silently
// overrode, so it has been dropped.
export default defineConfig({
  mdxOptions: {
    // Code-block options: emit a language class on highlighted blocks and
    // use separate themes for light and dark mode.
    rehypeCodeOptions: {
      addLanguageClass: true,
      themes: {
        light: "min-light",
        dark: "github-dark",
      },
    },
  },
});

View file

@ -111,3 +111,33 @@ test("/ktx/docs redirects to the docs introduction", async () => {
`${docsBasePath}/docs/getting-started/introduction`,
);
});
// The bare site root must redirect to the docs introduction under the base path.
test("/ redirects into the /ktx docs site", async () => {
  // `redirect: "manual"` stops fetch from following the redirect, so the
  // redirect response itself is what gets inspected.
  const rootResponse = await fetch(`${docsSiteUrl}/`, { redirect: "manual" });
  const expectedLocation = `${docsBasePath}/docs/getting-started/introduction`;

  assert.equal(rootResponse.status, 307);
  assert.equal(rootResponse.headers.get("location"), expectedLocation);
});
// The search endpoint under the base path must answer with a JSON array that
// contains at least one hit pointing into /docs/.
test("/ktx/api/search returns docs search results", async () => {
  const searchUrl = `${docsSiteUrl}${docsBasePath}/api/search?query=setup`;
  const searchResponse = await fetch(searchUrl);
  assert.equal(searchResponse.status, 200);

  const hits = await searchResponse.json();
  assert.ok(Array.isArray(hits), "search response should be an array");

  // A hit counts as a docs result when its `url` is a string starting with "/docs/".
  const pointsAtDocs = (hit) =>
    typeof hit.url === "string" && hit.url.startsWith("/docs/");
  assert.ok(
    hits.some((hit) => pointsAtDocs(hit)),
    "search should return at least one docs result",
  );
});

View file

@ -0,0 +1,70 @@
import assert from "node:assert/strict";
import { access, readFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import { test } from "node:test";
import { fileURLToPath } from "node:url";
// Directory one level above the folder that contains this test file.
const docsSiteDir = join(dirname(fileURLToPath(import.meta.url)), "..");

/**
 * Read a file as UTF-8 text.
 *
 * @param {string} path - Location of the file, relative to `docsSiteDir`.
 * @returns {Promise<string>} The file contents.
 */
async function readDocsFile(path) {
  const absolutePath = join(docsSiteDir, path);
  return readFile(absolutePath, "utf8");
}
// The root layout must configure search and point it at the /ktx-prefixed API route.
test("root provider uses the base-path-aware search API", async () => {
  const layoutSource = await readDocsFile("app/layout.tsx");
  const requiredPatterns = [/search=\{\{/, /api:\s*"\/ktx\/api\/search"/];

  for (const pattern of requiredPatterns) {
    assert.match(layoutSource, pattern);
  }
});
// Both icon entries in the layout metadata must carry the /ktx base path.
test("metadata icons include the docs base path", async () => {
  const layoutSource = await readDocsFile("app/layout.tsx");

  for (const prefixedIcon of [
    /icon:\s*"\/ktx\/brand\/ktx-mascot\.svg"/,
    /shortcut:\s*"\/ktx\/brand\/ktx-mascot\.svg"/,
  ]) {
    assert.match(layoutSource, prefixedIcon);
  }

  // No value may reference the mascot without the /ktx prefix.
  assert.doesNotMatch(layoutSource, /:\s*"\/brand\/ktx-mascot\.svg"/);
});
// proxy.ts must exist and export `proxy`; middleware.ts must not exist and
// proxy.ts must not export `middleware`.
test("markdown negotiation uses the Next proxy convention", async () => {
  const proxyPath = join(docsSiteDir, "proxy.ts");
  const middlewarePath = join(docsSiteDir, "middleware.ts");

  await assert.doesNotReject(access(proxyPath));
  await assert.rejects(access(middlewarePath));

  const proxySource = await readDocsFile("proxy.ts");
  assert.match(proxySource, /export function proxy/);
  assert.doesNotMatch(proxySource, /export function middleware/);
});
// The stylesheet must put z-index on the site shell class rather than on a
// blanket `body > *` rule.
test("site background stacking does not target every body child", async () => {
  const stylesheet = await readDocsFile("app/global.css");

  const blanketBodyChildRule = /body\s*>\s*\*\s*\{[^}]*z-index/s;
  const siteShellRule = /\.ktx-site-shell\s*\{[^}]*z-index:\s*2/s;

  assert.doesNotMatch(stylesheet, blanketBodyChildRule);
  assert.match(stylesheet, siteShellRule);
});
// Guards the scroll-lock CSS contract: body clips horizontally, html keeps
// its default overflow, and no site-specific scroll-lock overrides exist.
test("search lock relies on body overflow propagation, not html or sidebar overrides", async () => {
  const css = await readDocsFile("app/global.css");

  // Body still clips horizontal overflow defensively.
  assert.match(css, /(^|\s)body\s*\{[^}]*overflow-x:\s*clip/s);

  // html must keep its default `visible` overflow so body's lock
  // (`overflow: hidden` from react-remove-scroll-bar) propagates to the
  // viewport. Locking html directly breaks `position: sticky` on the
  // sidebar placeholder.
  //
  // The property is `overflow` or `overflow-y`, then the colon, then the
  // value. The earlier pattern `overflow(-y|\s*:)` consumed the colon only in
  // the shorthand branch, so `overflow-y: hidden` on html slipped through.
  assert.doesNotMatch(
    css,
    /(^|\s)html\s*,?\s*\{[^}]*overflow(?:-y)?\s*:\s*(hidden|clip)/s,
  );
  assert.doesNotMatch(
    css,
    /html:has\(body\[data-scroll-locked\]\)[^{]*\{[^}]*overflow:\s*(hidden|clip)/s,
  );

  // No site-specific overrides to body's data-scroll-locked overflow or
  // to the sidebar placeholder when locked.
  assert.doesNotMatch(
    css,
    /html\s+body\[data-scroll-locked\][^{]*\{[^}]*overflow:/s,
  );
  assert.doesNotMatch(
    css,
    /body\[data-scroll-locked\]\s+\[data-sidebar-placeholder\][^{]*\{[^}]*position:\s*fixed/s,
  );
});

View file

@ -23,7 +23,7 @@ test("docs introduction frames the concept before showing product mechanics", as
const heroIndex = introduction.indexOf("Make analytics context");
const whyIndex = introduction.indexOf("## Why KTX");
const createsIndex = introduction.indexOf("## What KTX creates");
const worksIndex = introduction.indexOf("## How KTX works");
const mechanicsIndex = introduction.indexOf("<ProductMechanics />");
const useCaseIndex = introduction.indexOf("## Use it for");
const heroSource = introduction.slice(0, mechanicsIndex);
@ -34,12 +34,12 @@ test("docs introduction frames the concept before showing product mechanics", as
"problem framing should appear after the hero",
);
assert.ok(
createsIndex > whyIndex,
"artifact summary should appear after problem framing",
worksIndex > whyIndex,
"mechanics bridge should appear after problem framing",
);
assert.ok(
mechanicsIndex > createsIndex,
"mechanics component should appear after the artifact summary",
mechanicsIndex > worksIndex,
"mechanics component should appear after the mechanics bridge",
);
assert.ok(
mechanicsIndex < useCaseIndex,
@ -49,49 +49,47 @@ test("docs introduction frames the concept before showing product mechanics", as
assert.doesNotMatch(heroSource, /The Context Layer/);
assert.doesNotMatch(heroSource, /Building Context/);
assert.doesNotMatch(heroSource, /flex flex-wrap gap-3/);
assert.doesNotMatch(introduction, /raw-sources/);
assert.doesNotMatch(introduction, /\.ktx/);
});
test("product mechanics component covers source-specific context and SQL expansion", async () => {
test("product mechanics component explains ingestion outputs", async () => {
const component = await readDocsFile("components/product-mechanics.tsx");
for (const expectedText of [
"How KTX works",
"Build context from source evidence",
"Run agent requests through the model",
"Ingestion",
"Runtime",
"wiki/",
"semantic-layer/",
"raw-sources/",
".ktx/",
"sl_refs",
"Database structure",
"BI and usage evidence",
"Semantic modeling",
"Company documentation",
"Notion pages",
"Sources",
"KTX transforms evidence",
"KTX builds the model",
"Outputs KTX writes",
"Postgres",
"How ingestion works",
"Ingestion flow",
"From scattered source systems to agent-ready context",
"wiki/*.md",
"semantic-layer/*.yaml",
"Wiki",
"Semantic layer",
"Databases",
"BI tools",
"Modeling code",
"Docs and notes",
"Source adapters",
"Context builder",
"Reconciliation",
"Validation",
"PostgreSQL",
"Snowflake",
"BigQuery",
"and many others",
"Metabase",
"Looker",
"dbt",
"MetricFlow",
"LookML",
"extract evidence",
"reconcile entities",
"validate references",
"semantic query plan",
"dialect SQL",
"bounded rows",
"provenance",
"measure: orders.total_revenue",
"dimension: customers.segment",
"select",
"Notion",
"Any text",
"compile into SQL",
'"use client"',
"@xyflow/react",
"<ReactFlow",
"getSmoothStepPath",
"animateMotion",
"mechanics-particle",
"buildParticlePath",
]) {
assert.ok(
component.includes(expectedText),
@ -99,7 +97,27 @@ test("product mechanics component covers source-specific context and SQL expansi
);
}
assert.match(
component,
/nodesDraggable=\{false\}/,
"ReactFlow canvas should disable node dragging",
);
assert.match(
component,
/panOnDrag=\{false\}/,
"ReactFlow canvas should disable panning",
);
assert.match(
component,
/zoomOnScroll=\{false\}/,
"ReactFlow canvas should disable scroll zoom",
);
assert.doesNotMatch(component, /raw-sources/);
assert.doesNotMatch(component, /\.ktx/);
assert.doesNotMatch(component, /Product mechanics/);
assert.doesNotMatch(component, /How KTX works/);
assert.doesNotMatch(component, /Runtime/);
assert.doesNotMatch(component, /A semantic compiler for analytics agents/);
assert.doesNotMatch(component, /KTX does more than retrieve Markdown/);
assert.doesNotMatch(component, /Plain Markdown \+ RAG/);
@ -109,15 +127,11 @@ test("product mechanics component covers source-specific context and SQL expansi
assert.doesNotMatch(component, /KTX works in two moments/);
assert.doesNotMatch(component, /name: "Metabase and query history"/);
assert.doesNotMatch(component, /name: "dbt, MetricFlow, LookML"/);
assert.doesNotMatch(component, /query history/);
assert.doesNotMatch(component, /analyst notes/);
assert.doesNotMatch(component, /ClickHouse/);
assert.doesNotMatch(component, /MySQL/);
assert.doesNotMatch(component, /SQL Server/);
assert.doesNotMatch(component, /SQLite/);
assert.doesNotMatch(
component,
/\/ktx\/brand\/(?:postgresql|snowflake|bigquery|clickhouse|mysql|sqlserver|sqlite|metabase|dbt|looker|notion)\.svg/,
/\/ktx\/brand\/(?:postgresql|snowflake|bigquery|mysql|sqlserver|sqlite|metabase|dbt|looker|notion)\.svg/,
);
assert.doesNotMatch(component, /<img/);
assert.doesNotMatch(component, /w-\[calc\(100vw/);

View file

@ -3,7 +3,7 @@
This runbook covers the maintainer workflow for publishing `@kaelio/ktx` to
npm through GitHub Actions. The workflow uses semantic-release to choose the
next version, update release metadata, publish the package, create the GitHub
release, and commit the release files back to the repository.
release, and commit prerelease files back to the `next` branch.
## Release channels
@ -15,7 +15,9 @@ KTX has two npm release channels:
Run rc releases from the source branch you want to publish. The workflow
creates or updates the `next` prerelease branch from that source branch before
running semantic-release, because semantic-release requires a dedicated
prerelease branch in addition to the stable `main` branch.
prerelease branch in addition to the stable `main` branch. You can publish an
rc from `main` when you want to validate the current stable branch before a
stable release.
Run stable releases only from `main`. The workflow rejects stable releases from
other branches.
@ -29,11 +31,26 @@ Before you publish, confirm these requirements:
`.github/workflows/release.yml` workflow.
- The workflow keeps `id-token: write` permission so npm can verify the
GitHub Actions run through OpenID Connect.
- The repository has a baseline semantic-release tag for the latest published
package version, such as `v0.1.0-rc.1`.
- The repository has release metadata in `release-policy.json` for the current
public package line, such as `0.1.0-rc.1` or `0.1.0`.
- The repository has a stable baseline tag when you need semantic-release to
publish the first stable version as `0.1.0`.
If no baseline tag exists, semantic-release treats the run as the first release
and may choose a version that doesn't match the currently published package.
semantic-release doesn't support choosing an arbitrary first `0.x` stable
release. If KTX has no stable tag yet and you need the first stable release to
be `0.1.0`, create and push the baseline tag once before running the live
stable workflow:
```bash
root_commit="$(git rev-list --max-parents=0 HEAD | tail -n 1)"
git tag v0.0.0 "${root_commit}"
git push origin v0.0.0
```
KTX follows the same versioning schema as the main Kaelio release workflow:
breaking-change and `major` commit markers create a minor release, not an
automatic major release. A major version requires an intentional manual release
path.
## Dry-run a release
@ -44,7 +61,7 @@ publishing to npm.
2. Select **KTX Release**.
3. Select the branch to release from.
4. Set **release_kind** to `rc` or `stable`.
5. Leave **publish_live** set to `false`.
5. Set **publish_live** to `false`.
6. Optional: Set **force_release** to `true` when you need a patch release even
if semantic-release doesn't find a releasable commit.
7. Run the workflow.
@ -60,9 +77,9 @@ promoting to `latest`.
1. Open **Actions** in GitHub.
2. Select **KTX Release**.
3. Select the source branch to release from.
3. Select the source branch to release from, including `main` when needed.
4. Set **release_kind** to `rc`.
5. Set **publish_live** to `true`.
5. Leave **publish_live** set to `true`.
6. Optional: Set **force_release** to `true`.
7. Run the workflow.
@ -78,19 +95,21 @@ Publish a stable release from `main` after you have validated an rc package.
1. Open **Actions** in GitHub.
2. Select **KTX Release**.
3. Select `main`.
4. Set **release_kind** to `stable`.
5. Set **publish_live** to `true`.
4. Leave **release_kind** set to `stable`.
5. Leave **publish_live** set to `true`.
6. Optional: Set **force_release** to `true`.
7. Run the workflow.
The workflow publishes `@kaelio/ktx` with `--access public --tag latest`, runs
the published package smoke test, creates a GitHub release, and commits the
release metadata.
the published package smoke test, and creates a GitHub release. Stable releases
don't commit release metadata back to `main`, because `main` is protected and
requires changes through pull requests.
## Release metadata
semantic-release calls `scripts/update-public-release-version.mjs` during the
prepare step. That script updates:
prepare step before `@semantic-release/npm` publishes the package. That script
updates:
- `package.json` with the semantic-release version.
- `release-policy.json` with `publicNpmPackageVersion`, npm publish settings,
@ -98,7 +117,10 @@ prepare step. That script updates:
The artifact packaging and readiness scripts read `publicNpmPackageVersion`
from `release-policy.json`, so manual version edits in build scripts aren't
needed for rc releases.
needed for rc releases. The semantic-release npm plugin publishes the generated
`dist/public-npm-package` tree and writes the release tarball under
`dist/artifacts/npm`. Stable releases use the updated metadata during the
workflow run, but that generated metadata isn't committed back to `main`.
The bundled Python runtime wheel also derives its version from
`publicNpmPackageVersion`. Stable npm versions are reused as-is, and rc

View file

@ -1,411 +0,0 @@
# Agent-Friendly Docs Site Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make `docs-site` discoverable and readable by coding agents through `llms.txt`, bundled markdown, per-page markdown routes, markdown negotiation, and stricter agent-friendly docs content.
**Architecture:** Keep the existing Next 15 + Fumadocs app. Add a small `lib/llm-docs.ts` module that reads Fumadocs pages and builds machine-readable markdown responses, then expose those responses through route handlers and a markdown negotiation proxy. Rewrite existing MDX pages in place so the rendered UI and machine-readable routes share one source of truth.
**Tech Stack:** Next.js 15 App Router, Fumadocs, MDX, TypeScript, pnpm, Node 22.
---
### Task 1: Machine-Readable Docs Routes
**Files:**
- Create: `docs-site/lib/llm-docs.ts`
- Create: `docs-site/app/llms.txt/route.ts`
- Create: `docs-site/app/llms-full.txt/route.ts`
- Create: `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts`
- Modify: `docs-site/next.config.mjs`
- [ ] **Step 1: Add the LLM docs utility**
Create `docs-site/lib/llm-docs.ts` with the following contents:
```ts
import { source } from "@/lib/source";
const SITE_ORIGIN = "https://ktx.dev";
/** One docs page in the shape used to build the machine-readable markdown responses. */
export type LlmDocsPage = {
// Page title; rendered as the top-level `# ` heading of the markdown response.
title: string;
// Optional page description; rendered as a blockquote under the heading when present.
description?: string;
// URL of the rendered docs page.
url: string;
// URL of the markdown variant of the page (the page URL with `.md` appended).
markdownUrl: string;
// Slug segments of the page.
slug: string[];
// Lazily resolves the page's markdown body.
getMarkdown: () => Promise<string>;
};
/**
 * List every page known to the docs `source` as an `LlmDocsPage`.
 *
 * Each entry's `getMarkdown` reads the page's raw text on demand and
 * normalizes it; nothing is read while the list is being built.
 */
export function getLlmDocsPages(): LlmDocsPage[] {
  const entries: LlmDocsPage[] = [];
  for (const page of source.getPages()) {
    entries.push({
      title: page.data.title,
      description: page.data.description,
      url: page.url,
      markdownUrl: `${page.url}.md`,
      slug: page.slugs,
      getMarkdown: async () => {
        const rawText = await page.data.getText("raw");
        return normalizeMarkdown(rawText);
      },
    });
  }
  return entries;
}
/**
 * Look up a single docs page by its slug segments.
 *
 * @param slug - Slug segments of the page, or `undefined`.
 * @returns The page as an `LlmDocsPage`-shaped object, or `null` when the
 *   docs `source` has no page for that slug.
 */
export function getLlmDocsPage(slug: string[] | undefined) {
  const match = source.getPage(slug);
  if (!match) {
    return null;
  }

  return {
    title: match.data.title,
    description: match.data.description,
    url: match.url,
    markdownUrl: `${match.url}.md`,
    slug: match.slugs,
    getMarkdown: async () => {
      const rawText = await match.data.getText("raw");
      return normalizeMarkdown(rawText);
    },
  } satisfies LlmDocsPage;
}
/**
 * Render one page as a standalone markdown document: a `# title` heading, the
 * description as a blockquote when there is one, the canonical and markdown
 * URLs, then the page body.
 */
export async function getPageMarkdown(page: LlmDocsPage) {
  const markdownBody = await page.getMarkdown();

  let heading = `# ${page.title}`;
  if (page.description) {
    heading += `\n\n> ${page.description}`;
  }
  const locations = `Canonical URL: ${page.url}\nMarkdown URL: ${page.markdownUrl}`;

  return `${heading}\n\n${locations}\n\n${markdownBody}`;
}
/**
 * Build the text of the `llms.txt` index.
 *
 * The outline (headings, intro, section order) is fixed in the template
 * below; only the description after each `link(...)` bullet comes from the
 * docs pages returned by `getLlmDocsPages()`.
 */
export function buildLlmsTxt() {
const pages = getLlmDocsPages();
// Index pages by URL so each bullet can look up its page directly.
const byUrl = new Map(pages.map((page) => [page.url, page]));
// Render one bullet. The page's own description wins when a page with that
// URL exists and has one; otherwise the fallback text is used.
const link = (url: string, label: string, fallbackDescription: string) => {
const page = byUrl.get(url);
const description = page?.description ?? fallbackDescription;
return `- [${label}](${url}): ${description}`;
};
// Everything inside the template literal is emitted verbatim, including line breaks.
return `# KTX
> Agent-native context layer for analytics engineering and database agents.
KTX provides semantic-layer files, warehouse scans, knowledge pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins.
## Start Here
${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")}
${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")}
${link("/docs/guides/serving-agents", "Serving Agents", "Expose KTX context through MCP and CLI tools")}
${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and knowledge pages")}
## Machine-Readable Documentation
- [Full documentation](/llms-full.txt): All docs pages in one plain-text markdown response
- [Quickstart markdown](/docs/getting-started/quickstart.md): Raw markdown for the setup guide
- [Agent CLI markdown](/docs/cli-reference/ktx-agent.md): Raw markdown for machine-readable agent commands
- [Serving Agents markdown](/docs/guides/serving-agents.md): Raw markdown for MCP and CLI workflows
## CLI Reference
${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")}
${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")}
${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")}
${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")}
${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")}
## Integrations
${link("/docs/integrations/agent-clients", "Agent Clients", "Configure Claude Code, Cursor, Codex, and OpenCode")}
${link("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses")}
${link("/docs/integrations/context-sources", "Context Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion")}
`;
}
/**
 * Build the `llms-full.txt` body: a title section, a source line, and every
 * docs page rendered by `getPageMarkdown`, joined by `---` rules.
 */
export async function buildLlmsFullTxt() {
  const renderedPages = await Promise.all(
    getLlmDocsPages().map((page, index, all) => getPageMarkdown(page, index, all)),
  );
  const sections = ["# KTX Full Documentation", `Source: ${SITE_ORIGIN}`].concat(renderedPages);
  return sections.join("\n\n---\n\n");
}
/**
 * Tidy raw markdown: strip leading and trailing whitespace, then collapse any
 * run of three or more newlines down to exactly two.
 */
function normalizeMarkdown(markdown: string) {
  const trimmed = markdown.trim();
  return trimmed.replace(/\n{3,}/g, "\n\n");
}
```
- [ ] **Step 2: Add route handlers**
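Create the three route files listed under **Files** above. The snippets below are, in order, `docs-site/app/llms.txt/route.ts`, `docs-site/app/llms-full.txt/route.ts`, and `docs-site/app/llms.mdx/docs/[[...slug]]/route.ts`: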
```ts
import { buildLlmsTxt } from "@/lib/llm-docs";
export const dynamic = "force-static";

/** Serve the llms.txt index as plain text. */
export function GET() {
  const headers = { "Content-Type": "text/plain; charset=utf-8" };
  return new Response(buildLlmsTxt(), { headers });
}
```
```ts
import { buildLlmsFullTxt } from "@/lib/llm-docs";
export const dynamic = "force-static";

/** Serve the full bundled documentation as plain text. */
export async function GET() {
  const body = await buildLlmsFullTxt();
  const headers = { "Content-Type": "text/plain; charset=utf-8" };
  return new Response(body, { headers });
}
```
```ts
// `getLlmDocsPages` is imported as well: `generateStaticParams` below calls
// it, and without the import this file does not compile.
import { getLlmDocsPage, getLlmDocsPages, getPageMarkdown } from "@/lib/llm-docs";
import { notFound } from "next/navigation";

export const dynamic = "force-static";

/**
 * Serve one docs page as markdown.
 *
 * @param _request - Unused; present only to match the route handler signature.
 * @param props - Carries the awaited route params; `slug` holds the page's
 *   slug segments and may be absent.
 * @returns A `text/markdown` response with the rendered page.
 */
export async function GET(
  _request: Request,
  props: { params: Promise<{ slug?: string[] }> },
) {
  const params = await props.params;
  const page = getLlmDocsPage(params.slug);
  // Unknown slug: hand off to Next's not-found handling.
  if (!page) notFound();
  return new Response(await getPageMarkdown(page), {
    headers: { "Content-Type": "text/markdown; charset=utf-8" },
  });
}

/** Enumerate one param set per docs page so every markdown route is known up front. */
export function generateStaticParams() {
  return getLlmDocsPages().map((page) => ({ slug: page.slug }));
}
```
- [ ] **Step 3: Add `.md` rewrite**
Modify `docs-site/next.config.mjs`:
```js
import { createMDX } from "fumadocs-mdx/next";
const withMDX = createMDX();

// Requests for `/docs/<path>.md` are served by the markdown route handler.
const markdownRewrite = {
  source: "/docs/:path*.md",
  destination: "/llms.mdx/docs/:path*",
};

/** @type {import('next').NextConfig} */
const config = {
  rewrites: async () => [markdownRewrite],
};

export default withMDX(config);
```
- [ ] **Step 4: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: Next build completes and static routes include `llms.txt`, `llms-full.txt`, and the LLM markdown route.
### Task 2: Markdown Negotiation
**Files:**
- Create: `docs-site/proxy.ts`
- [ ] **Step 1: Add markdown negotiation proxy**
Create `docs-site/proxy.ts`:
```ts
import { isMarkdownPreferred, rewritePath } from "fumadocs-core/negotiation";
import { NextResponse, type NextRequest } from "next/server";
// Maps a `/docs/...` pathname onto the matching markdown route.
const { rewrite } = rewritePath("/docs/*path", "/llms.mdx/docs/*path");

/**
 * Content negotiation for docs pages: when the request prefers markdown and
 * its path maps to a markdown route, rewrite to that route; otherwise let the
 * request continue unchanged.
 */
export function proxy(request: NextRequest) {
  const markdownPath = isMarkdownPreferred(request)
    ? rewrite(request.nextUrl.pathname)
    : null;

  if (!markdownPath) {
    return NextResponse.next();
  }
  return NextResponse.rewrite(new URL(markdownPath, request.nextUrl));
}

// Only docs URLs pass through the proxy.
export const config = {
  matcher: ["/docs/:path*"],
};
```
- [ ] **Step 2: Verify build**
Run: `pnpm --filter ktx-docs build`
Expected: Build passes with the proxy included.
### Task 3: Agent-Friendly High-Priority Guides
**Files:**
- Modify: `docs-site/content/docs/getting-started/quickstart.mdx`
- Modify: `docs-site/content/docs/guides/serving-agents.mdx`
- Modify: `docs-site/content/docs/guides/writing-context.mdx`
- [ ] **Step 1: Rewrite quickstart structure**
Add sections for:
- Workflow summary
- Generated files
- Common errors and recovery
Keep existing setup detail, but make each command block copy-pasteable and each expected output complete enough for agents to recognize success.
- [ ] **Step 2: Rewrite Serving Agents as API reference**
Add tables for MCP tool inputs and CLI command inputs. Add workflows:
- Answer an analytics question through MCP
- Answer an analytics question through CLI
- Safely execute SQL with row limits
- [ ] **Step 3: Rewrite Writing Context with schemas and workflows**
Add semantic-source field tables, knowledge-page field tables, and workflows:
- Inspect a source
- Edit and validate a source
- Query through the semantic layer
- Write and search a knowledge page
- [ ] **Step 4: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: MDX compiles without syntax errors.
### Task 4: CLI Reference Normalization
**Files:**
- Modify: `docs-site/content/docs/cli-reference/*.mdx`
- [ ] **Step 1: Normalize every CLI page**
For each CLI reference page, ensure this structure exists:
````md
## Command signature
```bash
ktx <command> [subcommand] [options]
```
## Subcommands
| Subcommand | Description |
|---|---|
## Options
| Flag | Type | Required | Description | Default |
|---|---|---|---|---|
## Examples
```bash
ktx <real-command> --real-flag realistic-value
```
## Output
```text
complete expected output shape
```
## Common errors
| Error | Cause | Recovery |
|---|---|---|
````
Only add sections that are relevant to the command; do not invent output for commands whose output is intentionally interactive.
- [ ] **Step 2: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: MDX compiles without syntax errors.
### Task 5: Integration and Concept Page Polish
**Files:**
- Modify: `docs-site/content/docs/integrations/agent-clients.mdx`
- Modify: `docs-site/content/docs/integrations/primary-sources.mdx`
- Modify: `docs-site/content/docs/integrations/context-sources.mdx`
- Modify: `docs-site/content/docs/concepts/*.mdx`
- Modify: `docs-site/content/docs/benchmarks/link-detection.mdx`
- [ ] **Step 1: Normalize integrations**
Add structured sections for supported values, config snippets, authentication, generated files, and recovery notes. Keep existing examples aligned with current KTX commands.
- [ ] **Step 2: Add agent usage notes**
For concept and benchmark pages, add a compact `## Agent usage notes` section that tells agents when the page is relevant and which concrete page to read next.
- [ ] **Step 3: Build check**
Run: `pnpm --filter ktx-docs build`
Expected: MDX compiles without syntax errors.
### Task 6: Route Verification and Final Checks
**Files:**
- No required source changes unless verification finds a bug.
- [ ] **Step 1: Run production build**
Run: `pnpm --filter ktx-docs build`
Expected: Build succeeds.
- [ ] **Step 2: Run TypeScript check**
Run: `pnpm --filter ktx-docs exec tsc --noEmit`
Expected: TypeScript exits successfully.
- [ ] **Step 3: Start local server**
Run: `pnpm --filter ktx-docs start`
Expected: Server starts on an available port.
- [ ] **Step 4: Verify machine-readable routes**
Run:
```bash
curl -i http://localhost:3000/llms.txt
curl -i http://localhost:3000/llms-full.txt
curl -i http://localhost:3000/docs/getting-started/quickstart.md
curl -i -H "Accept: text/markdown" http://localhost:3000/docs/getting-started/quickstart
curl -i http://localhost:3000/docs/not-a-page.md
```
Expected:
- `/llms.txt`: `200`, `Content-Type: text/plain; charset=utf-8`
- `/llms-full.txt`: `200`, `Content-Type: text/plain; charset=utf-8`
- `/docs/getting-started/quickstart.md`: `200`, `Content-Type: text/markdown; charset=utf-8`
- `/docs/getting-started/quickstart` with `Accept: text/markdown`: `200`, `Content-Type: text/markdown; charset=utf-8`
- `/docs/not-a-page.md`: `404`
- [ ] **Step 5: Inspect final diff**
Run: `git diff --stat && git diff --check`
Expected: Diff contains only docs-site and plan changes, with no whitespace errors.

View file

@ -1,813 +0,0 @@
# Demo Guided Tour Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace the disconnected "Try KTX with packaged demo data" flow with a guided tour that walks users through the same setup wizard steps using pre-filled, read-only selections, then connects their agent to the populated demo project.
**Architecture:** A new `setup-demo-tour.ts` module owns the demo tour flow. It renders read-only cards (database, sources), a simulated context build replay using the existing `renderContextBuildView` + `createRepainter` pipeline from `context-build-view.ts`, then hands off to the real `runKtxSetupAgentsStep`. The entry point in `setup.ts` (`runKtxSetupDemoFromEntryMenu`) is rewired to call this new module instead of `runKtxDemo`.
**Tech Stack:** TypeScript (ESM), Node.js raw stdin for keypress handling, existing `@clack/prompts` visual patterns, vitest for tests.
---
### Task 1: Create `setup-demo-tour.ts` with keypress utility and banner
**Files:**
- Create: `packages/cli/src/setup-demo-tour.ts`
- Test: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for `renderDemoBanner`**
```typescript
// packages/cli/src/setup-demo-tour.test.ts
import { describe, expect, it } from 'vitest';
import { renderDemoBanner } from './setup-demo-tour.js';
describe('renderDemoBanner', () => {
  it('includes demo mode explanation', () => {
    const banner = renderDemoBanner();
    // Each phrase must appear somewhere in the rendered banner.
    for (const phrase of ['Demo mode', 'pre-processed', 'read-only']) {
      expect(banner).toContain(phrase);
    }
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test -- setup-demo-tour`
Expected: FAIL - module not found
- [ ] **Step 3: Implement `renderDemoBanner` and `waitForDemoNavigation`**
```typescript
// packages/cli/src/setup-demo-tour.ts
import type { KtxCliIo } from './cli-runtime.js';
import { KtxSetupExitError } from './setup-interrupt.js';
// The escape character (U+001B) that starts every terminal control sequence below.
const ESC = '\u001b';

/** Wrap `text` in the sequences that switch the foreground to cyan and back to the default. */
function cyan(text: string): string {
  return ESC + '[36m' + text + ESC + '[39m';
}

/** Wrap `text` in the sequences that turn dim rendering on and off again. */
function dim(text: string): string {
  return ESC + '[2m' + text + ESC + '[22m';
}
/**
 * Render the two-line demo-mode banner.
 *
 * @returns The banner text, preceded by an empty line and ending with a newline.
 */
export function renderDemoBanner(): string {
  const headline = `┌ ${cyan('Demo mode')} - data has been pre-processed and KTX context is already built.`;
  const explanation = `│ This walkthrough illustrates the setup steps. Selections are pre-filled and read-only.`;
  return `\n${headline}\n${explanation}\n`;
}
/**
 * Wait for one navigation keypress on `stdin`.
 *
 * Puts the stream into raw mode (when supported) and resumes it, then listens
 * for data chunks:
 *   - "\r" or "\n" resolves with 'forward'
 *   - "\x1b" on its own resolves with 'back'
 *   - "\x03" rejects with a `KtxSetupExitError`
 * Any other chunk is ignored. Before settling, the listener is removed and the
 * previous raw-mode flag is restored.
 *
 * @param stdin - Stream to read keys from; defaults to `process.stdin`.
 */
export async function waitForDemoNavigation(
  stdin: NodeJS.ReadStream = process.stdin,
): Promise<'forward' | 'back'> {
  return new Promise((resolve, reject) => {
    // Remember the current raw-mode flag so it can be put back afterwards.
    const previousRawMode = stdin.isRaw;
    if (stdin.setRawMode) stdin.setRawMode(true);
    stdin.resume();

    function stopListening(): void {
      stdin.off('data', handleKey);
      if (stdin.setRawMode) stdin.setRawMode(previousRawMode ?? false);
    }

    function handleKey(chunk: Buffer): void {
      switch (chunk.toString()) {
        case '\r':
        case '\n':
          stopListening();
          resolve('forward');
          return;
        case '\x1b':
          stopListening();
          resolve('back');
          return;
        case '\x03':
          stopListening();
          reject(new KtxSetupExitError());
          return;
        default:
          // Not a navigation key: keep waiting.
          return;
      }
    }

    stdin.on('data', handleKey);
  });
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test -- setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo tour banner and keypress navigation utility"
```
---
### Task 2: Add `renderDemoCard` function
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for `renderDemoCard`**
Append to the test file:
```typescript
import { renderDemoCardContent } from './setup-demo-tour.js';
describe('renderDemoCardContent', () => {
  it('renders a card with title and selections', () => {
    const card = renderDemoCardContent('Database connection', ['PostgreSQL (demo warehouse)']);
    const expectedFragments = [
      'Database connection',
      'PostgreSQL (demo warehouse)',
      'Press Enter to continue',
      'Escape to go back',
    ];
    for (const fragment of expectedFragments) {
      expect(card).toContain(fragment);
    }
  });

  it('renders multiple selections', () => {
    const card = renderDemoCardContent('Context sources', ['dbt', 'Metabase', 'Notion']);
    // Every selection passed in must show up in the card.
    for (const selection of ['dbt', 'Metabase', 'Notion']) {
      expect(card).toContain(selection);
    }
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli run test -- setup-demo-tour`
Expected: FAIL - `renderDemoCardContent` not exported
- [ ] **Step 3: Implement `renderDemoCardContent` and `renderDemoCard`**
Add to `setup-demo-tour.ts`:
```typescript
/**
 * Render a read-only card: a title row, one row per selection marked with a
 * cyan arrow, a dimmed navigation hint, and a closing corner.
 *
 * @param title - Card heading.
 * @param selections - Pre-filled values, one per row.
 * @returns The card text, ending with a newline.
 */
export function renderDemoCardContent(title: string, selections: string[]): string {
  const rows: string[] = [`┌ ${title}`, '│'];
  for (const selection of selections) {
    rows.push(`│ ${cyan('▸')} ${selection}`);
  }
  rows.push('│');
  rows.push(`│ ${dim('Press Enter to continue, Escape to go back')}`);
  rows.push('└');
  // Trailing empty entry so the joined text ends with a newline.
  rows.push('');
  return rows.join('\n');
}
/**
 * Print the demo banner followed by a read-only card, then wait for the user
 * to navigate.
 *
 * @param title - Card heading.
 * @param selections - Pre-filled values shown on the card.
 * @param io - CLI I/O; the banner and card are written to `io.stdout`.
 * @param stdin - Optional input stream, passed through to the navigation waiter.
 * @param waitNav - Optional replacement for `waitForDemoNavigation`.
 * @returns 'forward' or 'back', as reported by the navigation waiter.
 */
export async function renderDemoCard(
  title: string,
  selections: string[],
  io: KtxCliIo,
  stdin?: NodeJS.ReadStream,
  waitNav?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>,
): Promise<'forward' | 'back'> {
  const banner = renderDemoBanner();
  io.stdout.write(banner);
  const card = renderDemoCardContent(title, selections);
  io.stdout.write(card);

  const awaitNavigation = waitNav ?? waitForDemoNavigation;
  return awaitNavigation(stdin);
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli run test -- setup-demo-tour`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo tour read-only card rendering"
```
---
### Task 3: Add demo context build replay animation
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for demo replay event sequence**
Append to the test file:
```typescript
import { buildDemoReplayTimeline, DEMO_REPLAY_TARGETS } from './setup-demo-tour.js';
describe('buildDemoReplayTimeline', () => {
  it('produces events for all four demo targets', () => {
    const timeline = buildDemoReplayTimeline();
    const seenConnections = new Set(timeline.map((event) => event.connectionId));
    const expectedConnections = new Set(['demo-warehouse', 'dbt', 'metabase', 'notion']);
    expect(seenConnections).toEqual(expectedConnections);
  });

  it('ends with all targets done', () => {
    const timeline = buildDemoReplayTimeline();
    // Later events overwrite earlier ones, leaving each connection's last status.
    const finalStatus = new Map<string, string>();
    timeline.forEach((event) => {
      finalStatus.set(event.connectionId, event.status);
    });
    for (const status of finalStatus.values()) {
      expect(status).toBe('done');
    }
  });

  it('events are sorted by delayMs', () => {
    const timeline = buildDemoReplayTimeline();
    let index = 1;
    while (index < timeline.length) {
      expect(timeline[index]!.delayMs).toBeGreaterThanOrEqual(timeline[index - 1]!.delayMs);
      index += 1;
    }
  });
});
describe('DEMO_REPLAY_TARGETS', () => {
it('has one primary source and three context sources', () => {
expect(DEMO_REPLAY_TARGETS.primarySources).toHaveLength(1);
expect(DEMO_REPLAY_TARGETS.contextSources).toHaveLength(3);
});
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: FAIL - exports not found
- [ ] **Step 3: Implement replay timeline and target definitions**
Add to `setup-demo-tour.ts`:
```typescript
import type { KtxPublicIngestPlanTarget } from './public-ingest.js';
import type { ContextBuildTargetState, ContextBuildViewState } from './context-build-view.js';
/**
 * One scripted step of the demo replay: a status update applied to a single
 * target once the replay has been running for `delayMs` milliseconds.
 */
export interface DemoReplayEvent {
// Offset from replay start, in milliseconds, at which the event becomes due.
delayMs: number;
// Connection id of the target this event updates.
connectionId: string;
// Status written onto the target when the event is applied.
status: 'running' | 'done';
// Detail text copied onto the target, or null for none.
detailLine: string | null;
// Summary text copied onto the target, or null for none.
summaryText: string | null;
}
/**
 * Builds the plan-target descriptor for one demo connection.
 *
 * @param connectionId - Identifier of the demo connection.
 * @param operation - `'scan'` for the primary source, `'source-ingest'` for context sources.
 * @param driver - Driver name recorded on the target.
 * @returns A target whose debug command and single step are derived from `operation`.
 */
function createDemoTarget(connectionId: string, operation: 'scan' | 'source-ingest', driver: string): KtxPublicIngestPlanTarget {
  const isScan = operation === 'scan';
  // The CLI verb is `scan` for scans and `ingest` for everything else.
  const cliVerb = isScan ? 'scan' : 'ingest';
  return {
    connectionId,
    driver,
    operation,
    debugCommand: `ktx ${cliVerb} ${connectionId}`,
    steps: isScan ? ['scan'] : ['source-ingest'],
  };
}
const primaryTarget = createDemoTarget('demo-warehouse', 'scan', 'postgres');
const dbtTarget = createDemoTarget('dbt', 'source-ingest', 'dbt');
const metabaseTarget = createDemoTarget('metabase', 'source-ingest', 'metabase');
const notionTarget = createDemoTarget('notion', 'source-ingest', 'notion');
/**
 * Creates the initial view state for a target: queued, with no detail or
 * summary text, no start time, and zero elapsed time.
 *
 * @param target - The plan target the state belongs to.
 * @returns A fresh state object referencing `target`.
 */
function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState {
  const initialState: ContextBuildTargetState = {
    target,
    status: 'queued',
    detailLine: null,
    summaryText: null,
    startedAt: null,
    elapsedMs: 0,
  };
  return initialState;
}
export const DEMO_REPLAY_TARGETS = {
primarySources: [primaryTarget],
contextSources: [dbtTarget, metabaseTarget, notionTarget],
};
/**
 * Returns the scripted demo replay timeline, ordered by `delayMs`.
 *
 * Each target goes `running` (optionally with a progress update) and then
 * `done`; the next target starts at the same offset at which the previous one
 * finishes.
 *
 * @returns A new array of ten replay events on every call.
 */
export function buildDemoReplayTimeline(): DemoReplayEvent[] {
  // Small local factories keep the table below readable.
  const running = (delayMs: number, connectionId: string, detailLine: string): DemoReplayEvent => ({
    delayMs,
    connectionId,
    status: 'running',
    detailLine,
    summaryText: null,
  });
  const done = (delayMs: number, connectionId: string): DemoReplayEvent => ({
    delayMs,
    connectionId,
    status: 'done',
    detailLine: null,
    summaryText: 'completed',
  });

  return [
    running(0, 'demo-warehouse', 'scanning...'),
    running(600, 'demo-warehouse', '[50%] scanning...'),
    done(1200, 'demo-warehouse'),
    running(1200, 'dbt', 'ingesting...'),
    running(1800, 'dbt', '[60%] ingesting...'),
    done(2200, 'dbt'),
    running(2200, 'metabase', 'ingesting...'),
    done(2800, 'metabase'),
    running(2800, 'notion', 'ingesting...'),
    done(3400, 'notion'),
  ];
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: PASS
- [ ] **Step 5: Implement `runDemoContextReplay` animation driver**
Add to `setup-demo-tour.ts`:
```typescript
import { renderContextBuildView, createRepainter } from './context-build-view.js';
/**
 * Plays the scripted demo timeline as an animated context-build view, prints
 * the completion summary, and then waits for the user to navigate.
 *
 * A frame is painted every `FRAME_MS` milliseconds. On each frame every
 * timeline event whose `delayMs` has elapsed is applied to its target, running
 * targets get their elapsed time refreshed, and the view is repainted. The
 * animation stops once the timeline is exhausted.
 *
 * @param io - CLI I/O handle used for repainting and for the summary.
 * @param stdin - Optional input stream forwarded to the navigation waiter.
 * @param waitNav - Optional navigation waiter (mirrors `renderDemoCard`);
 *   falls back to `waitForDemoNavigation`.
 * @returns `'forward'` or `'back'`, as resolved by the navigation waiter.
 * @throws Re-throws (as a rejection) any error raised while applying events or
 *   painting a frame; the frame timer is cleared first.
 */
export async function runDemoContextReplay(
  io: KtxCliIo,
  stdin?: NodeJS.ReadStream,
  waitNav?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>,
): Promise<'forward' | 'back'> {
  const FRAME_MS = 120;
  const repainter = createRepainter(io);
  const timeline = buildDemoReplayTimeline();
  const state: ContextBuildViewState = {
    primarySources: DEMO_REPLAY_TARGETS.primarySources.map((t) => createTargetState(t)),
    contextSources: DEMO_REPLAY_TARGETS.contextSources.map((t) => createTargetState(t)),
    frame: 0,
    startedAt: Date.now(),
    totalElapsedMs: 0,
  };
  const allTargets = [...state.primarySources, ...state.contextSources];
  const targetMap = new Map(allTargets.map((t) => [t.target.connectionId, t]));
  const startTime = Date.now();
  let eventIndex = 0;

  /** Advances the replay by one frame; returns true once no events remain. */
  const tick = (): boolean => {
    // Read the clock once so every value derived in this frame is consistent.
    const now = Date.now();
    const elapsed = now - startTime;
    state.frame += 1;
    state.totalElapsedMs = elapsed;

    while (eventIndex < timeline.length && timeline[eventIndex]!.delayMs <= elapsed) {
      const event = timeline[eventIndex]!;
      const target = targetMap.get(event.connectionId);
      if (target) {
        target.status = event.status;
        target.detailLine = event.detailLine;
        target.summaryText = event.summaryText;
        if (event.status === 'running' && target.startedAt === null) {
          target.startedAt = now;
        }
        if (event.status === 'done') {
          // Explicit null check, matching the `startedAt === null` test above.
          target.elapsedMs = target.startedAt !== null ? now - target.startedAt : 0;
        }
      }
      eventIndex += 1;
    }

    for (const t of allTargets) {
      if (t.status === 'running' && t.startedAt !== null) {
        t.elapsedMs = now - t.startedAt;
      }
    }

    repainter.paint(renderContextBuildView(state, { styled: io.stdout.isTTY ?? false, showHint: false }));

    // Once every event has been applied nothing can change any more, so stop
    // here even if some target never reached `done`; waiting for that would
    // spin the timer forever.
    return eventIndex >= timeline.length;
  };

  await new Promise<void>((resolve, reject) => {
    const interval = setInterval(() => {
      try {
        if (tick()) {
          clearInterval(interval);
          resolve();
        }
      } catch (error) {
        // Never leave the timer running or the promise pending after a failure.
        clearInterval(interval);
        reject(error);
      }
    }, FRAME_MS);
  });

  io.stdout.write(renderDemoContextCompletionSummary());
  return (waitNav ?? waitForDemoNavigation)(stdin);
}
/**
 * Renders the text shown after the demo replay finishes: a headline, a
 * placeholder line for the eventual counts, and the dimmed navigation hint.
 *
 * @returns The summary text, rows joined with `\n`, starting and ending with a blank row.
 */
function renderDemoContextCompletionSummary(): string {
  const headline = `${cyan('★')} KTX finished ingesting demo data`;
  const navigationHint = ` ${dim('Press Enter to continue, Escape to go back')}`;
  return [
    '',
    headline,
    '',
    ' Placeholder - final counts will come from pre-packaged demo results.',
    '',
    navigationHint,
    '',
  ].join('\n');
}
```
Note: `renderDemoContextCompletionSummary` is a placeholder that will be updated when
the user provides the real pre-packaged demo data. The summary counts (business areas,
query definitions, knowledge pages) will be populated from those assets.
- [ ] **Step 6: Run tests and type-check**
Run: `pnpm --filter @ktx/cli run type-check && pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: PASS
- [ ] **Step 7: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo context build replay animation"
```
---
### Task 4: Add transition message and completion summary
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing tests**
Append to test file:
```typescript
import { renderDemoAgentTransition, renderDemoCompletionSummary } from './setup-demo-tour.js';
describe('renderDemoAgentTransition', () => {
  it('includes transition message about connecting agent', () => {
    const rendered = renderDemoAgentTransition();
    for (const fragment of ['Demo project is ready', 'connect your agent']) {
      expect(rendered).toContain(fragment);
    }
  });
});

describe('renderDemoCompletionSummary', () => {
  // Shared fixture: the project directory passed to every call below.
  const demoDir = '/tmp/ktx-demo-abc123';

  it('includes project path and temp warning', () => {
    const rendered = renderDemoCompletionSummary(demoDir, true);
    for (const fragment of ['/tmp/ktx-demo-abc123', 'temporary', 'ktx setup']) {
      expect(rendered).toContain(fragment);
    }
  });

  it('shows manual agent instructions when agent not installed', () => {
    expect(renderDemoCompletionSummary(demoDir, false)).toContain('ktx setup --agents');
  });

  it('shows success message when agent installed', () => {
    expect(renderDemoCompletionSummary(demoDir, true)).toContain('agent is connected');
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: FAIL - exports not found
- [ ] **Step 3: Implement transition and completion rendering**
Add to `setup-demo-tour.ts`:
```typescript
/**
 * Renders the framed message shown between the context-build replay and the
 * agent selection step.
 *
 * @returns The message text, rows joined with `\n`, starting and ending with a blank row.
 */
export function renderDemoAgentTransition(): string {
  // Body rows share the same frame prefix.
  const bodyRows = [
    'Your KTX context has been built with demo data.',
    'Select an agent to start using it.',
  ].map((text) => `│ ${text}`);

  return ['', `┌ Demo project is ready - let's connect your agent`, '│', ...bodyRows, '└', ''].join('\n');
}
/**
 * Renders the final demo summary.
 *
 * When the agent step succeeded a confirmation line is shown; otherwise the
 * command for connecting an agent later is printed. Both variants end with a
 * warning that the project lives in a temporary directory, plus the project path.
 *
 * @param projectDir - Directory of the demo project.
 * @param agentInstalled - Whether an agent was connected during the tour.
 * @returns The summary text, rows joined with `\n`.
 */
export function renderDemoCompletionSummary(projectDir: string, agentInstalled: boolean): string {
  // The follow-up command is meant to be copy-pasted into a shell, so a path
  // containing whitespace must be quoted or it splits into several arguments.
  // Paths without whitespace render exactly as before.
  const projectDirArg = /\s/.test(projectDir) ? `"${projectDir}"` : projectDir;

  const lines = [
    '',
    `${cyan('★')} KTX demo is ready`,
    '',
  ];
  if (agentInstalled) {
    lines.push(' Your agent is connected to a demo KTX project.');
  } else {
    lines.push(' Demo project created. Connect an agent to start using it:');
    lines.push(` $ ktx setup --agents --project-dir ${projectDirArg}`);
  }
  lines.push(
    '',
    ` ${dim('⚠')} This project is in a temporary directory and will be`,
    ` cleaned up by your system. To set up KTX with your own`,
    ' data, run: ktx setup',
    '',
    // Informational line, not a command: show the path verbatim.
    ` Project: ${projectDir}`,
    '',
  );
  return lines.join('\n');
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add demo tour transition and completion summary"
```
---
### Task 5: Implement `runDemoTour` orchestrator
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Modify: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write the failing test for the orchestrator**
Append to test file:
```typescript
import { vi } from 'vitest';
import type { KtxSetupAgentsResult } from './setup-agents.js';
import { runDemoTour } from './setup-demo-tour.js';
describe('runDemoTour', () => {
  // Minimal in-memory io: stdout chunks are captured, stderr is discarded.
  const createMockIo = () => {
    const chunks: string[] = [];
    const stdout = {
      isTTY: true,
      columns: 80,
      write: (chunk: string) => {
        chunks.push(chunk);
      },
    };
    const stderr = { write: () => {} };
    return { io: { stdout, stderr }, chunks };
  };

  it('returns 0 on successful tour with agent installed', async () => {
    const { io } = createMockIo();
    // Agents step reports success with one project-scoped install.
    const mockAgents = vi.fn<() => Promise<KtxSetupAgentsResult>>().mockResolvedValue({
      status: 'ready',
      projectDir: '/tmp/test',
      installs: [{ target: 'claude-code' as const, scope: 'project' as const, mode: 'both' as const }],
    });
    // Every navigation prompt answers "forward".
    const navigation = vi.fn<() => Promise<'forward' | 'back'>>().mockResolvedValue('forward');

    const exitCode = await runDemoTour({ inputMode: 'auto' }, io, {
      agents: mockAgents,
      waitForNavigation: navigation,
      skipReplayAnimation: true,
    });

    expect(exitCode).toBe(0);
    expect(mockAgents).toHaveBeenCalled();
  });

  it('handles back navigation from first step', async () => {
    const { io } = createMockIo();
    // Every navigation prompt answers "back".
    const navigation = vi.fn<() => Promise<'forward' | 'back'>>().mockResolvedValue('back');

    const exitCode = await runDemoTour({ inputMode: 'auto' }, io, {
      waitForNavigation: navigation,
      skipReplayAnimation: true,
    });

    expect(exitCode).toBe(0);
  });
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: FAIL - `runDemoTour` not exported or wrong signature
- [ ] **Step 3: Implement `runDemoTour`**
Add to `setup-demo-tour.ts`:
```typescript
import { defaultDemoProjectDir, ensureSeededDemoProject } from './demo-assets.js';
import type { KtxSetupAgentsResult } from './setup-agents.js';
import { runKtxSetupAgentsStep } from './setup-agents.js';
type DemoStep = 'databases' | 'sources' | 'context' | 'agents';
const DEMO_STEPS: DemoStep[] = ['databases', 'sources', 'context', 'agents'];
/** Optional overrides for the collaborators `runDemoTour` calls; each one has a default. */
export interface DemoTourDeps {
// Runner for the agents step; `runDemoTour` falls back to `runKtxSetupAgentsStep`.
agents?: (args: Parameters<typeof runKtxSetupAgentsStep>[0], io: KtxCliIo) => Promise<KtxSetupAgentsResult>;
// Navigation waiter; `runDemoTour` falls back to `waitForDemoNavigation`.
waitForNavigation?: (stdin?: NodeJS.ReadStream) => Promise<'forward' | 'back'>;
// Demo project seeding function; `runDemoTour` falls back to `ensureSeededDemoProject`.
ensureProject?: typeof ensureSeededDemoProject;
// When true, the context step only waits for navigation instead of running the replay animation.
skipReplayAnimation?: boolean;
}
/**
 * Drives the demo tour: seeds the demo project, then walks the steps in
 * `DEMO_STEPS`, moving forward or back according to each step's navigation result.
 *
 * - `databases` / `sources`: read-only cards.
 * - `context`: banner plus either the replay animation or, when
 *   `deps.skipReplayAnimation` is set, a plain navigation wait.
 * - `agents`: runs the agents step; any result other than `'back'` prints the
 *   completion summary and ends the tour.
 *
 * Going back from the first step ends the tour.
 *
 * @param args - `inputMode` is forwarded to the agents step.
 * @param io - CLI I/O handle.
 * @param deps - Optional overrides for navigation, project seeding, and the agents runner.
 * @returns Always `0`.
 */
export async function runDemoTour(
  args: { inputMode: 'auto' | 'disabled' },
  io: KtxCliIo,
  deps: DemoTourDeps = {},
): Promise<number> {
  const navigate = deps.waitForNavigation ?? waitForDemoNavigation;
  const seedProject = deps.ensureProject ?? ensureSeededDemoProject;
  const projectDir = defaultDemoProjectDir();
  await seedProject({ projectDir });

  let cursor = 0;
  while (cursor < DEMO_STEPS.length) {
    let direction: 'forward' | 'back';

    switch (DEMO_STEPS[cursor]!) {
      case 'databases':
        direction = await renderDemoCard('Database connection', ['PostgreSQL (demo warehouse)'], io, undefined, navigate);
        break;

      case 'sources':
        direction = await renderDemoCard('Context sources', ['dbt', 'Metabase', 'Notion'], io, undefined, navigate);
        break;

      case 'context':
        io.stdout.write(renderDemoBanner());
        // The replay is called with `io` only, so the injected navigation
        // waiter is consulted on the skip path alone.
        direction = deps.skipReplayAnimation ? await navigate() : await runDemoContextReplay(io);
        break;

      default: {
        // Remaining step: 'agents'.
        io.stdout.write(renderDemoAgentTransition());
        const runAgents = deps.agents ?? runKtxSetupAgentsStep;
        const agentsResult = await runAgents(
          {
            projectDir,
            inputMode: args.inputMode,
            yes: false,
            agents: true,
            scope: 'project',
            mode: 'both',
            skipAgents: false,
          },
          io,
        );
        if (agentsResult.status !== 'back') {
          // Terminal outcome: report whether an agent ended up installed.
          io.stdout.write(renderDemoCompletionSummary(projectDir, agentsResult.status === 'ready'));
          return 0;
        }
        direction = 'back';
      }
    }

    if (direction === 'forward') {
      cursor += 1;
      continue;
    }
    // Backing out of the first step leaves the tour.
    if (cursor === 0) return 0;
    cursor -= 1;
  }
  return 0;
}
```
- [ ] **Step 4: Run the test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts`
Expected: PASS
- [ ] **Step 5: Run type-check**
Run: `pnpm --filter @ktx/cli run type-check`
Expected: PASS - all types align with existing interfaces
- [ ] **Step 6: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "feat(cli): add runDemoTour orchestrator with step navigation"
```
---
### Task 6: Wire up in `setup.ts`
**Files:**
- Modify: `packages/cli/src/setup.ts`
- [ ] **Step 1: Read the current `runKtxSetupDemoFromEntryMenu` function**
Read `packages/cli/src/setup.ts` and locate `runKtxSetupDemoFromEntryMenu` (around lines 218-233).
Current implementation:
```typescript
async function runKtxSetupDemoFromEntryMenu(
args: Extract<KtxSetupArgs, { command: 'run' }>,
io: KtxCliIo,
deps: KtxSetupDeps,
): Promise<number> {
const runner = deps.demo ?? (await import('./demo.js')).runKtxDemo;
return await runner(
{
command: 'seeded',
projectDir: defaultDemoProjectDir(),
outputMode: 'viz',
inputMode: args.inputMode,
},
io,
);
}
```
- [ ] **Step 2: Replace with demo tour call**
Replace the function body to call `runDemoTour`:
```typescript
/**
 * Entry-menu handler for the demo option: lazily loads the demo tour module
 * and runs the tour, forwarding the input mode and the injected agents runner.
 *
 * @param args - Parsed `run` setup arguments; only `inputMode` is used.
 * @param io - CLI I/O handle.
 * @param deps - Setup dependencies; only `deps.agents` is forwarded.
 * @returns The exit code returned by `runDemoTour`.
 */
async function runKtxSetupDemoFromEntryMenu(
  args: Extract<KtxSetupArgs, { command: 'run' }>,
  io: KtxCliIo,
  deps: KtxSetupDeps,
): Promise<number> {
  // Dynamic import keeps the tour module off the path of non-demo setup runs.
  const demoTour = await import('./setup-demo-tour.js');
  const tourArgs = { inputMode: args.inputMode };
  const tourDeps = { agents: deps.agents };
  return await demoTour.runDemoTour(tourArgs, io, tourDeps);
}
```
- [ ] **Step 3: Update imports - remove unused `defaultDemoProjectDir` import if no longer needed elsewhere in setup.ts**
Check if `defaultDemoProjectDir` is used elsewhere in `setup.ts`. If it's only used
in `runKtxSetupDemoFromEntryMenu`, remove the import. If used elsewhere, keep it.
Also check if the `KtxDemoArgs` import is still needed. If `runKtxSetupDemoFromEntryMenu`
was the only consumer of `deps.demo` with that type, it may now be unused. Keep the
`demo` slot in `KtxSetupDeps` for backwards compatibility but it will no longer be
called from the entry menu path.
- [ ] **Step 4: Run type-check and tests**
Run: `pnpm --filter @ktx/cli run type-check && pnpm --filter @ktx/cli run test`
Expected: PASS - existing tests continue to work, demo tour is now wired in
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup.ts
git commit -m "feat(cli): wire demo tour into setup entry menu"
```
---
### Task 7: End-to-end verification
**Files:**
- None (verification only)
- [ ] **Step 1: Run full test suite**
Run: `pnpm --filter @ktx/cli run test 2>&1 | tee /tmp/ktx-demo-tour-test.log`
Expected: All tests pass. Check the output for any regressions.
- [ ] **Step 2: Run type-check across workspace**
Run: `pnpm run type-check`
Expected: PASS
- [ ] **Step 3: Run pre-commit checks if available**
Run: `pnpm run check` (if configured)
Expected: PASS
- [ ] **Step 4: Manual smoke test (if TTY available)**
Run: `pnpm --filter @ktx/cli run build && node packages/cli/dist/cli.js setup`
1. Select "Try KTX with packaged demo data"
2. Verify demo banner appears with full explanation text
3. Verify "Database connection" card shows with "PostgreSQL (demo warehouse)"
4. Press Enter → verify "Context sources" card shows with dbt, Metabase, Notion
5. Press Escape → verify you go back to database card
6. Press Enter twice → verify context build replay animation runs
7. Verify completion summary appears after replay
8. Press Enter → verify agents step prompt appears (interactive)
9. Press Escape all the way back → verify you return to entry menu
- [ ] **Step 5: Final commit if any adjustments needed**
```bash
git add -A
git commit -m "fix(cli): demo tour adjustments from smoke test"
```
---
## Open Seams for Demo Data
When the user provides the real pre-packaged demo results, update these locations:
1. **`renderDemoContextCompletionSummary()`** in `setup-demo-tour.ts` - replace placeholder text with actual counts (business areas, query definitions, knowledge pages) from the demo data
2. **`buildDemoReplayTimeline()`** in `setup-demo-tour.ts` - adjust timing and progress details to match the real ingestion profile
3. **`demo-assets.ts`** - update `REQUIRED_SEEDED_ASSET_PATHS` and `demoConfig()` if the demo dataset changes from SQLite/Orbit to Postgres/dbt/Metabase/Notion
4. **Pre-packaged asset files** in `packages/cli/assets/demo/` - replace with the new demo dataset

View file

@ -1,886 +0,0 @@
# Historic SQL Docs Smoke And Config Cleanup Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Finish the historic-SQL redesign follow-through by making setup emit the canonical config shape and replacing stale PGSS baseline/delta/reset example docs with unified artifact and no-WorkUnit idempotency checks.
**Architecture:** This is the acceptance/documentation slice after the adapter cutover. Product code changes are limited to `ktx setup` Historic SQL config serialization; the Postgres example smoke remains a deterministic stage-only path that uses the real local adapter, managed daemon, Docker Postgres, and raw artifact diffing without requiring LLM credentials. Public docs are updated to match the unified Postgres, BigQuery, and Snowflake reader behavior already present in source.
**Tech Stack:** TypeScript, Vitest, Bash, Node.js ESM, `node:test`, pnpm, Docker Compose, KTX local stage-only ingest, managed `ktx-daemon`.
---
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans already based on this spec:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md` - implemented in source: `skill-schemas.ts`, `SemanticLayerSource.usage`, `mergeUsagePreservingExternal()`, `/sql/analyze-batch`, and `SqlAnalysisPort.analyzeBatch()`.
- `docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md` - implemented in source: usage fields in `buildSemanticLayerSourceSearchText()`, SQLite FTS snippets, query-mode `score`, `frequencyTier`, and agent/MCP list propagation.
- `docs/superpowers/plans/2026-05-11-historic-sql-unified-hot-path.md` - implemented in source: unified config/types, bucket helpers, `stage-unified.ts`, aggregate readers, and `chunk-unified.ts`.
- `docs/superpowers/plans/2026-05-11-historic-sql-skills-projection-cutover.md` - implemented in source: replacement skills, evidence tool, projection, post-processor wiring, production adapter cutover, legacy source deletion, and `minExecutions` alias support.
- `docs/superpowers/plans/2026-05-11-historic-sql-cross-dialect-readiness.md` - implemented in source: cross-dialect CLI wiring, generic reader injection, probe result normalization, and PGSS max informational doctor output.
Remaining gap this plan covers:
- `examples/postgres-historic/scripts/smoke.sh`, `examples/postgres-historic/README.md`, `examples/README.md`, and `scripts/examples-docs.test.mjs` still describe the legacy baseline/delta/reset model.
- Public docs still mention `minCalls` and say BigQuery/Snowflake local CLI Historic SQL uses the Postgres path.
- `packages/cli/src/setup-databases.ts` still writes `serviceAccountUserPatterns` for new setup output even though the redesign's canonical runtime config is `filters.serviceAccounts`.
## File Structure
- Modify `packages/cli/src/setup-databases.ts`: write canonical `historicSql.filters.serviceAccounts` blocks from setup flags while keeping existing parser compatibility in `packages/context/src/ingest/adapters/historic-sql/types.ts`.
- Modify `packages/cli/src/setup-databases.test.ts`: assert generated YAML uses `filters` and no longer writes `serviceAccountUserPatterns`.
- Modify `scripts/examples-docs.test.mjs`: lock public example docs and smoke script to the unified artifact contract.
- Modify `examples/postgres-historic/scripts/smoke.sh`: assert `manifest.json`, `tables/*.json`, `patterns-input.json`, per-run `workUnitCount`, and stage-only runtime under 60 seconds after runtime warm-up.
- Modify `examples/postgres-historic/README.md`: replace baseline/delta/reset instructions with unified artifact, no-WorkUnit idempotency, and `minExecutions` language.
- Modify `examples/README.md`: replace the stale one-paragraph summary.
- Modify `docs/content/docs/integrations/primary-sources.mdx`: update Postgres, Snowflake, and BigQuery Historic SQL docs to the unified config and current support status.
- Modify `docs/content/docs/cli-reference/ktx-setup.mdx`: document `--historic-sql-min-executions` as primary and `--historic-sql-min-calls` as the one-release alias.
### Task 1: Emit Canonical Historic SQL Setup Config
**Files:**
- Modify: `packages/cli/src/setup-databases.test.ts`
- Modify: `packages/cli/src/setup-databases.ts`
- [ ] **Step 1: Update failing setup config assertions**
In `packages/cli/src/setup-databases.test.ts`, update the Snowflake expectation in `writes Historic SQL config for supported Snowflake databases after validation succeeds` to:
```typescript
expect(config.connections.snowflake).toMatchObject({
driver: 'snowflake',
authMethod: 'password',
historicSql: {
enabled: true,
dialect: 'snowflake',
windowDays: 30,
filters: {
dropTrivialProbes: true,
serviceAccounts: {
patterns: ['^svc_'],
mode: 'exclude',
},
},
redactionPatterns: ['(?i)secret'],
},
});
expect(config.connections.snowflake.historicSql).not.toHaveProperty('serviceAccountUserPatterns');
```
In the same file, update the Postgres expectation in `writes Postgres Historic SQL config with minExecutions and ignores window/redaction output` to:
```typescript
expect(config.connections.warehouse).toMatchObject({
driver: 'postgres',
url: 'env:DATABASE_URL',
schemas: ['public'],
historicSql: {
enabled: true,
dialect: 'postgres',
minExecutions: 12,
filters: {
dropTrivialProbes: true,
serviceAccounts: {
patterns: ['^svc_'],
mode: 'exclude',
},
},
},
});
expect(config.connections.warehouse.historicSql).not.toHaveProperty('minCalls');
expect(config.connections.warehouse.historicSql).not.toHaveProperty('windowDays');
expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns');
expect(config.connections.warehouse.historicSql).not.toHaveProperty('serviceAccountUserPatterns');
```
Update the existing BigQuery connection expectation in `writes Historic SQL config for supported existing database connections` to:
```typescript
expect(config.connections.analytics).toMatchObject({
historicSql: {
enabled: true,
dialect: 'bigquery',
windowDays: 45,
filters: {
dropTrivialProbes: true,
},
redactionPatterns: [],
},
});
expect(config.connections.analytics.historicSql).not.toHaveProperty('serviceAccountUserPatterns');
```
Update the existing Postgres connection expectation in `enables Historic SQL on an existing Postgres connection` to:
```typescript
expect(config.connections.warehouse).toMatchObject({
historicSql: {
enabled: true,
dialect: 'postgres',
minExecutions: 8,
filters: {
dropTrivialProbes: true,
},
},
});
expect(config.connections.warehouse.historicSql).not.toHaveProperty('serviceAccountUserPatterns');
```
- [ ] **Step 2: Run setup tests to verify they fail**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts --testNamePattern "Historic SQL"
```
Expected: FAIL because `historicSql.serviceAccountUserPatterns` is still written and `historicSql.filters` is missing from generated setup YAML.
- [ ] **Step 3: Write canonical setup config**
In `packages/cli/src/setup-databases.ts`, add this helper near `maybeApplyHistoricSqlConfig()`:
```typescript
/**
 * Builds the canonical `historicSql.filters` block written by setup.
 *
 * `dropTrivialProbes` is always enabled. A `serviceAccounts` exclusion entry
 * is added only when at least one pattern was supplied.
 *
 * @param patterns - Service-account patterns from the setup flags, if any.
 * @returns The filters object to store under `historicSql.filters`.
 */
function historicSqlFiltersForSetup(patterns: string[] | undefined) {
  // No patterns: emit the minimal block without a `serviceAccounts` key.
  if (patterns == null || patterns.length === 0) {
    return { dropTrivialProbes: true };
  }
  return {
    dropTrivialProbes: true,
    serviceAccounts: {
      patterns,
      mode: 'exclude' as const,
    },
  };
}
```
Then replace the `common` object inside `maybeApplyHistoricSqlConfig()` with:
```typescript
const common: Record<string, unknown> = {
...existing,
enabled: true,
dialect,
filters: historicSqlFiltersForSetup(input.args.historicSqlServiceAccountPatterns),
};
delete common.serviceAccountUserPatterns;
```
Keep the existing `minExecutions`, `windowDays`, and `redactionPatterns` branches unchanged after this object replacement.
- [ ] **Step 4: Run setup tests to verify they pass**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts --testNamePattern "Historic SQL"
```
Expected: PASS for all Historic SQL setup tests in `src/setup-databases.test.ts`.
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts
git commit -m "fix: write canonical historic sql setup filters"
```
### Task 2: Lock Example Docs To Unified Historic SQL Terms
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Update the failing example docs test**
Replace the `documents the Postgres historic SQL smoke example` test body in `scripts/examples-docs.test.mjs` with:
```javascript
// Locks the Postgres historic-SQL example (READMEs, compose file, init SQL,
// workload and smoke scripts) to the unified artifact wording and flags.
it('documents the Postgres historic SQL smoke example', async () => {
const examples = await readText('examples/README.md');
const readme = await readText('examples/postgres-historic/README.md');
const compose = await readText('examples/postgres-historic/docker-compose.yml');
const initSql = await readText('examples/postgres-historic/init/001-schema.sql');
const workload = await readText('examples/postgres-historic/scripts/generate-workload.sh');
const smoke = await readText('examples/postgres-historic/scripts/smoke.sh');
// Top-level examples index mentions the example and the unified artifacts.
assert.match(examples, /postgres-historic/);
assert.match(examples, /unified Historic SQL artifacts/);
// Example README: setup flags, doctor command, and the unified artifact names.
assert.match(readme, /--enable-historic-sql/);
assert.match(readme, /--historic-sql-min-executions 2/);
assert.match(readme, /ktx dev doctor --project-dir/);
assert.match(readme, /Postgres Historic SQL/);
assert.match(readme, /manifest\.json/);
assert.match(readme, /tables\/\*\.json/);
assert.match(readme, /patterns-input\.json/);
assert.match(readme, /workUnitCount: 0/);
// Docker Compose and init SQL enable pg_stat_statements for the reader role.
assert.match(compose, /postgres:14/);
assert.match(compose, /shared_preload_libraries=pg_stat_statements/);
assert.match(compose, /pg_stat_statements.track=top/);
assert.match(initSql, /CREATE EXTENSION IF NOT EXISTS pg_stat_statements/);
assert.match(initSql, /GRANT pg_read_all_stats TO ktx_reader/);
// Workload script exercises a join and both database users.
assert.match(workload, /JOIN customers/);
assert.match(workload, /app_user/);
assert.match(workload, /etl_user/);
// Smoke script uses the unified assertions and the managed runtime.
assert.match(smoke, /assert_unified_snapshot/);
assert.match(smoke, /assert_stage_record "\$UNCHANGED_RECORD" unchanged zero/);
assert.match(smoke, /--historic-sql-min-executions 2/);
assert.match(smoke, /KTX_RUNTIME_ROOT/);
assert.match(smoke, /managedDaemon/);
assert.match(smoke, /installPolicy: 'auto'/);
assert.match(smoke, /getKtxCliPackageInfo/);
// Legacy python-service wiring and baseline/delta terms must be gone from the smoke script.
assert.doesNotMatch(smoke, /python-service/);
assert.doesNotMatch(smoke, /PYTHON_SERVICE/);
assert.doesNotMatch(smoke, /uvicorn app\.main:app/);
assert.doesNotMatch(smoke, /export KTX_SQL_ANALYSIS_URL/);
assert.doesNotMatch(smoke, /baselineFirstRun|degraded|statsResetAt|assert_manifest/);
// The README must not mention the legacy service, legacy terms, or the old flag name.
assert.doesNotMatch(readme, /python-service/);
assert.doesNotMatch(readme, /KTX_SQL_ANALYSIS_URL/);
assert.doesNotMatch(readme, /baselineFirstRun|degraded: true|statsResetAt|fresh PGSS baseline|delta-only/);
assert.doesNotMatch(readme, /--historic-sql-min-calls/);
});
```
- [ ] **Step 2: Run the docs test to verify it fails**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL because the current README and smoke script still mention `--historic-sql-min-calls`, `baselineFirstRun`, `degraded`, and the legacy `assert_manifest` helper.
- [ ] **Step 3: Commit the failing test**
```bash
git add scripts/examples-docs.test.mjs
git commit -m "test: expect unified historic sql example docs"
```
### Task 3: Rewrite The Postgres Historic SQL Smoke
**Files:**
- Modify: `examples/postgres-historic/scripts/smoke.sh`
- [ ] **Step 1: Replace the smoke script with unified artifact assertions**
Replace `examples/postgres-historic/scripts/smoke.sh` with:
```bash
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
KTX_ROOT="$(cd "$EXAMPLE_DIR/../.." && pwd)"
COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml"
PROJECT_PARENT="${KTX_POSTGRES_HISTORIC_PROJECT_PARENT:-$(mktemp -d)}"
PROJECT_DIR="$PROJECT_PARENT/postgres-historic-ktx"
KTX_BIN="$KTX_ROOT/packages/cli/dist/bin.js"
MAX_STAGE_SECONDS="${KTX_POSTGRES_HISTORIC_MAX_STAGE_SECONDS:-60}"
export KTX_RUNTIME_ROOT="$PROJECT_PARENT/managed-runtime"
unset KTX_DAEMON_URL
unset KTX_SQL_ANALYSIS_URL
# Best-effort teardown, registered as the EXIT trap: stops the managed runtime
# when the CLI build exists, and tears down the Docker Compose stack (with
# volumes) unless KTX_POSTGRES_HISTORIC_KEEP_DOCKER is "1". Failures of either
# command are ignored and their output is discarded.
cleanup() {
  local keep_docker="${KTX_POSTGRES_HISTORIC_KEEP_DOCKER:-0}"

  # Skip the runtime stop entirely when the CLI binary was never built.
  [[ ! -f "$KTX_BIN" ]] || node "$KTX_BIN" runtime stop >/dev/null 2>&1 || true

  # Leave the containers running only when explicitly requested.
  [[ "$keep_docker" == "1" ]] || docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true
}
trap cleanup EXIT
# Prints the path of the lexicographically last manifest.json found under the
# smoke project's raw-sources/warehouse/historic-sql directory. Prints nothing
# when no manifest.json is found there.
latest_manifest() {
find "$PROJECT_DIR/raw-sources/warehouse/historic-sql" -name manifest.json | sort | tail -n 1
}
# Validates one staged Historic SQL snapshot with an inline Node script.
#
# Arguments:
#   $1 - path to the snapshot's manifest.json
#
# The script throws (so node exits non-zero) on the first failed check. It checks:
#   - manifest source/dialect, positive snapshotRowCount and touchedTableCount,
#     integer parseFailures, and array-valued warnings / probeWarnings;
#   - that none of the legacy manifest keys are present;
#   - that tables/ next to the manifest holds exactly touchedTableCount *.json
#     files, and that the first one (sorted) has the expected shape;
#   - that patterns-input.json next to the manifest has a non-empty templates
#     array in which every template lists at least one touched table.
assert_unified_snapshot() {
local manifest_path="$1"
# The quoted heredoc delimiter keeps the shell from expanding anything inside the script.
node - "$manifest_path" <<'NODE'
const { dirname, join } = require('node:path');
const { readFileSync, readdirSync } = require('node:fs');
const manifestPath = process.argv[2];
const manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
function assert(condition, message) {
if (!condition) throw new Error(message);
}
assert(manifest.source === 'historic-sql', `Expected source historic-sql, got ${manifest.source}`);
assert(manifest.dialect === 'postgres', `Expected dialect postgres, got ${manifest.dialect}`);
assert(Number.isInteger(manifest.snapshotRowCount) && manifest.snapshotRowCount > 0, 'Expected snapshotRowCount > 0');
assert(Number.isInteger(manifest.touchedTableCount) && manifest.touchedTableCount > 0, 'Expected touchedTableCount > 0');
assert(Number.isInteger(manifest.parseFailures), 'Expected numeric parseFailures');
assert(Array.isArray(manifest.warnings), 'Expected warnings array');
assert(Array.isArray(manifest.probeWarnings), 'Expected probeWarnings array');
for (const legacyKey of ['degraded', 'baselineFirstRun', 'pgServerVersion', 'statsResetAt', 'templates']) {
assert(!(legacyKey in manifest), `Legacy manifest key is still present: ${legacyKey}`);
}
const root = dirname(manifestPath);
const tableDir = join(root, 'tables');
const tableFiles = readdirSync(tableDir).filter((file) => file.endsWith('.json')).sort();
assert(tableFiles.length === manifest.touchedTableCount, `Expected ${manifest.touchedTableCount} table files, got ${tableFiles.length}`);
const firstTable = JSON.parse(readFileSync(join(tableDir, tableFiles[0]), 'utf8'));
assert(typeof firstTable.table === 'string' && firstTable.table.length > 0, 'Expected staged table name');
assert(firstTable.stats && typeof firstTable.stats.executionsBucket === 'string', 'Expected bucketed table stats');
assert(firstTable.columnsByClause && typeof firstTable.columnsByClause === 'object', 'Expected columnsByClause object');
assert(Array.isArray(firstTable.observedJoins), 'Expected observedJoins array');
assert(Array.isArray(firstTable.topTemplates) && firstTable.topTemplates.length > 0, 'Expected topTemplates');
const patterns = JSON.parse(readFileSync(join(root, 'patterns-input.json'), 'utf8'));
assert(Array.isArray(patterns.templates) && patterns.templates.length > 0, 'Expected patterns-input templates');
assert(
patterns.templates.every((template) => Array.isArray(template.tablesTouched) && template.tablesTouched.length > 0),
'Expected every pattern template to have touched tables',
);
NODE
}
# Validate a stage-only ingest run record written by run_historic_stage_only.
#
# Arguments:
#   $1  path to the JSON run record
#   $2  short label used as a prefix in every failure message
#   $3  expected WorkUnit mode: 'zero' (nothing scheduled) or 'nonzero'
#       (a patterns WorkUnit and at least one table WorkUnit scheduled)
#
# Reads MAX_STAGE_SECONDS from the environment as the upper bound for
# completedAt - startedAt. The embedded Node script throws on the first failed
# check, so every malformed field is reported with the label instead of
# surfacing as an unlabeled TypeError.
assert_stage_record() {
  local record_path="$1"
  local label="$2"
  local expected_work_units="$3"
  node - "$record_path" "$label" "$expected_work_units" "$MAX_STAGE_SECONDS" <<'NODE'
const { readFileSync } = require('node:fs');

const record = JSON.parse(readFileSync(process.argv[2], 'utf8'));
const label = process.argv[3];
const expectedWorkUnits = process.argv[4];
const maxSeconds = Number(process.argv[5]);

function assert(condition, message) {
  if (!condition) throw new Error(message);
}

// Number('') is 0 and Number('abc') is NaN; reject both up front so a bad
// MAX_STAGE_SECONDS is reported as a configuration error, not a slow stage.
assert(
  Number.isFinite(maxSeconds) && maxSeconds > 0,
  `${label}: MAX_STAGE_SECONDS must be a positive number, got ${JSON.stringify(process.argv[5])}`,
);

assert(record.status === 'done', `${label}: expected status done, got ${record.status}`);
assert(record.adapter === 'historic-sql', `${label}: expected historic-sql adapter`);
assert(record.connectionId === 'warehouse', `${label}: expected warehouse connection`);
assert(
  Number.isInteger(record.rawFileCount) && record.rawFileCount >= 3,
  `${label}: expected manifest, patterns input, and at least one table file`,
);
assert(Array.isArray(record.errors) && record.errors.length === 0, `${label}: expected no errors`);

if (expectedWorkUnits === 'zero') {
  assert(record.workUnitCount === 0, `${label}: expected zero WorkUnits, got ${record.workUnitCount}`);
  assert(Array.isArray(record.workUnits) && record.workUnits.length === 0, `${label}: expected empty workUnits`);
} else if (expectedWorkUnits === 'nonzero') {
  assert(record.workUnitCount > 0, `${label}: expected nonzero WorkUnits`);
  // Check the array shape before iterating, matching the 'zero' branch.
  assert(Array.isArray(record.workUnits), `${label}: expected workUnits array`);
  const unitKeys = record.workUnits
    .map((unit) => (unit ? unit.unitKey : undefined))
    .filter((unitKey) => typeof unitKey === 'string');
  assert(unitKeys.includes('historic-sql-patterns'), `${label}: expected patterns WorkUnit`);
  assert(
    unitKeys.some((unitKey) => unitKey.startsWith('historic-sql-table-')),
    `${label}: expected table WorkUnit`,
  );
} else {
  throw new Error(`${label}: unknown expected work unit mode ${expectedWorkUnits}`);
}

const elapsedMs = Date.parse(record.completedAt) - Date.parse(record.startedAt);
assert(Number.isFinite(elapsedMs) && elapsedMs >= 0, `${label}: invalid elapsed time`);
assert(elapsedMs <= maxSeconds * 1000, `${label}: stage-only ingest took ${elapsedMs}ms, over ${maxSeconds}s`);
NODE
}
# Run one stage-only historic-SQL ingest for the 'warehouse' connection through
# the built KTX packages and persist the resulting run record as JSON.
#
# Arguments:
#   $1  job id passed to the ingest run
#   $2  path the run record is written to
#
# Reads KTX_ROOT (location of the built packages) and PROJECT_DIR (the KTX
# project to load). Prints "<syncId> workUnits=<count>" on success.
run_historic_stage_only() {
  local job_id="$1"
  local record_path="$2"
  # The script uses top-level await and dynamic import(), which are only valid
  # in an ES module. Stdin input is CommonJS by default, so the module type is
  # stated explicitly rather than relying on Node's syntax detection, which is
  # not available on every Node release.
  node --input-type=module - "$KTX_ROOT" "$PROJECT_DIR" "$job_id" "$record_path" <<'NODE'
const { writeFile } = await import('node:fs/promises');
const { join } = await import('node:path');

const ktxRoot = process.argv[2];
const projectDir = process.argv[3];
const jobId = process.argv[4];
const recordPath = process.argv[5];

// Load the freshly built workspace packages by path.
const { loadKtxProject } = await import(join(ktxRoot, 'packages/context/dist/project/index.js'));
const { runLocalStageOnlyIngest } = await import(join(ktxRoot, 'packages/context/dist/ingest/index.js'));
const { createKtxCliLocalIngestAdapters } = await import(join(ktxRoot, 'packages/cli/dist/local-adapters.js'));
const { getKtxCliPackageInfo } = await import(join(ktxRoot, 'packages/cli/dist/index.js'));

const project = await loadKtxProject({ projectDir });
const cliVersion = getKtxCliPackageInfo().version;
const managedRuntimeIo = { stdout: process.stdout, stderr: process.stderr };
const adapters = createKtxCliLocalIngestAdapters(project, {
  historicSqlConnectionId: 'warehouse',
  managedDaemon: {
    cliVersion,
    installPolicy: 'auto',
    io: managedRuntimeIo,
  },
});

// Fail with a clear message if the adapter was never registered.
const adapter = adapters.find((candidate) => candidate.source === 'historic-sql');
if (!adapter) throw new Error('historic-sql adapter was not registered for local run');

const record = await runLocalStageOnlyIngest({
  project,
  adapters,
  adapter: 'historic-sql',
  connectionId: 'warehouse',
  trigger: 'manual_resync',
  jobId,
});

await writeFile(recordPath, `${JSON.stringify(record, null, 2)}\n`, 'utf8');
console.log(`${record.syncId} workUnits=${record.workUnitCount}`);
NODE
}
# --- Build the packages the smoke loads from dist/ ---
cd "$KTX_ROOT"
pnpm --filter @ktx/context run build
pnpm --filter @ktx/cli run build
# --- Start Postgres and generate the base query workload ---
docker compose -f "$COMPOSE_FILE" up -d --wait
"$EXAMPLE_DIR/scripts/generate-workload.sh" base
# Default connection URL for the example database; an existing
# WAREHOUSE_DATABASE_URL in the environment takes precedence.
export WAREHOUSE_DATABASE_URL="${WAREHOUSE_DATABASE_URL:-postgresql://ktx_reader:ktx_reader@127.0.0.1:55432/analytics}" # pragma: allowlist secret
# --- Create a new project with historic SQL enabled on 'warehouse' ---
node "$KTX_BIN" --project-dir "$PROJECT_DIR" setup \
--new \
--skip-agents \
--skip-llm \
--skip-embeddings \
--skip-sources \
--database postgres \
--new-database-connection-id warehouse \
--database-url env:WAREHOUSE_DATABASE_URL \
--database-schema public \
--enable-historic-sql \
--historic-sql-min-executions 2 \
--yes \
--no-input
# --- Install and start the managed runtime ---
node "$KTX_BIN" runtime install --yes
node "$KTX_BIN" runtime start
# --- Run 1: first ingest over the base workload must schedule WorkUnits ---
FIRST_RECORD="$PROJECT_PARENT/first-record.json"
run_historic_stage_only "historic-first-$$" "$FIRST_RECORD"
FIRST_MANIFEST="$(latest_manifest)"
assert_unified_snapshot "$FIRST_MANIFEST"
assert_stage_record "$FIRST_RECORD" first nonzero
# --- Run 2: same workload again must schedule zero WorkUnits ---
UNCHANGED_RECORD="$PROJECT_PARENT/unchanged-record.json"
run_historic_stage_only "historic-unchanged-$$" "$UNCHANGED_RECORD"
UNCHANGED_MANIFEST="$(latest_manifest)"
assert_unified_snapshot "$UNCHANGED_MANIFEST"
assert_stage_record "$UNCHANGED_RECORD" unchanged zero
# --- Run 3: after the extra workload, WorkUnits must be scheduled again ---
"$EXAMPLE_DIR/scripts/generate-workload.sh" extra
CHANGED_RECORD="$PROJECT_PARENT/changed-record.json"
run_historic_stage_only "historic-changed-$$" "$CHANGED_RECORD"
CHANGED_MANIFEST="$(latest_manifest)"
assert_unified_snapshot "$CHANGED_MANIFEST"
assert_stage_record "$CHANGED_RECORD" changed nonzero
echo "Postgres historic SQL smoke passed"
echo "Project dir: $PROJECT_DIR"
```
- [ ] **Step 2: Run the docs test to verify smoke-script assertions now pass or expose remaining README failures**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL remains because `examples/postgres-historic/README.md`, `examples/README.md`, and public docs have not been rewritten yet. The smoke-specific assertions for `assert_unified_snapshot`, `assert_stage_record`, and `--historic-sql-min-executions 2` should pass.
- [ ] **Step 3: Commit**
```bash
git add examples/postgres-historic/scripts/smoke.sh
git commit -m "test: assert unified postgres historic sql smoke"
```
### Task 4: Update Example And Public Docs
**Files:**
- Modify: `examples/postgres-historic/README.md`
- Modify: `examples/README.md`
- Modify: `docs/content/docs/integrations/primary-sources.mdx`
- Modify: `docs/content/docs/cli-reference/ktx-setup.mdx`
- [ ] **Step 1: Replace the Postgres historic README**
Replace `examples/postgres-historic/README.md` with:
````markdown
# Postgres Historic SQL Example
This example is a manual smoke for the redesigned Postgres historic-SQL ingest
path through `pg_stat_statements`. It starts Postgres 14 with the extension
preloaded, generates query workload under separate users, runs `ktx setup` with
`--enable-historic-sql`, and verifies the unified staged artifacts:
- `manifest.json`
- `tables/*.json`
- `patterns-input.json`
The smoke also runs stage-only ingest twice against the same workload and
verifies the second run has `workUnitCount: 0`, which proves unchanged bucketed
table and pattern inputs do not schedule LLM work.
## Prerequisites
- Docker with Compose v2
- Node and pnpm matching the KTX workspace
- `uv` on `PATH` so the KTX-managed Python runtime can install the bundled
runtime wheel
## Run
From the KTX repository root:
```bash
examples/postgres-historic/scripts/smoke.sh
```
The smoke creates a temporary KTX project, isolates the managed Python runtime
under the temporary project parent, starts Postgres on `127.0.0.1:55432`, and
uses this connection URL:
```bash
postgresql://ktx_reader:ktx_reader@127.0.0.1:55432/analytics # pragma: allowlist secret
```
Set `KTX_POSTGRES_HISTORIC_KEEP_DOCKER=1` to leave the container running after
the script exits.
The smoke validates the historic-SQL raw snapshot path without requiring LLM
credentials. It uses KTX's local stage-only ingest API after `ktx setup`, so the
deterministic reader, batch SQL parser, stable artifact writer, and diff-based
WorkUnit planning are checked independently from curation.
## Manual Commands
Start Postgres and generate the base workload:
```bash
docker compose -f examples/postgres-historic/docker-compose.yml up -d --wait
examples/postgres-historic/scripts/generate-workload.sh base
```
Create a project and enable historic SQL:
```bash
export WAREHOUSE_DATABASE_URL=postgresql://ktx_reader:ktx_reader@127.0.0.1:55432/analytics # pragma: allowlist secret
pnpm --filter @ktx/cli run build
node packages/cli/dist/bin.js --project-dir /tmp/ktx-postgres-historic setup \
--new \
--skip-agents \
--skip-llm \
--skip-embeddings \
--skip-sources \
--database postgres \
--new-database-connection-id warehouse \
--database-url env:WAREHOUSE_DATABASE_URL \
--database-schema public \
--enable-historic-sql \
--historic-sql-min-executions 2 \
--yes \
--no-input
```
### Readiness check
```bash
pnpm run ktx -- dev doctor --project-dir /tmp/ktx-postgres-historic --no-input
```
The installed CLI form is:
```bash
ktx dev doctor --project-dir /tmp/ktx-postgres-historic --no-input
```
Expected output includes `PASS Postgres Historic SQL (warehouse)` when
`pg_stat_statements` is installed, `pg_read_all_stats` is granted, and tracking
is enabled. A low `pg_stat_statements.max` value is reported as an informational
note, not a warning.
Run local historic-SQL ingest:
```bash
pnpm run ktx -- dev ingest run --project-dir /tmp/ktx-postgres-historic \
--connection-id warehouse \
--adapter historic-sql \
--plain \
--yes \
--no-input
```
The full `dev ingest run` path also runs curation WorkUnits, so it requires a
configured LLM provider.
Inspect the latest manifest:
```bash
find /tmp/ktx-postgres-historic/raw-sources/warehouse/historic-sql -name manifest.json | sort | tail -n 1
```
The manifest should have `source: "historic-sql"`, `dialect: "postgres"`,
positive `snapshotRowCount`, positive `touchedTableCount`, numeric
`parseFailures`, and `warnings` and `probeWarnings` arrays. The same directory
should contain `patterns-input.json` and one `tables/*.json` file per touched
table.
## Troubleshooting
- Missing extension: confirm `shared_preload_libraries=pg_stat_statements` and
`CREATE EXTENSION pg_stat_statements;` both happened in the `analytics`
database.
- Missing grants: confirm `GRANT pg_read_all_stats TO ktx_reader;`.
- Empty snapshot: rerun
`examples/postgres-historic/scripts/generate-workload.sh base` from the KTX
repository root and keep `--historic-sql-min-executions 2` for the smoke.
- SQL-analysis failures: run `pnpm run ktx -- runtime doctor` from the KTX
repository root and confirm `uv`, the bundled Python wheel, and the managed
runtime all pass.
````
- [ ] **Step 2: Update the examples index paragraph**
In `examples/README.md`, replace the `postgres-historic` paragraph with:
```markdown
## postgres-historic
`postgres-historic/` is a manual Docker-backed smoke for Postgres
historic-SQL ingest via `pg_stat_statements`. It verifies setup, unified
Historic SQL artifacts, managed daemon batch SQL analysis, and no-WorkUnit
idempotency for unchanged bucketed table and pattern inputs.
```
- [ ] **Step 3: Update the setup CLI reference**
In `docs/content/docs/cli-reference/ktx-setup.mdx`, replace the Historic SQL flag rows with:
```markdown
| `--enable-historic-sql` | Enable Historic SQL when the selected database supports it | `false` |
| `--disable-historic-sql` | Disable Historic SQL for the selected database | `false` |
| `--historic-sql-window-days <number>` | Historic SQL query-history window in days | - |
| `--historic-sql-min-executions <number>` | Minimum executions for a Historic SQL template | - |
| `--historic-sql-min-calls <number>` | Alias for `--historic-sql-min-executions` for one release | - |
| `--historic-sql-service-account-pattern <pattern>` | Historic SQL service-account regex; repeatable | - |
| `--historic-sql-redaction-pattern <pattern>` | Historic SQL SQL-literal redaction regex; repeatable | - |
```
- [ ] **Step 4: Update primary source Historic SQL docs**
In `docs/content/docs/integrations/primary-sources.mdx`, replace the Postgres Historic SQL config block with:
````markdown
```yaml
historicSql:
enabled: true
dialect: postgres
minExecutions: 5
filters:
dropTrivialProbes: true
```
````
Replace the Snowflake Historic SQL feature row with:
```markdown
| Historic SQL | Yes | Via `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY` when enabled |
```
Replace the Snowflake Historic SQL paragraph and config block with:
````markdown
Snowflake Historic SQL reads aggregated query-history templates from
`SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY` and feeds the same unified staged
artifact shape as Postgres and BigQuery.
```yaml
historicSql:
enabled: true
dialect: snowflake
windowDays: 90
minExecutions: 5
filters:
dropTrivialProbes: true
serviceAccounts:
patterns: ['^svc_']
mode: exclude
redactionPatterns: []
```
````
Replace the BigQuery Historic SQL feature row with:
```markdown
| Historic SQL | Yes | Via region-scoped `INFORMATION_SCHEMA.JOBS_BY_PROJECT` when enabled |
```
Replace the BigQuery Historic SQL paragraph and config block with:
````markdown
BigQuery Historic SQL reads aggregated query-history templates from
region-scoped `INFORMATION_SCHEMA.JOBS_BY_PROJECT` and feeds the same unified
staged artifact shape as Postgres and Snowflake.
```yaml
historicSql:
enabled: true
dialect: bigquery
windowDays: 90
minExecutions: 5
filters:
dropTrivialProbes: true
serviceAccounts:
patterns: ['@bot\.']
mode: exclude
redactionPatterns: []
```
````
- [ ] **Step 5: Run docs tests to verify they pass**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS. The Postgres historic example test now sees unified artifact language and no legacy baseline/delta/reset wording.
- [ ] **Step 6: Commit**
```bash
git add examples/postgres-historic/README.md examples/README.md docs/content/docs/integrations/primary-sources.mdx docs/content/docs/cli-reference/ktx-setup.mdx
git commit -m "docs: refresh historic sql setup and smoke docs"
```
### Task 5: Final Verification
**Files:**
- Verify: `packages/cli/src/setup-databases.ts`
- Verify: `packages/cli/src/setup-databases.test.ts`
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `examples/postgres-historic/scripts/smoke.sh`
- Verify: `examples/postgres-historic/README.md`
- Verify: `examples/README.md`
- Verify: `docs/content/docs/integrations/primary-sources.mdx`
- Verify: `docs/content/docs/cli-reference/ktx-setup.mdx`
- [ ] **Step 1: Run focused setup tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts --testNamePattern "Historic SQL"
```
Expected: PASS.
- [ ] **Step 2: Run example docs tests**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 3: Run CLI type check**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS.
- [ ] **Step 4: Run grep checks for stale legacy wording**
Run:
```bash
rg -n "baselineFirstRun|fresh PGSS baseline|delta-only|--historic-sql-min-calls 2|local CLI Historic SQL ingest currently uses the Postgres path" examples docs/content scripts packages/cli/src/setup-databases.test.ts
```
Expected: no matches.
Run:
```bash
rg -n "serviceAccountUserPatterns" packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts docs/content examples
```
Expected: no matches. Existing runtime compatibility in `packages/context/src/ingest/adapters/historic-sql/types.ts` must remain untouched, so do not run this grep across `packages/context`.
- [ ] **Step 5: Run the Docker-backed smoke when Docker is available**
Run:
```bash
examples/postgres-historic/scripts/smoke.sh
```
Expected: PASS with `Postgres historic SQL smoke passed`. If Docker is not running or unavailable, record the exact Docker error and still run Steps 1-4.
- [ ] **Step 6: Run pre-commit for touched files**
Run:
```bash
uv run pre-commit run --files \
packages/cli/src/setup-databases.ts \
packages/cli/src/setup-databases.test.ts \
scripts/examples-docs.test.mjs \
examples/postgres-historic/scripts/smoke.sh \
examples/postgres-historic/README.md \
examples/README.md \
docs/content/docs/integrations/primary-sources.mdx \
docs/content/docs/cli-reference/ktx-setup.mdx
```
Expected: PASS when pre-commit is configured. If pre-commit is not configured or this workspace lacks the required hook environment, keep the output and rely on Steps 1-5 plus `git diff --check`.
- [ ] **Step 7: Run whitespace check**
Run:
```bash
git diff --check
```
Expected: no output.
- [ ] **Step 8: Commit verification fixes only if verification changed files**
If any verification step required an edit, commit the exact touched files:
```bash
git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts scripts/examples-docs.test.mjs examples/postgres-historic/scripts/smoke.sh examples/postgres-historic/README.md examples/README.md docs/content/docs/integrations/primary-sources.mdx docs/content/docs/cli-reference/ktx-setup.mdx
git commit -m "test: verify historic sql docs and smoke cleanup"
```
If verification made no edits, do not create an empty commit.
## Self-Review
Spec coverage:
- Spec §8 setup config is covered by Task 1 and Task 4.
- Spec §10.3 docs and setup wizard updates are covered by Tasks 1 and 4.
- Spec §10.4 demo DB acceptance is covered by Task 3 and Task 5.
- The prior implemented plans already cover daemon batch analysis, unified staging, skills/projection, search enrichment, old-code deletion, and cross-dialect local adapter wiring.
Placeholder scan:
- This plan contains concrete file paths, exact replacement snippets, exact commands, and expected outcomes for every step.
Type consistency:
- `filters.dropTrivialProbes`, `filters.serviceAccounts.patterns`, and `filters.serviceAccounts.mode` match `historicSqlUnifiedPullConfigSchema`.
- `workUnitCount`, `rawFileCount`, `startedAt`, and `completedAt` match `LocalIngestRunRecord`.
- `manifest.json`, `tables/*.json`, and `patterns-input.json` match the unified staged artifact names from `stage-unified.ts`.
Plan complete and saved to `docs/superpowers/plans/2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md`. Two execution options:
**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration
**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints
Which approach?

View file

@ -1,452 +0,0 @@
# Historic SQL End-To-End Retrieval Acceptance Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add one focused regression test that proves the redesigned historic-SQL pipeline reaches both agent retrieval surfaces after a real scheduled local ingest run.
**Architecture:** All historic-SQL redesign implementation slices are already present. This plan adds acceptance coverage around the existing production `HistoricSqlSourceAdapter`: a fake aggregate reader and fake batch SQL analysis drive the deterministic hot path, a fake `AgentRunnerService` emits typed table and pattern evidence through `emit_historic_sql_evidence`, and the normal local ingest runner performs projection, squash, wiki indexing, and semantic-layer reindexing.
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, YAML, SQLite FTS5 local search, existing local ingest runner, existing historic-SQL adapter.
---
## Starting Point
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans found that are based on this spec:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-unified-hot-path.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-skills-projection-cutover.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-cross-dialect-readiness.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-projection-archive-hardening.md`
Implemented status verified from this worktree:
- `2026-05-11-historic-sql-foundations.md` is implemented. Evidence: `packages/context/src/ingest/adapters/historic-sql/skill-schemas.ts`, `packages/context/src/sql-analysis/ports.ts` exposes `analyzeBatch()`, `python/ktx-daemon/src/ktx_daemon/app.py` registers `/sql/analyze-batch`, `packages/context/src/sl/types.ts` has `SemanticLayerSource.usage`, and `packages/context/src/ingest/adapters/live-database/manifest.ts` has `mergeUsagePreservingExternal()`.
- `2026-05-11-historic-sql-search-enrichment.md` is implemented. Evidence: `packages/context/src/sl/sl-search.service.ts` indexes `source.usage`, `packages/context/src/sl/sqlite-sl-sources-index.ts` selects FTS snippets, and local/MCP list surfaces expose `frequencyTier` and `snippet`.
- `2026-05-11-historic-sql-unified-hot-path.md` is implemented. Evidence: `stageHistoricSqlAggregatedSnapshot()`, `chunkHistoricSqlUnifiedStagedDir()`, `PostgresPgssReader`, aggregate BigQuery/Snowflake `fetchAggregated()` methods, unified schemas, and package exports exist.
- `2026-05-11-historic-sql-skills-projection-cutover.md` is implemented. Evidence: `HistoricSqlSourceAdapter` uses the unified stager/chunker, `packages/context/skills/historic_sql_table_digest/` and `packages/context/skills/historic_sql_patterns/` exist, `emit_historic_sql_evidence` exists, `HistoricSqlProjectionPostProcessor` is wired in `packages/context/src/ingest/local-bundle-runtime.ts`, and legacy skill names no longer grep in `packages/context` or `packages/cli`.
- `2026-05-11-historic-sql-cross-dialect-readiness.md` is implemented. Evidence: `packages/cli/src/local-adapters.test.ts` covers Postgres, BigQuery, and Snowflake historic-SQL registration, and `packages/cli/src/historic-sql-doctor.test.ts` covers low `pg_stat_statements.max` as informational output.
- `2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md` is implemented. Evidence: `packages/cli/src/setup-databases.test.ts` expects canonical `historicSql.filters.serviceAccounts`, `examples/postgres-historic/scripts/smoke.sh` asserts unified `manifest.json`, `tables/*.json`, `patterns-input.json`, and zero WorkUnits on the unchanged run, and public docs use `minExecutions`.
- `2026-05-11-historic-sql-projection-archive-hardening.md` is implemented. Evidence: `projection.ts` has `isArchivedPatternPage()`, excludes archived pages from active slug matching, and `projection.test.ts` covers reappearing archived patterns, stable archived pages, stale table marking, and legacy query-page deletion.
Remaining acceptance gap this plan covers:
- The current Postgres example smoke is intentionally stage-only, so it verifies raw artifacts and zero unchanged WorkUnits but does not prove table/pattern evidence projection and retrieval.
- `packages/context/src/ingest/local-bundle-ingest.test.ts` verifies the historic-SQL post-processor with a source-dir test adapter, but it does not exercise the production `HistoricSqlSourceAdapter` scheduled-pull path or the `historic_sql_patterns` WorkUnit.
- Existing SL and wiki search tests prove the search layers independently, but no single regression proves spec §7's retrieval chain after historic-SQL ingest writes `_schema` usage and `knowledge/global/historic-sql/*.md`.
## File Structure
Create:
- `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
Owns the end-to-end local regression for the redesigned historic-SQL pipeline. It uses the real adapter and local ingest runner, with fake deterministic reader/analysis/agent components so the test does not need a live database or LLM provider.
## Task 1: Add Real-Adapter Local Ingest Acceptance Coverage
**Files:**
- Create: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
- [ ] **Step 1: Verify the acceptance test does not exist yet**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
```
Expected: FAIL with "No test files found" because no end-to-end historic-SQL retrieval acceptance test exists yet.
- [ ] **Step 2: Write the acceptance test**
Create `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`:
```typescript
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import { AgentRunnerService } from '../../../agent/index.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../../project/index.js';
import { type SqlAnalysisPort } from '../../../sql-analysis/index.js';
import { searchLocalSlSources } from '../../../sl/local-sl.js';
import { searchLocalKnowledgePages } from '../../../wiki/local-knowledge.js';
import { runLocalIngest } from '../../local-ingest.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
import type { AggregatedTemplate, HistoricSqlReader, HistoricSqlUnifiedPullConfig } from './types.js';
/**
 * Deterministic stand-in for a warehouse query-history reader.
 *
 * The probe reports no warnings or notes, and `fetchAggregated` always yields
 * one fixed Postgres template that joins `public.orders` to
 * `public.customers`, so the test needs no live database.
 */
class AcceptanceHistoricSqlReader implements HistoricSqlReader {
  /** Reports a clean probe: no warnings and no informational notes. */
  async probe() {
    return { warnings: [], info: [] };
  }

  /**
   * Yields the single canned aggregate template. The client, window, and
   * config arguments are accepted for interface compatibility and ignored.
   */
  async *fetchAggregated(
    _client: unknown,
    _window: { start: Date; end: Date },
    _config: HistoricSqlUnifiedPullConfig,
  ): AsyncIterable<AggregatedTemplate> {
    const ordersLifecycleSql =
      'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id where o.status = $1 group by o.status, c.segment';

    const ordersLifecycle: AggregatedTemplate = {
      templateId: 'pg:orders-lifecycle',
      canonicalSql: ordersLifecycleSql,
      dialect: 'postgres',
      stats: {
        executions: 42,
        distinctUsers: 4,
        firstSeen: '2026-05-01T00:00:00.000Z',
        lastSeen: '2026-05-11T00:00:00.000Z',
        p50RuntimeMs: 18,
        p95RuntimeMs: 84,
        errorRate: 0,
        rowsProduced: 420,
      },
      topUsers: [{ user: 'analyst@example.test', executions: 42 }],
    };

    yield ordersLifecycle;
  }
}
/**
 * Fake agent runner that replaces the LLM loop with scripted tool calls.
 *
 * For runs tagged `ingest-bundle-wu` it looks up the WorkUnit's `unitKey` and,
 * when a script exists for it, calls `emit_historic_sql_evidence` once with
 * canned evidence and checks the tool's acknowledgement text. Every other run
 * ends immediately with a natural stop.
 */
class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
  override runLoop = vi.fn(async (params: any) => {
    const naturalStop = () => ({ stopReason: 'natural' as const });

    const isWorkUnitRun = params.telemetryTags?.operationName === 'ingest-bundle-wu';
    if (!isWorkUnitRun) {
      return naturalStop();
    }

    // The tool must be present for every WorkUnit run, scripted or not.
    const emitEvidence = params.toolSet.emit_historic_sql_evidence;
    if (!emitEvidence?.execute) {
      throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
    }

    // One scripted emission per WorkUnit key handled by this fake.
    const scriptedEmissions = new Map<
      string,
      { evidence: unknown; toolCallId: string; expectedAck: string; failurePrefix: string }
    >([
      [
        'historic-sql-table-public-orders',
        {
          evidence: {
            kind: 'table_usage',
            table: 'public.orders',
            rawPath: 'tables/public.orders.json',
            usage: {
              narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
              frequencyTier: 'high',
              commonFilters: ['status'],
              commonGroupBys: ['status', 'segment'],
              commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
              staleSince: null,
            },
          },
          toolCallId: 'historic-sql-orders-usage',
          expectedAck: 'Recorded historic-SQL table_usage evidence',
          failurePrefix: 'Unexpected orders evidence result: ',
        },
      ],
      [
        'historic-sql-table-public-customers',
        {
          evidence: {
            kind: 'table_usage',
            table: 'public.customers',
            rawPath: 'tables/public.customers.json',
            usage: {
              narrative: 'Customers provide segment context for paid order lifecycle analysis.',
              frequencyTier: 'mid',
              commonFilters: [],
              commonGroupBys: ['segment'],
              commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
              staleSince: null,
            },
          },
          toolCallId: 'historic-sql-customers-usage',
          expectedAck: 'Recorded historic-SQL table_usage evidence',
          failurePrefix: 'Unexpected customers evidence result: ',
        },
      ],
      [
        'historic-sql-patterns',
        {
          evidence: {
            kind: 'pattern',
            rawPath: 'patterns-input.json',
            pattern: {
              slug: 'paid-order-lifecycle',
              title: 'Paid Order Lifecycle',
              narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
              definitionSql:
                'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
              tablesInvolved: ['public.orders', 'public.customers'],
              slRefs: ['orders', 'customers'],
              constituentTemplateIds: ['pg:orders-lifecycle'],
            },
          },
          toolCallId: 'historic-sql-pattern',
          expectedAck: 'Recorded historic-SQL pattern evidence',
          failurePrefix: 'Unexpected pattern evidence result: ',
        },
      ],
    ]);

    const scripted = scriptedEmissions.get(params.telemetryTags.unitKey);
    if (scripted) {
      const result = await emitEvidence.execute(scripted.evidence, { toolCallId: scripted.toolCallId });
      const acknowledged = String(result).includes(scripted.expectedAck);
      if (!acknowledged) {
        throw new Error(`${scripted.failurePrefix}${String(result)}`);
      }
    }

    return naturalStop();
  });

  /** Builds the runner with a stub LLM provider; the fake loop never calls a model. */
  constructor() {
    super({ llmProvider: { getModel: () => ({}) as never } as never });
  }
}
/**
 * Builds a fake SQL-analysis port for the acceptance test.
 *
 * `analyzeBatch` is a spy that maps every input item id to the same canned
 * analysis (orders + customers, with fixed columns per clause).
 * `analyzeForFingerprint` always rejects, so any use of it fails the test.
 */
function acceptanceSqlAnalysis(): SqlAnalysisPort {
  return {
    async analyzeForFingerprint() {
      throw new Error('analyzeForFingerprint should not be used by unified historic-SQL ingest');
    },
    analyzeBatch: vi.fn(async (items) => {
      const analysesById = new Map();
      for (const item of items) {
        // A fresh analysis object per item, keyed by the item's id.
        analysesById.set(item.id, {
          tablesTouched: ['public.orders', 'public.customers'],
          columnsByClause: {
            select: ['status', 'segment'],
            where: ['status'],
            join: ['customer_id', 'id'],
            groupBy: ['status', 'segment'],
          },
        });
      }
      return analysesById;
    }),
  };
}
/**
 * Prepares an initialized project for the historic-SQL acceptance run.
 *
 * Overwrites `ktx.yaml` with a Postgres `warehouse` connection that has
 * `historicSql` enabled, reloads the project from disk, and seeds the
 * `public` schema shard with `orders` and `customers` tables.
 *
 * @param project the freshly initialized project whose directory is rewritten
 * @returns the project as reloaded after the config was written
 */
async function writeHistoricSqlProject(project: KtxLocalProject): Promise<KtxLocalProject> {
// NOTE(review): nested YAML keys below show a single leading space in this
// view; confirm the real file keeps the intended nesting indentation.
await writeFile(
join(project.projectDir, 'ktx.yaml'),
[
'project: warehouse',
'connections:',
' warehouse:',
' driver: postgres',
' historicSql:',
' enabled: true',
' dialect: postgres',
' minExecutions: 2',
'ingest:',
' adapters:',
' - historic-sql',
' embeddings:',
' backend: deterministic',
'storage:',
' state: sqlite',
' search: sqlite-fts5',
' git:',
' auto_commit: false',
' author: KTX Test <system@ktx.local>',
'',
].join('\n'),
'utf-8',
);
// Reload so the returned project reflects the config written above.
const loaded = await loadKtxProject({ projectDir: project.projectDir });
// Seed the schema shard that the test later reads back for table usage.
await loaded.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
columns: [
{ name: 'id', type: 'string' },
{ name: 'status', type: 'string' },
{ name: 'customer_id', type: 'string' },
],
},
customers: {
table: 'public.customers',
columns: [
{ name: 'id', type: 'string' },
{ name: 'segment', type: 'string' },
],
},
},
}),
'KTX Test',
'system@ktx.local',
'Seed schema shard',
);
return loaded;
}
// End-to-end acceptance: drives the real HistoricSqlSourceAdapter through
// runLocalIngest with fake reader, SQL analysis, and agent runner, then checks
// that the evidence is written to disk and found by both local search calls.
describe('historic-SQL local ingest retrieval acceptance', () => {
let tempDir: string;
// Each test gets its own temporary directory, removed afterwards.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-historic-sql-acceptance-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('projects table and pattern evidence into semantic-layer and wiki retrieval surfaces', async () => {
const initialized = await initKtxProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' });
const project = await writeHistoricSqlProject(initialized);
const sqlAnalysis = acceptanceSqlAnalysis();
const agentRunner = new HistoricSqlAcceptanceAgentRunner();
// Real adapter, fake collaborators; the clock is pinned to a fixed instant.
const adapter = new HistoricSqlSourceAdapter({
reader: new AcceptanceHistoricSqlReader(),
queryClient: {},
sqlAnalysis,
now: () => new Date('2026-05-11T00:00:00.000Z'),
});
const result = await runLocalIngest({
project,
adapters: [adapter],
adapter: 'historic-sql',
connectionId: 'warehouse',
jobId: 'historic-sql-retrieval-acceptance',
agentRunner,
});
// Three WorkUnits are expected, presumably orders, customers, and patterns
// (the unit keys the fake agent runner has scripts for).
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(1);
expect(result.result.failedWorkUnits).toEqual([]);
expect(result.result.workUnitCount).toBe(3);
expect(agentRunner.runLoop).toHaveBeenCalledTimes(3);
expect(result.report.body.postProcessor).toMatchObject({
sourceKey: 'historic-sql',
status: 'success',
result: {
tableUsageMerged: 2,
patternPagesWritten: 1,
},
touchedSources: [
{ connectionId: 'warehouse', sourceName: 'customers' },
{ connectionId: 'warehouse', sourceName: 'orders' },
],
});
// The orders narrative and the pattern page must exist on disk.
await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves
.toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.');
await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql/paid-order-lifecycle.md'), 'utf-8'))
.resolves.toContain('Paid Order Lifecycle');
// Search against a freshly loaded project rather than the in-memory one.
const reloaded = await loadKtxProject({ projectDir: project.projectDir });
// NOTE(review): the customers narrative also contains "paid order lifecycle",
// yet this expects exactly one hit (orders); confirm the search is meant to
// exclude customers here.
await expect(
searchLocalSlSources(reloaded, { connectionId: 'warehouse', query: 'paid order lifecycle', limit: 5 }),
).resolves.toEqual([
expect.objectContaining({
name: 'orders',
frequencyTier: 'high',
snippet: expect.stringContaining('<mark>'),
matchReasons: expect.arrayContaining(['lexical']),
}),
]);
await expect(
searchLocalKnowledgePages(reloaded, { query: 'paid order lifecycle', userId: 'local', limit: 5 }),
).resolves.toEqual([
expect.objectContaining({
key: 'historic-sql/paid-order-lifecycle',
summary: 'Paid Order Lifecycle',
matchReasons: expect.arrayContaining(['lexical']),
}),
]);
});
});
```
- [ ] **Step 3: Run the focused acceptance test after creating the file**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
```
Expected: PASS. The output reports one passing test; that test asserts `sqlAnalysis.analyzeBatch` was called exactly once.
- [ ] **Step 4: Commit the acceptance test**
```bash
git add packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
git commit -m "test: cover historic sql retrieval acceptance"
```
## Task 2: Run Adjacent Historic-SQL Regression Checks
**Files:**
- Verify: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts`
- Verify: `packages/context/src/sl/local-sl.test.ts`
- Verify: `packages/context/src/wiki/local-knowledge.test.ts`
- [ ] **Step 1: Run the new acceptance test with the adjacent historic-SQL unit tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts \
src/ingest/adapters/historic-sql/projection.test.ts \
src/ingest/adapters/historic-sql/stage-unified.test.ts \
src/ingest/adapters/historic-sql/chunk-unified.test.ts \
src/sl/local-sl.test.ts \
src/wiki/local-knowledge.test.ts
```
Expected: PASS. These suites cover the new acceptance chain plus the deterministic projection, stager, chunker, SL search, and wiki search layers it depends on.
- [ ] **Step 2: Run pre-commit for the new test file**
Run:
```bash
uv run pre-commit run --files packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
```
Expected: PASS. If `uv` refuses to run because the local binary does not satisfy the repo pin, activate `.venv` and run the closest TypeScript checks instead:
```bash
pnpm --filter @ktx/context run type-check
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
```
- [ ] **Step 3: Confirm no unrelated files are included**
Run:
```bash
git status --short
```
Expected: either an empty status after the Task 1 commit, or only intentionally changed plan/test files if the worker is preserving an uncommitted plan handoff.
## Self-Review
Spec coverage:
- Spec §4 hot path is covered because the test uses `HistoricSqlSourceAdapter.fetch()` with `stageHistoricSqlAggregatedSnapshot()`, a fake `HistoricSqlReader.fetchAggregated()`, and one `SqlAnalysisPort.analyzeBatch()` call.
- Spec §5 cold path is covered because the fake agent emits `table_usage` and `pattern` evidence through `emit_historic_sql_evidence`, and the normal `HistoricSqlProjectionPostProcessor` projects that evidence.
- Spec §6 and §7 retrieval surfaces are covered because the same test verifies `searchLocalSlSources()` returns `frequencyTier` and an FTS snippet and `searchLocalKnowledgePages()` returns `historic-sql/paid-order-lifecycle`.
- Spec §10.4 search retrieval acceptance is covered without requiring a live warehouse or LLM credentials.
Placeholder scan:
- The placeholder scan is clean, and the plan contains concrete file paths, code, commands, and expected outputs.
- The only fallback in the plan is the explicit `uv` version-mismatch path required by repository instructions.
Type consistency:
- `HistoricSqlReader`, `HistoricSqlUnifiedPullConfig`, `SqlAnalysisPort`, `HistoricSqlSourceAdapter`, `runLocalIngest`, `searchLocalSlSources`, and `searchLocalKnowledgePages` match existing exported APIs.
- Evidence payloads match `emit_historic_sql_evidence` input schemas: table evidence omits `connectionId` because the tool injects it; projected persisted evidence includes it.
Plan complete and saved to `docs/superpowers/plans/2026-05-11-historic-sql-end-to-end-retrieval-acceptance.md`. Two execution options:
**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration
**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints
Which approach?

File diff suppressed because it is too large Load diff

View file

@ -1,407 +0,0 @@
# Historic SQL Pattern Shard Smoke Docs Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Align the Postgres historic-SQL smoke and example docs with sharded pattern WorkUnits.
**Architecture:** The runtime already writes the full `patterns-input.json` audit file and bounded `patterns-input/part-0001.json` style shards. This plan updates the example acceptance assets so they verify the sharded contract instead of the pre-sharding root `historic-sql-patterns` WorkUnit.
**Tech Stack:** Bash, Node.js built-in test runner, pnpm workspace scripts, KTX local stage-only ingest.
---
## Spec And Existing Plan Status
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans derived from this spec and implemented in this worktree:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md` - implemented. Evidence: `packages/context/src/ingest/adapters/historic-sql/skill-schemas.ts`, `packages/context/src/sql-analysis/ports.ts`, daemon `/sql/analyze-batch`, `SemanticLayerSource.usage`, and `mergeUsagePreservingExternal()`.
- `docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md` - implemented. Evidence: usage-aware SL search text, SQLite FTS snippets, and local/MCP result fields `frequencyTier` plus `snippet`.
- `docs/superpowers/plans/2026-05-11-historic-sql-unified-hot-path.md` - implemented. Evidence: `stageHistoricSqlAggregatedSnapshot()`, `chunkHistoricSqlUnifiedStagedDir()`, `PostgresPgssReader`, aggregate BigQuery/Snowflake readers, unified schemas, and package exports.
- `docs/superpowers/plans/2026-05-11-historic-sql-skills-projection-cutover.md` - implemented. Evidence: `HistoricSqlSourceAdapter`, `historic_sql_table_digest`, `historic_sql_patterns`, `emit_historic_sql_evidence`, `HistoricSqlProjectionPostProcessor`, and legacy skill removal from runtime code.
- `docs/superpowers/plans/2026-05-11-historic-sql-cross-dialect-readiness.md` - implemented. Evidence: local adapter registration tests for Postgres, BigQuery, and Snowflake plus PG doctor coverage for informational `pg_stat_statements.max`.
- `docs/superpowers/plans/2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md` - implemented at the time it was written, but its smoke assertions predate pattern shard WorkUnits.
- `docs/superpowers/plans/2026-05-11-historic-sql-projection-archive-hardening.md` - implemented. Evidence: `isArchivedPatternPage()`, archive exclusion from slug matching, stale table tests, and legacy query-page cleanup coverage.
- `docs/superpowers/plans/2026-05-11-historic-sql-end-to-end-retrieval-acceptance.md` - implemented. Evidence: `local-ingest-acceptance.test.ts` proves production adapter output reaches SL search and wiki search.
- `docs/superpowers/plans/2026-05-11-historic-sql-redaction-hardening.md` - implemented. Evidence: `redaction.ts`, `redaction.test.ts`, and staged artifact redaction coverage in `stage-unified.test.ts`.
- `docs/superpowers/plans/2026-05-11-historic-sql-pattern-workunit-sharding.md` - implemented. Evidence: `pattern-inputs.ts`, `pattern-inputs.test.ts`, `stage-unified.ts` writes `patterns-input/part-*.json`, `chunk-unified.ts` emits `historic-sql-patterns-part-*`, `historic_sql_patterns` reads shards, and acceptance tests use `rawPath: 'patterns-input/part-0001.json'`.
Every existing spec-derived implementation plan is already implemented in this worktree.
Remaining gap this plan fixes:
- `examples/postgres-historic/scripts/smoke.sh` still asserts a WorkUnit with `unitKey === 'historic-sql-patterns'`.
- Current runtime emits pattern WorkUnits with keys like `historic-sql-patterns-part-0001` and raw files like `patterns-input/part-0001.json`.
- The same smoke only validates the audit file `patterns-input.json`; it does not assert that the bounded shard files exist or contain only cross-table candidates.
- `examples/postgres-historic/README.md` and `examples/README.md` describe unchanged "pattern inputs" but do not explain that `patterns-input.json` is now audit-only and `patterns-input/part-*.json` drives pattern WorkUnits.
- `scripts/examples-docs.test.mjs` does not pin the sharded smoke/doc contract, so the stale root WorkUnit assertion can regress silently.
## File Structure
- Modify `scripts/examples-docs.test.mjs`
Pins docs and smoke script to the sharded pattern WorkUnit contract.
- Modify `examples/postgres-historic/scripts/smoke.sh`
Validates `patterns-input/part-*.json` shard files and `historic-sql-patterns-part-*` stage-only WorkUnits.
- Modify `examples/postgres-historic/README.md`
Documents `patterns-input.json` as the full audit artifact and `patterns-input/part-*.json` as bounded pattern WorkUnit input.
- Modify `examples/README.md`
Updates the short example catalog entry with the same audit-vs-shard wording.
### Task 1: Pin Example Tests To Pattern Shards
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Add failing assertions for sharded pattern smoke/docs**
In `scripts/examples-docs.test.mjs`, inside `it('documents the Postgres historic SQL smoke example', ...)`, add these assertions immediately after the existing `assert.match(readme, /patterns-input\.json/);` line:
```javascript
assert.match(readme, /patterns-input\/part-\*\.json/);
assert.match(readme, /full audit input/);
assert.match(readme, /bounded pattern WorkUnit shards/);
```
In the same test, add these assertions immediately after the existing `assert.match(smoke, /assert_stage_record "\$UNCHANGED_RECORD" unchanged zero/);` line:
```javascript
assert.match(smoke, /assertPatternShards/);
assert.match(smoke, /historic-sql-patterns-part-/);
assert.match(smoke, /patterns-input\/part-/);
assert.doesNotMatch(smoke, /unitKey === 'historic-sql-patterns'/);
```
- [ ] **Step 2: Run the example docs test to verify it fails**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL. The test should report missing `patterns-input/part-*.json`, `full audit input`, `bounded pattern WorkUnit shards`, `assertPatternShards`, or it should fail because `smoke.sh` still contains `unitKey === 'historic-sql-patterns'`.
- [ ] **Step 3: Commit the failing test**
Run:
```bash
git add scripts/examples-docs.test.mjs
git commit -m "test: expect historic sql pattern shard smoke docs"
```
### Task 2: Update The Postgres Historic Smoke
**Files:**
- Modify: `examples/postgres-historic/scripts/smoke.sh`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Import `existsSync` in the embedded snapshot assertion**
In `examples/postgres-historic/scripts/smoke.sh`, inside `assert_unified_snapshot()`, replace this line:
```javascript
const { readFileSync, readdirSync } = require('node:fs');
```
with:
```javascript
const { existsSync, readFileSync, readdirSync } = require('node:fs');
```
- [ ] **Step 2: Add shard validation to `assert_unified_snapshot()`**
In `examples/postgres-historic/scripts/smoke.sh`, inside the embedded Node script in `assert_unified_snapshot()`, add this function after the `legacyKeys` loop:
```javascript
/**
 * Validates the bounded pattern shard files staged under `<root>/patterns-input/`.
 *
 * Only files named `part-NNNN.json` (exactly four digits) are considered. Each shard must parse
 * as JSON, carry a non-empty `templates` array, and every template must touch at least two tables.
 *
 * @param {string} root - Staged snapshot directory that contains the `patterns-input` folder.
 * @returns {string[]} Sorted shard paths relative to `root`, e.g. `patterns-input/part-0001.json`.
 */
function assertPatternShards(root) {
  const shardDir = join(root, 'patterns-input');
  assert(existsSync(shardDir), 'Expected patterns-input shard directory');
  const shardFiles = readdirSync(shardDir)
    .filter((file) => /^part-\d{4}\.json$/.test(file))
    .sort()
    .map((file) => `patterns-input/${file}`);
  // Name the expected path shape in the failure message so it is actionable; this also puts the
  // literal `patterns-input/part-` that scripts/examples-docs.test.mjs pins into the smoke script.
  assert(shardFiles.length > 0, 'Expected at least one patterns-input/part-*.json shard file');
  for (const shardFile of shardFiles) {
    const shard = JSON.parse(readFileSync(join(root, shardFile), 'utf8'));
    assert(Array.isArray(shard.templates), `${shardFile}: expected templates array`);
    assert(shard.templates.length > 0, `${shardFile}: expected at least one template`);
    // Shards carry cross-table candidates only, so a template touching fewer than two tables is an error.
    assert(
      shard.templates.every((template) => Array.isArray(template.tablesTouched) && template.tablesTouched.length >= 2),
      `${shardFile}: expected only cross-table pattern candidates`,
    );
  }
  return shardFiles;
}
```
- [ ] **Step 3: Assert the full audit input and bounded shards**
In the same embedded Node script, replace the current `patterns` block:
```javascript
const patterns = JSON.parse(readFileSync(join(root, 'patterns-input.json'), 'utf8'));
assert(Array.isArray(patterns.templates) && patterns.templates.length > 0, 'Expected patterns-input templates');
assert(
patterns.templates.every((template) => Array.isArray(template.tablesTouched) && template.tablesTouched.length > 0),
'Expected every pattern template to have touched tables',
);
```
with:
```javascript
const patterns = JSON.parse(readFileSync(join(root, 'patterns-input.json'), 'utf8'));
assert(Array.isArray(patterns.templates) && patterns.templates.length > 0, 'Expected patterns-input audit templates');
assert(
patterns.templates.every((template) => Array.isArray(template.tablesTouched) && template.tablesTouched.length > 0),
'Expected every audit pattern template to have touched tables',
);
const shardFiles = assertPatternShards(root);
assert(
shardFiles.length <= patterns.templates.length,
`Expected shard count ${shardFiles.length} to be no greater than audit template count ${patterns.templates.length}`,
);
```
- [ ] **Step 4: Update the stage record WorkUnit assertions**
In `examples/postgres-historic/scripts/smoke.sh`, inside the embedded Node script in `assert_stage_record()`, replace:
```javascript
assert(record.rawFileCount >= 3, `${label}: expected manifest, patterns input, and at least one table file`);
```
with:
```javascript
assert(record.rawFileCount >= 4, `${label}: expected manifest, audit patterns input, pattern shard, and at least one table file`);
```
Then replace this nonzero WorkUnit block:
```javascript
} else if (expectedWorkUnits === 'nonzero') {
assert(record.workUnitCount > 0, `${label}: expected nonzero WorkUnits`);
assert(record.workUnits.some((unit) => unit.unitKey === 'historic-sql-patterns'), `${label}: expected patterns WorkUnit`);
assert(record.workUnits.some((unit) => unit.unitKey.startsWith('historic-sql-table-')), `${label}: expected table WorkUnit`);
} else {
```
with:
```javascript
} else if (expectedWorkUnits === 'nonzero') {
assert(record.workUnitCount > 0, `${label}: expected nonzero WorkUnits`);
const patternUnits = record.workUnits.filter((unit) => /^historic-sql-patterns-part-\d{4}$/.test(unit.unitKey));
assert(patternUnits.length > 0, `${label}: expected sharded patterns WorkUnit`);
for (const unit of patternUnits) {
assert(
unit.rawFiles.some((rawFile) => /^patterns-input\/part-\d{4}\.json$/.test(rawFile)),
`${label}: expected ${unit.unitKey} to read a pattern shard`,
);
assert(
!unit.rawFiles.includes('patterns-input.json'),
`${label}: expected ${unit.unitKey} not to schedule the full audit patterns input`,
);
}
assert(record.workUnits.some((unit) => unit.unitKey.startsWith('historic-sql-table-')), `${label}: expected table WorkUnit`);
} else {
```
- [ ] **Step 5: Run shell syntax and the docs test**
Run:
```bash
bash -n examples/postgres-historic/scripts/smoke.sh
node --test scripts/examples-docs.test.mjs
```
Expected: `bash -n` exits 0. The docs test still fails until the README files are updated in Task 3.
- [ ] **Step 6: Commit the smoke update**
Run:
```bash
git add examples/postgres-historic/scripts/smoke.sh
git commit -m "test: assert historic sql pattern shard smoke"
```
### Task 3: Update Example Documentation
**Files:**
- Modify: `examples/postgres-historic/README.md`
- Modify: `examples/README.md`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Update the artifact list in the Postgres historic README**
In `examples/postgres-historic/README.md`, replace this list:
```markdown
- `manifest.json`
- `tables/*.json`
- `patterns-input.json`
```
with:
```markdown
- `manifest.json`
- `tables/*.json`
- `patterns-input.json` as the full audit input
- `patterns-input/part-*.json` as bounded pattern WorkUnit shards
```
- [ ] **Step 2: Update the idempotency wording**
In `examples/postgres-historic/README.md`, replace this paragraph:
```markdown
The smoke also runs the same workload twice and verifies the second stage-only
run has `workUnitCount: 0`, which proves unchanged bucketed table and pattern
inputs do not schedule LLM work.
```
with:
```markdown
The smoke also runs the same workload twice and verifies the second stage-only
run has `workUnitCount: 0`, which proves unchanged bucketed table inputs and
unchanged bounded pattern shards do not schedule LLM work.
```
- [ ] **Step 3: Update the manifest inspection wording**
In `examples/postgres-historic/README.md`, replace this paragraph:
```markdown
The manifest should have `source: "historic-sql"`, `dialect: "postgres"`,
positive `snapshotRowCount`, positive `touchedTableCount`, numeric
`parseFailures`, `warnings`, and `probeWarnings`. The same directory should
contain `patterns-input.json` and one `tables/*.json` file per touched table.
```
with:
```markdown
The manifest should have `source: "historic-sql"`, `dialect: "postgres"`,
positive `snapshotRowCount`, positive `touchedTableCount`, numeric
`parseFailures`, `warnings`, and `probeWarnings`. The same directory should
contain `patterns-input.json`, at least one `patterns-input/part-*.json` pattern
shard for cross-table candidates, and one `tables/*.json` file per touched
table.
```
- [ ] **Step 4: Update the examples catalog entry**
In `examples/README.md`, replace this paragraph:
```markdown
`postgres-historic/` is a manual Docker-backed smoke for Postgres historic-SQL
ingest via `pg_stat_statements`. It verifies setup, unified Historic SQL artifacts,
managed daemon batch SQL analysis, and no-WorkUnit idempotency for unchanged
bucketed table and pattern inputs.
```
with:
```markdown
`postgres-historic/` is a manual Docker-backed smoke for Postgres historic-SQL
ingest via `pg_stat_statements`. It verifies setup, unified Historic SQL artifacts,
managed daemon batch SQL analysis, bounded pattern WorkUnit shards, and
no-WorkUnit idempotency for unchanged bucketed table inputs and pattern shards.
```
- [ ] **Step 5: Run the example docs test**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 6: Commit the docs update**
Run:
```bash
git add examples/postgres-historic/README.md examples/README.md
git commit -m "docs: explain historic sql pattern shards"
```
### Task 4: Verify The Smoke Contract
**Files:**
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `examples/postgres-historic/scripts/smoke.sh`
- Verify: `examples/postgres-historic/README.md`
- Verify: `examples/README.md`
- [ ] **Step 1: Run focused local checks**
Run:
```bash
bash -n examples/postgres-historic/scripts/smoke.sh
node --test scripts/examples-docs.test.mjs
```
Expected: both commands pass.
- [ ] **Step 2: Run the Docker-backed Postgres historic smoke**
Run:
```bash
examples/postgres-historic/scripts/smoke.sh
```
Expected: PASS with `Postgres historic SQL smoke passed`. The stage-only records should include pattern WorkUnits with keys like `historic-sql-patterns-part-0001`, each reading `patterns-input/part-0001.json`, and the unchanged run should report `workUnitCount: 0`.
- [ ] **Step 3: Run the drift grep**
Run:
```bash
# Backticks are escaped so bash does not start a command substitution inside the double-quoted
# pattern. The docs test is excluded because it intentionally contains the stale unitKey literal
# inside a doesNotMatch assertion.
rg -n --glob '!examples-docs.test.mjs' "unitKey === 'historic-sql-patterns'|expected patterns WorkUnit|patterns-input\\.json\` and one \`tables|unchanged bucketed table and pattern inputs" examples scripts
```
Expected: no matches.
- [ ] **Step 4: Commit verification metadata if any test-only wording changed**
Run:
```bash
git status --short
```
Expected: empty output, meaning the working tree is clean. If a previous step required a wording fix, commit only the touched files:
```bash
git add scripts/examples-docs.test.mjs examples/postgres-historic/scripts/smoke.sh examples/postgres-historic/README.md examples/README.md
git commit -m "test: verify historic sql sharded smoke docs"
```
## Self-Review
**Spec coverage:** This plan follows spec section 5.2's deterministic pattern sharding and preserves section 4.6's full `patterns-input.json` audit artifact. It updates the smoke and docs around the already implemented sharded runtime contract.
**Placeholder scan:** The plan contains exact file paths, exact snippets, commands, expected outcomes, and commit commands.
**Type consistency:** The plan uses the implemented runtime names consistently: `patterns-input.json` for the audit file, `patterns-input/part-*.json` for bounded shards, and `historic-sql-patterns-part-0001` style WorkUnit keys for pattern curation.
Plan complete and saved to `docs/superpowers/plans/2026-05-11-historic-sql-pattern-shard-smoke-docs.md`. Two execution options:
**1. Subagent-Driven (recommended)** - dispatch a fresh subagent per task, review between tasks, fast iteration
**2. Inline Execution** - execute tasks in this session using executing-plans, batch execution with checkpoints

View file

@ -1,943 +0,0 @@
# Historic SQL Pattern WorkUnit Sharding Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Keep historic-SQL pattern WorkUnit inputs under the raw-file and prompt-size limits by writing deterministic bounded pattern shards while preserving `patterns-input.json` as the full audit artifact.
**Architecture:** The stager continues to write full `patterns-input.json` for audit and diff visibility, then writes bounded `patterns-input/part-0001.json` style shards that contain only cross-table pattern candidates. The chunker emits one `historic_sql_patterns` WorkUnit per changed shard and never asks the skill to read the full audit file. Pattern projection is unchanged because emitted evidence already carries a free-form `rawPath`.
**Tech Stack:** TypeScript, Node.js filesystem APIs, Zod, Vitest, pnpm workspace commands.
---
## Spec And Existing Plan Status
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans derived from this spec:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md` - implemented. Current evidence includes `packages/context/src/ingest/adapters/historic-sql/skill-schemas.ts`, `SqlAnalysisPort.analyzeBatch()`, daemon `/sql/analyze-batch`, `SemanticLayerSource.usage`, and `mergeUsagePreservingExternal()`.
- `docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md` - implemented. Current evidence includes usage-aware `buildSemanticLayerSourceSearchText()`, FTS snippets in `sqlite-sl-sources-index.ts`, and list surfaces exposing `frequencyTier` plus `snippet`.
- `docs/superpowers/plans/2026-05-11-historic-sql-unified-hot-path.md` - implemented. Current evidence includes `stageHistoricSqlAggregatedSnapshot()`, `chunkHistoricSqlUnifiedStagedDir()`, `PostgresPgssReader`, aggregate BigQuery/Snowflake readers, unified schemas, and package exports.
- `docs/superpowers/plans/2026-05-11-historic-sql-skills-projection-cutover.md` - implemented. Current evidence includes production adapter cutover, `historic_sql_table_digest`, `historic_sql_patterns`, `emit_historic_sql_evidence`, `HistoricSqlProjectionPostProcessor`, and removal of legacy skill names from runtime code.
- `docs/superpowers/plans/2026-05-11-historic-sql-cross-dialect-readiness.md` - implemented. Current evidence includes local adapter registration tests for Postgres, BigQuery, and Snowflake plus PG doctor coverage for informational `pg_stat_statements.max`.
- `docs/superpowers/plans/2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md` - implemented. Current evidence includes canonical setup config tests, docs using `minExecutions`, and the Postgres historic smoke script asserting unified staged artifacts and unchanged-run idempotency.
- `docs/superpowers/plans/2026-05-11-historic-sql-projection-archive-hardening.md` - implemented. Current evidence includes `isArchivedPatternPage()`, archive exclusion from active slug matching, stale table tests, and legacy query-page cleanup coverage.
- `docs/superpowers/plans/2026-05-11-historic-sql-end-to-end-retrieval-acceptance.md` - implemented. Current evidence includes `local-ingest-acceptance.test.ts` proving production adapter output reaches SL search and wiki search.
- `docs/superpowers/plans/2026-05-11-historic-sql-redaction-hardening.md` - implemented. Current evidence includes `redaction.ts`, `redaction.test.ts`, and `stage-unified.test.ts` proving original SQL is analyzed while staged artifacts contain `[REDACTED]`.
Every existing spec-derived plan is already implemented in this worktree. This plan covers the next uncovered implementation gap from spec section 5.2: `historic_sql_patterns` may need "a small handful" of deterministic chunks when `patterns-input.json` exceeds the LLM context budget. Current code always emits one WorkUnit with raw file `patterns-input.json`; `read_raw_file` rejects files larger than 120,000 bytes and WorkUnit prompt construction rejects prompts larger than 240,000 characters.
## File Structure
- Create `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts`
Owns deterministic pattern audit ordering, cross-table candidate filtering, byte-bounded shard creation, shard path constants, and shard path detection.
- Create `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts`
Covers deterministic shard ordering, single-table exclusion from WorkUnit shards, byte limits, and oversize-template manifest warnings.
- Modify `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`
Writes full `patterns-input.json` plus bounded `patterns-input/part-0001.json` shard files, and appends shard warnings to `manifest.json`.
- Modify `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`
Adds a regression for audit file preservation and sharded WorkUnit input creation.
- Modify `packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts`
Emits one patterns WorkUnit per changed shard path, treats root `patterns-input.json` as audit-only, and includes shard paths in the scope descriptor and eviction calculation.
- Modify `packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts`
Updates root-file expectations and adds multi-shard diff behavior.
- Modify `packages/context/skills/historic_sql_patterns/SKILL.md`
Tells the skill to read the exact pattern shard in `rawFiles` and emit evidence with that shard as `rawPath`.
- Modify `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
Updates the fake agent to emit pattern evidence for `historic-sql-patterns-part-0001`.
- Modify `packages/context/src/ingest/ingest-runtime-assets.test.ts`
Keeps packaged skill assertions aligned with sharded pattern file guidance.
## Task 1: Add Pattern Input Sharding Helper
**Files:**
- Create: `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts`
- Create: `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts`
- [ ] **Step 1: Write the failing helper tests**
Create `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts`:
```typescript
import { describe, expect, it } from 'vitest';
import {
HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES,
isHistoricSqlPatternInputShardPath,
serializedStagedPatternsInputByteLength,
splitHistoricSqlPatternInputs,
} from './pattern-inputs.js';
import type { StagedPatternsInput } from './types.js';
type PatternTemplate = StagedPatternsInput['templates'][number];
/**
 * Builds a pattern template fixture with fixed bucket and dialect values.
 *
 * @param id - Template identifier.
 * @param tablesTouched - Tables referenced by the template.
 * @param canonicalSql - SQL text; defaults to `'select 1'`.
 * @returns The fixture object. Key order is kept stable because it affects serialized size.
 */
function template(id: string, tablesTouched: string[], canonicalSql = 'select 1'): PatternTemplate {
  const fixture: PatternTemplate = {
    id,
    canonicalSql,
    tablesTouched,
    executionsBucket: '10-100',
    distinctUsersBucket: '2-5',
    dialect: 'postgres',
  };
  return fixture;
}
/**
 * Unit tests for the pattern-input split helper: audit ordering, cross-table-only shards,
 * the per-shard byte limit, oversize-template warnings, and shard-path recognition.
 */
describe('historic-SQL pattern input sharding', () => {
  it('keeps the audit input complete while sharding only cross-table pattern candidates', () => {
    // Long SQL so the three cross-table templates cannot all fit in one 760-byte shard.
    const padding = 'x'.repeat(260);
    const joinSql = `select * from public.orders join public.customers on true where marker = '${padding}'`;
    const staged: StagedPatternsInput = {
      templates: [
        template('single-table-orders', ['public.orders']),
        template('orders-customers-2', ['public.orders', 'public.customers'], joinSql),
        template('orders-customers-1', ['public.customers', 'public.orders'], joinSql),
        template('orders-customers-payments', ['public.orders', 'public.customers', 'public.payments'], joinSql),
      ],
    };

    const split = splitHistoricSqlPatternInputs(staged, { maxBytes: 760 });

    // The audit input keeps every template, including the single-table one.
    const auditIds = split.auditInput.templates.map((entry) => entry.id);
    expect(auditIds).toEqual([
      'orders-customers-1',
      'orders-customers-2',
      'orders-customers-payments',
      'single-table-orders',
    ]);

    expect(split.shards.length).toBeGreaterThan(1);
    const shardPaths = split.shards.map((shard) => shard.path);
    expect(shardPaths).toEqual([
      'patterns-input/part-0001.json',
      'patterns-input/part-0002.json',
      'patterns-input/part-0003.json',
    ]);

    const shardedIds = split.shards.flatMap((shard) => shard.input.templates.map((entry) => entry.id));
    expect(shardedIds).toEqual(['orders-customers-payments', 'orders-customers-1', 'orders-customers-2']);

    const withinBudget = split.shards.every((shard) => shard.byteLength <= 760);
    expect(withinBudget).toBe(true);

    const shardsContainSingleTable = split.shards
      .flatMap((shard) => shard.input.templates)
      .some((entry) => entry.id === 'single-table-orders');
    expect(shardsContainSingleTable).toBe(false);
    expect(split.warnings).toEqual([]);
  });

  it('omits a single oversized template from shards and reports a manifest warning', () => {
    const oversizedSql = `select * from public.orders join public.customers on true where payload = '${'x'.repeat(500)}'`;
    const staged: StagedPatternsInput = {
      templates: [template('oversized-cross-table', ['public.orders', 'public.customers'], oversizedSql)],
    };

    const split = splitHistoricSqlPatternInputs(staged, { maxBytes: 240 });

    expect(split.auditInput.templates.map((entry) => entry.id)).toEqual(['oversized-cross-table']);
    expect(split.shards).toEqual([]);
    expect(split.warnings).toEqual(['patterns_input_template_too_large:oversized-cross-table']);
  });

  it('recognizes only generated pattern shard paths', () => {
    // [candidate path, whether it should be recognized as a shard path]
    const cases: Array<[string, boolean]> = [
      ['patterns-input/part-0001.json', true],
      ['patterns-input/part-0012.json', true],
      ['patterns-input.json', false],
      ['patterns-input/part-1.json', false],
      ['patterns-input/readme.md', false],
    ];
    for (const [candidate, expected] of cases) {
      expect(isHistoricSqlPatternInputShardPath(candidate)).toBe(expected);
    }
  });

  it('uses a production byte budget below read_raw_file maximum size', () => {
    expect(HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES).toBeLessThan(120_000);
    const emptyInputBytes = serializedStagedPatternsInputByteLength({ templates: [] });
    expect(emptyInputBytes).toBeGreaterThan(0);
  });
});
```
- [ ] **Step 2: Run helper tests to verify they fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/pattern-inputs.test.ts
```
Expected: FAIL because `./pattern-inputs.js` does not exist.
- [ ] **Step 3: Add the sharding helper**
Create `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts`:
```typescript
import { Buffer } from 'node:buffer';
import type { StagedPatternsInput } from './types.js';
/** Directory, relative to the staged dir, that holds the generated pattern shard files. */
export const HISTORIC_SQL_PATTERN_WORKUNIT_DIR = 'patterns-input';
/** Default per-shard byte budget used by splitHistoricSqlPatternInputs() when no `maxBytes` option is given. */
export const HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES = 110_000;
/**
 * Matches generated shard paths such as `patterns-input/part-0001.json`.
 *
 * shardPath() zero-pads the index to a MINIMUM of four digits, so shard 10000 and
 * above produce five or more digits. The pattern therefore accepts four or more
 * digits; an exact `\d{4}` would make such shards invisible to path matching.
 * Unpadded names like `part-1.json` are still rejected.
 */
export const HISTORIC_SQL_PATTERN_WORKUNIT_PATH_RE = /^patterns-input\/part-\d{4,}\.json$/;
/** A single entry of `StagedPatternsInput.templates`. */
type PatternTemplate = StagedPatternsInput['templates'][number];
/** One bounded shard produced by splitHistoricSqlPatternInputs(). */
export interface HistoricSqlPatternInputShard {
/** Shard file path as built by shardPath(), e.g. `patterns-input/part-0001.json`. */
path: string;
/** The templates assigned to this shard. */
input: StagedPatternsInput;
/** UTF-8 byte length of `input` as serialized by serializeStagedPatternsInput(). */
byteLength: number;
}
/** Result of splitting a staged patterns input into an audit input plus shards. */
export interface HistoricSqlPatternInputSplitResult {
/** Every input template, sorted by id. */
auditInput: StagedPatternsInput;
/** Shards containing only templates that touch two or more tables. */
shards: HistoricSqlPatternInputShard[];
/** One `patterns_input_template_too_large:<id>` entry per template that exceeds the byte budget on its own. */
warnings: string[];
}
/** Options for splitHistoricSqlPatternInputs(). */
export interface HistoricSqlPatternInputSplitOptions {
/** Per-shard byte budget; falls back to HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES when omitted. */
maxBytes?: number;
}
/**
 * Tells whether `path` is a generated pattern shard path.
 *
 * @param path - Path to check.
 * @returns `true` when the path matches HISTORIC_SQL_PATTERN_WORKUNIT_PATH_RE.
 */
export function isHistoricSqlPatternInputShardPath(path: string): boolean {
  const match = HISTORIC_SQL_PATTERN_WORKUNIT_PATH_RE.exec(path);
  return match !== null;
}
/**
 * Serializes a staged patterns input as two-space-indented JSON followed by a newline.
 *
 * @param input - The patterns input to serialize.
 * @returns The pretty-printed JSON text with a trailing `\n`.
 */
export function serializeStagedPatternsInput(input: StagedPatternsInput): string {
  const prettyJson = JSON.stringify(input, null, 2);
  return prettyJson + '\n';
}
/**
 * Measures the UTF-8 byte length of a patterns input as serialized by
 * serializeStagedPatternsInput().
 *
 * @param input - The patterns input to measure.
 * @returns The serialized size in bytes.
 */
export function serializedStagedPatternsInputByteLength(input: StagedPatternsInput): number {
  const serialized = serializeStagedPatternsInput(input);
  return Buffer.byteLength(serialized, 'utf-8');
}
/**
 * Returns a copy of `templates` ordered by id (via `localeCompare`).
 * The input array is not mutated.
 */
function sortedAuditTemplates(templates: readonly PatternTemplate[]): PatternTemplate[] {
  const byId = (a: PatternTemplate, b: PatternTemplate): number => a.id.localeCompare(b.id);
  return templates.slice().sort(byId);
}
/**
 * Selects the templates that touch at least two tables and orders them for sharding.
 *
 * Each selected template is shallow-copied with its `tablesTouched` sorted, so the
 * input objects are left untouched. Ordering: more tables first, then the sorted
 * table list joined with `\0`, then id.
 */
function sortedPatternCandidates(templates: readonly PatternTemplate[]): PatternTemplate[] {
  const candidates: PatternTemplate[] = [];
  for (const template of templates) {
    if (template.tablesTouched.length < 2) {
      continue;
    }
    candidates.push({ ...template, tablesTouched: [...template.tablesTouched].sort() });
  }
  return candidates.sort((a, b) => {
    const byTableCount = b.tablesTouched.length - a.tablesTouched.length;
    if (byTableCount !== 0) return byTableCount;
    const bySignature = a.tablesTouched.join('\0').localeCompare(b.tablesTouched.join('\0'));
    return bySignature !== 0 ? bySignature : a.id.localeCompare(b.id);
  });
}
/**
 * Builds the shard file path for a shard index, zero-padding the index to at
 * least four digits (e.g. 1 -> `patterns-input/part-0001.json`).
 */
function shardPath(index: number): string {
  const paddedIndex = index.toString().padStart(4, '0');
  return HISTORIC_SQL_PATTERN_WORKUNIT_DIR + '/part-' + paddedIndex + '.json';
}
/**
 * Splits a staged patterns input into a complete audit input plus byte-bounded shards.
 *
 * - The audit input contains every template, sorted by id.
 * - Only templates selected by sortedPatternCandidates() are sharded.
 * - Candidates are packed greedily in that order: a shard is closed as soon as adding
 *   the next candidate would push its serialized size above the budget.
 * - A candidate whose own serialized size exceeds the budget is left out of all shards
 *   and reported as `patterns_input_template_too_large:<id>`.
 *
 * @param input - The full staged patterns input.
 * @param options - Optional `maxBytes`; defaults to HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES.
 * @returns The audit input, the shards (numbered from 1), and any warnings.
 */
export function splitHistoricSqlPatternInputs(
  input: StagedPatternsInput,
  options: HistoricSqlPatternInputSplitOptions = {},
): HistoricSqlPatternInputSplitResult {
  const byteBudget = options.maxBytes ?? HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES;
  const auditInput: StagedPatternsInput = { templates: sortedAuditTemplates(input.templates) };
  const shards: HistoricSqlPatternInputShard[] = [];
  const warnings: string[] = [];
  let pending: PatternTemplate[] = [];

  // Serialized size of a would-be shard holding exactly these templates.
  const sizeOf = (templates: PatternTemplate[]): number => serializedStagedPatternsInputByteLength({ templates });

  // Turns the pending templates into the next numbered shard, if there are any.
  const closeShard = (): void => {
    if (pending.length > 0) {
      const shardInput: StagedPatternsInput = { templates: pending };
      shards.push({
        path: shardPath(shards.length + 1),
        input: shardInput,
        byteLength: serializedStagedPatternsInputByteLength(shardInput),
      });
      pending = [];
    }
  };

  for (const candidate of sortedPatternCandidates(input.templates)) {
    if (sizeOf([candidate]) > byteBudget) {
      warnings.push(`patterns_input_template_too_large:${candidate.id}`);
      continue;
    }
    const overflows = pending.length > 0 && sizeOf([...pending, candidate]) > byteBudget;
    if (overflows) {
      closeShard();
    }
    pending.push(candidate);
  }
  closeShard();

  return { auditInput, shards, warnings };
}
```
- [ ] **Step 4: Run helper tests to verify they pass**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/pattern-inputs.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit the helper**
```bash
git add packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts
git commit -m "feat: shard historic sql pattern inputs"
```
## Task 2: Write Pattern Shards During Staging
**Files:**
- Modify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`
- Modify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`
- Test: `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts`
- [ ] **Step 1: Add the failing stager regression**
Append this test inside the existing `describe('stageHistoricSqlAggregatedSnapshot', ...)` block in `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`:
```typescript
// Regression: staging must write the full audit file plus shard files, and keep
// single-table templates out of the shards.
it('preserves full patterns audit input and writes bounded cross-table pattern shards', async () => {
const stagedDir = await tempDir();
// 8000 filler characters make the SQL of each cross-table template roughly 8 KB.
const largeSql = `select * from public.orders o join public.customers c on c.id = o.customer_id where payload = '${'x'.repeat(8000)}'`;
// Fake reader yielding three aggregates: two with the large join SQL, one single-table count.
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'orders-customers-a',
canonicalSql: largeSql,
stats: {
executions: 25,
distinctUsers: 4,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 15,
p95RuntimeMs: 90,
errorRate: 0,
rowsProduced: 250,
},
});
yield aggregate({
templateId: 'orders-customers-b',
canonicalSql: largeSql.replace('payload', 'payload_b'),
stats: {
executions: 22,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 20,
p95RuntimeMs: 95,
errorRate: 0,
rowsProduced: 220,
},
});
yield aggregate({
templateId: 'orders-single-table',
canonicalSql: 'select count(*) from public.orders',
stats: {
executions: 30,
distinctUsers: 2,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 10,
p95RuntimeMs: 20,
errorRate: 0,
rowsProduced: 30,
},
});
},
};
// analyzeBatch stub: maps each template id to the tables and columns it touches.
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'orders-customers-a',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: [],
where: ['payload'],
join: ['customer_id', 'id'],
groupBy: [],
},
},
],
[
'orders-customers-b',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: [],
where: ['payload_b'],
join: ['customer_id', 'id'],
groupBy: [],
},
},
],
[
'orders-single-table',
{
tablesTouched: ['public.orders'],
columnsByClause: {
select: [],
where: [],
join: [],
groupBy: [],
},
},
],
])),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: { dialect: 'postgres' },
now: new Date('2026-05-11T12:00:00.000Z'),
});
// The audit file must list all three templates, ordered by id.
const audit = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
expect(audit.templates.map((entry: any) => entry.id)).toEqual([
'orders-customers-a',
'orders-customers-b',
'orders-single-table',
]);
// Both cross-table templates are expected in the first shard; the single-table template must be absent.
const firstShard = await readJson<Record<string, any>>(stagedDir, 'patterns-input/part-0001.json');
expect(firstShard.templates.map((entry: any) => entry.id)).toEqual(['orders-customers-a', 'orders-customers-b']);
expect(firstShard.templates.some((entry: any) => entry.id === 'orders-single-table')).toBe(false);
// No warnings are expected in the manifest for this input.
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
expect(manifest.warnings).toEqual([]);
});
```
- [ ] **Step 2: Run the stager regression to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/stage-unified.test.ts
```
Expected: FAIL because `patterns-input/part-0001.json` is not written.
- [ ] **Step 3: Import the sharding helper in the stager**
In `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`, add this import below the bucket import block:
```typescript
import { splitHistoricSqlPatternInputs } from './pattern-inputs.js';
```
- [ ] **Step 4: Write the audit input and shard files**
In `stageHistoricSqlAggregatedSnapshot()` in `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`, replace this block:
```typescript
await writeJson(input.stagedDir, 'patterns-input.json', toPatternsInput(parsedTemplates));
await writeJson(input.stagedDir, 'manifest.json', {
source: HISTORIC_SQL_SOURCE_KEY,
connectionId: input.connectionId,
dialect: config.dialect,
fetchedAt: now.toISOString(),
windowStart: windowStart.toISOString(),
windowEnd: now.toISOString(),
snapshotRowCount,
touchedTableCount: byTable.size,
parseFailures: warnings.filter((warning) => warning.startsWith('parse_failed:')).length,
warnings,
probeWarnings: probe.warnings,
staleArchiveAfterDays: config.staleArchiveAfterDays,
});
```
with this code:
```typescript
const patternsInput = toPatternsInput(parsedTemplates);
const patternInputSplit = splitHistoricSqlPatternInputs(patternsInput);
const allWarnings = [...warnings, ...patternInputSplit.warnings];
await writeJson(input.stagedDir, 'patterns-input.json', patternInputSplit.auditInput);
for (const shard of patternInputSplit.shards) {
await writeJson(input.stagedDir, shard.path, shard.input);
}
await writeJson(input.stagedDir, 'manifest.json', {
source: HISTORIC_SQL_SOURCE_KEY,
connectionId: input.connectionId,
dialect: config.dialect,
fetchedAt: now.toISOString(),
windowStart: windowStart.toISOString(),
windowEnd: now.toISOString(),
snapshotRowCount,
touchedTableCount: byTable.size,
parseFailures: allWarnings.filter((warning) => warning.startsWith('parse_failed:')).length,
warnings: allWarnings,
probeWarnings: probe.warnings,
staleArchiveAfterDays: config.staleArchiveAfterDays,
});
```
- [ ] **Step 5: Run helper and stager tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/pattern-inputs.test.ts src/ingest/adapters/historic-sql/stage-unified.test.ts
```
Expected: PASS.
- [ ] **Step 6: Commit stager shard writing**
```bash
git add packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts
git commit -m "feat: write historic sql pattern shards"
```
## Task 3: Emit Pattern WorkUnits From Shards
**Files:**
- Modify: `packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts`
- Modify: `packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts`
- Test: `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts`
- [ ] **Step 1: Update chunk tests for sharded pattern WorkUnits**
In `packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts`, replace the `patterns-input.json` write inside `writeUnifiedStagedDir()` with these writes:
```typescript
await writeJson(root, 'patterns-input.json', {
templates: [
{
id: 'orders',
canonicalSql: 'select * from public.orders join public.customers on true',
tablesTouched: ['public.orders', 'public.customers'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
await writeJson(root, 'patterns-input/part-0001.json', {
templates: [
{
id: 'orders',
canonicalSql: 'select * from public.orders join public.customers on true',
tablesTouched: ['public.orders', 'public.customers'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
```
In the first test, replace the patterns WorkUnit expectation with:
```typescript
expect.objectContaining({
unitKey: 'historic-sql-patterns-part-0001',
displayLabel: 'Historic SQL cross-table patterns: part-0001',
rawFiles: ['patterns-input/part-0001.json'],
dependencyPaths: ['manifest.json'],
notes: expect.stringContaining('patterns-input/part-0001.json'),
}),
```
In the diff-set test, replace the second expectation with:
```typescript
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['patterns-input/part-0001.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'tables/public.orders.json'],
}),
).resolves.toMatchObject({
workUnits: [expect.objectContaining({ unitKey: 'historic-sql-patterns-part-0001' })],
});
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['patterns-input.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input/part-0001.json', 'tables/public.orders.json'],
}),
).resolves.toMatchObject({
workUnits: [],
});
```
In the scope test, add these expectations:
```typescript
expect(scope.isPathInScope('patterns-input/part-0001.json')).toBe(true);
expect(scope.isPathInScope('patterns-input/part-1.json')).toBe(false);
```
Append this additional test inside the same `describe` block:
```typescript
// Each touched shard file (added or modified) must yield its own patterns WorkUnit.
it('emits one patterns WorkUnit per changed shard', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
// Add a second shard file next to whatever writeUnifiedStagedDir() staged.
await writeJson(stagedDir, 'patterns-input/part-0002.json', {
templates: [
{
id: 'line-items',
canonicalSql: 'select * from public.orders join public.line_items on true',
tablesTouched: ['public.orders', 'public.line_items'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
// DiffSet: part-0002 is new, part-0001 is modified, and the root audit file is unchanged.
const result = await chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: ['patterns-input/part-0002.json'],
modified: ['patterns-input/part-0001.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'tables/public.orders.json'],
});
// One WorkUnit per shard, in shard order, each listing only its own shard as raw file.
expect(result.workUnits.map((unit) => unit.unitKey)).toEqual([
'historic-sql-patterns-part-0001',
'historic-sql-patterns-part-0002',
]);
expect(result.workUnits.map((unit) => unit.rawFiles)).toEqual([
['patterns-input/part-0001.json'],
['patterns-input/part-0002.json'],
]);
});
```
- [ ] **Step 2: Run chunk tests to verify they fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/chunk-unified.test.ts
```
Expected: FAIL because `chunkHistoricSqlUnifiedStagedDir()` still emits `historic-sql-patterns` from root `patterns-input.json`.
- [ ] **Step 3: Import shard path helpers in the chunker**
In `packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts`, add this import below the existing type imports:
```typescript
import { isHistoricSqlPatternInputShardPath } from './pattern-inputs.js';
```
- [ ] **Step 4: Emit WorkUnits from shard paths**
In `packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts`, replace the root `patterns-input.json` WorkUnit block:
```typescript
if (files.includes('patterns-input.json') && touchedPath('patterns-input.json', touched)) {
stagedPatternsInputSchema.parse(await readJson(stagedDir, 'patterns-input.json'));
workUnits.push({
unitKey: 'historic-sql-patterns',
displayLabel: 'Historic SQL cross-table patterns',
rawFiles: ['patterns-input.json'],
dependencyPaths: ['manifest.json'],
peerFileIndex: files.filter((file) => file !== 'patterns-input.json' && file !== 'manifest.json').sort(),
notes:
'Use historic_sql_patterns. Read patterns-input.json and emit pattern objects with emit_historic_sql_evidence. Do not call wiki_write or sl_write_source.',
});
}
```
with this code:
```typescript
for (const path of files.filter(isHistoricSqlPatternInputShardPath)) {
if (!touchedPath(path, touched)) {
continue;
}
stagedPatternsInputSchema.parse(await readJson(stagedDir, path));
const shardLabel = path.replace(/^patterns-input\//, '').replace(/\.json$/, '');
workUnits.push({
unitKey: `historic-sql-patterns-${safeUnitKey(shardLabel)}`,
displayLabel: `Historic SQL cross-table patterns: ${shardLabel}`,
rawFiles: [path],
dependencyPaths: ['manifest.json'],
peerFileIndex: files.filter((file) => file !== path && file !== 'manifest.json').sort(),
notes:
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence using rawPath "${path}". Do not call wiki_write or sl_write_source.`,
});
}
```
- [ ] **Step 5: Update eviction and scope matching**
In `packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts`, replace the deleted-path filter:
```typescript
const deleted = diffSet?.deleted.filter((path) => path === 'patterns-input.json' || /^tables\/.+\.json$/.test(path)).sort();
```
with:
```typescript
const deleted = diffSet?.deleted
.filter((path) => isHistoricSqlPatternInputShardPath(path) || /^tables\/.+\.json$/.test(path))
.sort();
```
In `describeHistoricSqlUnifiedScope()`, replace the scope predicate:
```typescript
isPathInScope: (rawPath) =>
rawPath === 'manifest.json' || rawPath === 'patterns-input.json' || /^tables\/.+\.json$/.test(rawPath),
```
with:
```typescript
isPathInScope: (rawPath) =>
rawPath === 'manifest.json' ||
rawPath === 'patterns-input.json' ||
isHistoricSqlPatternInputShardPath(rawPath) ||
/^tables\/.+\.json$/.test(rawPath),
```
- [ ] **Step 6: Run helper, stage, and chunk tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/pattern-inputs.test.ts src/ingest/adapters/historic-sql/stage-unified.test.ts src/ingest/adapters/historic-sql/chunk-unified.test.ts
```
Expected: PASS.
- [ ] **Step 7: Commit chunker shard WorkUnits**
```bash
git add packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts
git commit -m "feat: emit historic sql pattern shard work units"
```
## Task 4: Update Skill Guidance And Acceptance Coverage
**Files:**
- Modify: `packages/context/skills/historic_sql_patterns/SKILL.md`
- Modify: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
- Modify: `packages/context/src/ingest/ingest-runtime-assets.test.ts`
- [ ] **Step 1: Update the packaged historic SQL patterns skill**
Replace `packages/context/skills/historic_sql_patterns/SKILL.md` with:
````markdown
---
name: historic_sql_patterns
description: Identify recurring cross-table historic-SQL analytical intents from a bounded pattern shard and emit typed pattern evidence for deterministic wiki projection.
callers: [memory_agent]
---
# Historic SQL Patterns
Use this skill when the WorkUnit raw file is a `patterns-input/part-0001.json` style shard from the `historic-sql` adapter. Older staged bundles may still provide root `patterns-input.json`; when that is the WorkUnit raw file, read it the same way.
## Required Workflow
1. Read the WorkUnit notes first.
2. Find the single pattern input file listed under the WorkUnit `rawFiles` section.
3. Call `read_raw_file` for that exact raw file path.
4. Identify recurring analytical intents that span at least two tables and have repeated usage signal.
5. Emit one `pattern` evidence object per durable cross-table intent by calling `emit_historic_sql_evidence`.
6. Set each evidence object's `rawPath` to the exact raw file path read in step 3.
7. Stop after all pattern evidence has been emitted.
## Evidence Shape
Each call to `emit_historic_sql_evidence` must use this shape:
```json
{
"kind": "pattern",
"rawPath": "patterns-input/part-0001.json",
"pattern": {
"slug": "order-lifecycle-analysis",
"title": "Order Lifecycle Analysis",
"narrative": "Analysts compare order statuses with customer segments to understand lifecycle movement.",
"definitionSql": "select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status",
"tablesInvolved": ["public.orders", "public.customers"],
"slRefs": ["orders", "customers"],
"constituentTemplateIds": ["pg:1", "pg:2"]
}
}
```
The `pattern` object must match `patternOutputSchema`; multiple calls together must form `patternsArraySchema`.
## Pattern Selection Rules
- Prefer patterns that involve two or more tables.
- Prefer templates with `executionsBucket` at least `10-100` and `distinctUsersBucket` above solo usage.
- Merge templates into one pattern only when the business intent is the same.
- Use a stable kebab-case slug based on intent, not a template id.
- Set `definitionSql` to the clearest representative SQL from a constituent template.
- Set `slRefs` to source names when the source name is obvious from table names; omit uncertain refs rather than guessing.
- Treat each pattern shard independently; do not read peer shard files from `peerFileIndex`.
## Boundaries
- Do not call wiki_write.
- Do not call sl_write_source.
- Do not call sl_edit_source.
- Do not call context_candidate_write.
- Do not create single-table pattern pages.
- Do not copy credentials, tokens, user emails, or unredacted literals into evidence.
````
- [ ] **Step 2: Update runtime asset assertions**
In `packages/context/src/ingest/ingest-runtime-assets.test.ts`, replace this assertion:
```typescript
expect(body).toContain('patterns-input.json');
```
with:
```typescript
expect(body).toContain('patterns-input/part-0001.json');
```
- [ ] **Step 3: Update the local ingest acceptance fake agent**
In `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`, replace this block:
```typescript
if (params.telemetryTags.unitKey === 'historic-sql-patterns') {
const result = await emitEvidence.execute(
{
kind: 'pattern',
rawPath: 'patterns-input.json',
pattern: {
```
with:
```typescript
if (params.telemetryTags.unitKey === 'historic-sql-patterns-part-0001') {
const result = await emitEvidence.execute(
{
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
```
The rest of the pattern object stays unchanged.
- [ ] **Step 4: Run skill and acceptance tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-runtime-assets.test.ts src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit skill and acceptance updates**
```bash
git add packages/context/skills/historic_sql_patterns/SKILL.md packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
git commit -m "test: align historic sql pattern skill with shards"
```
## Task 5: Final Verification
**Files:**
- Verify: `packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts`
- Verify: `packages/context/skills/historic_sql_patterns/SKILL.md`
- [ ] **Step 1: Run focused historic SQL tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/adapters/historic-sql/pattern-inputs.test.ts \
src/ingest/adapters/historic-sql/stage-unified.test.ts \
src/ingest/adapters/historic-sql/chunk-unified.test.ts \
src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts \
src/ingest/ingest-runtime-assets.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run context package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Verify no legacy historic SQL code path returned**
Run:
```bash
rg -n "stagePgStatStatementsTemplates|expandCategoricalTemplates|classifySlot|pgss-baseline|historic_sql_ingest|historic_sql_curator|PostgresPgssQueryHistoryReader|historic_sql_template" packages/context packages/cli
```
Expected: no matches. This command searches only `packages/context` and `packages/cli` (runtime and test source); matches under `docs/superpowers/plans/` are acceptable but are outside this search.
- [ ] **Step 4: Run pre-commit on changed files if configured**
Run:
```bash
uv run pre-commit run --files \
packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts \
packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts \
packages/context/src/ingest/adapters/historic-sql/stage-unified.ts \
packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts \
packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts \
packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts \
packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts \
packages/context/src/ingest/ingest-runtime-assets.test.ts \
packages/context/skills/historic_sql_patterns/SKILL.md
```
Expected: PASS. If the repository has no pre-commit config or the local `uv` version cannot satisfy the project pin, record the exact error and rely on the focused tests plus type-check above.
- [ ] **Step 5: Commit verification-only adjustments if any were needed**
If any test or type-check step required small follow-up edits, commit them:
```bash
git add packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts packages/context/src/ingest/adapters/historic-sql/pattern-inputs.test.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts packages/context/src/ingest/adapters/historic-sql/chunk-unified.ts packages/context/src/ingest/adapters/historic-sql/chunk-unified.test.ts packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/historic_sql_patterns/SKILL.md
git commit -m "test: verify historic sql pattern shard work units"
```
If there were no follow-up edits, do not create an empty commit.
## Self-Review
**Spec coverage:** This plan covers spec section 5.2's allowance for multiple deterministic pattern WorkUnits when `patterns-input.json` exceeds a context budget. It preserves section 4.6's full `patterns-input.json` audit artifact, keeps section 4.7's changed-file DiffSet behavior, and does not alter deterministic projection from section 5.3.
**Placeholder scan:** The plan contains concrete files, commands, expected outcomes, code snippets, and commit commands. It has no deferred implementation markers.
**Type consistency:** `splitHistoricSqlPatternInputs()`, `isHistoricSqlPatternInputShardPath()`, `HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES`, and `serializedStagedPatternsInputByteLength()` are introduced in Task 1, which types them with `StagedPatternsInput` imported from `./types.js`. Task 2 imports `splitHistoricSqlPatternInputs()` into the stager and Task 3 imports `isHistoricSqlPatternInputShardPath()` into the chunker under the same names. Pattern shard raw paths use `patterns-input/part-0001.json` consistently in the stager, chunker, skill, and acceptance test.
Plan complete and saved to `docs/superpowers/plans/2026-05-11-historic-sql-pattern-workunit-sharding.md`. Two execution options:
**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration
**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints
Which approach?

View file

@ -1,444 +0,0 @@
# Historic SQL Projection Archive Hardening Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Keep historic-SQL archived pattern pages stable across runs and add projection regression coverage for archive, stale-table, and legacy-page behavior from the redesign spec.
**Architecture:** The redesigned historic-SQL pipeline is already cut over. This plan only hardens the deterministic projection step by treating `knowledge/global/historic-sql/_archived/*.md` pages as historical records, not active candidates for slug reuse or stale/archive processing. Tests stay in the existing projection unit suite because the behavior is pure filesystem projection.
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, YAML, local filesystem fixtures.
---
## Starting Point
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans found that are based on this spec:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-unified-hot-path.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-skills-projection-cutover.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-cross-dialect-readiness.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md`
Implemented status verified from this worktree:
- `2026-05-11-historic-sql-foundations.md` is implemented. Evidence: `packages/context/src/ingest/adapters/historic-sql/skill-schemas.ts`, `packages/context/src/sql-analysis/ports.ts` exposes `analyzeBatch()`, `python/ktx-daemon/src/ktx_daemon/app.py` registers `/sql/analyze-batch`, `packages/context/src/sl/types.ts` has `SemanticLayerSource.usage`, and `packages/context/src/ingest/adapters/live-database/manifest.ts` has `mergeUsagePreservingExternal()`.
- `2026-05-11-historic-sql-search-enrichment.md` is implemented. Evidence: `packages/context/src/sl/sl-search.service.ts` indexes `source.usage`, `packages/context/src/sl/sqlite-sl-sources-index.ts` selects FTS snippets, and local/MCP list surfaces expose `frequencyTier` and `snippet`.
- `2026-05-11-historic-sql-unified-hot-path.md` is implemented. Evidence: `stageHistoricSqlAggregatedSnapshot()`, `chunkHistoricSqlUnifiedStagedDir()`, `PostgresPgssReader`, aggregate BigQuery/Snowflake `fetchAggregated()` methods, unified schemas, and exports exist.
- `2026-05-11-historic-sql-skills-projection-cutover.md` is implemented. Evidence: `HistoricSqlSourceAdapter` uses the unified stager/chunker, `packages/context/skills/historic_sql_table_digest/` and `packages/context/skills/historic_sql_patterns/` exist, `emit_historic_sql_evidence` exists, `HistoricSqlProjectionPostProcessor` is wired in `packages/context/src/ingest/local-bundle-runtime.ts`, and legacy skill names no longer grep in `packages/context` or `packages/cli`.
- `2026-05-11-historic-sql-cross-dialect-readiness.md` is implemented. Evidence: `packages/cli/src/local-adapters.test.ts` covers Postgres, BigQuery, and Snowflake historic-SQL registration, and `packages/cli/src/historic-sql-doctor.test.ts` covers low `pg_stat_statements.max` as informational output.
- `2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md` is implemented. Evidence: `packages/cli/src/setup-databases.test.ts` expects canonical `historicSql.filters.serviceAccounts`, `examples/postgres-historic/scripts/smoke.sh` asserts `manifest.json`, `tables/*.json`, `patterns-input.json`, and zero WorkUnits on the unchanged run, and public docs use `minExecutions`.
Remaining issue this plan fixes:
- `packages/context/src/ingest/adapters/historic-sql/projection.ts` recursively loads every markdown page below `knowledge/global/historic-sql`, including pages already under `_archived/`.
- Because archived pages still have `source: historic-sql` and tags `['historic-sql', 'pattern', 'archived']`, they are currently active candidates for slug reuse and stale/archive processing.
- A reappearing pattern can be written back to `_archived/<slug>.md` instead of active `historic-sql/<slug>.md`.
- A later no-pattern run can move an already archived page to `_archived/_archived/<slug>.md`.
- `projection.test.ts` does not cover stale table marking, legacy query-page deletion, or the archived-page stability behavior required by spec §5.3 and §10.2.
## File Structure
- Modify `packages/context/src/ingest/adapters/historic-sql/projection.ts`: add an archived-page predicate and exclude archived pages from active pattern slug matching and stale/archive loops.
- Modify `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`: add failing tests for archived-page stability, active slug restoration after a pattern reappears, stale table marking, and legacy query-page cleanup.
### Task 1: Add Archived Pattern Projection Regression Tests
**Files:**
- Modify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- [ ] **Step 1: Add failing tests for archived page handling**
Append these tests inside the existing `describe('projectHistoricSqlEvidence', ...)` block in `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`:
```typescript
it('writes a reappearing pattern to the active slug instead of reusing an archived page key', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 2,
touchedTableCount: 2,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 30,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
await writeText(
workdir,
'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md',
[
'---',
YAML.stringify({
summary: 'Archived order lifecycle page',
tags: ['historic-sql', 'pattern', 'archived'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders', 'public.customers'],
fingerprints: ['pg:1'],
stale_since: '2026-01-01T00:00:00.000Z',
}).trimEnd(),
'---',
'',
'Archived body',
'',
].join('\n'),
);
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/pattern.json', {
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status with customer segment again.',
definitionSql: 'select * from public.orders join public.customers on customers.id = orders.customer_id',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.patternPagesWritten).toBe(1);
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain(
'Order Lifecycle Analysis',
);
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain(
'Archived body',
);
await expect(
readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/order-lifecycle-analysis.md'), 'utf-8'),
).rejects.toMatchObject({ code: 'ENOENT' });
});
it('leaves already archived pattern pages stable when they are still absent', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 30,
});
await writeText(
workdir,
'knowledge/global/historic-sql/_archived/retired-pattern.md',
[
'---',
YAML.stringify({
summary: 'Retired pattern',
tags: ['historic-sql', 'pattern', 'archived'],
refs: [],
sl_refs: [],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.tickets'],
fingerprints: ['pg:9'],
stale_since: '2026-01-01T00:00:00.000Z',
}).trimEnd(),
'---',
'',
'Archived retired body',
'',
].join('\n'),
);
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.archivedPatternPages).toBe(0);
expect(result.stalePatternPagesMarked).toBe(0);
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/retired-pattern.md'), 'utf-8')).resolves.toContain(
'Archived retired body',
);
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/retired-pattern.md'), 'utf-8')).rejects.toMatchObject({
code: 'ENOENT',
});
});
```
- [ ] **Step 2: Run projection tests to verify the archived-page tests fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/projection.test.ts
```
Expected: FAIL. The first new test should fail because `knowledge/global/historic-sql/order-lifecycle-analysis.md` is not written. The second new test should fail because `result.archivedPatternPages` is `1` or `_archived/_archived/retired-pattern.md` exists.
### Task 2: Exclude Archived Pages From Active Projection Processing
**Files:**
- Modify: `packages/context/src/ingest/adapters/historic-sql/projection.ts`
- Test: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- [ ] **Step 1: Add the archived-page predicate**
In `packages/context/src/ingest/adapters/historic-sql/projection.ts`, add this function after `isLegacyQueryPage()`:
```typescript
/**
 * Reports whether a historic-SQL wiki page should be treated as archived.
 *
 * A page is archived when its key starts with `_archived/` or when its
 * frontmatter `tags` value is an array containing `'archived'`. A `tags`
 * value that is not an array is treated as an empty tag list.
 */
function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean {
  const { frontmatter, key } = page;
  const tagList: readonly unknown[] = Array.isArray(frontmatter.tags) ? frontmatter.tags : [];
  if (key.startsWith('_archived/')) {
    return true;
  }
  return tagList.includes('archived');
}
```
- [ ] **Step 2: Use only active pattern pages for slug matching and stale/archive processing**
In `projectHistoricSqlEvidence()`, replace:
```typescript
const allPages = await loadPatternPages(wikiRoot);
const patternPages = allPages.filter(isHistoricPatternPage);
```
with:
```typescript
const allPages = await loadPatternPages(wikiRoot);
const activePages = allPages.filter((page) => !isArchivedPatternPage(page));
const patternPages = activePages.filter(isHistoricPatternPage);
```
- [ ] **Step 3: Run projection tests to verify the archived-page fix passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/projection.test.ts
```
Expected: PASS. All projection tests pass, including the two archived-page tests from Task 1.
- [ ] **Step 4: Commit**
```bash
git add packages/context/src/ingest/adapters/historic-sql/projection.ts packages/context/src/ingest/adapters/historic-sql/projection.test.ts
git commit -m "fix: keep historic sql archived patterns stable"
```
### Task 3: Add Stale Table And Legacy Page Cleanup Regression Coverage
**Files:**
- Modify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- [ ] **Step 1: Add projection coverage for table drift and legacy query-page cleanup**
Append this test inside the existing `describe('projectHistoricSqlEvidence', ...)` block in `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`:
```typescript
it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Orders were active before.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'keep analyst annotation',
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeText(
workdir,
'knowledge/global/historic-sql/legacy-template.md',
[
'---',
YAML.stringify({
summary: 'Legacy template page',
tags: ['historic-sql', 'query-pattern'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders'],
fingerprints: ['legacy:1'],
}).trimEnd(),
'---',
'',
'Legacy body',
'',
].join('\n'),
);
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.staleTablesMarked).toBe(1);
expect(result.legacyPagesDeleted).toBe(1);
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep analyst annotation',
narrative: 'No recent historic SQL usage was observed in the latest snapshot.',
frequencyTier: 'unused',
commonFilters: [],
commonGroupBys: [],
commonJoins: [],
staleSince: '2026-05-11T00:00:00.000Z',
});
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/legacy-template.md'), 'utf-8')).rejects.toMatchObject({
code: 'ENOENT',
});
});
```
- [ ] **Step 2: Run projection tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/projection.test.ts
```
Expected: PASS. The new regression test should pass with the current implementation after Task 2, proving stale table drift and legacy query-page cleanup stay covered.
- [ ] **Step 3: Commit**
```bash
git add packages/context/src/ingest/adapters/historic-sql/projection.test.ts
git commit -m "test: cover historic sql projection cleanup"
```
### Task 4: Final Verification
**Files:**
- Verify: `packages/context/src/ingest/adapters/historic-sql/projection.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- [ ] **Step 1: Run the focused projection test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/projection.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run the focused historic-SQL adapter test group**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/adapters/historic-sql/evidence.test.ts \
src/ingest/adapters/historic-sql/evidence-tool.test.ts \
src/ingest/adapters/historic-sql/projection.test.ts \
src/ingest/adapters/historic-sql/post-processor.test.ts \
src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts
```
Expected: PASS.
- [ ] **Step 3: Run context type check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 4: Confirm old historic-SQL code paths remain absent**
Run:
```bash
rg -n "stagePgStatStatementsTemplates|expandCategoricalTemplates|classifySlot|historic_sql_ingest|historic_sql_curator|PostgresPgssQueryHistoryReader|historic_sql_template" packages/context packages/cli
```
Expected: no output and exit code 1.
- [ ] **Step 5: Run whitespace check**
Run:
```bash
git diff --check
```
Expected: no output.
- [ ] **Step 6: Commit verification fixes only if verification changed files**
If verification required an edit, commit the exact touched files:
```bash
git add packages/context/src/ingest/adapters/historic-sql/projection.ts packages/context/src/ingest/adapters/historic-sql/projection.test.ts
git commit -m "test: verify historic sql projection archive hardening"
```
If verification made no edits, do not create an empty commit.
## Self-Review
Spec coverage:
- Spec §5.3 stale pattern handling is covered by Task 1 and Task 2: archived pages are historical records and are not repeatedly archived or reused as active slug targets.
- Spec §10.2 legacy wiki page cleanup is covered by Task 3.
- Spec §10.4 drift behavior is covered by Task 3: a table absent from the latest snapshot receives `usage.staleSince` while external usage keys remain intact.
- Spec §10.6 slug churn and user-edited usage risks are covered by Task 1 and Task 3.
Placeholder scan:
- The plan contains no unresolved marker text from the forbidden-pattern list.
- Every code-changing step names exact files, exact inserted or replacement code, exact commands, and expected outcomes.
Type consistency:
- `staleSince`, `frequencyTier`, `commonFilters`, `commonGroupBys`, and `commonJoins` match `tableUsageOutputSchema`.
- `stale_since`, `tags`, `tables`, and `fingerprints` match the existing wiki frontmatter shape used in `projection.ts`.
- `archivedPatternPages`, `stalePatternPagesMarked`, `staleTablesMarked`, and `legacyPagesDeleted` match `HistoricSqlProjectionResult`.
Plan complete and saved to `docs/superpowers/plans/2026-05-11-historic-sql-projection-archive-hardening.md`. Two execution options:
**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration
**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints
Which approach?

View file

@ -1,441 +0,0 @@
# Historic SQL Redaction Hardening Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make `historicSql.redactionPatterns` actually redact sensitive SQL substrings from historic-SQL staged artifacts and WorkUnit inputs.
**Architecture:** Keep the unified hot path parseable by sending original SQL to the local deterministic SQL-analysis daemon, then redact only the SQL text that is written to `tables/*.json` and `patterns-input.json`. Add a focused redaction helper so regex compatibility and error messages are tested independently from staging, then add a stager regression proving raw sensitive values do not reach files consumed by LLM skills.
**Tech Stack:** TypeScript ESM/NodeNext, zod 4, Vitest, existing historic-SQL unified stager.
---
## Starting Point
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans found that are based on this spec:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-unified-hot-path.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-skills-projection-cutover.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-cross-dialect-readiness.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-projection-archive-hardening.md`
- `docs/superpowers/plans/2026-05-11-historic-sql-end-to-end-retrieval-acceptance.md`
Implemented status verified from this worktree:
- `2026-05-11-historic-sql-foundations.md` is implemented. Evidence: `packages/context/src/ingest/adapters/historic-sql/skill-schemas.ts`, `packages/context/src/sql-analysis/ports.ts` exposes `analyzeBatch()`, `python/ktx-daemon/src/ktx_daemon/app.py` registers `/sql/analyze-batch`, `packages/context/src/sl/types.ts` has `SemanticLayerSource.usage`, and `packages/context/src/ingest/adapters/live-database/manifest.ts` has `mergeUsagePreservingExternal()`.
- `2026-05-11-historic-sql-search-enrichment.md` is implemented. Evidence: `packages/context/src/sl/sl-search.service.ts` indexes `source.usage`, `packages/context/src/sl/sqlite-sl-sources-index.ts` selects FTS snippets, and local/MCP list surfaces expose `frequencyTier` and `snippet`.
- `2026-05-11-historic-sql-unified-hot-path.md` is implemented. Evidence: `stageHistoricSqlAggregatedSnapshot()`, `chunkHistoricSqlUnifiedStagedDir()`, `PostgresPgssReader`, aggregate BigQuery/Snowflake `fetchAggregated()` methods, unified schemas, and package exports exist.
- `2026-05-11-historic-sql-skills-projection-cutover.md` is implemented. Evidence: `HistoricSqlSourceAdapter` uses the unified stager/chunker, `packages/context/skills/historic_sql_table_digest/` and `packages/context/skills/historic_sql_patterns/` exist, `emit_historic_sql_evidence` exists, `HistoricSqlProjectionPostProcessor` is wired in `packages/context/src/ingest/local-bundle-runtime.ts`, and legacy skill names no longer grep in `packages/context` or `packages/cli`.
- `2026-05-11-historic-sql-cross-dialect-readiness.md` is implemented. Evidence: `packages/cli/src/local-adapters.test.ts` covers Postgres, BigQuery, and Snowflake historic-SQL registration, and `packages/cli/src/historic-sql-doctor.test.ts` covers low `pg_stat_statements.max` as informational output.
- `2026-05-11-historic-sql-docs-smoke-and-config-cleanup.md` is implemented. Evidence: `packages/cli/src/setup-databases.test.ts` expects canonical `historicSql.filters.serviceAccounts`, `examples/postgres-historic/scripts/smoke.sh` asserts unified `manifest.json`, `tables/*.json`, `patterns-input.json`, and zero WorkUnits on the unchanged run, and public docs use `minExecutions`.
- `2026-05-11-historic-sql-projection-archive-hardening.md` is implemented. Evidence: `packages/context/src/ingest/adapters/historic-sql/projection.ts` has `isArchivedPatternPage()`, excludes archived pages from active slug matching, and `projection.test.ts` covers reappearing archived patterns, stable archived pages, stale table marking, and legacy query-page deletion.
- `2026-05-11-historic-sql-end-to-end-retrieval-acceptance.md` is implemented. Evidence: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts` exercises the production `HistoricSqlSourceAdapter`, fake `emit_historic_sql_evidence` calls, projection, semantic-layer search, and wiki search.
Focused verification before writing this plan:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts src/ingest/adapters/historic-sql/projection.test.ts src/ingest/adapters/historic-sql/stage-unified.test.ts src/ingest/adapters/historic-sql/types.test.ts
```
Observed: 4 files passed, 10 tests passed.
Remaining spec gap this plan covers:
- Spec §8 exposes `historicSql.redactionPatterns`, and setup/docs already write that field.
- `packages/context/src/ingest/adapters/historic-sql/types.ts` parses `redactionPatterns`, but `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts` never applies them.
- Staged `tables/{schema}.{table}.json` and `patterns-input.json` currently copy `AggregatedTemplate.canonicalSql` unchanged into `topTemplates[].canonicalSql` and `templates[].canonicalSql`.
- Those staged files are WorkUnit inputs for `historic_sql_table_digest` and `historic_sql_patterns`, so sensitive substrings can reach LLM prompts even when the user configured redaction.
## File Structure
Create:
- `packages/context/src/ingest/adapters/historic-sql/redaction.ts`
Owns compilation and application of historic-SQL SQL-text redaction patterns. Supports JavaScript regex strings and the documented `(?i)` case-insensitive prefix used by setup tests/docs.
- `packages/context/src/ingest/adapters/historic-sql/redaction.test.ts`
Tests raw regex replacement, `(?i)` compatibility, empty config behavior, and invalid-pattern diagnostics.
Modify:
- `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`
Compiles `config.redactionPatterns` once per `stageHistoricSqlAggregatedSnapshot()` call. Keeps original SQL for filtering and `SqlAnalysisPort.analyzeBatch()`, then stores redacted SQL in `ParsedTemplate.template.canonicalSql` before `toStagedTable()` and `toPatternsInput()` serialize files.
- `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`
Adds a regression proving raw secrets are absent from staged artifacts while `analyzeBatch()` still receives the original SQL.
## Task 1: Add Historic SQL Redaction Helper
**Files:**
- Create: `packages/context/src/ingest/adapters/historic-sql/redaction.test.ts`
- Create: `packages/context/src/ingest/adapters/historic-sql/redaction.ts`
- [ ] **Step 1: Write the failing redaction helper test**
Create `packages/context/src/ingest/adapters/historic-sql/redaction.test.ts`:
```typescript
import { describe, expect, it } from 'vitest';
import { compileHistoricSqlRedactionPatterns, redactHistoricSqlText } from './redaction.js';
describe('historic-SQL redaction', () => {
it('redacts regex matches and supports the (?i) case-insensitive prefix', () => {
const redactors = compileHistoricSqlRedactionPatterns([
'sk_live_[A-Za-z0-9]+',
'(?i)secret_token_[a-z0-9]+',
]);
const sql =
"select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
expect(redactHistoricSqlText(sql, redactors)).toBe(
"select * from public.api_events where api_key = '[REDACTED]' and note = '[REDACTED]'",
);
});
it('returns the original SQL text when no redaction patterns are configured', () => {
const sql = "select * from public.orders where status = 'paid'";
expect(redactHistoricSqlText(sql, compileHistoricSqlRedactionPatterns([]))).toBe(sql);
});
it('throws a config-focused error for invalid redaction regex patterns', () => {
expect(() => compileHistoricSqlRedactionPatterns(['[broken'])).toThrow(
'Invalid historicSql.redactionPatterns entry "[broken"',
);
});
it('throws a config-focused error for empty redaction regex patterns', () => {
expect(() => compileHistoricSqlRedactionPatterns([' '])).toThrow(
'Invalid historicSql.redactionPatterns entry " "',
);
});
});
```
- [ ] **Step 2: Run the redaction helper test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/redaction.test.ts
```
Expected: FAIL because `./redaction.js` does not exist.
- [ ] **Step 3: Add the redaction helper implementation**
Create `packages/context/src/ingest/adapters/historic-sql/redaction.ts`:
```typescript
/** A configured redaction pattern paired with the RegExp compiled from it. */
export interface HistoricSqlRedactionPattern {
  /** The pattern string exactly as it was supplied (not trimmed, prefix intact). */
  pattern: string;
  /** Global RegExp built from the pattern; also case-insensitive when `(?i)`-prefixed. */
  expression: RegExp;
}

// Leading marker that requests case-insensitive matching; it is stripped before compiling.
const CASE_INSENSITIVE_PREFIX = '(?i)';
// Text substituted for every match of a redaction pattern.
const REDACTION_TOKEN = '[REDACTED]';

/**
 * Compiles a single pattern string.
 *
 * The string is trimmed and an optional leading `(?i)` is removed; what is
 * left becomes the RegExp source. Error messages quote the original string.
 */
function compileRedactionPattern(pattern: string): HistoricSqlRedactionPattern {
  const normalized = pattern.trim();
  const ignoreCase = normalized.startsWith(CASE_INSENSITIVE_PREFIX);
  const body = ignoreCase ? normalized.slice(CASE_INSENSITIVE_PREFIX.length) : normalized;

  if (body.length === 0) {
    throw new Error(`Invalid historicSql.redactionPatterns entry "${pattern}": pattern must not be empty`);
  }

  const flags = ignoreCase ? 'gi' : 'g';
  try {
    return { pattern, expression: new RegExp(body, flags) };
  } catch (error) {
    // Re-throw with the config key in the message so the failure points at the setting.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid historicSql.redactionPatterns entry "${pattern}": ${reason}`);
  }
}

/**
 * Compiles redaction pattern strings into global regular expressions.
 *
 * @param patterns Regex source strings, each optionally prefixed with `(?i)`.
 * @returns One compiled entry per input pattern, in input order.
 * @throws Error when an entry is empty after trimming and prefix removal, or
 *   when the RegExp constructor rejects it.
 */
export function compileHistoricSqlRedactionPatterns(patterns: readonly string[]): HistoricSqlRedactionPattern[] {
  return patterns.map((pattern) => compileRedactionPattern(pattern));
}

/**
 * Replaces every match of every redactor in `text` with `[REDACTED]`.
 *
 * Redactors are applied one after another in array order, each to the output
 * of the previous one. With no redactors the text is returned unchanged.
 */
export function redactHistoricSqlText(text: string, redactors: readonly HistoricSqlRedactionPattern[]): string {
  let redacted = text;
  for (let index = 0; index < redactors.length; index += 1) {
    const { expression } = redactors[index];
    // Clear any match position left on the shared RegExp object before using it.
    expression.lastIndex = 0;
    redacted = redacted.replace(expression, REDACTION_TOKEN);
  }
  return redacted;
}
```
- [ ] **Step 4: Run the redaction helper test to verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/redaction.test.ts
```
Expected: PASS. The output reports 1 test file passed and 4 tests passed.
- [ ] **Step 5: Commit the redaction helper**
```bash
git add packages/context/src/ingest/adapters/historic-sql/redaction.ts packages/context/src/ingest/adapters/historic-sql/redaction.test.ts
git commit -m "feat: add historic sql redaction helper"
```
## Task 2: Apply Redaction To Unified Staged Artifacts
**Files:**
- Modify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`
- Modify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/redaction.ts`
- [ ] **Step 1: Add the failing staged-artifact redaction test**
Append this test inside the existing `describe('stageHistoricSqlAggregatedSnapshot', ...)` block in `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`:
```typescript
it('redacts configured SQL substrings in staged artifacts while analyzing original SQL', async () => {
const stagedDir = await tempDir();
const originalSql =
"select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'api-events-with-secret',
canonicalSql: originalSql,
stats: {
executions: 15,
distinctUsers: 2,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 12,
p95RuntimeMs: 25,
errorRate: 0,
rowsProduced: 15,
},
});
},
};
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'api-events-with-secret',
{
tablesTouched: ['public.api_events'],
columnsByClause: {
select: [],
where: ['api_key', 'note'],
join: [],
groupBy: [],
},
},
],
])),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: {
dialect: 'postgres',
redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
},
now: new Date('2026-05-11T12:00:00.000Z'),
});
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
[{ id: 'api-events-with-secret', sql: originalSql }],
'postgres',
);
const tableJson = await readFile(join(stagedDir, 'tables/public.api_events.json'), 'utf-8');
const patternsJson = await readFile(join(stagedDir, 'patterns-input.json'), 'utf-8');
expect(tableJson).not.toContain('sk_live_abc123');
expect(tableJson).not.toContain('Secret_Token_9f');
expect(patternsJson).not.toContain('sk_live_abc123');
expect(patternsJson).not.toContain('Secret_Token_9f');
expect(tableJson).toContain('[REDACTED]');
expect(patternsJson).toContain('[REDACTED]');
});
```
- [ ] **Step 2: Run the staged-artifact test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/stage-unified.test.ts
```
Expected: FAIL because `tables/public.api_events.json` and `patterns-input.json` still contain `sk_live_abc123` and `Secret_Token_9f`.
- [ ] **Step 3: Import the redaction helper in the stager**
In `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`, add this import below the existing `./buckets.js` import block:
```typescript
import {
compileHistoricSqlRedactionPatterns,
redactHistoricSqlText,
type HistoricSqlRedactionPattern,
} from './redaction.js';
```
- [ ] **Step 4: Add a small template redaction helper**
In `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`, add this helper after `shouldDropTemplate()`:
```typescript
/**
 * Produces the template whose `canonicalSql` has been run through `redactors`.
 *
 * When `redactors` is empty the input object itself is returned. Otherwise a
 * shallow copy is returned with only `canonicalSql` replaced; the input
 * template is not modified.
 */
function redactTemplateSql(
  template: AggregatedTemplate,
  redactors: readonly HistoricSqlRedactionPattern[],
): AggregatedTemplate {
  const hasRedactors = redactors.length > 0;
  return hasRedactors
    ? { ...template, canonicalSql: redactHistoricSqlText(template.canonicalSql, redactors) }
    : template;
}
```
- [ ] **Step 5: Compile redaction patterns once per staged snapshot**
In `stageHistoricSqlAggregatedSnapshot()` in `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`, replace this opening block:
```typescript
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
const now = input.now ?? new Date();
const windowStart = new Date(now.getTime() - config.windowDays * 24 * 60 * 60 * 1000);
```
with:
```typescript
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
const now = input.now ?? new Date();
const windowStart = new Date(now.getTime() - config.windowDays * 24 * 60 * 60 * 1000);
```
- [ ] **Step 6: Store redacted SQL only after batch analysis has used original SQL**
In `stageHistoricSqlAggregatedSnapshot()` in `packages/context/src/ingest/adapters/historic-sql/stage-unified.ts`, replace this `parsedTemplates.push()` block:
```typescript
parsedTemplates.push({
template,
tablesTouched,
columnsByClause: Object.fromEntries(
Object.entries(parsed.columnsByClause).map(([clause, columns]) => [clause, [...new Set(columns)].sort()]),
),
});
```
with:
```typescript
parsedTemplates.push({
template: redactTemplateSql(template, redactors),
tablesTouched,
columnsByClause: Object.fromEntries(
Object.entries(parsed.columnsByClause).map(([clause, columns]) => [clause, [...new Set(columns)].sort()]),
),
});
```
- [ ] **Step 7: Run staged-artifact and redaction tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/redaction.test.ts src/ingest/adapters/historic-sql/stage-unified.test.ts
```
Expected: PASS. The output reports 2 test files passed and the staged-artifact test confirms both raw sensitive substrings are absent.
- [ ] **Step 8: Commit the stager redaction**
```bash
git add packages/context/src/ingest/adapters/historic-sql/stage-unified.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts
git commit -m "feat: redact historic sql staged artifacts"
```
## Task 3: Run Focused Historic-SQL Regression Checks
**Files:**
- Verify: `packages/context/src/ingest/adapters/historic-sql/redaction.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/types.test.ts`
- [ ] **Step 1: Run focused historic-SQL tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/redaction.test.ts src/ingest/adapters/historic-sql/stage-unified.test.ts src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts src/ingest/adapters/historic-sql/projection.test.ts src/ingest/adapters/historic-sql/types.test.ts
```
Expected: PASS. The output reports 5 test files passed.
- [ ] **Step 2: Run the context package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS with TypeScript completing without diagnostics.
- [ ] **Step 3: Confirm the implementation did not reintroduce legacy historic-SQL codepaths**
Run:
```bash
rg -n "stagePgStatStatementsTemplates|expandCategoricalTemplates|classifySlot|pgss-baseline|historic_sql_ingest|historic_sql_curator" packages/context/src packages/context/skills packages/cli/src
```
Expected: no matches. `rg` prints nothing and exits with code 1, which is the passing result for this check.
- [ ] **Step 4: Commit verification-only adjustments if any were required**
If Task 3 required a source or test correction, commit the verified files:
```bash
git add packages/context/src/ingest/adapters/historic-sql/redaction.ts packages/context/src/ingest/adapters/historic-sql/redaction.test.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.ts packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts
git commit -m "test: verify historic sql redaction hardening"
```
If Task 3 did not require changes, leave the existing commits from Task 1 and Task 2 unchanged.
## Self-Review
**Spec coverage:** This plan covers the remaining practical gap in spec §8's `redactionPatterns` config by applying it before SQL text reaches staged artifacts and LLM WorkUnit inputs. It does not alter reader SQL, projection, search enrichment, or setup output because those slices are already implemented.
**Placeholder scan:** The plan contains no `TBD`, no `TODO`, and no missing code bodies. Every code-writing step includes the exact test or implementation block to add.
**Type consistency:** `HistoricSqlRedactionPattern`, `compileHistoricSqlRedactionPatterns()`, and `redactHistoricSqlText()` are defined in Task 1 and imported with the same names in Task 2. `redactTemplateSql()` returns `AggregatedTemplate`, preserving the existing `ParsedTemplate.template` type.
Plan complete and saved to `docs/superpowers/plans/2026-05-11-historic-sql-redaction-hardening.md`. Two execution options:
**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration
**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints
Which approach?

View file

@ -1,459 +0,0 @@
# External Hosted Postgres Discovery Manual Test Plan
This plan tests KTX from the point of view of a new external user who discovers
the public CLI and connects the hosted Kaelio demo Postgres database as the
source. It starts with the credential-free seeded demo, then creates a real KTX
project that reads from `start.kaelio.com`.
The plan avoids writing the database password into this repository. Keep the
password in a local environment variable and configure KTX with
`env:KTX_DEMO_DATABASE_URL`.
## Scope
Use this plan when the goal is to test KTX as an external user with the hosted
demo database. The commands use the published package shape through
`npx @kaelio/ktx`. If you are testing from this repository, you can replace
`npx @kaelio/ktx` with the local `ktx` alias.
The required checks cover:
- Running the packaged seeded demo without credentials.
- Creating a new project that points to the hosted Postgres demo source.
- Verifying the connection through the public CLI.
- Running public ingest against the hosted database.
- Searching semantic-layer sources through `agent sl list --query`.
- Running the Postgres historic-SQL readiness doctor.
- Running the historic-SQL adapter when the demo database exposes query
history and local LLM configuration is available.
- Searching generated historic-SQL usage and pattern pages when historic-SQL
ingest runs.
## Prerequisites
Prepare a clean terminal before starting. The required path needs Node and
network access to `start.kaelio.com:5432`. The optional historic-SQL ingest path
also needs `uv` and an LLM provider configured for KTX.
1. Confirm Node 22 or newer is available:
```bash
node --version
```
Expected: the version is `v22` or newer.
2. Confirm the hosted Postgres endpoint is reachable from your network:
```bash
nc -vz start.kaelio.com 5432
```
Expected: the command reports that the TCP connection succeeds. If `nc` is
unavailable, continue and let `ktx connection test` perform the real check.
3. Create an isolated test parent:
```bash
export KTX_EXTERNAL_PARENT="$(mktemp -d)"
export KTX_SEEDED_PROJECT="$KTX_EXTERNAL_PARENT/seeded-demo"
export KTX_HOSTED_PROJECT="$KTX_EXTERNAL_PARENT/hosted-postgres"
export KTX_RUNTIME_ROOT="$KTX_EXTERNAL_PARENT/managed-runtime"
```
Expected: every file created by this test stays under
`$KTX_EXTERNAL_PARENT`.
4. Set the hosted database URL without committing the password:
```bash
read -rsp "Demo database password: " KTX_DEMO_DB_PASSWORD
printf '\n'
export KTX_DEMO_DATABASE_URL="postgresql://kaelio_demo:${KTX_DEMO_DB_PASSWORD}"
export KTX_DEMO_DATABASE_URL="${KTX_DEMO_DATABASE_URL}@start.kaelio.com:5432/demo?sslmode=prefer"
unset KTX_DEMO_DB_PASSWORD
```
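Expected: `KTX_DEMO_DATABASE_URL` is set only in your shell. The project
config will store `env:KTX_DEMO_DATABASE_URL`, not the literal URL.
If the password contains URL-reserved characters such as `@`, `:`, `/`, `?`,
or `#`, percent-encode it before building the URL; otherwise the connection
string is parsed incorrectly.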
The hosted demo endpoint uses libpq-style `sslmode=prefer`, which means
"try SSL, then fall back to non-SSL." KTX handles this mode explicitly for
the Node Postgres connector so the setup check can connect to the hosted
demo database.
5. Verify the required shell variables before running any `ktx` commands:
```bash
: "${KTX_EXTERNAL_PARENT:?Run prerequisite step 3 in this shell first}"
: "${KTX_SEEDED_PROJECT:?Run prerequisite step 3 in this shell first}"
: "${KTX_HOSTED_PROJECT:?Run prerequisite step 3 in this shell first}"
: "${KTX_RUNTIME_ROOT:?Run prerequisite step 3 in this shell first}"
: "${KTX_DEMO_DATABASE_URL:?Run prerequisite step 4 in this shell first}"
```
Expected: the command prints nothing and exits zero. If it prints a shell
error, rerun the referenced prerequisite in the same terminal before
continuing.
## Step 1: Run the packaged seeded demo
Start with the shortest public path. The seeded demo uses packaged data and
prebuilt context, so it must not ask for an LLM key.
1. Run the seeded demo:
```bash
npx @kaelio/ktx setup demo \
--project-dir "$KTX_SEEDED_PROJECT" \
--plain \
--no-input
```
Expected: output includes `Mode: seeded`, `Source: packaged demo project`,
and `LLM calls: none`.
2. Inspect the seeded demo:
```bash
npx @kaelio/ktx setup demo inspect \
--project-dir "$KTX_SEEDED_PROJECT" \
--json > "$KTX_EXTERNAL_PARENT/seeded-inspect.json"
```
Expected: the JSON reports seeded mode, semantic-layer sources, knowledge
pages, and `reports/seeded-demo-report.json`.
3. Search seeded semantic-layer sources:
```bash
npx @kaelio/ktx agent sl list \
--project-dir "$KTX_SEEDED_PROJECT" \
--json \
--query "revenue" \
> "$KTX_EXTERNAL_PARENT/seeded-sl-search.json"
```
Expected: the command exits zero and returns at least one source with a
numeric `score`.
## Step 2: Create a hosted Postgres project
Create a new KTX project that uses the hosted demo database as the warehouse
source. This step enables historic SQL in the config, but it does not require
LLM credentials yet.
If an earlier setup attempt failed after creating `$KTX_HOSTED_PROJECT/ktx.yaml`,
start a fresh test project before rerunning the `--new` command:
```bash
export KTX_HOSTED_PROJECT="$KTX_EXTERNAL_PARENT/hosted-postgres-retry"
```
1. Create the project and connection:
```bash
npx @kaelio/ktx setup \
--project-dir "${KTX_HOSTED_PROJECT:?Run prerequisite step 3 first}" \
--new \
--skip-llm \
--skip-embeddings \
--skip-sources \
--skip-agents \
--database postgres \
--new-database-connection-id warehouse \
--database-url env:KTX_DEMO_DATABASE_URL \
--database-schema public \
--enable-historic-sql \
--historic-sql-min-executions 2 \
--yes \
--no-input
```
Expected: `$KTX_HOSTED_PROJECT/ktx.yaml` exists and contains a `warehouse`
Postgres connection whose URL is `env:KTX_DEMO_DATABASE_URL`.
2. Confirm the literal database URL, which embeds the password, was not written to disk:
```bash
grep -R "start.kaelio.com:5432/demo" "$KTX_HOSTED_PROJECT" || true
```
Expected: no matches are printed.
3. Inspect the generated connection config:
```bash
sed -n '1,120p' "$KTX_HOSTED_PROJECT/ktx.yaml"
```
Expected: the `warehouse` connection has `driver: postgres`,
`url: env:KTX_DEMO_DATABASE_URL` or an equivalent URL reference, and
`historicSql.enabled: true`.
## Step 3: Test the hosted connection
Run the public connection check before ingest. This verifies that the external
user can reach and introspect the hosted source.
1. Test the connection:
```bash
npx @kaelio/ktx connection test warehouse \
--project-dir "$KTX_HOSTED_PROJECT"
```
Expected: output includes `Driver: postgres` and a positive table count.
2. List configured connections:
```bash
npx @kaelio/ktx connection list \
--project-dir "$KTX_HOSTED_PROJECT"
```
Expected: output includes the `warehouse` connection.
## Step 4: Run public ingest
Run the public ingest command. For warehouse connections, this performs the
database scan path and writes local context files that agent search can use.
1. Run ingest:
```bash
npx @kaelio/ktx ingest warehouse \
--project-dir "$KTX_HOSTED_PROJECT" \
--no-input
```
Expected: output reports that ingest finished and that the `scan` step is
`done`.
2. Inspect the latest public ingest status:
```bash
npx @kaelio/ktx ingest status \
--project-dir "$KTX_HOSTED_PROJECT" \
--no-input
```
Expected: the status references the hosted `warehouse` source and a
completed scan.
3. Confirm semantic-layer files exist:
```bash
find "$KTX_HOSTED_PROJECT/semantic-layer/warehouse" \
-name '*.yaml' -print | head
```
Expected: at least one semantic-layer YAML file is printed.
## Step 5: Search the hosted database context
Use the agent-facing semantic-layer search command after ingest. This validates
the discovery path that agents use for database analysis.
1. Run semantic-layer search:
```bash
npx @kaelio/ktx agent sl list \
--project-dir "$KTX_HOSTED_PROJECT" \
--connection-id warehouse \
--json \
--query "orders revenue customers" \
> "$KTX_EXTERNAL_PARENT/hosted-sl-search.json"
```
Expected: the command exits zero.
2. Validate search metadata:
```bash
node - "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" <<'NODE'
const { readFileSync } = require('node:fs');
const result = JSON.parse(readFileSync(process.argv[2], 'utf8'));
const assert = (ok, message) => {
if (!ok) throw new Error(message);
};
assert(Array.isArray(result.sources), 'sources missing');
assert(result.sources.length > 0, 'no semantic-layer hits');
assert(Number.isFinite(result.sources[0].score), 'score missing');
console.log('hosted semantic-layer search ok');
NODE
```
Expected: the script prints `hosted semantic-layer search ok`.
3. Read the top source:
```bash
node - "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" \
> "$KTX_EXTERNAL_PARENT/hosted-top-source-name.txt" <<'NODE'
const { readFileSync } = require('node:fs');
const result = JSON.parse(readFileSync(process.argv[2], 'utf8'));
process.stdout.write(result.sources[0].name);
NODE
npx @kaelio/ktx agent sl read \
"$(cat "$KTX_EXTERNAL_PARENT/hosted-top-source-name.txt")" \
--project-dir "$KTX_HOSTED_PROJECT" \
--connection-id warehouse \
--json \
> "$KTX_EXTERNAL_PARENT/hosted-sl-read.json"
```
Expected: the JSON includes the full semantic-layer source.
## Step 6: Check historic-SQL readiness
Run the Postgres historic-SQL doctor. This determines whether the hosted demo
database exposes the query-history prerequisites that the redesigned
historic-SQL adapter requires.
1. Run doctor:
```bash
npx @kaelio/ktx dev doctor \
--project-dir "$KTX_HOSTED_PROJECT" \
--no-input
```
Expected: output includes a `Postgres Historic SQL (warehouse)` check.
2. Interpret the result:
- `PASS` means the hosted source is ready for the optional historic-SQL
ingest path.
- `WARN` or `FAIL` means the external discovery test still covers scan and
semantic-layer search, but historic-SQL query-history ingestion is blocked
by database permissions or configuration.
## Step 7: Optional historic-SQL ingest
Run this section only when the doctor passes and the KTX project has an LLM
provider configured. Historic-SQL table and pattern curation uses LLM-backed
skills, so this path is not credential-free.
1. Configure LLM and embeddings if you skipped them during setup:
```bash
npx @kaelio/ktx setup \
--project-dir "$KTX_HOSTED_PROJECT"
```
Expected: `npx @kaelio/ktx setup status --project-dir "$KTX_HOSTED_PROJECT"`
reports that LLM and embedding setup are ready.
2. Run historic-SQL ingest:
```bash
npx @kaelio/ktx dev ingest run \
--project-dir "$KTX_HOSTED_PROJECT" \
--connection-id warehouse \
--adapter historic-sql \
--plain \
--yes \
--no-input
```
Expected: the command exits zero and schedules `historic-sql-table-` and
`historic-sql-patterns-` WorkUnits when the database has qualifying query
history.
3. Locate the latest historic-SQL manifest:
```bash
find "$KTX_HOSTED_PROJECT/raw-sources/warehouse/historic-sql" \
-name manifest.json -print | sort | tail -n 1
```
Expected: a manifest path is printed.
4. Search for generated usage:
```bash
npx @kaelio/ktx agent sl list \
--project-dir "$KTX_HOSTED_PROJECT" \
--connection-id warehouse \
--json \
--query "common filters joins usage" \
> "$KTX_EXTERNAL_PARENT/historic-sl-search.json"
```
Expected: hits produced from historic-SQL usage include `score`, and hits
with projected usage include `frequencyTier` and `snippet`.
5. Search for generated pattern pages:
```bash
npx @kaelio/ktx agent wiki search "historic sql pattern" \
--project-dir "$KTX_HOSTED_PROJECT" \
--json \
--limit 10 \
> "$KTX_EXTERNAL_PARENT/historic-wiki-search.json"
```
Expected: results include pages whose keys start with `historic-sql/` when
the run produced cross-table patterns.
## Step 8: Record results
Capture the result in a way that separates the external discovery path from the
optional historic-SQL path.
1. Save useful outputs:
```bash
mkdir -p "$KTX_EXTERNAL_PARENT/results"
cp "$KTX_EXTERNAL_PARENT/seeded-inspect.json" \
"$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true
cp "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" \
"$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true
cp "$KTX_EXTERNAL_PARENT/hosted-sl-read.json" \
"$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true
cp "$KTX_EXTERNAL_PARENT/historic-sl-search.json" \
"$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true
cp "$KTX_EXTERNAL_PARENT/historic-wiki-search.json" \
"$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true
```
Expected: the results directory contains the JSON outputs created during the
run.
2. Mark these areas as pass, fail, or blocked:
- Public package discovery through `npx @kaelio/ktx`.
- Seeded demo without credentials.
- Hosted Postgres project setup.
- Hosted Postgres connection test.
- Public ingest scan.
- Semantic-layer search and read.
- Historic-SQL doctor.
- Historic-SQL ingest, if doctor and LLM setup allow it.
- Historic-SQL usage search, if ingest ran.
- Historic-SQL wiki pattern search, if ingest ran.
Expected: every required external discovery area passes. Historic-SQL ingest
is pass, fail, or blocked based on the doctor result and local LLM
configuration.
## Cleanup
Remove the disposable project after collecting results. Keep it only when you
need the files for debugging.
1. Stop the managed runtime:
```bash
npx @kaelio/ktx runtime stop || true
```
2. Remove the test parent:
```bash
rm -rf "$KTX_EXTERNAL_PARENT"
```
Expected: temporary projects and runtime files are removed.

View file

@ -1,778 +0,0 @@
# Historic SQL Search Enrichment Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make historic-SQL table usage searchable through semantic-layer search and return lean query-mode context with `frequencyTier` and an FTS snippet.
**Architecture:** This is the second slice of the historic SQL redesign, covering spec §6.2.3-§6.2.5 and the search-hit tier in §7. It builds on the already implemented foundation slice, in which `SemanticLayerSource.usage` is the source of truth. After this slice, the SL search text builder indexes the usage narrative and structured usage fields, SQLite FTS returns snippets from the indexed search text, and local/MCP list responses hydrate `frequencyTier` from the source while the full `usage` block stays available through `agent sl read`.
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, better-sqlite3 FTS5, zod-backed TypeScript types.
---
## Starting Point
Spec: `docs/superpowers/specs/2026-05-11-historic-sql-redesign-design.md`
Plans found that are based on this spec:
- `docs/superpowers/plans/2026-05-11-historic-sql-foundations.md`
Implemented status:
- `2026-05-11-historic-sql-foundations.md` is implemented in this worktree. Evidence in code: `packages/context/src/ingest/adapters/historic-sql/skill-schemas.ts`, `SemanticLayerSource.usage` in `packages/context/src/sl/types.ts`, `mergeUsagePreservingExternal()` in `packages/context/src/ingest/adapters/live-database/manifest.ts`, `SqlAnalysisPort.analyzeBatch()` in `packages/context/src/sql-analysis/ports.ts`, and `/sql/analyze-batch` in `python/ktx-daemon/src/ktx_daemon/app.py`.
- Focused TypeScript foundation verification passed: `pnpm --filter @ktx/context exec vitest run src/ingest/adapters/historic-sql/skill-schemas.test.ts src/sl/semantic-layer.service.test.ts src/ingest/adapters/live-database/manifest.test.ts src/scan/local-enrichment-artifacts.test.ts src/sql-analysis/http-sql-analysis-port.test.ts` reported 5 files and 53 tests passed.
- `uv run pytest python/ktx-daemon/tests/test_sql_analysis.py python/ktx-daemon/tests/test_app.py -q` is blocked by the repo's exact uv pin: required `==0.11.11`, local `0.11.13`. Closest available check after activating `.venv` passed: `source .venv/bin/activate && python -m pytest python/ktx-daemon/tests/test_sql_analysis.py python/ktx-daemon/tests/test_app.py -q` reported 20 passed.
Not yet implemented:
- `buildSemanticLayerSourceSearchText()` in `packages/context/src/sl/sl-search.service.ts` does not include `source.usage`.
- `SqliteSlSourcesIndex` does not select `snippet(local_sl_sources_fts, ...)`.
- `LocalSlSourceSearchResult` and `KtxSemanticLayerSourceSummary` do not expose `frequencyTier` or `snippet`.
- `createLocalProjectMcpContextPorts().semanticLayer.listSources()` drops any future snippet/frequency metadata.
This plan does not rewrite the historic-SQL adapter, readers, skills, projection, or cleanup path. The next plan after this one should cover the new adapter hot path from spec §4 and §10.3 step 3.
## File Structure
Modify:
- `packages/context/src/sl/sl-search.service.ts`
Adds usage narrative, frequency, filters, group-bys, joins, and stale marker to the canonical SL search text. Preserves snippets returned by repository search for direct `SlSearchService.search()` callers.
- `packages/context/src/sl/sl-search.service.test.ts`
Tests usage search-text content and direct service snippet pass-through.
- `packages/context/src/sl/ports.ts`
Extends `SlSourcesIndexPort.search()` rows with optional `snippet`.
- `packages/context/src/sl/sqlite-sl-sources-index.ts`
Adds FTS5 `snippet()` selection to lexical candidate search and direct index search.
- `packages/context/src/sl/sqlite-sl-sources-index.test.ts`
Locks snippet behavior for both direct search and lexical lane candidates.
- `packages/context/src/sl/local-sl.ts`
Adds `frequencyTier` and `snippet` to query-mode `LocalSlSourceSearchResult`; collects snippets from the lexical lane and hydrates frequency from `SemanticLayerSource.usage`.
- `packages/context/src/sl/local-sl.test.ts`
Tests that usage-only terms can find a source and that results include `frequencyTier` and FTS snippet.
- `packages/context/src/sl/pglite-sl-search-prototype.ts`
Propagates `frequencyTier` for the prototype backend so the shared result type stays truthful.
- `packages/context/src/mcp/types.ts`
Adds `frequencyTier` and `snippet` to `KtxSemanticLayerSourceSummary`.
- `packages/context/src/mcp/local-project-ports.ts`
Includes `frequencyTier` and `snippet` in `semanticLayer.listSources()` output.
- `packages/context/src/mcp/local-project-ports.test.ts`
Tests the agent/MCP-facing list response.
## Task 1: Index Historic SQL Usage In SL Search Text
**Files:**
- Modify: `packages/context/src/sl/sl-search.service.test.ts`
- Modify: `packages/context/src/sl/sl-search.service.ts`
- [ ] **Step 1: Write the failing usage search-text test**
Add this test at the end of the existing `describe('SlSearchService', ...)` block in `packages/context/src/sl/sl-search.service.test.ts`:
```typescript
it('includes historic SQL usage in semantic-layer search text', () => {
const source: SemanticLayerSource = {
name: 'orders',
descriptions: { user: 'Customer orders' },
table: 'public.orders',
grain: ['order_id'],
columns: [{ name: 'order_id', type: 'string' }],
joins: [],
measures: [],
usage: {
narrative: 'Analysts inspect paid and refunded order lifecycle trends by customer segment.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['customer_segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: '2026-05-01T00:00:00.000Z',
},
};
const text = buildSemanticLayerSourceSearchText(source);
expect(text).toContain('usage: Analysts inspect paid and refunded order lifecycle trends by customer segment.');
expect(text).toContain('frequency: high');
expect(text).toContain('commonly filtered by: status, created_at');
expect(text).toContain('commonly grouped by: customer_segment');
expect(text).toContain('commonly joined to public.customers on customer_id');
expect(text).toContain('stale since 2026-05-01T00:00:00.000Z');
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/sl-search.service.test.ts
```
Expected: FAIL because the search text does not contain `usage: Analysts inspect paid and refunded order lifecycle trends by customer segment.`
- [ ] **Step 3: Add usage fields to the canonical search text**
In `packages/context/src/sl/sl-search.service.ts`, insert this block after the existing `freshness` block and before `return parts.join('. ');`:
```typescript
if (source.usage) {
const usage = source.usage;
parts.push(`usage: ${usage.narrative}`);
parts.push(`frequency: ${usage.frequencyTier}`);
if (usage.commonFilters.length > 0) {
parts.push(`commonly filtered by: ${usage.commonFilters.join(', ')}`);
}
if (usage.commonGroupBys?.length) {
parts.push(`commonly grouped by: ${usage.commonGroupBys.join(', ')}`);
}
for (const join of usage.commonJoins) {
parts.push(`commonly joined to ${join.table} on ${join.on.join(',')}`);
}
if (usage.staleSince) {
parts.push(`stale since ${usage.staleSince}`);
}
}
```
- [ ] **Step 4: Run the search-text test to verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/sl-search.service.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit**
```bash
git add packages/context/src/sl/sl-search.service.ts packages/context/src/sl/sl-search.service.test.ts
git commit -m "feat: index historic sql usage in sl search text"
```
## Task 2: Return SQLite FTS Snippets From SL Search
**Files:**
- Modify: `packages/context/src/sl/ports.ts`
- Modify: `packages/context/src/sl/sqlite-sl-sources-index.ts`
- Modify: `packages/context/src/sl/sqlite-sl-sources-index.test.ts`
- Modify: `packages/context/src/sl/sl-search.service.ts`
- Modify: `packages/context/src/sl/sl-search.service.test.ts`
- [ ] **Step 1: Write failing SQLite snippet assertions**
Replace the existing `creates SQLite tables and searches indexed source text` test in `packages/context/src/sl/sqlite-sl-sources-index.test.ts` with:
```typescript
it('creates SQLite tables and searches indexed source text with FTS snippets', async () => {
const index = new SqliteSlSourcesIndex({ dbPath });
await index.upsertSources('warehouse', [
{
sourceName: 'orders',
searchText: 'orders table: public.orders measure: total_revenue sum(revenue) gross revenue',
embedding: null,
},
{
sourceName: 'tickets',
searchText: 'tickets table: public.tickets measure: ticket_count count(*) support queue',
embedding: null,
},
]);
await expect(access(dbPath)).resolves.toBeUndefined();
const directResults = await index.search('warehouse', null, 'gross revenue', 10);
expect(directResults).toEqual([
expect.objectContaining({
sourceName: 'orders',
rrfScore: expect.any(Number),
snippet: expect.stringContaining('<mark>'),
}),
]);
expect(directResults[0]?.snippet).toContain('revenue');
const lexicalCandidates = await index.searchLexicalCandidates({ queryText: 'gross revenue', limit: 10 });
expect(lexicalCandidates).toEqual([
expect.objectContaining({
id: 'warehouse/orders',
connectionId: 'warehouse',
sourceName: 'orders',
snippet: expect.stringContaining('<mark>'),
}),
]);
});
```
- [ ] **Step 2: Write the failing direct service snippet test**
Add this test at the end of the existing `describe('SlSearchService', ...)` block in `packages/context/src/sl/sl-search.service.test.ts`:
```typescript
it('preserves FTS snippets returned by the source index', async () => {
const service = new SlSearchService(
{
maxBatchSize: 16,
computeEmbedding: vi.fn(async () => [1, 0]),
computeEmbeddingsBulk: vi.fn(),
},
{
upsertSources: vi.fn(),
getExistingSearchTexts: vi.fn(),
deleteStale: vi.fn(),
deleteByConnection: vi.fn(),
deleteByConnectionAndName: vi.fn(),
search: vi.fn(async () => [
{
sourceName: 'orders',
rrfScore: 0.75,
snippet: 'usage: paid <mark>order</mark> lifecycle',
},
]),
},
);
await expect(service.search('warehouse', 'order lifecycle', 10)).resolves.toEqual([
{
sourceName: 'orders',
score: 0.75,
snippet: 'usage: paid <mark>order</mark> lifecycle',
},
]);
});
```
- [ ] **Step 3: Run the snippet tests to verify they fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/sqlite-sl-sources-index.test.ts src/sl/sl-search.service.test.ts
```
Expected: FAIL because `snippet` is missing from SQLite search rows and `SlSearchService.search()` drops repository snippets.
- [ ] **Step 4: Extend the index port result type**
In `packages/context/src/sl/ports.ts`, replace the `search()` return type in `SlSourcesIndexPort` with:
```typescript
search(
connectionId: string,
queryEmbedding: number[] | null,
queryText: string,
limit: number,
minRrfScore?: number,
): Promise<Array<{ sourceName: string; rrfScore: number; snippet?: string }>>;
```
- [ ] **Step 5: Add snippet fields and SQL selection in the SQLite index**
In `packages/context/src/sl/sqlite-sl-sources-index.ts`, replace the `SearchRow` type with:
```typescript
type SearchRow = {
connection_id?: string;
source_name: string;
rank: number;
snippet?: string | null;
};
```
In the `SlSqliteLaneCandidate` interface, add the optional snippet property:
```typescript
export interface SlSqliteLaneCandidate {
id: string;
connectionId: string;
sourceName: string;
rank: number;
rawScore: number;
snippet?: string;
}
```
In `searchLexicalCandidates()`, replace the SELECT list with:
```sql
SELECT
connection_id,
source_name,
bm25(local_sl_sources_fts) AS rank,
snippet(local_sl_sources_fts, 2, '<mark>', '</mark>', '...', 12) AS snippet
FROM local_sl_sources_fts
```
Then replace the returned row mapping in `searchLexicalCandidates()` with:
```typescript
return rows.map((row, index) => ({
id: candidateId(row.connection_id, row.source_name),
connectionId: row.connection_id,
sourceName: row.source_name,
rank: index + 1,
rawScore: Number(row.rank),
...(typeof row.snippet === 'string' && row.snippet.length > 0 ? { snippet: row.snippet } : {}),
}));
```
In the direct `search()` method, replace the SELECT list with:
```sql
SELECT
source_name,
bm25(local_sl_sources_fts) AS rank,
snippet(local_sl_sources_fts, 2, '<mark>', '</mark>', '...', 12) AS snippet
FROM local_sl_sources_fts
```
Then replace the direct `search()` return mapping with:
```typescript
return rows
.map((row) => ({
sourceName: row.source_name,
rrfScore: scoreFromRank(row.rank),
...(typeof row.snippet === 'string' && row.snippet.length > 0 ? { snippet: row.snippet } : {}),
}))
.filter((row) => row.rrfScore >= minRrfScore);
```
- [ ] **Step 6: Preserve snippets in direct `SlSearchService.search()` results**
In `packages/context/src/sl/sl-search.service.ts`, replace the `search()` method signature and final return with:
```typescript
async search(
connectionId: string,
query: string,
limit = 15,
minRrfScore = 0,
): Promise<Array<{ sourceName: string; score: number; snippet?: string }>> {
let queryEmbedding: number[] | null = null;
try {
queryEmbedding = await this.embeddingService.computeEmbedding(query);
} catch (error) {
this.logger.warn(
`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
);
}
const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
return results.map((result) => ({
sourceName: result.sourceName,
score: result.rrfScore,
...(result.snippet ? { snippet: result.snippet } : {}),
}));
}
```
- [ ] **Step 7: Run the snippet tests to verify they pass**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/sqlite-sl-sources-index.test.ts src/sl/sl-search.service.test.ts
```
Expected: PASS.
- [ ] **Step 8: Commit**
```bash
git add packages/context/src/sl/ports.ts packages/context/src/sl/sqlite-sl-sources-index.ts packages/context/src/sl/sqlite-sl-sources-index.test.ts packages/context/src/sl/sl-search.service.ts packages/context/src/sl/sl-search.service.test.ts
git commit -m "feat: return sl search snippets"
```
## Task 3: Hydrate Query-Mode SL Results With Frequency And Snippet
**Files:**
- Modify: `packages/context/src/sl/local-sl.ts`
- Modify: `packages/context/src/sl/local-sl.test.ts`
- Modify: `packages/context/src/sl/pglite-sl-search-prototype.ts`
- [ ] **Step 1: Write the failing local search hydration test**
Add this test after `searches local semantic-layer source text through SQLite FTS` in `packages/context/src/sl/local-sl.test.ts`:
```typescript
it('searches historic SQL usage and returns frequency tier plus FTS snippet', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
`tables:
  orders:
    table: public.orders
    usage:
      narrative: Analysts inspect paid order lifecycle by customer segment.
      frequencyTier: high
      commonFilters:
        - status
        - created_at
      commonGroupBys:
        - customer_segment
      commonJoins:
        - table: public.customers
          on:
            - customer_id
    columns:
      - name: order_id
        type: string
      - name: status
        type: string
`,
'ktx',
'ktx@example.com',
'Add usage-backed manifest shard',
);
const results = await searchLocalSlSources(project, {
connectionId: 'warehouse',
query: 'paid lifecycle customer segment',
});
expect(results).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
name: 'orders',
path: 'semantic-layer/warehouse/_schema/public.yaml#orders',
frequencyTier: 'high',
snippet: expect.stringContaining('<mark>'),
matchReasons: expect.arrayContaining(['lexical']),
}),
]);
expect(results[0]?.snippet).toContain('lifecycle');
});
```
- [ ] **Step 2: Run the local search test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/local-sl.test.ts
```
Expected: FAIL because `frequencyTier` and `snippet` are not yet hydrated into `LocalSlSourceSearchResult`. If Task 1 has not been applied, the test also fails because the query cannot match the usage text and returns no results.
- [ ] **Step 3: Extend the local search result type**
In `packages/context/src/sl/local-sl.ts`, replace the `LocalSlSourceSearchResult` interface with:
```typescript
export interface LocalSlSourceSearchResult extends LocalSlSourceSummary {
score: number;
frequencyTier?: NonNullable<SemanticLayerSource['usage']>['frequencyTier'];
snippet?: string;
matchReasons?: SlSearchMatchReason[];
dictionaryMatches?: SlDictionaryMatch[];
lanes?: SlSearchLaneSummary[];
}
```
Then add this helper after `candidateKey()`:
```typescript
function searchResultUsageFields(source: SemanticLayerSource): Pick<LocalSlSourceSearchResult, 'frequencyTier'> {
return source.usage?.frequencyTier ? { frequencyTier: source.usage.frequencyTier } : {};
}
```
- [ ] **Step 4: Include frequency tier in the non-SQLite token fallback**
In `searchLocalSlSources()`, inside the `project.config.storage.search !== 'sqlite-fts5'` branch, replace the final mapped object with:
```typescript
.map((result) => ({
...result.candidate.summary,
score: result.score,
matchReasons: ['token'],
...searchResultUsageFields(result.candidate.source),
}))
```
- [ ] **Step 5: Collect lexical snippets during hybrid search**
In `searchLocalSlSources()`, after `const dictionaryEvidence = new Map<string, SlDictionaryMatch[]>();`, add:
```typescript
const lexicalSnippets = new Map<string, string>();
```
Inside the lexical generator, immediately after `const rows = await index.searchLexicalCandidates({ ... });`, add:
```typescript
for (const row of rows) {
if (row.snippet) {
lexicalSnippets.set(row.id, row.snippet);
}
}
```
- [ ] **Step 6: Hydrate frequency tier and snippet in SQLite hybrid results**
In the final hydration loop in `searchLocalSlSources()`, replace the `hydrated.push({ ... })` block with:
```typescript
const dictionaryMatches = dictionaryEvidence.get(fused.id);
const snippet = lexicalSnippets.get(fused.id);
hydrated.push({
...candidate.summary,
score: fused.score,
...searchResultUsageFields(candidate.source),
...(snippet ? { snippet } : {}),
matchReasons: fused.matchReasons as SlSearchMatchReason[],
...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
lanes: result.lanes,
});
```
- [ ] **Step 7: Propagate frequency tier in the PGlite prototype backend**
In `packages/context/src/sl/pglite-sl-search-prototype.ts`, inside the final hydration loop, replace the `hydrated.push({ ... })` block with:
```typescript
const dictionaryMatches = dictionaryEvidence.get(result.id);
const frequencyTier = candidate.source.usage?.frequencyTier;
hydrated.push({
...candidate.summary,
score: result.score,
...(frequencyTier ? { frequencyTier } : {}),
matchReasons: result.matchReasons as SlSearchMatchReason[],
...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
lanes: fused.lanes,
});
```
- [ ] **Step 8: Run the local search test to verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/local-sl.test.ts
```
Expected: PASS.
- [ ] **Step 9: Commit**
```bash
git add packages/context/src/sl/local-sl.ts packages/context/src/sl/local-sl.test.ts packages/context/src/sl/pglite-sl-search-prototype.ts
git commit -m "feat: hydrate sl search usage metadata"
```
## Task 4: Expose Frequency And Snippet Through Agent/MCP SL List
**Files:**
- Modify: `packages/context/src/mcp/types.ts`
- Modify: `packages/context/src/mcp/local-project-ports.ts`
- Modify: `packages/context/src/mcp/local-project-ports.test.ts`
- [ ] **Step 1: Write the failing agent-facing list test**
Add this test after `returns semantic-layer hybrid search metadata through local project ports` in `packages/context/src/mcp/local-project-ports.test.ts`:
```typescript
// Seeds one schema shard whose `usage` block carries a narrative and a
// frequency tier, then checks that a query-mode listSources() call returns
// both the tier and a highlighted snippet.
it('returns historic SQL usage frequency and snippet through semantic-layer list search', async () => {
const project = await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
// The shard content below is runtime YAML; the query further down uses words from its `narrative`.
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
`tables:
orders:
table: public.orders
usage:
narrative: Analysts inspect paid order lifecycle by customer segment.
frequencyTier: high
commonFilters:
- status
commonGroupBys:
- customer_segment
commonJoins:
- table: public.customers
on:
- customer_id
columns:
- name: order_id
type: string
- name: status
type: string
`,
'ktx',
'ktx@example.com',
'Seed usage-backed manifest shard',
);
const ports = createLocalProjectMcpContextPorts(project);
// Exactly one source is expected; objectContaining leaves other summary fields unconstrained.
await expect(
ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'paid order lifecycle' }),
).resolves.toEqual({
sources: [
expect.objectContaining({
connectionId: 'warehouse',
connectionName: 'warehouse',
name: 'orders',
frequencyTier: 'high',
// The snippet must contain a `<mark>` highlight tag.
snippet: expect.stringContaining('<mark>'),
score: expect.any(Number),
matchReasons: expect.arrayContaining(['lexical']),
}),
],
totalSources: 1,
});
});
```
- [ ] **Step 2: Run the local project ports test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/mcp/local-project-ports.test.ts
```
Expected: FAIL because `frequencyTier` and `snippet` are missing from `semanticLayer.listSources()` responses.
- [ ] **Step 3: Add fields to the MCP summary type**
In `packages/context/src/mcp/types.ts`, replace the ingest import with:
```typescript
import type { IngestReportSnapshot, MemoryFlowReplayInput, TableUsageOutput } from '../ingest/index.js';
```
Then add these optional fields to `KtxSemanticLayerSourceSummary` after `joinCount`:
```typescript
frequencyTier?: TableUsageOutput['frequencyTier'];
snippet?: string;
```
- [ ] **Step 4: Pass fields through local project ports**
In `packages/context/src/mcp/local-project-ports.ts`, inside the object built in `semanticLayer.listSources()`, add these two spread lines after `joinCount: source.joinCount,`:
```typescript
...(hasSlSearchMetadata(source) && source.frequencyTier ? { frequencyTier: source.frequencyTier } : {}),
...(hasSlSearchMetadata(source) && source.snippet ? { snippet: source.snippet } : {}),
```
- [ ] **Step 5: Run the agent-facing list test to verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/mcp/local-project-ports.test.ts
```
Expected: PASS.
- [ ] **Step 6: Commit**
```bash
git add packages/context/src/mcp/types.ts packages/context/src/mcp/local-project-ports.ts packages/context/src/mcp/local-project-ports.test.ts
git commit -m "feat: expose sl search usage snippets"
```
## Task 5: Final Verification
**Files:**
- Verify: `packages/context/src/sl/sl-search.service.ts`
- Verify: `packages/context/src/sl/sqlite-sl-sources-index.ts`
- Verify: `packages/context/src/sl/local-sl.ts`
- Verify: `packages/context/src/mcp/local-project-ports.ts`
- [ ] **Step 1: Run all focused tests from this plan**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/sl/sl-search.service.test.ts src/sl/sqlite-sl-sources-index.test.ts src/sl/local-sl.test.ts src/mcp/local-project-ports.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run the context type check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Confirm the adapter rewrite is still untouched**
Run:
```bash
git diff -- packages/context/src/ingest/adapters/historic-sql/stage.ts packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts
```
Expected: no diff output.
- [ ] **Step 4: Confirm no placeholder text remains in the plan**
Run:
```bash
node --input-type=module - <<'NODE'
// Scan this plan for placeholder phrases and exit non-zero when any is found.
// Each phrase is assembled from two string pieces so that the copy of this
// script embedded in the plan never matches its own scan.
// `--input-type=module` is required because the script uses `import` and is
// read from stdin, which Node does not treat as an ES module on every release.
import { readFileSync } from 'node:fs';

const path = 'docs/superpowers/plans/2026-05-11-historic-sql-search-enrichment.md';
const text = readFileSync(path, 'utf8');
const redFlags = [
  'T' + 'BD',
  'TO' + 'DO',
  'implement ' + 'later',
  'fill in ' + 'details',
  'Add appropriate ' + 'error handling',
  'add ' + 'validation',
  'handle edge ' + 'cases',
  'Write tests for ' + 'the above',
  'Similar to ' + 'Task',
];

// Report every hit before exiting so one run lists all offending phrases.
let failed = false;
for (const flag of redFlags) {
  if (text.includes(flag)) {
    console.error(`${path}: contains red-flag placeholder text: ${flag}`);
    failed = true;
  }
}
process.exit(failed ? 1 : 0);
NODE
```
Expected: exits 0 with no output.
- [ ] **Step 5: Commit verification fixes if verification required a code correction**
If Step 1 or Step 2 required a code correction, commit only those corrected files:
```bash
git status --short
git add packages/context/src/sl/sl-search.service.ts packages/context/src/sl/sl-search.service.test.ts packages/context/src/sl/ports.ts packages/context/src/sl/sqlite-sl-sources-index.ts packages/context/src/sl/sqlite-sl-sources-index.test.ts packages/context/src/sl/local-sl.ts packages/context/src/sl/local-sl.test.ts packages/context/src/sl/pglite-sl-search-prototype.ts packages/context/src/mcp/types.ts packages/context/src/mcp/local-project-ports.ts packages/context/src/mcp/local-project-ports.test.ts
git commit -m "test: verify historic sql search enrichment"
```
If Step 1 and Step 2 pass without changes, skip this commit.
## Self-Review
Spec coverage:
- Spec §6.2.3 is covered by Task 1: usage fields are included in `buildSemanticLayerSourceSearchText()`.
- Spec §6.2.4 is already covered by the foundation behavior in `SlSearchService.indexSources()`, which compares search text before re-embedding; Task 1 makes usage changes part of that search-text drift.
- Spec §6.2.5 is covered by Tasks 2-4: SQLite FTS snippets are selected and exposed through query-mode list results, and `frequencyTier` is hydrated from the source.
- Spec §7 search-hit tier is covered by Tasks 3-4: query-mode results carry name, table summary counts, description, score, frequency tier, and snippet. Full `usage` remains available through source read because the foundation plan added `SemanticLayerSource.usage`.
Placeholder scan:
- This plan contains no deferred implementation markers or unspecified code steps.
Type consistency:
- `frequencyTier` uses `TableUsageOutput['frequencyTier']` at the MCP boundary and `NonNullable<SemanticLayerSource['usage']>['frequencyTier']` in local SL search results.
- `snippet` is consistently optional because lexical FTS may not contribute to every hybrid result.

View file

@ -1,856 +0,0 @@
# Managed Local Embeddings Release Smoke Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add an opt-in release smoke that proves the public `@kaelio/ktx`
package can install `local-embeddings`, start the managed daemon, compute a real
local embedding, and persist the managed embedding marker through setup.
**Architecture:** Keep the default `artifacts:verify` path lightweight. Add a
separate Node smoke script with an explicit opt-in gate, source-level tests, and
a package script that a release job can run only when large Python and model
downloads are acceptable.
**Tech Stack:** Node 22 ESM scripts, `node:test`, pnpm, uv, KTX managed Python
runtime assets, FastAPI embedding endpoint, sentence-transformers.
---
## Existing status
This plan is based on
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The following plans are based on that spec and are already implemented in this
worktree:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
Implementation evidence found before writing this plan includes:
- `scripts/build-python-runtime-wheel.mjs` and matching tests.
- `packages/cli/src/managed-python-runtime.ts`, `runtime.ts`, and
`commands/runtime-commands.ts`.
- `packages/cli/src/managed-python-command.ts` and `ktx sl query` runtime
install policy flags.
- `packages/cli/src/managed-python-daemon.ts` and `ktx runtime start` /
`ktx runtime stop`.
- `packages/cli/src/managed-local-embeddings.ts`,
`packages/context/src/llm/local-config.ts`, and setup embedding wiring.
- `scripts/build-public-npm-package.mjs`, `release-policy.json` listing
`@kaelio/ktx`, and public-package smoke command construction.
- `scripts/package-artifacts.mjs` installed CLI smoke that isolates
`KTX_RUNTIME_ROOT`, lazily installs the core runtime, runs `ktx sl query`,
checks runtime status and doctor output, and starts, reuses, and stops the
core daemon.
The remaining spec gap is the release-check item that permits local embeddings
coverage in a separate job or opt-in check. The default release artifact smoke
must not download `sentence-transformers`, `torch`, or the
`all-MiniLM-L6-v2` model.
## File structure
- Create `scripts/local-embeddings-runtime-smoke.mjs`: an opt-in smoke script
that consumes the built public npm tarball, installs it in a temporary pnpm
project, isolates all runtime and model caches, installs the
`local-embeddings` feature, starts the managed daemon, computes one real
embedding, runs setup with local embeddings, verifies the managed config
marker, and stops the daemon.
- Create `scripts/local-embeddings-runtime-smoke.test.mjs`: fast source-level
tests for opt-in gating, public tarball selection, cache isolation, command
construction, daemon URL parsing, embedding response validation, and package
script registration.
- Modify `package.json`: add `release:local-embeddings-smoke` without adding
it to default `check`, `test`, `artifacts:verify`, or release readiness.
### Task 1: Add failing local embeddings smoke tests
**Files:**
- Create: `scripts/local-embeddings-runtime-smoke.test.mjs`
- Test: `scripts/local-embeddings-runtime-smoke.test.mjs`
- [ ] **Step 1: Write the failing test file**
Create `scripts/local-embeddings-runtime-smoke.test.mjs` with this content:
```javascript
import assert from 'node:assert/strict';
import { readFile } from 'node:fs/promises';
import { describe, it } from 'node:test';
import {
buildLocalEmbeddingsSmokeEnv,
localEmbeddingsSmokeCommands,
localEmbeddingsSmokeOptIn,
parseDaemonBaseUrl,
publicKtxTarballName,
validateEmbeddingResponse,
} from './local-embeddings-runtime-smoke.mjs';
// Opt-in gate: the smoke runs only with the environment flag or --force.
describe('localEmbeddingsSmokeOptIn', () => {
// With neither the flag nor --force, the result carries the guidance message.
it('skips unless the smoke is explicitly enabled', () => {
assert.deepEqual(localEmbeddingsSmokeOptIn({}, []), {
run: false,
message: 'Set KTX_RUN_LOCAL_EMBEDDINGS_SMOKE=1 or pass --force to run the local embeddings smoke.',
});
});
it('runs when the environment opt-in is set', () => {
assert.deepEqual(localEmbeddingsSmokeOptIn({ KTX_RUN_LOCAL_EMBEDDINGS_SMOKE: '1' }, []), {
run: true,
});
});
it('runs when --force is present', () => {
assert.deepEqual(localEmbeddingsSmokeOptIn({}, ['--force']), {
run: true,
});
});
});
// Tarball selection: exactly one `kaelio-ktx-*.tgz` file must be present.
describe('publicKtxTarballName', () => {
it('selects the public @kaelio/ktx tarball name', () => {
assert.equal(
publicKtxTarballName(['kaelio-ktx-0.0.0-private.tgz', 'ignore-me.tgz']),
'kaelio-ktx-0.0.0-private.tgz',
);
});
// A tarball that does not start with `kaelio-ktx-` does not count as the public package.
it('fails when the public package tarball is missing', () => {
assert.throws(
() => publicKtxTarballName(['ktx-cli-0.0.0-private.tgz']),
/Expected exactly one @kaelio\/ktx tarball/,
);
});
// Two matching tarballs are ambiguous, so selection must fail.
it('fails when multiple public package tarballs are present', () => {
assert.throws(
() => publicKtxTarballName(['kaelio-ktx-0.1.0.tgz', 'kaelio-ktx-0.2.0.tgz']),
/Expected exactly one @kaelio\/ktx tarball/,
);
});
});
// Environment isolation: runtime root and cache variables must point inside the smoke root.
describe('buildLocalEmbeddingsSmokeEnv', () => {
it('isolates the runtime root and model caches inside the smoke root', () => {
const env = buildLocalEmbeddingsSmokeEnv('/tmp/ktx-local-embedding-smoke', {
PATH: '/usr/bin',
});
// Variables from the base environment are carried over unchanged.
assert.equal(env.PATH, '/usr/bin');
assert.equal(env.KTX_RUN_LOCAL_EMBEDDINGS_SMOKE, '1');
assert.equal(env.KTX_RUNTIME_ROOT, '/tmp/ktx-local-embedding-smoke/managed-runtime');
assert.equal(env.HF_HOME, '/tmp/ktx-local-embedding-smoke/hf-home');
assert.equal(env.TRANSFORMERS_CACHE, '/tmp/ktx-local-embedding-smoke/transformers-cache');
assert.equal(env.SENTENCE_TRANSFORMERS_HOME, '/tmp/ktx-local-embedding-smoke/sentence-transformers-home');
assert.equal(env.TORCH_HOME, '/tmp/ktx-local-embedding-smoke/torch-home');
});
});
// Command construction: pins the step order and the exact arguments of the
// install, start, and setup steps.
describe('localEmbeddingsSmokeCommands', () => {
it('describes the installed-package commands needed for the smoke', () => {
const commands = localEmbeddingsSmokeCommands({
projectDir: '/tmp/ktx-local-embedding-smoke/project',
});
// The label order is the order in which the smoke executes its steps.
assert.deepEqual(commands.map((command) => command.label), [
'ktx public package version',
'ktx runtime status missing',
'ktx runtime install local embeddings',
'ktx runtime status local embeddings ready',
'ktx runtime start local embeddings',
'ktx setup local embeddings',
'ktx runtime stop local embeddings',
]);
// Index 2 is the runtime install step.
assert.deepEqual(commands[2], {
label: 'ktx runtime install local embeddings',
command: 'pnpm',
args: ['exec', 'ktx', 'runtime', 'install', '--feature', 'local-embeddings', '--yes'],
timeoutMs: 1_200_000,
});
// Index 4 is the daemon start step.
assert.deepEqual(commands[4], {
label: 'ktx runtime start local embeddings',
command: 'pnpm',
args: ['exec', 'ktx', 'runtime', 'start', '--feature', 'local-embeddings'],
timeoutMs: 300_000,
});
// Index 5 is the setup step; it must receive the project directory passed in above.
assert.deepEqual(commands[5].args, [
'exec',
'ktx',
'setup',
'--project-dir',
'/tmp/ktx-local-embedding-smoke/project',
'--new',
'--no-input',
'--yes',
'--skip-llm',
'--embedding-backend',
'sentence-transformers',
'--skip-databases',
'--skip-sources',
'--skip-agents',
]);
});
});
// Daemon URL parsing: the URL is read from a `url: ...` line of the start output.
describe('parseDaemonBaseUrl', () => {
it('extracts the daemon URL from runtime start output', () => {
assert.equal(
parseDaemonBaseUrl('Started KTX Python daemon\nurl: http://127.0.0.1:61234\nfeatures: local-embeddings\n'),
'http://127.0.0.1:61234',
);
});
it('rejects output without a daemon URL', () => {
assert.throws(() => parseDaemonBaseUrl('Started KTX Python daemon\n'), /Daemon URL was not printed/);
});
});
// Embedding response validation: checks the dimension count and that every value is finite.
describe('validateEmbeddingResponse', () => {
// A valid response passes silently; the validator signals failure by throwing.
it('accepts a finite embedding vector with the expected dimensions', () => {
validateEmbeddingResponse({ embedding: [0.1, -0.2, 0.3] }, 3);
});
it('rejects a vector with the wrong dimensions', () => {
assert.throws(
() => validateEmbeddingResponse({ embedding: [0.1, 0.2] }, 3),
/Expected embedding dimension 3, got 2/,
);
});
it('rejects non-finite embedding values', () => {
assert.throws(
() => validateEmbeddingResponse({ embedding: [0.1, Number.NaN, 0.3] }, 3),
/Embedding value at index 1 is not a finite number/,
);
});
});
// Package script registration: reads the package.json one directory above this
// test file and pins the exact command string.
describe('package script', () => {
it('registers the opt-in local embeddings smoke command', async () => {
const packageJson = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8'));
assert.equal(
packageJson.scripts['release:local-embeddings-smoke'],
'node scripts/local-embeddings-runtime-smoke.mjs --require-opt-in',
);
});
});
```
- [ ] **Step 2: Run the failing test**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs
```
Expected: FAIL with an import error for
`./local-embeddings-runtime-smoke.mjs`.
- [ ] **Step 3: Commit the failing tests**
Run:
```bash
git add scripts/local-embeddings-runtime-smoke.test.mjs
git commit -m "test: specify local embeddings release smoke"
```
### Task 2: Implement the opt-in smoke script
**Files:**
- Create: `scripts/local-embeddings-runtime-smoke.mjs`
- Test: `scripts/local-embeddings-runtime-smoke.test.mjs`
- [ ] **Step 1: Create the smoke script**
Create `scripts/local-embeddings-runtime-smoke.mjs` with this content:
```javascript
import { execFile } from 'node:child_process';
import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { promisify } from 'node:util';
// Promise-returning wrapper around execFile so child processes can be awaited.
const execFileAsync = promisify(execFile);
// Directory that contains this script; the default root is its parent directory.
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
const DEFAULT_ROOT_DIR = resolve(SCRIPT_DIR, '..');
// Root-relative directory that is searched for the public npm tarball.
const PUBLIC_NPM_ARTIFACT_DIR = join('dist', 'artifacts', 'npm');
// Guidance returned when the smoke has not been explicitly enabled.
const OPT_IN_MESSAGE =
'Set KTX_RUN_LOCAL_EMBEDDINGS_SMOKE=1 or pass --force to run the local embeddings smoke.';
/**
 * Decide whether the local embeddings smoke is allowed to run.
 *
 * @param {Record<string, string | undefined>} env Environment to inspect; defaults to `process.env`.
 * @param {string[]} args CLI arguments; defaults to `process.argv.slice(2)`.
 * @returns {{ run: true } | { run: false, message: string }} `run: true` when
 *   `KTX_RUN_LOCAL_EMBEDDINGS_SMOKE` is exactly `'1'` or `--force` is among the
 *   arguments; otherwise `run: false` together with the opt-in guidance.
 */
export function localEmbeddingsSmokeOptIn(env = process.env, args = process.argv.slice(2)) {
  if (env.KTX_RUN_LOCAL_EMBEDDINGS_SMOKE !== '1' && !args.includes('--force')) {
    return { run: false, message: OPT_IN_MESSAGE };
  }
  return { run: true };
}
/**
 * Pick the single public `@kaelio/ktx` tarball out of a directory listing.
 *
 * @param {string[]} files File names to search.
 * @returns {string} The one name matching `kaelio-ktx-*.tgz`.
 * @throws {Error} When zero or more than one name matches; the message lists
 *   the matches (sorted) and how to build the artifacts.
 */
export function publicKtxTarballName(files) {
  const tarballs = [];
  for (const file of files) {
    if (/^kaelio-ktx-.+\.tgz$/.test(file)) {
      tarballs.push(file);
    }
  }
  tarballs.sort();
  if (tarballs.length === 1) {
    return tarballs[0];
  }
  throw new Error(
    `Expected exactly one @kaelio/ktx tarball in ${PUBLIC_NPM_ARTIFACT_DIR}, found ${tarballs.length}: ${
      tarballs.join(', ') || 'none'
    }. Run pnpm run artifacts:build first.`,
  );
}
/**
 * Resolve the path of the public `@kaelio/ktx` tarball under `rootDir`.
 *
 * @param {string} rootDir Repository root; defaults to the parent of this script's directory.
 * @returns {Promise<string>} Path of the selected tarball inside the npm artifact directory.
 * @throws {Error} When the artifact directory cannot be read or does not hold
 *   exactly one matching tarball.
 */
export async function selectPublicKtxTarball(rootDir = DEFAULT_ROOT_DIR) {
  const artifactDir = join(rootDir, PUBLIC_NPM_ARTIFACT_DIR);
  const tarballName = publicKtxTarballName(await readdir(artifactDir));
  return join(artifactDir, tarballName);
}
/**
 * Build the environment used for every smoke command.
 *
 * Starts from `baseEnv`, then sets the opt-in flag and points the runtime root
 * and the listed cache variables at directories under `root`. Values set here
 * take precedence over same-named entries in `baseEnv`.
 *
 * @param {string} root Temporary smoke root directory.
 * @param {Record<string, string | undefined>} baseEnv Environment to extend; defaults to `process.env`.
 * @returns {Record<string, string | undefined>} A new environment object.
 */
export function buildLocalEmbeddingsSmokeEnv(root, baseEnv = process.env) {
  const under = (name) => join(root, name);
  return Object.assign({}, baseEnv, {
    KTX_RUN_LOCAL_EMBEDDINGS_SMOKE: '1',
    KTX_RUNTIME_ROOT: under('managed-runtime'),
    HF_HOME: under('hf-home'),
    TRANSFORMERS_CACHE: under('transformers-cache'),
    SENTENCE_TRANSFORMERS_HOME: under('sentence-transformers-home'),
    TORCH_HOME: under('torch-home'),
  });
}
/**
 * Describe, in execution order, the `pnpm exec ktx ...` commands the smoke runs.
 *
 * @param {{ projectDir: string }} input `projectDir` is passed to `ktx setup --project-dir`.
 * @returns {Array<{ label: string, command: string, args: string[], timeoutMs: number }>}
 *   Seven steps: version, status, install, status, start, setup, stop.
 */
export function localEmbeddingsSmokeCommands(input) {
  // Every step shares the `pnpm exec ktx` prefix; only the label, the ktx
  // arguments, and the timeout differ.
  const ktxStep = (label, ktxArgs, timeoutMs) => ({
    label,
    command: 'pnpm',
    args: ['exec', 'ktx', ...ktxArgs],
    timeoutMs,
  });

  const setupArgs = [
    'setup',
    '--project-dir',
    input.projectDir,
    '--new',
    '--no-input',
    '--yes',
    '--skip-llm',
    '--embedding-backend',
    'sentence-transformers',
    '--skip-databases',
    '--skip-sources',
    '--skip-agents',
  ];

  return [
    ktxStep('ktx public package version', ['--version'], 60_000),
    ktxStep('ktx runtime status missing', ['runtime', 'status', '--json'], 60_000),
    ktxStep(
      'ktx runtime install local embeddings',
      ['runtime', 'install', '--feature', 'local-embeddings', '--yes'],
      1_200_000,
    ),
    ktxStep('ktx runtime status local embeddings ready', ['runtime', 'status', '--json'], 60_000),
    ktxStep('ktx runtime start local embeddings', ['runtime', 'start', '--feature', 'local-embeddings'], 300_000),
    ktxStep('ktx setup local embeddings', setupArgs, 900_000),
    ktxStep('ktx runtime stop local embeddings', ['runtime', 'stop'], 60_000),
  ];
}
/**
 * Extract the daemon base URL from `ktx runtime start` output.
 *
 * Looks for a line of the form `url: http://127.0.0.1:<port>`. A trailing
 * carriage return is tolerated so CRLF-terminated output parses as well; the
 * `\r` is never part of the returned URL.
 *
 * @param {string} stdout Captured stdout of the start command.
 * @returns {string} The base URL, for example `http://127.0.0.1:61234`.
 * @throws {Error} When no such line is present; the message includes the output.
 */
export function parseDaemonBaseUrl(stdout) {
  // With the `m` flag `$` matches only before `\n`, so the optional `\r` must
  // be consumed explicitly and kept outside the capture group.
  const match = stdout.match(/^url: (http:\/\/127\.0\.0\.1:\d+)\r?$/m);
  if (!match) {
    throw new Error(`Daemon URL was not printed by runtime start:\n${stdout}`);
  }
  return match[1];
}
/**
 * Validate the JSON body returned by the embedding endpoint.
 *
 * @param {unknown} raw Parsed response body.
 * @param {number} expectedDimensions Required length of `raw.embedding`.
 * @returns {void}
 * @throws {Error} When `raw` is not a non-array object, `embedding` is not an
 *   array, its length differs from `expectedDimensions`, or any element is not
 *   a finite number (the first offending index is reported).
 */
export function validateEmbeddingResponse(raw, expectedDimensions) {
  const isJsonObject = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
  if (!isJsonObject) {
    throw new Error('Embedding response must be a JSON object');
  }
  const { embedding } = raw;
  if (!Array.isArray(embedding)) {
    throw new Error('Embedding response must include an embedding array');
  }
  if (embedding.length !== expectedDimensions) {
    throw new Error(`Expected embedding dimension ${expectedDimensions}, got ${embedding.length}`);
  }
  // Number.isFinite is false for every non-number, so it also covers the type check.
  const index = embedding.findIndex((value) => !Number.isFinite(value));
  if (index !== -1) {
    throw new Error(`Embedding value at index ${index} is not a finite number`);
  }
}
/**
 * Run a command, echo its output, and report the outcome without throwing.
 *
 * The command line is printed first. Captured stdout and stderr are forwarded
 * to this process's own streams on both success and failure.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments for the executable.
 * @param {{ cwd?: string, env?: object, timeoutMs?: number }} [options]
 *   `env` is layered over `process.env`; `timeoutMs` defaults to 120 seconds.
 * @returns {Promise<{ code: number, stdout: string, stderr: string }>} `code`
 *   is 0 on success. On failure it is the error's numeric `code`, or 1 when
 *   the error carries no numeric code.
 */
async function run(command, args, options = {}) {
  process.stdout.write(`$ ${command} ${args.join(' ')}\n`);

  // Forward whatever was captured; empty strings are skipped.
  const forward = (out, err) => {
    if (out) {
      process.stdout.write(out);
    }
    if (err) {
      process.stderr.write(err);
    }
  };

  const execOptions = {
    cwd: options.cwd,
    env: { ...process.env, ...options.env },
    encoding: 'utf8',
    maxBuffer: 1024 * 1024 * 20,
    timeout: options.timeoutMs ?? 120_000,
  };

  try {
    const completed = await execFileAsync(command, args, execOptions);
    forward(completed.stdout, completed.stderr);
    return { code: 0, stdout: completed.stdout, stderr: completed.stderr };
  } catch (error) {
    const stdout = typeof error.stdout === 'string' ? error.stdout : '';
    // When the error carries no captured stderr, fall back to its message.
    const stderr = typeof error.stderr === 'string' ? error.stderr : error.message;
    forward(stdout, stderr);
    const code = typeof error.code === 'number' ? error.code : 1;
    return { code, stdout, stderr };
  }
}
/**
 * Throw unless a command result succeeded.
 *
 * @param {string} label Step name used in error messages.
 * @param {{ code: number, stdout: string, stderr: string }} result Outcome of the command.
 * @param {{ stderrPattern?: RegExp }} [options] When `stderrPattern` is given,
 *   stderr must match it as well.
 * @returns {void}
 * @throws {Error} On a non-zero code, or when stderr does not match `stderrPattern`.
 */
function requireSuccess(label, result, options = {}) {
  const { code, stdout, stderr } = result;
  if (code !== 0) {
    throw new Error(`${label} failed with code ${code}\nstdout:\n${stdout}\nstderr:\n${stderr}`);
  }
  const { stderrPattern } = options;
  if (!stderrPattern || stderrPattern.test(stderr)) {
    return;
  }
  throw new Error(`${label} stderr did not match ${stderrPattern}\nstderr:\n${stderr}`);
}
/**
 * Require a successful result and parse its stdout as JSON.
 *
 * @param {string} label Step name used in error messages.
 * @param {{ code: number, stdout: string, stderr: string }} result Outcome of the command.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} When the command failed or stdout is not valid JSON.
 */
function parseJsonStdout(label, result) {
  requireSuccess(label, result);
  let parsed;
  try {
    parsed = JSON.parse(result.stdout);
  } catch (error) {
    throw new Error(`${label} did not write JSON stdout: ${error.message}\nstdout:\n${result.stdout}`);
  }
  return parsed;
}
/**
 * Throw unless a command's stdout matches `pattern`.
 *
 * @param {string} label Step name used in the error message.
 * @param {{ stdout: string }} result Outcome of the command.
 * @param {RegExp} pattern Pattern that stdout must match.
 * @returns {void}
 * @throws {Error} When stdout does not match; the message includes the stdout.
 */
function requireOutput(label, result, pattern) {
  const matched = pattern.test(result.stdout);
  if (matched) {
    return;
  }
  throw new Error(`${label} stdout did not match ${pattern}\nstdout:\n${result.stdout}`);
}
/**
 * POST a JSON payload and return the parsed JSON response.
 *
 * @param {string} baseUrl Base URL that `path` is resolved against.
 * @param {string} path Request path.
 * @param {unknown} payload Value serialized as the JSON request body.
 * @param {number} timeoutMs Abort the request after this many milliseconds.
 * @returns {Promise<unknown>} The parsed response body.
 * @throws {Error} On a non-OK status (status and body are included in the
 *   message) or when the body is not valid JSON.
 */
async function postJson(baseUrl, path, payload, timeoutMs) {
  const endpoint = new URL(path, baseUrl);
  const headers = {
    accept: 'application/json',
    'content-type': 'application/json',
  };
  const response = await fetch(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(timeoutMs),
  });

  // Read the body as text first so it can be quoted in either error message.
  const bodyText = await response.text();
  if (!response.ok) {
    throw new Error(`POST ${path} failed with ${response.status}: ${bodyText}`);
  }

  let parsed;
  try {
    parsed = JSON.parse(bodyText);
  } catch (error) {
    throw new Error(`POST ${path} returned non-JSON response: ${error.message}\n${bodyText}`);
  }
  return parsed;
}
/**
 * Create `projectDir` and write a minimal private package.json whose only
 * dependency is `@kaelio/ktx`, referenced as a `file:` tarball path.
 *
 * @param {string} projectDir Directory to create and write into.
 * @param {string} tarballPath Path of the tarball used in the `file:` dependency.
 * @returns {Promise<void>}
 */
async function writeSmokePackage(projectDir, tarballPath) {
  await mkdir(projectDir, { recursive: true });

  const manifest = {
    name: 'ktx-local-embeddings-runtime-smoke',
    version: '0.0.0',
    private: true,
    type: 'module',
    dependencies: {
      '@kaelio/ktx': `file:${tarballPath}`,
    },
  };
  // Two-space indentation plus a trailing newline.
  const manifestText = `${JSON.stringify(manifest, null, 2)}\n`;

  await writeFile(join(projectDir, 'package.json'), manifestText);
}
/**
 * Run the local embeddings smoke end to end against the public npm tarball.
 *
 * Installs the tarball into a temporary pnpm project, then runs the steps from
 * `localEmbeddingsSmokeCommands()` in order: version, status (missing),
 * install, status (ready), start, setup, stop. Between start and setup it
 * requests one embedding from the daemon over HTTP and validates it.
 *
 * @param {{ rootDir?: string, tarballPath?: string, keepTemp?: boolean }} [options]
 *   `rootDir` defaults to the repository root, `tarballPath` to the tarball
 *   selected under it, and `keepTemp` to whether
 *   `KTX_KEEP_LOCAL_EMBEDDINGS_SMOKE` is `'1'`.
 * @returns {Promise<void>}
 * @throws {Error} When any step fails or its output does not match expectations.
 */
export async function runLocalEmbeddingsRuntimeSmoke(options = {}) {
const rootDir = options.rootDir ?? DEFAULT_ROOT_DIR;
const tarballPath = options.tarballPath ?? (await selectPublicKtxTarball(rootDir));
// All smoke state lives under one fresh temporary directory.
const root = await mkdtemp(join(tmpdir(), 'ktx-local-embeddings-smoke-'));
const keepTemp = options.keepTemp ?? process.env.KTX_KEEP_LOCAL_EMBEDDINGS_SMOKE === '1';
const installDir = join(root, 'installed-package');
const projectDir = join(root, 'project');
const smokeEnv = buildLocalEmbeddingsSmokeEnv(root);
const commands = localEmbeddingsSmokeCommands({ projectDir });
// Tracks whether the finally block still has to stop the daemon.
let daemonStarted = false;
try {
await writeSmokePackage(installDir, tarballPath);
requireSuccess(
'pnpm install public package',
await run('pnpm', ['install', '--ignore-scripts=false'], {
cwd: installDir,
env: smokeEnv,
timeoutMs: 300_000,
}),
);
// Step 0: the installed CLI must report the expected package version.
const version = await run(commands[0].command, commands[0].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[0].timeoutMs,
});
requireSuccess(commands[0].label, version);
// NOTE(review): the expected version is hard-coded to 0.0.0-private.
requireOutput(commands[0].label, version, /@kaelio\/ktx 0\.0\.0-private/);
// Step 1: before installation the runtime status must be `missing`.
const missingStatus = parseJsonStdout(
commands[1].label,
await run(commands[1].command, commands[1].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[1].timeoutMs,
}),
);
if (missingStatus.kind !== 'missing') {
throw new Error(`Expected missing runtime before install, got ${JSON.stringify(missingStatus)}`);
}
// Step 2: install the runtime with the local-embeddings feature.
const install = await run(commands[2].command, commands[2].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[2].timeoutMs,
});
requireSuccess(commands[2].label, install);
requireOutput(commands[2].label, install, /Installed KTX Python runtime/);
requireOutput(commands[2].label, install, /features: core, local-embeddings/);
// Step 3: status must now be `ready` and the manifest must list the feature.
const readyStatus = parseJsonStdout(
commands[3].label,
await run(commands[3].command, commands[3].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[3].timeoutMs,
}),
);
if (readyStatus.kind !== 'ready') {
throw new Error(`Expected ready runtime after install, got ${JSON.stringify(readyStatus)}`);
}
if (!readyStatus.manifest?.features?.includes('local-embeddings')) {
throw new Error(`Runtime manifest did not include local-embeddings: ${JSON.stringify(readyStatus.manifest)}`);
}
// Step 4: start the daemon and read its base URL from the output.
const start = await run(commands[4].command, commands[4].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[4].timeoutMs,
});
requireSuccess(commands[4].label, start);
// Set before URL parsing so a parse failure still triggers the stop in finally.
daemonStarted = true;
const baseUrl = parseDaemonBaseUrl(start.stdout);
// Request one embedding; the response must hold 384 finite numbers.
const embeddingResponse = await postJson(
baseUrl,
'/embeddings/compute',
{ text: 'KTX local embeddings release smoke' },
900_000,
);
validateEmbeddingResponse(embeddingResponse, 384);
process.stdout.write('KTX local embeddings daemon computed a 384-dimensional embedding\n');
// Step 5: run setup, then check that ktx.yaml contains the managed marker.
const setup = await run(commands[5].command, commands[5].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[5].timeoutMs,
});
requireSuccess(commands[5].label, setup);
requireOutput(commands[5].label, setup, /Embeddings ready: yes \(all-MiniLM-L6-v2\)/);
const config = await readFile(join(projectDir, 'ktx.yaml'), 'utf8');
if (!config.includes('base_url: managed:local-embeddings')) {
throw new Error(`ktx.yaml did not contain managed local embeddings marker:\n${config}`);
}
process.stdout.write('KTX setup persisted managed local embeddings marker\n');
// Step 6: stop the daemon; the flag is cleared only after the stop succeeded.
const stop = await run(commands[6].command, commands[6].args, {
cwd: installDir,
env: smokeEnv,
timeoutMs: commands[6].timeoutMs,
});
requireSuccess(commands[6].label, stop);
daemonStarted = false;
requireOutput(commands[6].label, stop, /Stopped KTX Python daemon/);
process.stdout.write('KTX local embeddings runtime smoke verified\n');
} finally {
// Cleanup stop after a failure; its result is not checked.
if (daemonStarted) {
await run('pnpm', ['exec', 'ktx', 'runtime', 'stop'], {
cwd: installDir,
env: smokeEnv,
timeoutMs: 60_000,
});
}
// Remove the temporary root unless the caller asked to keep it.
if (!keepTemp) {
await rm(root, { recursive: true, force: true });
} else {
process.stdout.write(`Kept local embeddings smoke root: ${root}\n`);
}
}
}
/**
 * CLI entry point: run the smoke when opted in, otherwise print why it was skipped.
 *
 * When the smoke is skipped and `--require-opt-in` was passed, the process
 * exit code is set to 1 so the skip is reported as a failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cliArgs = process.argv.slice(2);
  const decision = localEmbeddingsSmokeOptIn(process.env, cliArgs);
  if (decision.run) {
    await runLocalEmbeddingsRuntimeSmoke();
    return;
  }
  process.stdout.write(`Skipping KTX local embeddings runtime smoke. ${decision.message}\n`);
  if (cliArgs.includes('--require-opt-in')) {
    process.exitCode = 1;
  }
}
// Run main() only when this file is the script passed to node, not when it is
// imported (for example by the unit tests).
if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) {
main().catch((error) => {
// Print the stack when available and report failure through the exit code.
process.stderr.write(`${error instanceof Error ? error.stack ?? error.message : String(error)}\n`);
process.exitCode = 1;
});
}
```
- [ ] **Step 2: Run the smoke script unit tests**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs
```
Expected: FAIL only in the package script test because
`release:local-embeddings-smoke` is not registered yet.
- [ ] **Step 3: Commit the smoke script**
Run:
```bash
git add scripts/local-embeddings-runtime-smoke.mjs
git commit -m "feat: add local embeddings runtime smoke"
```
### Task 3: Register the opt-in package script
**Files:**
- Modify: `package.json`
- Test: `scripts/local-embeddings-runtime-smoke.test.mjs`
- [ ] **Step 1: Add the package script**
In `package.json`, add this script immediately after
`"release:published-smoke"`:
```json
"release:local-embeddings-smoke": "node scripts/local-embeddings-runtime-smoke.mjs --require-opt-in",
```
The surrounding `scripts` section must contain this sequence after the edit:
```json
"release:published-smoke": "node scripts/published-package-smoke.mjs --require-config",
"release:local-embeddings-smoke": "node scripts/local-embeddings-runtime-smoke.mjs --require-opt-in",
"release:readiness": "node scripts/release-readiness.mjs",
```
- [ ] **Step 2: Run the focused test**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs
```
Expected: PASS.
- [ ] **Step 3: Verify the script stays opt-in**
Run:
```bash
pnpm run release:local-embeddings-smoke
```
Expected: FAIL with:
```text
Skipping KTX local embeddings runtime smoke. Set KTX_RUN_LOCAL_EMBEDDINGS_SMOKE=1 or pass --force to run the local embeddings smoke.
```
The command must exit non-zero because `--require-opt-in` is present. This
protects local and CI runs from downloading large dependencies by accident.
- [ ] **Step 4: Commit the package script**
Run:
```bash
git add package.json
git commit -m "chore: register local embeddings smoke"
```
### Task 4: Verify the opt-in smoke path
**Files:**
- Verify: `scripts/local-embeddings-runtime-smoke.mjs`
- Verify: `scripts/local-embeddings-runtime-smoke.test.mjs`
- Verify: `package.json`
- [ ] **Step 1: Run fast script tests**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs scripts/package-artifacts.test.mjs
```
Expected: PASS. Existing package artifact tests must still prove that the
default npm artifact smoke does not prepare an external Python environment or
run local embeddings downloads.
- [ ] **Step 2: Build release artifacts for the smoke**
Run:
```bash
pnpm run artifacts:build
```
Expected: PASS and `dist/artifacts/npm/` contains exactly one
`kaelio-ktx-*.tgz` tarball.
- [ ] **Step 3: Run the opt-in local embeddings smoke**
Run this only in an environment where downloading `sentence-transformers`,
`torch`, and `all-MiniLM-L6-v2` is acceptable:
```bash
KTX_RUN_LOCAL_EMBEDDINGS_SMOKE=1 pnpm run release:local-embeddings-smoke
```
Expected: PASS with output containing:
```text
KTX local embeddings daemon computed a 384-dimensional embedding
KTX setup persisted managed local embeddings marker
KTX local embeddings runtime smoke verified
```
- [ ] **Step 4: Run release readiness**
Run:
```bash
pnpm run release:readiness
```
Expected: PASS. The readiness report must not require
`release:local-embeddings-smoke`; that smoke remains a separately triggered
release job.
- [ ] **Step 5: Run pre-commit for changed files when configured**
Run:
```bash
uv run pre-commit run --files scripts/local-embeddings-runtime-smoke.mjs scripts/local-embeddings-runtime-smoke.test.mjs package.json
```
Expected: PASS. If pre-commit is unavailable in the environment, record the
tooling failure and keep the previous verification output.
- [ ] **Step 6: Commit verification fixes if needed**
If verification required edits, run:
```bash
git add scripts/local-embeddings-runtime-smoke.mjs scripts/local-embeddings-runtime-smoke.test.mjs package.json
git commit -m "fix: verify local embeddings smoke"
```
Skip this commit when no files changed after the previous commits.
## Acceptance criteria
- `node --test scripts/local-embeddings-runtime-smoke.test.mjs` passes.
- `pnpm run release:local-embeddings-smoke` fails fast without the opt-in
environment variable and prints the exact opt-in guidance.
- `KTX_RUN_LOCAL_EMBEDDINGS_SMOKE=1 pnpm run release:local-embeddings-smoke`
installs the public `@kaelio/ktx` tarball into a clean project, isolates
`KTX_RUNTIME_ROOT` and model caches, installs `local-embeddings`, starts the
managed daemon, computes a 384-dimensional embedding through
`/embeddings/compute`, runs setup with `--embedding-backend
sentence-transformers`, verifies `base_url: managed:local-embeddings` in
`ktx.yaml`, and stops the daemon.
- The default `pnpm run artifacts:verify`, `pnpm run release:readiness`, and
`pnpm run check` paths do not run the local embeddings smoke.
## Self-review
- Spec coverage: this plan covers the remaining release-check item for local
embeddings in a separate job or opt-in check. Earlier implemented plans cover
the bundled wheel, managed runtime installer, `sl query` command integration,
daemon lifecycle, managed local embeddings runtime behavior, public npm
package assembly, and default core runtime release smoke.
- Placeholder scan: no steps contain placeholder implementation language.
- Type consistency: runtime feature names are consistently `core` and
`local-embeddings`; the public npm package name is `@kaelio/ktx`; the opt-in
environment variable is `KTX_RUN_LOCAL_EMBEDDINGS_SMOKE`; the managed local
embedding marker remains `managed:local-embeddings`.

View file

@ -1,239 +0,0 @@
# Managed Local Embeddings Smoke Public Version Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make the opt-in local embeddings release smoke validate the public
`@kaelio/ktx` package version instead of the private workspace version.
**Architecture:** Reuse the public package constants from
`scripts/build-public-npm-package.mjs` inside the local embeddings smoke. Add a
small exported RegExp helper so the unit test can lock the version expectation
without running the expensive model-download smoke.
**Tech Stack:** Node.js ESM scripts, `node:test`, pnpm release scripts.
---
## Current State
The npm-managed Python runtime spec is
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The current branch already contains implementation commits for each existing
plan derived from that spec.
Implemented spec-derived plans:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-agent-mcp-semantic-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-local-ingest-daemon-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-docs-and-postgres-smoke-cleanup.md`
- `docs/superpowers/plans/2026-05-11-published-package-managed-runtime-smoke.md`
- `docs/superpowers/plans/2026-05-11-public-npm-release-handoff.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-prune-smoke-and-docs.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-uv-prerequisite-contract.md`
- `docs/superpowers/plans/2026-05-11-single-public-runtime-artifact-cleanup.md`
The remaining gap is in
`scripts/local-embeddings-runtime-smoke.mjs`. The script selects and installs a
public tarball named `kaelio-ktx-*.tgz` and writes a smoke package dependency on
`@kaelio/ktx`, but line 267 still expects `@kaelio/ktx 0.0.0-private`. The
public package builder defines `PUBLIC_NPM_PACKAGE_VERSION = '0.1.0'`, and the
main packed-package smoke already expects `@kaelio/ktx 0.1.0`.
## File Structure
This change keeps the release version source of truth in one script and reuses
it from the opt-in smoke.
- Modify `scripts/local-embeddings-runtime-smoke.mjs`: import the public package
constants, export `expectedPublicKtxVersionPattern()`, and use that pattern
for the smoke version assertion.
- Modify `scripts/local-embeddings-runtime-smoke.test.mjs`: import
`expectedPublicKtxVersionPattern()` and assert that it accepts
`@kaelio/ktx 0.1.0` and rejects `@kaelio/ktx 0.0.0-private`.
### Task 1: Align the local embeddings smoke version assertion
**Files:**
- Modify: `scripts/local-embeddings-runtime-smoke.mjs:1-267`
- Modify: `scripts/local-embeddings-runtime-smoke.test.mjs:5-118`
- Test: `scripts/local-embeddings-runtime-smoke.test.mjs`
- [ ] **Step 1: Write the failing version-pattern test**
In `scripts/local-embeddings-runtime-smoke.test.mjs`, update the import block
to include `expectedPublicKtxVersionPattern`:
```js
import {
buildLocalEmbeddingsSmokeEnv,
expectedPublicKtxVersionPattern,
localEmbeddingsSmokeCommands,
localEmbeddingsSmokeOptIn,
parseDaemonBaseUrl,
publicKtxTarballName,
validateEmbeddingResponse,
} from './local-embeddings-runtime-smoke.mjs';
```
Then add this test after the `publicKtxTarballName` describe block:
```js
// Locks the smoke's version expectation without running the model-download smoke itself.
describe('expectedPublicKtxVersionPattern', () => {
it('matches the public package version and rejects the private workspace version', () => {
const pattern = expectedPublicKtxVersionPattern();
// Positive case: the public package name followed by the public version.
assert.match('@kaelio/ktx 0.1.0\n', pattern);
// Negative fixture: the private workspace version the smoke used to expect.
assert.doesNotMatch('@kaelio/ktx 0.0.0-private\n', pattern);
});
});
```
- [ ] **Step 2: Run the test to verify it fails**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs
```
Expected: FAIL with an ESM `SyntaxError` reporting that
`./local-embeddings-runtime-smoke.mjs` does not provide an export named
`expectedPublicKtxVersionPattern`.
- [ ] **Step 3: Import the public package constants**
In `scripts/local-embeddings-runtime-smoke.mjs`, add this import after the
existing Node imports:
```js
import {
PUBLIC_NPM_PACKAGE_NAME,
PUBLIC_NPM_PACKAGE_VERSION,
} from './build-public-npm-package.mjs';
```
The top of the file becomes:
```js
import { execFile } from 'node:child_process';
import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { promisify } from 'node:util';
import {
PUBLIC_NPM_PACKAGE_NAME,
PUBLIC_NPM_PACKAGE_VERSION,
} from './build-public-npm-package.mjs';
```
- [ ] **Step 4: Add the version-pattern helper**
In `scripts/local-embeddings-runtime-smoke.mjs`, add these functions after the
`OPT_IN_MESSAGE` constant:
```js
/**
 * Escapes every RegExp metacharacter in `value` so that it matches literally.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Builds the pattern the smoke uses to recognize the public package banner,
 * `<package name> <package version>`.
 *
 * The trailing negative lookahead rejects any longer version that merely
 * starts with the public version (for example `0.1.0-private` or `0.1.01`).
 * Without it the expression is a plain prefix match and would accept them.
 */
export function expectedPublicKtxVersionPattern() {
  const name = escapeRegExp(PUBLIC_NPM_PACKAGE_NAME);
  const version = escapeRegExp(PUBLIC_NPM_PACKAGE_VERSION);
  // `\w`, `.`, `+` and `-` are the characters that can continue a version string.
  return new RegExp(`${name} ${version}(?![\\w.+-])`);
}
```
- [ ] **Step 5: Use the helper in the smoke**
In `scripts/local-embeddings-runtime-smoke.mjs`, replace this line:
```js
requireOutput(commands[0].label, version, /@kaelio\/ktx 0\.0\.0-private/);
```
with:
```js
requireOutput(commands[0].label, version, expectedPublicKtxVersionPattern());
```
- [ ] **Step 6: Run the focused test**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs
```
Expected: PASS. The new test proves the smoke accepts `@kaelio/ktx 0.1.0` and
rejects `@kaelio/ktx 0.0.0-private`.
- [ ] **Step 7: Run related release-script tests**
Run:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs scripts/build-public-npm-package.test.mjs scripts/package-artifacts.test.mjs
```
Expected: PASS. These tests cover the public package constants, tarball name,
artifact smoke source, and local embeddings smoke helpers.
- [ ] **Step 8: Run a stale-expectation search**
Run:
```bash
rg -n "@kaelio/ktx 0\\.0\\.0-private|0\\\\\\.0\\\\\\.0-private" scripts/local-embeddings-runtime-smoke.mjs
```
Expected: no output. The opt-in local embeddings smoke no longer contains the
private package version expectation. The test file still uses
`@kaelio/ktx 0.0.0-private` as a negative fixture.
- [ ] **Step 9: Commit**
Run:
```bash
git add scripts/local-embeddings-runtime-smoke.mjs scripts/local-embeddings-runtime-smoke.test.mjs
git commit -m "fix: align local embeddings smoke with public version"
```
## Verification
Run these checks before marking the plan complete:
```bash
node --test scripts/local-embeddings-runtime-smoke.test.mjs scripts/build-public-npm-package.test.mjs scripts/package-artifacts.test.mjs
rg -n "@kaelio/ktx 0\\.0\\.0-private|0\\\\\\.0\\\\\\.0-private" scripts/local-embeddings-runtime-smoke.mjs
```
Expected results:
- `node --test ...` exits with code 0.
- `rg ...` prints no matches.
- No Python files changed, so the repository Python pre-commit requirement does
not apply.
## Self-Review
- Spec coverage: this plan fixes the opt-in local embeddings release smoke from
the npm-managed runtime spec so it validates the public npm package produced
by the current release artifact flow.
- Placeholder scan: the plan contains concrete file paths, code blocks,
commands, and expected outcomes.
- Type consistency: the helper name is consistently
`expectedPublicKtxVersionPattern`, and it uses
`PUBLIC_NPM_PACKAGE_NAME` plus `PUBLIC_NPM_PACKAGE_VERSION` from the public
package builder.

View file

@ -1,935 +0,0 @@
# Managed Python Runtime Command Integration Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make `ktx sl query` use the KTX-managed bundled Python runtime
instead of relying on a user-provided `python -m ktx_daemon`.
**Architecture:** Add a small CLI helper that resolves the managed runtime,
installs the `core` feature when policy permits it, and creates the existing
`@ktx/context/daemon` one-shot semantic-layer compute port with the managed
`ktx-daemon` executable. Wire `ktx sl query` to pass an explicit runtime
install policy from `--yes`, `--no-input`, or the default interactive mode.
**Tech Stack:** TypeScript, Commander, Vitest, `@clack/prompts`,
`@ktx/context/daemon`, existing KTX managed runtime installer.
---
## Existing status
This plan is based on
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
Existing plans based on the spec:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md` is
implemented. The worktree contains
`scripts/build-python-runtime-wheel.mjs`,
`scripts/build-python-runtime-wheel.test.mjs`, runtime-wheel packaging in
`scripts/package-artifacts.mjs`, release-policy coverage, and matching
artifact tests. The targeted verification passes:
`node --test scripts/build-python-runtime-wheel.test.mjs scripts/package-artifacts.test.mjs scripts/release-readiness.test.mjs`.
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md` is
implemented. The worktree contains
`packages/cli/src/managed-python-runtime.ts`,
`packages/cli/src/runtime.ts`,
`packages/cli/src/commands/runtime-commands.ts`, CLI registration, and
matching Vitest coverage. The targeted CLI verification passes:
`pnpm --filter @ktx/cli run test -- src/managed-python-runtime.test.ts src/runtime.test.ts src/index.test.ts`.
Spec requirements still outside this plan:
- `ktx runtime start` and `ktx runtime stop`.
- Managed HTTP daemon state, health checks, reuse, and stale daemon repair.
- Lazy `local-embeddings` installation and local embedding daemon reuse.
- Public npm package rename from `@ktx/cli` to `@kaelio/ktx`.
This plan implements the next runnable user path: `ktx sl query` installs or
uses the managed `core` Python runtime according to the command's input policy.
## File structure
- Create `packages/cli/src/managed-python-command.ts`: CLI helper for managed
runtime policy, optional prompt, runtime install, and managed semantic-layer
compute port creation.
- Create `packages/cli/src/managed-python-command.test.ts`: unit tests for
ready runtime reuse, `--no-input` failure, `--yes` installation, and
interactive prompt acceptance.
- Modify `packages/cli/src/sl.ts`: extend `KtxSlArgs` with CLI version and
runtime install policy for `query`, and use the managed helper when no test
compute port is injected.
- Modify `packages/cli/src/sl.test.ts`: update existing `query` arguments and
assert `runKtxSl` delegates default compute creation to the managed helper.
- Modify `packages/cli/src/commands/sl-commands.ts`: add `--yes` and
`--no-input` to `sl query`, derive the runtime install policy, and pass the
CLI package version.
- Modify `packages/cli/src/command-schemas.ts`: validate `cliVersion` and
`runtimeInstallPolicy` on parsed `sl query` arguments.
- Modify `packages/cli/src/index.test.ts`: assert Commander routes the new
`sl query` runtime policy flags.
### Task 1: Add failing managed Python command helper tests
**Files:**
- Create: `packages/cli/src/managed-python-command.test.ts`
- Test: `packages/cli/src/managed-python-command.test.ts`
- [ ] **Step 1: Write the failing test file**
Create `packages/cli/src/managed-python-command.test.ts` with this content:
```typescript
import { describe, expect, it, vi } from 'vitest';
import {
createManagedPythonSemanticLayerComputePort,
managedRuntimeInstallCommand,
} from './managed-python-command.js';
import type {
InstalledKtxRuntimeManifest,
KtxRuntimeFeature,
ManagedPythonRuntimeInstallResult,
ManagedPythonRuntimeLayout,
ManagedPythonRuntimeStatus,
} from './managed-python-runtime.js';
/**
 * Creates an in-memory CLI io double.
 *
 * Returns the `io` object to hand to the code under test, plus `stdout()` and
 * `stderr()` accessors that return everything written to each stream so far.
 */
function makeIo() {
  const captured = { out: '', err: '' };
  const appendOut = (chunk: string) => {
    captured.out += chunk;
  };
  const appendErr = (chunk: string) => {
    captured.err += chunk;
  };
  const io = {
    stdout: { write: appendOut },
    stderr: { write: appendErr },
  };
  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
/** Fixture: a fresh managed runtime layout for CLI version 0.2.0 under `/runtime`. */
function layout(): ManagedPythonRuntimeLayout {
  const fixture: ManagedPythonRuntimeLayout = {
    cliVersion: '0.2.0',
    runtimeRoot: '/runtime',
    versionDir: '/runtime/0.2.0',
    venvDir: '/runtime/0.2.0/.venv',
    manifestPath: '/runtime/0.2.0/manifest.json',
    installLogPath: '/runtime/0.2.0/install.log',
    assetDir: '/assets/python',
    assetManifestPath: '/assets/python/manifest.json',
    pythonPath: '/runtime/0.2.0/.venv/bin/python',
    daemonPath: '/runtime/0.2.0/.venv/bin/ktx-daemon',
  };
  return fixture;
}
/**
 * Fixture: an installed-runtime manifest for CLI version 0.2.0.
 *
 * @param features Features recorded as installed; defaults to `['core']`.
 */
function manifest(features: KtxRuntimeFeature[] = ['core']): InstalledKtxRuntimeManifest {
  const wheel: InstalledKtxRuntimeManifest['asset']['wheel'] = {
    file: 'kaelio_ktx-0.2.0-py3-none-any.whl',
    sha256: 'a'.repeat(64),
    bytes: 123,
  };
  const asset: InstalledKtxRuntimeManifest['asset'] = {
    schemaVersion: 1,
    distributionName: 'kaelio-ktx',
    normalizedName: 'kaelio_ktx',
    version: '0.2.0',
    wheel,
  };
  const python: InstalledKtxRuntimeManifest['python'] = {
    executable: '/runtime/0.2.0/.venv/bin/python',
    daemonExecutable: '/runtime/0.2.0/.venv/bin/ktx-daemon',
  };
  return {
    schemaVersion: 1,
    cliVersion: '0.2.0',
    installedAt: '2026-05-11T00:00:00.000Z',
    asset,
    features,
    python,
    installLog: '/runtime/0.2.0/install.log',
  };
}
/**
 * Fixture: a `ready` runtime status whose manifest lists `features`
 * (defaults to `['core']`).
 */
function readyStatus(features: KtxRuntimeFeature[] = ['core']): ManagedPythonRuntimeStatus {
  const runtimeLayout = layout();
  const runtimeManifest = manifest(features);
  return {
    kind: 'ready',
    detail: 'Runtime ready at /runtime/0.2.0',
    layout: runtimeLayout,
    manifest: runtimeManifest,
  };
}
/** Fixture: a `missing` runtime status, i.e. no manifest has been written yet. */
function missingStatus(): ManagedPythonRuntimeStatus {
  const runtimeLayout = layout();
  return {
    kind: 'missing',
    detail: 'No runtime manifest at /runtime/0.2.0/manifest.json',
    layout: runtimeLayout,
  };
}
/**
 * Fixture: the result of a successful runtime install for `features`
 * (defaults to `['core']`). The same manifest object backs both `manifest`
 * and `asset.manifest`.
 */
function installResult(features: KtxRuntimeFeature[] = ['core']): ManagedPythonRuntimeInstallResult {
  const installed = manifest(features);
  const runtimeLayout = layout();
  return {
    status: 'installed',
    layout: runtimeLayout,
    asset: {
      manifest: installed.asset,
      wheelPath: '/assets/python/kaelio_ktx-0.2.0-py3-none-any.whl',
    },
    manifest: installed,
  };
}
// Pins the exact install command string produced for each managed runtime feature.
describe('managedRuntimeInstallCommand', () => {
it('prints the exact command for each managed runtime feature', () => {
// `core` needs no --feature flag.
expect(managedRuntimeInstallCommand('core')).toBe('ktx runtime install --yes');
expect(managedRuntimeInstallCommand('local-embeddings')).toBe(
'ktx runtime install --feature local-embeddings --yes',
);
});
});
// Covers the four paths through the helper: runtime already ready, runtime missing with
// each of the 'never', 'auto', and 'prompt' install policies. All collaborators are injected.
describe('createManagedPythonSemanticLayerComputePort', () => {
// Ready runtime: no install, no stderr output, compute is created from the managed daemon path.
it('uses the managed ktx-daemon executable when the runtime is ready', async () => {
const io = makeIo();
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
const createPythonCompute = vi.fn(() => compute);
await expect(
createManagedPythonSemanticLayerComputePort({
cliVersion: '0.2.0',
installPolicy: 'never',
io: io.io,
readStatus: vi.fn(async () => readyStatus()),
installRuntime: vi.fn(),
createPythonCompute,
}),
).resolves.toBe(compute);
expect(createPythonCompute).toHaveBeenCalledWith({
command: '/runtime/0.2.0/.venv/bin/ktx-daemon',
args: [],
});
expect(io.stderr()).toBe('');
});
// Policy 'never' + missing runtime: reject with the command the user should run; never install.
it('fails with a preparation command when input is disabled and the runtime is missing', async () => {
const io = makeIo();
const installRuntime = vi.fn();
await expect(
createManagedPythonSemanticLayerComputePort({
cliVersion: '0.2.0',
installPolicy: 'never',
io: io.io,
readStatus: vi.fn(async () => missingStatus()),
installRuntime,
}),
).rejects.toThrow('KTX Python runtime is required for this command. Run: ktx runtime install --yes');
expect(installRuntime).not.toHaveBeenCalled();
});
// Policy 'auto' + missing runtime: install `core` without confirmation and report progress on stderr.
it('installs the core runtime without prompting when policy is auto', async () => {
const io = makeIo();
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
const createPythonCompute = vi.fn(() => compute);
const installRuntime = vi.fn(async () => installResult());
await expect(
createManagedPythonSemanticLayerComputePort({
cliVersion: '0.2.0',
installPolicy: 'auto',
io: io.io,
readStatus: vi.fn(async () => missingStatus()),
installRuntime,
createPythonCompute,
}),
).resolves.toBe(compute);
expect(installRuntime).toHaveBeenCalledWith({
cliVersion: '0.2.0',
features: ['core'],
force: false,
});
expect(io.stderr()).toContain('Installing KTX Python runtime (core) with uv');
expect(io.stderr()).toContain('KTX Python runtime ready: /runtime/0.2.0');
});
// Policy 'prompt' + missing runtime: ask with the exact prompt text, then install once confirmed.
it('prompts before installing when policy is prompt', async () => {
const io = makeIo();
const confirmInstall = vi.fn(async () => true);
const installRuntime = vi.fn(async () => installResult());
await createManagedPythonSemanticLayerComputePort({
cliVersion: '0.2.0',
installPolicy: 'prompt',
io: io.io,
readStatus: vi.fn(async () => missingStatus()),
installRuntime,
createPythonCompute: vi.fn(() => ({ query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() })),
confirmInstall,
});
expect(confirmInstall).toHaveBeenCalledWith(
'KTX needs to install the core Python runtime. This downloads Python dependencies with uv. Continue?',
);
expect(installRuntime).toHaveBeenCalledWith({
cliVersion: '0.2.0',
features: ['core'],
force: false,
});
});
});
```
- [ ] **Step 2: Run the failing test**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/managed-python-command.test.ts
```
Expected: FAIL with an import error for
`./managed-python-command.js`.
### Task 2: Implement the managed Python command helper
**Files:**
- Create: `packages/cli/src/managed-python-command.ts`
- Test: `packages/cli/src/managed-python-command.test.ts`
- [ ] **Step 1: Create the helper**
Create `packages/cli/src/managed-python-command.ts` with this content:
```typescript
import { cancel, confirm, isCancel } from '@clack/prompts';
import { createPythonSemanticLayerComputePort, type KtxSemanticLayerComputePort } from '@ktx/context/daemon';
import type { KtxCliIo } from './cli-runtime.js';
import {
installManagedPythonRuntime,
readManagedPythonRuntimeStatus,
type InstalledKtxRuntimeManifest,
type KtxRuntimeFeature,
type ManagedPythonRuntimeInstallOptions,
type ManagedPythonRuntimeInstallResult,
type ManagedPythonRuntimeLayout,
type ManagedPythonRuntimeLayoutOptions,
type ManagedPythonRuntimeStatus,
} from './managed-python-runtime.js';
// What to do when the required runtime is not ready: 'prompt' asks for confirmation
// before installing, 'auto' installs without asking, 'never' throws instead of installing.
export type KtxManagedPythonInstallPolicy = 'prompt' | 'auto' | 'never';
// The runtime a command ends up using: its on-disk layout and installed manifest.
export interface ManagedPythonCommandRuntime {
layout: ManagedPythonRuntimeLayout;
manifest: InstalledKtxRuntimeManifest;
}
// Optional overrides for the helper's collaborators. When omitted, the helper falls back to
// readManagedPythonRuntimeStatus, installManagedPythonRuntime, and the built-in TTY confirm prompt.
export interface ManagedPythonCommandDeps {
readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise<ManagedPythonRuntimeStatus>;
installRuntime?: (options: ManagedPythonRuntimeInstallOptions) => Promise<ManagedPythonRuntimeInstallResult>;
confirmInstall?: (message: string) => Promise<boolean>;
}
// Inputs for ensureManagedPythonCommandRuntime.
export interface ManagedPythonCommandOptions extends ManagedPythonCommandDeps {
cliVersion: string;
installPolicy: KtxManagedPythonInstallPolicy;
// Progress messages are written to io.stderr.
io: KtxCliIo;
// Feature the command needs; treated as 'core' when omitted.
feature?: KtxRuntimeFeature;
}
// Inputs for createManagedPythonSemanticLayerComputePort.
export interface ManagedPythonSemanticLayerComputeOptions extends ManagedPythonCommandOptions {
// Factory override; defaults to createPythonSemanticLayerComputePort.
createPythonCompute?: typeof createPythonSemanticLayerComputePort;
}
/**
 * Returns the `ktx runtime install` command line to suggest for `feature`.
 * Only `local-embeddings` needs an explicit `--feature` flag.
 */
export function managedRuntimeInstallCommand(feature: KtxRuntimeFeature): string {
  if (feature === 'local-embeddings') {
    return 'ktx runtime install --feature local-embeddings --yes';
  }
  return 'ktx runtime install --yes';
}
/** Builds the confirmation question shown before installing the runtime for `feature`. */
function installPrompt(feature: KtxRuntimeFeature): string {
  let label = 'core Python runtime';
  if (feature === 'local-embeddings') {
    label = 'local embeddings Python runtime';
  }
  return `KTX needs to install the ${label}. This downloads Python dependencies with uv. Continue?`;
}
/** Builds the error text used when the runtime is required and must not be installed automatically. */
function runtimeRequiredMessage(feature: KtxRuntimeFeature): string {
  const installCommand = managedRuntimeInstallCommand(feature);
  return `KTX Python runtime is required for this command. Run: ${installCommand}`;
}
/** Reports whether `manifest` lists `feature` among its installed features. */
function hasFeature(manifest: InstalledKtxRuntimeManifest, feature: KtxRuntimeFeature): boolean {
  return manifest.features.some((installed) => installed === feature);
}
/**
 * Default confirmation step: asks the user through an interactive prompt.
 *
 * Resolves to `false` without prompting unless both stdin and stdout are TTYs,
 * and to `false` when the prompt is cancelled.
 */
async function defaultConfirmInstall(message: string): Promise<boolean> {
  const interactive = process.stdin.isTTY === true && process.stdout.isTTY === true;
  if (!interactive) {
    return false;
  }
  const answer = await confirm({ message, initialValue: true });
  if (!isCancel(answer)) {
    return answer === true;
  }
  cancel('Runtime installation cancelled.');
  return false;
}
/**
 * Makes sure a managed runtime providing the requested feature exists and returns it.
 *
 * A runtime that is already `ready` and lists the feature is returned as-is.
 * Otherwise the install policy decides: `never` throws with the command to
 * run, `prompt` asks for confirmation (throwing if declined), and `auto` goes
 * straight to installation. Progress is reported on `options.io.stderr`.
 *
 * @throws Error when installation is not permitted or is declined.
 */
export async function ensureManagedPythonCommandRuntime(
  options: ManagedPythonCommandOptions,
): Promise<ManagedPythonCommandRuntime> {
  const feature = options.feature ?? 'core';
  const loadStatus = options.readStatus ?? readManagedPythonRuntimeStatus;
  const runInstall = options.installRuntime ?? installManagedPythonRuntime;

  const current = await loadStatus({ cliVersion: options.cliVersion });
  if (current.kind === 'ready' && current.manifest && hasFeature(current.manifest, feature)) {
    return { layout: current.layout, manifest: current.manifest };
  }

  switch (options.installPolicy) {
    case 'never':
      throw new Error(runtimeRequiredMessage(feature));
    case 'prompt': {
      const ask = options.confirmInstall ?? defaultConfirmInstall;
      const accepted = await ask(installPrompt(feature));
      if (!accepted) {
        throw new Error(`KTX Python runtime installation was cancelled. Run: ${managedRuntimeInstallCommand(feature)}`);
      }
      break;
    }
    default:
      // 'auto': install without asking.
      break;
  }

  options.io.stderr.write(`Installing KTX Python runtime (${feature}) with uv...\n`);
  const result = await runInstall({
    cliVersion: options.cliVersion,
    features: [feature],
    force: false,
  });
  options.io.stderr.write(`KTX Python runtime ready: ${result.layout.versionDir}\n`);
  return { layout: result.layout, manifest: result.manifest };
}
/**
 * Ensures the managed `core` runtime is available, then creates a
 * semantic-layer compute port that runs the runtime's daemon executable.
 *
 * Injected collaborators are forwarded only when they are set, so unset ones
 * are left out of the forwarded options rather than passed as `undefined`.
 */
export async function createManagedPythonSemanticLayerComputePort(
  options: ManagedPythonSemanticLayerComputeOptions,
): Promise<KtxSemanticLayerComputePort> {
  const runtimeOptions: ManagedPythonCommandOptions = {
    cliVersion: options.cliVersion,
    installPolicy: options.installPolicy,
    io: options.io,
    feature: 'core',
  };
  if (options.readStatus) {
    runtimeOptions.readStatus = options.readStatus;
  }
  if (options.installRuntime) {
    runtimeOptions.installRuntime = options.installRuntime;
  }
  if (options.confirmInstall) {
    runtimeOptions.confirmInstall = options.confirmInstall;
  }

  const runtime = await ensureManagedPythonCommandRuntime(runtimeOptions);
  const buildCompute = options.createPythonCompute ?? createPythonSemanticLayerComputePort;
  const daemonCommand = runtime.manifest.python.daemonExecutable;
  return buildCompute({ command: daemonCommand, args: [] });
}
```
- [ ] **Step 2: Run the helper test**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/managed-python-command.test.ts
```
Expected: PASS.
- [ ] **Step 3: Commit**
Run:
```bash
git add packages/cli/src/managed-python-command.ts packages/cli/src/managed-python-command.test.ts
git commit -m "feat: add managed python command helper"
```
Expected: commit succeeds.
### Task 3: Add failing `runKtxSl` managed runtime tests
**Files:**
- Modify: `packages/cli/src/sl.test.ts`
- Test: `packages/cli/src/sl.test.ts`
- [ ] **Step 1: Add runtime fields to existing `query` test args**
In each existing `runKtxSl` call whose argument object has
`command: 'query'`, add these properties:
```typescript
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
```
For example, the first `query` argument object becomes:
```typescript
{
command: 'query',
projectDir: '/tmp/project',
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'sql',
execute: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
}
```
- [ ] **Step 2: Add the managed helper delegation test**
In `packages/cli/src/sl.test.ts`, add this test inside
`describe('runKtxSl', () => { ... })` after the existing
`runs sl query and prints SQL output` test:
```typescript
// With no `createSemanticLayerCompute` injected, `runKtxSl` must obtain its compute port from
// the managed runtime helper, passing through the CLI version, install policy, and io.
it('creates default sl query compute through the managed runtime helper', async () => {
// Arrange: a project with one postgres connection and a single `orders` semantic-layer source.
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
project.config.connections.warehouse = { driver: 'postgres', readonly: true };
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: public.orders
grain: [id]
columns:
- name: id
type: number
measures:
- name: order_count
expr: count(*)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
const stdout = { write: vi.fn() };
const stderr = { write: vi.fn() };
// Stub compute port: only `query` returns data; its SQL is what the command should print.
const compute = {
query: vi.fn(async () => ({
sql: 'select count(*) as order_count from public.orders',
dialect: 'postgres',
columns: [{ name: 'orders.order_count' }],
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
};
const createManagedSemanticLayerCompute = vi.fn(async () => compute);
// Act: run `sl query` with only the managed factory injected.
await expect(
runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
format: 'sql',
execute: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
{ stdout, stderr },
{ createManagedSemanticLayerCompute },
),
).resolves.toBe(0);
// Assert: the helper received the runtime fields, and the stub's SQL reached stdout.
expect(createManagedSemanticLayerCompute).toHaveBeenCalledWith({
cliVersion: '0.2.0',
installPolicy: 'auto',
io: { stdout, stderr },
});
expect(stdout.write).toHaveBeenCalledWith('select count(*) as order_count from public.orders\n');
});
```
- [ ] **Step 3: Run the failing `sl` test**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/sl.test.ts
```
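Expected: FAIL because `runKtxSl` ignores `createManagedSemanticLayerCompute`
until Task 4. Vitest does not type-check test files by default, so expect a
failed assertion or runtime error rather than a TypeScript compile error.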
### Task 4: Wire `runKtxSl` to the managed helper
**Files:**
- Modify: `packages/cli/src/sl.ts`
- Test: `packages/cli/src/sl.test.ts`
- [ ] **Step 1: Add the managed helper imports**
In `packages/cli/src/sl.ts`, add this import after the existing imports:
```typescript
import {
createManagedPythonSemanticLayerComputePort,
type KtxManagedPythonInstallPolicy,
} from './managed-python-command.js';
```
- [ ] **Step 2: Extend the `query` args type**
In the `KtxSlArgs` union, replace the current `query` object type with this
shape:
```typescript
| {
command: 'query';
projectDir: string;
connectionId?: string;
query: SemanticLayerQueryInput;
format: SlQueryFormat;
execute: boolean;
maxRows?: number;
cliVersion: string;
runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
};
```
- [ ] **Step 3: Extend `KtxSlDeps`**
In `packages/cli/src/sl.ts`, replace `KtxSlDeps` with this interface:
```typescript
// Injectable collaborators for `runKtxSl`; every field is optional so tests can override selectively.
interface KtxSlDeps {
loadProject?: typeof loadKtxProject;
// Synchronous compute factory; when provided, the `query` branch uses it instead of the managed helper.
createSemanticLayerCompute?: () => KtxSemanticLayerComputePort;
// Async factory used by the `query` branch when `createSemanticLayerCompute` is absent;
// defaults to createManagedPythonSemanticLayerComputePort.
createManagedSemanticLayerCompute?: (options: {
cliVersion: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxSlIo;
}) => Promise<KtxSemanticLayerComputePort>;
createQueryExecutor?: () => KtxSqlQueryExecutorPort;
}
```
- [ ] **Step 4: Use the managed helper in the `query` branch**
In the `args.command === 'query'` branch, replace:
```typescript
const compute = (deps.createSemanticLayerCompute ?? createPythonSemanticLayerComputePort)();
```
with:
```typescript
const compute = deps.createSemanticLayerCompute
? deps.createSemanticLayerCompute()
: await (deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort)({
cliVersion: args.cliVersion,
installPolicy: args.runtimeInstallPolicy,
io,
});
```
- [ ] **Step 5: Run the `sl` test**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/sl.test.ts
```
Expected: PASS.
- [ ] **Step 6: Commit**
Run:
```bash
git add packages/cli/src/sl.ts packages/cli/src/sl.test.ts
git commit -m "feat: use managed runtime for sl query compute"
```
Expected: commit succeeds.
### Task 5: Add failing Commander routing tests for `sl query`
**Files:**
- Modify: `packages/cli/src/index.test.ts`
- Test: `packages/cli/src/index.test.ts`
- [ ] **Step 1: Add routing tests**
In `packages/cli/src/index.test.ts`, add these tests near the other command
routing tests:
```typescript
// Checks how the `sl query` flags map to the runtime install policy handed to the `sl` handler:
// no flag -> 'prompt', --yes -> 'auto', --no-input -> 'never'.
it('routes sl query managed runtime install policies', async () => {
const sl = vi.fn(async () => 0);
// Default: neither flag given.
const promptIo = makeIo();
await expect(
runKtxCli(['--project-dir', tempDir, 'sl', 'query', '--measure', 'orders.order_count'], promptIo.io, { sl }),
).resolves.toBe(0);
expect(sl).toHaveBeenLastCalledWith(
expect.objectContaining({
command: 'query',
projectDir: tempDir,
cliVersion: '0.0.0-private',
runtimeInstallPolicy: 'prompt',
query: expect.objectContaining({ measures: ['orders.order_count'], dimensions: [] }),
}),
promptIo.io,
);
// --yes: install without prompting.
const autoIo = makeIo();
await expect(
runKtxCli(['--project-dir', tempDir, 'sl', 'query', '--measure', 'orders.order_count', '--yes'], autoIo.io, {
sl,
}),
).resolves.toBe(0);
expect(sl).toHaveBeenLastCalledWith(
expect.objectContaining({
cliVersion: '0.0.0-private',
runtimeInstallPolicy: 'auto',
}),
autoIo.io,
);
// --no-input: never install.
const noInputIo = makeIo();
await expect(
runKtxCli(
['--project-dir', tempDir, 'sl', 'query', '--measure', 'orders.order_count', '--no-input'],
noInputIo.io,
{ sl },
),
).resolves.toBe(0);
expect(sl).toHaveBeenLastCalledWith(
expect.objectContaining({
cliVersion: '0.0.0-private',
runtimeInstallPolicy: 'never',
}),
noInputIo.io,
);
});
// --yes and --no-input together are contradictory: the CLI must exit 1 with an explanatory
// message on stderr and never reach the `sl` handler.
it('rejects conflicting sl query runtime install flags', async () => {
const io = makeIo();
const sl = vi.fn(async () => 0);
await expect(
runKtxCli(
['--project-dir', tempDir, 'sl', 'query', '--measure', 'orders.order_count', '--yes', '--no-input'],
io.io,
{ sl },
),
).resolves.toBe(1);
expect(sl).not.toHaveBeenCalled();
expect(io.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input');
});
```
- [ ] **Step 2: Run the failing routing tests**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/index.test.ts
```
Expected: FAIL because `sl query` does not accept `--yes` or `--no-input`
and does not pass runtime policy fields yet.
### Task 6: Wire `sl query` flags and schema validation
**Files:**
- Modify: `packages/cli/src/commands/sl-commands.ts`
- Modify: `packages/cli/src/command-schemas.ts`
- Test: `packages/cli/src/index.test.ts`
- [ ] **Step 1: Add the runtime policy type import**
In `packages/cli/src/commands/sl-commands.ts`, add this import:
```typescript
import type { KtxManagedPythonInstallPolicy } from '../managed-python-command.js';
```
- [ ] **Step 2: Add the runtime policy parser**
In `packages/cli/src/commands/sl-commands.ts`, add this function near the
other option parsers:
```typescript
/**
 * Derives the managed runtime install policy from the parsed `sl query` flags.
 *
 * `--yes` maps to `'auto'`, `--no-input` (parsed as `input === false`) maps to
 * `'never'`, and neither flag maps to `'prompt'`.
 *
 * @throws Error when both `--yes` and `--no-input` are given.
 */
function runtimeInstallPolicy(options: { yes?: boolean; input?: boolean }): KtxManagedPythonInstallPolicy {
  const autoRequested = options.yes === true;
  const inputDisabled = options.input === false;
  if (autoRequested) {
    if (inputDisabled) {
      throw new Error('Choose only one runtime install mode: --yes or --no-input');
    }
    return 'auto';
  }
  if (inputDisabled) {
    return 'never';
  }
  return 'prompt';
}
```
- [ ] **Step 3: Add the command options**
In the `sl.command('query')` option chain, add these options after
`.option('--execute', 'Execute the compiled query', false)`:
```typescript
.option('--yes', 'Install the managed Python runtime without prompting when required', false)
.option('--no-input', 'Disable interactive managed runtime installation')
```
- [ ] **Step 4: Pass runtime fields into `slQueryCommandSchema.parse`**
In the `sl.command('query')` action, add these properties to the parsed object:
```typescript
cliVersion: context.packageInfo.version,
runtimeInstallPolicy: runtimeInstallPolicy(options),
```
The parsed object must include these fields next to `execute` and `format`:
```typescript
const args = slQueryCommandSchema.parse({
command: 'query',
projectDir: resolveCommandProjectDir(command),
connectionId: options.connectionId,
query: {
measures: options.measure,
dimensions: options.dimension,
...(options.filter.length > 0 ? { filters: options.filter } : {}),
...(options.segment.length > 0 ? { segments: options.segment } : {}),
...(options.orderBy.length > 0 ? { order_by: options.orderBy } : {}),
...(options.limit !== undefined ? { limit: options.limit } : {}),
...(options.includeEmpty === true ? { include_empty: true } : {}),
},
format: options.format,
execute: options.execute === true,
cliVersion: context.packageInfo.version,
runtimeInstallPolicy: runtimeInstallPolicy(options),
...(options.maxRows !== undefined ? { maxRows: options.maxRows } : {}),
});
```
- [ ] **Step 5: Extend the command schema**
In `packages/cli/src/command-schemas.ts`, add these fields to
`slQueryCommandSchema` after `execute: z.boolean()`:
```typescript
cliVersion: z.string().min(1),
runtimeInstallPolicy: z.enum(['prompt', 'auto', 'never']),
```
- [ ] **Step 6: Run the routing tests**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/index.test.ts
```
Expected: PASS.
- [ ] **Step 7: Commit**
Run:
```bash
git add packages/cli/src/commands/sl-commands.ts packages/cli/src/command-schemas.ts packages/cli/src/index.test.ts
git commit -m "feat: route sl query managed runtime policy"
```
Expected: commit succeeds.
### Task 7: Verify the full changed surface
**Files:**
- Verify: `packages/cli/src/managed-python-command.test.ts`
- Verify: `packages/cli/src/sl.test.ts`
- Verify: `packages/cli/src/index.test.ts`
- Verify: `packages/cli/src/managed-python-command.ts`
- Verify: `packages/cli/src/sl.ts`
- Verify: `packages/cli/src/commands/sl-commands.ts`
- Verify: `packages/cli/src/command-schemas.ts`
- [ ] **Step 1: Run focused CLI tests**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/managed-python-command.test.ts src/sl.test.ts src/index.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run CLI type checking**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS.
- [ ] **Step 3: Run pre-commit for changed TypeScript files**
Run:
```bash
uv run pre-commit run --files packages/cli/src/managed-python-command.ts packages/cli/src/managed-python-command.test.ts packages/cli/src/sl.ts packages/cli/src/sl.test.ts packages/cli/src/commands/sl-commands.ts packages/cli/src/command-schemas.ts packages/cli/src/index.test.ts
```
Expected: PASS. If pre-commit is unavailable because the local `uv` version
does not satisfy `pyproject.toml`, record the version mismatch and run the
focused CLI tests plus type checking from Steps 1 and 2.
- [ ] **Step 4: Commit verification fixes when needed**
If fixing failures from Step 1 or Step 2, or the formatting hooks in Step 3, changed any files, run:
```bash
git add packages/cli/src/managed-python-command.ts packages/cli/src/managed-python-command.test.ts packages/cli/src/sl.ts packages/cli/src/sl.test.ts packages/cli/src/commands/sl-commands.ts packages/cli/src/command-schemas.ts packages/cli/src/index.test.ts
git commit -m "test: verify managed runtime sl query integration"
```
Expected: commit succeeds only when verification changed files. If no files
changed, leave the branch with the commits from Tasks 2, 4, and 6.
## Acceptance criteria
When this plan is complete:
- `ktx sl query` uses the managed runtime's installed `ktx-daemon` executable
for semantic-layer compilation when no test compute dependency is injected.
- `ktx sl query --yes` installs the `core` runtime feature without prompting
when the managed runtime is missing.
- `ktx sl query --no-input` fails with
`KTX Python runtime is required for this command. Run: ktx runtime install --yes`
when the managed runtime is missing.
- `ktx sl query` prompts before first managed runtime installation in an
interactive terminal.
- Existing injected-compute tests still bypass runtime installation.

View file

@ -1,585 +0,0 @@
# Managed Python Runtime Release Smoke Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make the public `@kaelio/ktx` artifact smoke prove that the npm
package installs and uses its own managed Python runtime without an externally
prepared Python environment.
**Architecture:** Keep the release smoke black-box: install the packed public
npm tarball into a clean project, isolate `KTX_RUNTIME_ROOT`, and exercise the
installed `ktx` binary. The first `ktx sl query --yes` performs the lazy core
runtime install from bundled package assets, then the smoke verifies
`runtime status`, `runtime doctor`, daemon start/reuse, and daemon stop.
**Tech Stack:** Node 22 ESM scripts, `node:test`, pnpm, uv, KTX CLI managed
Python runtime assets.
---
## Existing status
This plan is based on
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
Existing plans based on the spec:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
All six are implemented in this worktree. Evidence found before writing this
plan includes:
- `scripts/build-python-runtime-wheel.mjs` and
`scripts/build-python-runtime-wheel.test.mjs`.
- `packages/cli/assets/python/kaelio_ktx-0.1.0-py3-none-any.whl` and
`packages/cli/assets/python/manifest.json`.
- `packages/cli/src/managed-python-runtime.ts`,
`packages/cli/src/runtime.ts`, and
`packages/cli/src/commands/runtime-commands.ts`.
- `packages/cli/src/managed-python-command.ts` and `ktx sl query` runtime
install policy flags.
- `packages/cli/src/managed-python-daemon.ts`, daemon state paths, and
`ktx runtime start` / `ktx runtime stop`.
- `packages/cli/src/managed-local-embeddings.ts`,
`packages/context/src/llm/local-config.ts` managed marker constants, and
setup wiring in `packages/cli/src/setup-embeddings.ts`.
- `scripts/build-public-npm-package.mjs`,
`scripts/build-public-npm-package.test.mjs`, `release-policy.json` listing
`@kaelio/ktx`, and published smoke command construction for the required
`@kaelio/ktx` invocation modes.
The remaining release-smoke gap is in `scripts/package-artifacts.mjs`:
- `verifyNpmArtifacts()` creates a smoke `.venv`, installs the built Python
runtime wheel into it, and runs installed CLI smoke scripts with that venv at
the front of `PATH`.
- The installed CLI smoke does run `ktx sl query --yes`, but it does not
isolate `KTX_RUNTIME_ROOT`, does not assert that the first query installed
the managed runtime from bundled npm assets, and does not exercise
`ktx runtime status`, `doctor`, `start`, reuse, or `stop`.
This plan closes that release-flow gap without changing the separate Python
artifact smoke. `verifyPythonArtifacts()` must continue to install the built
Python wheel directly because it verifies the Python artifact itself.
## File structure
- Modify `scripts/package-artifacts.test.mjs`: remove the npm-smoke venv test,
add a source-level guard that npm artifact verification does not prepare an
external Python venv, and assert that the installed CLI smoke exercises the
managed runtime lifecycle.
- Modify `scripts/package-artifacts.mjs`: remove npm-smoke Python venv PATH
setup, isolate `KTX_RUNTIME_ROOT` inside `npmRuntimeSmokeSource()`, assert
first-run lazy install, and add runtime status/doctor/start/reuse/stop smoke
commands.
### Task 1: Add failing release-smoke tests
**Files:**
- Modify: `scripts/package-artifacts.test.mjs`
- Test: `scripts/package-artifacts.test.mjs`
- [ ] **Step 1: Remove the stale npm-smoke venv import**
In `scripts/package-artifacts.test.mjs`, delete `npmSmokePythonEnv` from the
import list. The surrounding import block must contain this sequence after the
edit:
```javascript
npmDemoSmokeSource,
npmRuntimeSmokeSource,
npmSmokePackageJson,
npmVerifySource,
```
- [ ] **Step 2: Replace the npm-smoke venv test with a source guard**
Delete this entire test block:
```javascript
describe('npmSmokePythonEnv', () => {
it('prepends the npm smoke virtualenv bin directory to PATH', () => {
const env = npmSmokePythonEnv('/tmp/ktx-npm-smoke', { PATH: '/usr/bin' });
assert.match(env.PATH, /^\/tmp\/ktx-npm-smoke\/\.venv\/(bin|Scripts)/);
assert.match(env.PATH, /\/usr\/bin$/);
});
});
```
Insert this block in the same location:
```javascript
describe('verifyNpmArtifacts', () => {
it('does not prepare an external Python environment for the npm smoke', async () => {
const source = await readFile(new URL('./package-artifacts.mjs', import.meta.url), 'utf8');
const start = source.indexOf('async function verifyNpmArtifacts');
const end = source.indexOf('async function verifyNpmDemoArtifacts');
assert.ok(start >= 0, 'verifyNpmArtifacts function must exist');
assert.ok(end > start, 'verifyNpmDemoArtifacts must follow verifyNpmArtifacts');
const body = source.slice(start, end);
assert.doesNotMatch(body, /uv', \['venv', '\.venv'\]/);
assert.doesNotMatch(body, /pythonArtifactInstallArgs/);
assert.doesNotMatch(body, /npmSmokePythonEnv/);
});
});
```
- [ ] **Step 3: Extend the installed CLI smoke assertions**
In the `it('runs installed CLI commands through the public package runtime',
...)` test, add these assertions after the existing
`assert.match(source, /ktx sl query sqlite execute/);` assertion:
```javascript
assert.match(source, /import Database from 'better-sqlite3'/);
assert.doesNotMatch(source, /run\('python'/);
assert.match(source, /KTX_RUNTIME_ROOT/);
assert.match(source, /managed-runtime/);
assert.match(source, /ktx runtime status missing/);
assert.match(source, /runtimeStatusBefore\.kind, 'missing'/);
assert.match(source, /Installing KTX Python runtime \(core\) with uv/);
assert.match(source, /KTX Python runtime ready:/);
assert.match(source, /ktx runtime status ready/);
assert.match(source, /runtimeStatusAfter\.kind, 'ready'/);
assert.match(source, /runtimeStatusAfter\.manifest\.features/);
assert.match(source, /ktx runtime doctor/);
assert.match(source, /PASS Managed Python runtime/);
assert.match(source, /ktx runtime start/);
assert.match(source, /ktx runtime start reuse/);
assert.match(source, /Using existing KTX Python daemon/);
assert.match(source, /ktx runtime stop/);
```
- [ ] **Step 4: Run the failing package artifact tests**
Run:
```bash
node --test scripts/package-artifacts.test.mjs
```
Expected: FAIL. The guard fails because `verifyNpmArtifacts()` still creates
the npm-smoke `.venv`, and the installed CLI smoke assertions fail because
`npmRuntimeSmokeSource()` does not yet isolate or verify the managed runtime.
### Task 2: Make the npm smoke use only the managed runtime
**Files:**
- Modify: `scripts/package-artifacts.mjs`
- Modify: `scripts/package-artifacts.test.mjs`
- Test: `scripts/package-artifacts.test.mjs`
- [ ] **Step 1: Remove the npm-smoke PATH helper**
In `scripts/package-artifacts.mjs`, change the path import from:
```javascript
import { delimiter, dirname, isAbsolute, join, relative, resolve, sep } from 'node:path';
```
to:
```javascript
import { dirname, isAbsolute, join, relative, resolve, sep } from 'node:path';
```
Then delete this exported helper:
```javascript
export function npmSmokePythonEnv(projectDir, baseEnv = process.env) {
const binDir = process.platform === 'win32' ? join(projectDir, '.venv', 'Scripts') : join(projectDir, '.venv', 'bin');
const existingPath = baseEnv.PATH ?? '';
return Object.assign({}, baseEnv, {
PATH: existingPath ? `${binDir}${delimiter}${existingPath}` : binDir,
});
}
```
- [ ] **Step 2: Add runtime-smoke helpers to `npmRuntimeSmokeSource()`**
Inside the template string returned by `npmRuntimeSmokeSource()`, add this
helper immediately after `requireSuccess()`:
```javascript
function requireSuccessWithStderr(label, result, stderrPattern) {
assert.equal(
result.code,
0,
label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr,
);
assert.match(result.stderr, stderrPattern, label + ' stderr did not match ' + stderrPattern);
}
```
Then replace the smoke root setup:
```javascript
const root = await mkdtemp(join(tmpdir(), 'ktx-installed-cli-smoke-'));
try {
const projectDir = join(root, 'project');
const sourceDir = join(root, 'source');
```
with:
```javascript
const root = await mkdtemp(join(tmpdir(), 'ktx-installed-cli-smoke-'));
const previousRuntimeRoot = process.env.KTX_RUNTIME_ROOT;
process.env.KTX_RUNTIME_ROOT = join(root, 'managed-runtime');
let daemonStarted = false;
try {
const projectDir = join(root, 'project');
const sourceDir = join(root, 'source');
```
Finally replace the existing `finally` block at the end of
`npmRuntimeSmokeSource()`:
```javascript
} finally {
await rm(root, { recursive: true, force: true });
}
```
with:
```javascript
} finally {
if (daemonStarted) {
await run('pnpm', ['exec', 'ktx', 'runtime', 'stop']);
}
if (previousRuntimeRoot === undefined) {
delete process.env.KTX_RUNTIME_ROOT;
} else {
process.env.KTX_RUNTIME_ROOT = previousRuntimeRoot;
}
await rm(root, { recursive: true, force: true });
}
```
- [ ] **Step 3: Create the sqlite smoke warehouse without Python**
Inside the template string returned by `npmRuntimeSmokeSource()`, add this
import after the `assert` import:
```javascript
import Database from 'better-sqlite3';
```
Then replace the current `writeSqliteWarehouse()` function:
```javascript
async function writeSqliteWarehouse(projectDir) {
const createDb = await run('python', [
'-c',
[
'import sqlite3',
'import sys',
'db_path = sys.argv[1]',
'conn = sqlite3.connect(db_path)',
'conn.executescript("""',
'DROP TABLE IF EXISTS orders;',
'CREATE TABLE orders (',
' id INTEGER PRIMARY KEY,',
' status TEXT NOT NULL,',
' amount INTEGER NOT NULL',
');',
"INSERT INTO orders (status, amount) VALUES ('paid', 20), ('paid', 30), ('open', 10);",
'""")',
'conn.close()',
].join('\\n'),
join(projectDir, 'warehouse.db'),
]);
requireSuccess('create sqlite warehouse', createDb);
}
```
with this version (the inner backticks are escaped because the function lives
inside the template string returned by `npmRuntimeSmokeSource()`):
```javascript
async function writeSqliteWarehouse(projectDir) {
const database = new Database(join(projectDir, 'warehouse.db'));
try {
database.exec(\`
DROP TABLE IF EXISTS orders;
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
status TEXT NOT NULL,
amount INTEGER NOT NULL
);
INSERT INTO orders (status, amount) VALUES ('paid', 20), ('paid', 30), ('open', 10);
\`);
} finally {
database.close();
}
}
```
- [ ] **Step 4: Assert the isolated runtime is initially missing**
In `npmRuntimeSmokeSource()`, insert this block immediately after the public
package version assertion:
```javascript
const runtimeStatusBefore = parseJsonResult(
'ktx runtime status missing',
await run('pnpm', ['exec', 'ktx', 'runtime', 'status', '--json']),
);
assert.equal(runtimeStatusBefore.kind, 'missing');
assert.equal(runtimeStatusBefore.layout.runtimeRoot, process.env.KTX_RUNTIME_ROOT);
process.stdout.write('ktx managed runtime starts missing in isolated root\\n');
```
- [ ] **Step 5: Assert first `sl query --yes` performs lazy managed install**
In `npmRuntimeSmokeSource()`, replace the current `slQuery` verification block:
```javascript
const slQuery = await run('pnpm', ['exec', 'ktx', 'sl', 'query',
'--connection-id',
'warehouse',
'--measure',
'orders.order_count',
'--format',
'json',
'--yes',
'--project-dir',
projectDir,
]);
requireSuccess('ktx sl query', slQuery);
requireOutput('ktx sl query', slQuery, /"mode": "compile_only"/);
requireOutput('ktx sl query', slQuery, /orders/);
```
with:
```javascript
const slQuery = await run('pnpm', ['exec', 'ktx', 'sl', 'query',
'--connection-id',
'warehouse',
'--measure',
'orders.order_count',
'--format',
'json',
'--yes',
'--project-dir',
projectDir,
]);
requireSuccessWithStderr(
'ktx sl query first managed runtime install',
slQuery,
/Installing KTX Python runtime \(core\) with uv[\s\S]*KTX Python runtime ready:/,
);
requireOutput('ktx sl query first managed runtime install', slQuery, /"mode": "compile_only"/);
requireOutput('ktx sl query first managed runtime install', slQuery, /orders/);
const runtimeStatusAfter = parseJsonResult(
'ktx runtime status ready',
await run('pnpm', ['exec', 'ktx', 'runtime', 'status', '--json']),
);
assert.equal(runtimeStatusAfter.kind, 'ready');
assert.deepEqual(runtimeStatusAfter.manifest.features, ['core']);
assert.equal(runtimeStatusAfter.layout.runtimeRoot, process.env.KTX_RUNTIME_ROOT);
process.stdout.write('ktx managed runtime lazy install verified\\n');
```
- [ ] **Step 6: Add runtime doctor and daemon lifecycle smoke**
In `npmRuntimeSmokeSource()`, insert this block immediately after the
`sqliteSlQuery` verification block:
```javascript
const runtimeDoctor = await run('pnpm', ['exec', 'ktx', 'runtime', 'doctor']);
requireSuccess('ktx runtime doctor', runtimeDoctor);
requireOutput('ktx runtime doctor', runtimeDoctor, /PASS uv/);
requireOutput('ktx runtime doctor', runtimeDoctor, /PASS Bundled Python wheel/);
requireOutput('ktx runtime doctor', runtimeDoctor, /PASS Managed Python runtime/);
process.stdout.write('ktx runtime doctor verified\\n');
const runtimeStart = await run('pnpm', ['exec', 'ktx', 'runtime', 'start']);
requireSuccess('ktx runtime start', runtimeStart);
daemonStarted = true;
requireOutput('ktx runtime start', runtimeStart, /Started KTX Python daemon/);
requireOutput('ktx runtime start', runtimeStart, /url: http:\\/\\/127\\.0\\.0\\.1:\\d+/);
requireOutput('ktx runtime start', runtimeStart, /features: core/);
const runtimeStartReuse = await run('pnpm', ['exec', 'ktx', 'runtime', 'start']);
requireSuccess('ktx runtime start reuse', runtimeStartReuse);
requireOutput('ktx runtime start reuse', runtimeStartReuse, /Using existing KTX Python daemon/);
requireOutput('ktx runtime start reuse', runtimeStartReuse, /features: core/);
const runtimeStop = await run('pnpm', ['exec', 'ktx', 'runtime', 'stop']);
requireSuccess('ktx runtime stop', runtimeStop);
daemonStarted = false;
requireOutput('ktx runtime stop', runtimeStop, /Stopped KTX Python daemon/);
process.stdout.write('ktx runtime daemon lifecycle verified\\n');
```
- [ ] **Step 7: Remove npm-smoke Python preparation from artifact verification**
In `scripts/package-artifacts.mjs`, replace `verifyNpmArtifacts()` with this
implementation:
```javascript
async function verifyNpmArtifacts(layout, tmpRoot) {
for (const packageInfo of NPM_ARTIFACT_PACKAGES) {
await assertPathExists(layout.npmTarballs[packageInfo.name], `${packageInfo.name} tarball`);
}
const projectDir = join(tmpRoot, 'npm-clean-install');
await mkdir(projectDir, { recursive: true });
await writeFile(
join(projectDir, 'package.json'),
`${JSON.stringify(npmSmokePackageJson(layout), null, 2)}\n`,
);
await writeFile(join(projectDir, 'verify-npm.mjs'), npmVerifySource());
await writeFile(join(projectDir, 'verify-installed-cli.mjs'), npmRuntimeSmokeSource());
await writeFile(join(projectDir, 'verify-installed-demo.mjs'), npmDemoSmokeSource());
await runCommand('pnpm', ['install'], { cwd: projectDir });
await runCommand('pnpm', ['rebuild', 'better-sqlite3'], { cwd: projectDir });
await runCommand('node', ['verify-npm.mjs'], { cwd: projectDir });
await runCommand('pnpm', ['exec', 'ktx', '--version'], { cwd: projectDir });
await runCommand('node', ['verify-installed-cli.mjs'], { cwd: projectDir });
await runCommand('node', ['verify-installed-demo.mjs'], { cwd: projectDir });
}
```
- [ ] **Step 8: Run the focused package artifact tests**
Run:
```bash
node --test scripts/package-artifacts.test.mjs
```
Expected: PASS.
- [ ] **Step 9: Commit the release-smoke implementation**
Run:
```bash
git add scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs
git commit -m "test: verify managed runtime in public package smoke"
```
### Task 3: Verify the release-smoke surface
**Files:**
- Test: `scripts/package-artifacts.test.mjs`
- Test: `scripts/package-artifacts.mjs`
- [ ] **Step 1: Run script unit tests that cover artifact packaging**
Run:
```bash
node --test scripts/build-python-runtime-wheel.test.mjs scripts/build-public-npm-package.test.mjs scripts/package-artifacts.test.mjs scripts/published-package-smoke.test.mjs scripts/release-readiness.test.mjs
```
Expected: PASS.
- [ ] **Step 2: Run the public package artifact smoke**
Run:
```bash
pnpm run artifacts:verify
```
Expected: PASS. The `verify-installed-cli.mjs` output must include:
```text
ktx managed runtime starts missing in isolated root
ktx managed runtime lazy install verified
ktx runtime doctor verified
ktx runtime daemon lifecycle verified
```
- [ ] **Step 3: Run release readiness**
Run:
```bash
pnpm run release:readiness
```
Expected: PASS. The report must still list `@kaelio/ktx` as the only npm
package and must still report registry publishing as disabled by
`release-policy.json`.
- [ ] **Step 4: Run pre-commit for changed files**
Run:
```bash
if [ -d .venv ]; then source .venv/bin/activate; fi
uv run pre-commit run --files scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs
```
Expected: PASS. If pre-commit cannot run because the local environment lacks a
compatible hook version, record the exact failure and keep the passing
`node --test` and artifact smoke results.
- [ ] **Step 5: Commit verification fixes if needed**
If Step 1, Step 2, Step 3, or Step 4 required edits, run:
```bash
git add scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs
git commit -m "test: finalize managed runtime release smoke"
```
If no files changed after Task 2, do not create an empty commit.
## Acceptance criteria
- `verifyNpmArtifacts()` no longer creates a Python `.venv`, no longer calls
`pythonArtifactInstallArgs()`, and no longer runs npm smoke scripts with a
custom Python venv at the front of `PATH`.
- The installed public npm smoke creates its sqlite warehouse with
`better-sqlite3` and does not shell out to `python`.
- The installed public npm smoke sets an isolated `KTX_RUNTIME_ROOT` and
confirms that `ktx runtime status --json` starts as `missing`.
- The first installed `ktx sl query --yes` installs the `core` managed Python
runtime from bundled npm package assets and still returns compile-only SQL.
- A second semantic query executes against sqlite using the installed managed
runtime.
- `ktx runtime doctor` passes after lazy install.
- `ktx runtime start` starts a core daemon, a second `ktx runtime start` reuses
the daemon, and `ktx runtime stop` stops it.
- The separate Python artifact verification still installs and tests the
Python wheel directly.
- Focused script tests, `pnpm run artifacts:verify`, release readiness, and
pre-commit pass or have explicitly recorded environment blockers.
## Self-review
- Spec coverage: the previous six plans cover the bundled wheel, runtime
installer, `sl query` command integration, daemon lifecycle, local embeddings,
and public npm package surface. This plan covers release-flow checks for clean
install of the packed npm package, first-run managed runtime install from the
bundled wheel, one-shot semantic-layer query through the managed runtime,
runtime status and doctor output, and daemon start/reuse/stop.
- Remaining intentional gap: optional `local-embeddings` smoke remains outside
the default release artifact smoke because the spec permits it in a separate
job or opt-in check and the dependency downloads are large.
- Placeholder scan: no steps contain placeholder implementation language.
- Type consistency: runtime feature names remain `core` and
`local-embeddings`; the public npm package name remains `@kaelio/ktx`; the
runtime root environment variable is `KTX_RUNTIME_ROOT`.

View file

@ -1,657 +0,0 @@
# Managed Runtime Docs and Postgres Smoke Cleanup Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove the remaining manual Python service guidance from the Postgres
historic SQL smoke and update public docs so the npm-managed Python runtime is
the documented path.
**Architecture:** Keep the existing managed-runtime code unchanged. Add source
and docs guards first, then make the Postgres historic smoke use the
CLI-managed core daemon through `createKtxCliLocalIngestAdapters()`, and update
the README files that still describe internal package artifacts, manual
`ktx-daemon` startup, or `python-service/`.
**Tech Stack:** Bash, Node 22 ESM, `node:test`, Markdown, pnpm, uv, KTX CLI
managed Python runtime.
---
## Existing status
This plan is based on
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The following plans are based on that spec and are already implemented in this
worktree:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-agent-mcp-semantic-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-local-ingest-daemon-runtime.md`
Implementation evidence found before writing this plan includes:
- `scripts/build-python-runtime-wheel.mjs` and
`packages/cli/assets/python/manifest.json`.
- `packages/cli/src/managed-python-runtime.ts`,
`packages/cli/src/runtime.ts`, and
`packages/cli/src/commands/runtime-commands.ts`.
- `packages/cli/src/managed-python-command.ts` and managed `ktx sl query`
runtime policy flags.
- `packages/cli/src/managed-python-daemon.ts` and `ktx runtime start` /
`ktx runtime stop`.
- `packages/cli/src/managed-local-embeddings.ts` and local embeddings setup
wiring.
- `scripts/build-public-npm-package.mjs`, release policy updates, release
smoke coverage, and opt-in local embeddings smoke coverage.
- `packages/cli/src/agent-runtime.ts` and `packages/cli/src/serve.ts` now
create managed semantic-layer compute when no explicit semantic HTTP URL is
provided.
- `packages/cli/src/managed-python-http.ts`,
`packages/cli/src/local-adapters.ts`, `packages/cli/src/ingest.ts`,
`packages/cli/src/scan.ts`, and `packages/cli/src/serve.ts` wire local ingest
helper paths to the managed core daemon.
The remaining drift is documentation and one example smoke script:
- `examples/postgres-historic/scripts/smoke.sh` still checks for
`python-service/.venv`, starts `uvicorn app.main:app`, and exports
`KTX_SQL_ANALYSIS_URL`.
- `examples/postgres-historic/README.md` still documents
`python-service/.venv` or `KTX_SQL_ANALYSIS_URL` as a prerequisite.
- `examples/package-artifacts/README.md` still says the npm smoke installs
generated `@ktx/context` and `@ktx/cli` tarballs.
- `README.md` still presents source-tree `pnpm run ktx -- ...` commands as the
quick start and tells users to start `ktx-daemon` manually for MCP.
This plan closes that drift. It does not rename internal workspace packages and
does not remove explicit daemon URL override behavior from production code.
## File structure
- Modify `scripts/examples-docs.test.mjs`: add regression coverage for managed
runtime docs, public npm package docs, and the Postgres smoke script.
- Modify `examples/postgres-historic/scripts/smoke.sh`: remove
`python-service/` startup and pass managed daemon options into stage-only
historic SQL ingest.
- Modify `examples/postgres-historic/README.md`: document the managed runtime
and remove old SQL-analysis service instructions.
- Modify `examples/package-artifacts/README.md`: describe the single public
`@kaelio/ktx` npm artifact and managed runtime smoke.
- Modify `README.md`: make public `@kaelio/ktx` invocation modes and managed
runtime commands visible while keeping source-tree development commands in
the development section.
### Task 1: Add failing docs and smoke guards
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Add public runtime README assertions**
In `scripts/examples-docs.test.mjs`, insert this test after the existing
`walks through ktx connection list and ktx connection test in the README
quickstart` test:
```javascript
it('documents public npm and managed runtime usage in the README', async () => {
const rootReadme = await readText('README.md');
assert.match(rootReadme, /npx @kaelio\/ktx setup demo --no-input/);
assert.match(rootReadme, /npx @kaelio\/ktx sl query/);
assert.match(rootReadme, /npm install @kaelio\/ktx/);
assert.match(rootReadme, /npm install -g @kaelio\/ktx/);
assert.match(rootReadme, /ktx runtime install/);
assert.match(rootReadme, /ktx runtime status/);
assert.match(rootReadme, /ktx runtime doctor/);
assert.match(rootReadme, /ktx runtime start/);
assert.match(rootReadme, /ktx runtime stop/);
assert.match(rootReadme, /ktx serve --mcp stdio/);
assert.doesNotMatch(rootReadme, /uv run ktx-daemon serve-http/);
assert.doesNotMatch(rootReadme, /--semantic-compute-url http:\/\/127\.0\.0\.1:8765/);
});
```
- [ ] **Step 2: Add package artifact README assertions**
In `scripts/examples-docs.test.mjs`, insert this test after the new public
runtime README test:
```javascript
it('documents the public package artifact smoke shape', async () => {
const readme = await readText('examples/package-artifacts/README.md');
assert.match(readme, /@kaelio\/ktx/);
assert.match(readme, /managed Python runtime/);
assert.match(readme, /ktx runtime status/);
assert.match(readme, /ktx runtime doctor/);
assert.doesNotMatch(readme, /@ktx\/context/);
assert.doesNotMatch(readme, /@ktx\/cli/);
assert.doesNotMatch(readme, /python -m ktx_daemon semantic-validate/);
});
```
- [ ] **Step 3: Extend Postgres smoke assertions**
In the existing `documents the Postgres historic SQL smoke example` test in
`scripts/examples-docs.test.mjs`, add these assertions after
`assert.match(smoke, /pg_stat_statements_reset/);`:
```javascript
assert.match(smoke, /KTX_RUNTIME_ROOT/);
assert.match(smoke, /managedDaemon/);
assert.match(smoke, /installPolicy: 'auto'/);
assert.match(smoke, /getKtxCliPackageInfo/);
assert.doesNotMatch(smoke, /python-service/);
assert.doesNotMatch(smoke, /PYTHON_SERVICE/);
assert.doesNotMatch(smoke, /uvicorn app\.main:app/);
assert.doesNotMatch(smoke, /export KTX_SQL_ANALYSIS_URL/);
assert.doesNotMatch(readme, /python-service/);
assert.doesNotMatch(readme, /KTX_SQL_ANALYSIS_URL/);
```
- [ ] **Step 4: Run the docs test to verify it fails**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL. The failure includes missing `@kaelio/ktx` README matches and
the existing `python-service` / `KTX_SQL_ANALYSIS_URL` references in the
Postgres smoke files.
### Task 2: Move the Postgres historic smoke to the managed runtime
**Files:**
- Modify: `examples/postgres-historic/scripts/smoke.sh`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Remove Python service process state**
In `examples/postgres-historic/scripts/smoke.sh`, replace the variable block:
```bash
KTX_BIN="$KTX_ROOT/packages/cli/dist/bin.js"
PYTHON_SERVICE_LOG="$PROJECT_PARENT/python-service.log"
PYTHON_SERVICE_PID=""
```
with:
```bash
KTX_BIN="$KTX_ROOT/packages/cli/dist/bin.js"
export KTX_RUNTIME_ROOT="$PROJECT_PARENT/managed-runtime"
unset KTX_DAEMON_URL
unset KTX_SQL_ANALYSIS_URL
```
- [ ] **Step 2: Replace cleanup**
In `examples/postgres-historic/scripts/smoke.sh`, replace the `cleanup()`
function with:
```bash
cleanup() {
if [[ -f "$KTX_BIN" ]]; then
node "$KTX_BIN" runtime stop >/dev/null 2>&1 || true
fi
if [[ "${KTX_POSTGRES_HISTORIC_KEEP_DOCKER:-0}" != "1" ]]; then
docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true
fi
}
trap cleanup EXIT
```
- [ ] **Step 3: Delete the old SQL analysis service starter**
Delete the entire `start_sql_analysis_if_needed()` function from
`examples/postgres-historic/scripts/smoke.sh`. The deleted function begins with
this line:
```bash
start_sql_analysis_if_needed() {
```
and ends with this line:
```bash
}
```
immediately before the `latest_manifest()` function.
- [ ] **Step 4: Pass managed daemon options to stage-only ingest**
In the Node heredoc inside `run_historic_stage_only()`, replace this block:
```javascript
const { createKtxCliLocalIngestAdapters } = await import(join(ktxRoot, 'packages/cli/dist/local-adapters.js'));
const project = await loadKtxProject({ projectDir });
const adapters = createKtxCliLocalIngestAdapters(project, { historicSqlConnectionId: 'warehouse' });
```
with:
```javascript
const { createKtxCliLocalIngestAdapters } = await import(join(ktxRoot, 'packages/cli/dist/local-adapters.js'));
const { getKtxCliPackageInfo } = await import(join(ktxRoot, 'packages/cli/dist/index.js'));
const project = await loadKtxProject({ projectDir });
const cliVersion = getKtxCliPackageInfo().version;
const managedRuntimeIo = { stdout: process.stdout, stderr: process.stderr };
const adapters = createKtxCliLocalIngestAdapters(project, {
historicSqlConnectionId: 'warehouse',
managedDaemon: {
cliVersion,
installPolicy: 'auto',
io: managedRuntimeIo,
},
});
```
- [ ] **Step 5: Remove the old starter call**
Delete this line from the bottom half of
`examples/postgres-historic/scripts/smoke.sh`:
```bash
start_sql_analysis_if_needed
```
- [ ] **Step 6: Run the docs test to verify the script guards pass**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: still FAIL, because the README files have not been updated yet. The
Postgres smoke script assertions now pass.
### Task 3: Update Postgres historic and artifact docs
**Files:**
- Modify: `examples/postgres-historic/README.md`
- Modify: `examples/package-artifacts/README.md`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Replace Postgres prerequisites**
In `examples/postgres-historic/README.md`, replace the `## Prerequisites`
section with:
```markdown
## Prerequisites
- Docker with Compose v2
- Node and pnpm matching the KTX workspace
- `uv` on `PATH` so the KTX-managed Python runtime can install the bundled
runtime wheel
```
- [ ] **Step 2: Replace the smoke run description**
In `examples/postgres-historic/README.md`, replace the paragraph after the
`examples/postgres-historic/scripts/smoke.sh` command with:
```markdown
The smoke creates a temporary KTX project, isolates the managed Python runtime
under the temporary project parent, starts Postgres on `127.0.0.1:55432`, and
uses this connection URL:
```
- [ ] **Step 3: Update the full ingest command**
In `examples/postgres-historic/README.md`, replace the manual ingest command:
```bash
node packages/cli/dist/bin.js --project-dir /tmp/ktx-postgres-historic dev ingest run \
--connection-id warehouse \
--adapter historic-sql \
--plain \
--no-input
```
with:
```bash
pnpm run ktx -- dev ingest run --project-dir /tmp/ktx-postgres-historic \
--connection-id warehouse \
--adapter historic-sql \
--plain \
--yes \
--no-input
```
- [ ] **Step 4: Replace SQL-analysis troubleshooting**
In `examples/postgres-historic/README.md`, replace the final troubleshooting
bullet:
```markdown
- SQL-analysis failures: set `KTX_SQL_ANALYSIS_URL` to the running service URL
or create `python-service/.venv` before running `scripts/smoke.sh`.
```
with:
```markdown
- SQL-analysis failures: run `pnpm run ktx -- runtime doctor` from the KTX
repository root and confirm `uv`, the bundled Python wheel, and the managed
runtime all pass.
```
- [ ] **Step 5: Replace package artifact README body**
Replace the full contents of `examples/package-artifacts/README.md` with:
````markdown
# Package artifact smoke checks
The package artifact smoke checks create temporary projects instead of storing
sample projects in this directory. Run the checks from `ktx/`:
```bash
pnpm run artifacts:check
```
The npm smoke project installs the generated public `@kaelio/ktx` tarball,
imports the package entry point, and runs installed `ktx` commands against a
generated local project.
The managed runtime smoke isolates `KTX_RUNTIME_ROOT`, verifies
`ktx runtime status`, runs `ktx sl query --yes` to install the core runtime from
the bundled wheel, checks `ktx runtime doctor`, starts and reuses the managed
daemon, and stops it.
The Python smoke project still installs the Python artifacts directly because
it verifies the standalone Python distributions that feed the bundled runtime
wheel.
````
- [ ] **Step 6: Run the docs test to verify these docs pass**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: still FAIL, because `README.md` lacks the public npm managed runtime
documentation. The Postgres and package artifact assertions now pass.
### Task 4: Update the root README public runtime path
**Files:**
- Modify: `README.md`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Replace quick start**
In `README.md`, replace the `## Quick start` section through the end of the
full-demo paragraph with:
````markdown
## Quick start
Run the pre-seeded demo through the public npm package:
```bash
npx @kaelio/ktx setup demo --no-input
npx @kaelio/ktx setup demo inspect
```
The default demo uses packaged sample data and prebuilt context. It does not
require API keys, network access, or an LLM provider.
To replay the packaged ingest run, use:
```bash
npx @kaelio/ktx setup demo --mode replay --no-input
```
To run the full agentic demo with an LLM provider, set a provider key for the
current process:
```bash
ANTHROPIC_API_KEY=$YOUR_ANTHROPIC_API_KEY \
npx @kaelio/ktx setup demo --mode full --no-input
```
Interactive full-demo setup can prompt for a provider key without writing the
key to `ktx.yaml`.
You can also install the CLI in a project or globally:
```bash
npm install @kaelio/ktx
npx ktx --help
npm install -g @kaelio/ktx
ktx --help
```
````
- [ ] **Step 2: Replace local project setup command**
In the `## Build a local project` section of `README.md`, replace:
```bash
uv sync --all-packages
source .venv/bin/activate
PROJECT_DIR="$(mktemp -d)/ktx-demo"
pnpm run ktx -- init "$PROJECT_DIR" --name ktx-demo
```
with:
```bash
npm install @kaelio/ktx
PROJECT_DIR="$(mktemp -d)/ktx-demo"
npx ktx init "$PROJECT_DIR" --name ktx-demo
```
- [ ] **Step 3: Replace README command prefixes**
In `README.md`, replace the source-tree command prefix `pnpm run ktx --` with
`npx ktx` in all user workflow commands under `## Build a local project`,
`### Scan the demo warehouse`, and `## Serve MCP`. Keep `pnpm run ktx --` in
the `## Development` section.
For example, this command:
```bash
pnpm run ktx -- sl query --project-dir "$PROJECT_DIR" \
```
becomes:
```bash
npx ktx sl query --project-dir "$PROJECT_DIR" \
```
- [ ] **Step 4: Add managed runtime section**
Insert this section after the scan walkthrough in `README.md`:
````markdown
## Managed Python runtime
KTX installs its Python runtime only when a Python-backed command needs it.
The runtime lives outside the npm cache, is versioned by the installed CLI
version, and is managed by `ktx runtime` commands:
```bash
npx ktx runtime install --yes
npx ktx runtime status
npx ktx runtime doctor
npx ktx runtime start
npx ktx runtime stop
```
Commands such as `npx @kaelio/ktx sl query ... --yes` can install the core
runtime lazily from the bundled wheel. Local embeddings remain lazy; prepare
them only when you select local `sentence-transformers` embeddings:
```bash
npx ktx runtime install --feature local-embeddings --yes
npx ktx runtime start --feature local-embeddings
```
````
- [ ] **Step 5: Replace Serve MCP section**
In `README.md`, replace the full `## Serve MCP` section with:
````markdown
## Serve MCP
Start the stdio MCP server from the project directory:
```bash
npx ktx serve --mcp stdio --project-dir "$PROJECT_DIR" \
--user-id local \
--semantic-compute \
--execute-queries \
--yes
```
The `--semantic-compute` flag uses the managed Python runtime when no explicit
semantic compute URL is provided. KTX starts or reuses the managed runtime as
needed.
The MCP server exposes `connection_list`, `knowledge_search`,
`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`,
`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`,
`ingest_status`, `ingest_report`, and `ingest_replay`.
````
- [ ] **Step 6: Update release status wording**
In `README.md`, replace this paragraph in `## Release status`:
```markdown
This repository is prepared for source publication. Package publishing is still
disabled by `release-policy.json`; registry names, public versions, package
visibility, and provenance policy must be chosen before publishing artifacts to
npm or Python package indexes.
```
with:
```markdown
This repository builds a single public npm artifact named `@kaelio/ktx`.
Package publishing is still disabled by `release-policy.json`; registry
credentials, public versions, release tags, and provenance policy must be
chosen before publishing artifacts to npm or Python package indexes.
```
- [ ] **Step 7: Run the docs test to verify the README passes**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
### Task 5: Final verification and commit
**Files:**
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `examples/postgres-historic/scripts/smoke.sh`
- Verify: `examples/postgres-historic/README.md`
- Verify: `examples/package-artifacts/README.md`
- Verify: `README.md`
- [ ] **Step 1: Run the script test suite affected by docs**
Run:
```bash
node --test scripts/examples-docs.test.mjs scripts/check-boundaries.test.mjs
```
Expected: PASS.
- [ ] **Step 2: Run the boundary check**
Run:
```bash
node scripts/check-boundaries.mjs
```
Expected:
```text
ktx boundary check passed
```
- [ ] **Step 3: Search for removed external runtime references**
Run:
```bash
rg -n "python-service|uvicorn app\\.main:app|export KTX_SQL_ANALYSIS_URL|uv run ktx-daemon serve-http|@ktx/context.*@ktx/cli" README.md examples/postgres-historic/README.md examples/postgres-historic/scripts/smoke.sh examples/package-artifacts/README.md
```
Expected: no matches.
- [ ] **Step 4: Commit**
```bash
git add scripts/examples-docs.test.mjs \
examples/postgres-historic/scripts/smoke.sh \
examples/postgres-historic/README.md \
examples/package-artifacts/README.md \
README.md
git commit -m "docs: align managed runtime examples"
```
## Acceptance criteria
- The Postgres historic SQL smoke no longer references `python-service/`,
`uvicorn app.main:app`, or `export KTX_SQL_ANALYSIS_URL`.
- The stage-only Postgres historic smoke uses `createKtxCliLocalIngestAdapters`
with managed daemon options and `installPolicy: 'auto'`.
- The root README documents `npx @kaelio/ktx`, local `npx ktx`, global `ktx`,
`ktx runtime ...`, and MCP `--semantic-compute --yes` managed-runtime usage.
- Package artifact docs describe the single public `@kaelio/ktx` tarball and
the managed runtime smoke.
- `node --test scripts/examples-docs.test.mjs scripts/check-boundaries.test.mjs`
passes.
- `node scripts/check-boundaries.mjs` passes.
## Self-review
- Spec coverage: This plan covers the remaining user-facing drift from the
npm-managed runtime spec by removing manual Python service guidance,
documenting public `@kaelio/ktx` invocation modes, and making the Postgres
example smoke use the managed core daemon.
- Placeholder scan: The plan contains exact files, edits, commands, expected
outcomes, and commit instructions.
- Type consistency: The plan uses the existing `managedDaemon` option shape
from `packages/cli/src/local-adapters.ts` and the existing
`installPolicy: 'auto'` value from `packages/cli/src/managed-python-command.ts`.

View file

@ -1,377 +0,0 @@
# Managed Runtime Prune Smoke and Docs Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Prove and document `ktx runtime prune` as part of the npm-managed
Python runtime release contract.
**Architecture:** The prune command already exists in the CLI runtime layer, so
this plan adds black-box package smoke coverage and public documentation only.
The smoke creates an isolated stale versioned runtime directory, previews it,
verifies confirmation is required, and removes it through the installed
`@kaelio/ktx` package.
**Tech Stack:** Node 22 ESM scripts, `node:test`, pnpm, Markdown, KTX CLI
managed Python runtime.
---
## Current state
This plan follows
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The following plan files are based on that spec and are implemented in the
current tree:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-agent-mcp-semantic-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-local-ingest-daemon-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-docs-and-postgres-smoke-cleanup.md`
- `docs/superpowers/plans/2026-05-11-published-package-managed-runtime-smoke.md`
- `docs/superpowers/plans/2026-05-11-public-npm-release-handoff.md`
Implementation evidence found before writing this plan includes:
- `packages/cli/assets/python/manifest.json` and
`packages/cli/assets/python/kaelio_ktx-0.1.0-py3-none-any.whl`.
- `packages/cli/src/managed-python-runtime.ts`, including
`installManagedPythonRuntime()`, `doctorManagedPythonRuntime()`, and
`pruneManagedPythonRuntimes()`.
- `packages/cli/src/runtime.ts`, including the `install`, `status`,
`doctor`, `start`, `stop`, and `prune` runtime command runner branches.
- `packages/cli/src/commands/runtime-commands.ts`, including the
`runtime prune --dry-run` and `runtime prune --yes` Commander wiring.
- `scripts/build-public-npm-package.mjs`, `scripts/package-artifacts.mjs`,
`scripts/published-package-smoke.mjs`, `scripts/local-embeddings-runtime-smoke.mjs`,
`scripts/publish-public-npm-package.mjs`, `release-policy.json`, and
`.github/workflows/release.yml`.
- `README.md` and `examples/package-artifacts/README.md` document the managed
runtime but do not mention `ktx runtime prune`.
The remaining gap is narrow: the spec lists `ktx runtime prune` as part of the
runtime management command family, but public docs and installed package smoke
coverage only prove `install`, `status`, `doctor`, `start`, and `stop`.
## File structure
- Modify `scripts/package-artifacts.test.mjs`: assert that the generated
installed npm smoke covers `ktx runtime prune --dry-run`, confirmation
failure, and confirmed deletion.
- Modify `scripts/package-artifacts.mjs`: extend `npmRuntimeSmokeSource()` to
create a stale runtime directory and exercise `ktx runtime prune`.
- Modify `scripts/examples-docs.test.mjs`: require public docs to mention
`ktx runtime prune --dry-run` and `ktx runtime prune --yes`.
- Modify `README.md`: add prune commands and one sentence describing preview
and confirmed deletion.
- Modify `examples/package-artifacts/README.md`: describe prune coverage in the
package artifact smoke.
### Task 1: Add installed package prune smoke coverage
**Files:**
- Modify: `scripts/package-artifacts.test.mjs`
- Modify: `scripts/package-artifacts.mjs`
- [ ] **Step 1: Add failing smoke-source assertions**
In `scripts/package-artifacts.test.mjs`, inside
`it('runs installed CLI commands through the public package runtime', () => {`
and immediately after the existing assertions for `ktx runtime stop`, add:
```javascript
assert.match(source, /ktx runtime prune dry run/);
assert.match(source, /0\.0\.0/);
assert.match(source, /ktx runtime prune needs confirmation/);
assert.match(source, /Refusing to prune without --yes/);
assert.match(source, /ktx runtime prune confirmed/);
assert.match(source, /Removed stale KTX Python runtimes/);
assert.match(source, /assert\.rejects\(\(\) => access\(staleRuntimeDir\)\)/);
```
- [ ] **Step 2: Run the package artifact test and verify failure**
Run:
```bash
node --test scripts/package-artifacts.test.mjs
```
Expected: FAIL in the installed CLI smoke source test because
`npmRuntimeSmokeSource()` does not yet contain the prune labels, confirmation
guard, or stale runtime removal assertion.
- [ ] **Step 3: Extend the generated installed CLI smoke**
In `scripts/package-artifacts.mjs`, inside `npmRuntimeSmokeSource()`, add this
block immediately after:
```javascript
process.stdout.write('ktx runtime daemon lifecycle verified\n');
```
Add:
```javascript
const staleRuntimeDir = join(process.env.KTX_RUNTIME_ROOT, '0.0.0');
await mkdir(staleRuntimeDir, { recursive: true });
const runtimePruneDryRun = await run('pnpm', ['exec', 'ktx', 'runtime', 'prune', '--dry-run']);
requireSuccess('ktx runtime prune dry run', runtimePruneDryRun);
requireOutput('ktx runtime prune dry run', runtimePruneDryRun, /Stale KTX Python runtimes/);
requireOutput('ktx runtime prune dry run', runtimePruneDryRun, /0\.0\.0/);
await access(staleRuntimeDir);
const runtimePruneNeedsConfirmation = await run('pnpm', ['exec', 'ktx', 'runtime', 'prune']);
assert.equal(runtimePruneNeedsConfirmation.code, 1, 'ktx runtime prune without --yes must fail');
assert.equal(runtimePruneNeedsConfirmation.stdout, '', 'ktx runtime prune confirmation failure wrote stdout');
assert.match(runtimePruneNeedsConfirmation.stderr, /Refusing to prune without --yes/);
const runtimePruneConfirmed = await run('pnpm', ['exec', 'ktx', 'runtime', 'prune', '--yes']);
requireSuccess('ktx runtime prune confirmed', runtimePruneConfirmed);
requireOutput('ktx runtime prune confirmed', runtimePruneConfirmed, /Removed stale KTX Python runtimes/);
requireOutput('ktx runtime prune confirmed', runtimePruneConfirmed, /0\.0\.0/);
await assert.rejects(() => access(staleRuntimeDir));
process.stdout.write('ktx runtime prune verified\n');
```
No import changes are needed because the generated smoke already imports
`assert`, `access`, `mkdir`, and `join`.
- [ ] **Step 4: Run the package artifact test and verify pass**
Run:
```bash
node --test scripts/package-artifacts.test.mjs
```
Expected: PASS. The source assertions now find prune dry-run coverage,
confirmation failure coverage, confirmed prune coverage, and stale directory
deletion verification.
- [ ] **Step 5: Commit the smoke coverage**
Run:
```bash
git add scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs
git commit -m "test: cover managed runtime prune in package smoke"
```
### Task 2: Document runtime prune in public docs
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- Modify: `README.md`
- Modify: `examples/package-artifacts/README.md`
- [ ] **Step 1: Add failing docs assertions**
In `scripts/examples-docs.test.mjs`, inside
`it('documents public npm and managed runtime usage in the README', async () => {`
and immediately after:
```javascript
assert.match(rootReadme, /ktx runtime stop/);
```
Add:
```javascript
assert.match(rootReadme, /ktx runtime prune --dry-run/);
assert.match(rootReadme, /ktx runtime prune --yes/);
```
In the same file, inside
`it('documents the public package artifact smoke shape', async () => {` and
immediately after:
```javascript
assert.match(readme, /ktx runtime doctor/);
```
Add:
```javascript
assert.match(readme, /ktx runtime prune --dry-run/);
assert.match(readme, /ktx runtime prune --yes/);
```
- [ ] **Step 2: Run the docs test and verify failure**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL because `README.md` and
`examples/package-artifacts/README.md` do not yet mention `ktx runtime prune`.
- [ ] **Step 3: Update the root README runtime section**
In `README.md`, in the `## Managed Python runtime` command block, replace:
```bash
npx ktx runtime install --yes
npx ktx runtime status
npx ktx runtime doctor
npx ktx runtime start
npx ktx runtime stop
```
with:
```bash
npx ktx runtime install --yes
npx ktx runtime status
npx ktx runtime doctor
npx ktx runtime start
npx ktx runtime stop
npx ktx runtime prune --dry-run
npx ktx runtime prune --yes
```
Immediately after that command block, add:
```markdown
Use `runtime prune --dry-run` to preview stale runtime directories from older
CLI versions. Add `--yes` to remove those stale directories after daemon
processes are stopped.
```
- [ ] **Step 4: Update package artifact smoke docs**
In `examples/package-artifacts/README.md`, replace:
```markdown
The managed Python runtime smoke isolates `KTX_RUNTIME_ROOT`, verifies
`ktx runtime status`, runs `ktx sl query --yes` to install the core runtime from
the bundled wheel, checks `ktx runtime doctor`, starts and reuses the managed
daemon, and stops it.
```
with:
```markdown
The managed Python runtime smoke isolates `KTX_RUNTIME_ROOT`, verifies
`ktx runtime status`, runs `ktx sl query --yes` to install the core runtime from
the bundled wheel, checks `ktx runtime doctor`, starts and reuses the managed
daemon, stops it, previews a stale runtime with `ktx runtime prune --dry-run`,
verifies confirmation is required, and removes the stale runtime with
`ktx runtime prune --yes`.
```
- [ ] **Step 5: Run the docs test and verify pass**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS. The public README and package artifact README now document
runtime prune alongside the other managed runtime commands.
- [ ] **Step 6: Commit the docs coverage**
Run:
```bash
git add scripts/examples-docs.test.mjs README.md examples/package-artifacts/README.md
git commit -m "docs: document managed runtime prune"
```
### Task 3: Verify the completed prune release surface
**Files:**
- Verify: `scripts/package-artifacts.mjs`
- Verify: `scripts/package-artifacts.test.mjs`
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `README.md`
- Verify: `examples/package-artifacts/README.md`
- [ ] **Step 1: Run focused tests**
Run:
```bash
node --test scripts/package-artifacts.test.mjs scripts/examples-docs.test.mjs
```
Expected: PASS. The source-level tests cover generated package smoke behavior
and docs assertions.
- [ ] **Step 2: Run the installed package artifact smoke**
Run:
```bash
pnpm run artifacts:check
```
Expected: PASS. The generated installed CLI smoke prints:
```text
ktx runtime prune verified
```
and removes the temporary `0.0.0` directory from the isolated
`KTX_RUNTIME_ROOT`.
- [ ] **Step 3: Inspect git status**
Run:
```bash
git status --short
```
Expected: only the five planned files are modified before the final commit, or
no modified files remain after the task commits.
- [ ] **Step 4: Commit verification fixes if needed**
If verification required small corrections, commit only those intended files:
```bash
git add scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs scripts/examples-docs.test.mjs README.md examples/package-artifacts/README.md
git commit -m "test: verify managed runtime prune release surface"
```
## Acceptance criteria
- The generated installed npm package smoke creates a stale versioned runtime
directory under the isolated `KTX_RUNTIME_ROOT`.
- `ktx runtime prune --dry-run` lists the stale runtime and leaves it on disk.
- `ktx runtime prune` without `--yes` exits nonzero and prints the existing
confirmation guidance.
- `ktx runtime prune --yes` removes the stale runtime directory.
- `README.md` lists `ktx runtime prune --dry-run` and
`ktx runtime prune --yes` with the other managed runtime commands.
- `examples/package-artifacts/README.md` describes prune coverage in the
package artifact smoke.
## Self-review
- Spec coverage: this plan covers the remaining visible gap for the runtime
management command family in the npm-managed Python runtime spec. The prune
implementation already exists, and this plan adds release smoke and public
docs coverage.
- Placeholder scan: no placeholder steps, deferred implementation notes, or
unspecified behavior gaps remain.
- Type consistency: the plan uses existing labels and functions:
`npmRuntimeSmokeSource()`, `requireSuccess()`, `requireOutput()`,
`KTX_RUNTIME_ROOT`, `ktx runtime prune --dry-run`, and
`ktx runtime prune --yes`.

View file

@ -1,647 +0,0 @@
# Managed Runtime uv Prerequisite Contract Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining npm-managed Python runtime open decision by
making `uv` a documented, release-policy-checked prerequisite.
**Architecture:** Keep the runtime installer behavior simple: the CLI locates
`uv` on `PATH` and prints a focused error when it is missing. Encode that
decision in `release-policy.json`, validate it during release readiness, use one
shared runtime error message, and document the prerequisite in public docs.
**Tech Stack:** Node 22 ESM scripts, `node:test`, TypeScript, Vitest, JSON
release policy, Markdown.
---
## Existing status
This plan is based on
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The following plan files are based on that spec and are already implemented in
this worktree:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-agent-mcp-semantic-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-local-ingest-daemon-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-docs-and-postgres-smoke-cleanup.md`
- `docs/superpowers/plans/2026-05-11-published-package-managed-runtime-smoke.md`
- `docs/superpowers/plans/2026-05-11-public-npm-release-handoff.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-prune-smoke-and-docs.md`
Implementation evidence found before writing this plan includes:
- `packages/cli/assets/python/manifest.json` and the bundled
`kaelio_ktx-0.1.0-py3-none-any.whl`.
- `packages/cli/src/managed-python-runtime.ts`, including runtime roots,
bundled wheel verification, install, status, doctor, and prune behavior.
- `packages/cli/src/managed-python-command.ts`,
`packages/cli/src/managed-python-daemon.ts`,
`packages/cli/src/managed-local-embeddings.ts`, and
`packages/cli/src/managed-python-http.ts`.
- `scripts/build-public-npm-package.mjs`, `scripts/package-artifacts.mjs`,
`scripts/published-package-smoke.mjs`,
`scripts/local-embeddings-runtime-smoke.mjs`, and
`scripts/publish-public-npm-package.mjs`.
- `release-policy.json` is already in `npm-public-release-ready` mode for
`@kaelio/ktx` `0.1.0` and keeps Python package publishing disabled.
- `README.md` and `examples/package-artifacts/README.md` document the managed
runtime command family, including `runtime prune`.
The remaining spec gap is the open decision in
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`:
```text
KTX still needs a final decision on whether uv is a hard prerequisite or a
bootstrap dependency that KTX downloads automatically.
```
This plan chooses the hard-prerequisite path for the first public release. KTX
will not download `uv` automatically in this release.
## File structure
- Modify `release-policy.json`: add a `runtimeInstaller` policy section that
records the hard `uv` prerequisite decision.
- Modify `scripts/release-readiness.mjs`: validate the runtime installer
policy, include it in readiness reports, and print it in text output.
- Modify `scripts/release-readiness.test.mjs`: cover the accepted policy and
rejection paths for missing or bootstrap-style `uv` policies.
- Modify `packages/cli/src/managed-python-runtime.ts`: export one shared
missing-`uv` message and use it for install and doctor output.
- Modify `packages/cli/src/managed-python-runtime.test.ts`: cover install and
doctor behavior when `uv` is missing.
- Modify `scripts/examples-docs.test.mjs`: require public docs to state the
hard `uv` prerequisite.
- Modify `README.md`: document that `uv` must be on `PATH` and KTX does not
download it automatically.
- Modify `examples/package-artifacts/README.md`: document the artifact smoke
`uv` prerequisite.
### Task 1: Encode the runtime installer policy
**Files:**
- Modify: `release-policy.json`
- Modify: `scripts/release-readiness.test.mjs`
- Modify: `scripts/release-readiness.mjs`
- Test: `scripts/release-readiness.test.mjs`
- [ ] **Step 1: Add failing release policy tests**
In `scripts/release-readiness.test.mjs`, inside the `releasePolicy()` helper
return value, add the `runtimeInstaller` object immediately after
`publishedPackageSmoke`:
```javascript
runtimeInstaller: {
uvStrategy: 'path-prerequisite',
bootstrapUv: false,
missingUvBehavior: 'focused-error',
},
```
In the three `assert.deepEqual(report, { ... })` expectations, add this field
immediately after `publishedPackageSmokeGate`:
```javascript
runtimeInstaller: {
uvStrategy: 'path-prerequisite',
bootstrapUv: false,
missingUvBehavior: 'focused-error',
},
```
Add these tests immediately after the
`it('accepts the npm public release ready policy', async () => { ... })` block:
```javascript
it('rejects npm public release ready mode without a runtime installer policy', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-runtime-policy-missing-test-'));
try {
await writeReadyFixture(root, {
policy: releasePolicy({
releaseMode: 'npm-public-release-ready',
npm: {
publish: true,
registry: null,
access: 'public',
tag: 'latest',
},
publishedPackageSmoke: {
packageName: '@kaelio/ktx',
version: PUBLIC_NPM_PACKAGE_VERSION,
registry: null,
},
runtimeInstaller: undefined,
requiredBeforePublishing: [],
}),
});
await assert.rejects(
() => releaseReadinessReport(root),
/Release policy runtimeInstaller must be a JSON object/,
);
} finally {
await rm(root, { recursive: true, force: true });
}
});
it('rejects uv bootstrap download policy for the first public npm release', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-runtime-policy-bootstrap-test-'));
try {
await writeReadyFixture(root, {
policy: releasePolicy({
releaseMode: 'npm-public-release-ready',
npm: {
publish: true,
registry: null,
access: 'public',
tag: 'latest',
},
publishedPackageSmoke: {
packageName: '@kaelio/ktx',
version: PUBLIC_NPM_PACKAGE_VERSION,
registry: null,
},
runtimeInstaller: {
uvStrategy: 'bootstrap-download',
bootstrapUv: true,
missingUvBehavior: 'download',
},
requiredBeforePublishing: [],
}),
});
await assert.rejects(
() => releaseReadinessReport(root),
/Release policy runtimeInstaller\.uvStrategy must be path-prerequisite/,
);
} finally {
await rm(root, { recursive: true, force: true });
}
});
```
- [ ] **Step 2: Run the release readiness tests and verify failure**
Run:
```bash
node --test scripts/release-readiness.test.mjs
```
Expected: FAIL because `releaseReadinessReport()` does not include
`runtimeInstaller`, and `validateReleasePolicy()` does not validate the new
policy section.
- [ ] **Step 3: Validate the runtime installer policy**
In `scripts/release-readiness.mjs`, add this function immediately after the
`assertRequiredBeforePublishing(policy)` function definition:
```javascript
/**
 * Validate the `runtimeInstaller` section of a release policy.
 *
 * The section is first checked for shape and field types, then each field is
 * compared against the single value this release allows: uv is a PATH
 * prerequisite, uv bootstrapping is disabled, and a missing uv produces a
 * focused error.
 *
 * @param {object} policy Parsed release policy.
 * @throws {Error} When a field holds any value other than the allowed one.
 *   Shape and type failures are reported by the assert* helpers.
 */
function assertRuntimeInstallerPolicy(policy) {
  // Shape check comes first so the field reads below never touch a non-object.
  assertPlainObject(policy.runtimeInstaller, 'Release policy runtimeInstaller');

  const { uvStrategy, bootstrapUv, missingUvBehavior } = policy.runtimeInstaller;
  assertString(uvStrategy, 'Release policy runtimeInstaller.uvStrategy');
  assertBoolean(bootstrapUv, 'Release policy runtimeInstaller.bootstrapUv');
  assertString(missingUvBehavior, 'Release policy runtimeInstaller.missingUvBehavior');

  // Checked in this order so the first mismatch reported is always the same.
  const pinnedValues = [
    [uvStrategy, 'path-prerequisite', 'Release policy runtimeInstaller.uvStrategy must be path-prerequisite'],
    [bootstrapUv, false, 'Release policy runtimeInstaller.bootstrapUv must be false'],
    [missingUvBehavior, 'focused-error', 'Release policy runtimeInstaller.missingUvBehavior must be focused-error'],
  ];
  for (const [actual, expected, message] of pinnedValues) {
    if (actual !== expected) {
      throw new Error(message);
    }
  }
}
```
In `validateReleasePolicy(policy)`, add this call immediately after
`assertRequiredBeforePublishing(policy);`:
```javascript
assertRuntimeInstallerPolicy(policy);
```
In `releaseReadinessReport(rootDir = scriptRootDir())`, add
`runtimeInstaller` to the returned object immediately after
`publishedPackageSmokeGate`:
```javascript
runtimeInstaller: policy.runtimeInstaller,
```
In `main()`, add these lines immediately after the published package smoke
registry line:
```javascript
process.stdout.write(`Runtime uv strategy: ${report.runtimeInstaller.uvStrategy}\n`);
process.stdout.write(
`Runtime uv bootstrap: ${report.runtimeInstaller.bootstrapUv ? 'enabled' : 'disabled'}\n`,
);
```
- [ ] **Step 4: Encode the policy in `release-policy.json`**
Replace `release-policy.json` with this exact content:
```json
{
"schemaVersion": 1,
"releaseMode": "npm-public-release-ready",
"npm": {
"publish": true,
"registry": null,
"access": "public",
"tag": "latest",
"packages": ["@kaelio/ktx"]
},
"python": {
"publish": false,
"repository": null,
"packages": ["ktx-sl", "ktx-daemon", "kaelio-ktx"]
},
"publishedPackageSmoke": {
"packageName": "@kaelio/ktx",
"version": "0.1.0",
"registry": null
},
"runtimeInstaller": {
"uvStrategy": "path-prerequisite",
"bootstrapUv": false,
"missingUvBehavior": "focused-error"
},
"requiredBeforePublishing": []
}
```
- [ ] **Step 5: Run the release readiness tests and verify success**
Run:
```bash
node --test scripts/release-readiness.test.mjs
```
Expected: PASS.
- [ ] **Step 6: Commit the release policy contract**
```bash
git add release-policy.json scripts/release-readiness.mjs scripts/release-readiness.test.mjs
git commit -m "chore: encode uv runtime prerequisite policy"
```
### Task 2: Centralize missing-uv runtime output
**Files:**
- Modify: `packages/cli/src/managed-python-runtime.test.ts`
- Modify: `packages/cli/src/managed-python-runtime.ts`
- Test: `packages/cli/src/managed-python-runtime.test.ts`
- [ ] **Step 1: Add failing missing-uv runtime tests**
In `packages/cli/src/managed-python-runtime.test.ts`, add
`MISSING_UV_RUNTIME_INSTALL_MESSAGE` to the import from
`./managed-python-runtime.js`:
```typescript
import {
MISSING_UV_RUNTIME_INSTALL_MESSAGE,
doctorManagedPythonRuntime,
installManagedPythonRuntime,
managedPythonRuntimeLayout,
pruneManagedPythonRuntimes,
readManagedPythonRuntimeStatus,
verifyRuntimeAsset,
type ManagedPythonRuntimeExec,
} from './managed-python-runtime.js';
```
Inside `describe('installManagedPythonRuntime', () => { ... })`, add this test
after the local embeddings test:
```typescript
// When the injected exec throws for every command, installation must reject
// with the shared missing-uv message after attempting only `uv --version`.
it('fails with the hard-prerequisite message when uv is missing', async () => {
const { assetDir } = await writeAsset(tempDir, 'core-wheel');
// Record every command the installer tries to run.
const commands: Array<{ command: string; args: string[] }> = [];
const exec: ManagedPythonRuntimeExec = vi.fn(async (command, args) => {
commands.push({ command, args });
// Simulate uv being absent from PATH.
throw new Error('spawn uv ENOENT');
});
await expect(
installManagedPythonRuntime({
cliVersion: '0.2.0',
runtimeRoot: join(tempDir, 'runtime'),
assetDir,
features: ['core'],
exec,
}),
).rejects.toThrow(MISSING_UV_RUNTIME_INSTALL_MESSAGE);
// The uv probe is the only command issued before the install gives up.
expect(commands).toEqual([{ command: 'uv', args: ['--version'] }]);
});
```
Inside `describe('doctorManagedPythonRuntime', () => { ... })`, add this test
after the existing doctor test:
```typescript
// When the injected exec throws for every command, the first doctor check must
// be a failing `uv` check carrying the shared message and the install hint.
it('reports uv as a hard prerequisite when uv is missing', async () => {
const { assetDir } = await writeAsset(tempDir, 'core-wheel');
// Simulate uv being absent from PATH.
const exec: ManagedPythonRuntimeExec = vi.fn(async () => {
throw new Error('spawn uv ENOENT');
});
const checks = await doctorManagedPythonRuntime({
cliVersion: '0.2.0',
runtimeRoot: join(tempDir, 'runtime'),
assetDir,
exec,
});
// Only the first check is pinned; later checks are not asserted here.
expect(checks[0]).toEqual({
id: 'uv',
label: 'uv',
status: 'fail',
detail: MISSING_UV_RUNTIME_INSTALL_MESSAGE,
fix: 'Install uv, make sure it is on PATH, and run: ktx runtime install --yes',
});
});
```
- [ ] **Step 2: Run the runtime tests and verify failure**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/managed-python-runtime.test.ts
```
Expected: FAIL because the shared message constant does not exist and the
doctor fix text still uses the older message.
- [ ] **Step 3: Add the shared missing-uv message**
In `packages/cli/src/managed-python-runtime.ts`, add this export immediately
after the `ManagedPythonRuntimePruneResult` interface:
```typescript
/**
 * Single user-facing message for a missing `uv` executable. Exported so tests
 * can assert on the exact text instead of duplicating it.
 */
export const MISSING_UV_RUNTIME_INSTALL_MESSAGE =
'uv is required to install the KTX Python runtime. KTX does not download uv automatically. Install uv, make sure it is on PATH, and retry: ktx runtime install --yes';
```
Replace the body of the `catch` block in `ensureUv()` with:
```typescript
throw new Error(MISSING_UV_RUNTIME_INSTALL_MESSAGE);
```
In `doctorManagedPythonRuntime()`, replace the `fix` value for the `uv` check
with:
```typescript
fix: 'Install uv, make sure it is on PATH, and run: ktx runtime install --yes',
```
- [ ] **Step 4: Run the runtime tests and verify success**
Run:
```bash
pnpm --filter @ktx/cli run test -- src/managed-python-runtime.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit the runtime output contract**
```bash
git add packages/cli/src/managed-python-runtime.ts packages/cli/src/managed-python-runtime.test.ts
git commit -m "fix: clarify missing uv runtime error"
```
### Task 3: Document the hard uv prerequisite
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- Modify: `README.md`
- Modify: `examples/package-artifacts/README.md`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Add failing docs assertions**
In `scripts/examples-docs.test.mjs`, inside
`it('documents public npm and managed runtime usage in the README', ... )`, add
these assertions immediately after the existing `ktx runtime prune --yes`
assertion:
```javascript
assert.match(rootReadme, /KTX requires `uv` on `PATH`/);
assert.match(rootReadme, /KTX doesn't download `uv` automatically/);
```
Inside `it('documents the public package artifact smoke shape', ... )`, add
this assertion immediately after the `managed Python runtime` assertion:
```javascript
assert.match(readme, /requires `uv` on `PATH`/);
```
- [ ] **Step 2: Run the docs test and verify failure**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL because the README files do not state the hard `uv`
prerequisite.
- [ ] **Step 3: Update the root README runtime section**
In `README.md`, in the `## Managed Python runtime` section, replace this
paragraph:
```markdown
KTX installs its Python runtime only when a Python-backed command needs it.
The runtime lives outside the npm cache, is versioned by the installed CLI
version, and is managed by `ktx runtime` commands:
```
With:
```markdown
KTX installs its Python runtime only when a Python-backed command needs it.
The runtime lives outside the npm cache, is versioned by the installed CLI
version, and is managed by `ktx runtime` commands.
KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with
your system package manager or the official installer before running
Python-backed KTX commands. KTX doesn't download `uv` automatically; run
`ktx runtime doctor` if runtime installation fails:
```
- [ ] **Step 4: Update the package artifact smoke README**
In `examples/package-artifacts/README.md`, replace this paragraph:
```markdown
The managed Python runtime smoke isolates `KTX_RUNTIME_ROOT`, verifies
`ktx runtime status`, runs `ktx sl query --yes` to install the core runtime from
the bundled wheel, checks `ktx runtime doctor`, starts and reuses the managed
daemon, stops it, previews a stale runtime with `ktx runtime prune --dry-run`,
verifies confirmation is required, and removes the stale runtime with
`ktx runtime prune --yes`.
```
With:
```markdown
The managed Python runtime smoke requires `uv` on `PATH`, isolates
`KTX_RUNTIME_ROOT`, verifies `ktx runtime status`, runs `ktx sl query --yes` to
install the core runtime from the bundled wheel, checks `ktx runtime doctor`,
starts and reuses the managed daemon, stops it, previews a stale runtime with
`ktx runtime prune --dry-run`, verifies confirmation is required, and removes
the stale runtime with `ktx runtime prune --yes`.
```
- [ ] **Step 5: Run the docs test and verify success**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 6: Commit the public docs update**
```bash
git add README.md examples/package-artifacts/README.md scripts/examples-docs.test.mjs
git commit -m "docs: document uv runtime prerequisite"
```
### Task 4: Verify the completed contract
**Files:**
- Verify: `release-policy.json`
- Verify: `scripts/release-readiness.mjs`
- Verify: `scripts/release-readiness.test.mjs`
- Verify: `packages/cli/src/managed-python-runtime.ts`
- Verify: `packages/cli/src/managed-python-runtime.test.ts`
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `README.md`
- Verify: `examples/package-artifacts/README.md`
- [ ] **Step 1: Run focused verification**
Run:
```bash
node --test scripts/release-readiness.test.mjs scripts/examples-docs.test.mjs
pnpm --filter @ktx/cli run test -- src/managed-python-runtime.test.ts
```
Expected: PASS.
- [ ] **Step 2: Verify release readiness text output**
Run:
```bash
pnpm run release:readiness
```
Expected output includes:
```text
KTX release mode: npm-public-release-ready
Runtime uv strategy: path-prerequisite
Runtime uv bootstrap: disabled
NPM publish target: @kaelio/ktx@0.1.0 (latest)
```
- [ ] **Step 3: Verify no pre-commit config is required**
Run:
```bash
rg --files -g '.pre-commit-config.yaml' -g 'pre-commit-config.yaml'
```
Expected: no output and exit code 1. No Python files changed, so the repository
Python pre-commit requirement does not apply.
- [ ] **Step 4: Review the final diff**
Run:
```bash
git diff --stat
git diff -- release-policy.json scripts/release-readiness.mjs scripts/release-readiness.test.mjs packages/cli/src/managed-python-runtime.ts packages/cli/src/managed-python-runtime.test.ts scripts/examples-docs.test.mjs README.md examples/package-artifacts/README.md
```
Expected: only the runtime installer policy, missing-`uv` message/tests, and
public docs changed.
- [ ] **Step 5: Commit final verification notes if needed**
If Task 4 produces only verification output and no file changes, skip this
step. If a correction was made during verification, commit it:
```bash
git add release-policy.json scripts/release-readiness.mjs scripts/release-readiness.test.mjs packages/cli/src/managed-python-runtime.ts packages/cli/src/managed-python-runtime.test.ts scripts/examples-docs.test.mjs README.md examples/package-artifacts/README.md
git commit -m "chore: finish uv prerequisite release contract"
```
## Self-review
Spec coverage:
- The earlier implemented plans cover the single public npm package, bundled
Python wheel, managed runtime installer, runtime commands, daemon lifecycle,
local embeddings, Python-backed command integration, release smoke, published
smoke, docs cleanup, release handoff, and prune coverage.
- This plan closes the spec's remaining `uv` open decision by choosing
`path-prerequisite`, recording that decision in release policy, validating it
in release readiness, using one CLI error message, and documenting it.
- The plan keeps Python package publication disabled and keeps KTX-owned Python
code bundled in the npm package.
Placeholder scan:
- No task contains deferred implementation markers.
- Each code-changing step names exact files and includes the concrete code to
add or replace.
Type consistency:
- The release policy field is consistently named `runtimeInstaller`.
- The chosen strategy is consistently `path-prerequisite`.
- The shared CLI message constant is consistently
`MISSING_UV_RUNTIME_INSTALL_MESSAGE`.

File diff suppressed because it is too large Load diff

View file

@ -1,602 +0,0 @@
# Published Package Managed Runtime Smoke Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make the post-publication smoke prove that the published
`@kaelio/ktx` package uses the same isolated managed Python runtime across
`npx @kaelio/ktx`, local `npx ktx`, and global `ktx` invocation modes.
**Architecture:** Keep the smoke black-box and network-gated. Strengthen the
command builder so every Python-backed published-package command receives the
same temporary `KTX_RUNTIME_ROOT`, then run a real semantic-layer query through
the direct `npx`, local install, and global install paths instead of checking
only `--version` for local and global binaries.
**Tech Stack:** Node 22 ESM scripts, `node:test`, pnpm, npx, KTX managed Python
runtime, published `@kaelio/ktx` package smoke.
---
## Existing status
This plan is based on
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The following plans are based on that spec and are implemented in this
worktree:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-agent-mcp-semantic-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-local-ingest-daemon-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-docs-and-postgres-smoke-cleanup.md`
Implementation evidence found before writing this plan includes:
- `scripts/build-python-runtime-wheel.mjs` and
`packages/cli/assets/python/manifest.json`.
- `packages/cli/src/managed-python-runtime.ts`,
`packages/cli/src/runtime.ts`,
`packages/cli/src/commands/runtime-commands.ts`,
`packages/cli/src/managed-python-command.ts`,
`packages/cli/src/managed-python-daemon.ts`,
`packages/cli/src/managed-local-embeddings.ts`, and
`packages/cli/src/managed-python-http.ts`.
- `scripts/build-public-npm-package.mjs`, `scripts/package-artifacts.mjs`,
`scripts/local-embeddings-runtime-smoke.mjs`, and
`scripts/published-package-smoke.mjs`.
- `packages/cli/src/agent-runtime.ts`, `packages/cli/src/serve.ts`,
`packages/cli/src/ingest.ts`, and `packages/cli/src/scan.ts` thread managed
runtime policy through the Python-backed CLI paths.
- `examples/postgres-historic/scripts/smoke.sh`,
`examples/postgres-historic/README.md`,
`examples/package-artifacts/README.md`, and `README.md` now document the
managed runtime instead of a manual `python-service/` process.
The remaining release-confidence gap is in the post-publication smoke:
- `scripts/published-package-smoke-config.mjs` runs `npx @kaelio/ktx setup
demo` and `npx @kaelio/ktx sl query ... --yes`, but it does not isolate
`KTX_RUNTIME_ROOT` for those commands.
- The same smoke installs `@kaelio/ktx` locally and globally, but local and
global verification only run `--version`.
- The design spec requires the direct `npx @kaelio/ktx`, local `npx ktx`, and
global `ktx` modes to work for real KTX commands. A semantic-layer query is
the smallest Python-backed command that proves the bundled managed runtime is
usable in each mode.
## File structure
- Modify `scripts/published-package-smoke.test.mjs`: expect a shared
`KTX_RUNTIME_ROOT` in the published smoke commands, expect local and global
semantic query commands, and cover label classification used by the runner.
- Modify `scripts/published-package-smoke-config.mjs`: derive a temporary
runtime root from the smoke project directory, merge it with registry
environment settings, and add local and global `sl query` commands.
- Modify `scripts/published-package-smoke.mjs`: validate the renamed version
labels and semantic query labels when the smoke runs.
### Task 1: Isolate runtime roots and add real local/global command coverage
**Files:**
- Modify: `scripts/published-package-smoke.test.mjs`
- Modify: `scripts/published-package-smoke-config.mjs`
- Test: `scripts/published-package-smoke.test.mjs`
- [ ] **Step 1: Write the failing command-list test**
In `scripts/published-package-smoke.test.mjs`, replace the existing
`it('builds the full public package smoke command list', ...)` block with this
test:
```javascript
// Pins the exact, ordered command list for the published-package smoke:
// direct npx commands first, then the local install, then the global install.
// `setup demo` and every `sl query` carry the shared KTX_RUNTIME_ROOT; version
// and install commands carry only the registry setting.
// NOTE(review): `config` is defined outside this snippet; the expected values
// imply package @kaelio/ktx@latest and registry https://registry.npmjs.org/.
it('builds the full public package smoke command list', () => {
assert.deepEqual(
buildPublishedPackageSmokeCommands(
config,
'/tmp/ktx-smoke/demo',
// Explicit runtime root; the same path must appear in every KTX_RUNTIME_ROOT below.
'/tmp/ktx-smoke/managed-runtime',
),
[
// --- Direct `npx @kaelio/ktx` invocation ---
{
label: 'published package npx version',
command: 'npx',
args: ['--yes', '@kaelio/ktx@latest', '--version'],
env: { npm_config_registry: 'https://registry.npmjs.org/' },
},
{
label: 'published package setup demo',
command: 'npx',
args: [
'--yes',
'@kaelio/ktx@latest',
'setup',
'demo',
'--project-dir',
'/tmp/ktx-smoke/demo',
'--no-input',
'--plain',
],
env: {
npm_config_registry: 'https://registry.npmjs.org/',
KTX_RUNTIME_ROOT: '/tmp/ktx-smoke/managed-runtime',
},
},
{
label: 'published package npx sl query',
command: 'npx',
args: [
'--yes',
'@kaelio/ktx@latest',
'sl',
'query',
'--project-dir',
'/tmp/ktx-smoke/demo',
'--connection-id',
'orbit_demo',
'--measure',
'contracts.contract_count',
'--format',
'sql',
'--yes',
],
env: {
npm_config_registry: 'https://registry.npmjs.org/',
KTX_RUNTIME_ROOT: '/tmp/ktx-smoke/managed-runtime',
},
},
// --- Local install, invoked as `npx ktx` ---
{
label: 'published package local install',
command: 'pnpm',
args: ['add', '@kaelio/ktx@latest'],
env: { npm_config_registry: 'https://registry.npmjs.org/' },
},
{
label: 'published package local version',
command: 'npx',
args: ['ktx', '--version'],
env: { npm_config_registry: 'https://registry.npmjs.org/' },
},
{
label: 'published package local sl query',
command: 'npx',
args: [
'ktx',
'sl',
'query',
'--project-dir',
'/tmp/ktx-smoke/demo',
'--connection-id',
'orbit_demo',
'--measure',
'contracts.contract_count',
'--format',
'sql',
'--yes',
],
env: {
npm_config_registry: 'https://registry.npmjs.org/',
KTX_RUNTIME_ROOT: '/tmp/ktx-smoke/managed-runtime',
},
},
// --- Global install, invoked as bare `ktx` ---
{
label: 'published package global install',
command: 'pnpm',
args: ['add', '--global', '@kaelio/ktx@latest'],
env: { npm_config_registry: 'https://registry.npmjs.org/' },
},
{
label: 'published package global version',
command: 'ktx',
args: ['--version'],
env: { npm_config_registry: 'https://registry.npmjs.org/' },
},
{
label: 'published package global sl query',
command: 'ktx',
args: [
'sl',
'query',
'--project-dir',
'/tmp/ktx-smoke/demo',
'--connection-id',
'orbit_demo',
'--measure',
'contracts.contract_count',
'--format',
'sql',
'--yes',
],
env: {
npm_config_registry: 'https://registry.npmjs.org/',
KTX_RUNTIME_ROOT: '/tmp/ktx-smoke/managed-runtime',
},
},
],
);
});
```
- [ ] **Step 2: Run the test and verify it fails**
Run:
```bash
node --test scripts/published-package-smoke.test.mjs
```
Expected: FAIL with an `AssertionError` showing that the actual command list
still uses `published package version`, lacks `KTX_RUNTIME_ROOT`, and lacks the
local/global `sl query` commands.
- [ ] **Step 3: Implement the command builder changes**
In `scripts/published-package-smoke-config.mjs`, add this import before the
existing `node:assert/strict` import:
```javascript
import { dirname, join } from 'node:path';
```
In the same file, add these helper functions after
`assertHttpRegistry(registry, label)`:
```javascript
/**
 * Build the npm registry environment override for a smoke command.
 *
 * @param {{ registry?: string | null }} config Smoke configuration.
 * @returns {{ npm_config_registry?: string }} An object carrying
 *   `npm_config_registry` when a registry is configured, otherwise an empty
 *   object.
 */
function registryEnv(config) {
  if (!config.registry) {
    return {};
  }
  return { npm_config_registry: config.registry };
}
/**
 * Build the environment for a command that needs the managed runtime: the
 * registry override (if any) plus `KTX_RUNTIME_ROOT`.
 *
 * @param {{ registry?: string | null }} config Smoke configuration.
 * @param {string} runtimeRoot Directory to use as the managed runtime root.
 * @returns {object} A new environment object; `KTX_RUNTIME_ROOT` is applied last.
 */
function runtimeCommandEnv(config, runtimeRoot) {
  const runtimeRootOverride = { KTX_RUNTIME_ROOT: runtimeRoot };
  return Object.assign({}, registryEnv(config), runtimeRootOverride);
}
/**
 * Build the CLI arguments for the semantic-layer query used by the smoke.
 *
 * @param {string} projectDir Project directory passed to `--project-dir`.
 * @returns {string[]} `sl query` arguments requesting SQL output for the
 *   `contracts.contract_count` measure on the `orbit_demo` connection, ending
 *   with `--yes`.
 */
function semanticQueryArgs(projectDir) {
  const subcommand = ['sl', 'query'];
  // Flag/value pairs kept together so each option reads as one unit.
  const options = [
    ['--project-dir', projectDir],
    ['--connection-id', 'orbit_demo'],
    ['--measure', 'contracts.contract_count'],
    ['--format', 'sql'],
  ];
  return [...subcommand, ...options.flat(), '--yes'];
}
```
Replace `buildPublishedPackageNpxCommand()` and
`buildPublishedPackageSmokeCommands()` with this implementation:
```javascript
/**
 * Describe an `npx --yes <published package spec> ...args` invocation.
 *
 * @param {object} config Smoke configuration (package spec and optional registry).
 * @param {string[]} args Arguments appended after the package spec.
 * @param {string} [label] Label identifying the command in smoke output.
 * @param {object} [extraEnv] Extra environment entries; these override the
 *   registry entry on key collision.
 * @returns {{ label: string, command: string, args: string[], env: object }}
 */
export function buildPublishedPackageNpxCommand(config, args, label = 'published package command', extraEnv = {}) {
  const packageSpec = publishedPackageSpec(config);
  const env = Object.assign({}, registryEnv(config), extraEnv);
  return {
    label,
    command: 'npx',
    args: ['--yes', packageSpec, ...args],
    env,
  };
}
/**
 * Build the ordered command list for the published-package smoke.
 *
 * The list exercises three invocation modes in turn -- direct `npx`, a local
 * install run through `npx ktx`, and a global install run as `ktx`. `setup
 * demo` and every `sl query` receive the same `KTX_RUNTIME_ROOT`; version and
 * install commands receive only the registry environment.
 *
 * @param {object} config Smoke configuration (package spec and optional registry).
 * @param {string} projectDir Demo project directory used by setup and queries.
 * @param {string} [runtimeRoot] Managed runtime root; defaults to a
 *   `managed-runtime` directory next to `projectDir`.
 * @returns {Array<{ label: string, command: string, args: string[], env: object }>}
 */
export function buildPublishedPackageSmokeCommands(
  config,
  projectDir,
  runtimeRoot = join(dirname(projectDir), 'managed-runtime'),
) {
  const sharedRuntimeEnv = runtimeCommandEnv(config, runtimeRoot);
  const registryOnlyEnv = registryEnv(config);
  const slQueryArgs = semanticQueryArgs(projectDir);
  const runtimeRootOverride = { KTX_RUNTIME_ROOT: runtimeRoot };
  const step = (label, command, args, env) => ({ label, command, args, env });

  const directNpxSteps = [
    buildPublishedPackageNpxCommand(config, ['--version'], 'published package npx version'),
    buildPublishedPackageNpxCommand(
      config,
      ['setup', 'demo', '--project-dir', projectDir, '--no-input', '--plain'],
      'published package setup demo',
      runtimeRootOverride,
    ),
    buildPublishedPackageNpxCommand(
      config,
      slQueryArgs,
      'published package npx sl query',
      runtimeRootOverride,
    ),
  ];

  const localInstallSteps = [
    step('published package local install', 'pnpm', ['add', publishedPackageSpec(config)], registryOnlyEnv),
    step('published package local version', 'npx', ['ktx', '--version'], registryOnlyEnv),
    step('published package local sl query', 'npx', ['ktx', ...slQueryArgs], sharedRuntimeEnv),
  ];

  const globalInstallSteps = [
    step(
      'published package global install',
      'pnpm',
      ['add', '--global', publishedPackageSpec(config)],
      registryOnlyEnv,
    ),
    step('published package global version', 'ktx', ['--version'], registryOnlyEnv),
    step('published package global sl query', 'ktx', slQueryArgs, sharedRuntimeEnv),
  ];

  return [...directNpxSteps, ...localInstallSteps, ...globalInstallSteps];
}
```
- [ ] **Step 4: Run the command-list test and verify it passes**
Run:
```bash
node --test scripts/published-package-smoke.test.mjs
```
Expected: PASS for the command construction tests, with remaining failures only
if the runner label validation test from Task 2 has already been added.
- [ ] **Step 5: Commit the command-builder change**
Run:
```bash
git add scripts/published-package-smoke-config.mjs scripts/published-package-smoke.test.mjs
git commit -m "test: cover published package runtime smoke commands"
```
### Task 2: Validate smoke runner labels for the new command list
**Files:**
- Modify: `scripts/published-package-smoke.test.mjs`
- Modify: `scripts/published-package-smoke.mjs`
- Test: `scripts/published-package-smoke.test.mjs`
- [ ] **Step 1: Write the failing label classification test**
In `scripts/published-package-smoke.test.mjs`, replace the import from
`./published-package-smoke.mjs` with this import:
```javascript
import {
buildPublishedPackageNpxCommand,
buildPublishedPackageSmokeCommands,
isPublishedPackageSemanticQueryLabel,
isPublishedPackageVersionLabel,
publishedPackageSpec,
readPublishedPackageSmokeConfig,
} from './published-package-smoke.mjs';
```
Add this test after the `describe('published package smoke command
construction', ...)` block:
```javascript
// Checks both label classifiers against all three invocation modes (npx, local,
// global) plus one label of another kind that each classifier must reject.
describe('published package smoke output validation labels', () => {
it('classifies version and semantic query commands', () => {
// Version labels: one per invocation mode; `setup demo` is not a version check.
assert.equal(isPublishedPackageVersionLabel('published package npx version'), true);
assert.equal(isPublishedPackageVersionLabel('published package local version'), true);
assert.equal(isPublishedPackageVersionLabel('published package global version'), true);
assert.equal(isPublishedPackageVersionLabel('published package setup demo'), false);
// Semantic query labels: one per invocation mode; an install step is not a query.
assert.equal(isPublishedPackageSemanticQueryLabel('published package npx sl query'), true);
assert.equal(isPublishedPackageSemanticQueryLabel('published package local sl query'), true);
assert.equal(isPublishedPackageSemanticQueryLabel('published package global sl query'), true);
assert.equal(isPublishedPackageSemanticQueryLabel('published package local install'), false);
});
});
```
- [ ] **Step 2: Run the test and verify it fails**
Run:
```bash
node --test scripts/published-package-smoke.test.mjs
```
Expected: FAIL with an import error because
`isPublishedPackageSemanticQueryLabel` and `isPublishedPackageVersionLabel` are
not exported yet.
- [ ] **Step 3: Implement label classification and runner validation**
In `scripts/published-package-smoke.mjs`, add these constants and exports after
`const SMOKE_TIMEOUT_MS = 180_000;`:
```javascript
/**
 * Build the label set for one kind of smoke command across the three
 * invocation modes, in the order npx, local, global.
 *
 * @param {string} kind Label suffix, e.g. `version` or `sl query`.
 * @returns {Set<string>}
 */
const labelsForEveryInvocationMode = (kind) =>
  new Set(['npx', 'local', 'global'].map((mode) => `published package ${mode} ${kind}`));

// 'published package npx version', 'published package local version',
// 'published package global version'
const VERSION_LABELS = labelsForEveryInvocationMode('version');

// 'published package npx sl query', 'published package local sl query',
// 'published package global sl query'
const SEMANTIC_QUERY_LABELS = labelsForEveryInvocationMode('sl query');

/**
 * Report whether a smoke command label names a version check.
 *
 * @param {string} label Smoke command label.
 * @returns {boolean}
 */
export function isPublishedPackageVersionLabel(label) {
  return VERSION_LABELS.has(label);
}

/**
 * Report whether a smoke command label names a semantic-layer query.
 *
 * @param {string} label Smoke command label.
 * @returns {boolean}
 */
export function isPublishedPackageSemanticQueryLabel(label) {
  return SEMANTIC_QUERY_LABELS.has(label);
}
```
In `runPublishedPackageSmoke(config)`, replace this block:
```javascript
if (
command.label === 'published package version' ||
command.label === 'published package local binary' ||
command.label === 'published package global binary'
) {
assert.match(result.stdout, /@kaelio\/ktx /);
}
if (command.label === 'published package sl query') {
assert.match(result.stdout, /SELECT/i);
assert.match(result.stdout, /contracts/i);
}
```
with this block:
```javascript
if (isPublishedPackageVersionLabel(command.label)) {
assert.match(result.stdout, /@kaelio\/ktx /);
}
if (isPublishedPackageSemanticQueryLabel(command.label)) {
assert.match(result.stdout, /SELECT/i);
assert.match(result.stdout, /contracts/i);
}
```
- [ ] **Step 4: Run the label tests and verify they pass**
Run:
```bash
node --test scripts/published-package-smoke.test.mjs
```
Expected: PASS.
- [ ] **Step 5: Commit the runner-label change**
Run:
```bash
git add scripts/published-package-smoke.mjs scripts/published-package-smoke.test.mjs
git commit -m "test: validate published package smoke outputs"
```
### Task 3: Verify release-script compatibility
**Files:**
- Verify: `scripts/published-package-smoke-config.mjs`
- Verify: `scripts/published-package-smoke.mjs`
- Verify: `scripts/published-package-smoke.test.mjs`
- Verify: `scripts/release-readiness.test.mjs`
- Verify: `package.json`
- [ ] **Step 1: Run the focused Node tests**
Run:
```bash
node --test scripts/published-package-smoke.test.mjs scripts/release-readiness.test.mjs
```
Expected: PASS. The release-readiness tests must continue to report the
published package smoke gate without executing the network smoke.
- [ ] **Step 2: Run release readiness**
Run:
```bash
pnpm run release:readiness
```
Expected: PASS and output containing these lines:
```text
Release mode: ci-artifact-only
NPM publish enabled: false
Published package smoke: pending
Published package smoke script: pnpm run release:published-smoke
```
- [ ] **Step 3: Confirm the network smoke stays explicit**
Run:
```bash
rg -n '"release:published-smoke": "node scripts/published-package-smoke.mjs --require-config"' package.json
```
Expected: PASS with one match in `package.json`. Do not run
`pnpm run release:published-smoke` in normal CI before the package is published
to the configured registry.
- [ ] **Step 4: Check pre-commit availability**
Run:
```bash
test ! -f .pre-commit-config.yaml
```
Expected: PASS in the current worktree. If a pre-commit config exists when this
plan is executed, run this instead after activating `.venv`:
```bash
source .venv/bin/activate
uv run pre-commit run --files scripts/published-package-smoke-config.mjs scripts/published-package-smoke.mjs scripts/published-package-smoke.test.mjs
```
- [ ] **Step 5: Commit verification-only fixes if needed**
If Step 1 or Step 2 required additional source changes, commit them with:
```bash
git add scripts/published-package-smoke-config.mjs scripts/published-package-smoke.mjs scripts/published-package-smoke.test.mjs scripts/release-readiness.test.mjs package.json
git commit -m "chore: verify published package runtime smoke"
```
If no files changed after Task 2, do not create an empty commit.
## Acceptance criteria
- `buildPublishedPackageSmokeCommands()` derives
`<smoke root>/managed-runtime` from the demo project directory by default.
- Direct `npx @kaelio/ktx`, local `npx ktx`, and global `ktx` semantic query
commands all receive the same `KTX_RUNTIME_ROOT`.
- Local and global post-publication smoke coverage runs `sl query ... --yes`,
not only `--version`.
- `runPublishedPackageSmoke()` validates version output for all version labels
and validates generated SQL output for all semantic query labels.
- `node --test scripts/published-package-smoke.test.mjs scripts/release-readiness.test.mjs`
passes.
- `pnpm run release:readiness` still reports the published-package smoke as a
pending explicit release gate while registry publishing is disabled.
## Self-review notes
- Spec coverage: this plan covers the remaining invocation-mode confidence gap
from the spec by proving the published package uses an isolated managed
runtime across direct `npx`, local binary, and global binary paths.
- Placeholder scan: the plan contains concrete file paths, exact code blocks,
exact commands, and exact expected outcomes.
- Type consistency: the command label strings are consistent across tests,
command construction, and smoke-runner output validation.

View file

@ -1,978 +0,0 @@
# Single Public Runtime Artifact Cleanup Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make release artifacts match the npm-managed Python runtime design:
one public `@kaelio/ktx` npm tarball plus one bundled `kaelio-ktx` runtime
wheel, with no standalone `ktx-sl` or `ktx-daemon` release artifacts.
**Architecture:** Keep `python/ktx-sl` and `python/ktx-daemon` as source
packages used to assemble the bundled runtime wheel. Remove direct standalone
Python wheel and source-distribution builds from the release artifact path,
manifest, readiness policy, and artifact smoke docs. The packed npm package
remains the only user-visible package; Python-backed verification continues
through the managed runtime installed from the bundled wheel.
**Tech Stack:** Node 22 ESM scripts, `node:test`, pnpm, uv-built bundled
runtime wheel, JSON release policy, Markdown.
---
## Current state
This plan follows
`docs/superpowers/specs/2026-05-11-npm-managed-python-runtime-design.md`.
The following plan files are based on that spec and are implemented in the
current tree:
- `docs/superpowers/plans/2026-05-11-bundled-python-runtime-wheel.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-installer.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-command-integration.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-daemon-lifecycle.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-runtime.md`
- `docs/superpowers/plans/2026-05-11-public-kaelio-ktx-npm-package.md`
- `docs/superpowers/plans/2026-05-11-managed-python-runtime-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-local-embeddings-release-smoke.md`
- `docs/superpowers/plans/2026-05-11-managed-agent-mcp-semantic-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-local-ingest-daemon-runtime.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-docs-and-postgres-smoke-cleanup.md`
- `docs/superpowers/plans/2026-05-11-published-package-managed-runtime-smoke.md`
- `docs/superpowers/plans/2026-05-11-public-npm-release-handoff.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-prune-smoke-and-docs.md`
- `docs/superpowers/plans/2026-05-11-managed-runtime-uv-prerequisite-contract.md`
Implementation evidence found before writing this plan includes:
- `packages/cli/assets/python/manifest.json` and
`packages/cli/assets/python/kaelio_ktx-0.1.0-py3-none-any.whl`.
- `packages/cli/src/managed-python-runtime.ts`,
`packages/cli/src/managed-python-command.ts`,
`packages/cli/src/managed-python-daemon.ts`,
`packages/cli/src/managed-local-embeddings.ts`,
`packages/cli/src/managed-python-http.ts`, and `packages/cli/src/runtime.ts`.
- `scripts/build-public-npm-package.mjs`, `scripts/package-artifacts.mjs`,
`scripts/published-package-smoke.mjs`,
`scripts/local-embeddings-runtime-smoke.mjs`,
`scripts/publish-public-npm-package.mjs`, and
`.github/workflows/release.yml`.
- `release-policy.json` is in `npm-public-release-ready` mode, publishes
`@kaelio/ktx`, disables Python package publishing, and encodes the hard
`uv` prerequisite.
- `README.md` and `examples/package-artifacts/README.md` document public npm
usage, managed runtime commands, `runtime prune`, and the `uv` prerequisite.
The remaining mismatch is in the artifact release surface:
- `scripts/package-artifacts.mjs` still runs `uv build --package ktx-sl` and
`uv build --package ktx-daemon`.
- `scripts/package-artifacts.mjs` still adds `ktx-sl` and `ktx-daemon` wheel
and source-distribution files to the artifact manifest.
- `scripts/package-artifacts.mjs` still runs a direct Python clean-install
smoke, even though the npm artifact smoke already proves Python-backed
commands through the managed runtime.
- `release-policy.json` still lists `ktx-sl` and `ktx-daemon` under
`python.packages`.
- `examples/package-artifacts/README.md` says the Python smoke installs
standalone Python artifacts directly.
This plan removes those release artifacts. It does not delete the Python source
packages because the bundled runtime wheel builder still copies from
`python/ktx-sl/semantic_layer` and `python/ktx-daemon/src/ktx_daemon`.
## File structure
- Modify `scripts/package-artifacts.test.mjs`: make artifact tests expect only
`@kaelio/ktx` plus the `kaelio-ktx` bundled runtime wheel, and add a guard
that direct standalone Python artifact smoke code is gone.
- Modify `scripts/package-artifacts.mjs`: stop building standalone Python
artifacts, stop looking for their wheel and source-distribution files, remove
their release metadata, and remove the direct Python artifact verification
path.
- Modify `scripts/release-readiness.test.mjs`: update release policy fixtures
and readiness reports so the only Python release metadata is `kaelio-ktx`.
- Modify `release-policy.json`: set `python.packages` to `["kaelio-ktx"]`.
- Modify `scripts/examples-docs.test.mjs`: require docs to describe the single
npm tarball plus runtime wheel artifact shape and reject the old direct
Python-artifact smoke wording.
- Modify `README.md`: clarify that `python/ktx-sl` and `python/ktx-daemon` are
source packages, not release artifacts for the first npm release.
- Modify `examples/package-artifacts/README.md`: replace the stale standalone
Python smoke paragraph with the managed-runtime artifact contract.
### Task 1: Make package artifact tests expect one runtime wheel
**Files:**
- Modify: `scripts/package-artifacts.test.mjs`
- Test: `scripts/package-artifacts.test.mjs`
- [ ] **Step 1: Update package artifact imports**
In `scripts/package-artifacts.test.mjs`, replace the import from
`./package-artifacts.mjs` with this import:
```javascript
import {
CLI_PYTHON_ASSET_MANIFEST,
INTERNAL_NPM_WORKSPACE_PACKAGES,
RUNTIME_WHEEL_DISTRIBUTION_NAME,
RUNTIME_WHEEL_NORMALIZED_NAME,
RUNTIME_WHEEL_PACKAGE_VERSION,
artifactManifestPath,
buildArtifactCommands,
copyRuntimeWheelAssets,
findPythonArtifacts,
NPM_ARTIFACT_PACKAGES,
npmDemoSmokeSource,
npmRuntimeSmokeSource,
npmSmokePackageJson,
npmVerifySource,
packageArtifactLayout,
packageReleaseMetadata,
verifyArtifactManifest,
writeArtifactManifest,
} from './package-artifacts.mjs';
```
- [ ] **Step 2: Remove standalone Python fixture setup**
In `scripts/package-artifacts.test.mjs`, replace `writeReleaseMetadataInputs`
with this function:
```javascript
async function writeReleaseMetadataInputs(root) {
for (const packageInfo of INTERNAL_NPM_WORKSPACE_PACKAGES) {
await mkdir(join(root, packageInfo.packageRoot), { recursive: true });
await writeJson(join(root, packageInfo.packageRoot, 'package.json'), {
name: packageInfo.name,
version: '0.0.0-private',
private: true,
});
}
}
```
Replace `writeUploadableArtifactFixtures` with this function:
```javascript
async function writeUploadableArtifactFixtures(layout) {
await mkdir(layout.npmDir, { recursive: true });
await mkdir(layout.pythonDir, { recursive: true });
const fileContents = new Map([
...NPM_ARTIFACT_PACKAGES.map((packageInfo) => [
layout.npmTarballs[packageInfo.name],
`${packageInfo.name}-tarball`,
]),
[
join(layout.pythonDir, 'kaelio_ktx-0.1.0-py3-none-any.whl'),
'kaelio-ktx-runtime-wheel',
],
]);
for (const [path, contents] of fileContents) {
await writeFile(path, contents);
}
}
```
- [ ] **Step 3: Change build command expectations**
In the `buildArtifactCommands` test, replace the existing `it(...)` case with this code:
```javascript
it('builds TypeScript packages and the runtime wheel before packing npm artifacts', () => {
const layout = packageArtifactLayout('/repo/ktx');
const commands = buildArtifactCommands(layout);
assert.deepEqual(
commands.slice(0, NPM_BUILD_PACKAGE_ORDER.length).map((command) => [command.command, command.args]),
NPM_BUILD_PACKAGE_ORDER.map((packageName) => ['pnpm', ['--filter', packageName, 'run', 'build']]),
);
assert.deepEqual(
commands.slice(NPM_BUILD_PACKAGE_ORDER.length, NPM_BUILD_PACKAGE_ORDER.length + 1).map((command) => [
command.command,
command.args,
]),
[[process.execPath, ['scripts/build-python-runtime-wheel.mjs']]],
);
assert.deepEqual(
commands.slice(NPM_BUILD_PACKAGE_ORDER.length + 1).map((command) => [command.command, command.args]),
[[process.execPath, ['scripts/build-public-npm-package.mjs']]],
);
});
```
- [ ] **Step 4: Change release metadata expectations**
In the `packageReleaseMetadata` test, replace the expected array with this
array:
```javascript
assert.deepEqual(await packageReleaseMetadata(root), [
{
ecosystem: 'npm',
packageName: '@kaelio/ktx',
packageRoot: 'packages/cli',
packageVersion: '0.1.0',
private: false,
releaseMode: 'ci-artifact-only',
},
{
ecosystem: 'python',
packageName: 'kaelio-ktx',
packageRoot: 'python/runtime-wheel',
packageVersion: '0.1.0',
private: false,
releaseMode: 'ci-artifact-only',
},
]);
```
- [ ] **Step 5: Change Python artifact discovery expectations**
Replace the `findPythonArtifacts` success test with this test:
```javascript
it('finds the bundled runtime wheel only', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-test-'));
try {
await writeFile(join(root, 'kaelio_ktx-0.1.0-py3-none-any.whl'), '');
assert.deepEqual(await findPythonArtifacts(root), {
runtimeWheel: join(root, 'kaelio_ktx-0.1.0-py3-none-any.whl'),
});
} finally {
await rm(root, { recursive: true, force: true });
}
});
```
- [ ] **Step 6: Change artifact manifest expectations**
Inside the artifact manifest test, replace the Python package assertion with:
```javascript
assert.deepEqual(
manifest.packages.filter((entry) => entry.ecosystem === 'python'),
[
{
ecosystem: 'python',
packageName: 'kaelio-ktx',
packageRoot: 'python/runtime-wheel',
packageVersion: '0.1.0',
private: false,
releaseMode: 'ci-artifact-only',
},
],
);
```
Replace the Python file assertion with:
```javascript
assert.deepEqual(
manifest.files
.filter((file) => file.ecosystem === 'python')
.map((file) => ({
artifactKind: file.artifactKind,
ecosystem: file.ecosystem,
packageName: file.packageName,
packageVersion: file.packageVersion,
path: file.path,
})),
[
{
artifactKind: 'wheel',
ecosystem: 'python',
packageName: 'kaelio-ktx',
packageVersion: '0.1.0',
path: 'python/kaelio_ktx-0.1.0-py3-none-any.whl',
},
],
);
```
In the `verifyArtifactManifest` success test, replace the file-count assertion
with:
```javascript
assert.equal(manifest.files.length, NPM_ARTIFACT_PACKAGES.length + 1);
```
- [ ] **Step 7: Replace direct Python smoke tests with a dead-code guard**
Remove the whole `describe('pythonArtifactInstallArgs', ...)` block.
In `describe('verification snippets', ...)`, remove the test named
`asserts the Python modules that clean installs must expose`.
Add this test after the `verifyNpmArtifacts` test:
```javascript
describe('standalone Python artifact cleanup', () => {
it('does not build or verify standalone Python package artifacts', async () => {
const source = await readFile(new URL('./package-artifacts.mjs', import.meta.url), 'utf8');
assert.doesNotMatch(source, /uv', \['build', '--package', 'ktx-sl'/);
assert.doesNotMatch(source, /uv', \['build', '--package', 'ktx-daemon'/);
assert.doesNotMatch(source, /async function verifyPythonArtifacts/);
assert.doesNotMatch(source, /pythonArtifactInstallArgs/);
assert.doesNotMatch(source, /pythonVerifySource/);
assert.doesNotMatch(source, /ktx_sl-0\.1\.0/);
assert.doesNotMatch(source, /ktx_daemon-0\.1\.0/);
});
});
```
- [ ] **Step 8: Run package artifact tests and verify failure**
Run:
```bash
node --test scripts/package-artifacts.test.mjs
```
Expected: FAIL. The failures mention the extra `ktx-sl` and `ktx-daemon`
artifact commands, metadata entries, manifest files, or direct Python smoke
helpers.
### Task 2: Remove standalone Python artifacts from package artifacts
**Files:**
- Modify: `scripts/package-artifacts.mjs`
- Test: `scripts/package-artifacts.test.mjs`
- [ ] **Step 1: Remove dead constants and imports**
In `scripts/package-artifacts.mjs`, replace the `node:path` import with this
import:
```javascript
import { dirname, isAbsolute, join, relative, resolve, sep } from 'node:path';
```
Remove these constants:
```javascript
const PACKAGE_VERSION = '0.0.0-private';
const PYTHON_PACKAGE_VERSION = '0.1.0';
```
Remove the whole `ordersSource` constant block.
- [ ] **Step 2: Make npm artifact names public-package only**
Replace `npmPackageTarballName` with this function:
```javascript
function npmPackageTarballName(packageName) {
if (packageName !== PUBLIC_NPM_PACKAGE_NAME) {
throw new Error(`Unsupported npm artifact package: ${packageName}`);
}
return publicNpmPackageTarballName(PUBLIC_NPM_PACKAGE_VERSION);
}
```
- [ ] **Step 3: Remove standalone Python build commands**
Replace `buildArtifactCommands` with this function:
```javascript
export function buildArtifactCommands(layout) {
const packagesByName = new Map(INTERNAL_NPM_WORKSPACE_PACKAGES.map((packageInfo) => [packageInfo.name, packageInfo]));
const npmBuildCommands = NPM_ARTIFACT_BUILD_ORDER.map((packageName) => {
const packageInfo = packagesByName.get(packageName);
if (!packageInfo) {
throw new Error(`Unknown npm artifact build package: ${packageName}`);
}
return {
command: 'pnpm',
args: ['--filter', packageInfo.name, 'run', 'build'],
cwd: layout.rootDir,
};
});
const publicPackageCommand = {
command: process.execPath,
args: ['scripts/build-public-npm-package.mjs'],
cwd: layout.rootDir,
};
return [
...npmBuildCommands,
{
command: process.execPath,
args: ['scripts/build-python-runtime-wheel.mjs'],
cwd: layout.rootDir,
},
publicPackageCommand,
];
}
```
- [ ] **Step 4: Discover only the bundled runtime wheel**
Replace `findOne` and `findPythonArtifacts` with these functions:
```javascript
function findOne(files, distributionName, suffix, label, pythonDir, version) {
const normalized = normalizePythonDistributionName(distributionName);
const found = files.find((file) => file.startsWith(`${normalized}-${version}`) && file.endsWith(suffix));
if (!found) {
throw new Error(`Missing Python artifact: ${label}`);
}
return join(pythonDir, found);
}
export async function findPythonArtifacts(pythonDir) {
const files = await readdir(pythonDir);
return {
runtimeWheel: findOne(
files,
RUNTIME_WHEEL_DISTRIBUTION_NAME,
'.whl',
'kaelio-ktx runtime wheel',
pythonDir,
RUNTIME_WHEEL_PACKAGE_VERSION,
),
};
}
```
- [ ] **Step 5: Emit release metadata only for npm and runtime wheel**
Replace `packageReleaseMetadata` with this function:
```javascript
export async function packageReleaseMetadata(rootDir = scriptRootDir()) {
const npmPackages = await Promise.all(
NPM_ARTIFACT_PACKAGES.map((packageInfo) => readNpmPackageMetadata(rootDir, packageInfo)),
);
return [
...npmPackages,
releaseMetadataEntry({
ecosystem: 'python',
packageName: RUNTIME_WHEEL_DISTRIBUTION_NAME,
packageRoot: 'python/runtime-wheel',
packageVersion: RUNTIME_WHEEL_PACKAGE_VERSION,
privatePackage: false,
}),
];
}
```
- [ ] **Step 6: Remove dead TOML metadata helpers**
Delete these helper functions from `scripts/package-artifacts.mjs` because
release metadata no longer reads standalone Python `pyproject.toml` files:
```javascript
function readProjectBlock(toml, sourcePath) {
const lines = toml.split(/\r?\n/);
const block = [];
let inProject = false;
for (const line of lines) {
if (/^\[project\]\s*$/.test(line)) {
inProject = true;
continue;
}
if (inProject && /^\[.*\]\s*$/.test(line)) {
break;
}
if (inProject) {
block.push(line);
}
}
if (!inProject) {
throw new Error(`Missing [project] table in ${sourcePath}`);
}
return block.join('\n');
}
```
```javascript
function readTomlStringField(projectBlock, fieldName, sourcePath) {
const match = projectBlock.match(new RegExp(`^${fieldName}\\s*=\\s*"([^"]+)"\\s*$`, 'm'));
if (!match) {
throw new Error(`Missing project.${fieldName} in ${sourcePath}`);
}
return match[1];
}
```
```javascript
async function readPyprojectMetadata(path) {
const toml = await readFile(path, 'utf-8');
const projectBlock = readProjectBlock(toml, path);
return {
name: readTomlStringField(projectBlock, 'name', path),
version: readTomlStringField(projectBlock, 'version', path),
};
}
```
- [ ] **Step 7: Emit manifest records only for npm and runtime wheel**
Replace `artifactPackageRecords` with this function:
```javascript
function artifactPackageRecords(layout, pythonArtifacts, packages) {
const packagesByName = packageMetadataByName(packages);
const npmRecords = NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({
artifactKind: 'tarball',
artifactPath: layout.npmTarballs[packageInfo.name],
metadata: requirePackageMetadata(packagesByName, packageInfo.name),
}));
return [
...npmRecords,
{
artifactKind: 'wheel',
artifactPath: pythonArtifacts.runtimeWheel,
metadata: requirePackageMetadata(packagesByName, RUNTIME_WHEEL_DISTRIBUTION_NAME),
},
];
}
```
- [ ] **Step 8: Remove direct Python artifact verification helpers**
Delete these exports and functions from `scripts/package-artifacts.mjs`:
```javascript
export function pythonArtifactInstallArgs(python, pythonArtifacts) {
return ['pip', 'install', '--python', python, pythonArtifacts.runtimeWheel];
}
```
```javascript
export function pythonVerifySource() {
return `
import importlib.metadata
import semantic_layer
import ktx_daemon
assert importlib.metadata.version("kaelio-ktx") == "0.1.0"
assert semantic_layer is not None
assert ktx_daemon.PACKAGE_NAME == "ktx-daemon"
`;
}
```
```javascript
function pythonExecutable(projectDir) {
if (process.platform === 'win32') {
return join(projectDir, '.venv', 'Scripts', 'python.exe');
}
return join(projectDir, '.venv', 'bin', 'python');
}
```
```javascript
export function npmSmokePythonEnv(projectDir, baseEnv = process.env) {
const binDir = process.platform === 'win32' ? join(projectDir, '.venv', 'Scripts') : join(projectDir, '.venv', 'bin');
const existingPath = baseEnv.PATH ?? '';
return {
...baseEnv,
PATH: existingPath ? `${binDir}${delimiter}${existingPath}` : binDir,
};
}
```
```javascript
async function verifyPythonArtifacts(layout, tmpRoot) {
const pythonArtifacts = await findPythonArtifacts(layout.pythonDir);
const projectDir = join(tmpRoot, 'python-clean-install');
await mkdir(projectDir, { recursive: true });
const python = pythonExecutable(projectDir);
await writeFile(join(projectDir, 'verify_python.py'), pythonVerifySource());
await runCommand('uv', ['venv', '.venv'], { cwd: projectDir });
await runCommand('uv', pythonArtifactInstallArgs(python, pythonArtifacts), {
cwd: projectDir,
});
await runCommand(python, ['verify_python.py'], { cwd: projectDir });
await runCommand(python, ['-m', 'ktx_daemon', 'semantic-validate'], {
cwd: projectDir,
input: `${JSON.stringify({ sources: [ordersSource], dialect: 'postgres' })}\n`,
});
}
```
- [ ] **Step 9: Verify artifacts through npm only**
Replace `verifyArtifacts` with this function:
```javascript
async function verifyArtifacts(layout) {
await verifyArtifactManifest(layout);
const tmpRoot = await mkdtemp(join(tmpdir(), 'ktx-artifacts-'));
try {
await verifyNpmArtifacts(layout, tmpRoot);
} finally {
await rm(tmpRoot, { recursive: true, force: true });
}
}
```
- [ ] **Step 10: Run package artifact tests and verify pass**
Run:
```bash
node --test scripts/package-artifacts.test.mjs
```
Expected: PASS. The output includes `# fail 0`.
- [ ] **Step 11: Commit package artifact cleanup**
Run:
```bash
git add scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs
git commit -m "refactor: limit release artifacts to public package runtime"
```
### Task 3: Align release policy and readiness reports
**Files:**
- Modify: `release-policy.json`
- Modify: `scripts/release-readiness.test.mjs`
- Test: `scripts/release-readiness.test.mjs`
- [ ] **Step 1: Update release readiness fixtures**
In `scripts/release-readiness.test.mjs`, replace
`writeReleaseMetadataInputs` with:
```javascript
async function writeReleaseMetadataInputs(root) {
for (const packageInfo of INTERNAL_NPM_WORKSPACE_PACKAGES) {
await mkdir(join(root, packageInfo.packageRoot), { recursive: true });
await writeJson(join(root, packageInfo.packageRoot, 'package.json'), {
name: packageInfo.name,
version: '0.0.0-private',
private: true,
});
}
}
```
Replace `writeUploadableArtifactFixtures` with:
```javascript
async function writeUploadableArtifactFixtures(layout) {
await mkdir(layout.npmDir, { recursive: true });
await mkdir(layout.pythonDir, { recursive: true });
const fileContents = new Map([
...NPM_ARTIFACT_PACKAGES.map((packageInfo) => [
layout.npmTarballs[packageInfo.name],
`${packageInfo.name}-tarball`,
]),
[join(layout.pythonDir, 'kaelio_ktx-0.1.0-py3-none-any.whl'), 'kaelio-ktx-runtime-wheel'],
]);
for (const [path, contents] of fileContents) {
await writeFile(path, contents);
}
}
```
In `releasePolicy`, replace the `python` object with:
```javascript
python: {
publish: false,
repository: null,
packages: ['kaelio-ktx'],
...pythonOverrides,
},
```
- [ ] **Step 2: Update readiness report expectations**
In `scripts/release-readiness.test.mjs`, replace every expected
`packageNames` array with:
```javascript
packageNames: ['@kaelio/ktx', 'kaelio-ktx'],
```
There are three report assertions to update:
- `accepts the current ci-artifact-only policy, package metadata, and artifact manifest`
- `reports required published package smoke when release mode requires it`
- `accepts the npm public release ready policy`
- [ ] **Step 3: Update checked release policy**
In `release-policy.json`, replace the `python.packages` value with:
```json
"packages": ["kaelio-ktx"]
```
- [ ] **Step 4: Run readiness tests and verify pass**
Run:
```bash
node --test scripts/release-readiness.test.mjs
```
Expected: PASS. The output includes `# fail 0`.
- [ ] **Step 5: Commit release policy cleanup**
Run:
```bash
git add release-policy.json scripts/release-readiness.test.mjs
git commit -m "chore: align release policy with bundled runtime wheel"
```
### Task 4: Document the single release artifact surface
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- Modify: `README.md`
- Modify: `examples/package-artifacts/README.md`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Add failing docs assertions**
In `scripts/examples-docs.test.mjs`, inside
`it('documents the public package artifact smoke shape', ...)`, add these
assertions after the existing `assert.match(readme, /managed Python runtime/);`
line:
```javascript
assert.match(readme, /public\s+`@kaelio\/ktx`\s+npm\s+tarball\s+and\s+the\s+bundled\s+`kaelio-ktx`\s+runtime\s+wheel/);
assert.match(readme, /does\s+not\s+install\s+standalone\s+Python\s+packages\s+directly/);
assert.doesNotMatch(readme, /standalone Python distributions/);
assert.doesNotMatch(readme, /installs the Python artifacts directly/);
```
In `it('documents public npm and managed runtime usage in the README', ...)`,
add these assertions after the existing `uv` assertions:
```javascript
assert.match(rootReadme, /release\s+artifact\s+manifest\s+contains\s+the\s+public\s+npm\s+tarball\s+and\s+the\s+bundled\s+`kaelio-ktx`\s+runtime\s+wheel/);
assert.match(rootReadme, /source\s+packages\s+for\s+development,\s+not\s+public\s+release\s+artifacts/);
```
- [ ] **Step 2: Run docs tests and verify failure**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL. The failure mentions the missing single-artifact wording in
`README.md` or `examples/package-artifacts/README.md`.
- [ ] **Step 3: Update the package artifact example README**
In `examples/package-artifacts/README.md`, replace:
```markdown
The Python smoke project still installs the Python artifacts directly because
it verifies the standalone Python distributions that feed the bundled runtime
wheel.
```
with:
```markdown
The artifact manifest contains the public `@kaelio/ktx` npm tarball and the
bundled `kaelio-ktx` runtime wheel. The smoke does not install standalone
Python packages directly; Python-backed behavior is verified through the
managed runtime installed from the npm package.
```
- [ ] **Step 4: Update the root README release status**
In `README.md`, in the `## Release status` section, replace this paragraph:
```markdown
This repository builds one public npm artifact named `@kaelio/ktx`. The first
public npm handoff is policy-gated through `release-policy.json`, which keeps
Python package publishing disabled because KTX-owned Python code ships inside
the npm package as a bundled wheel.
```
with:
```markdown
This repository builds one public npm artifact named `@kaelio/ktx`. The release
artifact manifest contains the public npm tarball and the bundled `kaelio-ktx`
runtime wheel. The first public npm handoff is policy-gated through
`release-policy.json`, which keeps Python package publishing disabled because
KTX-owned Python code ships inside the npm package as a bundled wheel. The
`python/ktx-sl` and `python/ktx-daemon` directories remain source packages for
development, not public release artifacts.
```
- [ ] **Step 5: Run docs tests and verify pass**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS. The output includes `# fail 0`.
- [ ] **Step 6: Commit docs cleanup**
Run:
```bash
git add README.md examples/package-artifacts/README.md scripts/examples-docs.test.mjs
git commit -m "docs: describe single public runtime artifact surface"
```
### Task 5: Verify the cleaned release artifact contract
**Files:**
- Verify: `scripts/package-artifacts.mjs`
- Verify: `scripts/package-artifacts.test.mjs`
- Verify: `scripts/release-readiness.test.mjs`
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `release-policy.json`
- Verify: `README.md`
- Verify: `examples/package-artifacts/README.md`
- [ ] **Step 1: Run focused tests**
Run:
```bash
node --test scripts/package-artifacts.test.mjs scripts/release-readiness.test.mjs scripts/examples-docs.test.mjs
```
Expected: PASS. The output includes `# fail 0`.
- [ ] **Step 2: Verify stale artifact strings are gone from production/docs files**
Run the scan below. It covers only production and docs files; test files are excluded because they keep guard assertions that reference the removed strings:
```bash
rg -n "uv', \\['build', '--package', 'ktx-sl'|uv', \\['build', '--package', 'ktx-daemon'|ktx_sl-0\\.1\\.0|ktx_daemon-0\\.1\\.0|pythonArtifactInstallArgs|pythonVerifySource|verifyPythonArtifacts|standalone Python distributions|installs the Python artifacts directly" scripts/package-artifacts.mjs scripts/release-readiness.mjs README.md examples/package-artifacts/README.md release-policy.json
```
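Expected: no matches. `rg` exits with status 1 when nothing matches, so a non-zero exit code with empty output is the passing result here.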
- [ ] **Step 3: Verify release readiness against the current artifact manifest**
Run:
```bash
pnpm run release:readiness -- --json
```
Expected: PASS when `dist/artifacts/manifest.json` has been rebuilt after this
change. The JSON output contains:
```json
{
"releaseMode": "npm-public-release-ready",
"packageNames": ["@kaelio/ktx", "kaelio-ktx"],
"pythonPublishEnabled": false
}
```
If this command fails because the local artifact manifest was generated before
the cleanup, run:
```bash
pnpm run artifacts:check
pnpm run release:readiness -- --json
```
Expected: both commands pass. The rebuilt manifest contains only
`npm/kaelio-ktx-0.1.0.tgz` and
`python/kaelio_ktx-0.1.0-py3-none-any.whl` under `files`.
- [ ] **Step 4: Run pre-commit on changed files when configured**
Run:
```bash
uv run pre-commit run --files scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs scripts/release-readiness.test.mjs scripts/examples-docs.test.mjs release-policy.json README.md examples/package-artifacts/README.md
```
Expected: PASS. If pre-commit is not installed or no pre-commit config exists,
record the exact error and keep the focused Node test output from Step 1.
- [ ] **Step 5: Commit final verification fixes if needed**
If Step 1, Step 2, Step 3, or Step 4 required code or docs fixes, commit them:
```bash
git add scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs scripts/release-readiness.test.mjs scripts/examples-docs.test.mjs release-policy.json README.md examples/package-artifacts/README.md
git commit -m "test: verify single public runtime artifact contract"
```
If no fixes were required after the previous commits, do not create an empty
commit.
## Acceptance criteria
- `scripts/package-artifacts.mjs` builds TypeScript packages, builds the
bundled `kaelio-ktx` runtime wheel, copies it into CLI assets, and packs the
public `@kaelio/ktx` npm tarball.
- `scripts/package-artifacts.mjs` no longer builds `ktx-sl` or `ktx-daemon`
standalone wheel or source-distribution artifacts.
- Artifact manifests contain release metadata for `@kaelio/ktx` and
`kaelio-ktx` only.
- `release-policy.json` lists only `@kaelio/ktx` under `npm.packages` and only
`kaelio-ktx` under `python.packages`.
- The artifact smoke verifies Python-backed behavior through the installed
public npm package and managed runtime, not by installing standalone Python
artifacts directly.
- Public docs state that `python/ktx-sl` and `python/ktx-daemon` remain source
packages for development, not public release artifacts.
## Self-review
Spec coverage:
- The plan preserves the single public npm package requirement.
- The plan preserves the bundled KTX-owned Python wheel requirement.
- The plan keeps Python package publishing disabled.
- The plan removes the only remaining artifact path that treated KTX-owned
Python source packages as standalone release artifacts.
Placeholder scan:
- No steps contain placeholder implementation text.
- Every code-changing step names exact files and provides concrete replacement
snippets.
Type and name consistency:
- Public npm package name remains `@kaelio/ktx`.
- Bundled runtime distribution name remains `kaelio-ktx`.
- Runtime wheel filename remains `kaelio_ktx-0.1.0-py3-none-any.whl`.
- Removed standalone Python artifact names are consistently `ktx-sl` and
`ktx-daemon`.

View file

@ -1,785 +0,0 @@
# Notion Warehouse Verification Gap Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1 gaps that prevent ingest agents, especially
Notion WorkUnits, from reliably verifying warehouse table and column
identifiers before writing wiki or semantic-layer output.
**Architecture:** Keep the existing warehouse verification tool module and
runner wiring. Add Notion target-warehouse scoping through the local adapter
factory, make the active WorkUnit prompt name the shipped tools, enforce
`allowedConnectionNames` in `discover_data`, and teach `entity_details` to
resolve column-level display targets and reject targets whose column is
missing.
**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX local
ingest adapters, KTX file store.
---
## Audit summary
The previous implementation plan landed the main tool module and prompt
protocol, but four v1-blocking gaps remain:
- Notion ingest sessions still allow only the Notion connection unless a
specific adapter supplies target IDs. `NotionSourceAdapter` does not supply
target warehouse IDs, so the original Notion hallucination case cannot use
`entity_details` or raw-schema `discover_data` for the warehouse connection.
- The active WorkUnit framing prompt still tells agents to call
`wiki_sl_search` and `sl_describe_table`, which are not shipped KTX tools.
- `discover_data` accepts an explicit out-of-scope `connectionName` and still
searches raw schema for that connection.
- `entity_details({ targets: [{ display: "schema.table.column" }] })` does not
resolve column display strings and does not fail explicit missing-column
targets.
Non-blocking gaps remain out of scope for this plan:
- Full DDL-style `entity_details` formatting with FK and profile summaries.
- AST-backed SQL read-only validation for data-modifying CTEs.
- Search over `enrichment/descriptions.json` for generated descriptions.
- Lexicographic latest-sync edge cases for non-timestamp sync IDs.
- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`.
## File structure
Modify these files:
- `packages/context/src/ingest/adapters/notion/notion.adapter.ts`: add
configured target warehouse IDs and implement `listTargetConnectionIds()`.
- `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: cover
Notion target connection ID fan-out.
- `packages/context/src/ingest/local-adapters.ts`: pass primary warehouse IDs
into `NotionSourceAdapter`.
- `packages/context/src/ingest/local-adapters.test.ts`: cover local Notion
adapter target IDs.
- `packages/context/src/ingest/adapters/notion/chunk.ts`: update Notion
WorkUnit notes to prefer the warehouse verification tools.
- `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: update
Notion note expectations.
- `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`: replace
stale tool names in the active WorkUnit prompt.
- `packages/context/src/ingest/ingest-prompts.test.ts`: guard the WorkUnit
prompt against stale tool names.
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
refuse explicit out-of-scope connection names.
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
cover `discover_data` scoping.
- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`:
add column-aware display-target resolution.
- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`:
cover column display resolution.
- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`:
use column-aware resolution and report missing columns.
- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`:
cover column display and missing-column behavior.
### Task 1: Give Notion ingest access to target warehouses
**Files:**
- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.ts`
- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`
- Modify: `packages/context/src/ingest/local-adapters.ts`
- Modify: `packages/context/src/ingest/local-adapters.test.ts`
- [ ] **Step 1: Write the failing Notion adapter test**
Add this test inside `describe('NotionSourceAdapter', ...)` in
`packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`:
```ts
it('returns configured target warehouse connection ids', async () => {
const adapter = new NotionSourceAdapter({
targetConnectionIds: ['warehouse', 'warehouse', 'analytics'],
});
await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual([
'analytics',
'warehouse',
]);
});
```
- [ ] **Step 2: Run the failing Notion adapter test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/notion/notion.adapter.test.ts -t "target warehouse connection ids"
```
Expected: FAIL because `NotionSourceAdapterDeps` has no
`targetConnectionIds` option and `NotionSourceAdapter` does not implement
`listTargetConnectionIds()`.
- [ ] **Step 3: Implement Notion target connection IDs**
Modify `packages/context/src/ingest/adapters/notion/notion.adapter.ts`:
```ts
export interface NotionSourceAdapterDeps {
onPullSucceeded?: (ctx: NotionPullSucceededContext) => Promise<void>;
logger?: NotionFetchLogger;
targetConnectionIds?: string[];
}
function uniqueSorted(values: readonly string[] | undefined): string[] {
return [...new Set(values ?? [])].sort((left, right) =>
left.localeCompare(right),
);
}
```
Add this method to `NotionSourceAdapter`:
```ts
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
return uniqueSorted(this.deps.targetConnectionIds);
}
```
- [ ] **Step 4: Pass primary warehouses into the local Notion adapter**
Modify the Notion adapter construction in
`packages/context/src/ingest/local-adapters.ts`:
```ts
new NotionSourceAdapter({
targetConnectionIds: primaryWarehouseConnectionIds(project),
...(options.logger ? { logger: options.logger } : {}),
}),
```
- [ ] **Step 5: Write the local adapter fan-out test**
Add this test to `packages/context/src/ingest/local-adapters.test.ts`:
```ts
it('passes primary warehouse connection ids to the local Notion adapter', async () => {
const adapters = createDefaultLocalIngestAdapters(
projectWithConnections({
notion: {
driver: 'notion',
auth_token: 'secret',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
},
warehouse: {
driver: 'postgres',
url: 'postgresql://readonly@db.example.test/analytics',
},
docs: {
driver: 'dbt',
source_dir: './dbt',
},
} as never),
);
const notion = adapters.find((adapter) => adapter.source === 'notion');
await expect(notion?.listTargetConnectionIds?.('/tmp/staged-notion')).resolves.toEqual([
'warehouse',
]);
});
```
- [ ] **Step 6: Run the Notion target tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/adapters/notion/notion.adapter.test.ts \
src/ingest/local-adapters.test.ts \
-t "target warehouse connection ids|local Notion adapter"
```
Expected: PASS.
- [ ] **Step 7: Commit**
Run:
```bash
git add \
packages/context/src/ingest/adapters/notion/notion.adapter.ts \
packages/context/src/ingest/adapters/notion/notion.adapter.test.ts \
packages/context/src/ingest/local-adapters.ts \
packages/context/src/ingest/local-adapters.test.ts
git commit -m "fix(context): expose target warehouses to Notion ingest"
```
### Task 2: Remove stale tool names from active ingest prompts
**Files:**
- Modify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`
- Modify: `packages/context/src/ingest/ingest-prompts.test.ts`
- Modify: `packages/context/src/ingest/adapters/notion/chunk.ts`
- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`
- [ ] **Step 1: Add failing prompt guards**
Add this test to `packages/context/src/ingest/ingest-prompts.test.ts`:
```ts
it('uses shipped warehouse verification tools in the WorkUnit prompt', async () => {
const prompt = await readFile(
new URL('../../prompts/memory_agent_bundle_ingest_work_unit.md', import.meta.url),
'utf-8',
);
expect(prompt).toContain('discover_data');
expect(prompt).toContain('entity_details');
expect(prompt).not.toContain('wiki_sl_search');
expect(prompt).not.toContain('sl_describe_table');
});
```
- [ ] **Step 2: Run the failing prompt guard**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-prompts.test.ts -t "warehouse verification tools"
```
Expected: FAIL because the WorkUnit prompt still contains `wiki_sl_search` and
`sl_describe_table`.
- [ ] **Step 3: Update the WorkUnit framing prompt**
In `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`, replace
the first `<role>` paragraph with:
```md
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`.
```
In workflow step 2, replace the final sentence with:
```md
The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping.
```
In workflow step 4, replace the sentence that starts
`For each raw file:` with:
```md
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
```
In the `<do_not>` block, replace the physical-column rule with:
```md
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
```
- [ ] **Step 4: Update Notion WorkUnit notes**
In `packages/context/src/ingest/adapters/notion/chunk.ts`, replace
`NOTION_SL_WRITE_GUIDANCE` with:
```ts
const NOTION_SL_WRITE_GUIDANCE =
'Write wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages, SL sources, and raw warehouse schema for the same tables or sl_refs with discover_data before creating a new page. Only write or edit SL sources after discover_data plus sl_discover/sl_read_source or entity_details confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.';
```
In the `reconcileNotes` array in the same file, replace:
```ts
'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.',
```
with:
```ts
'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.',
```
- [ ] **Step 5: Update Notion note expectations**
In `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`,
update the note expectations in `it('chunks changed Notion pages...')`:
```ts
expect(result.workUnits[0].notes).toContain('discover_data');
expect(result.workUnits[0].notes).toContain('entity_details');
```
Update the exact `reconcileNotes` expectation to:
```ts
expect(result.reconcileNotes).toEqual([
'Notion maxKnowledgeCreatesPerRun=25',
'Notion maxKnowledgeUpdatesPerRun=20',
'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.',
'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.',
]);
```
- [ ] **Step 6: Run prompt and Notion note tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-prompts.test.ts \
src/ingest/adapters/notion/notion.adapter.test.ts
```
Expected: PASS.
- [ ] **Step 7: Commit**
Run:
```bash
git add \
packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \
packages/context/src/ingest/ingest-prompts.test.ts \
packages/context/src/ingest/adapters/notion/chunk.ts \
packages/context/src/ingest/adapters/notion/notion.adapter.test.ts
git commit -m "fix(context): update ingest prompts for warehouse verification tools"
```
### Task 3: Enforce allowed connection scope in discover_data
**Files:**
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`
- [ ] **Step 1: Write the failing scoping test**
Add this test to
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
```ts
it('refuses explicit out-of-scope connection names', async () => {
const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context);
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
expect(result.structured).toEqual({ wiki: null, sl: null, raw: null });
expect(wikiSearchTool.call).not.toHaveBeenCalled();
expect(slDiscoverTool.call).not.toHaveBeenCalled();
expect(catalog.searchByName).not.toHaveBeenCalled();
});
```
- [ ] **Step 2: Run the failing scoping test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts -t "out-of-scope"
```
Expected: FAIL because `discover_data` currently searches raw schema for an
explicit `connectionName` even when it is not in `allowedConnectionNames`.
- [ ] **Step 3: Add the scope guard**
In
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`,
add this helper near `totalSources()`:
```ts
function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null {
return context.session?.allowedConnectionNames ?? null;
}
```
At the top of `DiscoverDataTool.call()`, before the `sourceName` branch and
before calling any child tool, add:
```ts
const allowed = allowedConnectionNames(context);
if (input.connectionName && allowed && !allowed.has(input.connectionName)) {
return {
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
structured: { wiki: null, sl: null, raw: null },
};
}
```
Then replace the raw connection-list construction with:
```ts
const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort();
```
- [ ] **Step 4: Run discover_data tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit**
Run:
```bash
git add \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
git commit -m "fix(context): scope raw schema discovery to allowed connections"
```
### Task 4: Fix column-level entity_details verification
**Files:**
- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`
- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`
- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`
- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`
- [ ] **Step 1: Write failing catalog column-target tests**
First update `seedLiveDatabaseScan()` in
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`
so BigQuery tables have a project/catalog. Replace the repeated inline table
refs with:
```ts
const tableRef = {
catalog: driver === 'bigquery' ? 'analytics' : null,
db: driver === 'sqlite' ? null : 'public',
name: 'orders',
};
```
Use `tableRef.catalog`, `tableRef.db`, and `tableRef.name` for the seeded
table and profile table references.
Then add these tests to
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`:
```ts
it('resolves postgres column display strings without treating the column as a table', async () => {
await seedLiveDatabaseScan();
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({
resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' },
candidates: [],
dialect: 'postgres',
});
});
it('resolves BigQuery column display strings with four parts', async () => {
await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery');
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({
resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' },
candidates: [],
dialect: 'bigquery',
});
});
```
- [ ] **Step 2: Run the failing catalog tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts -t "column display"
```
Expected: FAIL because `resolveDisplayTarget()` does not exist.
- [ ] **Step 3: Implement column-aware display resolution**
In
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`,
add this exported interface near `RawSchemaHit`:
```ts
export interface DisplayTargetResolution {
resolved: (KtxTableRef & { column?: string }) | null;
candidates: KtxTableRef[];
dialect: string;
}
```
Add these helpers near `parseDisplay()`:
```ts
function expectedDisplayPartCount(driver: CatalogDriver): number {
if (driver === 'sqlite' || driver === 'sqlite3') {
return 1;
}
if (driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver') {
return 3;
}
return 2;
}
function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null {
const parts = splitDisplay(display);
const tablePartCount = expectedDisplayPartCount(driver);
if (parts.length !== tablePartCount + 1) {
return null;
}
const column = parts.at(-1);
if (!column) {
return null;
}
const table = parseDisplay(driver, parts.slice(0, -1).join('.'));
return table ? { ...table, column } : null;
}
```
Add this method to `WarehouseCatalogService` after `resolveDisplay()`:
```ts
async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
const catalog = await this.loadCatalog(connectionName);
if (!catalog) {
return { resolved: null, candidates: [], dialect: 'unknown' };
}
const dialect = getDialectForDriver(catalog.driver).type;
const tableResolution = await this.resolveDisplay(connectionName, display);
if (tableResolution.resolved) {
return tableResolution;
}
const parsedColumn = parseColumnDisplay(catalog.driver, display);
if (!parsedColumn) {
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
}
const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn));
if (!table) {
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
}
return {
resolved: {
catalog: table.catalog,
db: table.db,
name: table.name,
column: parsedColumn.column,
},
candidates: [],
dialect,
};
}
```
- [ ] **Step 4: Write failing entity_details column tests**
Add these tests to
`packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`:
```ts
it('resolves display targets that include a column name', async () => {
const result = await tool.call(
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
context,
);
expect(result.markdown).toContain('### public.orders');
expect(result.markdown).toContain('- status (text, nullable=false)');
expect(result.markdown).not.toContain('- id (integer');
expect(result.structured.resolved).toHaveLength(1);
expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']);
});
it('reports missing explicit columns instead of returning an empty column list', async () => {
const result = await tool.call(
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
context,
);
expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier');
expect(result.markdown).toContain('Available columns: id, status');
expect(result.structured.resolved).toHaveLength(0);
expect(result.structured.missing).toHaveLength(1);
});
```
- [ ] **Step 5: Run the failing entity_details tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts -t "column"
```
Expected: FAIL because display column targets are treated as table names and
missing columns are not reported.
- [ ] **Step 6: Use column-aware resolution in entity_details**
In
`packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`,
add this helper near `appendTableMarkdown()`:
```ts
function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null {
const normalized = columnName.toLowerCase();
return detail.columns.find((column) => column.name.toLowerCase() === normalized) ?? null;
}
```
Replace the display resolution block inside the `for (const target of
input.targets)` loop with:
```ts
const resolution =
'display' in target
? await catalog.resolveDisplayTarget(input.connectionName, target.display)
: {
resolved: { catalog: target.catalog, db: target.db, name: target.name, column: target.column },
candidates: [],
dialect: '',
};
```
After `const detail = await catalog.getTable(...)`, replace the existing
`resolved.push(detail); appendTableMarkdown(...)` lines with:
```ts
const requestedColumn = resolution.resolved.column;
if (requestedColumn) {
const column = findColumn(detail, requestedColumn);
if (!column) {
missing.push({
target,
candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }],
});
parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`);
parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`);
continue;
}
const scopedDetail = { ...detail, columns: [column] };
resolved.push(scopedDetail);
appendTableMarkdown(parts, scopedDetail, column.name);
continue;
}
resolved.push(detail);
appendTableMarkdown(parts, detail);
```
- [ ] **Step 7: Run warehouse verification tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
```
Expected: PASS.
- [ ] **Step 8: Commit**
Run:
```bash
git add \
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
git commit -m "fix(context): verify warehouse column display targets"
```
### Task 5: Verify the v1 gap closure
**Files:**
- Verify all files changed by Tasks 1-4.
- [ ] **Step 1: Run focused tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/adapters/notion/notion.adapter.test.ts \
src/ingest/local-adapters.test.ts \
src/ingest/ingest-prompts.test.ts \
src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Run package tests**
Run:
```bash
pnpm --filter @ktx/context run test
```
Expected: PASS.
- [ ] **Step 4: Run pre-commit on changed files when configured**
Run:
```bash
uv run pre-commit run --files \
packages/context/src/ingest/adapters/notion/notion.adapter.ts \
packages/context/src/ingest/adapters/notion/notion.adapter.test.ts \
packages/context/src/ingest/local-adapters.ts \
packages/context/src/ingest/local-adapters.test.ts \
packages/context/src/ingest/adapters/notion/chunk.ts \
packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \
packages/context/src/ingest/ingest-prompts.test.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
```
Expected: PASS. If the repo has no pre-commit config or the local `uv` version
cannot satisfy the project pin, record the exact error and rely on focused
tests plus type-check.
- [ ] **Step 5: Inspect final git status**
Run:
```bash
git status --short
```
Expected: only intentional files are modified. Commit any formatter-driven
changes with:
```bash
git add packages/context
git commit -m "chore(context): verify warehouse verification v1 gaps"
```
## Self-review checklist
- Spec coverage: this plan closes the remaining v1 paths for Notion warehouse
verification, active WorkUnit prompt correctness, raw discovery scoping, and
column-level identifier verification.
- Placeholder scan: no task relies on future-work markers, unnamed edge-case
handling, or cross-task shorthand.
- Type consistency: `discover_data` continues to use `connectionName`,
`sl_discover` still receives `connectionId` internally, and
`resolveDisplayTarget()` returns the same table identity plus optional
`column`.

View file

@ -1,957 +0,0 @@
# Warehouse Verification Final V1 Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1 gaps that still prevent ingest agents from
reliably following warehouse verification results through to `entity_details`
and `sql_execution`.
**Architecture:** Keep the existing warehouse verification module and runner
session scoping. Add connection names to raw discovery hits, expose primary
warehouse targets from the remaining source adapters, and make local ingest
SQL probes use the same scan connector read-only execution path as schema scan.
**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX local
ingest runtime, KTX scan connectors.
---
## Audit summary
The first two implementation plans landed the warehouse verification tools,
prompt protocol, Notion warehouse scoping, and stale prompt-name cleanup. The
focused audit on May 12, 2026, found three remaining v1-blocking gaps:
- `discover_data` searches multiple allowed raw warehouse scans, but raw hits do
not carry or render `connectionName`. The tool tells the agent to call
`entity_details({connectionName, targets: [...]})`, then omits the required
`connectionName` from the follow-up evidence.
- Local LookML and MetricFlow adapters do not expose primary warehouse target
IDs. The runner only adds adapter-provided targets to `allowedConnectionNames`,
so those WorkUnits cannot use raw warehouse verification unless their source
connection is itself the warehouse.
- `sql_execution` calls the local ingest connection catalog, but the catalog
either has no query executor in normal CLI ingest or calls an injected
executor without `projectDir` and connection config. The default local query
executor cannot dispatch without that config.
Non-blocking gaps remain out of scope for this v1 plan:
- Full DDL-style `entity_details` formatting with FK profile summaries.
- AST-backed SQL read-only validation for data-modifying CTE bodies.
- Search over generated `enrichment/descriptions.json`.
- Lexicographic latest-sync edge cases for non-timestamp sync IDs.
- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`.
## File structure
Modify these files:
- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`:
add `connectionName` to raw schema hit records.
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
render raw hit connection names and preserve them in structured output.
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
cover multi-connection raw discovery follow-up data.
- `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`:
accept and return configured target warehouse connection IDs.
- `packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`:
cover LookML target warehouse IDs.
- `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`:
accept and return configured target warehouse connection IDs.
- `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`:
cover MetricFlow target warehouse IDs.
- `packages/context/src/ingest/local-adapters.ts`:
pass primary warehouse IDs into LookML and MetricFlow adapters.
- `packages/context/src/ingest/local-adapters.test.ts`:
cover local adapter warehouse target fan-out.
- `packages/context/src/ingest/local-bundle-runtime.ts`:
pass full project connection config to local ingest query executors.
- `packages/context/src/ingest/local-bundle-runtime.test.ts`:
cover the local ingest query executor call shape.
- `packages/context/src/ingest/local-ingest.ts`:
use the shared query executor port type.
- `packages/context/src/mcp/local-project-ports.ts`:
no behavior change is expected, but the file must still type-check against the
updated local ingest query executor type.
- `packages/cli/src/ingest.ts`:
provide a read-only scan-connector-backed query executor for normal local
ingest runs.
Create these files:
- `packages/cli/src/ingest-query-executor.ts`: CLI query executor that adapts
scan connectors' `executeReadOnly()` method to `KtxSqlQueryExecutorPort`.
- `packages/cli/src/ingest-query-executor.test.ts`: unit coverage for the CLI
ingest query executor.
### Task 1: Preserve raw discovery connection names
**Files:**
- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`
- [ ] **Step 1: Write the failing multi-connection discovery test**
Add this test to
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
```ts
it('includes connectionName on raw schema hits so entity_details can follow up', async () => {
const multiConnectionContext: ToolContext = {
...context,
session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any,
};
catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [
{
kind: 'table',
connectionName,
ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` },
display: `public.${connectionName}_${query}`,
matchedOn: 'name',
},
]);
const result = await tool.call({ query: 'orders', limit: 10 }, multiConnectionContext);
expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10);
expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10);
expect(result.markdown).toContain('connectionName=analytics');
expect(result.markdown).toContain('connectionName=warehouse');
expect(result.markdown).toContain(
'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})',
);
expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual([
'analytics',
'warehouse',
]);
});
```
- [ ] **Step 2: Run the failing discovery test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts -t "connectionName on raw schema hits"
```
Expected: FAIL because `RawSchemaHit` has no `connectionName` property and the
markdown only renders the display string.
- [ ] **Step 3: Add `connectionName` to raw schema hits**
Modify the raw hit type and hit construction in
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`:
```ts
export type RawSchemaHit =
| {
kind: 'table';
connectionName: string;
ref: KtxTableRef;
display: string;
matchedOn: 'name' | 'db' | 'comment' | 'description';
}
| {
kind: 'column';
connectionName: string;
ref: KtxTableRef & { column: string };
display: string;
matchedOn: 'name' | 'comment' | 'description';
};
```
In the table hit block, add `connectionName`:
```ts
hits.push({
kind: 'table',
connectionName,
ref: { catalog: table.catalog, db: table.db, name: table.name },
display: formatDisplay(catalog.driver, table),
matchedOn: tableMatch,
});
```
In the column hit block, add `connectionName`:
```ts
hits.push({
kind: 'column',
connectionName,
ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name },
display: `${formatDisplay(catalog.driver, table)}.${column.name}`,
matchedOn: columnMatch,
});
```
- [ ] **Step 4: Render follow-up-ready raw hits**
Modify the raw schema markdown in
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
```ts
parts.push('## Raw Warehouse Schema', '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values');
parts.push(
rawHits
.slice(0, limit)
.map(
(hit) =>
`- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) - ` +
`follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``,
)
.join('\n'),
);
```
- [ ] **Step 5: Run the discovery test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
```
Expected: PASS.
- [ ] **Step 6: Commit**
Run:
```bash
git add \
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
git commit -m "fix(context): include raw discovery connection names"
```
### Task 2: Expose LookML and MetricFlow warehouse targets
**Files:**
- Modify: `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`
- Modify: `packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`
- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`
- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`
- Modify: `packages/context/src/ingest/local-adapters.ts`
- Modify: `packages/context/src/ingest/local-adapters.test.ts`
- [ ] **Step 1: Write failing adapter target tests**
Add this test to
`packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`:
```ts
it('returns configured target warehouse connection ids', async () => {
const adapter = new LookmlSourceAdapter({
homeDir: join(tmpRoot, 'home'),
targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
});
await expect(adapter.listTargetConnectionIds?.(join(tmpRoot, 'staged'))).resolves.toEqual([
'analytics',
'warehouse',
]);
});
```
Add this test to
`packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`:
```ts
it('returns configured target warehouse connection ids', async () => {
const metricflow = new MetricflowSourceAdapter({
homeDir: join(tmpRoot, 'cache-home'),
targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
});
await expect(metricflow.listTargetConnectionIds?.(stagedDir)).resolves.toEqual([
'analytics',
'warehouse',
]);
});
```
- [ ] **Step 2: Run the failing adapter tests**
Run:
```bash
# Pass -t once: the single name pattern applies to every test file listed.
pnpm --filter @ktx/context exec vitest run \
  src/ingest/adapters/lookml/lookml.adapter.test.ts \
  src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
  -t "target warehouse connection ids"
```
Expected: FAIL because neither adapter accepts `targetConnectionIds` or
implements `listTargetConnectionIds()`.
- [ ] **Step 3: Implement target ID support in LookML**
Modify `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`:
```ts
export interface LookmlSourceAdapterDeps {
homeDir: string;
targetConnectionIds?: string[];
}
/**
 * Returns the distinct entries of `values` in `localeCompare` order.
 * A missing list is treated as empty.
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const distinct = new Set<string>(values ?? []);
  const ordered = Array.from(distinct);
  ordered.sort((left, right) => left.localeCompare(right));
  return ordered;
}
```
Add this method to `LookmlSourceAdapter`:
```ts
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
return uniqueSorted(this.deps.targetConnectionIds);
}
```
- [ ] **Step 4: Implement target ID support in MetricFlow**
Modify `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`:
```ts
export interface MetricflowSourceAdapterDeps {
homeDir: string;
targetConnectionIds?: string[];
}
/**
 * Returns the distinct entries of `values` in `localeCompare` order.
 * A missing list is treated as empty.
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const distinct = new Set<string>(values ?? []);
  const ordered = Array.from(distinct);
  ordered.sort((left, right) => left.localeCompare(right));
  return ordered;
}
```
Add this method to `MetricflowSourceAdapter`:
```ts
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
return uniqueSorted(this.deps.targetConnectionIds);
}
```
- [ ] **Step 5: Pass primary warehouses from the local adapter factory**
Modify the LookML and MetricFlow adapter construction in
`packages/context/src/ingest/local-adapters.ts`:
```ts
new LookmlSourceAdapter({
homeDir: join(project.projectDir, '.ktx/cache'),
targetConnectionIds: primaryWarehouseConnectionIds(project),
}),
```
```ts
new MetricflowSourceAdapter({
homeDir: join(project.projectDir, '.ktx/cache'),
targetConnectionIds: primaryWarehouseConnectionIds(project),
}),
```
- [ ] **Step 6: Write the local adapter fan-out test**
Add this test to `packages/context/src/ingest/local-adapters.test.ts`:
```ts
it('passes primary warehouse connection ids to local LookML and MetricFlow adapters', async () => {
const adapters = createDefaultLocalIngestAdapters(
projectWithConnections({
warehouse: {
driver: 'postgres',
url: 'postgresql://readonly@db.example.test/analytics',
},
lookml_docs: {
driver: 'lookml',
lookml: {
repoUrl: 'https://github.com/acme/lookml.git',
},
},
metrics_repo: {
driver: 'metricflow',
metricflow: {
repoUrl: 'https://github.com/acme/metrics.git',
},
},
} as never),
);
const lookml = adapters.find((adapter) => adapter.source === 'lookml');
const metricflow = adapters.find((adapter) => adapter.source === 'metricflow');
await expect(lookml?.listTargetConnectionIds?.('/tmp/staged-lookml')).resolves.toEqual([
'warehouse',
]);
await expect(metricflow?.listTargetConnectionIds?.('/tmp/staged-metricflow')).resolves.toEqual([
'warehouse',
]);
});
```
- [ ] **Step 7: Run the target fan-out tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/adapters/lookml/lookml.adapter.test.ts \
src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
src/ingest/local-adapters.test.ts
```
Expected: PASS.
- [ ] **Step 8: Commit**
Run:
```bash
git add \
packages/context/src/ingest/adapters/lookml/lookml.adapter.ts \
packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts \
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
packages/context/src/ingest/local-adapters.ts \
packages/context/src/ingest/local-adapters.test.ts
git commit -m "fix(context): expose warehouse targets for LookML and MetricFlow"
```
### Task 3: Pass full connection config to local ingest SQL execution
**Files:**
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
- Modify: `packages/context/src/ingest/local-ingest.ts`
- [ ] **Step 1: Write the failing local connection catalog test**
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, change the
Vitest import to include `vi`:
```ts
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
```
Extend `RuntimeWithConnectionDeps`:
```ts
type RuntimeWithConnectionDeps = {
deps: {
connections: {
listEnabledConnections(ids: string[]): Promise<Array<{ id: string; name: string; connectionType: string }>>;
getConnectionById(connectionId: string): Promise<{ id: string; name: string; connectionType: string } | null>;
executeQuery(connectionId: string, sql: string): Promise<unknown>;
};
};
};
```
Add this test:
```ts
it('passes project connection config to local ingest query executors', async () => {
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const queryExecutor = {
execute: vi.fn(async () => ({
headers: ['answer'],
rows: [[1]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const runtime = createLocalBundleIngestRuntime({
project,
adapters: [new FakeSourceAdapter()],
agentRunner,
queryExecutor,
});
const connections = (runtime.runner as unknown as RuntimeWithConnectionDeps).deps.connections;
await expect(connections.executeQuery('warehouse', 'select 1')).resolves.toMatchObject({
headers: ['answer'],
});
expect(queryExecutor.execute).toHaveBeenCalledWith({
connectionId: 'warehouse',
projectDir: project.projectDir,
connection: project.config.connections.warehouse,
sql: 'select 1',
});
});
```
- [ ] **Step 2: Run the failing local runtime test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "project connection config"
```
Expected: FAIL because `LocalConnectionCatalog.executeQuery()` only passes
`connectionId` and `sql`.
- [ ] **Step 3: Update local ingest query executor types**
In `packages/context/src/ingest/local-bundle-runtime.ts`, import the shared
query executor type:
```ts
import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js';
```
Change `CreateLocalBundleIngestRuntimeOptions.queryExecutor` to:
```ts
queryExecutor?: KtxSqlQueryExecutorPort;
```
Change `LocalConnectionCatalog` to store that type:
```ts
class LocalConnectionCatalog implements SlConnectionCatalogPort {
constructor(
private readonly project: KtxLocalProject,
private readonly queryExecutor?: KtxSqlQueryExecutorPort,
) {}
```
Change `executeQuery()`:
```ts
async executeQuery(connectionId: string, sql: string): Promise<KtxQueryResult> {
if (!this.queryExecutor) {
throw new Error('Local ingest has no query executor configured');
}
return this.queryExecutor.execute({
connectionId,
projectDir: this.project.projectDir,
connection: this.project.config.connections[connectionId],
sql,
});
}
```
In `packages/context/src/ingest/local-ingest.ts`, replace the local query
executor object type with the shared port:
```ts
import type { KtxSqlQueryExecutorPort } from '../connections/index.js';
```
```ts
queryExecutor?: KtxSqlQueryExecutorPort;
```
- [ ] **Step 4: Run the local runtime test**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "project connection config"
```
Expected: PASS.
- [ ] **Step 5: Commit**
Run:
```bash
git add \
packages/context/src/ingest/local-bundle-runtime.ts \
packages/context/src/ingest/local-bundle-runtime.test.ts \
packages/context/src/ingest/local-ingest.ts
git commit -m "fix(context): pass connection config to ingest query executors"
```
### Task 4: Supply a scan-connector query executor to CLI ingest
**Files:**
- Create: `packages/cli/src/ingest-query-executor.ts`
- Create: `packages/cli/src/ingest-query-executor.test.ts`
- Modify: `packages/cli/src/ingest.ts`
- Modify: `packages/cli/src/ingest.test.ts`
- [ ] **Step 1: Write the CLI query executor tests**
Create `packages/cli/src/ingest-query-executor.test.ts`:
```ts
import type { KtxLocalProject } from '@ktx/context/project';
import { createKtxConnectorCapabilities, type KtxScanConnector } from '@ktx/context/scan';
import { describe, expect, it, vi } from 'vitest';
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
function project(): KtxLocalProject {
return {
projectDir: '/tmp/ktx-query-project',
config: {
project: 'warehouse',
connections: {
warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
},
},
} as unknown as KtxLocalProject;
}
function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
return {
id: 'warehouse',
driver: 'postgres',
capabilities: createKtxConnectorCapabilities({ readOnlySql: true }),
async introspect() {
throw new Error('introspect is not used by this test');
},
executeReadOnly: vi.fn(async () => ({
headers: ['answer'],
rows: [[1]],
totalRows: 1,
rowCount: 1,
})),
cleanup: vi.fn(async () => {}),
...overrides,
};
}
describe('createKtxCliIngestQueryExecutor', () => {
it('executes read-only SQL through the scan connector and cleans it up', async () => {
const scanConnector = connector();
const createConnector = vi.fn(async () => scanConnector);
const executor = createKtxCliIngestQueryExecutor(project(), { createConnector });
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
projectDir: '/tmp/ktx-query-project',
sql: 'select 1',
maxRows: 5,
}),
).resolves.toMatchObject({
headers: ['answer'],
rows: [[1]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
});
expect(createConnector).toHaveBeenCalledWith(project(), 'warehouse');
expect(scanConnector.executeReadOnly).toHaveBeenCalledWith(
{ connectionId: 'warehouse', sql: 'select 1', maxRows: 5 },
{ runId: 'ingest-sql-execution' },
);
expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);
});
it('rejects connectors without read-only SQL support', async () => {
const scanConnector = connector({
capabilities: createKtxConnectorCapabilities({ readOnlySql: false }),
executeReadOnly: undefined,
});
const executor = createKtxCliIngestQueryExecutor(project(), {
createConnector: vi.fn(async () => scanConnector),
});
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres' },
projectDir: '/tmp/ktx-query-project',
sql: 'select 1',
}),
).rejects.toThrow('Connection "warehouse" driver "postgres" does not support read-only SQL execution.');
expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);
});
});
```
- [ ] **Step 2: Run the failing CLI query executor test**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts
```
Expected: FAIL because `ingest-query-executor.ts` does not exist.
- [ ] **Step 3: Add the scan-connector-backed query executor**
Create `packages/cli/src/ingest-query-executor.ts`:
```ts
import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from '@ktx/context/connections';
import type { KtxLocalProject } from '@ktx/context/project';
import type { KtxScanConnector, KtxScanContext } from '@ktx/context/scan';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
type CreateConnector = typeof createKtxCliScanConnector;
export interface KtxCliIngestQueryExecutorDeps {
createConnector?: CreateConnector;
}
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
await connector?.cleanup?.();
}
/**
 * Builds the SQL query executor that CLI ingest runs hand to local ingest.
 *
 * Every `execute` call creates a connector for `input.connectionId`, runs the
 * statement through the connector's `executeReadOnly`, and then cleans the
 * connector up, whether the call returned or threw.
 *
 * @param project - Local project passed to the connector factory.
 * @param deps - Optional overrides; `createConnector` defaults to `createKtxCliScanConnector`.
 * @returns An executor whose results always report `command: 'SELECT'`.
 * @throws Error when the connector does not advertise or implement read-only SQL.
 */
export function createKtxCliIngestQueryExecutor(
  project: KtxLocalProject,
  deps: KtxCliIngestQueryExecutorDeps = {},
): KtxSqlQueryExecutorPort {
  const connectorFactory = deps.createConnector ?? createKtxCliScanConnector;

  return {
    async execute(input: KtxSqlQueryExecutionInput) {
      const { connectionId, sql, maxRows } = input;
      // Remains null if the factory itself throws, so cleanup has nothing to release.
      let connector: KtxScanConnector | null = null;

      try {
        connector = await connectorFactory(project, connectionId);

        // Both the capability flag and the method must be present before running SQL.
        if (!(connector.capabilities.readOnlySql && connector.executeReadOnly)) {
          throw new Error(
            `Connection "${connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`,
          );
        }

        const scanContext: KtxScanContext = { runId: 'ingest-sql-execution' };
        const { headers, rows, totalRows, rowCount } = await connector.executeReadOnly(
          { connectionId, sql, maxRows },
          scanContext,
        );

        return { headers, rows, totalRows, command: 'SELECT', rowCount };
      } finally {
        await cleanupConnector(connector);
      }
    },
  };
}
```
- [ ] **Step 4: Wire the CLI executor into local ingest runs**
In `packages/cli/src/ingest.ts`, import the executor and type:
```ts
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
import type { KtxLocalProject } from '@ktx/context/project';
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
```
Extend `KtxIngestDeps`:
```ts
createQueryExecutor?: (project: KtxLocalProject) => KtxSqlQueryExecutorPort;
```
Inside the `args.command === 'run'` branch, after `localIngestOptions` is
defined, add:
```ts
const queryExecutor =
localIngestOptions.queryExecutor ??
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project);
```
Pass `queryExecutor` to both local ingest execution paths. In the Metabase
fan-out call:
```ts
...localIngestOptions,
queryExecutor,
trigger: 'manual_resync',
```
In the normal local ingest call:
```ts
...localIngestOptions,
queryExecutor,
pullConfigOptions: adapterOptions,
```
- [ ] **Step 5: Add CLI wiring coverage**
Add this test to `packages/cli/src/ingest.test.ts`:
```ts
it('supplies a scan-connector query executor to local ingest runs', async () => {
const io = makeIo();
const projectDir = join(tempDir, 'query-executor-project');
await writeWarehouseConfig(projectDir);
const queryExecutor = {
execute: vi.fn(async () => ({
headers: [],
rows: [],
totalRows: 0,
command: 'SELECT',
rowCount: 0,
})),
};
const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> =>
completedLocalBundleRun(input, 'query-executor-run'),
);
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
outputMode: 'json',
},
io.io,
{
runLocalIngest,
createAdapters: () => [],
createQueryExecutor: () => queryExecutor,
},
),
).resolves.toBe(0);
expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ queryExecutor }));
});
```
- [ ] **Step 6: Run CLI query executor tests**
Run:
```bash
# Run the executor file unfiltered: none of its test names contain "query executor",
# so a shared -t filter would skip every test in it.
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts -t "query executor"
```
Expected: PASS.
- [ ] **Step 7: Commit**
Run:
```bash
git add \
packages/cli/src/ingest-query-executor.ts \
packages/cli/src/ingest-query-executor.test.ts \
packages/cli/src/ingest.ts \
packages/cli/src/ingest.test.ts
git commit -m "fix(cli): enable read-only SQL probes for local ingest"
```
### Task 5: Final verification
**Files:**
- Verify: all files changed by Tasks 1-4.
- [ ] **Step 1: Run focused context tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
src/ingest/tools/warehouse-verification/entity-details.tool.test.ts \
src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts \
src/ingest/local-bundle-runtime.test.ts \
src/ingest/local-adapters.test.ts \
src/ingest/adapters/lookml/lookml.adapter.test.ts \
src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
src/ingest/ingest-bundle.runner.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run focused CLI tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts
```
Expected: PASS.
- [ ] **Step 3: Run type checks**
Run:
```bash
pnpm --filter @ktx/context run type-check
pnpm --filter @ktx/cli run type-check
```
Expected: both commands pass.
- [ ] **Step 4: Run pre-commit on changed files if configured**
Run:
```bash
uv run pre-commit run --files \
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
packages/context/src/ingest/adapters/lookml/lookml.adapter.ts \
packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts \
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
packages/context/src/ingest/local-adapters.ts \
packages/context/src/ingest/local-adapters.test.ts \
packages/context/src/ingest/local-bundle-runtime.ts \
packages/context/src/ingest/local-bundle-runtime.test.ts \
packages/context/src/ingest/local-ingest.ts \
packages/cli/src/ingest-query-executor.ts \
packages/cli/src/ingest-query-executor.test.ts \
packages/cli/src/ingest.ts \
packages/cli/src/ingest.test.ts \
docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md
```
Expected: PASS. If the repository has no pre-commit config or the local `uv`
version cannot satisfy the configured toolchain, record the exact error and use
the focused test and type-check results as the closest verification.
- [ ] **Step 5: Commit final verification fixes if any were needed**
If verification required edits, run:
```bash
git add <changed-files>
git commit -m "test: cover warehouse verification v1 closure"
```
If verification required no edits, do not create an empty commit.
## Self-review
Spec coverage:
- Raw warehouse discovery still covers wiki, semantic-layer, and raw schema
results, and now raw hits include the connection name needed by the required
`entity_details` follow-up.
- Every local synthesis adapter with an external source connection now has a
path to target warehouse IDs: dbt and Notion already had it, Looker resolves
staged mappings, Metabase fan-out runs under target warehouse IDs, and this
plan adds LookML and MetricFlow.
- `sql_execution` remains scoped by `allowedConnectionNames`, retains the
read-only SQL wrapper, and gains a normal local ingest execution backend.
Placeholder scan:
- This plan contains no deferred implementation placeholders.
- Every code-changing step includes the exact test or implementation snippet to
add.
Type consistency:
- `connectionName` is added to `RawSchemaHit` and used by `DiscoverDataTool`.
- `targetConnectionIds` and `listTargetConnectionIds()` match the existing dbt
and Notion adapter pattern.
- Local ingest uses `KtxSqlQueryExecutorPort` consistently from CLI to context.

View file

@ -1,580 +0,0 @@
# CLI Command-Tree Script Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add a build-time script that prints the full `ktx` CLI command tree (name, aliases, description per node) as an indented text tree, for docs and discovery - without adding a runtime `ktx` subcommand.
**Architecture:** Commander.js exposes every registered command as a `Command` instance with `.commands`, `.name()`, `.aliases()`, `.description()` - we walk that tree. The current `runCommanderKtxCli` in `packages/cli/src/cli-program.ts` builds the program inline; we extract that assembly into a pure `buildKtxProgram(...)` helper that any caller can use to materialize the configured root `Command` without parsing argv. A new pure module `command-tree.ts` walks the `Command` into plain data and renders it as indented text. A new TypeScript entrypoint `print-command-tree.ts` compiles alongside `bin.ts` into `dist/print-command-tree.js`, instantiates the program with stub IO/deps, and writes the rendered tree to stdout. A pnpm script under `@ktx/cli` exposes it as `pnpm --filter @ktx/cli run docs:commands`.
**Tech Stack:** TypeScript (NodeNext ESM), Node 22, Commander 14 via `@commander-js/extra-typings`, vitest 4.
---
## File Map
- **Modify:** `packages/cli/src/cli-program.ts` - extract `buildKtxProgram` from `runCommanderKtxCli`.
- **Create:** `packages/cli/src/cli-program.test.ts` - vitest tests for the new helper.
- **Create:** `packages/cli/src/command-tree.ts` - pure `walkCommandTree` + `formatCommandTree`.
- **Create:** `packages/cli/src/command-tree.test.ts` - vitest tests against ad-hoc Command trees.
- **Create:** `packages/cli/src/print-command-tree.ts` - script entrypoint; thin glue.
- **Create:** `packages/cli/src/print-command-tree.test.ts` - vitest test that calls the script's exported `main()` with a fake stdout and asserts the rendered tree includes known top-level commands.
- **Modify:** `packages/cli/package.json` - add `docs:commands` script and include the new entry in tsc build output (no change needed if `tsconfig` already globs `src/**/*.ts`, but verify).
- **Modify:** `packages/cli/README.md` (if it exists; otherwise skip) - document `pnpm run docs:commands`.
Files that change together (cli-program + its test, command-tree + its test, print-command-tree + its test) live next to each other under `packages/cli/src/`, matching the existing convention (e.g. `bin.ts`, `cli-runtime.ts`, `runtime.ts` + `runtime.test.ts`).
---
## Task 1: Extract `buildKtxProgram` from `runCommanderKtxCli`
Refactor only - no behavior change. The current code in `cli-program.ts` interleaves program construction with `parseAsync` dispatch. Splitting them lets the new script reuse construction without invoking the CLI.
**Files:**
- Modify: `packages/cli/src/cli-program.ts:197-275` (function `runCommanderKtxCli`)
- Create: `packages/cli/src/cli-program.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/cli/src/cli-program.test.ts`:
```typescript
import { describe, expect, it } from 'vitest';
import type { Command } from '@commander-js/extra-typings';
import { buildKtxProgram } from './cli-program.js';
import type { KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
function stubIo(): KtxCliIo {
return {
stdout: { isTTY: false, columns: 80, write: () => {} },
stderr: { write: () => {} },
};
}
function stubPackageInfo(): KtxCliPackageInfo {
return { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' };
}
describe('buildKtxProgram', () => {
it('returns a Command named "ktx" with all registered top-level subcommands', () => {
const program: Command = buildKtxProgram({
io: stubIo(),
deps: {},
packageInfo: stubPackageInfo(),
runInit: async () => 0,
});
expect(program.name()).toBe('ktx');
const topLevel = program.commands.map((c) => c.name()).sort();
// Sanity check: at least these registrar surfaces must be present.
for (const expected of ['setup', 'serve', 'sl', 'dev']) {
expect(topLevel).toContain(expected);
}
});
it('does not parse argv or invoke action handlers', async () => {
// Build should be a pure call; no rejections, no side-effects to stdout.
let wrote = '';
const io: KtxCliIo = {
stdout: { isTTY: false, columns: 80, write: (chunk) => { wrote += chunk; } },
stderr: { write: (chunk) => { wrote += chunk; } },
};
buildKtxProgram({ io, deps: {}, packageInfo: stubPackageInfo(), runInit: async () => 0 });
expect(wrote).toBe('');
});
});
```
- [ ] **Step 2: Run test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/cli-program.test.ts`
Expected: FAIL - `buildKtxProgram is not exported from './cli-program.js'` (or similar TS/ESM error).
- [ ] **Step 3: Extract `buildKtxProgram` from `runCommanderKtxCli`**
Edit `packages/cli/src/cli-program.ts`. Add a new exported function above `runCommanderKtxCli`:
```typescript
export interface BuildKtxProgramOptions {
io: KtxCliIo;
deps: KtxCliDeps;
packageInfo: KtxCliPackageInfo;
runInit: (args: { projectDir: string; projectName?: string; force: boolean }, io: KtxCliIo) => Promise<number>;
setExitCode?: (code: number) => void;
}
/**
 * Creates the base program and registers every command group on it.
 *
 * This function only constructs the program; it does not call any parse method
 * on it itself.
 *
 * @param options - IO, dependencies, package info, and the `runInit` handler shared
 *   with every registrar. `setExitCode` is optional and defaults to a no-op.
 * @returns The program returned by `createBaseProgram`, after all registrars have run.
 */
export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
  const { io, deps, packageInfo, runInit } = options;
  const setExitCode = options.setExitCode ?? (() => {});

  const program = createBaseProgram(packageInfo, io);

  const context: KtxCliCommandContext = {
    io,
    deps,
    packageInfo,
    setExitCode,
    runInit,
    writeDebug: (command, commandContext) => {
      // The module-level writeDebug takes (io, context, command); the context hook swaps the order.
      writeDebug(io, commandContext, command);
    },
  };

  // Registration order is kept exactly as in the original inline assembly.
  registerSetupCommands(program, context);
  registerConnectionCommands(program, context);
  registerPublicIngestCommands(program, context);
  registerWikiCommands(program, context);
  registerSlCommands(program, context);
  registerRuntimeCommands(program, context);
  registerServeCommands(program, context);
  registerStatusCommands(program, context);
  registerAgentCommands(program, context);
  registerDevCommands(program, context);

  return program;
}
```
Then rewrite the body of `runCommanderKtxCli` (lines 197-275) to delegate program assembly. Replace the block from `const program = createBaseProgram(info, io);` (line 206) through `registerDevCommands(program, context);` (line 248) with:
```typescript
profileMark('commander:entry');
let exitCode = 0;
const program = buildKtxProgram({
io,
deps,
packageInfo: info,
runInit: options.runInit,
setExitCode: (code: number) => {
exitCode = code;
},
});
profileMark('commander:program-built');
const context: KtxCliCommandContext = {
io,
deps,
packageInfo: info,
setExitCode: (code: number) => {
exitCode = code;
},
runInit: options.runInit,
writeDebug: (command: string, commandContext: CommandWithGlobalOptions) => {
writeDebug(io, commandContext, command);
},
};
```
Keep the `context` declaration in `runCommanderKtxCli`: the `if (argv.length === 0)` branch still passes it to `runBareInteractiveCommand(program, io, context)`. Leave `context` exactly as it was before this edit, and do not change `runBareInteractiveCommand`'s signature or behavior. Delete the individual `register*` calls and their `profileMark` lines from `runCommanderKtxCli`; `buildKtxProgram` now performs the registration.
- [ ] **Step 4: Run the new test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/cli-program.test.ts`
Expected: PASS - both `it` blocks green.
- [ ] **Step 5: Run the full CLI test suite to confirm no regression**
Run: `pnpm --filter @ktx/cli run test 2>&1 | tee /tmp/ktx-cli-test-output.log`
Expected: PASS overall. Inspect the log if any previously-passing test now fails - most likely a missing register call (compare to lines 221-249 of the pre-change file).
- [ ] **Step 6: Type-check**
Run: `pnpm --filter @ktx/cli run type-check`
Expected: no errors.
- [ ] **Step 7: Commit**
```bash
git add packages/cli/src/cli-program.ts packages/cli/src/cli-program.test.ts
git commit -m "refactor(cli): extract buildKtxProgram for reuse outside runCommanderKtxCli"
```
---
## Task 2: Pure tree walker `walkCommandTree`
Take a Commander `Command` and produce plain data: `{ name, description, aliases, children }`. No formatting yet. Pure function - depends only on the public `Command` API.
**Files:**
- Create: `packages/cli/src/command-tree.ts`
- Create: `packages/cli/src/command-tree.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/cli/src/command-tree.test.ts`:
```typescript
import { Command } from '@commander-js/extra-typings';
import { describe, expect, it } from 'vitest';
import { walkCommandTree } from './command-tree.js';
describe('walkCommandTree', () => {
it('captures name, description, aliases, and nested children', () => {
const root = new Command('root').description('the root');
const child = new Command('child').description('a child').alias('c').alias('ch');
const grandchild = new Command('grand').description('a grandchild');
child.addCommand(grandchild);
root.addCommand(child);
const tree = walkCommandTree(root);
expect(tree).toEqual({
name: 'root',
description: 'the root',
aliases: [],
children: [
{
name: 'child',
description: 'a child',
aliases: ['c', 'ch'],
children: [
{ name: 'grand', description: 'a grandchild', aliases: [], children: [] },
],
},
],
});
});
it('returns an empty children array when there are no subcommands', () => {
const leaf = new Command('leaf').description('alone');
expect(walkCommandTree(leaf)).toEqual({
name: 'leaf',
description: 'alone',
aliases: [],
children: [],
});
});
it('uses an empty string when description is unset', () => {
const cmd = new Command('bare');
expect(walkCommandTree(cmd).description).toBe('');
});
});
```
- [ ] **Step 2: Run test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts`
Expected: FAIL - `walkCommandTree` cannot be resolved.
- [ ] **Step 3: Implement `walkCommandTree`**
Create `packages/cli/src/command-tree.ts`:
```typescript
import type { Command } from '@commander-js/extra-typings';
export interface CommandTreeNode {
name: string;
description: string;
aliases: string[];
children: CommandTreeNode[];
}
/**
 * Converts a Commander command and all of its descendants into plain data.
 *
 * @param command - Root of the (sub)tree to snapshot.
 * @returns A node with the command's name, description, and aliases, plus one
 *   child node per entry of `command.commands`, in that array's order.
 */
export function walkCommandTree(command: Command): CommandTreeNode {
  return {
    name: command.name(),
    description: command.description(),
    // Copy the aliases so the returned tree never shares an array with the Command instance.
    aliases: [...command.aliases()],
    children: command.commands.map((child) => walkCommandTree(child as Command)),
  };
}
```
- [ ] **Step 4: Run test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts`
Expected: PASS (3 of 3).
- [ ] **Step 5: Type-check**
Run: `pnpm --filter @ktx/cli run type-check`
Expected: no errors.
---
## Task 3: Indented-text renderer `formatCommandTree`
Render a `CommandTreeNode` as plain text. Each node on its own line: `<indent><name>[ (alias1, alias2)][ - description]`. Indent is two spaces per depth level. Children sorted alphabetically by name to keep output stable across changes that reorder registrar calls.
**Files:**
- Modify: `packages/cli/src/command-tree.ts`
- Modify: `packages/cli/src/command-tree.test.ts`
- [ ] **Step 1: Write the failing test**
Append to `packages/cli/src/command-tree.test.ts`:
```typescript
import { formatCommandTree } from './command-tree.js';
describe('formatCommandTree', () => {
it('renders a single node with no children', () => {
const node = { name: 'solo', description: 'just me', aliases: [], children: [] };
expect(formatCommandTree(node)).toBe('solo - just me\n');
});
it('renders aliases in parentheses before the description', () => {
const node = { name: 'cmd', description: 'does things', aliases: ['c', 'co'], children: [] };
expect(formatCommandTree(node)).toBe('cmd (c, co) - does things\n');
});
it('omits the dash when description is empty', () => {
const node = { name: 'bare', description: '', aliases: [], children: [] };
expect(formatCommandTree(node)).toBe('bare\n');
});
it('indents children by two spaces per depth level and sorts siblings alphabetically', () => {
const tree = {
name: 'root',
description: 'top',
aliases: [],
children: [
{ name: 'beta', description: 'b', aliases: [], children: [] },
{ name: 'alpha', description: 'a', aliases: ['al'], children: [
{ name: 'inner', description: 'i', aliases: [], children: [] },
] },
],
};
expect(formatCommandTree(tree)).toBe(
'root - top\n' +
' alpha (al) - a\n' +
' inner - i\n' +
' beta - b\n',
);
});
});
```
- [ ] **Step 2: Run test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts`
Expected: FAIL - `formatCommandTree` is not exported.
- [ ] **Step 3: Implement `formatCommandTree`**
Append to `packages/cli/src/command-tree.ts`:
```typescript
export function formatCommandTree(node: CommandTreeNode): string {
const lines: string[] = [];
appendNode(node, 0, lines);
return `${lines.join('\n')}\n`;
}
function appendNode(node: CommandTreeNode, depth: number, lines: string[]): void {
const indent = '  '.repeat(depth);
const aliasPart = node.aliases.length > 0 ? ` (${node.aliases.join(', ')})` : '';
const descPart = node.description.length > 0 ? ` - ${node.description}` : '';
lines.push(`${indent}${node.name}${aliasPart}${descPart}`);
const sortedChildren = [...node.children].sort((a, b) => a.name.localeCompare(b.name));
for (const child of sortedChildren) {
appendNode(child, depth + 1, lines);
}
}
```
- [ ] **Step 4: Run test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts`
Expected: PASS (7 of 7 across walkCommandTree + formatCommandTree).
- [ ] **Step 5: Type-check**
Run: `pnpm --filter @ktx/cli run type-check`
Expected: no errors.
- [ ] **Step 6: Commit**
```bash
git add packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts
git commit -m "feat(cli): add walkCommandTree and formatCommandTree helpers"
```
---
## Task 4: Script entrypoint `print-command-tree.ts`
Thin glue: build the program with stub IO/deps, walk it, format it, and write the result to a provided stdout. Export `renderKtxCommandTree()` for unit testing and a thin `main(stdout)` wrapper that writes its output; `main` auto-runs only when the module is invoked as a script.
**Files:**
- Create: `packages/cli/src/print-command-tree.ts`
- Create: `packages/cli/src/print-command-tree.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/cli/src/print-command-tree.test.ts`:
```typescript
import { describe, expect, it } from 'vitest';
import { renderKtxCommandTree } from './print-command-tree.js';
describe('renderKtxCommandTree', () => {
it('renders an indented tree rooted at "ktx" with known top-level commands', () => {
const output = renderKtxCommandTree();
const lines = output.split('\n');
expect(lines[0]).toMatch(/^ktx( |$|\s-)/);
// Top-level commands are indented exactly two spaces.
const topLevel = lines
.filter((line) => /^ {2}\S/.test(line))
.map((line) => line.trim().split(' ')[0]);
for (const expected of ['setup', 'serve', 'sl', 'dev']) {
expect(topLevel).toContain(expected);
}
});
it('ends with a single trailing newline', () => {
const output = renderKtxCommandTree();
expect(output.endsWith('\n')).toBe(true);
expect(output.endsWith('\n\n')).toBe(false);
});
});
```
- [ ] **Step 2: Run test to verify it fails**
Run: `pnpm --filter @ktx/cli exec vitest run src/print-command-tree.test.ts`
Expected: FAIL - module not found.
- [ ] **Step 3: Implement the script**
Create `packages/cli/src/print-command-tree.ts`:
```typescript
import { fileURLToPath } from 'node:url';
import { buildKtxProgram } from './cli-program.js';
import type { KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
import { formatCommandTree, walkCommandTree } from './command-tree.js';
function silentIo(): KtxCliIo {
return {
stdout: { isTTY: false, columns: 80, write: () => {} },
stderr: { write: () => {} },
};
}
function stubPackageInfo(): KtxCliPackageInfo {
return { name: '@ktx/cli', version: '0.0.0-docs', contextPackageName: '@ktx/context' };
}
export function renderKtxCommandTree(): string {
const program = buildKtxProgram({
io: silentIo(),
deps: {},
packageInfo: stubPackageInfo(),
runInit: async () => 0,
});
return formatCommandTree(walkCommandTree(program));
}
export function main(stdout: { write(chunk: string): void }): void {
stdout.write(renderKtxCommandTree());
}
const invokedAsScript =
typeof process !== 'undefined' &&
Array.isArray(process.argv) &&
process.argv[1] !== undefined &&
fileURLToPath(import.meta.url) === process.argv[1];
if (invokedAsScript) {
main(process.stdout);
}
```
- [ ] **Step 4: Run test to verify it passes**
Run: `pnpm --filter @ktx/cli exec vitest run src/print-command-tree.test.ts`
Expected: PASS - both tests green.
- [ ] **Step 5: Type-check**
Run: `pnpm --filter @ktx/cli run type-check`
Expected: no errors.
- [ ] **Step 6: Build and run the script end-to-end**
Run:
```bash
pnpm --filter @ktx/cli run build
node packages/cli/dist/print-command-tree.js | head -20
```
Expected: first line begins with `ktx`, followed by indented top-level commands (`setup`, `serve`, `sl`, `dev`, etc.). No errors on stderr.
- [ ] **Step 7: Commit**
```bash
git add packages/cli/src/print-command-tree.ts packages/cli/src/print-command-tree.test.ts
git commit -m "feat(cli): add print-command-tree build-time script"
```
---
## Task 5: Wire pnpm script and document
Expose the script through pnpm so contributors and CI don't need to remember the `node dist/…` path.
**Files:**
- Modify: `packages/cli/package.json` (add `docs:commands` to `scripts`)
- [ ] **Step 1: Inspect existing scripts block**
Run: `node -e "const p=require('./packages/cli/package.json'); console.log(JSON.stringify(p.scripts, null, 2))"`
Note the current keys (`build`, `smoke`, `test`, `test:slow`, `type-check`, `assets:demo`). Add a new entry that depends on `build`.
- [ ] **Step 2: Add the `docs:commands` script**
Edit `packages/cli/package.json`. In the `"scripts"` object, add (after `"build"`):
```json
"docs:commands": "pnpm run build && node dist/print-command-tree.js",
```
Keep alphabetical-ish ordering consistent with the existing block; if other scripts use `&&` chains for build prerequisites, match the style.
- [ ] **Step 3: Verify the script runs**
Run: `pnpm --filter @ktx/cli run docs:commands | head -30`
Expected: builds the CLI, then prints the tree (first line `ktx ...`, two-space-indented children below).
- [ ] **Step 4: Verify nothing else broke**
Run in parallel:
- `pnpm --filter @ktx/cli run type-check`
- `pnpm --filter @ktx/cli run test`
Expected: both PASS.
- [ ] **Step 5: Commit**
```bash
git add packages/cli/package.json
git commit -m "chore(cli): add docs:commands pnpm script"
```
---
## Verification Summary
After all tasks, confirm:
- [ ] `pnpm --filter @ktx/cli run type-check` - clean
- [ ] `pnpm --filter @ktx/cli run test` - green, including new tests in `cli-program.test.ts`, `command-tree.test.ts`, `print-command-tree.test.ts`
- [ ] `pnpm --filter @ktx/cli run docs:commands` - prints `ktx` followed by indented subcommand tree
- [ ] `git status --short` - only the files listed in the File Map are modified or created; no incidental edits
If any check fails, fix in place and re-run before declaring done.

File diff suppressed because it is too large Load diff

View file

@ -1,829 +0,0 @@
# Unified Ingest V1 Docs Site Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove the remaining public documentation surfaces that still present
`ktx scan`, adapter-backed `ktx ingest run`, `ktx ingest watch`,
`live-database`, or `Historic SQL` as normal v1 user workflows.
**Architecture:** Keep the implemented CLI behavior unchanged. Update the
Fumadocs content, example READMEs, and documentation regression tests so public
guidance uses connection-centric `ktx ingest <connectionId>`, `ktx ingest
--all`, `--fast`, `--deep`, `--query-history`, `ktx ingest status`, and
`ktx ingest replay`.
**Tech Stack:** Markdown, MDX frontmatter, Fumadocs page metadata, Node test
runner, pnpm workspace scripts.
---
## Current audit
The four implemented unified-ingest plans cover the CLI and setup v1 surface:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`,
`--query-history`, `--no-query-history`, and
`--query-history-window-days` route through `public-ingest.ts`.
- Database targets run before source targets, public source ingest bypasses
adapter allow-lists, and public database ingest captures internal scan output.
- `ktx scan`, `ktx ingest run`, and `ktx ingest watch` are hidden from normal
help.
- Setup stores `connections.<id>.context.depth`, writes
`connections.<id>.context.queryHistory`, rejects reserved ingest ids, and
uses foreground-only context-build state.
### V1-blocking gaps
- `docs-site/content/docs/cli-reference/ktx-ingest.mdx` still documents
adapter-level `ktx ingest run`, `--adapter`, `ktx ingest watch`, and
`live-database`.
- `docs-site/content/docs/cli-reference/ktx-scan.mdx` still presents
`ktx scan` as a public command, and
`docs-site/content/docs/cli-reference/meta.json` still publishes it in the
CLI reference.
- `docs-site/content/docs/cli-reference/ktx-dev.mdx` still links to root
`ktx scan` as a normal command.
- `docs-site/content/docs/guides/building-context.mdx` still has an adapter
table that lists `historic-sql` and `live-database`, and it still documents
`ktx ingest watch` as the visual progress path.
- `docs-site/content/docs/integrations/context-sources.mdx` still instructs
users to run
`ktx ingest run --connection-id <connectionId> --adapter <adapter>`.
- `docs-site/content/docs/concepts/context-as-code.mdx` still recommends
scheduled
`ktx ingest run --connection-id <id> --adapter <adapter> --no-input`.
- `docs-site/content/docs/getting-started/quickstart.mdx` still says setup
runs structural/enriched scans, exposes Historic SQL flags, and describes
detach/background context-build behavior.
- `docs-site/content/docs/integrations/primary-sources.mdx` still uses the
legacy `historicSql` config shape and Historic SQL wording for supported
query-history drivers.
- `examples/README.md` and `examples/local-warehouse/README.md` still present
`ktx ingest run --adapter fake` as the example command.
### Non-blocking gaps
- Hidden debug commands can continue to call `ktx scan`,
`ktx ingest run`, and `ktx ingest watch`.
- Internal source keys, raw artifact paths, tests, scripts, and developer-only
package taxonomy can continue to use `scan`, `live-database`, and
`historic-sql`.
- Contributor docs can continue to mention scan internals when describing
package ownership or connector implementation details.
- The `examples/local-warehouse/ktx.yaml` fake adapter fixture can remain for
CLI smoke tests if the public example docs stop recommending it as a normal
user workflow.
## File structure
- Modify `scripts/examples-docs.test.mjs`: add regression assertions for
docs-site and example README unified-ingest wording.
- Modify `docs-site/content/docs/cli-reference/ktx-ingest.mdx`: rewrite the
page around the connection-centric public command.
- Delete `docs-site/content/docs/cli-reference/ktx-scan.mdx`: remove the
public scan reference page.
- Modify `docs-site/content/docs/cli-reference/meta.json`: remove
`ktx-scan` from published CLI reference pages.
- Modify `docs-site/content/docs/cli-reference/ktx-dev.mdx`: remove the
root-scan link and clarify that database context is built by `ktx ingest`.
- Modify `docs-site/content/docs/guides/building-context.mdx`: remove
adapter tables and live watch guidance; describe status/replay only.
- Modify `docs-site/content/docs/integrations/context-sources.mdx`: replace
adapter-backed ingest commands with `ktx ingest <connectionId>`.
- Modify `docs-site/content/docs/concepts/context-as-code.mdx`: replace
scheduled adapter-backed ingest guidance with `ktx ingest --all`.
- Modify `docs-site/content/docs/getting-started/quickstart.mdx`: update setup
language for schema context, depth, query history, and foreground-only
progress.
- Modify `docs-site/content/docs/integrations/primary-sources.mdx`: replace
`historicSql` with `context.queryHistory` and query-history wording.
- Modify `examples/README.md`: stop advertising the fake adapter command as a
public example workflow.
- Modify `examples/local-warehouse/README.md`: mark the fake adapter fixture as
contributor-only and point users to public ingest docs.
## Tasks
### Task 1: Add stale public-doc regression tests
**Files:**
- Modify: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Add failing docs-site unified-ingest assertions**
In `scripts/examples-docs.test.mjs`, replace the existing test named
`documents public context build workflows in the docs site` with:
```js
it('documents unified public ingest workflows in the docs site', async () => {
const rootReadme = await readText('README.md');
const cliMeta = await readText('docs-site/content/docs/cli-reference/meta.json');
const ingestReference = await readText('docs-site/content/docs/cli-reference/ktx-ingest.mdx');
const devReference = await readText('docs-site/content/docs/cli-reference/ktx-dev.mdx');
const buildingContext = await readText('docs-site/content/docs/guides/building-context.mdx');
const contextSources = await readText('docs-site/content/docs/integrations/context-sources.mdx');
const contextAsCode = await readText('docs-site/content/docs/concepts/context-as-code.mdx');
const quickstart = await readText('docs-site/content/docs/getting-started/quickstart.mdx');
const primarySources = await readText('docs-site/content/docs/integrations/primary-sources.mdx');
const examplesIndex = await readText('examples/README.md');
const localWarehouseReadme = await readText('examples/local-warehouse/README.md');
assert.match(ingestReference, /ktx ingest <connectionId>/);
assert.match(ingestReference, /ktx ingest --all --deep/);
assert.match(ingestReference, /--query-history-window-days <days>/);
assert.match(buildingContext, /ktx ingest <connection-id>/);
assert.match(buildingContext, /ktx ingest --all/);
assert.match(buildingContext, /ktx ingest replay <run-id>/);
assert.match(contextSources, /ktx ingest <connectionId>/);
assert.match(contextAsCode, /ktx ingest --all --no-input/);
assert.match(quickstart, /schema context/);
assert.match(primarySources, /context:\n\s+queryHistory:/);
assert.doesNotMatch(cliMeta, /ktx-scan/);
assert.doesNotMatch(ingestReference, /ktx ingest run/);
assert.doesNotMatch(ingestReference, /--adapter/);
assert.doesNotMatch(ingestReference, /ktx ingest watch/);
assert.doesNotMatch(ingestReference, /live-database/);
assert.doesNotMatch(devReference, /ktx scan/);
assert.doesNotMatch(buildingContext, /ktx ingest watch/);
assert.doesNotMatch(buildingContext, /historic-sql/);
assert.doesNotMatch(buildingContext, /live-database/);
assert.doesNotMatch(contextSources, /ktx ingest run --connection-id/);
assert.doesNotMatch(contextSources, /--adapter <adapter>/);
assert.doesNotMatch(contextAsCode, /ktx ingest run --connection-id/);
assert.doesNotMatch(quickstart, /Historic SQL/);
assert.doesNotMatch(quickstart, /--enable-historic-sql/);
assert.doesNotMatch(quickstart, /press <kbd>d<\/kbd> to detach/);
assert.doesNotMatch(primarySources, /historicSql/);
assert.doesNotMatch(primarySources, /Historic SQL/);
assert.doesNotMatch(examplesIndex, /ktx ingest run --project-dir/);
assert.doesNotMatch(localWarehouseReadme, /ktx ingest run --project-dir/);
assert.match(rootReadme, /raw-sources\//);
assert.doesNotMatch(rootReadme, new RegExp(`${['live', 'database'].join('-')}/`));
assert.doesNotMatch(rootReadme, /ktx scan/);
assert.doesNotMatch(rootReadme, /Run a local ingest smoke test/);
assert.doesNotMatch(rootReadme, /ktx ingest run --project-dir/);
assert.doesNotMatch(rootReadme, /ktx ingest status --project-dir/);
});
```
- [ ] **Step 2: Run the failing docs regression test**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL with assertions matching the stale docs-site and example README
content.
- [ ] **Step 3: Commit the failing test**
```bash
git add scripts/examples-docs.test.mjs
git commit -m "test(docs): cover unified ingest public docs"
```
### Task 2: Rewrite the CLI reference surface
**Files:**
- Modify: `docs-site/content/docs/cli-reference/ktx-ingest.mdx`
- Delete: `docs-site/content/docs/cli-reference/ktx-scan.mdx`
- Modify: `docs-site/content/docs/cli-reference/meta.json`
- Modify: `docs-site/content/docs/cli-reference/ktx-dev.mdx`
- [ ] **Step 1: Rewrite `ktx-ingest.mdx`**
Replace `docs-site/content/docs/cli-reference/ktx-ingest.mdx` with:
````mdx
---
title: "ktx ingest"
description: "Build, inspect, and replay KTX context ingest runs."
---
`ktx ingest` builds or refreshes KTX context from configured connections.
Database connections build schema context. Context-source connections ingest
metadata from tools such as dbt, Looker, Metabase, MetricFlow, LookML, and
Notion.
## Command signature
```bash
ktx ingest [options] [connectionId]
```
Use a connection id to build one configured connection. Use `--all` to build
every configured connection. Database connections run before context-source
connections when you use `--all`.
## Build options
| Flag | Description | Default |
|------|-------------|---------|
| `--all` | Build every configured connection | `false` |
| `--fast` | Use deterministic database schema ingest | Stored connection default, or `fast` |
| `--deep` | Use AI-enriched database ingest | Stored connection default, or `fast` |
| `--query-history` | Include database query-history usage patterns | Stored connection default |
| `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default |
| `--query-history-window-days <days>` | Query-history lookback window for this run | Stored connection default |
| `--plain` | Print plain text output | `true` |
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | `false` |
`--fast` and `--deep` are mutually exclusive. Depth flags apply only to
database connections. Query-history flags apply only to database connections
that support query history.
## Status and replay
| Subcommand | Description |
|------------|-------------|
| `status [runId]` | Print status for the latest or selected stored ingest run or report file |
| `replay <runId>` | Replay a stored ingest run or bundle report through memory-flow output |
Both subcommands accept `--report-file <path>`, `--plain`, `--json`, `--viz`,
and `--no-input`.
## Examples
```bash
ktx ingest warehouse
ktx ingest warehouse --fast
ktx ingest warehouse --deep
ktx ingest warehouse --deep --query-history
ktx ingest warehouse --query-history-window-days 30
ktx ingest notion
ktx ingest --all
ktx ingest --all --deep
ktx ingest status
ktx ingest status run-abc123
ktx ingest status --json
ktx ingest replay run-abc123
ktx ingest replay run-abc123 --viz
ktx ingest replay run-abc123 --report-file /tmp/ingest-report.json
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` |
| Deep readiness is missing | `--deep` or query history needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` |
| Query history is unsupported | The selected database driver does not support query history | Run schema ingest without query-history flags |
| Latest run not found | No stored ingest report exists in this project | Run `ktx ingest <connectionId>` first |
| Visual replay fails in a non-interactive shell | Visual report replay needs a terminal | Use `ktx ingest status --json` for agent and CI workflows |
````
- [ ] **Step 2: Remove the public scan page**
Delete `docs-site/content/docs/cli-reference/ktx-scan.mdx`.
- [ ] **Step 3: Remove `ktx-scan` from CLI metadata**
In `docs-site/content/docs/cli-reference/meta.json`, replace the full file
with:
```json
{
"title": "CLI Reference",
"defaultOpen": true,
"pages": [
"ktx-setup",
"ktx-connection",
"ktx-ingest",
"ktx-sl",
"ktx-wiki",
"ktx-status",
"ktx-dev"
]
}
```
- [ ] **Step 4: Update the dev command reference**
In `docs-site/content/docs/cli-reference/ktx-dev.mdx`, replace this paragraph:
```mdx
`ktx dev` contains development-only project initialization and managed runtime commands. Scan and ingest commands live at the root as [`ktx scan`](/docs/cli-reference/ktx-scan) and [`ktx ingest`](/docs/cli-reference/ktx-ingest).
```
with:
```mdx
`ktx dev` contains development-only project initialization and managed runtime commands. Context building lives at the root as [`ktx ingest`](/docs/cli-reference/ktx-ingest).
```
- [ ] **Step 5: Run the docs regression test**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL only on the remaining guide, integration, quickstart, primary
source, and example README stale wording.
- [ ] **Step 6: Commit CLI reference cleanup**
```bash
git add docs-site/content/docs/cli-reference/ktx-ingest.mdx docs-site/content/docs/cli-reference/meta.json docs-site/content/docs/cli-reference/ktx-dev.mdx
git rm docs-site/content/docs/cli-reference/ktx-scan.mdx
git commit -m "docs: align ingest CLI reference with unified UX"
```
### Task 3: Update context-build guides
**Files:**
- Modify: `docs-site/content/docs/guides/building-context.mdx`
- Modify: `docs-site/content/docs/integrations/context-sources.mdx`
- Modify: `docs-site/content/docs/concepts/context-as-code.mdx`
- [ ] **Step 1: Update stored report guidance in `building-context.mdx`**
In `docs-site/content/docs/guides/building-context.mdx`, replace the
`### Watching progress` section through the paragraph after it with:
````mdx
### Inspecting stored reports
```bash
# Check status of the latest ingest
ktx ingest status
# Check a specific run
ktx ingest status <run-id>
# Replay a past ingest run
ktx ingest replay <run-id>
```
`ktx ingest replay` opens the stored memory-flow output for a completed run.
Foreground context builds do not detach into background control sessions; if a
run is interrupted, rerun `ktx ingest <connection-id>` or `ktx ingest --all`.
````
- [ ] **Step 2: Replace the adapter table in `building-context.mdx`**
In the same file, replace the `### Available adapters` heading, table, and
following sentence with:
```mdx
### Supported context sources
| Driver | Source | What gets ingested |
|--------|--------|--------------------|
| `dbt` | dbt project | Model definitions, column descriptions, tests, tags |
| `metricflow` | MetricFlow semantic models | Metrics, dimensions, entities, semantic joins |
| `lookml` | LookML files | Views, explores, dimensions, measures, joins |
| `looker` | Looker API | Explores, looks, dashboard metadata |
| `metabase` | Metabase API | Questions, dashboards, table metadata |
| `notion` | Notion API | Database pages, knowledge articles |
Query history is a database connection facet. Enable it with
`connections.<id>.context.queryHistory` or pass `--query-history` for a current
run. See [Context Sources](/docs/integrations/context-sources) for
driver-specific setup and auth configuration.
```
- [ ] **Step 3: Update context-source workflow commands**
In `docs-site/content/docs/integrations/context-sources.mdx`, replace the
numbered workflow with:
```mdx
Agents must configure and ingest context sources in this order:
1. Add the context source connection in `ktx.yaml` or with `ktx setup`.
2. Store tokens as `env:NAME` or `file:/path/to/secret`.
3. Run `ktx ingest <connectionId>` for one source or `ktx ingest --all` for
every configured source.
4. Check progress with `ktx ingest status --json`.
5. Review generated `semantic-layer/` YAML and `wiki/` Markdown files in git.
6. Validate changed semantic sources with `ktx sl validate`.
```
- [ ] **Step 4: Update scheduled ingest wording**
In `docs-site/content/docs/concepts/context-as-code.mdx`, replace this
paragraph:
```mdx
Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest run --connection-id <id> --adapter <adapter> --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning.
```
with:
```mdx
Teams usually run this on demand while setting up a source, then schedule it
once the source is stable. A cron job or CI schedule can run
`ktx ingest --all --no-input` overnight on an ingest branch so the latest
schema context, dbt manifests, BI metadata, and documentation updates are ready
for review each morning.
```
- [ ] **Step 5: Run the docs regression test**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL only on quickstart, primary source, and example README stale
wording.
- [ ] **Step 6: Commit guide cleanup**
```bash
git add docs-site/content/docs/guides/building-context.mdx docs-site/content/docs/integrations/context-sources.mdx docs-site/content/docs/concepts/context-as-code.mdx
git commit -m "docs: update context build guides for unified ingest"
```
### Task 4: Update setup and primary-source docs
**Files:**
- Modify: `docs-site/content/docs/getting-started/quickstart.mdx`
- Modify: `docs-site/content/docs/integrations/primary-sources.mdx`
- [ ] **Step 1: Update database setup copy in quickstart**
In `docs-site/content/docs/getting-started/quickstart.mdx`, replace the first
paragraph under `## Step 3: Connect a database` with:
```mdx
Select one or more databases for KTX to connect to. The wizard supports
SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake.
```
Replace this sentence:
```mdx
After connecting, KTX automatically runs a connection test and a structural scan:
```
with:
```mdx
After connecting, KTX automatically runs a connection test and builds fast
schema context:
```
Replace the example output block in Step 3 with:
````mdx
```
Testing postgres-warehouse
Connection test passed
Driver: PostgreSQL - Tables: 42
Building schema context for postgres-warehouse
Running fast database ingest
Schema context complete for postgres-warehouse
Changes: 42 new tables
Primary source ready
postgres-warehouse - PostgreSQL - schema context complete
```
````
Replace this paragraph:
```mdx
For Snowflake and BigQuery, the wizard offers **Historic SQL** configuration for query history views. For PostgreSQL, enable Historic SQL with `--enable-historic-sql` when `pg_stat_statements` is configured.
```
with:
```mdx
For PostgreSQL, Snowflake, and BigQuery, the wizard can enable query-history
ingest when the warehouse history feature is available. Query history is stored
under `connections.<id>.context.queryHistory` in `ktx.yaml`.
```
- [ ] **Step 2: Update context-build copy in quickstart**
In the same file, replace the first two paragraphs under
`## Step 5: Build context` with:
```mdx
This is where KTX builds agent-ready context. It uses the database context
depth saved by setup and ingests metadata from any configured context sources.
Fast database context builds deterministic schema grounding. Deep database
context also generates AI descriptions, embeddings, and relationship evidence
when those capabilities are configured.
```
Replace the paragraph and background example that starts with `For a small
database` and ends with the fenced context-build block with:
````mdx
For a small database (under 50 tables), this can take a few minutes. Larger
warehouses can take longer. Context builds run in the foreground; press
<kbd>Ctrl+C</kbd> to stop the current run and rerun `ktx setup` or `ktx ingest`
when you are ready to try again.
````
Replace this output line in the completion example:
```text
postgres-warehouse: enriched scan complete
```
with:
```text
postgres-warehouse: deep context complete
```
Replace the next-steps bullet:
```mdx
- **Build more context** - learn about [scanning](/docs/guides/building-context), relationship detection, and ingestion workflows in the Building Context guide.
```
with:
```mdx
- **Build more context** - learn about [database ingest](/docs/guides/building-context), relationship detection, and source ingestion workflows in the Building Context guide.
```
- [ ] **Step 3: Update primary-source query-history config**
In `docs-site/content/docs/integrations/primary-sources.mdx`, replace the
introductory paragraph and shared conventions with:
```mdx
KTX connects to your data warehouse or database to build schema context,
discover relationships, and execute semantic layer queries. Each connection is
defined in `ktx.yaml` under the `connections` key.
All connectors share these conventions:
- Sensitive values support `env:VAR_NAME` (read from environment) and
`file:/path/to/secret` (read from file) references
- Connections are read-only; KTX never writes to your database
- Database ingest discovers tables, columns, types, and constraints
automatically
```
In the connection field reference table, replace the `historicSql` row with:
```mdx
| `context.queryHistory` | No | PostgreSQL, Snowflake, BigQuery | Enables query-history ingestion when the warehouse supports it |
```
Replace every feature row label `Historic SQL` with `Query history`.
Replace each `### Historic SQL` heading with `### Query history`.
Replace the PostgreSQL query-history config block with:
```yaml
context:
queryHistory:
enabled: true
minExecutions: 5
filters:
dropTrivialProbes: true
```
Replace the Snowflake query-history config block with:
```yaml
context:
queryHistory:
enabled: true
windowDays: 90
minExecutions: 5
filters:
dropTrivialProbes: true
serviceAccounts:
patterns: ['^svc_']
mode: exclude
redactionPatterns: []
```
Replace the BigQuery query-history config block with:
```yaml
context:
queryHistory:
enabled: true
windowDays: 90
minExecutions: 5
filters:
dropTrivialProbes: true
serviceAccounts:
patterns: ['@bot\.']
mode: exclude
redactionPatterns: []
```
Replace the common-errors row:
```mdx
| Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup |
```
with:
```mdx
| Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest <connectionId> --query-history` or `ktx setup` |
```
Replace the common-errors row:
```mdx
| Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
```
with:
```mdx
| Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
```
Replace the common-errors row:
```mdx
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output |
```
with:
```mdx
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context |
```
- [ ] **Step 4: Run targeted stale-term search**
Run:
```bash
rg -n "Historic SQL|historicSql|--enable-historic-sql|--historic-sql|ktx scan|ktx ingest watch|ktx ingest run --connection-id|--adapter <adapter>|live-database" docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/integrations/primary-sources.mdx docs-site/content/docs/cli-reference docs-site/content/docs/guides/building-context.mdx docs-site/content/docs/integrations/context-sources.mdx docs-site/content/docs/concepts/context-as-code.mdx
```
Expected: no output.
- [ ] **Step 5: Run the docs regression test**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: FAIL only on example README stale adapter-command wording.
- [ ] **Step 6: Commit setup and primary-source docs cleanup**
```bash
git add docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/integrations/primary-sources.mdx
git commit -m "docs: update setup and primary source ingest wording"
```
### Task 5: Remove public fake-adapter example commands
**Files:**
- Modify: `examples/README.md`
- Modify: `examples/local-warehouse/README.md`
- [ ] **Step 1: Rewrite the local-warehouse section in `examples/README.md`**
In `examples/README.md`, replace the `## local-warehouse` section with:
````md
## local-warehouse
`local-warehouse/` is a contributor fixture for local CLI smoke tests. It uses
the internal fake ingest adapter so tests can exercise memory-flow behavior
without a live database or external service.
For normal context building, use the public connection-centric commands:
```bash
ktx ingest <connectionId>
ktx ingest --all
```
The copied project initializes its own Git repository on first use.
````
- [ ] **Step 2: Rewrite `examples/local-warehouse/README.md`**
Replace `examples/local-warehouse/README.md` with:
````md
# local-warehouse fixture
This directory is a contributor fixture for KTX CLI smoke tests. It uses the
internal fake ingest adapter so tests can run without a live database or
external service.
Normal users should build context with connection-centric ingest:
```bash
ktx ingest <connectionId>
ktx ingest --all
```
The public ingest workflow is documented in
`docs-site/content/docs/cli-reference/ktx-ingest.mdx` and
`docs-site/content/docs/guides/building-context.mdx`.
````
- [ ] **Step 3: Run the docs regression test**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 4: Commit example docs cleanup**
```bash
git add examples/README.md examples/local-warehouse/README.md
git commit -m "docs: stop advertising adapter-backed example ingest"
```
### Task 6: Final verification
**Files:**
- Verify: `scripts/examples-docs.test.mjs`
- Verify: `docs-site/content/docs/**/*.mdx`
- Verify: `examples/**/*.md`
- [ ] **Step 1: Run docs regression tests**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 2: Run docs-site build**
Run:
```bash
pnpm --filter ktx-docs run build
```
Expected: PASS. If the build fails because this workspace lacks external build
prerequisites, capture the error and run `pnpm --filter ktx-docs run test` as
the closest available docs-site check.
- [ ] **Step 3: Run final stale public-surface search**
Run:
```bash
rg -n "ktx scan|ktx ingest run --connection-id|--adapter <adapter>|ktx ingest watch|live-database|Historic SQL|historicSql|--enable-historic-sql|--historic-sql" docs-site/content/docs examples/README.md examples/local-warehouse/README.md
```
Expected: no output.
- [ ] **Step 4: Inspect git status**
Run:
```bash
git status --short
```
Expected: only the files intentionally changed by this plan appear.
- [ ] **Step 5: Commit verification updates if needed**
If verification required small documentation or test fixes, commit them:
```bash
git add scripts/examples-docs.test.mjs docs-site/content/docs examples/README.md examples/local-warehouse/README.md
git commit -m "docs: close unified ingest public docs gaps"
```
## Self-review
- Spec coverage: This plan covers the remaining public documentation surfaces
that still contradicted the unified ingest UX spec. It intentionally does not
rename internal scan packages, internal adapter keys, raw artifact paths, or
developer-only test fixtures.
- Placeholder scan: No task contains open-ended placeholders. Each edit names
exact files and exact replacement text or commands.
- Type consistency: This is a documentation-only plan. Command names and config
keys match the implemented CLI and config code: `ktx ingest <connectionId>`,
`ktx ingest --all`, `ktx ingest status`, `ktx ingest replay`, and
`connections.<id>.context.queryHistory`.

View file

@ -1,494 +0,0 @@
# Unified Ingest V1 Final Public Surface Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1-blocking public-surface gaps in unified
`ktx ingest`.
**Architecture:** Keep the current connection-centric ingest planner and hidden
legacy debug commands. Fix the public query-history execution path so it passes
the full canonical `connections.<id>.context.queryHistory` pull config to the
historic-SQL adapter, and filter hidden Commander commands from the
documentation command-tree script so docs/discovery output matches normal CLI
help.
**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages,
pnpm workspace scripts.
---
## Current audit
The implemented unified-ingest plan chain covers most of the original
`docs/superpowers/specs/2026-05-13-unified-ingest-ux-design.md` spec:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`,
`--query-history`, `--no-query-history`, and
`--query-history-window-days` route through `public-ingest.ts`.
- Database targets run before source targets. Public source ingest uses
`allowImplicitAdapter: true`, so `ingest.adapters` is no longer required for
inferred public adapters.
- Public database ingest maps `fast` to structural scan internals and `deep` to
enriched scan internals, honors `scan.relationships.enabled`, and isolates
deep-readiness failures per target under `--all`.
- Normal `ktx --help` hides `scan`; normal `ktx ingest --help` hides `run` and
`watch`; setup help exposes query-history flags instead of Historic SQL flags.
- Setup stores `connections.<id>.context.depth` and
`connections.<id>.context.queryHistory`, migrates legacy `historicSql`, and
uses foreground-only context-build state.
- Public docs-site CLI pages no longer document `ktx scan`,
`ktx ingest run --adapter`, or live `ktx ingest watch` as normal workflows.
### V1-blocking gaps
- Public query-history ingest drops configured pull fields. The lower-level
adapter path maps canonical `context.queryHistory` to the existing
`historicSqlUnifiedPullConfigSchema`, but `executePublicIngestTarget()` always
passes `historicSqlPullConfigOverride` with only `dialect` and sometimes
`windowDays`. Normal `ktx ingest warehouse --query-history` can therefore
ignore configured `minExecutions`, `filters`, `redactionPatterns`,
`concurrency`, and `staleArchiveAfterDays`.
- The documentation command-tree script still prints hidden commands. Running
`pnpm --filter @ktx/cli run docs:commands` currently prints top-level
`scan <connectionId>` and `ktx ingest run` / `ktx ingest watch`, even though
the spec requires `ktx scan` and live `ingest watch` not to be presented as
normal public command surfaces.
### Non-blocking gaps
- Hidden debug commands remain callable: `ktx scan`, `ktx ingest run`, and
`ktx ingest watch`. The spec allows hidden/debug placement for old
implementation surfaces in v1.
- Internal adapter keys, package names, WorkUnit keys, raw artifact paths, and
JSON/debug output can continue to use `scan`, `live-database`, and
`historic-sql`.
- Developer-only scripts and tests can keep scan/live-database terminology when
they exercise internal connector or artifact behavior.
- Public docs still use "scan" as a generic noun in a few conceptual database
sections. They do not document `ktx scan` as the public command, so this is
wording cleanup, not v1-blocking behavior.
## File structure
- Modify `packages/cli/src/public-ingest.ts`: preserve the full canonical
query-history pull config in public ingest plans and pass that config to the
lower-level historic-SQL adapter run.
- Modify `packages/cli/src/public-ingest.test.ts`: add regression coverage for
configured query-history fields and current-run `windowDays` overrides.
- Modify `packages/cli/src/command-tree.ts`: filter Commander commands marked
hidden via Commander private `_hidden`, matching Commander help behavior.
- Modify `packages/cli/src/command-tree.test.ts`: cover hidden top-level and
nested command filtering in the pure walker.
- Modify `packages/cli/src/print-command-tree.test.ts`: lock the rendered KTX
docs command tree against hidden unified-ingest commands.
## Tasks
### Task 1: Preserve canonical query-history pull config in public ingest
**Files:**
- Modify: `packages/cli/src/public-ingest.ts`
- Test: `packages/cli/src/public-ingest.test.ts`
- [ ] **Step 1: Write the failing public-ingest query-history config test**
In `packages/cli/src/public-ingest.test.ts`, add this test inside the
`runKtxPublicIngest` describe block, near the existing query-history execution
tests:
```ts
it('preserves configured query-history pull fields while overriding the current-run window', async () => {
const io = makeIo();
const project = deepReadyProject({
warehouse: {
driver: 'postgres',
context: {
queryHistory: {
enabled: true,
windowDays: 90,
minExecutions: 7,
concurrency: 3,
staleArchiveAfterDays: 120,
filters: {
dropTrivialProbes: true,
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
orchestrators: { mode: 'mark-only' },
dropFailedBelow: { errorRate: 0.5, executions: 3 },
},
redactionPatterns: ['(?i)secret'],
},
},
},
});
const runScan = vi.fn(async () => 0);
const runIngest = vi.fn(async () => 0);
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'disabled',
queryHistory: 'enabled',
queryHistoryWindowDays: 30,
},
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(0);
const ingestArgs = runIngest.mock.calls[0]?.[0];
expect(ingestArgs).toMatchObject({
command: 'run',
connectionId: 'warehouse',
adapter: 'historic-sql',
allowImplicitAdapter: true,
historicSqlPullConfigOverride: {
dialect: 'postgres',
windowDays: 30,
minExecutions: 7,
concurrency: 3,
staleArchiveAfterDays: 120,
filters: {
dropTrivialProbes: true,
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
orchestrators: { mode: 'mark-only' },
dropFailedBelow: { errorRate: 0.5, executions: 3 },
},
redactionPatterns: ['(?i)secret'],
},
});
expect(ingestArgs?.historicSqlPullConfigOverride).not.toHaveProperty('enabled');
});
```
- [ ] **Step 2: Run the failing public-ingest test**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testTimeout 30000
```
Expected: FAIL. The new assertion sees `historicSqlPullConfigOverride` with
`dialect: 'postgres'` and `windowDays: 30`, but without `minExecutions`,
`filters`, `redactionPatterns`, `concurrency`, or
`staleArchiveAfterDays`.
- [ ] **Step 3: Add the full query-history pull config to public plans**
In `packages/cli/src/public-ingest.ts`, update the `queryHistory` field on
`KtxPublicIngestPlanTarget` to include a pull config for enabled query-history
runs:
```ts
queryHistory?: {
enabled: boolean;
dialect?: HistoricSqlDialect;
windowDays?: number;
pullConfig?: Record<string, unknown>;
unsupported?: boolean;
skippedStoredByFast?: boolean;
};
```
Still in `packages/cli/src/public-ingest.ts`, add this helper below
`positiveInteger()`:
```ts
function queryHistoryPullConfig(input: {
stored: Record<string, unknown>;
dialect: HistoricSqlDialect;
windowDays?: number;
}): Record<string, unknown> {
const { enabled: _enabled, dialect: _dialect, ...storedConfig } = input.stored;
return {
...storedConfig,
dialect: input.dialect,
...(input.windowDays !== undefined ? { windowDays: input.windowDays } : {}),
};
}
```
Then replace the enabled-query-history return inside
`resolveDatabaseTargetOptions()` with this version:
```ts
if (requestedQh && dialect) {
if (depth === 'fast') {
input.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`);
}
depth = 'deep';
return {
databaseDepth: depth,
queryHistory: {
...queryHistory,
enabled: true,
dialect,
pullConfig: queryHistoryPullConfig({
stored: storedQh,
dialect,
windowDays: queryHistory.windowDays,
}),
},
steps: ['database-schema', 'query-history'],
};
}
```
- [ ] **Step 4: Pass the preserved pull config into the historic-SQL adapter**
In `packages/cli/src/public-ingest.ts`, replace the
`historicSqlPullConfigOverride` construction in `executePublicIngestTarget()`
with:
```ts
historicSqlPullConfigOverride:
target.queryHistory.pullConfig ?? {
dialect: target.queryHistory.dialect,
...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}),
},
```
The surrounding `ingestArgs` object must still include:
```ts
adapter: 'historic-sql',
outputMode: sourceIngestOutputMode(args, io),
inputMode: args.inputMode,
allowImplicitAdapter: true,
```
- [ ] **Step 5: Run the public-ingest tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testTimeout 30000
```
Expected: PASS. The new regression test proves public ingest preserves stored
query-history fields while `--query-history-window-days 30` overrides only
`windowDays` for the current run.
- [ ] **Step 6: Commit**
Run:
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts
git commit -m "fix(cli): preserve query-history pull config in public ingest"
```
### Task 2: Hide debug commands from the docs command tree
**Files:**
- Modify: `packages/cli/src/command-tree.ts`
- Test: `packages/cli/src/command-tree.test.ts`
- Test: `packages/cli/src/print-command-tree.test.ts`
- [ ] **Step 1: Write the failing hidden-command walker test**
In `packages/cli/src/command-tree.test.ts`, add this test inside the
`walkCommandTree` describe block:
```ts
it('omits Commander hidden commands from the public tree', () => {
const root = new Command('ktx');
root.command('scan', { hidden: true }).description('Run a standalone connection scan');
const ingest = root.command('ingest').description('Build or inspect KTX context');
ingest.command('run', { hidden: true }).description('Run local ingest by adapter');
ingest.command('watch', { hidden: true }).description('Open a stored visual report');
ingest.command('status').description('Print status');
root.command('status').description('Check readiness');
const tree = walkCommandTree(root);
expect(tree.children.map((child) => child.name)).toEqual(['ingest', 'status']);
expect(tree.children[0]).toMatchObject({
name: 'ingest',
children: [{ name: 'status', description: 'Print status', aliases: [], arguments: [], children: [] }],
});
});
```
- [ ] **Step 2: Write the failing rendered KTX tree assertions**
In `packages/cli/src/print-command-tree.test.ts`, add these assertions to the
first `renders an indented tree rooted at "ktx" with known top-level commands`
test after the existing `not.toContain()` assertions:
```ts
expect(output).not.toContain('scan <connectionId>');
expect(output).not.toContain('│ ├── run');
expect(output).not.toContain('│ ├── watch');
expect(output).not.toContain('│ └── watch');
```
- [ ] **Step 3: Run the failing command-tree tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts src/print-command-tree.test.ts
```
Expected: FAIL. The walker includes hidden commands because it currently maps
over `command.commands` without filtering Commander `_hidden` entries.
- [ ] **Step 4: Filter hidden Commander commands in the walker**
In `packages/cli/src/command-tree.ts`, add this helper above
`walkCommandTree()`:
```ts
function isHiddenCommand(command: CommandUnknownOpts): boolean {
return (command as CommandUnknownOpts & { _hidden?: boolean })._hidden === true;
}
```
Then replace the `children` field inside `walkCommandTree()` with:
```ts
children: command.commands.filter((child) => !isHiddenCommand(child)).map((child) => walkCommandTree(child)),
```
The complete function should read:
```ts
export function walkCommandTree(command: CommandUnknownOpts): CommandTreeNode {
return {
name: command.name(),
description: command.description(),
aliases: command.aliases(),
arguments: command.registeredArguments.map(formatArgumentDeclaration),
children: command.commands.filter((child) => !isHiddenCommand(child)).map((child) => walkCommandTree(child)),
};
}
```
- [ ] **Step 5: Run the command-tree tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts src/print-command-tree.test.ts
```
Expected: PASS. The pure walker omits hidden commands and the rendered KTX tree
no longer contains `scan <connectionId>`, `ingest run`, or `ingest watch`.
- [ ] **Step 6: Verify the docs command output directly**
Run:
```bash
pnpm --filter @ktx/cli run docs:commands > /tmp/ktx-command-tree.txt
rg -n "scan <connectionId>|^[[:space:][:graph:]]*run[[:space:]]+Run local ingest|^[[:space:][:graph:]]*watch \\[runId\\]" /tmp/ktx-command-tree.txt
```
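Expected: the first command succeeds and writes the command tree. The `rg`
command exits with status `1` and prints no matches. Note that ripgrep's
`[[:space:]]` and `[[:graph:]]` classes are ASCII-only: if the rendered tree
prefixes nested lines with box-drawing characters such as `│` (as the
`print-command-tree.test.ts` assertions suggest), the `^`-anchored alternatives
cannot match those lines, so rely on the Step 5 tests for nested `run` /
`watch` coverage.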
- [ ] **Step 7: Commit**
Run:
```bash
git add packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts packages/cli/src/print-command-tree.test.ts
git commit -m "fix(cli): omit hidden commands from docs command tree"
```
### Task 3: Final verification
**Files:**
- Verify: `packages/cli/src/public-ingest.ts`
- Verify: `packages/cli/src/command-tree.ts`
- Verify: `packages/cli/src/public-ingest.test.ts`
- Verify: `packages/cli/src/command-tree.test.ts`
- Verify: `packages/cli/src/print-command-tree.test.ts`
- [ ] **Step 1: Run focused CLI regression tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/local-adapters.test.ts src/index.test.ts src/command-tree.test.ts src/print-command-tree.test.ts --testTimeout 30000
```
Expected: PASS. This covers public ingest execution, adapter config mapping,
normal help routing, and docs command-tree rendering.
- [ ] **Step 2: Run CLI type-check**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS with no TypeScript errors.
- [ ] **Step 3: Run docs command-tree output check**
Run:
```bash
pnpm --filter @ktx/cli run docs:commands > /tmp/ktx-command-tree.txt
rg -n "scan <connectionId>|^[[:space:][:graph:]]*run[[:space:]]+Run local ingest|^[[:space:][:graph:]]*watch \\[runId\\]" /tmp/ktx-command-tree.txt
```
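Expected: the `docs:commands` command succeeds. The `rg` command exits `1`
with no matches. Note that ripgrep's `[[:space:]]` and `[[:graph:]]` classes
are ASCII-only: if the rendered tree prefixes nested lines with box-drawing
characters such as `│`, the `^`-anchored alternatives cannot match those lines,
so the Step 1 `print-command-tree.test.ts` run remains the authoritative check
for nested `run` / `watch` entries.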
- [ ] **Step 4: Run TypeScript dead-code checks**
Run:
```bash
pnpm run dead-code
```
Expected: PASS. If Knip reports any findings, inspect them and record the exact
findings in the implementation notes before deciding whether they are related
to this plan or already existed before it.
- [ ] **Step 5: Inspect the final diff**
Run:
```bash
git status --short
git diff -- packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts packages/cli/src/print-command-tree.test.ts
```
Expected: only the intended files are modified. The diff contains no generated
`dist/` output and no unrelated documentation changes.
- [ ] **Step 6: Commit verification-only fixes if needed**
If verification required expectation-only or type-only fixes, run:
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts packages/cli/src/print-command-tree.test.ts
git commit -m "test(cli): close unified ingest final public surface checks"
```
If no files changed during verification, do not create an empty commit.
## Self-review
- Spec coverage: This plan covers the remaining v1-blocking public query-history
config mapping and public command discovery output. It intentionally leaves
hidden debug command callability and internal scan/live-database/historic-sql
names as non-blocking because the original spec allows internal/debug names
in v1.
- Placeholder scan: No task uses deferred placeholders or unnamed edge-handling
steps. Each code step names the exact file, insertion point, and code shape.
- Type consistency: New `pullConfig` data stays under
`KtxPublicIngestPlanTarget.queryHistory` and flows unchanged into the
existing `KtxIngestArgs.historicSqlPullConfigOverride` field. Command-tree
filtering uses Commander `_hidden`, the same field Commander help uses.

View file

@ -1,802 +0,0 @@
# Unified Ingest V1 Final UX Labels Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1-blocking public UX gaps in unified ingest warning aggregation and setup/status terminology.
**Architecture:** Keep the implemented connection-centric ingest planner, hidden debug commands, and internal scan/live-database/historic-sql boundaries. Add one warning accumulator lane for unsupported database query-history targets, then update normal setup/status/docs copy so public database groups are called `Databases` rather than `Primary sources`.
**Tech Stack:** TypeScript ESM, Commander, Vitest, Node test runner, KTX CLI/context packages.
---
## Current Audit
Implemented unified-ingest plans already cover the original spec's main v1 behavior:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, `--query-history`, `--no-query-history`, and `--query-history-window-days` route through `packages/cli/src/public-ingest.ts`.
- Database targets are ordered before source targets, public source ingest bypasses `ingest.adapters`, and database depth maps to structural/enriched scan internals.
- Deep readiness is evaluated before target work starts, and `--all` isolates per-target failures.
- Setup stores `connections.<id>.context.depth` and `connections.<id>.context.queryHistory`, migrates legacy `historicSql`, and uses foreground-only context-build state.
- Normal help hides `ktx scan`, `ktx ingest run`, and live `ktx ingest watch`; docs no longer present those as normal public workflows.
- Foreground progress uses `Databases` and `Context sources`, and normal progress/failure output sanitizes scan/live-database/historic-sql internals.
### V1-Blocking Gaps
- `ktx ingest --all --query-history` does not aggregate unsupported database query-history warnings. Source depth/query-history warnings are aggregated, but unsupported database drivers currently add one warning per target from `resolveDatabaseTargetOptions()`, contrary to the original spec's `--all` warning aggregation rule for non-applicable query-history flags.
- Normal setup/status surfaces still use the old `Primary sources` public label for databases:
- `packages/cli/src/setup.ts` prints `Primary sources configured`.
- `packages/cli/src/setup-context.ts` prints a `Primary sources:` success group.
- `packages/cli/src/setup-ready-menu.ts` labels the database section `Primary sources`.
- `packages/cli/src/setup-databases.ts` uses `primary source` in normal interactive prompts, skip/failure messages, and success headings.
- `README.md`, `docs-site/content/docs/getting-started/quickstart.mdx`, and `docs-site/content/docs/cli-reference/ktx-setup.mdx` still mirror the old label.
### Non-Blocking Gaps
- Hidden debug commands can remain callable: `ktx scan`, `ktx ingest run`, and `ktx ingest watch`.
- Internal adapter keys, raw artifact paths, WorkUnit keys, package names, tests, and developer-only scripts can continue to use `scan`, `live-database`, and `historic-sql`.
- Public conceptual docs may still use `scan` as a generic noun where they are describing internal database metadata artifacts rather than documenting `ktx scan` as the public command.
- Internal readiness config names such as `scan.enrichment.mode` can remain because they are current `ktx.yaml` field names.
## File Structure
- Modify `packages/cli/src/public-ingest.ts`: aggregate unsupported database query-history warnings for `--all`.
- Modify `packages/cli/src/public-ingest.test.ts`: add regression tests for explicit and stored unsupported query-history aggregation.
- Modify `packages/cli/src/setup-ready-menu.ts`: change the ready-project database menu label to `Databases`.
- Modify `packages/cli/src/setup-ready-menu.test.ts`: update the ready-menu expected label.
- Modify `packages/cli/src/setup.ts`: change setup status output from `Primary sources configured` to `Databases configured`.
- Modify `packages/cli/src/setup.test.ts`: update status and empty-selection expectations.
- Modify `packages/cli/src/setup-context.ts`: change setup context success grouping from `Primary sources` to `Databases`.
- Modify `packages/cli/src/setup-context.test.ts`: assert the success output uses `Databases`.
- Modify `packages/cli/src/setup-databases.ts`: change normal database setup copy from `primary source(s)` / `knowledge sources` to `database(s)` / `context sources`.
- Modify `packages/cli/src/setup-databases.test.ts`: update expected prompt/output strings.
- Modify `README.md`: update the setup status example label.
- Modify `docs-site/content/docs/getting-started/quickstart.mdx`: update setup success/status examples.
- Modify `docs-site/content/docs/cli-reference/ktx-setup.mdx`: update setup status example.
- Modify `scripts/examples-docs.test.mjs`: add docs regression assertions that the old `Primary sources` label no longer appears.
## Tasks
### Task 1: Aggregate Unsupported Query-History Warnings
**Files:**
- Modify: `packages/cli/src/public-ingest.ts`
- Test: `packages/cli/src/public-ingest.test.ts`
- [ ] **Step 1: Add failing unsupported warning aggregation tests**
In `packages/cli/src/public-ingest.test.ts`, add these tests after the existing test named `warns and skips query history for unsupported database drivers`:
```ts
it('aggregates unsupported query-history warnings for all database targets', () => {
const plan = buildPublicIngestPlan(
deepReadyProject({
local: { driver: 'sqlite' },
mysql_warehouse: { driver: 'mysql' },
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
}),
{
projectDir: '/tmp/project',
all: true,
depth: 'deep',
queryHistory: 'enabled',
},
);
expect(plan.targets).toEqual([
expect.objectContaining({
connectionId: 'local',
queryHistory: { enabled: false, unsupported: true },
steps: ['database-schema'],
}),
expect.objectContaining({
connectionId: 'mysql_warehouse',
queryHistory: { enabled: false, unsupported: true },
steps: ['database-schema'],
}),
expect.objectContaining({
connectionId: 'warehouse',
queryHistory: expect.objectContaining({ enabled: true, dialect: 'postgres' }),
steps: ['database-schema', 'query-history'],
}),
]);
expect(plan.warnings).toEqual([
'--query-history is not supported for 2 database connections (mysql, sqlite); running schema ingest for those connections.',
]);
});
it('aggregates stored unsupported query-history config warnings for all database targets', () => {
const plan = buildPublicIngestPlan(
projectWithConnections({
local: { driver: 'sqlite', context: { queryHistory: { enabled: true } } },
mysql_warehouse: { driver: 'mysql', context: { queryHistory: { enabled: true } } },
}),
{
projectDir: '/tmp/project',
all: true,
queryHistory: 'default',
},
);
expect(plan.targets).toEqual([
expect.objectContaining({
connectionId: 'local',
queryHistory: { enabled: false, unsupported: true },
steps: ['database-schema'],
}),
expect.objectContaining({
connectionId: 'mysql_warehouse',
queryHistory: { enabled: false, unsupported: true },
steps: ['database-schema'],
}),
]);
expect(plan.warnings).toEqual([
'2 database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.',
]);
});
```
- [ ] **Step 2: Run the failing public ingest tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "unsupported query-history"
```
Expected: FAIL because the new `--all` cases currently receive one warning per unsupported database target.
- [ ] **Step 3: Add unsupported query-history warning accumulator state**
In `packages/cli/src/public-ingest.ts`, replace the current warning accumulator interface and factory with:
```ts
interface KtxUnsupportedQueryHistoryWarning {
connectionId: string;
driver: string;
reason: 'explicit' | 'stored';
}
interface KtxPublicIngestWarningAccumulator {
warnings: string[];
ignoredDepthForSources: string[];
ignoredQueryHistoryForSources: string[];
unsupportedQueryHistoryForDatabases: KtxUnsupportedQueryHistoryWarning[];
}
function createWarningAccumulator(): KtxPublicIngestWarningAccumulator {
return {
warnings: [],
ignoredDepthForSources: [],
ignoredQueryHistoryForSources: [],
unsupportedQueryHistoryForDatabases: [],
};
}
```
- [ ] **Step 4: Add unsupported database warning formatting**
In `packages/cli/src/public-ingest.ts`, add these helpers after `sourceIgnoredWarning()`:
```ts
function unsupportedDriverList(entries: KtxUnsupportedQueryHistoryWarning[]): string {
return [...new Set(entries.map((entry) => entry.driver))].sort((left, right) => left.localeCompare(right)).join(', ');
}
function unsupportedQueryHistoryWarnings(
entries: KtxUnsupportedQueryHistoryWarning[],
all: boolean,
): string[] {
if (entries.length === 0) {
return [];
}
const warnings: string[] = [];
const explicitEntries = entries.filter((entry) => entry.reason === 'explicit');
const storedEntries = entries.filter((entry) => entry.reason === 'stored');
if (explicitEntries.length === 1 || (!all && explicitEntries.length > 0)) {
warnings.push(
...explicitEntries.map(
(entry) =>
`--query-history is not supported for ${entry.driver}; running schema ingest for ${entry.connectionId}.`,
),
);
} else if (explicitEntries.length > 1) {
warnings.push(
`--query-history is not supported for ${explicitEntries.length} database connections (${unsupportedDriverList(
explicitEntries,
)}); running schema ingest for those connections.`,
);
}
if (storedEntries.length === 1 || (!all && storedEntries.length > 0)) {
warnings.push(
...storedEntries.map(
(entry) =>
`${entry.connectionId} has query history enabled in ktx.yaml, but ${entry.driver} does not support it; running schema ingest.`,
),
);
} else if (storedEntries.length > 1) {
warnings.push(
`${storedEntries.length} database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.`,
);
}
return warnings;
}
```
- [ ] **Step 5: Use the accumulator in `finalizeWarnings()`**
In `packages/cli/src/public-ingest.ts`, replace the start of `finalizeWarnings()` with:
```ts
const warnings = [
...accumulator.warnings,
...unsupportedQueryHistoryWarnings(accumulator.unsupportedQueryHistoryForDatabases, args.all),
];
```
Keep the existing source depth/query-history aggregation logic below that new `warnings` initialization.
- [ ] **Step 6: Record unsupported database targets instead of pushing immediate warnings**
In `packages/cli/src/public-ingest.ts`, change the `resolveDatabaseTargetOptions()` input type so `warnings` is the full accumulator:
```ts
warnings: KtxPublicIngestWarningAccumulator;
```
Inside the unsupported query-history branch, replace the current `input.warnings.push(...)` block with:
```ts
input.warnings.unsupportedQueryHistoryForDatabases.push({
connectionId: input.connectionId,
driver: input.driver,
reason: explicitQueryHistory === 'enabled' || input.args.queryHistoryWindowDays !== undefined ? 'explicit' : 'stored',
});
```
In the supported query-history branch, replace:
```ts
input.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`);
```
with:
```ts
input.warnings.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`);
```
In the stored query-history skipped-by-fast branch, replace:
```ts
input.warnings.push(
`${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`,
);
```
with:
```ts
input.warnings.warnings.push(
`${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`,
);
```
In `targetForConnection()`, replace the database resolver call with:
```ts
const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings });
```
- [ ] **Step 7: Verify unsupported warning aggregation passes**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "unsupported query-history"
```
Expected: PASS. The single-target warning tests keep the old exact messages, while `--all` unsupported database targets receive one aggregate warning per reason.
- [ ] **Step 8: Commit unsupported warning aggregation**
Run:
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts
git commit -m "fix(cli): aggregate unsupported query-history warnings"
```
### Task 2: Rename Public Setup Database Labels
**Files:**
- Modify: `packages/cli/src/setup-ready-menu.ts`
- Modify: `packages/cli/src/setup.ts`
- Modify: `packages/cli/src/setup-context.ts`
- Modify: `packages/cli/src/setup-databases.ts`
- Test: `packages/cli/src/setup-ready-menu.test.ts`
- Test: `packages/cli/src/setup.test.ts`
- Test: `packages/cli/src/setup-context.test.ts`
- Test: `packages/cli/src/setup-databases.test.ts`
- Modify: `README.md`
- Modify: `docs-site/content/docs/getting-started/quickstart.mdx`
- Modify: `docs-site/content/docs/cli-reference/ktx-setup.mdx`
- Test: `scripts/examples-docs.test.mjs`
- [ ] **Step 1: Write failing CLI copy expectations**
In `packages/cli/src/setup-ready-menu.test.ts`, change the expected database option to:
```ts
{ value: 'databases', label: 'Databases' },
```
In `packages/cli/src/setup-context.test.ts`, add these assertions after each `expect(io.stdout()).toContain('KTX context is ready for agents.');` assertion in the successful build and existing-context tests:
```ts
expect(io.stdout()).toContain('Databases:');
expect(io.stdout()).not.toContain('Primary sources:');
```
In `packages/cli/src/setup.test.ts`, change the empty database selection expectation to:
```ts
expect(testIo.stdout()).toContain(
'KTX cannot work without at least one database. Select a database or press Escape to go back.',
);
expect(testIo.stderr()).not.toContain('No databases selected.');
```
In `packages/cli/src/setup.test.ts`, in the existing-project status test, add:
```ts
expect(rendered).toContain('Databases configured: no');
expect(rendered).not.toContain('Primary sources configured');
```
- [ ] **Step 2: Write failing setup database prompt expectations**
In `packages/cli/src/setup-databases.test.ts`, update the old public copy expectations to the new database labels:
```ts
expect(prompts.multiselect).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('Which databases should KTX connect to?'),
}),
);
```
For configured database menu expectations, use:
```ts
expect(prompts.select).toHaveBeenCalledWith({
message: 'Databases already configured: warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to context sources' },
{ value: 'add', label: 'Add another database' },
],
});
```
For the `postgres-warehouse` configured menu expectations, use:
```ts
expect(prompts.select).toHaveBeenCalledWith({
message: 'Databases already configured: postgres-warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to context sources' },
{ value: 'add', label: 'Add another database' },
],
});
```
For empty-selection output expectations, use:
```ts
expect(io.stdout()).not.toContain('KTX cannot work without at least one database');
```
For successful initial scan/setup output, use:
```ts
expect(io.stdout()).toContain('◇ Database ready');
expect(io.stdout()).not.toContain('Primary source ready');
```
Rename test descriptions that contain `primary source` or `primary sources` so they use `database` or `databases`. For example:
```ts
it('shows every supported database in the interactive checklist', async () => {
```
```ts
it('shows a configured database menu instead of the type checklist when a database exists', async () => {
```
```ts
it('lets users add another database after completing the first one', async () => {
```
- [ ] **Step 3: Run failing setup label tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts -t "ready menu|readiness checklist|context is ready|database|primary source|configured"
```
Expected: FAIL because production copy still uses `Primary sources` and `primary source`.
- [ ] **Step 4: Update the ready menu and status labels**
In `packages/cli/src/setup-ready-menu.ts`, change:
```ts
{ value: 'databases', label: 'Primary sources' },
```
to:
```ts
{ value: 'databases', label: 'Databases' },
```
In `packages/cli/src/setup.ts`, change:
```ts
`Primary sources configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`,
```
to:
```ts
`Databases configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`,
```
In `packages/cli/src/setup-context.ts`, change:
```ts
io.stdout.write('Primary sources:\n');
```
to:
```ts
io.stdout.write('Databases:\n');
```
- [ ] **Step 5: Update setup database prompt and output copy**
In `packages/cli/src/setup-databases.ts`, change:
```ts
const backDestination = canReturnToDriverSelection ? 'primary source selection' : 'the previous setup step';
```
to:
```ts
const backDestination = canReturnToDriverSelection ? 'database selection' : 'the previous setup step';
```
Replace the entire `configuredPrimarySourcesPrompt()` return value with:
```ts
return {
message: `Databases already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`,
options: [
{ value: 'continue', label: 'Continue to context sources' },
{ value: 'add', label: 'Add another database' },
],
};
```
Change the successful database setup heading from:
```ts
writeSetupSection(input.io, 'Primary source ready', [
```
to:
```ts
writeSetupSection(input.io, 'Database ready', [
```
Change the non-interactive no-database error from:
```ts
'KTX cannot work without a primary source. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n',
```
to:
```ts
'KTX cannot work without a database. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n',
```
Change the driver multiselect message from:
```ts
message: withMultiselectNavigation('Which primary sources should KTX connect to?'),
```
to:
```ts
message: withMultiselectNavigation('Which databases should KTX connect to?'),
```
Change the empty-selection warning from:
```ts
io.stdout.write('│ KTX cannot work without at least one primary source. Select a source or press Escape to go back.\n');
```
to:
```ts
io.stdout.write('│ KTX cannot work without at least one database. Select a database or press Escape to go back.\n');
```
Change the skip output from:
```ts
io.stdout.write('│ Primary source setup skipped. KTX cannot work until you add a primary source.\n');
```
to:
```ts
io.stdout.write('│ Database setup skipped. KTX cannot work until you add a database.\n');
```
Change the no-completed-database output from:
```ts
io.stdout.write('│ KTX cannot work without a primary source.\n');
```
to:
```ts
io.stdout.write('│ KTX cannot work without a database.\n');
```
Change the retry prompt message and skip label from:
```ts
message: `Primary source setup failed for ${connectionChoice.connectionId}`,
```
```ts
{ value: 'skip', label: 'Skip this primary source' },
```
to:
```ts
message: `Database setup failed for ${connectionChoice.connectionId}`,
```
```ts
{ value: 'skip', label: 'Skip this database' },
```
Change the final failure line from:
```ts
io.stderr.write('No primary source connections completed setup.\n');
```
to:
```ts
io.stderr.write('No database connections completed setup.\n');
```
- [ ] **Step 6: Update public docs examples**
In `README.md`, replace:
```text
Primary sources configured: yes (postgres-warehouse)
```
with:
```text
Databases configured: yes (postgres-warehouse)
```
In `docs-site/content/docs/getting-started/quickstart.mdx`, replace the heading line of the database-ready block:
```text
Primary source ready
postgres-warehouse - PostgreSQL - schema context complete
```
with:
```text
Database ready
postgres-warehouse - PostgreSQL - schema context complete
```
In `docs-site/content/docs/getting-started/quickstart.mdx`, replace the setup success group:
```text
Primary sources:
postgres-warehouse: deep context complete
```
with:
```text
Databases:
postgres-warehouse: deep context complete
```
In `docs-site/content/docs/getting-started/quickstart.mdx`, replace:
```text
Primary sources configured: yes (postgres-warehouse)
```
with:
```text
Databases configured: yes (postgres-warehouse)
```
In `docs-site/content/docs/cli-reference/ktx-setup.mdx`, replace:
```text
Primary sources configured: yes (postgres-warehouse)
```
with:
```text
Databases configured: yes (postgres-warehouse)
```
- [ ] **Step 7: Add public docs regression assertions**
In `scripts/examples-docs.test.mjs`, inside the test named `documents unified public ingest workflows in the docs site`, add:
```js
const setupReference = await readText('docs-site/content/docs/cli-reference/ktx-setup.mdx');
```
Then add these assertions near the existing `quickstart` and `rootReadme` assertions:
```js
assert.match(rootReadme, /Databases configured: yes \(postgres-warehouse\)/);
assert.match(quickstart, /Databases:\n postgres-warehouse: deep context complete/);
assert.match(quickstart, /Databases configured: yes \(postgres-warehouse\)/);
assert.match(setupReference, /Databases configured: yes \(postgres-warehouse\)/);
assert.doesNotMatch(rootReadme, /Primary sources configured/);
assert.doesNotMatch(quickstart, /Primary sources/);
assert.doesNotMatch(setupReference, /Primary sources configured/);
```
- [ ] **Step 8: Verify setup label tests pass**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts
```
Expected: PASS.
- [ ] **Step 9: Verify docs examples pass**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 10: Scan for stale public labels**
Run:
```bash
rg -n "Primary sources?:|Primary sources? configured|Primary source ready|knowledge sources" packages/cli/src README.md docs-site/content/docs scripts/examples-docs.test.mjs
```
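Expected: no matches in production CLI source or README/docs examples. The only acceptable matches are the negative assertions this task adds to the test files (for example `expect(io.stdout()).not.toContain('Primary source ready')` under `packages/cli/src` and `assert.doesNotMatch(rootReadme, /Primary sources configured/)` in `scripts/examples-docs.test.mjs`), because those lines name the old labels on purpose.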
- [ ] **Step 11: Commit public setup labels**
Run:
```bash
git add packages/cli/src/setup-ready-menu.ts packages/cli/src/setup-ready-menu.test.ts packages/cli/src/setup.ts packages/cli/src/setup.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts README.md docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/cli-reference/ktx-setup.mdx scripts/examples-docs.test.mjs
git commit -m "fix(cli): align setup database labels"
```
### Task 3: Final V1 Verification
**Files:**
- Verify: `packages/cli/src/public-ingest.ts`
- Verify: `packages/cli/src/setup-ready-menu.ts`
- Verify: `packages/cli/src/setup.ts`
- Verify: `packages/cli/src/setup-context.ts`
- Verify: `packages/cli/src/setup-databases.ts`
- Verify: `README.md`
- Verify: `docs-site/content/docs/getting-started/quickstart.mdx`
- Verify: `docs-site/content/docs/cli-reference/ktx-setup.mdx`
- [ ] **Step 1: Run focused CLI tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts src/index.test.ts src/command-tree.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run docs regression tests**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 3: Run public unified-ingest stale-copy scans**
Run:
```bash
rg -n "Primary sources?:|Primary sources? configured|Primary source ready|knowledge sources" packages/cli/src README.md docs-site/content/docs scripts/examples-docs.test.mjs
```
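Expected: no matches in production CLI source or README/docs examples. Matches inside negative test assertions (`not.toContain(...)` or `assert.doesNotMatch(...)`) that name the old labels on purpose are acceptable.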
Run:
```bash
rg -n "ktx scan|ktx ingest run --connection-id|--adapter <adapter>|ktx ingest watch|live-database|Historic SQL|historicSql" README.md docs-site/content/docs examples/README.md examples/local-warehouse/README.md
```
Expected: no matches. Matches in developer scripts, internal package names, tests, or artifact paths outside this public-docs command are non-blocking under the original spec.
- [ ] **Step 4: Run package pre-commit on changed files**
Run:
```bash
uv run pre-commit run --files packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/setup-ready-menu.ts packages/cli/src/setup-ready-menu.test.ts packages/cli/src/setup.ts packages/cli/src/setup.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts README.md docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/cli-reference/ktx-setup.mdx scripts/examples-docs.test.mjs
```
Expected: PASS. If pre-commit is unavailable because the local `uv` version or hook environment is missing, record the exact failure and run the focused Vitest and Node tests from Steps 1 and 2.
- [ ] **Step 5: Commit final verification if needed**
If Step 4 made formatting changes, run:
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/setup-ready-menu.ts packages/cli/src/setup-ready-menu.test.ts packages/cli/src/setup.ts packages/cli/src/setup.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts README.md docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/cli-reference/ktx-setup.mdx scripts/examples-docs.test.mjs
git commit -m "test: verify unified ingest final ux labels"
```
If Step 4 made no changes, do not create an empty commit.
## Self-Review
- Spec coverage: This plan covers the remaining v1-blocking public gaps found in the audit: unsupported database query-history warning aggregation for `--all`, and old public `Primary sources` terminology in setup/status/docs where the spec's user-facing grouping is `Databases`. Core routing, depth, query-history execution, setup config, foreground-only state, hidden debug commands, public docs command shape, and output sanitization are already implemented by the prior plan chain.
- Placeholder scan: The plan contains exact files, exact tests, exact code snippets, exact commands, and expected outcomes.
- Type consistency: The new accumulator type is `KtxUnsupportedQueryHistoryWarning`; `resolveDatabaseTargetOptions()` receives `KtxPublicIngestWarningAccumulator`; warning strings used in tests match the implementation snippets exactly.

View file

@ -1,932 +0,0 @@
# Unified Ingest V1 Foreground and Retry Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1-blocking public UX gaps in the unified
`ktx ingest` redesign.
**Architecture:** Keep the implemented connection-centric ingest planner and
shared foreground context-build view. Add a small public messaging layer for
notices, warnings, and retry guidance so TTY, non-TTY, and setup next-step
surfaces all match the original spec without changing internal adapter names.
**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages,
Markdown plan documentation.
---
## Current audit
The implemented unified-ingest plans cover the main v1 behavior:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`,
`--query-history`, `--no-query-history`, and
`--query-history-window-days` route through the public ingest planner.
- Database targets run before source targets. Public source ingest bypasses
`ingest.adapters`. Fast and deep map to structural and enriched database
ingest, and deep readiness failures are isolated per target under `--all`.
- `ktx scan`, `ktx ingest run`, and `ktx ingest watch` are hidden from normal
help. Setup stores `connections.<id>.context.depth` and
`connections.<id>.context.queryHistory`.
- Setup context builds are foreground-only, legacy context-build states are
normalized to stale, and public docs no longer advertise `ktx scan` or
adapter-backed `ktx ingest run` as normal workflows.
### V1-blocking gaps
- Interactive foreground `ktx ingest` and setup context builds compute public
warnings but never render them. A TTY user can pass `--deep` for source
connections, `--query-history` for unsupported targets, or `--fast` with
stored query history and receive no warning in the foreground view.
- Explicit query-history runs do not state that database schema ingest runs
before query-history processing. The spec requires that message when a user
explicitly passes `--query-history`.
- Plain non-TTY failures report generic step failures such as
`warehouse failed at database-schema.` and a debug command, but they do not
include the retry guidance required by the error-handling section.
- Setup next-step output still describes the context-build action as
`Build or resume agent-ready context` through `ktx setup`, and it says the
build covers `primary-source scans and context-source ingests`. The public
model is: `setup` configures, `ingest` builds or refreshes context, and
`status` explains readiness.
- The guided demo foreground replay still shows `scanning tables...` and
`tables scanned`, even though the normal foreground view must use
`reading schema` or `building schema context`.
### Non-blocking gaps
- Hidden debug commands can continue to call `ktx scan`, `ktx ingest run`, and
`ktx ingest watch`.
- Internal adapter keys, raw artifact paths, WorkUnit keys, package names, and
JSON or debug output can continue to use `scan`, `live-database`, and
`historic-sql`.
- Developer docs can continue to mention scan internals when they describe
connector implementation details.
- Existing `autoWatch`, `detached`, and `paused` type remnants in setup code
are not user-facing because setup context state is normalized before display.
## File structure
- Modify `packages/cli/src/public-ingest.ts`: add public plan notices, print
schema-before-query-history notices, and add retry guidance to plain
non-TTY failure details.
- Modify `packages/cli/src/public-ingest.test.ts`: cover explicit
query-history notices and retry guidance in plain output.
- Modify `packages/cli/src/context-build-view.ts`: render foreground notices
and warnings from `buildPublicIngestPlan`.
- Modify `packages/cli/src/context-build-view.test.ts`: cover warning and
notice rendering in the foreground view.
- Modify `packages/cli/src/next-steps.ts`: make the public build command
`ktx ingest --all` and remove resume/scan wording from setup next steps.
- Modify `packages/cli/src/next-steps.test.ts`: update public next-step
expectations.
- Modify `packages/cli/src/setup-demo-tour.ts`: replace demo replay scan copy
with schema-context copy.
- Modify `packages/cli/src/setup-demo-tour.test.ts`: lock the demo replay
wording against `scan` terms.
## Tasks
### Task 1: Render foreground notices and warnings
**Files:**
- Modify: `packages/cli/src/context-build-view.ts`
- Test: `packages/cli/src/context-build-view.test.ts`
- [ ] **Step 1: Write failing foreground-message tests**
In `packages/cli/src/context-build-view.test.ts`, add these tests inside the
`renderContextBuildView` describe block, near the existing rendering tests:
```ts
it('renders public warnings in the foreground view', () => {
const state = initViewState([
{
connectionId: 'docs',
driver: 'notion',
operation: 'source-ingest',
adapter: 'notion',
debugCommand: 'ktx ingest docs --debug',
steps: ['source-ingest', 'memory-update'],
},
]);
const rendered = renderContextBuildView(state, {
styled: false,
warnings: ['--deep affects database ingest only; ignoring it for docs.'],
});
expect(rendered).toContain('Warnings:');
expect(rendered).toContain('--deep affects database ingest only; ignoring it for docs.');
});
it('renders public notices in the foreground view before warnings', () => {
const state = initViewState([
{
connectionId: 'warehouse',
driver: 'postgres',
operation: 'database-ingest',
debugCommand: 'ktx ingest warehouse --debug',
steps: ['database-schema', 'query-history'],
databaseDepth: 'deep',
detectRelationships: true,
queryHistory: { enabled: true, dialect: 'postgres' },
},
]);
const rendered = renderContextBuildView(state, {
styled: false,
notices: ['Schema ingest runs before query history for warehouse.'],
warnings: ['--query-history requires deep ingest; running warehouse with --deep.'],
});
expect(rendered.indexOf('Notices:')).toBeLessThan(rendered.indexOf('Warnings:'));
expect(rendered).toContain('Schema ingest runs before query history for warehouse.');
expect(rendered).toContain('--query-history requires deep ingest; running warehouse with --deep.');
});
```
- [ ] **Step 2: Run the failing foreground-message tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts -t "renders public warnings|renders public notices"
```
Expected: FAIL because `renderContextBuildView` does not accept or render
`warnings` or `notices`.
- [ ] **Step 3: Add render options for foreground messages**
In `packages/cli/src/context-build-view.ts`, add this helper after
`renderTargetGroup`:
```ts
function renderMessageGroup(label: string, messages: string[], styled: boolean): string[] {
if (messages.length === 0) return [];
const renderedMessages = messages.map((message) => ` - ${message}`);
return ['', ` ${label}:`, ...renderedMessages.map((line) => (styled ? dim(line) : line))];
}
```
Then change the `renderContextBuildView` signature from:
```ts
export function renderContextBuildView(
state: ContextBuildViewState,
options: { styled?: boolean; showHint?: boolean; hintText?: string; projectDir?: string } = {},
): string {
```
to:
```ts
export function renderContextBuildView(
state: ContextBuildViewState,
options: {
styled?: boolean;
showHint?: boolean;
hintText?: string;
projectDir?: string;
notices?: string[];
warnings?: string[];
} = {},
): string {
```
In the `lines` array inside `renderContextBuildView`, insert the notice and
warning groups after the `Context sources` group:
```ts
...renderTargetGroup('Databases', state.primarySources, state.frame, styled, width),
...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width),
...renderMessageGroup('Notices', options.notices ?? [], styled),
...renderMessageGroup('Warnings', options.warnings ?? [], styled),
'',
```
- [ ] **Step 4: Pass plan messages into foreground rendering**
In `packages/cli/src/context-build-view.ts`, inside `runContextBuild`, change:
```ts
const viewOpts = { styled: true, projectDir: args.projectDir };
```
to:
```ts
const viewOpts = {
styled: true,
projectDir: args.projectDir,
notices: plan.notices ?? [],
warnings: plan.warnings,
};
```
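This makes every call to `paint()` and the final non-TTY foreground fallback
render the same public messages. `KtxPublicIngestPlan` does not declare
`notices` until Task 2, Step 3, so a type check run between the two tasks
reports an error on `plan.notices`; the `?? []` fallback keeps runtime output
unchanged until that field exists.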
- [ ] **Step 5: Run the foreground-message tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts -t "renders public warnings|renders public notices"
```
Expected: PASS.
- [ ] **Step 6: Commit**
```bash
git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts
git commit -m "fix: render unified ingest foreground warnings"
```
### Task 2: State schema-before-query-history for explicit runs
**Files:**
- Modify: `packages/cli/src/public-ingest.ts`
- Modify: `packages/cli/src/context-build-view.ts`
- Test: `packages/cli/src/public-ingest.test.ts`
- Test: `packages/cli/src/context-build-view.test.ts`
- [ ] **Step 1: Write failing explicit query-history notice tests**
In `packages/cli/src/public-ingest.test.ts`, add this test inside
`describe('buildPublicIngestPlan', ...)` after the existing query-history
planning tests:
```ts
it('adds a schema-first notice when query history is explicitly enabled', () => {
const project = deepReadyProject({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
expect(
buildPublicIngestPlan(project, {
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
queryHistory: 'enabled',
}).notices,
).toEqual(['Schema ingest runs before query history for warehouse.']);
});
```
In `packages/cli/src/public-ingest.test.ts`, add this test inside
`describe('runKtxPublicIngest', ...)` after
`runs query history after schema ingest with current-run window override`:
```ts
it('prints the schema-first notice for explicit query-history runs', async () => {
const io = makeIo();
const project = deepReadyProject({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
const runScan = vi.fn(async () => 0);
const runIngest = vi.fn(async () => 0);
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'disabled',
queryHistory: 'enabled',
},
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.');
});
```
In `packages/cli/src/context-build-view.test.ts`, add this test near the
existing `runContextBuild` tests:
```ts
it('passes schema-first notices from the plan into foreground output', async () => {
const io = makeIo();
const project = {
...projectWithConnections({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
}),
config: {
...projectWithConnections({ warehouse: { driver: 'postgres' } }).config,
connections: {
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
},
llm: {
provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret
models: { default: 'gpt-test' },
},
scan: {
...projectWithConnections({ warehouse: { driver: 'postgres' } }).config.scan,
enrichment: {
mode: 'llm',
embeddings: {
backend: 'openai',
model: 'text-embedding-3-small',
dimensions: 1536,
},
},
},
},
};
const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation));
await expect(
runContextBuild(
project,
{
projectDir: '/tmp/project',
inputMode: 'disabled',
targetConnectionId: 'warehouse',
all: false,
queryHistory: 'enabled',
},
io.io,
{ executeTarget, now: () => 1000 },
),
).resolves.toMatchObject({ exitCode: 0 });
expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.');
});
```
- [ ] **Step 2: Run the failing query-history notice tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts -t "schema-first notice|passes schema-first"
```
Expected: FAIL because plans do not include `notices`, and plain output does
not print schema-first text.
- [ ] **Step 3: Add notices to the public ingest plan**
In `packages/cli/src/public-ingest.ts`, update `KtxPublicIngestPlan`:
```ts
export interface KtxPublicIngestPlan {
projectDir: string;
targets: KtxPublicIngestPlanTarget[];
warnings: string[];
notices?: string[];
}
```
Add this helper after `finalizeWarnings`:
```ts
function schemaFirstQueryHistoryNotice(
targets: KtxPublicIngestPlanTarget[],
args: { queryHistory?: KtxPublicIngestQueryHistoryFlag },
): string | null {
if (args.queryHistory !== 'enabled') {
return null;
}
const queryHistoryTargets = targets.filter((target) => target.queryHistory?.enabled === true);
if (queryHistoryTargets.length === 0) {
return null;
}
if (queryHistoryTargets.length === 1) {
return `Schema ingest runs before query history for ${queryHistoryTargets[0].connectionId}.`;
}
return `Schema ingest runs before query history for ${queryHistoryTargets.length} database connections.`;
}
```
In `buildPublicIngestPlan`, replace the direct return with:
```ts
const orderedTargets = [
...targets.filter((t) => t.operation === 'database-ingest'),
...targets.filter((t) => t.operation === 'source-ingest'),
];
const notice = schemaFirstQueryHistoryNotice(orderedTargets, args);
return {
projectDir: args.projectDir,
targets: orderedTargets,
warnings: finalizeWarnings(warnings, args),
...(notice ? { notices: [notice] } : {}),
};
```
- [ ] **Step 4: Print notices in plain public ingest**
In `packages/cli/src/public-ingest.ts`, inside `runKtxPublicIngest`, change:
```ts
if (!args.json && plan.warnings.length > 0) {
for (const warning of plan.warnings) {
io.stderr.write(`Warning: ${warning}\n`);
}
}
```
to:
```ts
if (!args.json) {
for (const notice of plan.notices ?? []) {
io.stdout.write(`${notice}\n`);
}
for (const warning of plan.warnings) {
io.stderr.write(`Warning: ${warning}\n`);
}
}
```
Task 1 already passes `plan.notices` into the foreground view options inside
`runContextBuild`, so explicit query-history foreground runs render the same
notice in the view.
- [ ] **Step 5: Run the query-history notice tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts -t "schema-first notice|passes schema-first"
```
Expected: PASS.
- [ ] **Step 6: Commit**
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts
git commit -m "fix: explain query history schema order"
```
### Task 3: Add retry guidance to plain public failures
**Files:**
- Modify: `packages/cli/src/public-ingest.ts`
- Test: `packages/cli/src/public-ingest.test.ts`
- [ ] **Step 1: Write failing plain retry tests**
In `packages/cli/src/public-ingest.test.ts`, replace these assertions in
`runs all independent targets and reports partial failures`:
```ts
expect(io.stdout()).toContain('warehouse failed at database-schema.');
expect(io.stdout()).toContain('Debug: ktx ingest warehouse --debug');
```
with:
```ts
expect(io.stdout()).toContain('warehouse failed at database-schema.');
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --fast');
expect(io.stdout()).not.toContain('Debug: ktx ingest warehouse --debug');
```
Then add this test after `runs all independent targets and reports partial
failures`:
```ts
it('prints query-history retry guidance for query-history facet failures', async () => {
const io = makeIo();
const project = deepReadyProject({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
const runScan = vi.fn(async () => 0);
const runIngest = vi.fn(async () => 1);
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'disabled',
queryHistory: 'enabled',
},
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(1);
expect(io.stdout()).toContain('warehouse failed at query-history.');
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history');
expect(io.stdout()).not.toContain('historic-sql');
});
```
- [ ] **Step 2: Run the failing retry tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "partial failures|query-history retry"
```
Expected: FAIL because plain failures still print `Debug:` and lack retry
commands.
- [ ] **Step 3: Add retry command formatting to public ingest**
In `packages/cli/src/public-ingest.ts`, add these helpers before
`markTargetResult`:
```ts
function retryCommandForTarget(
target: KtxPublicIngestPlanTarget,
args: Extract<KtxPublicIngestArgs, { command: 'run' }>,
): string {
const projectPart = ` --project-dir ${args.projectDir}`;
const depthPart = target.databaseDepth ? ` --${target.databaseDepth}` : '';
const queryHistoryPart = target.queryHistory?.enabled === true ? ' --query-history' : '';
const windowPart =
target.queryHistory?.enabled === true && target.queryHistory.windowDays !== undefined
? ` --query-history-window-days ${target.queryHistory.windowDays}`
: '';
return `ktx ingest ${target.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`;
}
function trimTrailingPeriod(value: string): string {
return value.endsWith('.') ? value.slice(0, -1) : value;
}
function failureDetailWithRetry(input: {
target: KtxPublicIngestPlanTarget;
args: Extract<KtxPublicIngestArgs, { command: 'run' }>;
failedOperation: KtxPublicIngestStepName;
failureDetail?: string;
}): string {
const detail = input.failureDetail?.trim();
const base =
detail && detail.startsWith(`${input.target.connectionId} `)
? detail
: detail
? `${input.target.connectionId} failed: ${detail}`
: `${input.target.connectionId} failed at ${input.failedOperation}.`;
return `${trimTrailingPeriod(base)}. Retry: ${retryCommandForTarget(input.target, input.args)}`;
}
```
- [ ] **Step 4: Thread run args into failure detail construction**
Change the `markTargetResult` signature in `packages/cli/src/public-ingest.ts`
from:
```ts
function markTargetResult(
target: KtxPublicIngestPlanTarget,
status: 'done' | 'failed',
failedOperation?: KtxPublicIngestStepName,
failureDetail?: string,
): KtxPublicIngestTargetResult {
```
to:
```ts
function markTargetResult(
target: KtxPublicIngestPlanTarget,
args: Extract<KtxPublicIngestArgs, { command: 'run' }>,
status: 'done' | 'failed',
failedOperation?: KtxPublicIngestStepName,
failureDetail?: string,
): KtxPublicIngestTargetResult {
```
Inside the failed-step branch, replace:
```ts
detail: failureDetail ?? `${target.connectionId} failed at ${selectedFailedOperation}.`,
```
with:
```ts
detail: failureDetailWithRetry({
target,
args,
failedOperation: selectedFailedOperation,
failureDetail,
}),
```
Update every `markTargetResult` call in `executePublicIngestTarget`:
```ts
return markTargetResult(
target,
args,
'failed',
'database-schema',
capturedScanIo ? firstCapturedFailureLine(capturedScanIo.capturedOutput()) : undefined,
);
```
```ts
return markTargetResult(target, args, 'failed', 'query-history');
```
```ts
return markTargetResult(target, args, 'done');
```
```ts
return markTargetResult(target, args, exitCode === 0 ? 'done' : 'failed');
```
- [ ] **Step 5: Stop printing debug commands in plain failure summaries**
In `renderPlainResults`, remove this block:
```ts
if (failedStep.debugCommand) {
io.stdout.write(` Debug: ${failedStep.debugCommand}\n`);
}
```
Debug commands remain available through JSON and debug surfaces, but normal
plain output now focuses on the connection and retry action.
- [ ] **Step 6: Run the retry tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "partial failures|query-history retry"
```
Expected: PASS.
- [ ] **Step 7: Commit**
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts
git commit -m "fix: add public ingest retry guidance"
```
### Task 4: Replace setup next-step scan/resume wording
**Files:**
- Modify: `packages/cli/src/next-steps.ts`
- Test: `packages/cli/src/next-steps.test.ts`
- [ ] **Step 1: Write failing next-step copy tests**
In `packages/cli/src/next-steps.test.ts`, replace the expected
`KTX_CONTEXT_BUILD_COMMANDS` value with:
```ts
expect(KTX_CONTEXT_BUILD_COMMANDS).toEqual([
{
command: 'ktx ingest --all',
description: 'Build or refresh agent-ready context from configured connections',
},
{
command: 'ktx status',
description: 'Check setup and context readiness',
},
]);
```
In the test named `keeps setup next steps focused on building context when the
build is not ready`, replace:
```ts
expect(rendered).toContain('primary-source scans and context-source ingests');
expect(rendered).toContain('ktx setup');
```
with:
```ts
expect(rendered).toContain('Run ingest to build database schema context before context-source ingest.');
expect(rendered).toContain('ktx ingest --all');
expect(rendered).not.toContain('resume');
expect(rendered).not.toContain('scan');
```
- [ ] **Step 2: Run the failing next-step copy tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/next-steps.test.ts
```
Expected: FAIL because the current copy still recommends `ktx setup` for the
context-build action and uses resume/scan wording.
- [ ] **Step 3: Update the next-step command constants**
In `packages/cli/src/next-steps.ts`, change `KTX_CONTEXT_BUILD_COMMANDS` to:
```ts
export const KTX_CONTEXT_BUILD_COMMANDS = [
{
command: 'ktx ingest --all',
description: 'Build or refresh agent-ready context from configured connections',
},
{
command: 'ktx status',
description: 'Check setup and context readiness',
},
] as const;
```
In `formatSetupNextStepLines`, replace:
```ts
`${indent}Preferred route: run the CLI build; it covers primary-source scans and context-source ingests.`,
```
with:
```ts
`${indent}Run ingest to build database schema context before context-source ingest.`,
```
- [ ] **Step 4: Run the next-step copy tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/next-steps.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/next-steps.ts packages/cli/src/next-steps.test.ts
git commit -m "fix: align setup next steps with unified ingest"
```
### Task 5: Clean guided demo foreground scan wording
**Files:**
- Modify: `packages/cli/src/setup-demo-tour.ts`
- Test: `packages/cli/src/setup-demo-tour.test.ts`
- [ ] **Step 1: Write failing demo wording tests**
In `packages/cli/src/setup-demo-tour.test.ts`, add this test inside
`describe('buildDemoReplayTimeline', ...)`:
```ts
it('uses schema-context wording for database progress', () => {
const renderedTimeline = timeline
.map((event) => [event.detailLine, event.summaryText].filter(Boolean).join(' '))
.join('\n');
expect(renderedTimeline).toContain('reading schema');
expect(renderedTimeline).toContain('56 tables');
expect(renderedTimeline).not.toContain('scanning');
expect(renderedTimeline).not.toContain('scanned');
});
```
- [ ] **Step 2: Run the failing demo wording test**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts -t "schema-context wording"
```
Expected: FAIL because the demo timeline still uses `scanning tables...` and
`tables scanned`.
- [ ] **Step 3: Replace demo timeline database copy**
In `packages/cli/src/setup-demo-tour.ts`, inside `buildDemoReplayTimeline`,
replace the first three events:
```ts
// postgres-warehouse: scan
{ delayMs: 0, connectionId: 'postgres-warehouse', status: 'running', detailLine: null, summaryText: null },
{ delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] scanning tables...', summaryText: null },
{ delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables scanned' },
```
with:
```ts
// postgres-warehouse: database schema context
{ delayMs: 0, connectionId: 'postgres-warehouse', status: 'running', detailLine: null, summaryText: null },
{ delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] reading schema...', summaryText: null },
{ delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables' },
```
- [ ] **Step 4: Run the demo wording test**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts -t "schema-context wording"
```
Expected: PASS.
- [ ] **Step 5: Commit**
```bash
git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "fix: remove scan wording from demo progress"
```
### Task 6: Final verification
**Files:**
- Verify: `packages/cli/src/public-ingest.ts`
- Verify: `packages/cli/src/context-build-view.ts`
- Verify: `packages/cli/src/next-steps.ts`
- Verify: `packages/cli/src/setup-demo-tour.ts`
- Verify: relevant tests
- [ ] **Step 1: Run focused Vitest coverage**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/next-steps.test.ts src/setup-demo-tour.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run CLI type-check**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS.
- [ ] **Step 3: Run CLI tests**
Run:
```bash
pnpm --filter @ktx/cli run test
```
Expected: PASS.
- [ ] **Step 4: Run dead-code check after TypeScript changes**
Run:
```bash
pnpm run dead-code
```
Expected: PASS.
- [ ] **Step 5: Search for stale public wording in touched surfaces**
Run:
```bash
rg -n "Build or resume agent-ready|primary-source scans|scanning tables|tables scanned|Debug: ktx ingest" packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/next-steps.ts packages/cli/src/next-steps.test.ts packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
```
Expected: no matches.
- [ ] **Step 6: Commit verification fixes if any were needed**
If verification required edits, run:
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/next-steps.ts packages/cli/src/next-steps.test.ts packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts
git commit -m "test: verify unified ingest ux closure"
```
If no edits were needed, do not create an empty commit.
## Self-review
- Spec coverage: The plan covers the remaining v1-blocking warning,
schema-first query-history, retry-guidance, setup next-step, and foreground
demo wording gaps. Core command routing, depth policy, query-history config,
setup depth, docs-site command references, foreground-only state, and reserved
ids are already covered by earlier implemented plans.
- Placeholder scan: The plan contains exact file paths, concrete test code,
implementation snippets, commands, and expected results. No red-flag
placeholders are present.
- Type consistency: `notices` is added as an optional
`KtxPublicIngestPlan` property and threaded through `renderContextBuildView`
options. Retry helpers use existing `KtxPublicIngestPlanTarget`,
`KtxPublicIngestArgs`, and `KtxPublicIngestStepName` types.

View file

@ -1,559 +0,0 @@
# Unified Ingest V1 Progress Copy Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove the remaining v1-blocking scan wording from normal public
unified-ingest progress, failure, and setup scope-selection output.
**Architecture:** Keep the implemented connection-centric ingest planner,
hidden legacy commands, and foreground context-build view. Add a small shared
public-copy helper for lower-level database ingest and query-history messages,
then use it from foreground progress and direct public failure summarization.
**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages.
---
## Current audit
The implemented unified-ingest plan chain covers the original spec's main v1
behavior:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`,
`--query-history`, `--no-query-history`, and
`--query-history-window-days` route through `public-ingest.ts`.
- Database targets run before source targets, inferred public adapters bypass
`ingest.adapters`, and `fast` or `deep` maps to structural or enriched
database ingest internals.
- Deep readiness is evaluated before target work starts, and `--all` isolates
per-target deep-readiness failures.
- Setup stores `connections.<id>.context.depth` and
`connections.<id>.context.queryHistory`, migrates legacy `historicSql`, and
uses foreground-only setup context state.
- Normal help hides `ktx scan`, `ktx ingest run`, and `ktx ingest watch`; docs
and command-tree output no longer present those as normal public workflows.
### V1-blocking gaps
- Foreground `ktx ingest` and setup context-build progress still pass database
ingest progress messages through from scan internals. A normal user can see
messages such as `Preparing scan`, even though the spec says the foreground
view must use `reading schema` or `building schema context` and must not show
`scan` in normal mode.
- Direct public database ingest failure summaries sanitize `live-database` and
`historic-sql`, but not scan-specific failure lines such as
`KTX scan enrichment failed after structural scan completed: ...`.
- Interactive database setup still asks for `PostgreSQL schemas to scan`, which
keeps scan wording in normal setup output after the public model changed to
database schema context.
### Non-blocking gaps
- Hidden debug commands can remain callable: `ktx scan`, `ktx ingest run`, and
`ktx ingest watch`.
- Internal adapter keys, raw artifact paths, WorkUnit keys, package names,
tests, and developer-only scripts can continue to use `scan`,
`live-database`, and `historic-sql`.
- README package taxonomy such as `Postgres scan connector` can remain because
it describes internal package ownership, not normal command usage.
- Internal readiness configuration names such as `scan.enrichment.mode` can
remain because they refer to existing `ktx.yaml` configuration fields.
## File structure
- Create `packages/cli/src/public-ingest-copy.ts`: shared copy sanitizer for
database ingest and query-history messages used by public output paths.
- Create `packages/cli/src/public-ingest-copy.test.ts`: unit coverage for the
sanitizer.
- Modify `packages/cli/src/context-build-view.ts`: sanitize foreground
database progress messages and reuse the shared query-history sanitizer.
- Modify `packages/cli/src/context-build-view.test.ts`: cover foreground
progress output with lower-level scan messages.
- Modify `packages/cli/src/public-ingest.ts`: use the shared public output-line
sanitizer for captured failure details.
- Modify `packages/cli/src/public-ingest.test.ts`: cover direct public failure
output for scan-enrichment failures.
- Modify `packages/cli/src/setup-databases.ts`: change the schema scope prompt
from `schemas to scan` to `schemas to include`.
- Modify `packages/cli/src/setup-databases.test.ts`: update the schema prompt
expectation and assert scan wording is absent.
## Tasks
### Task 1: Add shared public ingest copy sanitizers
**Files:**
- Create: `packages/cli/src/public-ingest-copy.ts`
- Create: `packages/cli/src/public-ingest-copy.test.ts`
- [ ] **Step 1: Write the public-copy tests**
Create `packages/cli/src/public-ingest-copy.test.ts`:
```ts
import { describe, expect, it } from 'vitest';
import {
publicDatabaseIngestMessage,
publicIngestOutputLine,
publicQueryHistoryMessage,
} from './public-ingest-copy.js';
describe('public ingest copy sanitizers', () => {
it('maps database scan progress into schema-context wording', () => {
expect(publicDatabaseIngestMessage('Preparing scan')).toBe('Preparing database ingest');
expect(publicDatabaseIngestMessage('Inspecting database schema')).toBe('Reading database schema');
expect(publicDatabaseIngestMessage('Writing schema artifacts')).toBe('Writing schema context');
expect(publicDatabaseIngestMessage('Enriching schema metadata')).toBe('Building enriched schema context');
});
it('maps database scan failure text into public database ingest wording', () => {
expect(
publicDatabaseIngestMessage(
'KTX scan enrichment failed after structural scan completed: embedding service timed out',
),
).toBe('Database enrichment failed after schema context completed: embedding service timed out');
expect(publicDatabaseIngestMessage('structural scan wrote partial artifacts')).toBe(
'schema context wrote partial artifacts',
);
expect(publicDatabaseIngestMessage('scan results may be less complete')).toBe(
'database context may be less complete',
);
});
it('maps query-history adapter progress into public wording', () => {
expect(publicQueryHistoryMessage('Fetching source files for warehouse/historic-sql', 'warehouse')).toBe(
'Fetching query history for warehouse',
);
expect(publicQueryHistoryMessage('Curating warehouse/historic-sql work units', 'warehouse')).toBe(
'Curating warehouse query history work units',
);
expect(publicQueryHistoryMessage('historic SQL local ingest failed', 'warehouse')).toBe(
'query history local ingest failed',
);
});
it('sanitizes captured public output lines across database and query-history internals', () => {
expect(
publicIngestOutputLine(
'KTX scan enrichment failed after structural scan completed in raw-sources/warehouse/live-database/sync-1',
),
).toBe('Database enrichment failed after schema context completed in raw-sources/warehouse/database schema/sync-1');
expect(publicIngestOutputLine('Historic SQL local ingest requires a configured reader')).toBe(
'query history local ingest requires a configured reader',
);
});
});
```
- [ ] **Step 2: Run the failing public-copy tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts
```
Expected: FAIL because `packages/cli/src/public-ingest-copy.ts` does not exist.
- [ ] **Step 3: Implement the shared sanitizers**
Create `packages/cli/src/public-ingest-copy.ts`:
```ts
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param value - Raw text to embed in a pattern.
 * @returns `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  const regExpMetaCharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the added backslash.
  const escaped = value.replace(regExpMetaCharacters, '\\$&');
  return escaped;
}
// Ordered [pattern, replacement] pairs that rewrite lower-level scan wording
// into public database-ingest wording. Every pattern is global and
// case-insensitive, and the pairs are applied one after another.
//
// Order matters: the full enrichment-failure sentence must come before the
// bare `structural scan` rule. If the shorter rule ran first it would rewrite
// part of that sentence and the longer pattern would no longer match.
const DATABASE_INGEST_REPLACEMENTS: Array<[RegExp, string]> = [
[/\bPreparing scan\b/gi, 'Preparing database ingest'],
[/\bInspecting database schema\b/gi, 'Reading database schema'],
[/\bWriting schema artifacts\b/gi, 'Writing schema context'],
[/\bEnriching schema metadata\b/gi, 'Building enriched schema context'],
[
/\bKTX scan enrichment failed after structural scan completed\b/gi,
'Database enrichment failed after schema context completed',
],
// Generic fallbacks for scan phrases not covered by the specific rules above.
[/\bstructural scan\b/gi, 'schema context'],
[/\benriched scan\b/gi, 'deep database ingest'],
[/\bscan results\b/gi, 'database context'],
];
/**
 * Rewrites lower-level database scan wording in `message` into public
 * database-ingest wording by applying each `DATABASE_INGEST_REPLACEMENTS`
 * entry in list order.
 *
 * @param message - Lower-level progress or failure text.
 * @returns The message after every replacement rule has been applied.
 */
export function publicDatabaseIngestMessage(message: string): string {
  let rewritten = message;
  for (const [pattern, replacement] of DATABASE_INGEST_REPLACEMENTS) {
    rewritten = rewritten.replace(pattern, replacement);
  }
  return rewritten;
}
/**
 * Rewrites query-history adapter wording in a lower-level message into public
 * wording.
 *
 * When `connectionId` is a non-empty string, connection-scoped phrases are
 * rewritten first: `Fetching source files for <id>/historic-sql` (matched
 * case-insensitively) becomes `Fetching query history for <id>`, and the first
 * remaining `<id>/historic-sql` becomes `<id> query history`. Afterwards every
 * remaining `historic-sql` token and every `historic SQL` phrase (any casing)
 * becomes `query history`.
 *
 * @param message - Lower-level progress or failure text.
 * @param connectionId - Optional connection id used to match connection-scoped phrases.
 * @returns The message with query-history internals replaced by public wording.
 */
export function publicQueryHistoryMessage(message: string, connectionId?: string): string {
  let current = message;
  if (connectionId && connectionId.length > 0) {
    const escapedConnectionId = escapeRegExp(connectionId);
    // Replacer functions keep the connection id literal. A replacement *string*
    // would expand `$&`, `$1`, `$$`, ... if the id happened to contain them,
    // even though the id is already escaped on the pattern side.
    current = current
      .replace(
        new RegExp(`Fetching source files for ${escapedConnectionId}/historic-sql`, 'i'),
        () => `Fetching query history for ${connectionId}`,
      )
      .replace(`${connectionId}/historic-sql`, () => `${connectionId} query history`);
  }
  return current.replace(/\bhistoric-sql\b/g, 'query history').replace(/\bhistoric SQL\b/gi, 'query history');
}
/**
 * Sanitizes one captured output line for public display. Database scan wording
 * is rewritten first, then query-history wording (with no connection id), and
 * finally every `live-database` token becomes `database schema`.
 *
 * @param line - Captured lower-level output line.
 * @returns The line with internal adapter and scan wording replaced.
 */
export function publicIngestOutputLine(line: string): string {
  const withoutScanWording = publicDatabaseIngestMessage(line);
  const withoutHistoricSql = publicQueryHistoryMessage(withoutScanWording);
  return withoutHistoricSql.replace(/\blive-database\b/g, 'database schema');
}
```
- [ ] **Step 4: Run the public-copy tests again**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit the shared sanitizer**
Run:
```bash
git add packages/cli/src/public-ingest-copy.ts packages/cli/src/public-ingest-copy.test.ts
git commit -m "fix(cli): add public ingest copy sanitizers"
```
### Task 2: Sanitize foreground progress and captured public failures
**Files:**
- Modify: `packages/cli/src/context-build-view.ts`
- Modify: `packages/cli/src/context-build-view.test.ts`
- Modify: `packages/cli/src/public-ingest.ts`
- Modify: `packages/cli/src/public-ingest.test.ts`
- Test: `packages/cli/src/public-ingest-copy.test.ts`
- [ ] **Step 1: Write the failing foreground progress test**
In `packages/cli/src/context-build-view.test.ts`, add this test inside the
`runContextBuild` describe block near the existing query-history progress test:
```ts
it('renders database ingest progress without scan wording', async () => {
const io = makeIo();
const project = projectWithConnections({ warehouse: { driver: 'postgres' } });
const executeTarget = vi.fn(async (target, _args, _targetIo, deps) => {
await deps.scanProgress?.update(0.05, 'Preparing scan');
await deps.scanProgress?.update(0.15, 'Inspecting database schema');
await deps.scanProgress?.update(0.7, 'Writing schema artifacts');
return successResult(target.connectionId, target.driver, target.operation);
});
await expect(
runContextBuild(
project,
{
projectDir: '/tmp/project',
inputMode: 'disabled',
targetConnectionId: 'warehouse',
all: false,
},
io.io,
{ executeTarget, now: () => 1000, sourceProgressThrottleMs: 0 },
),
).resolves.toMatchObject({ exitCode: 0 });
expect(io.stdout()).toContain('Preparing database ingest');
expect(io.stdout()).toContain('Reading database schema');
expect(io.stdout()).toContain('Writing schema context');
expect(io.stdout()).not.toContain('Preparing scan');
expect(io.stdout()).not.toMatch(/\bscan\b/i);
});
```
- [ ] **Step 2: Write the failing direct public failure test**
In `packages/cli/src/public-ingest.test.ts`, add this test inside the
`runKtxPublicIngest` describe block near
`suppresses internal scan output for public database ingest summaries`:
```ts
it('sanitizes captured database scan failure details in direct public output', async () => {
const io = makeIo();
const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } });
const runScan = vi.fn(async (_args, scanIo) => {
scanIo.stdout.write('KTX scan enrichment failed after structural scan completed: embedding service timed out\n');
return 1;
});
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'disabled',
depth: 'deep',
},
io.io,
{ loadProject: vi.fn(async () => project), runScan },
),
).resolves.toBe(1);
expect(io.stdout()).toContain(
'warehouse failed: Database enrichment failed after schema context completed: embedding service timed out.',
);
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep');
expect(io.stdout()).not.toContain('KTX scan enrichment failed');
expect(io.stdout()).not.toContain('structural scan');
});
```
- [ ] **Step 3: Run the failing integration tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/public-ingest.test.ts -t "database ingest progress|captured database scan failure" --testTimeout 30000
```
Expected: FAIL because foreground progress still prints `Preparing scan`, and
captured direct failures still print the lower-level scan failure text.
- [ ] **Step 4: Use the shared sanitizer in foreground progress**
In `packages/cli/src/context-build-view.ts`, add this import:
```ts
import { publicDatabaseIngestMessage, publicQueryHistoryMessage } from './public-ingest-copy.js';
```
Replace the existing `publicProgressMessage()` implementation:
```ts
function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string {
if (!target.steps.includes('query-history')) {
return message;
}
return message
.replace(
new RegExp(`Fetching source files for ${target.connectionId}/historic-sql`, 'i'),
`Fetching query history for ${target.connectionId}`,
)
.replace(`${target.connectionId}/historic-sql`, `${target.connectionId} query history`)
.replace(/\bhistoric-sql\b/g, 'query history')
.replace(/\bhistoric SQL\b/gi, 'query history');
}
```
with:
```ts
/**
 * Rewrites a lower-level progress message into public wording for `target`.
 *
 * The two sanitizers are applied in sequence rather than as exclusive
 * branches: a database-ingest target can also carry the `query-history` step,
 * and returning early after the database rewrite would let `historic-sql`
 * wording through for that target.
 *
 * @param message - Progress text reported by the lower-level ingest code.
 * @param target - Planned ingest target the progress belongs to.
 * @returns The message with scan and query-history internals replaced.
 */
function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string {
  let current = message;
  if (target.operation === 'database-ingest') {
    current = publicDatabaseIngestMessage(current);
  }
  if (target.steps.includes('query-history')) {
    current = publicQueryHistoryMessage(current, target.connectionId);
  }
  return current;
}
```
- [ ] **Step 5: Use the shared sanitizer in public ingest failure capture**
In `packages/cli/src/public-ingest.ts`, add this import:
```ts
import { publicIngestOutputLine } from './public-ingest-copy.js';
```
Delete the local `publicIngestOutputLine()` function:
```ts
function publicIngestOutputLine(line: string): string {
return line
.replace(/\blive-database\b/g, 'database schema')
.replace(/\bhistoric-sql\b/g, 'query history')
.replace(/\bhistoric SQL\b/gi, 'query history');
}
```
Leave `firstCapturedFailureLine()` calling `publicIngestOutputLine` unchanged;
the imported function now provides the broader public wording.
- [ ] **Step 6: Run the integration tests again**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts src/context-build-view.test.ts src/public-ingest.test.ts --testTimeout 30000
```
Expected: PASS.
- [ ] **Step 7: Commit foreground and failure sanitization**
Run:
```bash
git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/public-ingest-copy.ts packages/cli/src/public-ingest-copy.test.ts
git commit -m "fix(cli): sanitize public ingest progress copy"
```
### Task 3: Rename setup schema scope prompt
**Files:**
- Modify: `packages/cli/src/setup-databases.ts`
- Modify: `packages/cli/src/setup-databases.test.ts`
- [ ] **Step 1: Update the setup prompt expectation**
In `packages/cli/src/setup-databases.test.ts`, in the test named
`prompts for discovered Postgres schemas before the first scan`, replace:
```ts
message: expect.stringContaining('PostgreSQL schemas to scan'),
```
with:
```ts
message: expect.stringContaining('PostgreSQL schemas to include'),
```
Add this assertion after the `toHaveBeenCalledWith` block:
```ts
expect(String(prompts.multiselect.mock.calls[0]?.[0].message)).not.toContain('to scan');
```
- [ ] **Step 2: Run the failing setup prompt test**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "prompts for discovered Postgres schemas before the first scan" --testTimeout 30000
```
Expected: FAIL because the prompt still says `PostgreSQL schemas to scan`.
- [ ] **Step 3: Rename the setup scope prompt**
In `packages/cli/src/setup-databases.ts`, replace:
```ts
`${spec.promptLabel} to scan\n` +
`KTX found multiple ${spec.nounPlural}. Select every ${spec.noun} agents should use.`,
```
with:
```ts
`${spec.promptLabel} to include\n` +
`KTX found multiple ${spec.nounPlural}. Select every ${spec.noun} agents should use.`,
```
- [ ] **Step 4: Run the setup prompt test again**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "prompts for discovered Postgres schemas before the first scan" --testTimeout 30000
```
Expected: PASS.
- [ ] **Step 5: Commit setup prompt wording**
Run:
```bash
git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts
git commit -m "fix(cli): rename setup schema scope prompt"
```
### Task 4: Final verification
**Files:**
- Verify: `packages/cli/src/public-ingest-copy.ts`
- Verify: `packages/cli/src/context-build-view.ts`
- Verify: `packages/cli/src/public-ingest.ts`
- Verify: `packages/cli/src/setup-databases.ts`
- [ ] **Step 1: Run targeted unified-ingest tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts src/context-build-view.test.ts src/public-ingest.test.ts src/setup-databases.test.ts --testTimeout 30000
```
Expected: PASS.
- [ ] **Step 2: Run CLI type-check**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS.
- [ ] **Step 3: Scan normal public files for the closed wording gaps**
Run:
```bash
rg -n "Preparing scan|KTX scan enrichment failed|structural scan completed|schemas to scan" packages/cli/src/context-build-view.ts packages/cli/src/public-ingest.ts packages/cli/src/setup-databases.ts packages/cli/src/*.test.ts
```
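Expected: no matches in `context-build-view.ts`, `public-ingest.ts`, or
`setup-databases.ts`. Because the `*.test.ts` glob covers every CLI test file,
matches in tests are acceptable only as sanitizer inputs or negative assertions
added by this plan (for example `Preparing scan` in
`public-ingest-copy.test.ts`) or as historical expectations in low-level,
scan-specific tests such as `scan.test.ts`.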
- [ ] **Step 4: Run workspace dead-code check**
Run:
```bash
pnpm run dead-code
```
Expected: PASS.
- [ ] **Step 5: Commit final verification marker if needed**
If verification passed without any further edits, no additional commit is
needed. If a verification fix changed files, run:
```bash
git add packages/cli/src/public-ingest-copy.ts packages/cli/src/public-ingest-copy.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts
git commit -m "test(cli): verify unified ingest public progress copy"
```
## Self-review
Spec coverage: this plan covers the remaining normal public output paths where
scan wording still leaks into unified ingest:
- Foreground progress now maps database scan progress into schema-context copy.
- Captured direct public failure summaries now map scan-enrichment failures into
database ingest copy.
- Interactive setup schema scope selection now says `schemas to include`, not
`schemas to scan`.
The plan intentionally leaves hidden debug commands, internal artifact paths,
developer scripts, low-level scan tests, and configuration field names alone.
Those are non-blocking under the original spec's implementation-detail
allowances.
Placeholder scan: no task uses deferred code markers, unnamed edge handling, or
undefined helper names. Every changed helper, test, and command is named with
the file that owns it.
Type consistency: the new helper exports
`publicDatabaseIngestMessage()`, `publicQueryHistoryMessage()`, and
`publicIngestOutputLine()`. Later tasks import those exact names from
`./public-ingest-copy.js`.

View file

@ -1,598 +0,0 @@
# Unified Ingest V1 Public Plain Output Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove the last v1-blocking adapter-centric and internal source-key leaks from normal public `ktx ingest` plain output.
**Architecture:** Keep the current connection-centric public ingest planner and hidden debug commands. Sanitize low-level ingest report labels in `ingest.ts`, and capture low-level source/query-history output in `public-ingest.ts` so public plain `ktx ingest <connectionId>` renders only the unified result table, warnings, notices, and retry guidance. JSON output and hidden debug commands may continue to expose raw `sourceKey` values for troubleshooting.
**Tech Stack:** TypeScript, Commander, Vitest, pnpm workspace scripts.
---
## Current audit
The unified ingest plan chain has implemented the main v1 behavior:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`,
`--query-history`, `--no-query-history`, and
`--query-history-window-days` route through `public-ingest.ts`.
- Database targets run before source targets, deep readiness is target-local
for `--all`, and inferred public adapters bypass `ingest.adapters`.
- Normal command help hides `ktx scan`, `ktx ingest run`, and
`ktx ingest watch`; docs-site command references no longer publish those
as normal workflows.
- Setup stores `connections.<id>.context.depth` and
`connections.<id>.context.queryHistory`, migrates legacy `historicSql`, and
uses foreground-only context-build state.
### V1-blocking gaps
- Direct public non-TTY or `--no-input` source ingest still delegates to
`runKtxIngest()` with the real CLI IO. The lower-level reporter prints
`Adapter: <sourceKey>` and routine report details before the public result
table. For query history this can print `Adapter: historic-sql`, violating
the spec requirement that normal output use query-history wording and keep
internal adapter names out of routine output.
- `ktx ingest status` and `ktx ingest replay` plain output call the same
lower-level report formatter. Stored database reports can therefore print
`Adapter: live-database`, and stored query-history reports can print
`Adapter: historic-sql`, even though `status` and `replay` are public
report-viewing surfaces.
### Non-blocking gaps
- Hidden debug commands remain callable: `ktx scan`, `ktx ingest run`, and
`ktx ingest watch`.
- JSON output, debug output, tests, internal artifact paths, WorkUnit keys,
adapter package names, and developer scripts can continue to use
`scan`, `live-database`, and `historic-sql`.
- Public docs still use "scan" as a generic implementation noun in a few
contributor or concept pages. They do not present `ktx scan` as the normal
public command, so that is deferred to a later wording cleanup.
## File structure
- Modify `packages/cli/src/ingest.ts`: replace the plain report `Adapter:`
label with public source labels, while leaving JSON report payloads intact.
- Modify `packages/cli/src/public-ingest.ts`: capture lower-level source and
query-history plain output for direct public ingest, sanitize failure detail
lines, and render only the public summary table.
- Modify `packages/cli/src/ingest.test.ts`: update existing report label
expectations and add regressions for `live-database` and `historic-sql`
stored-report labels.
- Modify `packages/cli/src/public-ingest.test.ts`: add regressions proving
direct public source and query-history runs do not leak lower-level adapter
report output.
## Tasks
### Task 1: Use public source labels in stored report output
**Files:**
- Modify: `packages/cli/src/ingest.ts`
- Modify: `packages/cli/src/ingest.test.ts`
- [ ] **Step 1: Add failing stored-report label tests**
Add these tests inside the existing `describe('runKtxIngest', () => { ... })`
block in `packages/cli/src/ingest.test.ts`, near the existing
`runs local ingest and reads status` test:
```typescript
it('labels internal database reports without adapter names in plain status output', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const report = localFakeBundleReport('scan-job-1', {
id: 'report-scan-1',
runId: 'run-scan-1',
connectionId: 'warehouse',
sourceKey: 'live-database',
});
const io = makeIo();
await expect(
runKtxIngest(
{
command: 'status',
projectDir,
reportFile: '/tmp/scan-report.json',
outputMode: 'plain',
},
io.io,
{
readReportFile: vi.fn(async () => report),
},
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Source: Database schema\n');
expect(io.stdout()).not.toContain('Adapter:');
expect(io.stdout()).not.toContain('live-database');
expect(io.stderr()).toBe('');
});
it('labels internal query-history reports without adapter names in plain status output', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const report = localFakeBundleReport('query-history-job-1', {
id: 'report-query-history-1',
runId: 'run-query-history-1',
connectionId: 'warehouse',
sourceKey: 'historic-sql',
});
const io = makeIo();
await expect(
runKtxIngest(
{
command: 'status',
projectDir,
reportFile: '/tmp/query-history-report.json',
outputMode: 'plain',
},
io.io,
{
readReportFile: vi.fn(async () => report),
},
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Source: Query history\n');
expect(io.stdout()).not.toContain('Adapter:');
expect(io.stdout()).not.toContain('historic-sql');
expect(io.stderr()).toBe('');
});
```
- [ ] **Step 2: Run the failing stored-report tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts --testNamePattern "labels internal"
```
Expected: FAIL. The output still contains `Adapter: live-database` or
`Adapter: historic-sql`, and it does not contain the new public `Source:`
labels.
- [ ] **Step 3: Add public report source labels**
In `packages/cli/src/ingest.ts`, add these helpers above
`function writeReportStatus(...)`:
```typescript
/** Public display labels for the report source keys that have a fixed name. */
const REPORT_SOURCE_LABELS = new Map<string, string>([
  ['live-database', 'Database schema'],
  ['historic-sql', 'Query history'],
  ['dbt', 'dbt'],
  ['metricflow', 'MetricFlow'],
  ['lookml', 'LookML'],
  ['looker', 'Looker'],
  ['metabase', 'Metabase'],
  ['notion', 'Notion'],
]);

/**
 * Returns the public display label for a report source key.
 *
 * Known keys use the fixed label from `REPORT_SOURCE_LABELS`. Any other key is
 * split on runs of `-` and `_`, empty segments are dropped, the first
 * character of each segment is upper-cased, and the segments are joined with
 * single spaces.
 *
 * @param sourceKey - Source key stored on the report.
 * @returns The display label; an empty string when the key has no segments.
 */
function reportSourceLabel(sourceKey: string): string {
  const knownLabel = REPORT_SOURCE_LABELS.get(sourceKey);
  if (knownLabel) {
    return knownLabel;
  }

  const words: string[] = [];
  for (const segment of sourceKey.split(/[-_]+/)) {
    if (segment.length === 0) {
      continue;
    }
    words.push(segment.charAt(0).toUpperCase() + segment.slice(1));
  }
  return words.join(' ');
}
```
Then replace the `Adapter:` line in `writeReportStatus()`:
```typescript
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
```
The full function should keep the remaining fields unchanged:
```typescript
/**
 * Writes a plain-text summary of one ingest report to `io.stdout`, one
 * `Label: value` line per field.
 *
 * @param report - Report snapshot whose ids, source key, connection id, and body are printed.
 * @param io - CLI IO whose `stdout.write` receives each line.
 */
function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void {
const counts = savedMemoryCountsForReport(report);
io.stdout.write(`Report: ${report.id}\n`);
io.stdout.write(`Run: ${report.runId}\n`);
io.stdout.write(`Job: ${report.jobId}\n`);
io.stdout.write(`Status: ${reportStatus(report)}\n`);
// Print the display label from reportSourceLabel() rather than the raw source key.
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
io.stdout.write(`Connection: ${report.connectionId}\n`);
io.stdout.write(`Sync: ${report.body.syncId}\n`);
// Diff summary is rendered as +added/~modified/-deleted/=unchanged.
io.stdout.write(
`Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}\n`,
);
io.stdout.write(`Work units: ${report.body.workUnits.length}\n`);
io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`);
io.stdout.write(`Provenance rows: ${report.body.provenanceRows.length}\n`);
}
```
- [ ] **Step 4: Update existing report label expectations**
In `packages/cli/src/ingest.test.ts`, update the existing assertions that
still expect the old `Adapter:` label:
```typescript
expect(statusIo.stdout()).toContain('Source: Metabase');
```
```typescript
expect(io.stdout()).toContain('Source: Query history\n');
```
```typescript
expect(io.stdout()).toContain('Source: Looker');
```
```typescript
expect(statusIo.stdout()).toContain('Source: Looker');
```
Remove the corresponding `Adapter: metabase`, `Adapter: historic-sql`, and
`Adapter: looker` expectations.
- [ ] **Step 5: Run the stored-report tests again**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts --testNamePattern "labels internal|runs public Metabase|historic-sql projection|Looker"
```
Expected: PASS. Plain report output uses `Source:` labels and does not print
`Adapter:` for the covered status and run summaries.
- [ ] **Step 6: Commit stored-report label cleanup**
Run:
```bash
git add packages/cli/src/ingest.ts packages/cli/src/ingest.test.ts
git commit -m "fix(cli): use public source labels in ingest reports"
```
### Task 2: Capture low-level output during public source ingest
**Files:**
- Modify: `packages/cli/src/public-ingest.ts`
- Modify: `packages/cli/src/public-ingest.test.ts`
- [ ] **Step 1: Add failing public source-output tests**
Add these tests to `packages/cli/src/public-ingest.test.ts` near the existing
public output tests for captured scan output and query-history retry guidance:
```typescript
it('suppresses lower-level source report output during direct public source ingest', async () => {
const io = makeIo();
const project = projectWithConnections({
docs: { driver: 'notion' },
});
const runIngest = vi.fn(async (_args, ingestIo) => {
ingestIo.stdout.write('Report: report-docs-1\n');
ingestIo.stdout.write('Adapter: notion\n');
ingestIo.stdout.write('Saved memory: 2 wiki, 0 SL\n');
return 0;
});
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'docs',
all: false,
json: false,
inputMode: 'disabled',
},
io.io,
{ loadProject: vi.fn(async () => project), runIngest },
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Ingest finished');
expect(io.stdout()).toContain('docs');
expect(io.stdout()).toContain('source-ingest');
expect(io.stdout()).not.toContain('Report: report-docs-1');
expect(io.stdout()).not.toContain('Adapter:');
expect(io.stdout()).not.toContain('notion\n');
expect(io.stderr()).toBe('');
});
it('suppresses historic-sql report output during direct public query-history ingest', async () => {
const io = makeIo();
const project = deepReadyProject({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
const runScan = vi.fn(async () => 0);
const runIngest = vi.fn(async (_args, ingestIo) => {
ingestIo.stdout.write('Report: report-query-history-1\n');
ingestIo.stdout.write('Adapter: historic-sql\n');
ingestIo.stdout.write('Saved memory: 1 wiki, 1 SL\n');
return 0;
});
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'disabled',
queryHistory: 'enabled',
},
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.');
expect(io.stdout()).toContain('Ingest finished');
expect(io.stdout()).toContain('warehouse');
expect(io.stdout()).toContain('done');
expect(io.stdout()).not.toContain('Report: report-query-history-1');
expect(io.stdout()).not.toContain('Adapter:');
expect(io.stdout()).not.toContain('historic-sql');
expect(io.stderr()).toBe('');
});
```
- [ ] **Step 2: Run the failing public source-output tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testNamePattern "suppresses"
```
Expected: FAIL. The direct public run writes lower-level `Report:` and
`Adapter:` lines into normal public stdout.
- [ ] **Step 3: Add captured ingest output helpers**
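In `packages/cli/src/public-ingest.ts`, keep the existing
`createCapturedPublicIngestIo()` helper and replace
`firstCapturedFailureLine()` with these helpers. Task 1 renames the
stored-report label from `Adapter:` to `Source:`, so also add `Source` to the
`INTERNAL_STATUS_LINE_RE` alternation; otherwise a captured `Source: ...` line
can be surfaced as the failure detail: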
```typescript
const INTERNAL_STATUS_LINE_RE =
/^(Report|Run|Job|Status|Adapter|Connection|Sync|Diff|Work units|Saved memory|Provenance rows):\s*/;
function publicIngestOutputLine(line: string): string {
return line
.replace(/\blive-database\b/g, 'database schema')
.replace(/\bhistoric-sql\b/g, 'query history')
.replace(/\bhistoric SQL\b/gi, 'query history');
}
function firstCapturedFailureLine(output: string): string | undefined {
return output
.split(/\r?\n/)
.map((line) => line.trim())
.filter((line) => line.length > 0)
.filter((line) => !line.startsWith('KTX scan completed'))
.filter((line) => !INTERNAL_STATUS_LINE_RE.test(line))
.map(publicIngestOutputLine)
.find((line) => line.length > 0);
}
```
- [ ] **Step 4: Capture query-history ingest output**
In `executePublicIngestTarget()`, replace the query-history branch with this
captured-output flow:
```typescript
if (target.queryHistory?.enabled === true) {
const { runKtxIngest } = await import('./ingest.js');
const runIngest = deps.runIngest ?? runKtxIngest;
const ingestArgs: KtxIngestArgs = {
command: 'run',
projectDir: args.projectDir,
connectionId: target.connectionId,
adapter: 'historic-sql',
outputMode: sourceIngestOutputMode(args, io),
inputMode: args.inputMode,
allowImplicitAdapter: true,
historicSqlPullConfigOverride:
target.queryHistory.pullConfig ?? {
dialect: target.queryHistory.dialect,
...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}),
},
};
const capturedIngestIo = deps.ingestProgress ? null : createCapturedPublicIngestIo();
const ingestIo = capturedIngestIo ?? io;
const qhExitCode = deps.ingestProgress
? await runIngest(ingestArgs, ingestIo, { progress: deps.ingestProgress })
: await runIngest(ingestArgs, ingestIo);
if (qhExitCode !== 0) {
return markTargetResult(
target,
args,
'failed',
'query-history',
capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined,
);
}
}
```
This keeps foreground progress working because `runContextBuild()` supplies
`deps.ingestProgress` and already passes a captured IO object into
`executePublicIngestTarget()`.
- [ ] **Step 5: Capture source ingest output**
In the source-ingest branch of `executePublicIngestTarget()`, replace the
direct `runIngest(..., io, ...)` call with this captured-output flow:
```typescript
const runIngest = deps.runIngest ?? runKtxIngest;
const capturedIngestIo = deps.ingestProgress ? null : createCapturedPublicIngestIo();
const ingestIo = capturedIngestIo ?? io;
const exitCode = deps.ingestProgress
? await runIngest(ingestArgs, ingestIo, { progress: deps.ingestProgress })
: await runIngest(ingestArgs, ingestIo);
return markTargetResult(
target,
args,
exitCode === 0 ? 'done' : 'failed',
'source-ingest',
capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined,
);
```
Keep the existing `ingestArgs` object unchanged:
```typescript
const ingestArgs: KtxIngestArgs = {
command: 'run',
projectDir: args.projectDir,
connectionId: target.connectionId,
adapter: target.adapter ?? target.driver,
...(target.sourceDir ? { sourceDir: target.sourceDir } : {}),
outputMode: sourceIngestOutputMode(args, io),
inputMode: args.inputMode,
allowImplicitAdapter: true,
};
```
- [ ] **Step 6: Run the public source-output tests again**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testNamePattern "suppresses|retry guidance|foreground"
```
Expected: PASS. Direct public source and query-history runs no longer print
low-level `Report:`, `Adapter:`, `live-database`, or `historic-sql` lines in
plain stdout, while existing foreground and retry guidance tests still pass.
- [ ] **Step 7: Commit public source-output capture**
Run:
```bash
git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts
git commit -m "fix(cli): suppress low-level public ingest output"
```
### Task 3: Final verification
**Files:**
- Verify: `packages/cli/src/ingest.ts`
- Verify: `packages/cli/src/public-ingest.ts`
- Verify: `packages/cli/src/ingest.test.ts`
- Verify: `packages/cli/src/public-ingest.test.ts`
- [ ] **Step 1: Run focused CLI tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run \
src/public-ingest.test.ts \
src/context-build-view.test.ts \
src/ingest.test.ts \
src/ingest-viz.test.ts \
src/command-tree.test.ts \
src/print-command-tree.test.ts
```
Expected: PASS. These tests cover direct public ingest, foreground context
builds, stored report rendering, visual report rendering, and hidden command
tree filtering.
- [ ] **Step 2: Run CLI type-check**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS with no TypeScript errors.
- [ ] **Step 3: Verify generated command tree still hides debug commands**
Run:
```bash
pnpm --filter @ktx/cli run docs:commands >/tmp/ktx-command-tree.txt
rg "scan <connectionId>|ingest run|ingest watch" /tmp/ktx-command-tree.txt
```
Expected: the `docs:commands` command succeeds. The `rg` command exits `1`
with no matches.
- [ ] **Step 4: Search public docs and normal CLI surfaces for old public command guidance**
Run:
```bash
rg -n "ktx scan|ktx ingest run|ktx ingest watch|--enable-historic-sql|--historic-sql|historicSql|Historic SQL|live-database" \
README.md docs-site/content examples/README.md examples/local-warehouse/README.md examples/postgres-historic/README.md
```
Expected: no v1-blocking matches. The only acceptable matches are references
to internal raw artifact paths such as `raw-sources/warehouse/historic-sql`,
and those may appear only in the Postgres query-history smoke README.
- [ ] **Step 5: Run dead-code checks after TypeScript changes**
Run:
```bash
pnpm run dead-code
```
Expected: PASS. If Knip reports findings that predate this change, inspect
them and record them as unrelated before finishing.
- [ ] **Step 6: Inspect final diff**
Run:
```bash
git status --short
git diff -- packages/cli/src/ingest.ts packages/cli/src/public-ingest.ts packages/cli/src/ingest.test.ts packages/cli/src/public-ingest.test.ts
```
Expected: only the intended TypeScript source and test files are modified.
The diff contains no generated `dist/` files and no docs changes beyond this
plan.
- [ ] **Step 7: Commit verification-only fixes if needed**
Run only if verification required small expectation or formatting fixes:
```bash
git add packages/cli/src/ingest.ts packages/cli/src/public-ingest.ts packages/cli/src/ingest.test.ts packages/cli/src/public-ingest.test.ts
git commit -m "test(cli): verify unified ingest public plain output"
```
Expected: no commit is needed when all checks pass after Tasks 1 and 2.
## Self-review
- Spec coverage: This plan closes the remaining v1-blocking normal-output
leaks for direct public source ingest, public query-history ingest, and
public stored-report status/replay output. It intentionally leaves hidden
debug commands, JSON payloads, internal artifact paths, and developer tests
untouched.
- Placeholder scan: The plan contains concrete file paths, exact test code,
exact implementation snippets, commands, and expected results.
- Type consistency: The snippets use existing local types and helpers:
`KtxIngestArgs`, `createCapturedPublicIngestIo()`,
`firstCapturedFailureLine()`, `sourceIngestOutputMode()`,
`markTargetResult()`, `localFakeBundleReport()`, and `makeIo()`.

View file

@ -1,326 +0,0 @@
# Unified Ingest V1 Verification Copy Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1-blocking verification and setup-copy gaps in the unified `ktx ingest` UX.
**Architecture:** Keep the implemented connection-centric ingest planner unchanged. Fix the test-only TypeScript error that currently blocks `@ktx/cli` type-check, then replace the remaining normal setup help/output references to old "primary source" terminology with database-oriented copy.
**Tech Stack:** TypeScript ESM, Commander, Vitest, pnpm workspace scripts, uv pre-commit.
---
## Current Audit
Implemented unified-ingest plans already cover the original spec's main v1 behavior:
- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, `--query-history`, `--no-query-history`, and `--query-history-window-days` route through `packages/cli/src/public-ingest.ts`.
- Database targets are ordered before source targets, public source ingest bypasses `ingest.adapters`, and database depth maps to structural/enriched scan internals.
- Deep readiness is evaluated per target before target work starts, and `--all` isolates eligible targets from independent failures.
- Setup stores `connections.<id>.context.depth` and `connections.<id>.context.queryHistory`, migrates legacy `historicSql`, and uses foreground-only context-build state.
- Normal `ktx` and `ktx ingest` help hide `ktx scan`, `ktx ingest run`, and live `ktx ingest watch`.
- Foreground progress and normal public output sanitize scan/live-database/historic-sql internals.
### V1-Blocking Gaps
- `pnpm --filter @ktx/cli run type-check` fails:
```text
src/setup-databases.test.ts(1078,39): error TS2339: Property 'mock' does not exist on type '(options: { message: string; options: KtxSetupPromptOption<string>[]; required?: boolean | undefined; initialValues?: string[] | undefined; }) => Promise<string[]>'.
```
- Normal setup help/output still exposes the old database category as "primary source":
- `packages/cli/src/commands/setup-commands.ts` documents `--skip-databases` as `KTX cannot work until a primary source is added`.
- `packages/cli/src/setup-sources.ts` prints `Connect a primary source before adding context sources.`
- `packages/cli/src/setup-context.ts` prints `No primary or context sources are configured for a KTX context build.`
### Non-Blocking Gaps
- Hidden debug commands remain callable: `ktx scan`, `ktx ingest run`, and `ktx ingest watch`.
- Internal adapter keys, artifact paths, WorkUnit keys, package names, tests, and developer-only scripts can continue to use `scan`, `live-database`, `historic-sql`, and internal `primarySource*` identifiers.
- Public docs still have a `Primary Sources` integration page and a quickstart sentence about BI metadata mapping to primary source connections. That is a broader documentation information-architecture cleanup, not a v1 blocker for the normal command/help/output behavior in this spec.
## File Structure
- Modify `packages/cli/src/setup-databases.test.ts`: use Vitest's typed mock helper for the existing `prompts.multiselect` assertion.
- Modify `packages/cli/src/setup-sources.ts`: change the normal missing-database message before context source setup.
- Modify `packages/cli/src/setup-sources.test.ts`: update the missing-database regression.
- Modify `packages/cli/src/setup-context.ts`: change the normal no-target context-build error.
- Modify `packages/cli/src/setup-context.test.ts`: update the no-target context-build regression.
- Modify `packages/cli/src/commands/setup-commands.ts`: change the public `--skip-databases` help copy.
- Modify `packages/cli/src/index.test.ts`: assert setup help no longer contains public "primary source" wording.
## Tasks
### Task 1: Repair Setup Database Test Type-Check
**Files:**
- Modify: `packages/cli/src/setup-databases.test.ts`
- [ ] **Step 1: Replace the untyped mock access**
In `packages/cli/src/setup-databases.test.ts`, in the test named `prompts for discovered Postgres schemas before the first scan`, replace:
```ts
expect(String(prompts.multiselect.mock.calls[0]?.[0].message)).not.toContain('to scan');
```
with:
```ts
expect(String(vi.mocked(prompts.multiselect).mock.calls[0]?.[0].message)).not.toContain('to scan');
```
- [ ] **Step 2: Run the setup database type-check regression**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected before the fix: FAIL with `TS2339: Property 'mock' does not exist`.
Expected after the fix: PASS.
- [ ] **Step 3: Commit the type-check repair**
Run:
```bash
git add packages/cli/src/setup-databases.test.ts
git commit -m "test(cli): fix setup database test type-check"
```
### Task 2: Replace Remaining Normal Setup Primary-Source Copy
**Files:**
- Modify: `packages/cli/src/setup-sources.ts`
- Modify: `packages/cli/src/setup-sources.test.ts`
- Modify: `packages/cli/src/setup-context.ts`
- Modify: `packages/cli/src/setup-context.test.ts`
- Modify: `packages/cli/src/commands/setup-commands.ts`
- Modify: `packages/cli/src/index.test.ts`
- [ ] **Step 1: Update setup source missing-database expectations**
In `packages/cli/src/setup-sources.test.ts`, replace the test name and output expectation:
```ts
it('does not offer context sources until a primary source exists', async () => {
```
with:
```ts
it('does not offer context sources until a database exists', async () => {
```
and replace:
```ts
expect(io.stdout()).toContain('Connect a primary source before adding context sources.');
```
with:
```ts
expect(io.stdout()).toContain('Connect a database before adding context sources.');
```
- [ ] **Step 2: Update setup context no-target expectations**
In `packages/cli/src/setup-context.test.ts`, replace:
```ts
expect(io.stderr()).toContain('No primary or context sources are configured for a KTX context build.');
```
with:
```ts
expect(io.stderr()).toContain('No databases or context sources are configured for a KTX context build.');
```
- [ ] **Step 3: Add setup help regression coverage**
In `packages/cli/src/index.test.ts`, in the test named `documents setup as a bare command without subcommands`, add these assertions after the existing query-history flag assertions and before the historic-SQL assertions:
```ts
expect(testIo.stdout()).toContain('KTX cannot work until a database is added');
expect(testIo.stdout()).not.toContain('primary source');
expect(testIo.stdout()).not.toContain('primary sources');
```
- [ ] **Step 4: Run the failing setup-copy tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-sources.test.ts src/setup-context.test.ts src/index.test.ts -t "context sources until a database exists|No databases or context sources|documents setup as a bare command"
```
Expected: FAIL because implementation still prints `primary source` in setup source/context output and setup help.
- [ ] **Step 5: Update setup source output**
In `packages/cli/src/setup-sources.ts`, replace:
```ts
const message = 'Connect a primary source before adding context sources.';
```
with:
```ts
const message = 'Connect a database before adding context sources.';
```
- [ ] **Step 6: Update setup context output**
In `packages/cli/src/setup-context.ts`, replace:
```ts
io.stderr.write('No primary or context sources are configured for a KTX context build.\n');
```
with:
```ts
io.stderr.write('No databases or context sources are configured for a KTX context build.\n');
```
- [ ] **Step 7: Update public setup help output**
In `packages/cli/src/commands/setup-commands.ts`, replace:
```ts
.option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a primary source is added', false)
```
with:
```ts
.option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a database is added', false)
```
- [ ] **Step 8: Run the setup-copy tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-sources.test.ts src/setup-context.test.ts src/index.test.ts -t "context sources until a database exists|No databases or context sources|documents setup as a bare command"
```
Expected: PASS.
- [ ] **Step 9: Commit the setup-copy repair**
Run:
```bash
git add packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts
git commit -m "fix(cli): remove primary-source wording from setup output"
```
### Task 3: Final V1 Verification
**Files:**
- Verify: `packages/cli/src/setup-databases.test.ts`
- Verify: `packages/cli/src/setup-sources.ts`
- Verify: `packages/cli/src/setup-sources.test.ts`
- Verify: `packages/cli/src/setup-context.ts`
- Verify: `packages/cli/src/setup-context.test.ts`
- Verify: `packages/cli/src/commands/setup-commands.ts`
- Verify: `packages/cli/src/index.test.ts`
- [ ] **Step 1: Run focused unified ingest tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts src/setup-sources.test.ts src/index.test.ts src/command-tree.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run docs regression tests**
Run:
```bash
node --test scripts/examples-docs.test.mjs
```
Expected: PASS.
- [ ] **Step 3: Run CLI type-check**
Run:
```bash
pnpm --filter @ktx/cli run type-check
```
Expected: PASS.
- [ ] **Step 4: Check the normal setup public-copy surface**
Run:
```bash
rg -n "primary source|primary sources|Primary Sources|primary-source" \
packages/cli/src/commands/setup-commands.ts \
packages/cli/src/setup-sources.ts \
packages/cli/src/setup-context.ts \
packages/cli/src/index.test.ts \
packages/cli/src/setup-sources.test.ts \
packages/cli/src/setup-context.test.ts
```
Expected: no matches.
- [ ] **Step 5: Check the unified ingest public command surface**
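Run (these commands use the built CLI, so rebuild `@ktx/cli` first if `packages/cli/dist/` is missing or stale):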
```bash
node packages/cli/dist/bin.js ingest --help
node packages/cli/dist/bin.js --help
```
Expected: normal help lists `ktx ingest [connectionId]`, `--all`, `--fast`, `--deep`, `--query-history`, `status`, and `replay`; it does not list `ktx scan`, `ktx ingest run`, or `ktx ingest watch`.
- [ ] **Step 6: Run pre-commit on changed files**
Run:
```bash
uv run pre-commit run --files \
packages/cli/src/setup-databases.test.ts \
packages/cli/src/setup-sources.ts \
packages/cli/src/setup-sources.test.ts \
packages/cli/src/setup-context.ts \
packages/cli/src/setup-context.test.ts \
packages/cli/src/commands/setup-commands.ts \
packages/cli/src/index.test.ts
```
Expected: PASS. If pre-commit cannot run because the local hook environment or pinned tool version is unavailable, record the exact failure and keep the focused Vitest, docs, and type-check results from Steps 1-3.
- [ ] **Step 7: Commit verification formatting if needed**
If Step 6 changes files, run:
```bash
git add packages/cli/src/setup-databases.test.ts packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts
git commit -m "test(cli): verify unified ingest setup closure"
```
If Step 6 makes no changes, do not create an empty commit.
## Self-Review
- Spec coverage: This plan covers the remaining v1-blocking issues found in the audit: package type-check is currently red, and normal setup help/output still exposes the old public database category as `primary source` instead of database-oriented copy. Core ingest routing, depth behavior, query-history behavior, foreground-only state, warning aggregation, public command help, and scan/live-database/historic-sql output sanitization are already implemented by prior plans.
- Placeholder scan: The plan contains concrete file paths, exact replacement snippets, exact commands, and expected outcomes.
- Type consistency: The only test typing change uses the Vitest pattern already used elsewhere in `packages/cli/src/setup-databases.test.ts`: `vi.mocked(prompts.multiselect).mock.calls`.

View file

@ -1,345 +0,0 @@
# Warehouse Verification Prompt Shape Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make every warehouse-verification prompt use KTX's shipped
`sql_execution` input shape so ingest agents include `connectionName` when they
probe warehouse identifiers.
**Architecture:** Keep the warehouse verification tool code unchanged. Add
prompt-asset tests that reject Kaelio's old session-only SQL examples, then
update the shared identifier protocol and the three remaining per-skill SQL
probe examples that still show the legacy shape.
**Tech Stack:** Markdown skill prompts, TypeScript, Vitest, pnpm workspace
commands.
---
## Audit Summary
The warehouse verification tools, runner wiring, adapter target fan-out, and
focused tests are present. Focused verification passed:
```bash
pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor"
```
Remaining v1-blocking gap:
- `packages/context/skills/lookml_ingest/SKILL.md`,
`packages/context/skills/metricflow_ingest/SKILL.md`, and
`packages/context/skills/sl_capture/SKILL.md` still contain
`sql_execution({ sql ... })` / "session shape" guidance inherited from
Kaelio. KTX's tool contract is
`sql_execution({connectionName, sql, rowLimit?})`, so these examples can make
agents call the shipped tool with invalid input.
Non-blocking gaps remain out of scope for this v1 plan:
- Full DDL-style `entity_details` formatting with FK profile summaries.
- AST-backed SQL validation for data-modifying CTE bodies.
- Search over generated `enrichment/descriptions.json`.
- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache
hits across separate tool calls.
- A deterministic fake-LLM end-to-end Notion hallucination regression. Prompt
guards and tool contract tests cover the v1 contract; a broader behavior
regression can land as follow-up.
## File Structure
Modify these files:
- `packages/context/src/memory/memory-runtime-assets.test.ts`: add a prompt
guard that rejects the legacy session-only `sql_execution` shape.
- `packages/context/src/ingest/ingest-runtime-assets.test.ts`: strengthen the
shared prompt asset assertion for the KTX `connectionName` SQL shape.
- `packages/context/skills/_shared/identifier-verification.md`: make both SQL
probe instructions show the KTX `connectionName` argument.
- `packages/context/skills/notion_synthesize/SKILL.md`: inline the updated
protocol block.
- `packages/context/skills/dbt_ingest/SKILL.md`: inline the updated protocol
block.
- `packages/context/skills/lookml_ingest/SKILL.md`: inline the updated protocol
block and fix the legacy SQL fallback example.
- `packages/context/skills/looker_ingest/SKILL.md`: inline the updated
protocol block.
- `packages/context/skills/metabase_ingest/SKILL.md`: inline the updated
protocol block.
- `packages/context/skills/metricflow_ingest/SKILL.md`: inline the updated
protocol block and fix the legacy SQL fallback example.
- `packages/context/skills/live_database_ingest/SKILL.md`: inline the updated
protocol block.
- `packages/context/skills/historic_sql_table_digest/SKILL.md`: inline the
updated protocol block.
- `packages/context/skills/historic_sql_patterns/SKILL.md`: inline the updated
protocol block.
- `packages/context/skills/knowledge_capture/SKILL.md`: inline the updated
protocol block.
- `packages/context/skills/sl_capture/SKILL.md`: inline the updated protocol
block and fix the join-discovery SQL example.
### Task 1: Add Prompt Guards For The KTX SQL Tool Shape
**Files:**
- Modify: `packages/context/src/memory/memory-runtime-assets.test.ts`
- Modify: `packages/context/src/ingest/ingest-runtime-assets.test.ts`
- [ ] **Step 1: Add the failing memory asset guard**
In `packages/context/src/memory/memory-runtime-assets.test.ts`, add this test
after `does not ship stale warehouse verification tool names or fictional
identifiers`:
```ts
it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => {
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
for (const skillName of verificationWriterSkills) {
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
expect(body).toContain('sql_execution({connectionName');
expect(body).not.toContain('sql_execution({ sql');
expect(body).not.toContain('session shape');
expect(body).not.toContain('connection is already pinned by the ingest session');
}
});
```
- [ ] **Step 2: Strengthen the shared ingest asset guard**
In `packages/context/src/ingest/ingest-runtime-assets.test.ts`, update
`packages identifier verification prompt assets` so the final assertions are:
```ts
expect(shared).toContain('discover_data');
expect(shared).toContain('entity_details');
expect(shared).toContain('sql_execution');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
```
- [ ] **Step 3: Run the failing prompt guards**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
```
Expected: FAIL. The failure must mention at least one current legacy string:
`sql_execution({ sql`, `session shape`, or missing
`sql_execution({connectionName`.
### Task 2: Update The Shared Identifier Verification Protocol
**Files:**
- Modify: `packages/context/skills/_shared/identifier-verification.md`
- Modify: `packages/context/skills/notion_synthesize/SKILL.md`
- Modify: `packages/context/skills/dbt_ingest/SKILL.md`
- Modify: `packages/context/skills/lookml_ingest/SKILL.md`
- Modify: `packages/context/skills/looker_ingest/SKILL.md`
- Modify: `packages/context/skills/metabase_ingest/SKILL.md`
- Modify: `packages/context/skills/metricflow_ingest/SKILL.md`
- Modify: `packages/context/skills/live_database_ingest/SKILL.md`
- Modify: `packages/context/skills/historic_sql_table_digest/SKILL.md`
- Modify: `packages/context/skills/historic_sql_patterns/SKILL.md`
- Modify: `packages/context/skills/knowledge_capture/SKILL.md`
- Modify: `packages/context/skills/sl_capture/SKILL.md`
- [ ] **Step 1: Replace the shared protocol text**
Replace the full `## Identifier Verification Protocol` block in
`packages/context/skills/_shared/identifier-verification.md` with:
```md
## Identifier Verification Protocol
Before writing a wiki page or SL source on any topic:
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
tables already exist. Prefer updating existing pages over creating new ones.
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
confirm the identifier resolves; inspect native types, FK/PK, and
sampleValues.
3. For literal values from the source, such as status codes or plan tiers,
check whether they appear in `entity_details` sampleValues for the relevant
column. If sampleValues is short or the sample may have missed real values,
run a `sql_execution` probe with the same warehouse connection name:
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
4. If the candidate identifier still does not resolve, do one of:
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
If it errors, the identifier is fictional.
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
citing the exact raw path that mentioned it.
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
the failing probe error in `clarification`.
5. Never copy `<schema>.<table>` placeholder strings from these instructions
into output.
```
- [ ] **Step 2: Inline the same protocol in every writer skill**
Replace the existing `## Identifier Verification Protocol` block in each writer
skill with the exact block from Step 1:
```text
packages/context/skills/notion_synthesize/SKILL.md
packages/context/skills/dbt_ingest/SKILL.md
packages/context/skills/lookml_ingest/SKILL.md
packages/context/skills/looker_ingest/SKILL.md
packages/context/skills/metabase_ingest/SKILL.md
packages/context/skills/metricflow_ingest/SKILL.md
packages/context/skills/live_database_ingest/SKILL.md
packages/context/skills/historic_sql_table_digest/SKILL.md
packages/context/skills/historic_sql_patterns/SKILL.md
packages/context/skills/knowledge_capture/SKILL.md
packages/context/skills/sl_capture/SKILL.md
```
- [ ] **Step 3: Run the shared prompt asset tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
```
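Expected: still FAIL. The shared-protocol assertions should now pass, but the
per-skill guard in `memory-runtime-assets.test.ts` still fails because the
legacy SQL examples in LookML, MetricFlow, and `sl_capture` have not been fixed
yet.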
### Task 3: Fix Legacy Per-Skill SQL Examples
**Files:**
- Modify: `packages/context/skills/lookml_ingest/SKILL.md`
- Modify: `packages/context/skills/metricflow_ingest/SKILL.md`
- Modify: `packages/context/skills/sl_capture/SKILL.md`
- [ ] **Step 1: Fix the LookML fallback probe example**
In `packages/context/skills/lookml_ingest/SKILL.md`, replace the current
Required flow item 2 with:
```md
2. If the table isn't in the manifest, use the warehouse `connectionName`
returned by `discover_data` or the target connection chosen from
`sl_discover`, then call a dialect-appropriate SQL probe with that
connection name, for example:
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
Replace `warehouse`, `analytics`, and `orders` with the verified connection,
schema or dataset, and table from the WorkUnit evidence.
```
- [ ] **Step 2: Fix the MetricFlow fallback probe example**
In `packages/context/skills/metricflow_ingest/SKILL.md`, replace the paragraph
that begins `If \`sl_discover\` errors` with:
```md
If `sl_discover` errors because no such table exists, use `discover_data` and
`entity_details` to find the warehouse target. If a SQL probe is still needed,
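call `sql_execution` with the same warehouse connection name, for example:
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
Replace `warehouse`, `analytics`, and `orders` with the verified connection,
schema or dataset, and table.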
**Never invent column names** - every column in `columns:`, `grain:`, and
`sql:` must be sourced from raw files, `entity_details`, or a successful SQL
probe.
```
- [ ] **Step 3: Fix the `sl_capture` join probe example**
In `packages/context/skills/sl_capture/SKILL.md`, replace Tool sequence item 6
with:
```md
6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
```
- [ ] **Step 4: Run the prompt asset tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
```
Expected: PASS. The tests must report 2 files passed.
### Task 4: Final Verification
**Files:**
- No new files.
- [ ] **Step 1: Run focused warehouse prompt and tool tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Inspect final diff**
Run:
```bash
git diff -- packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/_shared/identifier-verification.md packages/context/skills/notion_synthesize/SKILL.md packages/context/skills/dbt_ingest/SKILL.md packages/context/skills/lookml_ingest/SKILL.md packages/context/skills/looker_ingest/SKILL.md packages/context/skills/metabase_ingest/SKILL.md packages/context/skills/metricflow_ingest/SKILL.md packages/context/skills/live_database_ingest/SKILL.md packages/context/skills/historic_sql_table_digest/SKILL.md packages/context/skills/historic_sql_patterns/SKILL.md packages/context/skills/knowledge_capture/SKILL.md packages/context/skills/sl_capture/SKILL.md
```
Expected: only prompt wording and prompt-asset guards changed. No tool
implementation files changed.
- [ ] **Step 4: Commit**
Run:
```bash
git add packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/_shared/identifier-verification.md packages/context/skills/notion_synthesize/SKILL.md packages/context/skills/dbt_ingest/SKILL.md packages/context/skills/lookml_ingest/SKILL.md packages/context/skills/looker_ingest/SKILL.md packages/context/skills/metabase_ingest/SKILL.md packages/context/skills/metricflow_ingest/SKILL.md packages/context/skills/live_database_ingest/SKILL.md packages/context/skills/historic_sql_table_digest/SKILL.md packages/context/skills/historic_sql_patterns/SKILL.md packages/context/skills/knowledge_capture/SKILL.md packages/context/skills/sl_capture/SKILL.md
git commit -m "fix(context): align warehouse sql probe prompt shape"
```
Expected: one focused commit.
## Self-Review
Spec coverage:
- The original spec requires `sql_execution` inputs to include
`connectionName`; this plan removes contradictory session-only examples from
all active writer guidance.
- The shared protocol remains in `_shared` and is inlined in every synthesis
writer skill named by the original spec.
- The tool implementation remains unchanged because the shipped schema already
enforces the v1 contract.
Placeholder scan:
- The plan has no deferred implementation markers.
- Prompt examples use concrete example names (`warehouse`, `analytics.orders`,
`public.orders`, `public.customers`) only to demonstrate the tool call shape,
and the surrounding guidance tells the worker to substitute the connection and
table names found in discovered evidence.
Type consistency:
- Tests assert the exact KTX tool call shape:
`sql_execution({connectionName, sql: ...})`.
- Prompt wording consistently uses `connectionName`, matching
`packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts`.

Some files were not shown because too many files have changed in this diff Show more