mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
Merge remote-tracking branch 'origin/main' into metabase-mapping-ktx-yaml
# Conflicts:
#   packages/cli/src/ingest.test-utils.ts
#   packages/cli/src/ingest.test.ts
This commit is contained in:
commit
bdeb935db9
162 changed files with 9255 additions and 2522 deletions
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
|
|
@ -37,6 +37,9 @@ jobs:
|
|||
- name: Install TypeScript dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Run TypeScript dead-code checks
|
||||
run: pnpm run dead-code
|
||||
|
||||
- name: Run TypeScript checks
|
||||
run: pnpm run check
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,19 @@ repos:
|
|||
name: ruff format (python)
|
||||
files: ^python/
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: biome-dead-code
|
||||
name: biome dead-code check
|
||||
entry: pnpm exec biome ci . --formatter-enabled=false --assist-enabled=false
|
||||
language: system
|
||||
pass_filenames: false
|
||||
- id: knip-dead-code
|
||||
name: knip dead-code check
|
||||
entry: pnpm exec knip --reporter compact
|
||||
language: system
|
||||
pass_filenames: false
|
||||
|
||||
- repo: https://github.com/Yelp/detect-secrets
|
||||
rev: v1.5.0
|
||||
hooks:
|
||||
|
|
|
|||
19
AGENTS.md
19
AGENTS.md
|
|
@ -89,6 +89,7 @@ pnpm run build
|
|||
pnpm run type-check
|
||||
pnpm run test
|
||||
pnpm run check
|
||||
pnpm run dead-code
|
||||
pnpm --filter @ktx/cli run smoke
|
||||
pnpm --filter './packages/*' run build
|
||||
pnpm --filter './packages/*' run test
|
||||
|
|
@ -130,6 +131,7 @@ shared contracts or package exports are affected.
|
|||
- Build/export changes: `pnpm run build`
|
||||
- Workspace scripts: `node --test scripts/*.test.mjs` or the specific script
|
||||
test file
|
||||
- TypeScript dead-code tooling/config changes: `pnpm run dead-code`
|
||||
- Python semantic layer: `uv run pytest python/ktx-sl/tests -q`
|
||||
- Python daemon: `uv run pytest python/ktx-daemon/tests -q`
|
||||
- Python files: also run `uv run pre-commit run --files [FILES]` when
|
||||
|
|
@ -159,6 +161,23 @@ pnpm run test 2>&1 | tee /tmp/ktx-test-output.log
|
|||
- Do not manually edit generated or built output under `dist/`; edit source and
|
||||
rebuild.
|
||||
|
||||
### Dead TypeScript Code Checks
|
||||
|
||||
KTX uses Biome for local unused-code linting and Knip for workspace graph
|
||||
analysis. These checks are intentionally part of CI and pre-commit because the
|
||||
normal development workflow is agent-based.
|
||||
|
||||
- Run `pnpm run dead-code` after TypeScript changes.
|
||||
- Treat Knip findings as investigation prompts, not automatic deletion orders.
|
||||
- Remove private dead code when you confirm there are no imports, dynamic
|
||||
references, generated references, or tests that still need it.
|
||||
- Preserve public package exports unless the task explicitly includes API
|
||||
pruning.
|
||||
- Add narrow `knip.json` ignores only for intentional dynamic or public cases.
|
||||
Do not add broad package-level ignores to silence unrelated findings.
|
||||
- Update `knip.json` when adding dynamic entrypoints, generated files, package
|
||||
exports, CLI bins, or framework files that Knip cannot infer.
|
||||
|
||||
### CLI Standards
|
||||
|
||||
- Use Commander for CLI command trees, arguments, options, help text, custom
|
||||
|
|
|
|||
|
|
@ -152,8 +152,6 @@ ktx dev runtime install --yes
|
|||
ktx dev runtime status
|
||||
ktx dev runtime start
|
||||
ktx dev runtime stop
|
||||
ktx dev runtime prune --dry-run
|
||||
ktx dev runtime prune --yes
|
||||
```
|
||||
|
||||
The release artifact manifest contains the public npm tarball and the bundled `kaelio-ktx`
|
||||
|
|
|
|||
36
biome.json
Normal file
36
biome.json
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"$schema": "https://biomejs.dev/schemas/2.4.15/schema.json",
|
||||
"assist": {
|
||||
"enabled": false
|
||||
},
|
||||
"formatter": {
|
||||
"enabled": false
|
||||
},
|
||||
"files": {
|
||||
"includes": [
|
||||
"scripts/**/*.mjs",
|
||||
"packages/**/*.ts",
|
||||
"packages/**/*.tsx",
|
||||
"docs-site/**/*.ts",
|
||||
"docs-site/**/*.tsx",
|
||||
"docs-site/**/*.mjs",
|
||||
"!**/dist/**",
|
||||
"!**/coverage/**",
|
||||
"!**/.next/**",
|
||||
"!**/node_modules/**",
|
||||
"!**/*.gen.ts",
|
||||
"!**/*.generated.ts"
|
||||
]
|
||||
},
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": false,
|
||||
"correctness": {
|
||||
"noUnusedImports": "error",
|
||||
"noUnusedVariables": "error",
|
||||
"noUnusedPrivateClassMembers": "error"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -47,7 +47,7 @@ export function TerminalPreview() {
|
|||
<div className="h-2" />
|
||||
<div>
|
||||
<span className="term-prompt">$</span>{" "}
|
||||
<span className="term-cmd">ktx agent context --json</span>
|
||||
<span className="term-cmd">ktx status --json</span>
|
||||
<span className="term-cursor ml-1" />
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ Agents should start with the smallest source that answers the task:
|
|||
| How to check project readiness | [ktx status](/docs/cli-reference/ktx-status) | [Quickstart](/docs/getting-started/quickstart) |
|
||||
| How context gets built | [Building Context](/docs/guides/building-context) | [ktx ingest](/docs/cli-reference/ktx-ingest) |
|
||||
| How semantic YAML works | [Writing Context](/docs/guides/writing-context) | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| How machine-readable CLI output is shaped | [ktx agent](/docs/cli-reference/ktx-agent) | [Markdown Access](/docs/ai-resources/markdown-access) |
|
||||
| How machine-readable CLI output is shaped | [ktx sl](/docs/cli-reference/ktx-sl) | [ktx wiki](/docs/cli-reference/ktx-wiki) |
|
||||
|
||||
## Operating workflow
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,8 @@ Every docs page has a Markdown route:
|
|||
|
||||
```text
|
||||
https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
|
||||
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-agent.md
|
||||
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-sl.md
|
||||
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-wiki.md
|
||||
https://docs.kaelio.com/ktx/docs/guides/building-context.md
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -1,148 +0,0 @@
|
|||
---
|
||||
title: "ktx agent"
|
||||
description: "Machine-readable commands for coding agents."
|
||||
---
|
||||
|
||||
Hidden commands that provide machine-readable JSON output for coding agents. These are the commands that agent integrations (Claude Code, Cursor, Codex, OpenCode) call under the hood — you typically won't use them directly.
|
||||
|
||||
All `ktx agent` subcommands require `--json` and produce structured JSON output on stdout.
|
||||
|
||||
## Command signature
|
||||
|
||||
```bash
|
||||
ktx agent <subcommand> --json [options]
|
||||
```
|
||||
|
||||
## Subcommands
|
||||
|
||||
| Subcommand | Description |
|
||||
|-----------|-------------|
|
||||
| `tools` | Print available agent-facing KTX tools |
|
||||
| `context` | Print project context for agent planning |
|
||||
| `sl list` | List semantic-layer sources |
|
||||
| `sl read <sourceName>` | Read one semantic-layer source |
|
||||
| `sl query` | Run a semantic-layer query from a JSON file |
|
||||
| `wiki search <query>` | Search KTX wiki pages |
|
||||
| `wiki read <pageId>` | Read one KTX wiki page |
|
||||
| `sql execute` | Execute read-only SQL with a row limit |
|
||||
|
||||
## Options
|
||||
|
||||
### `agent tools`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
|
||||
### `agent context`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
|
||||
### `agent sl list`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
| `--connection-id <id>` | Filter by connection id | — |
|
||||
| `--query <text>` | Search source names and descriptions | — |
|
||||
|
||||
### `agent sl read`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
| `--connection-id <id>` | Connection id containing the source | — |
|
||||
|
||||
### `agent sl query`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
| `--connection-id <id>` | Connection id for execution (required) | — |
|
||||
| `--query-file <path>` | JSON semantic-layer query file (required) | — |
|
||||
| `--execute` | Execute the compiled query against the connection | `false` |
|
||||
| `--max-rows <number>` | Maximum rows to return when executing (1-1000) | — |
|
||||
|
||||
### `agent wiki search`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
| `--limit <number>` | Maximum search results | `10` |
|
||||
|
||||
### `agent wiki read`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
|
||||
### `agent sql execute`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output (required) | — |
|
||||
| `--connection-id <id>` | Connection id for execution (required) | — |
|
||||
| `--sql-file <path>` | SQL file to execute (required) | — |
|
||||
| `--max-rows <number>` | Maximum rows to return, 1-1000 (required) | — |
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
# List available tools
|
||||
ktx agent tools --json
|
||||
|
||||
# Get project context for planning
|
||||
ktx agent context --json
|
||||
|
||||
# List semantic sources
|
||||
ktx agent sl list --json
|
||||
|
||||
# Search semantic sources by name
|
||||
ktx agent sl list --json --query "revenue"
|
||||
|
||||
# Read a semantic source
|
||||
ktx agent sl read orders --json --connection-id my-warehouse
|
||||
|
||||
# Run a semantic-layer query from a file
|
||||
ktx agent sl query --json \
|
||||
--connection-id my-warehouse \
|
||||
--query-file /tmp/query.json \
|
||||
--execute \
|
||||
--max-rows 100
|
||||
|
||||
# Search wiki pages
|
||||
ktx agent wiki search "churn definition" --json
|
||||
|
||||
# Read a specific wiki page
|
||||
ktx agent wiki read page-abc123 --json
|
||||
|
||||
# Execute read-only SQL
|
||||
ktx agent sql execute --json \
|
||||
--connection-id my-warehouse \
|
||||
--sql-file /tmp/query.sql \
|
||||
--max-rows 500
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Every `ktx agent` command writes JSON to stdout and diagnostic text to stderr. Agents should parse stdout as JSON and treat a non-zero exit code as a failed tool call.
|
||||
|
||||
```json
|
||||
{
|
||||
"ok": true,
|
||||
"data": {
|
||||
"type": "agent-response"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Common errors
|
||||
|
||||
| Error | Cause | Recovery |
|
||||
|-------|-------|----------|
|
||||
| Missing JSON output | `--json` was omitted | Re-run the same subcommand with `--json` |
|
||||
| Unknown connection id | The requested connection is not configured in `ktx.yaml` | Call `ktx agent context --json` or `ktx connection list` to discover valid ids |
|
||||
| Query file cannot be read | `--query-file` points to a missing or invalid JSON file | Write the query payload to a real file and pass its absolute path |
|
||||
| SQL execution rejected | SQL is not read-only or `--max-rows` is missing | Use semantic-layer queries first; for direct SQL, pass read-only SQL and an explicit row limit |
|
||||
|
|
@ -16,7 +16,7 @@ ktx dev <subcommand> [options]
|
|||
| Subcommand | Description |
|
||||
|-----------|-------------|
|
||||
| `init [directory]` | Initialize a Git-backed KTX project directory |
|
||||
| `runtime` | Install, start, stop, inspect, and prune the KTX-managed Python runtime |
|
||||
| `runtime` | Install, start, stop, and inspect the KTX-managed Python runtime |
|
||||
|
||||
## `dev init`
|
||||
|
||||
|
|
@ -27,15 +27,14 @@ ktx dev <subcommand> [options]
|
|||
|
||||
## `dev runtime`
|
||||
|
||||
`ktx dev runtime` supports `install`, `start`, `stop`, `status`, and `prune`.
|
||||
`ktx dev runtime` supports `install`, `start`, `stop`, and `status`.
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--feature <feature>` | Runtime feature level for `install`, `start`, and `status` (`core` or `local-embeddings`) | `core` |
|
||||
| `--feature <feature>` | Runtime feature level for `install` and `start` (`core` or `local-embeddings`) | `core` |
|
||||
| `--json` | Print JSON output for `status` | `false` |
|
||||
| `--yes` | Confirm runtime install or prune actions where supported | `false` |
|
||||
| `--yes` | Confirm runtime install actions where supported | `false` |
|
||||
| `--force` | Reinstall or restart where supported | `false` |
|
||||
| `--dry-run` | Preview runtime pruning without removing files | `false` |
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
@ -48,8 +47,6 @@ ktx dev runtime install --yes
|
|||
ktx dev runtime status
|
||||
ktx dev runtime start
|
||||
ktx dev runtime stop
|
||||
ktx dev runtime prune --dry-run
|
||||
ktx dev runtime prune --yes
|
||||
```
|
||||
|
||||
## Common errors
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ ktx sl <subcommand> [options]
|
|||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--connection-id <id>` | Filter by KTX connection id | — |
|
||||
| `--query <text>` | Search source names and descriptions | — |
|
||||
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
|
||||
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
|
||||
|
||||
|
|
@ -36,6 +37,7 @@ ktx sl <subcommand> [options]
|
|||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--connection-id <id>` | KTX connection id (required) | — |
|
||||
| `--json` | Print JSON output | `false` |
|
||||
|
||||
### `sl validate`
|
||||
|
||||
|
|
@ -55,6 +57,7 @@ ktx sl <subcommand> [options]
|
|||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--connection-id <id>` | KTX connection id | — |
|
||||
| `--query-file <path>` | JSON semantic-layer query file | — |
|
||||
| `--measure <measure>` | Measure to query; repeatable (at least one required) | — |
|
||||
| `--dimension <dimension>` | Dimension to include; repeatable | — |
|
||||
| `--filter <filter>` | Filter expression; repeatable | — |
|
||||
|
|
@ -78,9 +81,15 @@ ktx sl list --connection-id my-warehouse
|
|||
# List sources as JSON
|
||||
ktx sl list --json
|
||||
|
||||
# Search sources as JSON
|
||||
ktx sl list --json --query "revenue"
|
||||
|
||||
# Read a source definition
|
||||
ktx sl read orders --connection-id my-warehouse
|
||||
|
||||
# Read a source definition as JSON
|
||||
ktx sl read orders --connection-id my-warehouse --json
|
||||
|
||||
# Validate a source against the live schema
|
||||
ktx sl validate orders --connection-id my-warehouse
|
||||
|
||||
|
|
@ -119,6 +128,13 @@ ktx sl query \
|
|||
--dimension orders.created_date \
|
||||
--execute \
|
||||
--max-rows 1000
|
||||
|
||||
# Execute a query from a JSON file
|
||||
ktx sl query \
|
||||
--connection-id my-warehouse \
|
||||
--query-file query.json \
|
||||
--execute \
|
||||
--max-rows 100
|
||||
```
|
||||
|
||||
## Output
|
||||
|
|
|
|||
|
|
@ -26,19 +26,23 @@ ktx wiki <subcommand> [options]
|
|||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output | `false` |
|
||||
| `--user-id <id>` | Local user id | `local` |
|
||||
|
||||
### `wiki read`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output | `false` |
|
||||
| `--user-id <id>` | Local user id | `local` |
|
||||
|
||||
### `wiki search`
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--json` | Print JSON output | `false` |
|
||||
| `--user-id <id>` | Local user id | `local` |
|
||||
| `--limit <number>` | Maximum search results | — |
|
||||
|
||||
### `wiki write`
|
||||
|
||||
|
|
@ -58,12 +62,21 @@ ktx wiki <subcommand> [options]
|
|||
# List all wiki pages
|
||||
ktx wiki list
|
||||
|
||||
# List all wiki pages as JSON
|
||||
ktx wiki list --json
|
||||
|
||||
# Read a specific wiki page
|
||||
ktx wiki read revenue-definitions
|
||||
|
||||
# Read a specific wiki page as JSON
|
||||
ktx wiki read revenue-definitions --json
|
||||
|
||||
# Search wiki pages
|
||||
ktx wiki search "monthly recurring revenue"
|
||||
|
||||
# Search wiki pages as JSON
|
||||
ktx wiki search "monthly recurring revenue" --json --limit 10
|
||||
|
||||
# Write a global knowledge page
|
||||
ktx wiki write revenue-definitions \
|
||||
--summary "Canonical revenue metric definitions" \
|
||||
|
|
@ -97,13 +110,16 @@ Wiki commands print local knowledge pages and search results. Agents should sear
|
|||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"key": "revenue-definitions",
|
||||
"summary": "Canonical revenue metric definitions",
|
||||
"score": 0.92
|
||||
}
|
||||
]
|
||||
"kind": "list",
|
||||
"data": {
|
||||
"items": [
|
||||
{
|
||||
"key": "revenue-definitions",
|
||||
"summary": "Canonical revenue metric definitions",
|
||||
"score": 0.92
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@
|
|||
"ktx-sl",
|
||||
"ktx-wiki",
|
||||
"ktx-status",
|
||||
"ktx-agent",
|
||||
"ktx-dev"
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ KTX writes project state as plain files so agents can inspect and edit changes i
|
|||
| `semantic-layer/<connection-id>/*.yaml` | context build, ingestion, or `ktx sl write` | Semantic source definitions agents use for SQL generation |
|
||||
| `knowledge/global/*.md` | ingestion or `ktx wiki write --scope global` | Shared business context and metric definitions |
|
||||
| `knowledge/user/<user-id>/*.md` | `ktx wiki write --scope user` | User-scoped notes for one agent/user context |
|
||||
| `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling `ktx agent` commands |
|
||||
| `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling public `ktx` commands |
|
||||
|
||||
## Verify it worked
|
||||
|
||||
|
|
|
|||
|
|
@ -3,37 +3,36 @@ title: Serving Agents
|
|||
description: Expose your context to Claude Code, Cursor, Codex, and other coding agents.
|
||||
---
|
||||
|
||||
Once you've built and refined your context, the final step is exposing it to
|
||||
coding agents. KTX provides machine-readable CLI commands for direct terminal
|
||||
access from Claude Code, Cursor, Codex, OpenCode, and custom agent workflows.
|
||||
Once you've built and refined your context, expose it to coding agents through
|
||||
the public KTX CLI. Claude Code, Cursor, Codex, OpenCode, and custom agent
|
||||
workflows can call the same commands you use at a terminal.
|
||||
|
||||
## CLI Commands
|
||||
|
||||
KTX provides a set of machine-readable commands under `ktx agent`. These return
|
||||
JSON output designed for programmatic consumption.
|
||||
KTX public commands support JSON output for the context reads that agents use
|
||||
most often. Use `--project-dir` when the agent is not already running inside the
|
||||
KTX project directory.
|
||||
|
||||
### Available commands
|
||||
|
||||
```bash
|
||||
# List available tools and their descriptions
|
||||
ktx agent tools --json
|
||||
|
||||
# Get project context for planning
|
||||
ktx agent context --json
|
||||
# Check setup and context readiness
|
||||
ktx status --json
|
||||
```
|
||||
|
||||
**Semantic layer:**
|
||||
|
||||
```bash
|
||||
# List sources
|
||||
ktx agent sl list --json
|
||||
ktx agent sl list --json --connection-id my-postgres
|
||||
ktx sl list --json
|
||||
ktx sl list --json --connection-id my-postgres
|
||||
ktx sl list --json --query "revenue"
|
||||
|
||||
# Read a source
|
||||
ktx agent sl read orders --json --connection-id my-postgres
|
||||
ktx sl read orders --json --connection-id my-postgres
|
||||
|
||||
# Run a query from a JSON file
|
||||
ktx agent sl query --json \
|
||||
ktx sl query --json \
|
||||
--connection-id my-postgres \
|
||||
--query-file query.json \
|
||||
--execute \
|
||||
|
|
@ -44,20 +43,10 @@ ktx agent sl query --json \
|
|||
|
||||
```bash
|
||||
# Search knowledge pages
|
||||
ktx agent wiki search "revenue recognition" --json --limit 10
|
||||
ktx wiki search "revenue recognition" --json --limit 10
|
||||
|
||||
# Read a specific page
|
||||
ktx agent wiki read order-status-definitions --json
|
||||
```
|
||||
|
||||
**SQL execution:**
|
||||
|
||||
```bash
|
||||
# Execute read-only SQL with a row limit
|
||||
ktx agent sql execute --json \
|
||||
--connection-id my-postgres \
|
||||
--sql-file query.sql \
|
||||
--max-rows 500
|
||||
ktx wiki read order-status-definitions --json
|
||||
```
|
||||
|
||||
## Setting Up Your Agent
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ title: Agent Clients
|
|||
description: Set up KTX with Claude Code, Cursor, Codex, and OpenCode.
|
||||
---
|
||||
|
||||
KTX integrates with coding agents through CLI skills and command files. These files teach agents to call `ktx agent ...` commands directly from the terminal for semantic-layer context, wiki knowledge, and safe SQL execution.
|
||||
KTX integrates with coding agents through CLI skills and command files. These
|
||||
files teach agents to call public `ktx` commands directly from the terminal for
|
||||
semantic-layer context and wiki knowledge.
|
||||
|
||||
Run `ktx setup` and select your agent targets, or configure manually using the snippets below.
|
||||
|
||||
|
|
@ -26,17 +28,17 @@ Create `.claude/skills/ktx/SKILL.md`:
|
|||
```markdown title=".claude/skills/ktx/SKILL.md"
|
||||
---
|
||||
name: ktx
|
||||
description: Use local KTX semantic context, wiki knowledge, and safe SQL execution for this project.
|
||||
description: Use local KTX semantic context and wiki knowledge for this project.
|
||||
---
|
||||
|
||||
Available commands:
|
||||
- `ktx agent context --json --project-dir /path/to/project`
|
||||
- `ktx agent sl list --json --project-dir /path/to/project`
|
||||
- `ktx agent sl read '<sourceName>' --json --project-dir /path/to/project`
|
||||
- `ktx agent sl query --json --project-dir /path/to/project --connection-id '<id>' --query-file '<path>' --execute --max-rows 100`
|
||||
- `ktx agent wiki search '<query>' --json --project-dir /path/to/project`
|
||||
- `ktx agent wiki read '<pageId>' --json --project-dir /path/to/project`
|
||||
- `ktx agent sql execute --json --project-dir /path/to/project --connection-id '<id>' --sql-file '<path>' --max-rows 100`
|
||||
- `ktx status --json --project-dir /path/to/project`
|
||||
- `ktx sl list --json --project-dir /path/to/project`
|
||||
- `ktx sl list --json --project-dir /path/to/project --query '<text>'`
|
||||
- `ktx sl read '<sourceName>' --json --project-dir /path/to/project --connection-id '<id>'`
|
||||
- `ktx sl query --json --project-dir /path/to/project --connection-id '<id>' --query-file '<path>' --execute --max-rows 100`
|
||||
- `ktx wiki search '<query>' --json --project-dir /path/to/project --limit 10`
|
||||
- `ktx wiki read '<pageId>' --json --project-dir /path/to/project`
|
||||
```
|
||||
|
||||
### Workflow tips
|
||||
|
|
@ -123,22 +125,19 @@ All supported agent clients call the same KTX CLI commands:
|
|||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `ktx agent context --json` | Return a compact project context summary |
|
||||
| `ktx agent tools --json` | List available agent-facing commands |
|
||||
| `ktx agent wiki search <query> --json` | Search knowledge pages |
|
||||
| `ktx agent wiki read <key> --json` | Read a knowledge page |
|
||||
| `ktx agent wiki write --json` | Write or update a knowledge page |
|
||||
| `ktx agent sl list --json` | List semantic layer sources |
|
||||
| `ktx agent sl read <source> --json` | Read a semantic source definition |
|
||||
| `ktx agent sl write --json` | Write or update a semantic source |
|
||||
| `ktx agent sl validate --json` | Validate semantic source definitions |
|
||||
| `ktx agent sl query --json` | Execute a semantic layer query when semantic compute is configured |
|
||||
| `ktx agent sql execute --json` | Execute read-only SQL with an explicit row limit |
|
||||
| `ktx status --json` | Return project setup and context readiness |
|
||||
| `ktx wiki search <query> --json` | Search knowledge pages |
|
||||
| `ktx wiki read <key> --json` | Read a knowledge page |
|
||||
| `ktx wiki write <key>` | Write or update a knowledge page |
|
||||
| `ktx sl list --json` | List semantic-layer sources |
|
||||
| `ktx sl list --query <text> --json` | Search semantic-layer sources |
|
||||
| `ktx sl read <source> --json --connection-id <id>` | Read a semantic source definition |
|
||||
| `ktx sl write <source> --connection-id <id>` | Write or update a semantic source |
|
||||
| `ktx sl validate <source> --connection-id <id>` | Validate semantic source definitions |
|
||||
| `ktx sl query --json` | Execute a semantic-layer query when semantic compute is configured |
|
||||
|
||||
### Security constraints
|
||||
|
||||
- SQL execution is always read-only.
|
||||
- Agent SQL execution requires an explicit `--max-rows` limit from 1 to 1000.
|
||||
- Secrets and credentials are never exposed in command output.
|
||||
- Commands resolve the project from `--project-dir`, `KTX_PROJECT_DIR`, or the nearest `ktx.yaml`.
|
||||
|
||||
|
|
|
|||
|
|
@ -511,4 +511,4 @@ No authentication required — SQLite is file-based. The file must be readable b
|
|||
| Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
|
||||
| Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup |
|
||||
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output |
|
||||
| SQL execution fails through agents | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the agent command flags |
|
||||
| Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |
|
||||
|
|
|
|||
|
|
@ -67,12 +67,12 @@ ${link("/docs/guides/writing-context", "Writing Context", "Write semantic source
|
|||
- [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response
|
||||
- [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown
|
||||
- [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough
|
||||
- [Agent CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-agent.md")}): Machine-readable agent commands
|
||||
- [Semantic-layer CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-sl.md")}): Semantic-layer commands and JSON output
|
||||
- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Knowledge page commands and JSON output
|
||||
|
||||
## CLI Reference
|
||||
|
||||
${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")}
|
||||
${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")}
|
||||
${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")}
|
||||
${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")}
|
||||
${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,785 @@
|
|||
# Notion Warehouse Verification Gap Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Close the remaining v1 gaps that prevent ingest agents, especially
|
||||
Notion WorkUnits, from reliably verifying warehouse table and column
|
||||
identifiers before writing wiki or semantic-layer output.
|
||||
|
||||
**Architecture:** Keep the existing warehouse verification tool module and
|
||||
runner wiring. Add Notion target-warehouse scoping through the local adapter
|
||||
factory, make the active WorkUnit prompt name the shipped tools, enforce
|
||||
`allowedConnectionNames` in `discover_data`, and teach `entity_details` to
|
||||
resolve column-level display targets and reject those whose column is missing.
|
||||
|
||||
**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX local
|
||||
ingest adapters, KTX file store.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
The previous implementation plan landed the main tool module and prompt
|
||||
protocol, but four v1-blocking gaps remain:
|
||||
|
||||
- Notion ingest sessions still allow only the Notion connection unless a
|
||||
specific adapter supplies target IDs. `NotionSourceAdapter` does not supply
|
||||
target warehouse IDs, so the original Notion hallucination case cannot use
|
||||
`entity_details` or raw-schema `discover_data` for the warehouse connection.
|
||||
- The active WorkUnit framing prompt still tells agents to call
|
||||
`wiki_sl_search` and `sl_describe_table`, which are not shipped KTX tools.
|
||||
- `discover_data` accepts an explicit out-of-scope `connectionName` and still
|
||||
searches raw schema for that connection.
|
||||
- `entity_details({ targets: [{ display: "schema.table.column" }] })` does not
|
||||
resolve column display strings and does not fail when an explicitly requested
|
||||
column is missing.
|
||||
|
||||
Non-blocking gaps remain out of scope for this plan:
|
||||
|
||||
- Full DDL-style `entity_details` formatting with FK and profile summaries.
|
||||
- AST-backed SQL read-only validation for data-modifying CTEs.
|
||||
- Search over `enrichment/descriptions.json` for generated descriptions.
|
||||
- Lexicographic latest-sync edge cases for non-timestamp sync IDs.
|
||||
- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`.
|
||||
|
||||
## File structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/ingest/adapters/notion/notion.adapter.ts`: add
|
||||
configured target warehouse IDs and implement `listTargetConnectionIds()`.
|
||||
- `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: cover
|
||||
Notion target connection ID fan-out.
|
||||
- `packages/context/src/ingest/local-adapters.ts`: pass primary warehouse IDs
|
||||
into `NotionSourceAdapter`.
|
||||
- `packages/context/src/ingest/local-adapters.test.ts`: cover local Notion
|
||||
adapter target IDs.
|
||||
- `packages/context/src/ingest/adapters/notion/chunk.ts`: update Notion
|
||||
WorkUnit notes to prefer the warehouse verification tools.
|
||||
- `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: update
|
||||
Notion note expectations.
|
||||
- `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`: replace
|
||||
stale tool names in the active WorkUnit prompt.
|
||||
- `packages/context/src/ingest/ingest-prompts.test.ts`: guard the WorkUnit
|
||||
prompt against stale tool names.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
|
||||
refuse explicit out-of-scope connection names.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
|
||||
cover `discover_data` scoping.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`:
|
||||
add column-aware display-target resolution.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`:
|
||||
cover column display resolution.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`:
|
||||
use column-aware resolution and report missing columns.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`:
|
||||
cover column display and missing-column behavior.
|
||||
|
||||
### Task 1: Give Notion ingest access to target warehouses
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.ts`
|
||||
- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`
|
||||
- Modify: `packages/context/src/ingest/local-adapters.ts`
|
||||
- Modify: `packages/context/src/ingest/local-adapters.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing Notion adapter test**
|
||||
|
||||
Add this test inside `describe('NotionSourceAdapter', ...)` in
|
||||
`packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`:
|
||||
|
||||
```ts
|
||||
it('returns configured target warehouse connection ids', async () => {
|
||||
const adapter = new NotionSourceAdapter({
|
||||
targetConnectionIds: ['warehouse', 'warehouse', 'analytics'],
|
||||
});
|
||||
|
||||
await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual([
|
||||
'analytics',
|
||||
'warehouse',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing Notion adapter test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/adapters/notion/notion.adapter.test.ts -t "target warehouse connection ids"
|
||||
```
|
||||
|
||||
Expected: FAIL because `NotionSourceAdapterDeps` has no
|
||||
`targetConnectionIds` option and `NotionSourceAdapter` does not implement
|
||||
`listTargetConnectionIds()`.
|
||||
|
||||
- [ ] **Step 3: Implement Notion target connection IDs**
|
||||
|
||||
Modify `packages/context/src/ingest/adapters/notion/notion.adapter.ts`:
|
||||
|
||||
```ts
|
||||
export interface NotionSourceAdapterDeps {
|
||||
onPullSucceeded?: (ctx: NotionPullSucceededContext) => Promise<void>;
|
||||
logger?: NotionFetchLogger;
|
||||
targetConnectionIds?: string[];
|
||||
}
|
||||
|
||||
function uniqueSorted(values: readonly string[] | undefined): string[] {
|
||||
return [...new Set(values ?? [])].sort((left, right) =>
|
||||
left.localeCompare(right),
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
Add this method to `NotionSourceAdapter`:
|
||||
|
||||
```ts
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
return uniqueSorted(this.deps.targetConnectionIds);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Pass primary warehouses into the local Notion adapter**
|
||||
|
||||
Modify the Notion adapter construction in
|
||||
`packages/context/src/ingest/local-adapters.ts`:
|
||||
|
||||
```ts
|
||||
new NotionSourceAdapter({
|
||||
targetConnectionIds: primaryWarehouseConnectionIds(project),
|
||||
...(options.logger ? { logger: options.logger } : {}),
|
||||
}),
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Write the local adapter fan-out test**
|
||||
|
||||
Add this test to `packages/context/src/ingest/local-adapters.test.ts`:
|
||||
|
||||
```ts
|
||||
it('passes primary warehouse connection ids to the local Notion adapter', async () => {
|
||||
const adapters = createDefaultLocalIngestAdapters(
|
||||
projectWithConnections({
|
||||
notion: {
|
||||
driver: 'notion',
|
||||
auth_token: 'secret',
|
||||
crawl_mode: 'selected_roots',
|
||||
root_page_ids: ['page-1'],
|
||||
},
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgresql://readonly@db.example.test/analytics',
|
||||
},
|
||||
docs: {
|
||||
driver: 'dbt',
|
||||
source_dir: './dbt',
|
||||
},
|
||||
} as never),
|
||||
);
|
||||
|
||||
const notion = adapters.find((adapter) => adapter.source === 'notion');
|
||||
|
||||
await expect(notion?.listTargetConnectionIds?.('/tmp/staged-notion')).resolves.toEqual([
|
||||
'warehouse',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Run the Notion target tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/adapters/notion/notion.adapter.test.ts \
|
||||
src/ingest/local-adapters.test.ts -t "target warehouse connection ids|local Notion adapter"
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 7: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/adapters/notion/notion.adapter.ts \
|
||||
packages/context/src/ingest/adapters/notion/notion.adapter.test.ts \
|
||||
packages/context/src/ingest/local-adapters.ts \
|
||||
packages/context/src/ingest/local-adapters.test.ts
|
||||
git commit -m "fix(context): expose target warehouses to Notion ingest"
|
||||
```
|
||||
|
||||
### Task 2: Remove stale tool names from active ingest prompts
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`
|
||||
- Modify: `packages/context/src/ingest/ingest-prompts.test.ts`
|
||||
- Modify: `packages/context/src/ingest/adapters/notion/chunk.ts`
|
||||
- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add failing prompt guards**
|
||||
|
||||
Add this test to `packages/context/src/ingest/ingest-prompts.test.ts`:
|
||||
|
||||
```ts
|
||||
it('uses shipped warehouse verification tools in the WorkUnit prompt', async () => {
|
||||
const prompt = await readFile(
|
||||
new URL('../../prompts/memory_agent_bundle_ingest_work_unit.md', import.meta.url),
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
expect(prompt).toContain('discover_data');
|
||||
expect(prompt).toContain('entity_details');
|
||||
expect(prompt).not.toContain('wiki_sl_search');
|
||||
expect(prompt).not.toContain('sl_describe_table');
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing prompt guard**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-prompts.test.ts -t "warehouse verification tools"
|
||||
```
|
||||
|
||||
Expected: FAIL because the WorkUnit prompt still contains `wiki_sl_search` and
|
||||
`sl_describe_table`.
|
||||
|
||||
- [ ] **Step 3: Update the WorkUnit framing prompt**
|
||||
|
||||
In `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`, replace
|
||||
the first `<role>` paragraph with:
|
||||
|
||||
```md
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`.
|
||||
```
|
||||
|
||||
In workflow step 2, replace the final sentence with:
|
||||
|
||||
```md
|
||||
The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping.
|
||||
```
|
||||
|
||||
In workflow step 4, replace the sentence that starts
|
||||
`For each raw file:` with:
|
||||
|
||||
```md
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
|
||||
```
|
||||
|
||||
In the `<do_not>` block, replace the physical-column rule with:
|
||||
|
||||
```md
|
||||
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Update Notion WorkUnit notes**
|
||||
|
||||
In `packages/context/src/ingest/adapters/notion/chunk.ts`, replace
|
||||
`NOTION_SL_WRITE_GUIDANCE` with:
|
||||
|
||||
```ts
|
||||
const NOTION_SL_WRITE_GUIDANCE =
|
||||
'Write wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages, SL sources, and raw warehouse schema for the same tables or sl_refs with discover_data before creating a new page. Only write or edit SL sources after discover_data plus sl_discover/sl_read_source or entity_details confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.';
|
||||
```
|
||||
|
||||
In the `reconcileNotes` array in the same file, replace:
|
||||
|
||||
```ts
|
||||
'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.',
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```ts
|
||||
'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.',
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Update Notion note expectations**
|
||||
|
||||
In `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`,
|
||||
update the note expectations in `it('chunks changed Notion pages...')`:
|
||||
|
||||
```ts
|
||||
expect(result.workUnits[0].notes).toContain('discover_data');
|
||||
expect(result.workUnits[0].notes).toContain('entity_details');
|
||||
```
|
||||
|
||||
Update the exact `reconcileNotes` expectation to:
|
||||
|
||||
```ts
|
||||
expect(result.reconcileNotes).toEqual([
|
||||
'Notion maxKnowledgeCreatesPerRun=25',
|
||||
'Notion maxKnowledgeUpdatesPerRun=20',
|
||||
'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.',
|
||||
'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.',
|
||||
]);
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Run prompt and Notion note tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-prompts.test.ts \
|
||||
src/ingest/adapters/notion/notion.adapter.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 7: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \
|
||||
packages/context/src/ingest/ingest-prompts.test.ts \
|
||||
packages/context/src/ingest/adapters/notion/chunk.ts \
|
||||
packages/context/src/ingest/adapters/notion/notion.adapter.test.ts
|
||||
git commit -m "fix(context): update ingest prompts for warehouse verification tools"
|
||||
```
|
||||
|
||||
### Task 3: Enforce allowed connection scope in discover_data
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing scoping test**
|
||||
|
||||
Add this test to
|
||||
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
|
||||
|
||||
```ts
|
||||
it('refuses explicit out-of-scope connection names', async () => {
|
||||
const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context);
|
||||
|
||||
expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.');
|
||||
expect(result.structured).toEqual({ wiki: null, sl: null, raw: null });
|
||||
expect(wikiSearchTool.call).not.toHaveBeenCalled();
|
||||
expect(slDiscoverTool.call).not.toHaveBeenCalled();
|
||||
expect(catalog.searchByName).not.toHaveBeenCalled();
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing scoping test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts -t "out-of-scope"
|
||||
```
|
||||
|
||||
Expected: FAIL because `discover_data` currently searches raw schema for an
|
||||
explicit `connectionName` even when it is not in `allowedConnectionNames`.
|
||||
|
||||
- [ ] **Step 3: Add the scope guard**
|
||||
|
||||
In
|
||||
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`,
|
||||
add this helper near `totalSources()`:
|
||||
|
||||
```ts
|
||||
function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null {
|
||||
return context.session?.allowedConnectionNames ?? null;
|
||||
}
|
||||
```
|
||||
|
||||
At the top of `DiscoverDataTool.call()`, before the `sourceName` branch and
|
||||
before calling any child tool, add:
|
||||
|
||||
```ts
|
||||
const allowed = allowedConnectionNames(context);
|
||||
if (input.connectionName && allowed && !allowed.has(input.connectionName)) {
|
||||
return {
|
||||
markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
|
||||
structured: { wiki: null, sl: null, raw: null },
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
Then replace the raw connection-list construction with:
|
||||
|
||||
```ts
|
||||
const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort();
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run discover_data tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
|
||||
git commit -m "fix(context): scope raw schema discovery to allowed connections"
|
||||
```
|
||||
|
||||
### Task 4: Fix column-level entity_details verification
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write failing catalog column-target tests**
|
||||
|
||||
First update `seedLiveDatabaseScan()` in `warehouse-catalog.service.test.ts` so BigQuery tables have
|
||||
a project/catalog. Replace the repeated inline table refs with:
|
||||
|
||||
```ts
|
||||
const tableRef = {
|
||||
catalog: driver === 'bigquery' ? 'analytics' : null,
|
||||
db: driver === 'sqlite' ? null : 'public',
|
||||
name: 'orders',
|
||||
};
|
||||
```
|
||||
|
||||
Use `tableRef.catalog`, `tableRef.db`, and `tableRef.name` for the seeded
|
||||
table and profile table references.
|
||||
|
||||
Then add these tests to
|
||||
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`:
|
||||
|
||||
```ts
|
||||
it('resolves postgres column display strings without treating the column as a table', async () => {
|
||||
await seedLiveDatabaseScan();
|
||||
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
|
||||
|
||||
await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({
|
||||
resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' },
|
||||
candidates: [],
|
||||
dialect: 'postgres',
|
||||
});
|
||||
});
|
||||
|
||||
it('resolves BigQuery column display strings with four parts', async () => {
|
||||
await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery');
|
||||
const catalog = new WarehouseCatalogService({ fileStore: project.fileStore });
|
||||
|
||||
await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({
|
||||
resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' },
|
||||
candidates: [],
|
||||
dialect: 'bigquery',
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing catalog tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts -t "column display"
|
||||
```
|
||||
|
||||
Expected: FAIL because `resolveDisplayTarget()` does not exist.
|
||||
|
||||
- [ ] **Step 3: Implement column-aware display resolution**
|
||||
|
||||
In
|
||||
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`,
|
||||
add this exported interface near `RawSchemaHit`:
|
||||
|
||||
```ts
|
||||
export interface DisplayTargetResolution {
|
||||
resolved: (KtxTableRef & { column?: string }) | null;
|
||||
candidates: KtxTableRef[];
|
||||
dialect: string;
|
||||
}
|
||||
```
|
||||
|
||||
Add these helpers near `parseDisplay()`:
|
||||
|
||||
```ts
|
||||
function expectedDisplayPartCount(driver: CatalogDriver): number {
|
||||
if (driver === 'sqlite' || driver === 'sqlite3') {
|
||||
return 1;
|
||||
}
|
||||
if (driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver') {
|
||||
return 3;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
|
||||
function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null {
|
||||
const parts = splitDisplay(display);
|
||||
const tablePartCount = expectedDisplayPartCount(driver);
|
||||
if (parts.length !== tablePartCount + 1) {
|
||||
return null;
|
||||
}
|
||||
const column = parts.at(-1);
|
||||
if (!column) {
|
||||
return null;
|
||||
}
|
||||
const table = parseDisplay(driver, parts.slice(0, -1).join('.'));
|
||||
return table ? { ...table, column } : null;
|
||||
}
|
||||
```
|
||||
|
||||
Add this method to `WarehouseCatalogService` after `resolveDisplay()`:
|
||||
|
||||
```ts
|
||||
async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
|
||||
const catalog = await this.loadCatalog(connectionName);
|
||||
if (!catalog) {
|
||||
return { resolved: null, candidates: [], dialect: 'unknown' };
|
||||
}
|
||||
|
||||
const dialect = getDialectForDriver(catalog.driver).type;
|
||||
const tableResolution = await this.resolveDisplay(connectionName, display);
|
||||
if (tableResolution.resolved) {
|
||||
return tableResolution;
|
||||
}
|
||||
|
||||
const parsedColumn = parseColumnDisplay(catalog.driver, display);
|
||||
if (!parsedColumn) {
|
||||
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
|
||||
}
|
||||
|
||||
const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn));
|
||||
if (!table) {
|
||||
return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
|
||||
}
|
||||
|
||||
return {
|
||||
resolved: {
|
||||
catalog: table.catalog,
|
||||
db: table.db,
|
||||
name: table.name,
|
||||
column: parsedColumn.column,
|
||||
},
|
||||
candidates: [],
|
||||
dialect,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Write failing entity_details column tests**
|
||||
|
||||
Add these tests to
|
||||
`packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`:
|
||||
|
||||
```ts
|
||||
it('resolves display targets that include a column name', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] },
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('### public.orders');
|
||||
expect(result.markdown).toContain('- status (text, nullable=false)');
|
||||
expect(result.markdown).not.toContain('- id (integer');
|
||||
expect(result.structured.resolved).toHaveLength(1);
|
||||
expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']);
|
||||
});
|
||||
|
||||
it('reports missing explicit columns instead of returning an empty column list', async () => {
|
||||
const result = await tool.call(
|
||||
{ connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] },
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier');
|
||||
expect(result.markdown).toContain('Available columns: id, status');
|
||||
expect(result.structured.resolved).toHaveLength(0);
|
||||
expect(result.structured.missing).toHaveLength(1);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the failing entity_details tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts -t "column"
|
||||
```
|
||||
|
||||
Expected: FAIL because display column targets are treated as table names and
|
||||
missing columns are not reported.
|
||||
|
||||
- [ ] **Step 6: Use column-aware resolution in entity_details**
|
||||
|
||||
In
|
||||
`packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`,
|
||||
add this helper near `appendTableMarkdown()`:
|
||||
|
||||
```ts
|
||||
function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null {
|
||||
const normalized = columnName.toLowerCase();
|
||||
return detail.columns.find((column) => column.name.toLowerCase() === normalized) ?? null;
|
||||
}
|
||||
```
|
||||
|
||||
Replace the display resolution block inside the `for (const target of
|
||||
input.targets)` loop with:
|
||||
|
||||
```ts
|
||||
const resolution =
|
||||
'display' in target
|
||||
? await catalog.resolveDisplayTarget(input.connectionName, target.display)
|
||||
: {
|
||||
resolved: { catalog: target.catalog, db: target.db, name: target.name, column: target.column },
|
||||
candidates: [],
|
||||
dialect: '',
|
||||
};
|
||||
```
|
||||
|
||||
After `const detail = await catalog.getTable(...)`, replace the existing
|
||||
`resolved.push(detail); appendTableMarkdown(...)` lines with:
|
||||
|
||||
```ts
|
||||
const requestedColumn = resolution.resolved.column;
|
||||
if (requestedColumn) {
|
||||
const column = findColumn(detail, requestedColumn);
|
||||
if (!column) {
|
||||
missing.push({
|
||||
target,
|
||||
candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }],
|
||||
});
|
||||
parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`);
|
||||
parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`);
|
||||
continue;
|
||||
}
|
||||
const scopedDetail = { ...detail, columns: [column] };
|
||||
resolved.push(scopedDetail);
|
||||
appendTableMarkdown(parts, scopedDetail, column.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
resolved.push(detail);
|
||||
appendTableMarkdown(parts, detail);
|
||||
```
|
||||
|
||||
- [ ] **Step 7: Run warehouse verification tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
|
||||
src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 8: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
|
||||
git commit -m "fix(context): verify warehouse column display targets"
|
||||
```
|
||||
|
||||
### Task 5: Verify the v1 gap closure
|
||||
|
||||
**Files:**
|
||||
- Verify all files changed by Tasks 1-4.
|
||||
|
||||
- [ ] **Step 1: Run focused tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/adapters/notion/notion.adapter.test.ts \
|
||||
src/ingest/local-adapters.test.ts \
|
||||
src/ingest/ingest-prompts.test.ts \
|
||||
src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
|
||||
src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
|
||||
src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run package type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run package tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run test
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 4: Run pre-commit on changed files when configured**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
uv run pre-commit run --files \
|
||||
packages/context/src/ingest/adapters/notion/notion.adapter.ts \
|
||||
packages/context/src/ingest/adapters/notion/notion.adapter.test.ts \
|
||||
packages/context/src/ingest/local-adapters.ts \
|
||||
packages/context/src/ingest/local-adapters.test.ts \
|
||||
packages/context/src/ingest/adapters/notion/chunk.ts \
|
||||
packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \
|
||||
packages/context/src/ingest/ingest-prompts.test.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS. If the repo has no pre-commit config or the local `uv` version
|
||||
cannot satisfy the project pin, record the exact error and rely on focused
|
||||
tests plus type-check.
|
||||
|
||||
- [ ] **Step 5: Inspect final git status**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git status --short
|
||||
```
|
||||
|
||||
Expected: only intentional files are modified. Commit any formatter-driven
|
||||
changes with:
|
||||
|
||||
```bash
|
||||
git add packages/context
|
||||
git commit -m "chore(context): verify warehouse verification v1 gaps"
|
||||
```
|
||||
|
||||
## Self-review checklist
|
||||
|
||||
- Spec coverage: this plan closes the remaining v1 paths for Notion warehouse
|
||||
verification, active WorkUnit prompt correctness, raw discovery scoping, and
|
||||
column-level identifier verification.
|
||||
- Placeholder scan: no task relies on future-work markers, unnamed edge-case
|
||||
handling, or cross-task shorthand.
|
||||
- Type consistency: `discover_data` continues to use `connectionName`,
|
||||
`sl_discover` still receives `connectionId` internally, and
|
||||
`resolveDisplayTarget()` returns the same table identity plus optional
|
||||
`column`.
|
||||
|
|
@ -0,0 +1,957 @@
|
|||
# Warehouse Verification Final V1 Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Close the remaining v1 gaps that still prevent ingest agents from
|
||||
reliably following warehouse verification results through to `entity_details`
|
||||
and `sql_execution`.
|
||||
|
||||
**Architecture:** Keep the existing warehouse verification module and runner
|
||||
session scoping. Add connection names to raw discovery hits, expose primary
|
||||
warehouse targets from the remaining source adapters, and make local ingest
|
||||
SQL probes use the same scan connector read-only execution path as schema scan.
|
||||
|
||||
**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX local
|
||||
ingest runtime, KTX scan connectors.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
The first two implementation plans landed the warehouse verification tools,
|
||||
prompt protocol, Notion warehouse scoping, and stale prompt-name cleanup. The
|
||||
focused audit on May 12, 2026, found three remaining v1-blocking gaps:
|
||||
|
||||
- `discover_data` searches multiple allowed raw warehouse scans, but raw hits do
|
||||
not carry or render `connectionName`. The tool tells the agent to call
|
||||
`entity_details({connectionName, targets: [...]})`, then omits the required
|
||||
`connectionName` from the follow-up evidence.
|
||||
- Local LookML and MetricFlow adapters do not expose primary warehouse target
|
||||
IDs. The runner only adds adapter-provided targets to `allowedConnectionNames`,
|
||||
so those WorkUnits cannot use raw warehouse verification unless their source
|
||||
connection is itself the warehouse.
|
||||
- `sql_execution` calls the local ingest connection catalog, but the catalog
|
||||
either has no query executor in normal CLI ingest or calls an injected
|
||||
executor without `projectDir` and connection config. The default local query
|
||||
executor cannot dispatch without that config.
|
||||
|
||||
Non-blocking gaps remain out of scope for this v1 plan:
|
||||
|
||||
- Full DDL-style `entity_details` formatting with FK profile summaries.
|
||||
- AST-backed SQL read-only validation for data-modifying CTE bodies.
|
||||
- Search over generated `enrichment/descriptions.json`.
|
||||
- Lexicographic latest-sync edge cases for non-timestamp sync IDs.
|
||||
- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`.
|
||||
|
||||
## File structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`:
|
||||
add `connectionName` to raw schema hit records.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
|
||||
render raw hit connection names and preserve them in structured output.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
|
||||
cover multi-connection raw discovery follow-up data.
|
||||
- `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`:
|
||||
accept and return configured target warehouse connection IDs.
|
||||
- `packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`:
|
||||
cover LookML target warehouse IDs.
|
||||
- `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`:
|
||||
accept and return configured target warehouse connection IDs.
|
||||
- `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`:
|
||||
cover MetricFlow target warehouse IDs.
|
||||
- `packages/context/src/ingest/local-adapters.ts`:
|
||||
pass primary warehouse IDs into LookML and MetricFlow adapters.
|
||||
- `packages/context/src/ingest/local-adapters.test.ts`:
|
||||
cover local adapter warehouse target fan-out.
|
||||
- `packages/context/src/ingest/local-bundle-runtime.ts`:
|
||||
pass full project connection config to local ingest query executors.
|
||||
- `packages/context/src/ingest/local-bundle-runtime.test.ts`:
|
||||
cover the local ingest query executor call shape.
|
||||
- `packages/context/src/ingest/local-ingest.ts`:
|
||||
use the shared query executor port type.
|
||||
- `packages/context/src/mcp/local-project-ports.ts`:
|
||||
no behavior change expected, but type-checks against the updated local ingest
|
||||
query executor type.
|
||||
- `packages/cli/src/ingest.ts`:
|
||||
provide a read-only scan-connector-backed query executor for normal local
|
||||
ingest runs.
|
||||
|
||||
Create these files:
|
||||
|
||||
- `packages/cli/src/ingest-query-executor.ts`: CLI query executor that adapts
|
||||
scan connectors' `executeReadOnly()` method to `KtxSqlQueryExecutorPort`.
|
||||
- `packages/cli/src/ingest-query-executor.test.ts`: unit coverage for the CLI
|
||||
ingest query executor.
|
||||
|
||||
### Task 1: Preserve raw discovery connection names
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing multi-connection discovery test**
|
||||
|
||||
Add this test to
|
||||
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`:
|
||||
|
||||
```ts
|
||||
it('includes connectionName on raw schema hits so entity_details can follow up', async () => {
|
||||
const multiConnectionContext: ToolContext = {
|
||||
...context,
|
||||
session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any,
|
||||
};
|
||||
catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [
|
||||
{
|
||||
kind: 'table',
|
||||
connectionName,
|
||||
ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` },
|
||||
display: `public.${connectionName}_${query}`,
|
||||
matchedOn: 'name',
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await tool.call({ query: 'orders', limit: 10 }, multiConnectionContext);
|
||||
|
||||
expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10);
|
||||
expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10);
|
||||
expect(result.markdown).toContain('connectionName=analytics');
|
||||
expect(result.markdown).toContain('connectionName=warehouse');
|
||||
expect(result.markdown).toContain(
|
||||
'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})',
|
||||
);
|
||||
expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual([
|
||||
'analytics',
|
||||
'warehouse',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing discovery test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts -t "connectionName on raw schema hits"
|
||||
```
|
||||
|
||||
Expected: FAIL because `RawSchemaHit` has no `connectionName` property and the
|
||||
markdown only renders the display string.
|
||||
|
||||
- [ ] **Step 3: Add `connectionName` to raw schema hits**
|
||||
|
||||
Modify the raw hit type and hit construction in
|
||||
`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`:
|
||||
|
||||
```ts
|
||||
export type RawSchemaHit =
|
||||
| {
|
||||
kind: 'table';
|
||||
connectionName: string;
|
||||
ref: KtxTableRef;
|
||||
display: string;
|
||||
matchedOn: 'name' | 'db' | 'comment' | 'description';
|
||||
}
|
||||
| {
|
||||
kind: 'column';
|
||||
connectionName: string;
|
||||
ref: KtxTableRef & { column: string };
|
||||
display: string;
|
||||
matchedOn: 'name' | 'comment' | 'description';
|
||||
};
|
||||
```
|
||||
|
||||
In the table hit block, add `connectionName`:
|
||||
|
||||
```ts
|
||||
hits.push({
|
||||
kind: 'table',
|
||||
connectionName,
|
||||
ref: { catalog: table.catalog, db: table.db, name: table.name },
|
||||
display: formatDisplay(catalog.driver, table),
|
||||
matchedOn: tableMatch,
|
||||
});
|
||||
```
|
||||
|
||||
In the column hit block, add `connectionName`:
|
||||
|
||||
```ts
|
||||
hits.push({
|
||||
kind: 'column',
|
||||
connectionName,
|
||||
ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name },
|
||||
display: `${formatDisplay(catalog.driver, table)}.${column.name}`,
|
||||
matchedOn: columnMatch,
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Render follow-up-ready raw hits**
|
||||
|
||||
Modify the raw schema markdown in
|
||||
`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
|
||||
|
||||
```ts
|
||||
parts.push('## Raw Warehouse Schema', '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values');
|
||||
parts.push(
|
||||
rawHits
|
||||
.slice(0, limit)
|
||||
.map(
|
||||
(hit) =>
|
||||
`- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) — ` +
|
||||
`follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``,
|
||||
)
|
||||
.join('\n'),
|
||||
);
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the discovery test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 6: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
|
||||
git commit -m "fix(context): include raw discovery connection names"
|
||||
```
|
||||
|
||||
### Task 2: Expose LookML and MetricFlow warehouse targets
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`
|
||||
- Modify: `packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`
|
||||
- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`
|
||||
- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`
|
||||
- Modify: `packages/context/src/ingest/local-adapters.ts`
|
||||
- Modify: `packages/context/src/ingest/local-adapters.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write failing adapter target tests**
|
||||
|
||||
Add this test to
|
||||
`packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`:
|
||||
|
||||
```ts
|
||||
it('returns configured target warehouse connection ids', async () => {
|
||||
const adapter = new LookmlSourceAdapter({
|
||||
homeDir: join(tmpRoot, 'home'),
|
||||
targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
|
||||
});
|
||||
|
||||
await expect(adapter.listTargetConnectionIds?.(join(tmpRoot, 'staged'))).resolves.toEqual([
|
||||
'analytics',
|
||||
'warehouse',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
Add this test to
|
||||
`packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`:
|
||||
|
||||
```ts
|
||||
it('returns configured target warehouse connection ids', async () => {
|
||||
const metricflow = new MetricflowSourceAdapter({
|
||||
homeDir: join(tmpRoot, 'cache-home'),
|
||||
targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
|
||||
});
|
||||
|
||||
await expect(metricflow.listTargetConnectionIds?.(stagedDir)).resolves.toEqual([
|
||||
'analytics',
|
||||
'warehouse',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing adapter tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/adapters/lookml/lookml.adapter.test.ts \
|
||||
src/ingest/adapters/metricflow/metricflow.adapter.test.ts -t "target warehouse connection ids"
|
||||
```
|
||||
|
||||
Expected: FAIL because neither adapter accepts `targetConnectionIds` or
|
||||
implements `listTargetConnectionIds()`.
|
||||
|
||||
- [ ] **Step 3: Implement target ID support in LookML**
|
||||
|
||||
Modify `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`:
|
||||
|
||||
```ts
|
||||
export interface LookmlSourceAdapterDeps {
|
||||
homeDir: string;
|
||||
targetConnectionIds?: string[];
|
||||
}
|
||||
|
||||
function uniqueSorted(values: readonly string[] | undefined): string[] {
|
||||
return [...new Set(values ?? [])].sort((left, right) => left.localeCompare(right));
|
||||
}
|
||||
```
|
||||
|
||||
Add this method to `LookmlSourceAdapter`:
|
||||
|
||||
```ts
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
return uniqueSorted(this.deps.targetConnectionIds);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Implement target ID support in MetricFlow**
|
||||
|
||||
Modify `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`:
|
||||
|
||||
```ts
|
||||
export interface MetricflowSourceAdapterDeps {
|
||||
homeDir: string;
|
||||
targetConnectionIds?: string[];
|
||||
}
|
||||
|
||||
function uniqueSorted(values: readonly string[] | undefined): string[] {
|
||||
return [...new Set(values ?? [])].sort((left, right) => left.localeCompare(right));
|
||||
}
|
||||
```
|
||||
|
||||
Add this method to `MetricflowSourceAdapter`:
|
||||
|
||||
```ts
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
return uniqueSorted(this.deps.targetConnectionIds);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Pass primary warehouses from the local adapter factory**
|
||||
|
||||
Modify the LookML and MetricFlow adapter construction in
|
||||
`packages/context/src/ingest/local-adapters.ts`:
|
||||
|
||||
```ts
|
||||
new LookmlSourceAdapter({
|
||||
homeDir: join(project.projectDir, '.ktx/cache'),
|
||||
targetConnectionIds: primaryWarehouseConnectionIds(project),
|
||||
}),
|
||||
```
|
||||
|
||||
```ts
|
||||
new MetricflowSourceAdapter({
|
||||
homeDir: join(project.projectDir, '.ktx/cache'),
|
||||
targetConnectionIds: primaryWarehouseConnectionIds(project),
|
||||
}),
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Write the local adapter fan-out test**
|
||||
|
||||
Add this test to `packages/context/src/ingest/local-adapters.test.ts`:
|
||||
|
||||
```ts
|
||||
it('passes primary warehouse connection ids to local LookML and MetricFlow adapters', async () => {
|
||||
const adapters = createDefaultLocalIngestAdapters(
|
||||
projectWithConnections({
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
url: 'postgresql://readonly@db.example.test/analytics',
|
||||
},
|
||||
lookml_docs: {
|
||||
driver: 'lookml',
|
||||
lookml: {
|
||||
repoUrl: 'https://github.com/acme/lookml.git',
|
||||
},
|
||||
},
|
||||
metrics_repo: {
|
||||
driver: 'metricflow',
|
||||
metricflow: {
|
||||
repoUrl: 'https://github.com/acme/metrics.git',
|
||||
},
|
||||
},
|
||||
} as never),
|
||||
);
|
||||
|
||||
const lookml = adapters.find((adapter) => adapter.source === 'lookml');
|
||||
const metricflow = adapters.find((adapter) => adapter.source === 'metricflow');
|
||||
|
||||
await expect(lookml?.listTargetConnectionIds?.('/tmp/staged-lookml')).resolves.toEqual([
|
||||
'warehouse',
|
||||
]);
|
||||
await expect(metricflow?.listTargetConnectionIds?.('/tmp/staged-metricflow')).resolves.toEqual([
|
||||
'warehouse',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 7: Run the target fan-out tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/adapters/lookml/lookml.adapter.test.ts \
|
||||
src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
|
||||
src/ingest/local-adapters.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 8: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/adapters/lookml/lookml.adapter.ts \
|
||||
packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts \
|
||||
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \
|
||||
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
|
||||
packages/context/src/ingest/local-adapters.ts \
|
||||
packages/context/src/ingest/local-adapters.test.ts
|
||||
git commit -m "fix(context): expose warehouse targets for LookML and MetricFlow"
|
||||
```
|
||||
|
||||
### Task 3: Pass full connection config to local ingest SQL execution
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
|
||||
- Modify: `packages/context/src/ingest/local-ingest.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing local connection catalog test**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, change the
|
||||
Vitest import to include `vi`:
|
||||
|
||||
```ts
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
```
|
||||
|
||||
Extend `RuntimeWithConnectionDeps`:
|
||||
|
||||
```ts
|
||||
type RuntimeWithConnectionDeps = {
|
||||
deps: {
|
||||
connections: {
|
||||
listEnabledConnections(ids: string[]): Promise<Array<{ id: string; name: string; connectionType: string }>>;
|
||||
getConnectionById(connectionId: string): Promise<{ id: string; name: string; connectionType: string } | null>;
|
||||
executeQuery(connectionId: string, sql: string): Promise<unknown>;
|
||||
};
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
Add this test:
|
||||
|
||||
```ts
|
||||
it('passes project connection config to local ingest query executors', async () => {
|
||||
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
|
||||
const queryExecutor = {
|
||||
execute: vi.fn(async () => ({
|
||||
headers: ['answer'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
command: 'SELECT',
|
||||
rowCount: 1,
|
||||
})),
|
||||
};
|
||||
|
||||
const runtime = createLocalBundleIngestRuntime({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
agentRunner,
|
||||
queryExecutor,
|
||||
});
|
||||
const connections = (runtime.runner as unknown as RuntimeWithConnectionDeps).deps.connections;
|
||||
|
||||
await expect(connections.executeQuery('warehouse', 'select 1')).resolves.toMatchObject({
|
||||
headers: ['answer'],
|
||||
});
|
||||
expect(queryExecutor.execute).toHaveBeenCalledWith({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: project.projectDir,
|
||||
connection: project.config.connections.warehouse,
|
||||
sql: 'select 1',
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing local runtime test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "project connection config"
|
||||
```
|
||||
|
||||
Expected: FAIL because `LocalConnectionCatalog.executeQuery()` only passes
|
||||
`connectionId` and `sql`.
|
||||
|
||||
- [ ] **Step 3: Update local ingest query executor types**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.ts`, import the shared
|
||||
query executor type:
|
||||
|
||||
```ts
|
||||
import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js';
|
||||
```
|
||||
|
||||
Change `CreateLocalBundleIngestRuntimeOptions.queryExecutor` to:
|
||||
|
||||
```ts
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
```
|
||||
|
||||
Change `LocalConnectionCatalog` to store that type:
|
||||
|
||||
```ts
|
||||
class LocalConnectionCatalog implements SlConnectionCatalogPort {
|
||||
constructor(
|
||||
private readonly project: KtxLocalProject,
|
||||
private readonly queryExecutor?: KtxSqlQueryExecutorPort,
|
||||
) {}
|
||||
```
|
||||
|
||||
Change `executeQuery()`:
|
||||
|
||||
```ts
|
||||
async executeQuery(connectionId: string, sql: string): Promise<KtxQueryResult> {
|
||||
if (!this.queryExecutor) {
|
||||
throw new Error('Local ingest has no query executor configured');
|
||||
}
|
||||
return this.queryExecutor.execute({
|
||||
connectionId,
|
||||
projectDir: this.project.projectDir,
|
||||
connection: this.project.config.connections[connectionId],
|
||||
sql,
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
In `packages/context/src/ingest/local-ingest.ts`, replace the local query
|
||||
executor object type with the shared port:
|
||||
|
||||
```ts
|
||||
import type { KtxSqlQueryExecutorPort } from '../connections/index.js';
|
||||
```
|
||||
|
||||
```ts
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run the local runtime test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "project connection config"
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/local-bundle-runtime.ts \
|
||||
packages/context/src/ingest/local-bundle-runtime.test.ts \
|
||||
packages/context/src/ingest/local-ingest.ts
|
||||
git commit -m "fix(context): pass connection config to ingest query executors"
|
||||
```
|
||||
|
||||
### Task 4: Supply a scan-connector query executor to CLI ingest
|
||||
|
||||
**Files:**
|
||||
- Create: `packages/cli/src/ingest-query-executor.ts`
|
||||
- Create: `packages/cli/src/ingest-query-executor.test.ts`
|
||||
- Modify: `packages/cli/src/ingest.ts`
- Modify: `packages/cli/src/ingest.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the CLI query executor tests**
|
||||
|
||||
Create `packages/cli/src/ingest-query-executor.test.ts`:
|
||||
|
||||
```ts
|
||||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import { createKtxConnectorCapabilities, type KtxScanConnector } from '@ktx/context/scan';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
|
||||
function project(): KtxLocalProject {
|
||||
return {
|
||||
projectDir: '/tmp/ktx-query-project',
|
||||
config: {
|
||||
project: 'warehouse',
|
||||
connections: {
|
||||
warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
|
||||
},
|
||||
},
|
||||
} as unknown as KtxLocalProject;
|
||||
}
|
||||
|
||||
function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
|
||||
return {
|
||||
id: 'warehouse',
|
||||
driver: 'postgres',
|
||||
capabilities: createKtxConnectorCapabilities({ readOnlySql: true }),
|
||||
async introspect() {
|
||||
throw new Error('introspect is not used by this test');
|
||||
},
|
||||
executeReadOnly: vi.fn(async () => ({
|
||||
headers: ['answer'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
rowCount: 1,
|
||||
})),
|
||||
cleanup: vi.fn(async () => {}),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('createKtxCliIngestQueryExecutor', () => {
|
||||
it('executes read-only SQL through the scan connector and cleans it up', async () => {
|
||||
const scanConnector = connector();
|
||||
const createConnector = vi.fn(async () => scanConnector);
|
||||
const executor = createKtxCliIngestQueryExecutor(project(), { createConnector });
|
||||
|
||||
await expect(
|
||||
executor.execute({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
|
||||
projectDir: '/tmp/ktx-query-project',
|
||||
sql: 'select 1',
|
||||
maxRows: 5,
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
headers: ['answer'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
command: 'SELECT',
|
||||
rowCount: 1,
|
||||
});
|
||||
|
||||
expect(createConnector).toHaveBeenCalledWith(project(), 'warehouse');
|
||||
expect(scanConnector.executeReadOnly).toHaveBeenCalledWith(
|
||||
{ connectionId: 'warehouse', sql: 'select 1', maxRows: 5 },
|
||||
{ runId: 'ingest-sql-execution' },
|
||||
);
|
||||
expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('rejects connectors without read-only SQL support', async () => {
|
||||
const scanConnector = connector({
|
||||
capabilities: createKtxConnectorCapabilities({ readOnlySql: false }),
|
||||
executeReadOnly: undefined,
|
||||
});
|
||||
const executor = createKtxCliIngestQueryExecutor(project(), {
|
||||
createConnector: vi.fn(async () => scanConnector),
|
||||
});
|
||||
|
||||
await expect(
|
||||
executor.execute({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'postgres' },
|
||||
projectDir: '/tmp/ktx-query-project',
|
||||
sql: 'select 1',
|
||||
}),
|
||||
).rejects.toThrow('Connection "warehouse" driver "postgres" does not support read-only SQL execution.');
|
||||
expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing CLI query executor test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because `ingest-query-executor.ts` does not exist.
|
||||
|
||||
- [ ] **Step 3: Add the scan-connector-backed query executor**
|
||||
|
||||
Create `packages/cli/src/ingest-query-executor.ts`:
|
||||
|
||||
```ts
|
||||
import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import type { KtxScanConnector, KtxScanContext } from '@ktx/context/scan';
|
||||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
|
||||
type CreateConnector = typeof createKtxCliScanConnector;
|
||||
|
||||
export interface KtxCliIngestQueryExecutorDeps {
|
||||
createConnector?: CreateConnector;
|
||||
}
|
||||
|
||||
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
|
||||
await connector?.cleanup?.();
|
||||
}
|
||||
|
||||
export function createKtxCliIngestQueryExecutor(
|
||||
project: KtxLocalProject,
|
||||
deps: KtxCliIngestQueryExecutorDeps = {},
|
||||
): KtxSqlQueryExecutorPort {
|
||||
const createConnector = deps.createConnector ?? createKtxCliScanConnector;
|
||||
return {
|
||||
async execute(input: KtxSqlQueryExecutionInput) {
|
||||
let connector: KtxScanConnector | null = null;
|
||||
try {
|
||||
connector = await createConnector(project, input.connectionId);
|
||||
if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
|
||||
throw new Error(
|
||||
`Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`,
|
||||
);
|
||||
}
|
||||
|
||||
const ctx: KtxScanContext = { runId: 'ingest-sql-execution' };
|
||||
const result = await connector.executeReadOnly(
|
||||
{ connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows },
|
||||
ctx,
|
||||
);
|
||||
return {
|
||||
headers: result.headers,
|
||||
rows: result.rows,
|
||||
totalRows: result.totalRows,
|
||||
command: 'SELECT',
|
||||
rowCount: result.rowCount,
|
||||
};
|
||||
} finally {
|
||||
await cleanupConnector(connector);
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Wire the CLI executor into local ingest runs**
|
||||
|
||||
In `packages/cli/src/ingest.ts`, import the executor and type:
|
||||
|
||||
```ts
|
||||
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
```
|
||||
|
||||
Extend `KtxIngestDeps`:
|
||||
|
||||
```ts
|
||||
createQueryExecutor?: (project: KtxLocalProject) => KtxSqlQueryExecutorPort;
|
||||
```
|
||||
|
||||
Inside the `args.command === 'run'` branch, after `localIngestOptions` is
|
||||
defined, add:
|
||||
|
||||
```ts
|
||||
const queryExecutor =
|
||||
localIngestOptions.queryExecutor ??
|
||||
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project);
|
||||
```
|
||||
|
||||
Pass `queryExecutor` to both local ingest execution paths. In the Metabase
|
||||
fan-out call:
|
||||
|
||||
```ts
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
```
|
||||
|
||||
In the normal local ingest call:
|
||||
|
||||
```ts
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
pullConfigOptions: adapterOptions,
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Add CLI wiring coverage**
|
||||
|
||||
Add this test to `packages/cli/src/ingest.test.ts`:
|
||||
|
||||
```ts
|
||||
it('supplies a scan-connector query executor to local ingest runs', async () => {
|
||||
const io = makeIo();
|
||||
const projectDir = join(tempDir, 'query-executor-project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
const queryExecutor = {
|
||||
execute: vi.fn(async () => ({
|
||||
headers: [],
|
||||
rows: [],
|
||||
totalRows: 0,
|
||||
command: 'SELECT',
|
||||
rowCount: 0,
|
||||
})),
|
||||
};
|
||||
const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> =>
|
||||
completedLocalBundleRun(input, 'query-executor-run'),
|
||||
);
|
||||
|
||||
await expect(
|
||||
runKtxIngest(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir,
|
||||
connectionId: 'warehouse',
|
||||
adapter: 'fake',
|
||||
outputMode: 'json',
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
runLocalIngest,
|
||||
createAdapters: () => [],
|
||||
createQueryExecutor: () => queryExecutor,
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ queryExecutor }));
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Run CLI query executor tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts && pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts -t "query executor"
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 7: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/cli/src/ingest-query-executor.ts \
|
||||
packages/cli/src/ingest-query-executor.test.ts \
|
||||
packages/cli/src/ingest.ts \
|
||||
packages/cli/src/ingest.test.ts
|
||||
git commit -m "fix(cli): enable read-only SQL probes for local ingest"
|
||||
```
|
||||
|
||||
### Task 5: Final verification
|
||||
|
||||
**Files:**
|
||||
- Verify: all files changed by Tasks 1-4.
|
||||
|
||||
- [ ] **Step 1: Run focused context tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \
|
||||
src/ingest/tools/warehouse-verification/entity-details.tool.test.ts \
|
||||
src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
|
||||
src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts \
|
||||
src/ingest/local-bundle-runtime.test.ts \
|
||||
src/ingest/local-adapters.test.ts \
|
||||
src/ingest/adapters/lookml/lookml.adapter.test.ts \
|
||||
src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
|
||||
src/ingest/ingest-bundle.runner.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run focused CLI tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run type checks**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
pnpm --filter @ktx/cli run type-check
|
||||
```
|
||||
|
||||
Expected: both commands pass.
|
||||
|
||||
- [ ] **Step 4: Run pre-commit on changed files if configured**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
uv run pre-commit run --files \
|
||||
packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \
|
||||
packages/context/src/ingest/adapters/lookml/lookml.adapter.ts \
|
||||
packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts \
|
||||
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \
|
||||
packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \
|
||||
packages/context/src/ingest/local-adapters.ts \
|
||||
packages/context/src/ingest/local-adapters.test.ts \
|
||||
packages/context/src/ingest/local-bundle-runtime.ts \
|
||||
packages/context/src/ingest/local-bundle-runtime.test.ts \
|
||||
packages/context/src/ingest/local-ingest.ts \
|
||||
packages/cli/src/ingest-query-executor.ts \
|
||||
packages/cli/src/ingest-query-executor.test.ts \
|
||||
packages/cli/src/ingest.ts \
|
||||
packages/cli/src/ingest.test.ts \
|
||||
docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md
|
||||
```
|
||||
|
||||
Expected: PASS. If the repository has no pre-commit config or the local `uv`
|
||||
version cannot satisfy the configured toolchain, record the exact error and use
|
||||
the focused test and type-check results as the closest verification.
|
||||
|
||||
- [ ] **Step 5: Commit final verification fixes if any were needed**
|
||||
|
||||
If verification required edits, run:
|
||||
|
||||
```bash
|
||||
git add <changed-files>
|
||||
git commit -m "test: cover warehouse verification v1 closure"
|
||||
```
|
||||
|
||||
If verification required no edits, do not create an empty commit.
|
||||
|
||||
## Self-review
|
||||
|
||||
Spec coverage:
|
||||
|
||||
- Raw warehouse discovery still covers wiki, semantic-layer, and raw schema
|
||||
results, and now raw hits include the connection name needed by the required
|
||||
`entity_details` follow-up.
|
||||
- Every local synthesis adapter with an external source connection now has a
|
||||
path to target warehouse IDs: dbt and Notion already had it, Looker resolves
|
||||
staged mappings, Metabase fan-out runs under target warehouse IDs, and this
|
||||
plan adds LookML and MetricFlow.
|
||||
- `sql_execution` remains scoped by `allowedConnectionNames`, retains the
|
||||
read-only SQL wrapper, and gains a normal local ingest execution backend.
|
||||
|
||||
Placeholder scan:
|
||||
|
||||
- This plan contains no deferred implementation placeholders.
|
||||
- Every code-changing step includes the exact test or implementation snippet to
|
||||
add.
|
||||
|
||||
Type consistency:
|
||||
|
||||
- `connectionName` is added to `RawSchemaHit` and used by `DiscoverDataTool`.
|
||||
- `targetConnectionIds` and `listTargetConnectionIds()` match the existing dbt
|
||||
and Notion adapter pattern.
|
||||
- Local ingest uses `KtxSqlQueryExecutorPort` consistently from CLI to context.
|
||||
1617
docs/superpowers/plans/2026-05-12-warehouse-verification-tools.md
Normal file
1617
docs/superpowers/plans/2026-05-12-warehouse-verification-tools.md
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,345 @@
|
|||
# Warehouse Verification Prompt Shape Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Make every warehouse-verification prompt use KTX's shipped
|
||||
`sql_execution` input shape so ingest agents include `connectionName` when they
|
||||
probe warehouse identifiers.
|
||||
|
||||
**Architecture:** Keep the warehouse verification tool code unchanged. Add
|
||||
prompt-asset tests that reject Kaelio's old session-only SQL examples, then
|
||||
update the shared identifier protocol and the three remaining per-skill SQL
|
||||
probe examples that still show the legacy shape.
|
||||
|
||||
**Tech Stack:** Markdown skill prompts, TypeScript, Vitest, pnpm workspace
|
||||
commands.
|
||||
|
||||
---
|
||||
|
||||
## Audit Summary
|
||||
|
||||
The warehouse verification tools, runner wiring, adapter target fan-out, and
|
||||
focused tests are present. Focused verification passed:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor"
|
||||
```
|
||||
|
||||
Remaining v1-blocking gap:
|
||||
|
||||
- `packages/context/skills/lookml_ingest/SKILL.md`,
|
||||
`packages/context/skills/metricflow_ingest/SKILL.md`, and
|
||||
`packages/context/skills/sl_capture/SKILL.md` still contain
|
||||
`sql_execution({ sql ... })` / "session shape" guidance inherited from
|
||||
Kaelio. KTX's tool contract is
|
||||
`sql_execution({connectionName, sql, rowLimit?})`, so these examples can make
|
||||
agents call the shipped tool with invalid input.
|
||||
|
||||
Non-blocking gaps remain out of scope for this v1 plan:
|
||||
|
||||
- Full DDL-style `entity_details` formatting with FK and profile summaries.
|
||||
- AST-backed SQL validation for data-modifying CTE bodies.
|
||||
- Search over generated `enrichment/descriptions.json`.
|
||||
- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache
|
||||
hits across separate tool calls.
|
||||
- A deterministic fake-LLM end-to-end Notion hallucination regression. Prompt
|
||||
guards and tool contract tests cover the v1 contract; a broader behavior
|
||||
regression can land as follow-up.
|
||||
|
||||
## File Structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/memory/memory-runtime-assets.test.ts`: add a prompt
|
||||
guard that rejects the legacy session-only `sql_execution` shape.
|
||||
- `packages/context/src/ingest/ingest-runtime-assets.test.ts`: strengthen the
|
||||
shared prompt asset assertion for the KTX `connectionName` SQL shape.
|
||||
- `packages/context/skills/_shared/identifier-verification.md`: make both SQL
|
||||
probe instructions show the KTX `connectionName` argument.
|
||||
- `packages/context/skills/notion_synthesize/SKILL.md`: inline the updated
|
||||
protocol block.
|
||||
- `packages/context/skills/dbt_ingest/SKILL.md`: inline the updated protocol
|
||||
block.
|
||||
- `packages/context/skills/lookml_ingest/SKILL.md`: inline the updated protocol
|
||||
block and fix the legacy SQL fallback example.
|
||||
- `packages/context/skills/looker_ingest/SKILL.md`: inline the updated
|
||||
protocol block.
|
||||
- `packages/context/skills/metabase_ingest/SKILL.md`: inline the updated
|
||||
protocol block.
|
||||
- `packages/context/skills/metricflow_ingest/SKILL.md`: inline the updated
|
||||
protocol block and fix the legacy SQL fallback example.
|
||||
- `packages/context/skills/live_database_ingest/SKILL.md`: inline the updated
|
||||
protocol block.
|
||||
- `packages/context/skills/historic_sql_table_digest/SKILL.md`: inline the
|
||||
updated protocol block.
|
||||
- `packages/context/skills/historic_sql_patterns/SKILL.md`: inline the updated
|
||||
protocol block.
|
||||
- `packages/context/skills/knowledge_capture/SKILL.md`: inline the updated
|
||||
protocol block.
|
||||
- `packages/context/skills/sl_capture/SKILL.md`: inline the updated protocol
|
||||
block and fix the join-discovery SQL example.
|
||||
|
||||
### Task 1: Add Prompt Guards For The KTX SQL Tool Shape
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/memory/memory-runtime-assets.test.ts`
|
||||
- Modify: `packages/context/src/ingest/ingest-runtime-assets.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add the failing memory asset guard**
|
||||
|
||||
In `packages/context/src/memory/memory-runtime-assets.test.ts`, add this test
|
||||
after `does not ship stale warehouse verification tool names or fictional
|
||||
identifiers`:
|
||||
|
||||
```ts
|
||||
it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => {
|
||||
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
|
||||
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
|
||||
|
||||
for (const skillName of verificationWriterSkills) {
|
||||
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
|
||||
expect(body).toContain('sql_execution({connectionName');
|
||||
expect(body).not.toContain('sql_execution({ sql');
|
||||
expect(body).not.toContain('session shape');
|
||||
expect(body).not.toContain('connection is already pinned by the ingest session');
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Strengthen the shared ingest asset guard**
|
||||
|
||||
In `packages/context/src/ingest/ingest-runtime-assets.test.ts`, update
|
||||
`packages identifier verification prompt assets` so the final assertions are:
|
||||
|
||||
```ts
|
||||
expect(shared).toContain('discover_data');
|
||||
expect(shared).toContain('entity_details');
|
||||
expect(shared).toContain('sql_execution');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run the failing prompt guards**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
|
||||
```
|
||||
|
||||
Expected: FAIL. The failure must cite at least one current legacy string
|
||||
(`sql_execution({ sql` or `session shape`) or report that the required
|
||||
`sql_execution({connectionName` text is missing.
|
||||
|
||||
### Task 2: Update The Shared Identifier Verification Protocol
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/skills/_shared/identifier-verification.md`
|
||||
- Modify: `packages/context/skills/notion_synthesize/SKILL.md`
|
||||
- Modify: `packages/context/skills/dbt_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/lookml_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/looker_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/metabase_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/metricflow_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/live_database_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/historic_sql_table_digest/SKILL.md`
|
||||
- Modify: `packages/context/skills/historic_sql_patterns/SKILL.md`
|
||||
- Modify: `packages/context/skills/knowledge_capture/SKILL.md`
|
||||
- Modify: `packages/context/skills/sl_capture/SKILL.md`
|
||||
|
||||
- [ ] **Step 1: Replace the shared protocol text**
|
||||
|
||||
Replace the full `## Identifier Verification Protocol` block in
|
||||
`packages/context/skills/_shared/identifier-verification.md` with:
|
||||
|
||||
```md
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Inline the same protocol in every writer skill**
|
||||
|
||||
Replace the existing `## Identifier Verification Protocol` block in each writer
|
||||
skill with the exact block from Step 1:
|
||||
|
||||
```text
|
||||
packages/context/skills/notion_synthesize/SKILL.md
|
||||
packages/context/skills/dbt_ingest/SKILL.md
|
||||
packages/context/skills/lookml_ingest/SKILL.md
|
||||
packages/context/skills/looker_ingest/SKILL.md
|
||||
packages/context/skills/metabase_ingest/SKILL.md
|
||||
packages/context/skills/metricflow_ingest/SKILL.md
|
||||
packages/context/skills/live_database_ingest/SKILL.md
|
||||
packages/context/skills/historic_sql_table_digest/SKILL.md
|
||||
packages/context/skills/historic_sql_patterns/SKILL.md
|
||||
packages/context/skills/knowledge_capture/SKILL.md
|
||||
packages/context/skills/sl_capture/SKILL.md
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run the shared prompt asset tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
|
||||
```
|
||||
|
||||
Expected: still FAIL because the per-skill legacy SQL examples in LookML,
|
||||
MetricFlow, and `sl_capture` have not been fixed yet.
|
||||
|
||||
### Task 3: Fix Legacy Per-Skill SQL Examples
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/skills/lookml_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/metricflow_ingest/SKILL.md`
|
||||
- Modify: `packages/context/skills/sl_capture/SKILL.md`
|
||||
|
||||
- [ ] **Step 1: Fix the LookML fallback probe example**
|
||||
|
||||
In `packages/context/skills/lookml_ingest/SKILL.md`, replace the current
|
||||
Required flow item 2 with:
|
||||
|
||||
```md
|
||||
2. If the table isn't in the manifest, use the warehouse `connectionName`
|
||||
returned by `discover_data` or the target connection chosen from
|
||||
`sl_discover`, then call a dialect-appropriate SQL probe with that
|
||||
connection name, for example:
|
||||
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
|
||||
Replace `warehouse`, `analytics`, and `orders` with the verified connection,
|
||||
schema or dataset, and table from the WorkUnit evidence.
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Fix the MetricFlow fallback probe example**
|
||||
|
||||
In `packages/context/skills/metricflow_ingest/SKILL.md`, replace the paragraph
|
||||
that begins `If \`sl_discover\` errors` with:
|
||||
|
||||
```md
|
||||
If `sl_discover` errors because no such table exists, use `discover_data` and
|
||||
`entity_details` to find the warehouse target. If a SQL probe is still needed,
|
||||
call `sql_execution` with the same warehouse connection name, for example:
|
||||
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
|
||||
**Never invent column names** - every column in `columns:`, `grain:`, and
|
||||
`sql:` must be sourced from raw files, `entity_details`, or a successful SQL
|
||||
probe.
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Fix the `sl_capture` join probe example**
|
||||
|
||||
In `packages/context/skills/sl_capture/SKILL.md`, replace Tool sequence item 6
|
||||
with:
|
||||
|
||||
```md
|
||||
6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run the prompt asset tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS. The tests must report 2 files passed.
|
||||
|
||||
### Task 4: Final Verification
|
||||
|
||||
**Files:**
|
||||
- No new files.
|
||||
|
||||
- [ ] **Step 1: Run focused warehouse prompt and tool tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run package type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Inspect final diff**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git diff -- packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/_shared/identifier-verification.md packages/context/skills/notion_synthesize/SKILL.md packages/context/skills/dbt_ingest/SKILL.md packages/context/skills/lookml_ingest/SKILL.md packages/context/skills/looker_ingest/SKILL.md packages/context/skills/metabase_ingest/SKILL.md packages/context/skills/metricflow_ingest/SKILL.md packages/context/skills/live_database_ingest/SKILL.md packages/context/skills/historic_sql_table_digest/SKILL.md packages/context/skills/historic_sql_patterns/SKILL.md packages/context/skills/knowledge_capture/SKILL.md packages/context/skills/sl_capture/SKILL.md
|
||||
```
|
||||
|
||||
Expected: only prompt wording and prompt-asset guards changed. No tool
|
||||
implementation files changed.
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/_shared/identifier-verification.md packages/context/skills/notion_synthesize/SKILL.md packages/context/skills/dbt_ingest/SKILL.md packages/context/skills/lookml_ingest/SKILL.md packages/context/skills/looker_ingest/SKILL.md packages/context/skills/metabase_ingest/SKILL.md packages/context/skills/metricflow_ingest/SKILL.md packages/context/skills/live_database_ingest/SKILL.md packages/context/skills/historic_sql_table_digest/SKILL.md packages/context/skills/historic_sql_patterns/SKILL.md packages/context/skills/knowledge_capture/SKILL.md packages/context/skills/sl_capture/SKILL.md
|
||||
git commit -m "fix(context): align warehouse sql probe prompt shape"
|
||||
```
|
||||
|
||||
Expected: one focused commit.
|
||||
|
||||
## Self-Review
|
||||
|
||||
Spec coverage:
|
||||
|
||||
- The original spec requires `sql_execution` inputs to include
|
||||
`connectionName`; this plan removes contradictory session-only examples from
|
||||
all active writer guidance.
|
||||
- The shared protocol remains in `_shared` and inlined in every synthesis
|
||||
writer skill named by the original spec.
|
||||
- The tool implementation remains unchanged because the shipped schema already
|
||||
enforces the v1 contract.
|
||||
|
||||
Placeholder scan:
|
||||
|
||||
- The plan has no deferred implementation markers.
|
||||
- Prompt examples use concrete names such as `warehouse`, `analytics`, and
|
||||
`orders` only to demonstrate JSON shape, and the guidance around each example
|
||||
tells the worker to use the verified warehouse connection and identifiers.
|
||||
|
||||
Type consistency:
|
||||
|
||||
- Tests assert the exact KTX tool call shape:
|
||||
`sql_execution({connectionName, sql: ...})`.
|
||||
- Prompt wording consistently uses `connectionName`, matching
|
||||
`packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts`.
|
||||
|
|
@ -0,0 +1,215 @@
|
|||
# Warehouse Verification SQL Example Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Remove the last connectionless `sql_execution` prompt example so
|
||||
warehouse-verification writer guidance always matches KTX's shipped tool
|
||||
contract.
|
||||
|
||||
**Architecture:** Keep the warehouse verification tool code unchanged. Tighten
|
||||
the prompt asset guard so multiline `sql_execution({ sql: ... })` examples
|
||||
fail tests, then update the stale `sl_capture` worked example to pass
|
||||
`connectionName` explicitly.
|
||||
|
||||
**Tech Stack:** Markdown skill prompts, TypeScript, Vitest, pnpm workspace
|
||||
commands.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
The warehouse verification tools, runner wiring, source-adapter target fan-out,
|
||||
CLI query executor, and focused tests are present. Focused verification passed:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor"
|
||||
```
|
||||
|
||||
Remaining v1-blocking gap:
|
||||
|
||||
- `packages/context/skills/sl_capture/SKILL.md` still contains a worked example
|
||||
with a multiline `sql_execution({ sql: ... })` call. KTX's tool contract is
|
||||
`sql_execution({connectionName, sql, rowLimit?})`, so this example can teach
|
||||
agents to call the shipped tool with invalid input.
|
||||
|
||||
Non-blocking gaps remain out of scope for this v1 plan:
|
||||
|
||||
- Full DDL-style `entity_details` formatting with FK and profile summaries.
|
||||
- AST-backed SQL validation for data-modifying CTE bodies.
|
||||
- Search over generated `enrichment/descriptions.json`.
|
||||
- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache
|
||||
hits across separate tool calls.
|
||||
- A deterministic fake-LLM end-to-end Notion hallucination regression.
|
||||
- Tokenized or embedding-backed raw schema search ranking in `discover_data`.
|
||||
|
||||
## File structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/memory/memory-runtime-assets.test.ts`: add a prompt
|
||||
guard that catches multiline `sql_execution` calls without `connectionName`.
|
||||
- `packages/context/skills/sl_capture/SKILL.md`: update the stale worked
|
||||
example to include the target warehouse `connectionName`.
|
||||
|
||||
### Task 1: Add a multiline SQL prompt guard
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/memory/memory-runtime-assets.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add a helper that extracts `sql_execution` call examples**
|
||||
|
||||
In `packages/context/src/memory/memory-runtime-assets.test.ts`, add this helper
|
||||
after `forbiddenProductPattern()`:
|
||||
|
||||
```ts
|
||||
function sqlExecutionCallBlocks(body: string): string[] {
|
||||
const blocks: string[] = [];
|
||||
const marker = 'sql_execution({';
|
||||
let offset = 0;
|
||||
|
||||
while (offset < body.length) {
|
||||
const start = body.indexOf(marker, offset);
|
||||
if (start === -1) {
|
||||
break;
|
||||
}
|
||||
const end = body.indexOf('})', start + marker.length);
|
||||
blocks.push(body.slice(start, end === -1 ? start + marker.length : end + 2));
|
||||
offset = start + marker.length;
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Strengthen the existing SQL-shape test**
|
||||
|
||||
Replace the body of
|
||||
`ships only the KTX connectionName sql_execution call shape in writer guidance`
|
||||
with:
|
||||
|
||||
```ts
|
||||
const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8');
|
||||
const bodies = [{ name: '_shared/identifier-verification.md', body: shared }];
|
||||
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT');
|
||||
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM');
|
||||
|
||||
for (const skillName of verificationWriterSkills) {
|
||||
const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8');
|
||||
bodies.push({ name: `${skillName}/SKILL.md`, body });
|
||||
expect(body).toContain('sql_execution({connectionName');
|
||||
expect(body).not.toContain('sql_execution({ sql');
|
||||
expect(body).not.toContain('session shape');
|
||||
expect(body).not.toContain('connection is already pinned by the ingest session');
|
||||
}
|
||||
|
||||
for (const { name, body } of bodies) {
|
||||
const calls = sqlExecutionCallBlocks(body);
|
||||
expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0);
|
||||
expect(
|
||||
calls.filter((call) => !call.includes('connectionName')),
|
||||
`${name} has sql_execution calls without connectionName`,
|
||||
).toEqual([]);
|
||||
expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch(
|
||||
/sql_execution\(\{\s*sql\s*:/,
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run the failing prompt guard**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts -t "connectionName sql_execution"
|
||||
```
|
||||
|
||||
Expected: FAIL. The failure must identify
|
||||
`sl_capture/SKILL.md` as having a `sql_execution` call without
|
||||
`connectionName` or a connectionless multiline `sql_execution` call.
|
||||
|
||||
- [ ] **Step 4: Commit the failing guard**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/memory/memory-runtime-assets.test.ts
|
||||
git commit -m "test(context): catch connectionless sql execution prompt examples"
|
||||
```
|
||||
|
||||
### Task 2: Fix the stale `sl_capture` SQL example
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/skills/sl_capture/SKILL.md`
|
||||
- Test: `packages/context/src/memory/memory-runtime-assets.test.ts`
|
||||
- Test: `packages/context/src/ingest/ingest-runtime-assets.test.ts`
|
||||
|
||||
- [ ] **Step 1: Update the worked example**
|
||||
|
||||
In `packages/context/skills/sl_capture/SKILL.md`, replace the `sql_execution`
|
||||
block in "Worked example - new join" with:
|
||||
|
||||
```md
|
||||
sql_execution({
|
||||
connectionName: "warehouse",
|
||||
sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM public.fct_orders a JOIN public.fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1"
|
||||
})
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the prompt guards**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run a direct stale-shape scan**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n -U "sql_execution\\(\\{\\s*\\n\\s*sql:" packages/context/skills packages/context/prompts
|
||||
```
|
||||
|
||||
Expected: no matches. `rg` exits with code 1 when it finds no matches, so exit code 1 is the passing result here.
|
||||
|
||||
- [ ] **Step 4: Run the context type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 5: Commit the prompt fix**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/skills/sl_capture/SKILL.md
|
||||
git commit -m "fix(context): include connection name in sl capture sql example"
|
||||
```
|
||||
|
||||
## Self-review
|
||||
|
||||
Spec coverage:
|
||||
|
||||
- The only remaining v1-blocking prompt-shape gap has a failing test and a
|
||||
direct prompt edit.
|
||||
- Tool implementation, runner wiring, adapter scoping, and CLI execution
|
||||
remain covered by the focused suites listed in the audit summary.
|
||||
|
||||
Placeholder scan:
|
||||
|
||||
- This plan contains no deferred implementation placeholders.
|
||||
|
||||
Type consistency:
|
||||
|
||||
- The plan uses the shipped KTX tool shape:
|
||||
`sql_execution({connectionName, sql, rowLimit?})`.
|
||||
|
|
@ -0,0 +1,236 @@
|
|||
# Warehouse Verification Structured Target Miss Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Make `entity_details` return model-visible not-found evidence for every documented target shape, including structured `{catalog, db, name, column?}` targets.
|
||||
|
||||
**Architecture:** Keep the existing warehouse verification module. Add focused tests for missing structured table and column targets, then route structured target labels through the same candidate lookup used by display targets while preserving exact structured resolution.
|
||||
|
||||
**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX ingest tools.
|
||||
|
||||
---
|
||||
|
||||
## Audit Summary
|
||||
|
||||
The implemented plans have landed the warehouse verification tools, ingest
|
||||
runner wiring, adapter warehouse target fan-out, CLI read-only query executor,
|
||||
and prompt-shape closures. Focused verification passed on May 13, 2026:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor"
|
||||
rg -n -U "sql_execution\\(\\{\\s*\\n\\s*sql:" packages/context/skills packages/context/prompts
|
||||
rg -n "wiki_sl_search|sl_describe_table|orbit_analytics\\.customer" packages/context/skills packages/context/prompts packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts packages/context/src/sl/tools/sl-warehouse-validation.ts
|
||||
```
|
||||
|
||||
Remaining v1-blocking gap:
|
||||
|
||||
- `entity_details` accepts structured targets, but if a structured table target
|
||||
does not exist, it records `structured.missing` and emits no markdown. Tool
|
||||
outputs are sent to the model as markdown only, so the synthesis agent gets
|
||||
an empty response instead of the required "Not found in scan" verification
|
||||
signal.
|
||||
|
||||
Non-blocking gaps remain out of scope for this v1 plan:
|
||||
|
||||
- Full DDL-style `entity_details` formatting with FK and profile summaries.
|
||||
- AST-backed SQL validation for data-modifying CTE bodies.
|
||||
- Dialect-specific row-limit wrapping for SQL Server probes.
|
||||
- Search over generated `enrichment/descriptions.json`.
|
||||
- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache
|
||||
hits across separate tool calls.
|
||||
- A deterministic fake-LLM end-to-end Notion hallucination regression.
|
||||
- Cleanup of legacy demo Orbit wiki fixtures that still mention
|
||||
`orbit_analytics.customer`.
|
||||
|
||||
## File Structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`: add failing coverage for missing structured targets.
|
||||
- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`: render missing structured targets into markdown and reuse candidate lookup.
|
||||
|
||||
### Task 1: Report Structured Target Misses In `entity_details`
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`
|
||||
- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`
|
||||
|
||||
- [ ] **Step 1: Add failing structured miss tests**
|
||||
|
||||
In `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`, add these tests after `reports missing explicit columns instead of returning an empty column list`:
|
||||
|
||||
```ts
|
||||
it('reports missing structured table targets in model-visible markdown', async () => {
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionName: 'warehouse',
|
||||
targets: [{ catalog: null, db: 'public', name: 'orderz' }],
|
||||
},
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('Not found in scan: public.orderz');
|
||||
expect(result.markdown).toContain('Closest matches: orders');
|
||||
expect(result.structured.resolved).toHaveLength(0);
|
||||
expect(result.structured.missing).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('reports missing structured column targets in model-visible markdown', async () => {
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionName: 'warehouse',
|
||||
targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }],
|
||||
},
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier');
|
||||
expect(result.markdown).toContain('Available columns: id, status');
|
||||
expect(result.structured.resolved).toHaveLength(0);
|
||||
expect(result.structured.missing).toHaveLength(1);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing focused test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts -t "structured"
|
||||
```
|
||||
|
||||
Expected: FAIL. The first new test must fail because `result.markdown` does not contain `Not found in scan: public.orderz`.
|
||||
|
||||
- [ ] **Step 3: Add structured target labels and candidate lookup**
|
||||
|
||||
In `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`, add this type alias after `type EntityDetailsInput = z.infer<typeof entityDetailsInputSchema>;`:
|
||||
|
||||
```ts
|
||||
type EntityDetailsTarget = EntityDetailsInput['targets'][number];
|
||||
```
|
||||
|
||||
Add these helpers after `function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null { ... }`:
|
||||
|
||||
```ts
|
||||
function targetLabel(target: EntityDetailsTarget): string {
|
||||
if ('display' in target) {
|
||||
return target.display;
|
||||
}
|
||||
return [target.catalog, target.db, target.name, target.column].filter((part): part is string => !!part).join('.');
|
||||
}
|
||||
|
||||
function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarget, candidates: KtxTableRef[]): void {
|
||||
parts.push(`Not found in scan: ${targetLabel(target)}`);
|
||||
if (candidates.length > 0) {
|
||||
parts.push(`Closest matches: ${candidates.map((candidate) => candidate.name).join(', ')}`);
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveTarget(
|
||||
catalog: WarehouseCatalogService,
|
||||
connectionName: string,
|
||||
target: EntityDetailsTarget,
|
||||
): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> {
|
||||
if ('display' in target) {
|
||||
return catalog.resolveDisplayTarget(connectionName, target.display);
|
||||
}
|
||||
|
||||
const candidateResolution = await catalog.resolveDisplayTarget(connectionName, targetLabel(target));
|
||||
return {
|
||||
resolved: {
|
||||
catalog: target.catalog,
|
||||
db: target.db,
|
||||
name: target.name,
|
||||
column: target.column,
|
||||
},
|
||||
candidates: candidateResolution.candidates,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
Then replace the `const resolution = ...` block inside the `for (const target of input.targets)` loop with:
|
||||
|
||||
```ts
|
||||
const resolution = await resolveTarget(catalog, input.connectionName, target);
|
||||
```
|
||||
|
||||
Replace the missing-resolution block with:
|
||||
|
||||
```ts
|
||||
if (!resolution.resolved) {
|
||||
missing.push({ target, candidates: resolution.candidates });
|
||||
appendMissingTargetMarkdown(parts, target, resolution.candidates);
|
||||
continue;
|
||||
}
|
||||
```
|
||||
|
||||
Replace the missing-detail block with:
|
||||
|
||||
```ts
|
||||
if (!detail) {
|
||||
missing.push({ target, candidates: resolution.candidates });
|
||||
appendMissingTargetMarkdown(parts, target, resolution.candidates);
|
||||
continue;
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run the focused entity-details tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 5: Run warehouse verification regression tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 6: Run context type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 7: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \
|
||||
packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
|
||||
git commit -m "fix(context): report structured entity detail misses"
|
||||
```
|
||||
|
||||
## Self-review
|
||||
|
||||
Spec coverage:
|
||||
|
||||
- The original `entity_details` contract says structured and display targets
|
||||
are mixed shapes and unresolved targets must produce `Not found in scan` with
|
||||
candidates. This plan adds that model-visible behavior for structured table
|
||||
misses and preserves the existing column-miss behavior.
|
||||
|
||||
Placeholder scan:
|
||||
|
||||
- This plan contains no deferred implementation placeholders.
|
||||
|
||||
Type consistency:
|
||||
|
||||
- The plan uses the existing `WarehouseCatalogService`, `KtxTableRef`,
|
||||
`EntityDetailsStructured`, and `ToolOutput` types without adding public API
|
||||
compatibility wrappers.
|
||||
|
|
@ -0,0 +1,331 @@
|
|||
# Warehouse Verification Tools for Ingestion Synthesis
|
||||
|
||||
**Date:** 2026-05-12
|
||||
**Author:** Andrey Avtomonov
|
||||
**Status:** Design — pending implementation plan
|
||||
|
||||
## Background and motivation
|
||||
|
||||
KTX's ingest pipeline synthesises wiki pages and semantic-layer (SL) sources from third-party content (Notion, LookML, Looker, Metabase, dbt, MetricFlow, historic SQL, live-database scans, and chat). The synthesis stage is an LLM call that runs once per WorkUnit, governed by a skill prompt (e.g. `notion_synthesize`) and a set of allowed tools.
|
||||
|
||||
A real-world inspection (project `/tmp/ktx-proj-1`) surfaced two failure modes the synthesis stage produces:
|
||||
|
||||
1. **Fictional identifiers laundered into wiki output.** A Notion page mentioned `orbit_analytics.customer` as a legacy "customer source" table with a `plan_tier in {free, pro, enterprise}` column. Neither the table, the column, nor those values exist in the configured warehouse. The synthesis LLM faithfully copied them into `knowledge/global/orbit/customers-source.md` as a "Conflict Note", giving the fabricated names full wiki frontmatter, a `Source:` citation, and apparent authority.
|
||||
2. **Column attribution drift.** The same wiki page documents columns under `orbit_raw.accounts` but states the `paying_account_count` measure filters on `normalized_plan_code` and `contract_status`. Those columns live on `orbit_analytics.mart_account_segments`, not on `accounts`. A reader (or a downstream agent) following the page will write `accounts.normalized_plan_code` and get a `column does not exist` error.
|
||||
|
||||
Root cause analysis (`packages/context/skills/notion_synthesize/SKILL.md`, `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts`, `packages/context/src/wiki/tools/wiki-write.tool.ts`) showed three contributing factors:
|
||||
|
||||
- The synthesis LLM has no verification primitive that distinguishes a real warehouse identifier from a fabricated one. `sl_discover` only finds objects already promoted into the semantic layer; raw warehouse scans (which already exist on disk under `raw-sources/<conn>/live-database/<sync>/`) are not surfaced to the LLM at all.
|
||||
- `wiki_write` performs no body-text validation — anything the LLM emits is written.
|
||||
- The skill prompt itself uses `orbit_analytics.customer` as a canonical example string (`SKILL.md:70`), reinforcing the same fictional name the LLM ends up emitting.
|
||||
|
||||
Kaelio's server-side ingest WU agent (`/Users/andrey/conductor/workspaces/kaelio-main2/douala/server/src/tools/toolset-factory.service.ts`) had four verification tools that KTX dropped during the open-source extraction: `discover_data`, `entity_details`, `dictionary_search`, and `sql_execution`. The underlying connector infrastructure (`KtxScanConnector`, dialect classes, `assertReadOnlySql`, `SemanticLayerService.executeQuery`) is present in KTX, so the gap is at the tool layer, not the platform layer.
|
||||
|
||||
## Goal
|
||||
|
||||
Give every ingest adapter's synthesis-time LLM call the tools and skill-prompt instructions needed to verify warehouse identifiers (`schema.table`, `schema.table.column`) and sample values before emitting them into wiki pages, SL sources, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback` records.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Not changing `wiki_write` itself. A complementary spec covers hard write-time validation; this spec focuses on giving the LLM the tools to self-validate.
|
||||
- Not modifying any Notion fetch/chunk/cluster behaviour.
|
||||
- Not changing the `_schema/*.yaml` format.
|
||||
- Not introducing a UUID layer for tables or columns; KTX keeps `(connection, catalog, db, name)` as the canonical table identity.
|
||||
- Not adding `semantic_query` to the synthesis toolset. `semantic_query` is a future tool for the research/chat-time agent; synthesis creates SL sources rather than queries them, so it is the wrong shape for this stage.
|
||||
- Not adding `dictionary_search`. `entity_details` already returns per-column `sampleValues` from the relationship-profile, and `sql_execution` covers the rarer "where does this literal live?" case more accurately than a sampled-JSON full-text scan.
|
||||
|
||||
## What already exists in KTX
|
||||
|
||||
The dialect/driver/connection architecture is fully ported from Kaelio. The new tools sit on top of these already-shipping primitives:
|
||||
|
||||
| Primitive | Location |
|
||||
|---|---|
|
||||
| `KtxTableRef = { catalog: string\|null, db: string\|null, name: string }` | `packages/context/src/scan/types.ts:168` |
|
||||
| `SemanticLayerService.executeQuery(connectionId, sql)` | `packages/context/src/sl/semantic-layer.service.ts:1004`, used today by `sl_validate` |
|
||||
| `assertReadOnlySql` / `limitSqlForExecution` | `packages/context/src/connections/read-only-sql.ts` |
|
||||
| 7 connectors with parallel layout (postgres, mysql, sqlserver, snowflake, bigquery, clickhouse, sqlite), each exporting a dialect class | `packages/connector-*` |
|
||||
| Raw scan artefacts: `tables/<base64(catalog??'_')>.<base64(db)>.<base64(name)>.json` and `enrichment/relationship-profile.json` (with `nativeType`, `nullable`, `primaryKey`, `foreignKeys`, `rowCount`, `nullCount`, `distinctCount`, `sampleValues`, descriptions) | `raw-sources/<connectionId>/live-database/<latest-sync>/` |
|
||||
| `wiki_search`, `sl_discover`, `sl_read_source`, `sl_validate`, `emit_unmapped_fallback` | already wired into synthesis stages |
|
||||
|
||||
The only meaningfully new code is `WarehouseCatalogService`, a small `getDialectForDriver` dispatch, the three tool files, and the wiring in `ingest-bundle.runner.ts`.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Module layout
|
||||
|
||||
```
|
||||
packages/context/src/ingest/tools/warehouse-verification/
|
||||
discover-data.tool.ts
|
||||
entity-details.tool.ts
|
||||
sql-execution.tool.ts
|
||||
warehouse-catalog.service.ts
|
||||
index.ts # exports createWarehouseVerificationTools()
|
||||
packages/context/src/connections/
|
||||
dialects.ts # adds getDialectForDriver()
|
||||
packages/context/skills/_shared/
|
||||
identifier-verification.md # the protocol snippet referenced from every synthesis skill
|
||||
```
|
||||
|
||||
### Canonical table identity
|
||||
|
||||
Every tool that names a warehouse object uses the tuple `(connectionName, catalog, db, name[, column])`. `connectionName` is the slug from `ktx.yaml` (e.g., `"warehouse"`), validated against `^[a-zA-Z0-9][a-zA-Z0-9_-]*$`. There is no UUID layer.
|
||||
|
||||
`display` strings the LLM picks up from source pages (e.g., `"orbit_raw.accounts"` for Postgres or `"project.dataset.table"` for BigQuery) are parsed by `WarehouseCatalogService.resolveDisplay`, which knows the connection's driver via `getDialectForDriver`. Ambiguous parses (e.g., a 2-part display on BigQuery) return a candidates list instead of guessing.
|
||||
|
||||
Dialect mapping:
|
||||
|
||||
| Driver | catalog | db | name | Display |
|
||||
|---|---|---|---|---|
|
||||
| postgres | `null` | schema | table | `schema.table` |
|
||||
| mysql | `null` | schema | table | `schema.table` |
|
||||
| sqlserver | catalog | schema | table | `catalog.schema.table` |
|
||||
| snowflake | database | schema | table | `db.schema.table` |
|
||||
| bigquery | project | dataset | table | `project.dataset.table` |
|
||||
| clickhouse | `null` | database | table | `database.table` |
|
||||
| sqlite | `null` | `null` | table | `table` |
|
||||
|
||||
### `WarehouseCatalogService`
|
||||
|
||||
Stateless except for a per-WorkUnit cache. Reads raw scan files under `raw-sources/<connectionName>/live-database/<latest-sync>/`.
|
||||
|
||||
```ts
|
||||
class WarehouseCatalogService {
|
||||
getTable(ref: { connectionName: string } & KtxTableRef): Promise<TableDetail | null>;
|
||||
listTables(connectionName: string): Promise<KtxTableRef[]>;
|
||||
resolveDisplay(connectionName: string, display: string): Promise<{
|
||||
resolved: KtxTableRef | null;
|
||||
candidates: KtxTableRef[]; // ranked by edit distance when resolved is null
|
||||
dialect: string;
|
||||
}>;
|
||||
searchByName(connectionName: string, query: string, limit: number): Promise<Array<
|
||||
| { kind: 'table'; ref: KtxTableRef; matchedOn: 'name'|'db'|'comment'|'description' }
|
||||
| { kind: 'column'; ref: KtxTableRef & { column: string }; matchedOn: 'name'|'comment'|'description' }
|
||||
>>;
|
||||
getLatestSyncId(connectionName: string): Promise<string | null>;
|
||||
}
|
||||
```
|
||||
|
||||
`getTable` merges the raw schema file (native types, PK, FK, nullable) with the enrichment profile (row counts, null rates, distinct counts, sample values, AI-generated descriptions). When no scan exists for the connection, every read returns `null`; tools surface this as a distinct "no scan available" state rather than as "identifier not found", so the LLM doesn't conclude a real table is fictional just because a scan hasn't run yet.
|
||||
|
||||
### `getDialectForDriver`
|
||||
|
||||
```ts
|
||||
// packages/context/src/connections/dialects.ts
|
||||
export type SupportedDriver = 'postgres'|'postgresql'|'mysql'|'sqlserver'|'snowflake'|'bigquery'|'clickhouse'|'sqlite'|'sqlite3';
|
||||
export function getDialectForDriver(driver: SupportedDriver): KtxDialect;
|
||||
```
|
||||
|
||||
Synchronous dispatch. The connectors' existing dialect classes already expose the same shape — `formatTableName(KtxTableRef)`, `quoteIdentifier(string)`, `mapToDimensionType(nativeType)`. The implementation plan introduces a minimal `KtxDialect` interface that these classes already satisfy structurally; no connector-internal changes are required. Tools use it only for display-string parsing and error-message formatting; tools never construct executable SQL.
|
||||
|
||||
## Tool contracts
|
||||
|
||||
### `entity_details`
|
||||
|
||||
```ts
|
||||
input = {
|
||||
connectionName: string,
|
||||
targets: Array< // 1..50, mixed shapes allowed
|
||||
| { display: string } // "orbit_raw.accounts" or "orbit_raw.accounts.account_id"
|
||||
| { catalog: string|null, db: string, name: string, column?: string }
|
||||
>,
|
||||
}
|
||||
```
|
||||
|
||||
Output (markdown, per target):
|
||||
|
||||
```
|
||||
### orbit_raw.accounts
|
||||
Type: table | Native columns: 11 | PK: account_id | FKs: parent_account_id → orbit_raw.accounts.account_id
|
||||
Description: One row per customer account…
|
||||
|
||||
Columns:
|
||||
- account_id (text, nullable=false, PK) — sample: ["acct_001","acct_002",…]
|
||||
- parent_account_id (text, nullable=true, FK → orbit_raw.accounts.account_id)
|
||||
- account_name (text, nullable=false)
|
||||
- …
|
||||
|
||||
Profile: rowCount=4321 distinctCount(account_id)=4321 nullRate(parent_account_id)=0.62
|
||||
```
|
||||
|
||||
When `column` is provided in a target, output is scoped to that one column. When a target doesn't resolve, output is `Not found in scan. Closest matches: …` with up to 5 candidates from `searchByName`. When the connection has no `live-database` scan, output is ``No live-database scan available for connection "<name>"; run `ktx scan` first.`` — distinct from the "not found" state.
|
||||
|
||||
Structured output: `{ resolved: TableDetail[], missing: Array<{target, candidates}>, scanAvailable: boolean }`.
|
||||
|
||||
Refuses `connectionName` values not in the WU-stage's `allowedConnectionNames` set.
|
||||
|
||||
### `sql_execution`
|
||||
|
||||
```ts
|
||||
input = {
|
||||
connectionName: string,
|
||||
sql: string, // single SELECT or WITH only
|
||||
rowLimit?: number, // default 100, hard cap 1000
|
||||
}
|
||||
```
|
||||
|
||||
Pipeline:
|
||||
|
||||
1. `assertReadOnlySql(sql)` — regex rejects anything starting with `insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh`.
|
||||
2. `limitSqlForExecution(sql, rowLimit)` — wraps as `select * from (<llm_sql>) as ktx_query_result limit N`.
|
||||
3. `SemanticLayerService.executeQuery(connectionName, wrappedSql)`.
|
||||
4. Format as markdown table; first ~20 rows inline; if truncated, append `… +N more rows`.
|
||||
|
||||
Structured output: `{ headers, rows, rowCount, truncated, sql, wrappedSql }`.
|
||||
|
||||
Connector errors surface verbatim (e.g., Postgres `relation "orbit_analytics.customer" does not exist`). That error message is the most valuable verification signal — it tells the LLM the identifier is fictional.
|
||||
|
||||
Refuses `connectionName` not in `allowedConnectionNames`. Each connector's driver-level read-only enforcement (Postgres read-only transaction, BigQuery query-only jobs) is a second defence under the regex gate.
|
||||
|
||||
### `discover_data`
|
||||
|
||||
```ts
|
||||
input = {
|
||||
query: string,
|
||||
connectionName?: string, // omit to search all configured warehouse connections
|
||||
limit?: number, // default 10 per section
|
||||
sourceName?: string, // SL source detail mode (delegates to sl_discover)
|
||||
}
|
||||
```
|
||||
|
||||
Composes three searches and groups output into three sections, omitting empty sections:
|
||||
|
||||
1. **Wiki Pages** — `wiki_search({query, limit})`. Routing hint: *use `wiki_read(blockKey)` for full content*.
|
||||
2. **Semantic Layer Sources** — `sl_discover({query, connectionName})`. Routing hint: *use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details*.
|
||||
3. **Raw Warehouse Schema** — `WarehouseCatalogService.searchByName(connectionName, query, limit)`. Routing hint: *use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values*.
|
||||
|
||||
When `sourceName` is set, delegates entirely to `sl_discover` inspect mode and skips other sections. When all three sections are empty, output is `No matches for "<query>" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`
|
||||
|
||||
Structured output: `{ wiki: WikiSearchStructured|null, sl: SlDiscoverStructured|null, raw: RawSchemaHits|null }`.
|
||||
|
||||
## Wiring
|
||||
|
||||
`packages/context/src/ingest/ingest-bundle.runner.ts` already plumbs `emit_unmapped_fallback` into both the WorkUnit stage (`createEmitUnmappedFallbackTool` around line 726) and the reconcile stage (around line 962), with merging done via `packages/context/src/ingest/stages/build-wu-context.ts` and `build-reconcile-context.ts`.
|
||||
|
||||
Add a parallel factory next to those existing calls:
|
||||
|
||||
```ts
|
||||
const warehouseTools = createWarehouseVerificationTools({
|
||||
semanticLayerService: scopedSemanticLayerService,
|
||||
warehouseCatalog: new WarehouseCatalogService({ fileStore, projectDir }),
|
||||
dialects: getDialectForDriver,
|
||||
allowedConnectionNames: slConnectionIds, // reuse existing scoping
|
||||
sqlExecutionRowLimit: 100,
|
||||
});
|
||||
// Merge `entity_details`, `sql_execution`, `discover_data` into both stage tool maps
|
||||
// alongside emit_unmapped_fallback.
|
||||
```
|
||||
|
||||
`createWarehouseVerificationTools` returns `Record<string, Tool>` with three keys. The set is wired into every adapter's synthesis stage — no per-adapter opt-in.
|
||||
|
||||
## Skill-prompt updates
|
||||
|
||||
### Shared protocol
|
||||
|
||||
`packages/context/skills/_shared/identifier-verification.md`:
|
||||
|
||||
```md
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
1. `discover_data({query: "<topic>"})` — see what wikis, SL sources, and raw tables
|
||||
already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` —
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and sampleValues.
|
||||
3. For literal values from the source (status codes, plan tiers): check whether
|
||||
they appear in `entity_details`' `sampleValues` for the relevant column.
|
||||
If `sampleValues` is short or you suspect the sample missed real values, run
|
||||
a `sql_execution` probe: `SELECT DISTINCT <col> FROM <ref> LIMIT 50`.
|
||||
4. If the candidate identifier still doesn't resolve, do one of:
|
||||
(a) Use `sql_execution` with `SELECT 1 FROM <ref> LIMIT 0`. If it errors,
|
||||
the identifier is fictional.
|
||||
(b) Wrap the identifier in `[unverified — from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
(c) When recording `emit_unmapped_fallback` with `no_physical_table`,
|
||||
include the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
```
|
||||
|
||||
Each affected skill inlines this block verbatim (skill files are independent prompts; KTX has no cross-skill include mechanism today).
|
||||
|
||||
### Per-skill diffs
|
||||
|
||||
Two skills are deliberately excluded from updates: `ingest_triage` (read-only triage; produces no wiki or SL output) and `sl` (umbrella reference doc; cross-links to the protocol but doesn't need its own copy).
|
||||
|
||||
| Skill | Changes |
|
||||
|---|---|
|
||||
| `notion_synthesize` | Inline protocol; append `discover_data`, `entity_details`, `sql_execution` to `Allowed:` (line 74); replace `orbit_analytics.customer` example on line 70 with `<schema>.<table>` |
|
||||
| `dbt_ingest` | Inline protocol; line 24: replace `wiki_sl_search` → `discover_data` and `sl_describe_table` → `entity_details`; strengthen the "not permission to invent physical columns" paragraph by naming `entity_details` as the verification call |
|
||||
| `lookml_ingest` | Inline protocol; add: "Verify each `sql_table_name` from the LookML view with `entity_details` before mapping to an SL source" |
|
||||
| `looker_ingest` | Inline protocol; add: "For every Looker field reference, call `entity_details` on the underlying `(schema, table, column)` before promoting to `sl_refs` or quoting in wiki body" |
|
||||
| `metabase_ingest` | Inline protocol; add: "Before writing a wiki page derived from a Metabase question's SQL, verify each `schema.table.column` mentioned with `entity_details`" |
|
||||
| `metricflow_ingest` | Inline protocol; add: "Verify each MetricFlow model's source table with `entity_details` before producing the corresponding `sl_write_source`" |
|
||||
| `live_database_ingest` | Inline protocol; add: "Sample values come from the scan record; do not invent values not present in `relationship-profile.json`" |
|
||||
| `historic_sql_table_digest` | Shortened protocol focused on column attribution: "Only mention columns visible in the table's scan record. Use `entity_details({display})` if uncertain" |
|
||||
| `historic_sql_patterns` | Inline protocol; add: "Every join column mentioned in pattern descriptions must be verified via `entity_details` for both sides of the join" |
|
||||
| `knowledge_capture` | Inline protocol; update line 44: "First call `discover_data` to find existing wiki pages, SL sources, and raw tables on the topic" |
|
||||
| `sl_capture` | Inline protocol; add: "Before `sl_write_source`, call `entity_details` on the target table to confirm column names and types match the YAML being written" |
|
||||
|
||||
### Cleanups beyond the three-tool addition
|
||||
|
||||
- `notion_synthesize/SKILL.md:70` — remove `orbit_analytics.customer` (placeholder).
|
||||
- `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts:67` — same example string in the Zod `.describe()` — replace with `<schema>.<table>`.
|
||||
- `dbt_ingest/SKILL.md:24` — fix `wiki_sl_search` and `sl_describe_table` (neither tool exists in KTX).
|
||||
- `packages/context/src/sl/tools/sl-warehouse-validation.ts:93` — inline error message references the non-existent `sl_describe_table`. Replace with `sl_read_source`.
|
||||
|
||||
## Testing strategy
|
||||
|
||||
### Unit tests
|
||||
|
||||
| Component | Tests |
|
||||
|---|---|
|
||||
| `getDialectForDriver` | Every supported driver returns a dialect; unknown driver throws with a clear list of supported drivers |
|
||||
| `WarehouseCatalogService.getTable` | Reads and merges `tables/<b64>.json` and `relationship-profile.json`; returns `null` when no sync exists; returns `null` for unknown `(catalog, db, name)` |
|
||||
| `WarehouseCatalogService.resolveDisplay` | Postgres 2-part display → `{catalog: null, db, name}`; BigQuery 3-part display → `{catalog, db, name}`; ambiguous 2-part on BigQuery returns candidates list; unknown displays produce closest-match candidates ordered by edit distance |
|
||||
| `WarehouseCatalogService.searchByName` | Substring and token match; tiers (exact-name → token-match) ordered correctly; cache hit on second call within same instance |
|
||||
| `entity_details` | Resolves `{display}` and structured inputs; reports "Not found" with candidates for unknown ref; reports "no scan available" distinctly when scan dir missing; truncates above 50 targets |
|
||||
| `discover_data` | Three sections present when all three have hits; sections omitted when empty; `sourceName` inspect mode delegates to `sl_discover` and skips other sections; `allowedConnectionNames` scope honoured |
|
||||
| `sql_execution` | `assertReadOnlySql` rejects each mutating verb; row-limit wrap visible in `wrappedSql`; connector errors surface verbatim with the failing SQL; rejects `connectionName` not in `allowedConnectionNames` |
|
||||
|
||||
### Integration tests
|
||||
|
||||
- Extend `packages/context/src/ingest/ingest-bundle.runner.test.ts` to verify the three new tools are present in both WU-stage and reconcile-stage tool maps and refuse out-of-scope `connectionName` values.
|
||||
- New fixture-based test: stage a small `raw-sources/<conn>/live-database/<sync>/` directory with 2 tables + 1 enrichment profile, then call each tool through the runner's tool map and assert the markdown contains the expected fields. Uses the same fake-LLM harness as `notion.adapter.test.ts`.
|
||||
- One end-to-end regression test reproducing the `orbit_analytics.customer` hallucination: a fake Notion page mentioning the fictional table is fed to the synthesis stage; the run produces a wiki page where the fictional name is wrapped in `[unverified — …]` or omitted, not promoted to `tables:` frontmatter.
|
||||
|
||||
### Prompt-bundling tests
|
||||
|
||||
Extend `packages/context/src/memory/memory-runtime-assets.test.ts`:
|
||||
|
||||
- Every skill in the synthesis-writers list embeds the verification-protocol block (assert by stable header text).
|
||||
- Every such skill lists the three new tools when it has a `## Tools / Allowed` section, or mentions them inline in a workflow step otherwise.
|
||||
- No skill file contains any of the banned strings: `orbit_analytics.customer`, `wiki_sl_search`, `sl_describe_table`.
|
||||
|
||||
### Performance guards
|
||||
|
||||
`WarehouseCatalogService` caches the per-connection table list per stage (one WorkUnit's lifetime). Tests assert that the second call is a cache hit. No DB index for `searchByName` in this iteration — a linear scan over scan artefacts is acceptable up to ~50K columns. If volume warrants it later, a follow-up PR adds a SQLite FTS index.
|
||||
|
||||
## Rollout
|
||||
|
||||
Four mergeable PRs:
|
||||
|
||||
| PR | Lands |
|
||||
|---|---|
|
||||
| 1 | `getDialectForDriver` + `WarehouseCatalogService` + `entity_details` tool + wiring in `ingest-bundle.runner.ts` + unit/integration tests |
|
||||
| 2 | `sql_execution` tool + tests + the `orbit_analytics.customer` regression test (which exercises protocol steps 4a/4c) |
|
||||
| 3 | `discover_data` tool + tests |
|
||||
| 4 | All 11 skill prompts updated with the verification protocol + the four cleanups + extended `memory-runtime-assets.test.ts` |
|
||||
|
||||
Skill prompts land last so they can reference the tools that already exist.
|
||||
|
||||
## Out of scope
|
||||
|
||||
- **Hard write-time validation in `wiki_write` / `emit_unmapped_fallback`.** A complementary spec covers regex-based identifier validation at the write boundary. Defence-in-depth — separate concern.
|
||||
- **SQLite FTS index for `searchByName`.** Deferred until the linear scan benchmark fails.
|
||||
- **`raw_schema_search` as a standalone tool.** `discover_data`'s raw section covers the concept-search case.
|
||||
- **`semantic_query` in the synthesis toolset.** `semantic_query` will exist in KTX for the research/chat-time agent; it is deliberately excluded from synthesis because synthesis creates SL sources rather than queries them.
|
||||
- **`dictionary_search`.** `entity_details` already returns per-column `sampleValues`; for the rarer "where does this literal live?" case, `sql_execution` is more accurate than a sampled-JSON scan.
|
||||
- **UUID layer for tables/columns.** KTX deliberately stays string-keyed on `(connection, catalog, db, name)`.
|
||||
|
|
@ -14,9 +14,7 @@ generated local project.
|
|||
The managed Python runtime smoke requires `uv` on `PATH`, isolates
|
||||
`KTX_RUNTIME_ROOT`, verifies `ktx dev runtime status`, runs `ktx sl query --yes` to
|
||||
install the core runtime from the bundled wheel, checks `ktx dev runtime status`,
|
||||
starts and reuses the managed daemon, stops it, previews a stale runtime with
|
||||
`ktx dev runtime prune --dry-run`, verifies confirmation is required, and removes
|
||||
the stale runtime with `ktx dev runtime prune --yes`.
|
||||
starts and reuses the managed daemon, and stops it.
|
||||
|
||||
The artifact manifest contains the public `@kaelio/ktx` npm tarball and the
|
||||
bundled `kaelio-ktx` runtime wheel. The smoke does not install standalone
|
||||
|
|
|
|||
114
knip.json
Normal file
114
knip.json
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
{
|
||||
"$schema": "https://unpkg.com/knip@6/schema.json",
|
||||
"workspaces": {
|
||||
".": {
|
||||
"entry": ["scripts/**/*.mjs"],
|
||||
"project": ["scripts/**/*.mjs"]
|
||||
},
|
||||
"packages/cli": {
|
||||
"entry": [
|
||||
"src/index.ts",
|
||||
"src/bin.ts",
|
||||
"src/**/*.test.ts",
|
||||
"src/**/*.test.tsx",
|
||||
"scripts/**/*.mjs"
|
||||
],
|
||||
"project": ["src/**/*.{ts,tsx}", "scripts/**/*.mjs", "vitest.config.ts"]
|
||||
},
|
||||
"packages/context": {
|
||||
"entry": [
|
||||
"src/index.ts",
|
||||
"src/agent/index.ts",
|
||||
"src/core/index.ts",
|
||||
"src/connections/index.ts",
|
||||
"src/daemon/index.ts",
|
||||
"src/ingest/index.ts",
|
||||
"src/ingest/memory-flow/index.ts",
|
||||
"src/ingest/metabase-mapping.ts",
|
||||
"src/scan/index.ts",
|
||||
"src/search/index.ts",
|
||||
"src/sql-analysis/index.ts",
|
||||
"src/memory/index.ts",
|
||||
"src/mcp/index.ts",
|
||||
"src/project/index.ts",
|
||||
"src/prompts/index.ts",
|
||||
"src/skills/index.ts",
|
||||
"src/sl/index.ts",
|
||||
"src/sl/descriptions.ts",
|
||||
"src/tools/index.ts",
|
||||
"src/wiki/index.ts",
|
||||
"src/**/*.test.ts",
|
||||
"scripts/**/*.mjs"
|
||||
],
|
||||
"project": ["src/**/*.ts", "scripts/**/*.mjs", "vitest.config.ts"]
|
||||
},
|
||||
"packages/llm": {
|
||||
"entry": ["src/index.ts", "src/**/*.test.ts"],
|
||||
"project": ["src/**/*.ts", "vitest.config.ts"]
|
||||
},
|
||||
"packages/connector-*": {
|
||||
"entry": ["src/index.ts", "src/**/*.test.ts"],
|
||||
"project": ["src/**/*.ts"]
|
||||
},
|
||||
"docs-site": {
|
||||
"entry": [
|
||||
"app/**/*.{ts,tsx}",
|
||||
"components/**/*.{ts,tsx}",
|
||||
"lib/**/*.{ts,tsx}",
|
||||
"middleware.ts",
|
||||
"next.config.mjs",
|
||||
"source.config.ts",
|
||||
"tests/**/*.mjs"
|
||||
],
|
||||
"project": [
|
||||
"app/**/*.{ts,tsx}",
|
||||
"components/**/*.{ts,tsx}",
|
||||
"lib/**/*.{ts,tsx}",
|
||||
"*.ts",
|
||||
"*.mjs",
|
||||
"tests/**/*.mjs"
|
||||
],
|
||||
"ignoreDependencies": ["tailwindcss"]
|
||||
}
|
||||
},
|
||||
"ignore": [
|
||||
"**/dist/**",
|
||||
"**/coverage/**",
|
||||
"**/.next/**",
|
||||
"**/node_modules/**",
|
||||
"**/*.gen.ts",
|
||||
"**/*.generated.ts"
|
||||
],
|
||||
"ignoreIssues": {
|
||||
"packages/cli/src/clack.ts": ["exports"],
|
||||
"packages/cli/src/commands/connection-metabase-setup.ts": ["exports", "types"],
|
||||
"packages/cli/src/ingest.test-utils.ts": ["exports"],
|
||||
"packages/cli/src/io/symbols.ts": ["exports"],
|
||||
"packages/cli/src/managed-python-command.ts": ["types"],
|
||||
"packages/cli/src/managed-python-daemon.ts": ["types"],
|
||||
"packages/cli/src/managed-python-http.ts": ["exports", "types"],
|
||||
"packages/cli/src/managed-python-runtime.ts": ["types"],
|
||||
"packages/cli/src/memory-flow-tui.tsx": ["types"],
|
||||
"packages/cli/src/next-steps.ts": ["exports"],
|
||||
"packages/cli/src/print-command-tree.ts": ["exports"],
|
||||
"packages/cli/src/setup-agents.ts": ["exports", "types"],
|
||||
"packages/cli/src/setup-context.ts": ["types"],
|
||||
"packages/cli/src/setup-demo-tour.ts": ["exports"],
|
||||
"packages/cli/src/setup-models.ts": ["exports"],
|
||||
"packages/cli/src/setup-project.ts": ["types"],
|
||||
"packages/cli/src/setup-ready-menu.ts": ["types"],
|
||||
"packages/cli/src/setup-sources.ts": ["types"],
|
||||
"packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts": ["exports", "types"],
|
||||
"packages/context/src/ingest/adapters/lookml/pull-config.ts": ["exports"],
|
||||
"packages/context/src/ingest/adapters/metabase/serialize-card.ts": ["types"],
|
||||
"packages/context/src/ingest/adapters/metabase/types.ts": ["exports"],
|
||||
"packages/context/src/ingest/adapters/metricflow/parse.ts": ["types"],
|
||||
"packages/context/src/ingest/ports.ts": ["types"],
|
||||
"packages/context/src/ingest/stages/stage-3-work-units.ts": ["types"],
|
||||
"packages/context/src/ingest/stages/stage-index.types.ts": ["types"],
|
||||
"packages/context/src/project/config.ts": ["types"],
|
||||
"packages/context/src/scan/relationship-candidates.ts": ["types"],
|
||||
"packages/context/src/scan/relationship-diagnostics.ts": ["types"],
|
||||
"packages/context/src/tools/context-evidence-tool-store.ts": ["types"]
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,10 @@
|
|||
"artifacts:verify-manifest": "node scripts/package-artifacts.mjs verify-manifest",
|
||||
"build": "pnpm --filter './packages/*' run build",
|
||||
"check": "node scripts/check-boundaries.mjs && node --test scripts/*.test.mjs && pnpm --filter './packages/*' run build && pnpm --filter './packages/*' run test",
|
||||
"dead-code": "pnpm run dead-code:biome && pnpm run dead-code:knip",
|
||||
"dead-code:biome": "biome ci . --formatter-enabled=false --assist-enabled=false",
|
||||
"dead-code:fix": "biome check . --formatter-enabled=false --assist-enabled=false --write && knip --fix --format",
|
||||
"dead-code:knip": "knip --reporter compact",
|
||||
"ktx": "node scripts/run-ktx.mjs",
|
||||
"link:dev": "node scripts/link-dev-cli.mjs",
|
||||
"native:rebuild": "pnpm -r rebuild better-sqlite3",
|
||||
|
|
@ -36,9 +40,12 @@
|
|||
"type-check": "pnpm --filter './packages/*' run type-check"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^2.4.15",
|
||||
"@types/node": "^25.7.0",
|
||||
"better-sqlite3": "^12.10.0",
|
||||
"knip": "^6.12.2",
|
||||
"typescript": "^6.0.3",
|
||||
"vitest": "^4.1.6"
|
||||
"yaml": "^2.9.0"
|
||||
},
|
||||
"pnpm": {
|
||||
"onlyBuiltDependencies": [
|
||||
|
|
|
|||
|
|
@ -1,152 +0,0 @@
|
|||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
KTX_AGENT_MAX_ROWS_CAP,
|
||||
createKtxAgentRuntime,
|
||||
parseAgentMaxRows,
|
||||
readAgentJsonFile,
|
||||
writeAgentJson,
|
||||
writeAgentJsonError,
|
||||
} from './agent-runtime.js';
|
||||
|
||||
/**
 * Creates an in-memory IO double for tests.
 *
 * Everything written to `io.stdout` / `io.stderr` is appended to a private
 * buffer; the `stdout()` and `stderr()` accessors return what has been
 * captured so far. Each `write` returns the buffer contents after appending.
 */
function makeIo() {
  const captured = { out: '', err: '' };

  const io = {
    stdout: {
      write: (chunk: string) => {
        captured.out += chunk;
        return captured.out;
      },
    },
    stderr: {
      write: (chunk: string) => {
        captured.err += chunk;
        return captured.err;
      },
    },
  };

  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
|
||||
|
||||
describe('agent runtime helpers', () => {
  let tempDir: string;

  // Bare project-shaped fixture; callers cast it because only the identity of
  // the object matters to these tests, not its full type.
  const stubProject = () => ({
    projectDir: tempDir,
    configPath: join(tempDir, 'ktx.yaml'),
    config: { project: 'revenue', connections: {} },
    coreConfig: {},
    git: {},
    fileStore: {},
  });

  // Mocked semantic-layer compute port with the three methods the tests stub.
  const stubSemanticLayerCompute = () => ({
    query: vi.fn(),
    validateSources: vi.fn(),
    generateSources: vi.fn(),
  });

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-agent-runtime-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('writes JSON success and error envelopes without color or spinners', () => {
    const successIo = makeIo();
    writeAgentJson(successIo.io, { ok: true });
    expect(JSON.parse(successIo.stdout())).toEqual({ ok: true });
    expect(successIo.stderr()).toBe('');

    const errorIo = makeIo();
    writeAgentJsonError(errorIo.io, 'missing source', { code: 'NOT_FOUND' });
    expect(JSON.parse(errorIo.stderr())).toEqual({
      ok: false,
      error: { message: 'missing source', code: 'NOT_FOUND' },
    });
    expect(errorIo.stdout()).toBe('');
  });

  it('reads JSON query files as objects', async () => {
    const path = join(tempDir, 'query.json');
    await writeFile(path, '{"measures":["revenue"],"limit":50}', 'utf-8');

    const expected = { measures: ['revenue'], limit: 50 };
    await expect(readAgentJsonFile(path)).resolves.toEqual(expected);
  });

  it('rejects non-object JSON query files', async () => {
    const path = join(tempDir, 'query.json');
    await writeFile(path, '["revenue"]', 'utf-8');

    await expect(readAgentJsonFile(path)).rejects.toThrow('must contain a JSON object');
  });

  it('requires positive row limits and enforces the agent cap', () => {
    expect(parseAgentMaxRows(100)).toBe(100);
    expect(() => parseAgentMaxRows(undefined)).toThrow('maxRows is required');
    expect(() => parseAgentMaxRows(0)).toThrow('positive integer');

    const overCap = KTX_AGENT_MAX_ROWS_CAP + 1;
    expect(() => parseAgentMaxRows(overCap)).toThrow(String(KTX_AGENT_MAX_ROWS_CAP));
  });

  it('constructs local context ports with semantic compute and query executor', async () => {
    const project = stubProject() as never;
    const ports = { knowledge: {}, semanticLayer: {} } as never;
    const semanticLayerCompute = stubSemanticLayerCompute();
    const queryExecutor = { execute: vi.fn() };
    const loadProject = vi.fn(async () => project);
    const createContextTools = vi.fn(() => ports);

    const pending = createKtxAgentRuntime(
      { projectDir: tempDir, enableSemanticCompute: true, enableQueryExecution: true },
      {
        loadProject,
        createContextTools,
        createSemanticLayerCompute: () => semanticLayerCompute,
        createQueryExecutor: () => queryExecutor,
      },
    );
    await expect(pending).resolves.toMatchObject({ project, ports, queryExecutor });

    expect(loadProject).toHaveBeenCalledWith({ projectDir: tempDir });
    expect(createContextTools).toHaveBeenCalledWith(project, {
      semanticLayerCompute,
      queryExecutor,
    });
  });

  it('creates managed semantic compute when no test override is injected', async () => {
    const project = stubProject() as never;
    const ports = { semanticLayer: {} } as never;
    const semanticLayerCompute = stubSemanticLayerCompute();
    const loadProject = vi.fn(async () => project);
    const createContextTools = vi.fn(() => ports);
    const createManagedSemanticLayerCompute = vi.fn(async () => semanticLayerCompute);
    const { io } = makeIo();

    const pending = createKtxAgentRuntime(
      {
        projectDir: tempDir,
        enableSemanticCompute: true,
        enableQueryExecution: false,
        cliVersion: '0.2.0',
        runtimeInstallPolicy: 'auto',
        io,
      },
      {
        loadProject,
        createContextTools,
        createManagedSemanticLayerCompute,
      },
    );
    await expect(pending).resolves.toMatchObject({ project, ports, semanticLayerCompute });

    expect(createManagedSemanticLayerCompute).toHaveBeenCalledWith({
      cliVersion: '0.2.0',
      installPolicy: 'auto',
      io,
    });
    expect(createContextTools).toHaveBeenCalledWith(project, {
      semanticLayerCompute,
    });
  });
});
|
||||
|
|
@ -1,109 +0,0 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { createDefaultLocalQueryExecutor, type KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import type { KtxSemanticLayerComputePort } from '@ktx/context/daemon';
|
||||
import { createLocalProjectMcpContextPorts, type KtxMcpContextPorts } from '@ktx/context/mcp';
|
||||
import { type KtxLocalProject, loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import {
|
||||
createManagedPythonSemanticLayerComputePort,
|
||||
type KtxManagedPythonInstallPolicy,
|
||||
} from './managed-python-command.js';
|
||||
|
||||
/** Largest `maxRows` value that `parseAgentMaxRows` accepts before it throws. */
export const KTX_AGENT_MAX_ROWS_CAP = 1000;
|
||||
|
||||
/** Inputs that control how `createKtxAgentRuntime` assembles a runtime. */
export interface KtxAgentRuntimeOptions {
  /** Directory handed to the project loader. */
  projectDir: string;

  /** When `false`, no semantic-layer compute port is created. */
  enableSemanticCompute: boolean;

  /** When `false`, no SQL query executor is created. */
  enableQueryExecution: boolean;

  /**
   * Forwarded to the managed Python compute factory. Required, together with
   * `runtimeInstallPolicy` and `io`, whenever semantic compute is enabled and
   * no `createSemanticLayerCompute` override is supplied.
   */
  cliVersion?: string;

  /** Forwarded to the managed Python compute factory as `installPolicy`. */
  runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;

  /** IO handle forwarded to the managed Python compute factory. */
  io?: KtxCliIo;
}
|
||||
|
||||
/** Result of `createKtxAgentRuntime`: the loaded project plus the ports built for it. */
export interface KtxAgentRuntime {
  /** Project returned by the project loader. */
  project: KtxLocalProject;

  /** Context ports created for `project`. */
  ports: KtxMcpContextPorts;

  /** Present only when a semantic-layer compute port was created. */
  semanticLayerCompute?: KtxSemanticLayerComputePort;

  /** Present only when a SQL query executor was created. */
  queryExecutor?: KtxSqlQueryExecutorPort;
}
|
||||
|
||||
/**
 * Optional factory overrides for `createKtxAgentRuntime`. Any member left
 * unset falls back to the corresponding default implementation.
 */
export interface KtxAgentRuntimeDeps {
  /** Replaces `loadKtxProject`. */
  loadProject?: typeof loadKtxProject;

  /** Replaces `createLocalProjectMcpContextPorts`. */
  createContextTools?: typeof createLocalProjectMcpContextPorts;

  /** When set, supplies the semantic compute port directly and skips the managed Python path. */
  createSemanticLayerCompute?: () => KtxSemanticLayerComputePort;

  /** Replaces `createManagedPythonSemanticLayerComputePort`. */
  createManagedSemanticLayerCompute?: typeof createManagedPythonSemanticLayerComputePort;

  /** Replaces `createDefaultLocalQueryExecutor`. */
  createQueryExecutor?: () => KtxSqlQueryExecutorPort;
}
|
||||
|
||||
/**
 * Writes `value` to `io.stdout` as two-space-indented JSON followed by a newline.
 *
 * @param io - IO handle whose `stdout` receives the output.
 * @param value - Any JSON-serializable value.
 */
export function writeAgentJson(io: KtxCliIo, value: unknown): void {
  const serialized = JSON.stringify(value, null, 2);
  io.stdout.write(serialized + '\n');
}
|
||||
|
||||
/**
 * Writes a JSON error envelope of the form `{ ok: false, error: { message, ...detail } }`
 * to `io.stderr`, two-space indented and newline-terminated.
 *
 * @param io - IO handle whose `stderr` receives the output.
 * @param message - Error message placed at `error.message`.
 * @param detail - Extra fields merged into `error` after `message`, so a
 *   `message` key in `detail` takes precedence.
 */
export function writeAgentJsonError(
  io: KtxCliIo,
  message: string,
  detail: Record<string, unknown> = {},
): void {
  const envelope = {
    ok: false,
    error: { message, ...detail },
  };
  const serialized = JSON.stringify(envelope, null, 2);
  io.stderr.write(`${serialized}\n`);
}
|
||||
|
||||
/**
 * Reads the UTF-8 file at `path`, parses it as JSON and returns it when the
 * top-level value is a non-array object.
 *
 * @param path - File to read.
 * @returns The parsed object.
 * @throws Error when the parsed value is `null`, a primitive, or an array.
 *   Read failures and JSON syntax errors propagate unchanged.
 */
export async function readAgentJsonFile(path: string): Promise<Record<string, unknown>> {
  const raw = await readFile(path, 'utf-8');
  const parsed: unknown = JSON.parse(raw);

  const isPlainObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
  if (isPlainObject) {
    return parsed as Record<string, unknown>;
  }

  throw new Error(`${path} must contain a JSON object.`);
}
|
||||
|
||||
/**
 * Validates a caller-supplied row limit.
 *
 * @param value - Requested maximum number of rows.
 * @returns `value` unchanged when it is a positive integer no greater than
 *   `KTX_AGENT_MAX_ROWS_CAP`.
 * @throws Error when `value` is missing, not an integer, not positive, or
 *   above the cap.
 */
export function parseAgentMaxRows(value: number | undefined): number {
  const missingOrInvalid = value === undefined || !Number.isInteger(value) || value <= 0;
  if (missingOrInvalid) {
    throw new Error('maxRows is required and must be a positive integer.');
  }

  if (value <= KTX_AGENT_MAX_ROWS_CAP) {
    return value;
  }

  throw new Error(`maxRows must be less than or equal to ${KTX_AGENT_MAX_ROWS_CAP}.`);
}
|
||||
|
||||
/**
 * Resolves the semantic-layer compute port for an agent runtime.
 *
 * Resolution order:
 * 1. semantic compute disabled: `undefined`;
 * 2. `deps.createSemanticLayerCompute` supplied: its result;
 * 3. otherwise the managed Python factory (overridable through
 *    `deps.createManagedSemanticLayerCompute`).
 *
 * @throws Error when step 3 is reached without `cliVersion`,
 *   `runtimeInstallPolicy` and `io` all being set on `options`.
 */
async function createAgentSemanticLayerCompute(
  options: KtxAgentRuntimeOptions,
  deps: KtxAgentRuntimeDeps,
): Promise<KtxSemanticLayerComputePort | undefined> {
  if (!options.enableSemanticCompute) {
    return undefined;
  }

  if (deps.createSemanticLayerCompute) {
    return deps.createSemanticLayerCompute();
  }

  const { cliVersion, runtimeInstallPolicy, io } = options;
  if (!(cliVersion && runtimeInstallPolicy && io)) {
    throw new Error('Managed Python semantic compute requires cliVersion, runtimeInstallPolicy, and io.');
  }

  const buildManagedCompute =
    deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort;
  return buildManagedCompute({
    cliVersion,
    installPolicy: runtimeInstallPolicy,
    io,
  });
}
|
||||
|
||||
/**
 * Loads the project at `options.projectDir` and builds the context ports for it.
 *
 * The semantic-layer compute port and the SQL query executor are created only
 * when enabled in `options`; each is passed to the context-port factory and
 * included in the returned runtime only when it exists.
 *
 * @param options - Project location and feature switches.
 * @param deps - Factory overrides; defaults are used for anything omitted.
 * @returns The loaded project, its context ports, and any optional ports created.
 */
export async function createKtxAgentRuntime(
  options: KtxAgentRuntimeOptions,
  deps: KtxAgentRuntimeDeps = {},
): Promise<KtxAgentRuntime> {
  const loadProject = deps.loadProject ?? loadKtxProject;
  const project = await loadProject({ projectDir: options.projectDir });

  const semanticLayerCompute = await createAgentSemanticLayerCompute(options, deps);

  let queryExecutor: KtxSqlQueryExecutorPort | undefined;
  if (options.enableQueryExecution) {
    const buildQueryExecutor = deps.createQueryExecutor ?? createDefaultLocalQueryExecutor;
    queryExecutor = buildQueryExecutor();
  }

  // Optional ports are added as keys only when they exist, never as `undefined` values.
  const optionalPorts: {
    semanticLayerCompute?: KtxSemanticLayerComputePort;
    queryExecutor?: KtxSqlQueryExecutorPort;
  } = {};
  if (semanticLayerCompute) {
    optionalPorts.semanticLayerCompute = semanticLayerCompute;
  }
  if (queryExecutor) {
    optionalPorts.queryExecutor = queryExecutor;
  }

  const buildContextPorts = deps.createContextTools ?? createLocalProjectMcpContextPorts;
  const ports = buildContextPorts(project, { ...optionalPorts });

  return { project, ports, ...optionalPorts };
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
isMissingProjectConfigError,
|
||||
missingConnectionSlSearchReadiness,
|
||||
missingProjectSlSearchReadiness,
|
||||
noConnectionsSlSearchReadiness,
|
||||
noIndexedSourcesSlSearchReadiness,
|
||||
} from './agent-search-readiness.js';
|
||||
|
||||
describe('agent semantic-layer search readiness guidance', () => {
  it('formats missing project guidance with exact recovery commands', () => {
    const detail = missingProjectSlSearchReadiness('/tmp/ktx-search', 'gross revenue');

    expect(detail).toEqual({
      code: 'agent_sl_search_missing_project',
      message: 'Semantic-layer search needs an initialized KTX project at /tmp/ktx-search.',
      nextSteps: [
        'ktx setup --project-dir /tmp/ktx-search',
        'ktx status --project-dir /tmp/ktx-search',
        'ktx ingest run --connection-id <connection> --adapter <adapter>',
        'ktx agent sl list --json --query "gross revenue" --project-dir /tmp/ktx-search',
      ],
    });
  });

  it('formats no-connection and no-index guidance without hiding the project path', () => {
    const noConnections = noConnectionsSlSearchReadiness('/tmp/ktx-search', 'revenue');
    expect(noConnections).toMatchObject({
      code: 'agent_sl_search_no_connections',
      message: 'Semantic-layer search found no configured connections in /tmp/ktx-search.',
    });

    const noIndexedSources = noIndexedSourcesSlSearchReadiness('/tmp/ktx-search', 'orders');
    expect(noIndexedSources).toMatchObject({
      code: 'agent_sl_search_no_indexed_sources',
      message: 'Semantic-layer search found no indexed semantic-layer sources in /tmp/ktx-search.',
    });
  });

  it('formats unknown connection guidance', () => {
    const detail = missingConnectionSlSearchReadiness('/tmp/ktx-search', 'warehouse', 'revenue');

    expect(detail).toMatchObject({
      code: 'agent_sl_search_unknown_connection',
      message: 'Semantic-layer search connection "warehouse" is not configured in /tmp/ktx-search.',
    });
  });

  it('detects missing ktx.yaml read errors', () => {
    // Shaped like a Node filesystem error: `code` and `path` sit on the Error itself.
    const enoent = Object.assign(new Error('ENOENT: no such file or directory'), {
      code: 'ENOENT',
      path: '/tmp/ktx-search/ktx.yaml',
    });
    const unrelated = new Error('other');

    expect(isMissingProjectConfigError(enoent)).toBe(true);
    expect(isMissingProjectConfigError(unrelated)).toBe(false);
  });
});
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
/** Machine-readable reason why semantic-layer search cannot run yet. */
export type KtxAgentSlSearchReadinessCode =
  | 'agent_sl_search_missing_project'
  | 'agent_sl_search_no_connections'
  | 'agent_sl_search_unknown_connection'
  | 'agent_sl_search_no_indexed_sources';
|
||||
|
||||
/** Guidance describing a semantic-layer search readiness problem. */
export interface KtxAgentSlSearchReadinessDetail {
  /** Which readiness problem was detected. */
  code: KtxAgentSlSearchReadinessCode;

  /** Human-readable description of the problem. */
  message: string;

  /** Ordered list of CLI commands suggested as follow-up. */
  nextSteps: string[];
}
|
||||
|
||||
/**
 * Characters that may appear in a command-line argument without quoting.
 * Backslash, colon and tilde are included so ordinary Windows and temp-dir
 * paths are emitted unchanged.
 */
const SHELL_SAFE_ARGUMENT = /^[\w@%+=:,./\\~-]+$/;

/**
 * Returns `value` ready to paste after a CLI flag.
 *
 * Values made only of safe characters are returned as-is. Anything else
 * (whitespace, quotes, an empty string, ...) is wrapped in double quotes with
 * `"`, `$`, backtick and backslash escaped, following POSIX double-quote rules.
 */
function quoteCommandArgument(value: string): string {
  if (SHELL_SAFE_ARGUMENT.test(value)) {
    return value;
  }
  const escaped = value.replace(/(["$`\\])/g, '\\$1');
  return `"${escaped}"`;
}

/** Returns the trimmed query, or the placeholder `revenue` when it is missing or blank. */
function queryForCommand(query: string | undefined): string {
  const trimmed = query?.trim();
  return trimmed && trimmed.length > 0 ? trimmed : 'revenue';
}

/** Builds the `ktx agent sl list` command that repeats the search against `projectDir`. */
function projectSearchCommand(projectDir: string, query: string | undefined): string {
  const quotedQuery = JSON.stringify(queryForCommand(query));
  return `ktx agent sl list --json --query ${quotedQuery} --project-dir ${quoteCommandArgument(projectDir)}`;
}

/**
 * Builds the recovery command list shared by every readiness detail:
 * setup, status, ingest, then a retry of the search.
 *
 * The project directory is quoted when it contains characters that would
 * otherwise split or alter the argument, so the commands can be copy-pasted.
 */
function baseNextSteps(projectDir: string, query: string | undefined): string[] {
  const projectDirArgument = quoteCommandArgument(projectDir);
  return [
    `ktx setup --project-dir ${projectDirArgument}`,
    `ktx status --project-dir ${projectDirArgument}`,
    'ktx ingest run --connection-id <connection> --adapter <adapter>',
    projectSearchCommand(projectDir, query),
  ];
}
|
||||
|
||||
/**
 * Readiness detail for a search run against a directory with no initialized
 * KTX project.
 *
 * @param projectDir - Directory that was searched; echoed in the message and commands.
 * @param query - Original search text, reused in the suggested retry command.
 */
export function missingProjectSlSearchReadiness(
  projectDir: string,
  query: string | undefined,
): KtxAgentSlSearchReadinessDetail {
  const message = `Semantic-layer search needs an initialized KTX project at ${projectDir}.`;
  const nextSteps = baseNextSteps(projectDir, query);
  return { code: 'agent_sl_search_missing_project', message, nextSteps };
}
|
||||
|
||||
/**
 * Readiness detail for a project that has no configured connections.
 *
 * @param projectDir - Project directory; echoed in the message and commands.
 * @param query - Original search text, reused in the suggested retry command.
 */
export function noConnectionsSlSearchReadiness(
  projectDir: string,
  query: string | undefined,
): KtxAgentSlSearchReadinessDetail {
  const message = `Semantic-layer search found no configured connections in ${projectDir}.`;
  const nextSteps = baseNextSteps(projectDir, query);
  return { code: 'agent_sl_search_no_connections', message, nextSteps };
}
|
||||
|
||||
/**
 * Readiness detail for a search that names a connection the project does not
 * define.
 *
 * @param projectDir - Project directory; echoed in the message and commands.
 * @param connectionId - The requested connection id, quoted in the message.
 * @param query - Original search text, reused in the suggested retry command.
 */
export function missingConnectionSlSearchReadiness(
  projectDir: string,
  connectionId: string,
  query: string | undefined,
): KtxAgentSlSearchReadinessDetail {
  const message = `Semantic-layer search connection "${connectionId}" is not configured in ${projectDir}.`;
  const nextSteps = baseNextSteps(projectDir, query);
  return { code: 'agent_sl_search_unknown_connection', message, nextSteps };
}
|
||||
|
||||
/**
 * Readiness detail for a project with no indexed semantic-layer sources.
 *
 * @param projectDir - Project directory; echoed in the message and commands.
 * @param query - Original search text, reused in the suggested retry command.
 */
export function noIndexedSourcesSlSearchReadiness(
  projectDir: string,
  query: string | undefined,
): KtxAgentSlSearchReadinessDetail {
  const message = `Semantic-layer search found no indexed semantic-layer sources in ${projectDir}.`;
  const nextSteps = baseNextSteps(projectDir, query);
  return { code: 'agent_sl_search_no_indexed_sources', message, nextSteps };
}
|
||||
|
||||
/**
 * Reads `error[key]` when `error` is a non-null object that has the key and
 * the value is a string; returns `undefined` otherwise.
 */
function stringErrorProperty(error: unknown, key: 'code' | 'path'): string | undefined {
  if (typeof error !== 'object' || error === null || !(key in error)) {
    return undefined;
  }
  const value = (error as Record<string, unknown>)[key];
  return typeof value === 'string' ? value : undefined;
}

/** Returns the string `code` carried by `error`, if any. */
function errorCode(error: unknown): string | undefined {
  return stringErrorProperty(error, 'code');
}

/** Returns the string `path` carried by `error`, if any. */
function errorPath(error: unknown): string | undefined {
  return stringErrorProperty(error, 'path');
}

/**
 * Tells whether `error` is an `ENOENT` error whose `path` points at a file
 * named exactly `ktx.yaml`.
 *
 * The final path segment is compared rather than the string suffix, so files
 * such as `my-ktx.yaml` are not mistaken for the project config. Both `/` and
 * `\` are treated as separators.
 *
 * @param error - Any thrown value.
 * @returns `true` only for a missing `ktx.yaml`.
 */
export function isMissingProjectConfigError(error: unknown): boolean {
  if (errorCode(error) !== 'ENOENT') {
    return false;
  }
  const fileName = errorPath(error)?.split(/[\\/]/).pop();
  return fileName === 'ktx.yaml';
}
|
||||
|
|
@ -1,428 +0,0 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { buildDefaultKtxProjectConfig } from '@ktx/context/project';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { runKtxAgent } from './agent.js';
|
||||
import type { KtxAgentRuntime } from './agent-runtime.js';
|
||||
|
||||
/**
 * Test double for the CLI IO handle.
 *
 * Writes to `io.stdout` and `io.stderr` accumulate in separate string
 * buffers, readable through the returned `stdout()` / `stderr()` getters.
 * Each `write` yields the buffer contents after the chunk was appended.
 */
function makeIo() {
  let outBuffer = '';
  let errBuffer = '';

  const appendOut = (chunk: string) => {
    outBuffer += chunk;
    return outBuffer;
  };
  const appendErr = (chunk: string) => {
    errBuffer += chunk;
    return errBuffer;
  };

  return {
    io: {
      stdout: { write: appendOut },
      stderr: { write: appendErr },
    },
    stdout: () => outBuffer,
    stderr: () => errBuffer,
  };
}
|
||||
|
||||
/**
 * Builds a fully mocked agent runtime for a project named `revenue` with a
 * single read-only SQLite connection called `warehouse`.
 *
 * Every port method is a `vi.fn` resolving to a small canned payload. Keys in
 * `overrides` are spread last and therefore replace the matching top-level
 * runtime properties.
 */
function runtime(overrides: Record<string, unknown> = {}): KtxAgentRuntime {
  const defaultConfig = buildDefaultKtxProjectConfig('revenue');

  return {
    // Project rooted at a fixed fake path; collaborators are empty stand-ins.
    project: {
      projectDir: '/tmp/revenue',
      configPath: '/tmp/revenue/ktx.yaml',
      config: {
        ...defaultConfig,
        connections: {
          warehouse: { driver: 'sqlite', path: 'warehouse.sqlite', readonly: true as const },
        },
      },
      coreConfig: {} as KtxAgentRuntime['project']['coreConfig'],
      git: {} as KtxAgentRuntime['project']['git'],
      fileStore: {} as KtxAgentRuntime['project']['fileStore'],
    },

    ports: {
      connections: {
        list: vi.fn(async () => [{ id: 'warehouse', name: 'warehouse', connectionType: 'sqlite' }]),
      },

      // One semantic-layer source, `orders`, on the `warehouse` connection.
      semanticLayer: {
        listSources: vi.fn(async () => ({
          sources: [
            {
              connectionId: 'warehouse',
              connectionName: 'warehouse',
              name: 'orders',
              columnCount: 2,
              measureCount: 1,
              joinCount: 0,
            },
          ],
          totalSources: 1,
        })),
        readSource: vi.fn(async () => ({ sourceName: 'orders', yaml: 'name: orders\n' })),
        writeSource: vi.fn(async () => ({ success: true, sourceName: 'orders' })),
        validate: vi.fn(async () => ({ success: true, errors: [], warnings: [] })),
        query: vi.fn(async () => ({
          sql: 'select 1',
          headers: ['x'],
          rows: [[1]],
          totalRows: 1,
          plan: {},
        })),
      },

      // One global wiki page, `page-1`.
      knowledge: {
        search: vi.fn(async () => ({
          results: [
            {
              key: 'page-1',
              path: 'knowledge/global/page-1.md',
              scope: 'GLOBAL' as const,
              summary: 'Revenue logic',
              score: 0.9,
              matchReasons: ['lexical' as const],
            },
          ],
          totalFound: 1,
        })),
        read: vi.fn(async () => ({
          key: 'page-1',
          scope: 'GLOBAL' as const,
          summary: 'Revenue logic',
          content: 'Use net revenue.',
        })),
        write: vi.fn(async () => ({ success: true, key: 'page-1', action: 'created' as const })),
      },
    },

    queryExecutor: {
      execute: vi.fn(async () => ({
        headers: ['x'],
        rows: [[1]],
        totalRows: 1,
        command: 'SELECT',
        rowCount: 1,
      })),
    },

    ...overrides,
  };
}
|
||||
|
||||
/**
 * Variant of `runtime()` whose project config has no connections and whose
 * semantic layer reports zero sources. All other ports are left as built by
 * `runtime()`.
 */
function runtimeWithoutConnections(): KtxAgentRuntime {
  const seeded = runtime();
  const { project, ports } = seeded;

  const emptyProject = {
    ...project,
    config: {
      ...project.config,
      connections: {},
    },
  };

  const emptySemanticLayer = {
    ...ports.semanticLayer!,
    listSources: vi.fn(async () => ({ sources: [], totalSources: 0 })),
  };

  return {
    ...seeded,
    project: emptyProject,
    ports: {
      ...ports,
      semanticLayer: emptySemanticLayer,
    },
  };
}
|
||||
|
||||
describe('runKtxAgent', () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-agent-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('prints tool discovery with every stable command', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(runKtxAgent({ command: 'tools', projectDir: tempDir, json: true }, io.io)).resolves.toBe(0);
|
||||
|
||||
const body = JSON.parse(io.stdout());
|
||||
expect(body.projectDir).toBe(tempDir);
|
||||
expect(body.tools.map((tool: { name: string }) => tool.name)).toEqual([
|
||||
'context',
|
||||
'sl.list',
|
||||
'sl.read',
|
||||
'sl.query',
|
||||
'wiki.search',
|
||||
'wiki.read',
|
||||
'sql.execute',
|
||||
]);
|
||||
expect(io.stderr()).toBe('');
|
||||
});
|
||||
|
||||
it('prints project context from setup status, connections, and SL summaries', async () => {
|
||||
const io = makeIo();
|
||||
const createRuntime = vi.fn(async () => runtime());
|
||||
const readSetupStatus = vi.fn(async () => ({ project: { path: tempDir, ready: true }, agents: [] }));
|
||||
|
||||
await expect(
|
||||
runKtxAgent({ command: 'context', projectDir: tempDir, json: true }, io.io, { createRuntime, readSetupStatus }),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(io.stdout())).toMatchObject({
|
||||
projectDir: tempDir,
|
||||
status: { project: { ready: true } },
|
||||
connections: [{ id: 'warehouse' }],
|
||||
semanticLayer: { totalSources: 1 },
|
||||
});
|
||||
});
|
||||
|
||||
it('dispatches SL list, SL read, wiki search, and wiki read through local ports', async () => {
|
||||
for (const args of [
|
||||
{ command: 'sl-list' as const, projectDir: tempDir, json: true as const, connectionId: 'warehouse' },
|
||||
{
|
||||
command: 'sl-read' as const,
|
||||
projectDir: tempDir,
|
||||
json: true as const,
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
},
|
||||
{ command: 'wiki-search' as const, projectDir: tempDir, json: true as const, query: 'revenue', limit: 10 },
|
||||
{ command: 'wiki-read' as const, projectDir: tempDir, json: true as const, pageId: 'page-1' },
|
||||
]) {
|
||||
const io = makeIo();
|
||||
await expect(runKtxAgent(args, io.io, { createRuntime: async () => runtime() })).resolves.toBe(0);
|
||||
expect(JSON.parse(io.stdout())).toBeTruthy();
|
||||
expect(io.stderr()).toBe('');
|
||||
}
|
||||
});
|
||||
|
||||
it('prints wiki hybrid search metadata from the hidden agent wiki search command', async () => {
|
||||
const fakeRuntime = runtime();
|
||||
const knowledge = fakeRuntime.ports.knowledge;
|
||||
if (!knowledge) {
|
||||
throw new Error('Expected runtime knowledge port');
|
||||
}
|
||||
fakeRuntime.ports.knowledge = {
|
||||
...knowledge,
|
||||
search: vi.fn(async () => ({
|
||||
results: [
|
||||
{
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL' as const,
|
||||
summary: 'Revenue metric definition',
|
||||
score: 0.02459016393442623,
|
||||
matchReasons: ['lexical' as const, 'token' as const],
|
||||
},
|
||||
],
|
||||
totalFound: 1,
|
||||
})),
|
||||
};
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxAgent({ command: 'wiki-search', projectDir: tempDir, json: true, query: 'paid order', limit: 5 }, io.io, {
|
||||
createRuntime: async () => fakeRuntime,
|
||||
}),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(io.stdout())).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
matchReasons: ['lexical', 'token'],
|
||||
}),
|
||||
],
|
||||
totalFound: 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('executes SL queries from a JSON query file', async () => {
|
||||
const queryFile = join(tempDir, 'sl-query.json');
|
||||
const io = makeIo();
|
||||
await writeFile(queryFile, '{"measures":["total_revenue"],"dimensions":[]}', 'utf-8');
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{
|
||||
command: 'sl-query',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
queryFile,
|
||||
execute: true,
|
||||
maxRows: 100,
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'never',
|
||||
},
|
||||
io.io,
|
||||
{ createRuntime: async () => runtime() },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(io.stdout())).toMatchObject({ sql: 'select 1', rows: [[1]] });
|
||||
});
|
||||
|
||||
it('passes managed runtime options into default SL query runtime creation', async () => {
|
||||
const queryFile = join(tempDir, 'sl-query.json');
|
||||
const io = makeIo();
|
||||
const createRuntime = vi.fn(async () => runtime());
|
||||
await writeFile(queryFile, '{"measures":["total_revenue"],"dimensions":[]}', 'utf-8');
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{
|
||||
command: 'sl-query',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
queryFile,
|
||||
execute: false,
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
},
|
||||
io.io,
|
||||
{ createRuntime },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(createRuntime).toHaveBeenCalledWith({
|
||||
projectDir: tempDir,
|
||||
enableSemanticCompute: true,
|
||||
enableQueryExecution: false,
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
io: io.io,
|
||||
});
|
||||
});
|
||||
|
||||
it('executes read-only SQL from a SQL file with an explicit row limit', async () => {
|
||||
const sqlFile = join(tempDir, 'query.sql');
|
||||
const fakeRuntime = runtime();
|
||||
const io = makeIo();
|
||||
await writeFile(sqlFile, 'select 1', 'utf-8');
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{
|
||||
command: 'sql-execute',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
sqlFile,
|
||||
maxRows: 100,
|
||||
},
|
||||
io.io,
|
||||
{ createRuntime: async () => fakeRuntime as never },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(fakeRuntime.queryExecutor?.execute).toHaveBeenCalledWith({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: '/tmp/revenue',
|
||||
connection: { driver: 'sqlite', path: 'warehouse.sqlite', readonly: true },
|
||||
sql: 'select 1',
|
||||
maxRows: 100,
|
||||
});
|
||||
});
|
||||
|
||||
it('prints guided JSON when semantic-layer search runs outside a project', async () => {
|
||||
const io = makeIo();
|
||||
const missingProjectError = Object.assign(new Error('ENOENT: no such file or directory'), {
|
||||
code: 'ENOENT',
|
||||
path: join(tempDir, 'ktx.yaml'),
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{ command: 'sl-list', projectDir: tempDir, json: true, query: 'gross revenue' },
|
||||
io.io,
|
||||
{ createRuntime: vi.fn(async () => Promise.reject(missingProjectError)) },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(JSON.parse(io.stderr())).toEqual({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'agent_sl_search_missing_project',
|
||||
message: `Semantic-layer search needs an initialized KTX project at ${tempDir}.`,
|
||||
nextSteps: [
|
||||
`ktx setup --project-dir ${tempDir}`,
|
||||
`ktx status --project-dir ${tempDir}`,
|
||||
'ktx ingest run --connection-id <connection> --adapter <adapter>',
|
||||
`ktx agent sl list --json --query "gross revenue" --project-dir ${tempDir}`,
|
||||
],
|
||||
},
|
||||
});
|
||||
expect(io.stdout()).toBe('');
|
||||
});
|
||||
|
||||
it('prints guided JSON when semantic-layer search has no configured connections', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{ command: 'sl-list', projectDir: tempDir, json: true, query: 'revenue' },
|
||||
io.io,
|
||||
{ createRuntime: async () => runtimeWithoutConnections() },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(JSON.parse(io.stderr())).toMatchObject({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'agent_sl_search_no_connections',
|
||||
message: `Semantic-layer search found no configured connections in ${tempDir}.`,
|
||||
nextSteps: [
|
||||
`ktx setup --project-dir ${tempDir}`,
|
||||
`ktx status --project-dir ${tempDir}`,
|
||||
'ktx ingest run --connection-id <connection> --adapter <adapter>',
|
||||
`ktx agent sl list --json --query "revenue" --project-dir ${tempDir}`,
|
||||
],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('prints guided JSON when semantic-layer search asks for an unknown connection', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{ command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'missing', query: 'revenue' },
|
||||
io.io,
|
||||
{ createRuntime: async () => runtime() },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(JSON.parse(io.stderr())).toMatchObject({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'agent_sl_search_unknown_connection',
|
||||
message: `Semantic-layer search connection "missing" is not configured in ${tempDir}.`,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('prints guided JSON when semantic-layer search has no indexed sources', async () => {
|
||||
const fakeRuntime = runtime();
|
||||
const semanticLayer = fakeRuntime.ports.semanticLayer!;
|
||||
fakeRuntime.ports.semanticLayer = {
|
||||
...semanticLayer,
|
||||
listSources: vi.fn(async () => ({ sources: [], totalSources: 0 })),
|
||||
};
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxAgent(
|
||||
{ command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'warehouse', query: 'revenue' },
|
||||
io.io,
|
||||
{ createRuntime: async () => fakeRuntime },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(JSON.parse(io.stderr())).toMatchObject({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'agent_sl_search_no_indexed_sources',
|
||||
message: `Semantic-layer search found no indexed semantic-layer sources in ${tempDir}.`,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('returns JSON errors when required ports or records are missing', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxAgent({ command: 'wiki-read', projectDir: tempDir, json: true, pageId: 'missing' }, io.io, {
|
||||
createRuntime: async () =>
|
||||
runtime({
|
||||
ports: { knowledge: { read: vi.fn(async () => null) } },
|
||||
}) as never,
|
||||
}),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(JSON.parse(io.stderr())).toMatchObject({
|
||||
ok: false,
|
||||
error: { message: expect.stringContaining('missing') },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import {
|
||||
createKtxAgentRuntime,
|
||||
parseAgentMaxRows,
|
||||
readAgentJsonFile,
|
||||
writeAgentJson,
|
||||
writeAgentJsonError,
|
||||
type KtxAgentRuntime,
|
||||
type KtxAgentRuntimeDeps,
|
||||
} from './agent-runtime.js';
|
||||
import {
|
||||
isMissingProjectConfigError,
|
||||
missingConnectionSlSearchReadiness,
|
||||
missingProjectSlSearchReadiness,
|
||||
noConnectionsSlSearchReadiness,
|
||||
noIndexedSourcesSlSearchReadiness,
|
||||
type KtxAgentSlSearchReadinessDetail,
|
||||
} from './agent-search-readiness.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
import { readKtxSetupStatus, type KtxSetupStatus } from './setup.js';
|
||||
|
||||
/**
 * Parsed arguments for one `ktx agent` subcommand.
 *
 * A discriminated union keyed on `command`; every variant carries the project
 * directory and the literal `json: true` flag.
 */
export type KtxAgentArgs =
  // Static listing of the agent-facing tools.
  | {
      command: 'tools';
      projectDir: string;
      json: true;
    }
  // Project context: setup status, connections, and semantic-layer sources.
  | {
      command: 'context';
      projectDir: string;
      json: true;
    }
  // List semantic-layer sources, optionally filtered by connection and/or search text.
  | {
      command: 'sl-list';
      projectDir: string;
      json: true;
      connectionId?: string;
      query?: string;
    }
  // Read a single semantic-layer source by name.
  | {
      command: 'sl-read';
      projectDir: string;
      json: true;
      connectionId?: string;
      sourceName: string;
    }
  // Run a semantic-layer query loaded from a JSON file.
  | {
      command: 'sl-query';
      projectDir: string;
      json: true;
      connectionId: string;
      queryFile: string;
      execute: boolean;
      maxRows?: number;
      cliVersion: string;
      runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
    }
  // Search wiki pages.
  | {
      command: 'wiki-search';
      projectDir: string;
      json: true;
      query: string;
      limit: number;
    }
  // Read a single wiki page.
  | {
      command: 'wiki-read';
      projectDir: string;
      json: true;
      pageId: string;
    }
  // Execute SQL read from a file against a connection.
  | {
      command: 'sql-execute';
      projectDir: string;
      json: true;
      connectionId: string;
      sqlFile: string;
      maxRows?: number;
    };
|
||||
|
||||
/**
 * Injectable dependencies for `runKtxAgent`, layered on top of the runtime deps.
 */
export interface KtxAgentDeps extends KtxAgentRuntimeDeps {
  /**
   * Runtime factory override. When provided it is called instead of
   * `createKtxAgentRuntime`; the managed-runtime fields (`cliVersion`,
   * `runtimeInstallPolicy`, `io`) are only supplied for `sl-query`.
   */
  createRuntime?: (options: {
    projectDir: string;
    enableSemanticCompute: boolean;
    enableQueryExecution: boolean;
    cliVersion?: string;
    runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;
    io?: KtxCliIo;
  }) => Promise<KtxAgentRuntime>;

  /**
   * Setup-status reader override. When provided it is used by the `context`
   * command instead of `readKtxSetupStatus`.
   */
  readSetupStatus?: (
    projectDir: string,
  ) => Promise<KtxSetupStatus | { project: { path?: string; ready: boolean }; agents: unknown[] }>;
}
|
||||
|
||||
/**
 * Agent-facing tools and the CLI invocation for each.
 * Emitted verbatim by the `tools` and `context` commands.
 */
const AGENT_TOOLS = [
  {
    name: 'context',
    command: 'ktx agent context --json',
  },
  {
    name: 'sl.list',
    command: 'ktx agent sl list --json [--connection-id <id>] [--query <text>]',
  },
  {
    name: 'sl.read',
    command: 'ktx agent sl read <sourceName> --json [--connection-id <id>]',
  },
  {
    name: 'sl.query',
    command: 'ktx agent sl query --json --connection-id <id> --query-file <path> --execute --max-rows 100',
  },
  {
    name: 'wiki.search',
    command: 'ktx agent wiki search <query> --json [--limit 10]',
  },
  {
    name: 'wiki.read',
    command: 'ktx agent wiki read <pageId> --json',
  },
  {
    name: 'sql.execute',
    command: 'ktx agent sql execute --json --connection-id <id> --sql-file <path> --max-rows 100',
  },
] as const;
|
||||
|
||||
/**
 * Reports a semantic-layer search readiness problem as a JSON error.
 *
 * Forwards the detail's message to `writeAgentJsonError` together with its
 * error code and suggested next steps.
 */
function writeAgentSlSearchReadinessError(io: KtxCliIo, detail: KtxAgentSlSearchReadinessDetail): void {
  const { message, code, nextSteps } = detail;
  writeAgentJsonError(io, message, { code, nextSteps });
}
|
||||
|
||||
/**
 * Creates the agent runtime needed by the given subcommand.
 *
 * Semantic compute is enabled only for `sl-query`. Query execution is enabled
 * for `sql-execute` and for `sl-query` when `execute` is set. The managed-runtime
 * options (`cliVersion`, `runtimeInstallPolicy`, `io`) are passed only for `sl-query`.
 *
 * @param args - Parsed subcommand arguments.
 * @param deps - Dependencies; `deps.createRuntime` replaces the default factory when present.
 * @param io - CLI IO forwarded to runtime creation for `sl-query`.
 * @returns The runtime produced by the selected factory.
 */
async function runtimeFor(args: KtxAgentArgs, deps: KtxAgentDeps, io: KtxCliIo): Promise<KtxAgentRuntime> {
  // Only sl-query carries the managed Python runtime settings.
  const managedRuntimeOptions =
    args.command === 'sl-query'
      ? {
          cliVersion: args.cliVersion,
          runtimeInstallPolicy: args.runtimeInstallPolicy,
          io,
        }
      : {};

  const runtimeOptions = {
    projectDir: args.projectDir,
    enableSemanticCompute: args.command === 'sl-query',
    enableQueryExecution: args.command === 'sql-execute' || (args.command === 'sl-query' && args.execute),
    ...managedRuntimeOptions,
  };

  if (deps.createRuntime) {
    return deps.createRuntime(runtimeOptions);
  }
  return createKtxAgentRuntime(runtimeOptions, deps);
}
|
||||
|
||||
/**
 * Resolves which connection a semantic-layer source read should use.
 *
 * @param runtime - Runtime whose project config lists the configured connections.
 * @param requested - Connection id given by the caller, if any.
 * @returns `requested` when it is non-empty; otherwise the project's only connection id.
 * @throws Error when no id was requested and the project does not have exactly one connection.
 */
function connectionIdForSource(runtime: KtxAgentRuntime, requested: string | undefined): string {
  if (requested) {
    return requested;
  }

  // Fall back to the sole configured connection; anything else is ambiguous.
  const [onlyId, ...otherIds] = Object.keys(runtime.project.config.connections ?? {});
  if (onlyId !== undefined && otherIds.length === 0) {
    return onlyId;
  }

  throw new Error('Use --connection-id when the project has zero or multiple connections.');
}
|
||||
|
||||
/**
 * Executes a single `ktx agent` subcommand and writes its result as JSON.
 *
 * The `tools` command is answered from the static tool list without creating a
 * runtime; every other command first obtains a runtime through `runtimeFor`.
 * Any thrown error is caught and reported through `writeAgentJsonError`.
 * Semantic-layer searches (`sl-list` with a query) additionally report guided
 * readiness errors carrying a code and next steps.
 *
 * @param args - Parsed subcommand arguments.
 * @param io - CLI IO used for JSON output and forwarded to runtime creation.
 * @param deps - Optional overrides for runtime creation and setup-status reading.
 * @returns `0` when the command succeeded, `1` when an error was reported.
 */
export async function runKtxAgent(args: KtxAgentArgs, io: KtxCliIo, deps: KtxAgentDeps = {}): Promise<number> {
  try {
    if (args.command === 'tools') {
      writeAgentJson(io, { projectDir: args.projectDir, tools: AGENT_TOOLS });
      return 0;
    }

    const runtime = await runtimeFor(args, deps, io);

    if (args.command === 'context') {
      // Missing ports degrade to empty results instead of failing the whole context call.
      const [status, connections, semanticLayer] = await Promise.all([
        (deps.readSetupStatus ?? readKtxSetupStatus)(args.projectDir),
        runtime.ports.connections?.list() ?? [],
        runtime.ports.semanticLayer?.listSources({}) ?? { sources: [], totalSources: 0 },
      ]);
      writeAgentJson(io, { projectDir: args.projectDir, status, connections, semanticLayer, tools: AGENT_TOOLS });
      return 0;
    }

    if (args.command === 'sl-list') {
      const semanticLayer = runtime.ports.semanticLayer;
      if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.');

      // Readiness checks only apply to searches; a plain listing is returned as-is.
      if (args.query) {
        const connectionIds = Object.keys(runtime.project.config.connections ?? {});
        // Optional chaining keeps this lookup as tolerant of a missing connections map
        // as the `?? {}` above, so the guided error is reported instead of a TypeError.
        if (args.connectionId && !runtime.project.config.connections?.[args.connectionId]) {
          writeAgentSlSearchReadinessError(
            io,
            missingConnectionSlSearchReadiness(args.projectDir, args.connectionId, args.query),
          );
          return 1;
        }
        if (connectionIds.length === 0) {
          writeAgentSlSearchReadinessError(io, noConnectionsSlSearchReadiness(args.projectDir, args.query));
          return 1;
        }
      }

      const listed = await semanticLayer.listSources({ connectionId: args.connectionId, query: args.query });
      if (args.query && listed.sources.length === 0) {
        // Tell "nothing matched the query" apart from "nothing is indexed at all".
        const allSources = await semanticLayer.listSources({ connectionId: args.connectionId });
        if (allSources.totalSources === 0) {
          writeAgentSlSearchReadinessError(io, noIndexedSourcesSlSearchReadiness(args.projectDir, args.query));
          return 1;
        }
      }

      writeAgentJson(io, listed);
      return 0;
    }

    if (args.command === 'sl-read') {
      const semanticLayer = runtime.ports.semanticLayer;
      if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.');
      const source = await semanticLayer.readSource({
        connectionId: connectionIdForSource(runtime, args.connectionId),
        sourceName: args.sourceName,
      });
      if (!source) throw new Error(`Semantic-layer source "${args.sourceName}" was not found.`);
      writeAgentJson(io, source);
      return 0;
    }

    if (args.command === 'sl-query') {
      const semanticLayer = runtime.ports.semanticLayer;
      if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.');
      const query = await readAgentJsonFile(args.queryFile);
      // The row limit goes through parseAgentMaxRows only when the query will be executed.
      const maxRows = args.execute ? parseAgentMaxRows(args.maxRows) : args.maxRows;
      writeAgentJson(
        io,
        await semanticLayer.query({
          connectionId: args.connectionId,
          query: { ...query, ...(maxRows !== undefined ? { limit: maxRows } : {}) } as never,
        }),
      );
      return 0;
    }

    if (args.command === 'wiki-search') {
      const knowledge = runtime.ports.knowledge;
      if (!knowledge) throw new Error('Wiki tools are not available for this project.');
      writeAgentJson(io, await knowledge.search({ userId: 'agent', query: args.query, limit: args.limit }));
      return 0;
    }

    if (args.command === 'wiki-read') {
      const knowledge = runtime.ports.knowledge;
      if (!knowledge) throw new Error('Wiki tools are not available for this project.');
      const page = await knowledge.read({ userId: 'agent', key: args.pageId });
      if (!page) throw new Error(`Wiki page "${args.pageId}" was not found.`);
      writeAgentJson(io, page);
      return 0;
    }

    // Only `sql-execute` remains at this point.
    const queryExecutor = runtime.queryExecutor;
    if (!queryExecutor) throw new Error('SQL execution is not available for this project.');
    // Optional chaining so a missing connections map yields the "not found" error below.
    const connection = runtime.project.config.connections?.[args.connectionId];
    if (!connection) throw new Error(`Connection "${args.connectionId}" was not found.`);
    const maxRows = parseAgentMaxRows(args.maxRows);
    writeAgentJson(
      io,
      await queryExecutor.execute({
        connectionId: args.connectionId,
        projectDir: runtime.project.projectDir,
        connection,
        sql: await readFile(args.sqlFile, 'utf-8'),
        maxRows,
      }),
    );
    return 0;
  } catch (error) {
    // A search against a directory with no project config gets the guided setup error.
    if (args.command === 'sl-list' && args.query && isMissingProjectConfigError(error)) {
      writeAgentSlSearchReadinessError(io, missingProjectSlSearchReadiness(args.projectDir, args.query));
      return 1;
    }
    writeAgentJsonError(io, error instanceof Error ? error.message : String(error));
    return 1;
  }
}
|
||||
|
|
@ -1,6 +1,5 @@
|
|||
import { Command, InvalidArgumentError } from '@commander-js/extra-typings';
|
||||
import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
|
||||
import { registerAgentCommands } from './commands/agent-commands.js';
|
||||
import { registerConnectionCommands } from './commands/connection-commands.js';
|
||||
import { registerIngestCommands } from './commands/ingest-commands.js';
|
||||
import { registerWikiCommands } from './commands/knowledge-commands.js';
|
||||
|
|
@ -321,7 +320,6 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
|
|||
registerWikiCommands(program, context);
|
||||
registerSlCommands(program, context);
|
||||
registerStatusCommands(program, context);
|
||||
registerAgentCommands(program, context);
|
||||
registerDevCommands(program, context);
|
||||
|
||||
return program;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import { createRequire } from 'node:module';
|
|||
|
||||
import type { KtxConnectionMetabaseSetupArgs } from './commands/connection-metabase-setup.js';
|
||||
import type { KtxConnectionNotionArgs } from './commands/connection-notion.js';
|
||||
import type { KtxAgentArgs } from './agent.js';
|
||||
import type { KtxConnectionArgs } from './connection.js';
|
||||
import type { KtxDoctorArgs } from './doctor.js';
|
||||
import type { KtxIngestArgs } from './ingest.js';
|
||||
|
|
@ -30,7 +29,6 @@ export interface KtxCliIo {
|
|||
|
||||
export interface KtxCliDeps {
|
||||
setup?: (args: KtxSetupArgs, io: KtxCliIo) => Promise<number>;
|
||||
agent?: (args: KtxAgentArgs, io: KtxCliIo) => Promise<number>;
|
||||
connection?: (args: KtxConnectionArgs, io: KtxCliIo) => Promise<number>;
|
||||
connectionNotion?: (args: KtxConnectionNotionArgs, io: KtxCliIo) => Promise<number>;
|
||||
connectionMetabaseSetup?: (args: KtxConnectionMetabaseSetupArgs, io: KtxCliIo) => Promise<number>;
|
||||
|
|
|
|||
|
|
@ -53,15 +53,18 @@ export const slQueryCommandSchema = z.object({
|
|||
command: z.literal('query'),
|
||||
projectDir: projectDirSchema,
|
||||
connectionId: z.string().min(1).optional(),
|
||||
query: z.object({
|
||||
measures: z.array(z.string().min(1)).min(1),
|
||||
dimensions: stringArraySchema,
|
||||
filters: stringArraySchema.optional(),
|
||||
segments: stringArraySchema.optional(),
|
||||
order_by: z.array(orderBySchema).optional(),
|
||||
limit: z.number().int().positive().optional(),
|
||||
include_empty: z.literal(true).optional(),
|
||||
}),
|
||||
query: z
|
||||
.object({
|
||||
measures: z.array(z.string().min(1)).min(1),
|
||||
dimensions: stringArraySchema,
|
||||
filters: stringArraySchema.optional(),
|
||||
segments: stringArraySchema.optional(),
|
||||
order_by: z.array(orderBySchema).optional(),
|
||||
limit: z.number().int().positive().optional(),
|
||||
include_empty: z.literal(true).optional(),
|
||||
})
|
||||
.optional(),
|
||||
queryFile: z.string().min(1).optional(),
|
||||
format: z.enum(['json', 'sql']),
|
||||
execute: z.boolean(),
|
||||
cliVersion: z.string().min(1),
|
||||
|
|
|
|||
|
|
@ -1,149 +0,0 @@
|
|||
import { Option, type Command } from '@commander-js/extra-typings';
|
||||
import type { KtxAgentArgs } from '../agent.js';
|
||||
import type { KtxCliCommandContext } from '../cli-program.js';
|
||||
import { parsePositiveIntegerOption, resolveCommandProjectDir } from '../cli-program.js';
|
||||
import { runtimeInstallPolicyFromFlags } from '../managed-python-command.js';
|
||||
|
||||
/**
 * Runs an agent subcommand and records its exit code on the CLI context.
 *
 * Uses the injected `context.deps.agent` runner when present; otherwise the
 * agent module is imported lazily and its `runKtxAgent` is used.
 */
async function runAgent(context: KtxCliCommandContext, args: KtxAgentArgs): Promise<void> {
  const injectedRunner = context.deps.agent;
  const runner = injectedRunner ?? (await import('../agent.js')).runKtxAgent;
  const exitCode = await runner(args, context.io);
  context.setExitCode(exitCode);
}
|
||||
|
||||
/**
 * Builds the `--json` option shared by the agent subcommands, marked as mandatory.
 */
function jsonOption(): Option {
  const option = new Option('--json', 'Print JSON output');
  return option.makeOptionMandatory();
}
|
||||
|
||||
/**
 * Registers the hidden `agent` command group on the program.
 *
 * Subcommands: `tools`, `context`, `sl list|read|query`, `wiki search|read`,
 * and `sql execute`. Each action translates its parsed options into
 * `KtxAgentArgs` and hands them to `runAgent`.
 *
 * @param program - Root command the `agent` group is attached to.
 * @param context - CLI context providing deps, IO, package info, and exit-code handling.
 */
export function registerAgentCommands(program: Command, context: KtxCliCommandContext): void {
  const agentCommand = program
    .command('agent', { hidden: true })
    .description('Machine-readable KTX commands for coding agents')
    .showHelpAfterError();

  agentCommand.hook('preAction', (_thisCommand, actionCommand) => {
    context.writeDebug?.('agent', actionCommand);
  });

  agentCommand
    .command('tools')
    .description('Print available agent-facing KTX tools')
    .addOption(jsonOption())
    .action(async (_options, command) => {
      const projectDir = resolveCommandProjectDir(command);
      await runAgent(context, { command: 'tools', projectDir, json: true });
    });

  agentCommand
    .command('context')
    .description('Print project context for agent planning')
    .addOption(jsonOption())
    .action(async (_options, command) => {
      const projectDir = resolveCommandProjectDir(command);
      await runAgent(context, { command: 'context', projectDir, json: true });
    });

  // --- Semantic-layer subcommands ---
  const slCommand = agentCommand.command('sl').description('Semantic-layer agent commands');

  slCommand
    .command('list')
    .description('List semantic-layer sources')
    .addOption(jsonOption())
    .option('--connection-id <id>', 'Filter by connection id')
    .option('--query <text>', 'Search source names and descriptions')
    .action(async (options: { connectionId?: string; query?: string }, command) => {
      const projectDir = resolveCommandProjectDir(command);
      const { connectionId, query } = options;
      // Empty or absent flags are omitted rather than passed through.
      await runAgent(context, {
        command: 'sl-list',
        projectDir,
        json: true,
        ...(connectionId ? { connectionId } : {}),
        ...(query ? { query } : {}),
      });
    });

  slCommand
    .command('read')
    .description('Read one semantic-layer source')
    .argument('<sourceName>')
    .addOption(jsonOption())
    .option('--connection-id <id>', 'Connection id containing the source')
    .action(async (sourceName: string, options: { connectionId?: string }, command) => {
      const projectDir = resolveCommandProjectDir(command);
      const { connectionId } = options;
      await runAgent(context, {
        command: 'sl-read',
        projectDir,
        json: true,
        sourceName,
        ...(connectionId ? { connectionId } : {}),
      });
    });

  slCommand
    .command('query')
    .description('Run a semantic-layer query JSON file')
    .addOption(jsonOption())
    .requiredOption('--connection-id <id>', 'Connection id for execution')
    .requiredOption('--query-file <path>', 'JSON semantic-layer query file')
    .option('--execute', 'Execute the compiled query against the connection', false)
    .option('--yes', 'Install the managed Python runtime without prompting when required', false)
    .option('--no-input', 'Disable interactive managed runtime installation')
    .option('--max-rows <number>', 'Maximum rows to return when executing', parsePositiveIntegerOption)
    .action(
      async (
        options: {
          connectionId: string;
          queryFile: string;
          execute: boolean;
          maxRows?: number;
          yes?: boolean;
          input?: boolean;
        },
        command,
      ) => {
        const projectDir = resolveCommandProjectDir(command);
        await runAgent(context, {
          command: 'sl-query',
          projectDir,
          json: true,
          connectionId: options.connectionId,
          queryFile: options.queryFile,
          execute: options.execute,
          cliVersion: context.packageInfo.version,
          runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options),
          ...(options.maxRows !== undefined ? { maxRows: options.maxRows } : {}),
        });
      },
    );

  // --- Wiki subcommands ---
  const wikiCommand = agentCommand.command('wiki').description('KTX wiki agent commands');

  wikiCommand
    .command('search')
    .description('Search KTX wiki pages')
    .argument('<query>')
    .addOption(jsonOption())
    .option('--limit <number>', 'Maximum search results', parsePositiveIntegerOption, 10)
    .action(async (query: string, options: { limit: number }, command) => {
      const projectDir = resolveCommandProjectDir(command);
      await runAgent(context, {
        command: 'wiki-search',
        projectDir,
        json: true,
        query,
        limit: options.limit,
      });
    });

  wikiCommand
    .command('read')
    .description('Read one KTX wiki page')
    .argument('<pageId>')
    .addOption(jsonOption())
    .action(async (pageId: string, _options, command) => {
      const projectDir = resolveCommandProjectDir(command);
      await runAgent(context, { command: 'wiki-read', projectDir, json: true, pageId });
    });

  // --- SQL subcommands ---
  const sqlCommand = agentCommand.command('sql').description('Safe SQL execution commands');

  sqlCommand
    .command('execute')
    .description('Execute read-only SQL with a row limit')
    .addOption(jsonOption())
    .requiredOption('--connection-id <id>', 'Connection id for execution')
    .requiredOption('--sql-file <path>', 'SQL file to execute')
    .requiredOption('--max-rows <number>', 'Maximum rows to return', parsePositiveIntegerOption)
    .action(async (options: { connectionId: string; sqlFile: string; maxRows: number }, command) => {
      const projectDir = resolveCommandProjectDir(command);
      await runAgent(context, {
        command: 'sql-execute',
        projectDir,
        json: true,
        connectionId: options.connectionId,
        sqlFile: options.sqlFile,
        maxRows: options.maxRows,
      });
    });
}
|
||||
|
|
@ -188,7 +188,7 @@ export function registerConnectionCommands(program: Command, context: KtxCliComm
|
|||
registerConnectionNotionCommands(connection, context);
|
||||
}
|
||||
|
||||
export function registerConnectionMappingCommands(connection: Command, context: KtxCliCommandContext): void {
|
||||
function registerConnectionMappingCommands(connection: Command, context: KtxCliCommandContext): void {
|
||||
const mapping = connection
|
||||
.command('mapping')
|
||||
.description('Manage Metabase warehouse mappings')
|
||||
|
|
|
|||
|
|
@ -369,14 +369,6 @@ function setExpanded(state: PickerState, nodeId: string, value: boolean | 'toggl
|
|||
return cloneState(state, { expanded });
|
||||
}
|
||||
|
||||
/**
 * Returns a copy of the picker state in which every ancestor of `nodeId`
 * (as reported by `ancestorsOf`) is added to the expanded set.
 */
function expandPath(state: PickerState, nodeId: string): PickerState {
  // Existing expansions first, then the ancestors, so nothing already open is lost.
  const expanded = new Set([...state.expanded, ...ancestorsOf(nodeId, state.byId)]);
  return cloneState(state, { expanded });
}
|
||||
|
||||
export function moveCursor(state: PickerState, dir: 'up' | 'down' | 'left' | 'right'): PickerState {
|
||||
const node = state.byId.get(state.cursorId);
|
||||
if (!node) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/* @jsxImportSource react */
|
||||
import { render as renderInkTest } from 'ink-testing-library';
|
||||
import React, { act, type ReactNode } from 'react';
|
||||
import { act, type ReactNode } from 'react';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './connection-notion-tree.js';
|
||||
import {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/* @jsxImportSource react */
|
||||
import { Box, Text, render as renderInkRuntime, useApp, useInput } from 'ink';
|
||||
import React, { type ReactNode, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import { type ReactNode, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
filterTree,
|
||||
flattenSelection,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
import { type Command, Option } from '@commander-js/extra-typings';
|
||||
import { collectOption, type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js';
|
||||
import {
|
||||
collectOption,
|
||||
type KtxCliCommandContext,
|
||||
parsePositiveIntegerOption,
|
||||
resolveCommandProjectDir,
|
||||
} from '../cli-program.js';
|
||||
import { wikiWriteCommandSchema } from '../command-schemas.js';
|
||||
import type { KtxKnowledgeArgs } from '../knowledge.js';
|
||||
import { profileMark } from '../startup-profile.js';
|
||||
|
|
@ -24,12 +29,14 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
|
|||
wiki
|
||||
.command('list')
|
||||
.description('List local wiki pages')
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.option('--user-id <id>', 'Local user id', 'local')
|
||||
.action(async (options: { userId: string }, command) => {
|
||||
.action(async (options: { userId: string; json?: boolean }, command) => {
|
||||
await runKnowledgeArgs(context, {
|
||||
command: 'list',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
userId: options.userId,
|
||||
json: options.json,
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -37,13 +44,15 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
|
|||
.command('read')
|
||||
.description('Read one local wiki page')
|
||||
.argument('<key>', 'Wiki page key')
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.option('--user-id <id>', 'Local user id', 'local')
|
||||
.action(async (key: string, options: { userId: string }, command) => {
|
||||
.action(async (key: string, options: { userId: string; json?: boolean }, command) => {
|
||||
await runKnowledgeArgs(context, {
|
||||
command: 'read',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
key,
|
||||
userId: options.userId,
|
||||
json: options.json,
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -51,13 +60,17 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
|
|||
.command('search')
|
||||
.description('Search local wiki pages')
|
||||
.argument('<query>', 'Search query')
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.option('--user-id <id>', 'Local user id', 'local')
|
||||
.action(async (query: string, options: { userId: string }, command) => {
|
||||
.option('--limit <number>', 'Maximum search results', parsePositiveIntegerOption)
|
||||
.action(async (query: string, options: { userId: string; json?: boolean; limit?: number }, command) => {
|
||||
await runKnowledgeArgs(context, {
|
||||
command: 'search',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
query,
|
||||
userId: options.userId,
|
||||
json: options.json,
|
||||
...(options.limit !== undefined ? { limit: options.limit } : {}),
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ async function runRuntimeArgs(context: KtxCliCommandContext, args: KtxRuntimeArg
|
|||
export function registerRuntimeCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
const runtime = program
|
||||
.command('runtime')
|
||||
.description('Install, inspect, and prune the KTX-managed Python runtime')
|
||||
.description('Install, start, stop, and inspect the KTX-managed Python runtime')
|
||||
.showHelpAfterError();
|
||||
|
||||
runtime
|
||||
|
|
@ -64,7 +64,7 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand
|
|||
|
||||
runtime
|
||||
.command('status')
|
||||
.description('Show managed Python runtime status')
|
||||
.description('Show managed Python runtime status and readiness checks')
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.action(async (options: { json?: boolean }) => {
|
||||
await runRuntimeArgs(context, {
|
||||
|
|
@ -73,18 +73,4 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand
|
|||
json: options.json === true,
|
||||
});
|
||||
});
|
||||
|
||||
runtime
|
||||
.command('prune')
|
||||
.description('Remove stale managed Python runtimes for older CLI versions')
|
||||
.option('--dry-run', 'List stale runtimes without deleting them', false)
|
||||
.option('--yes', 'Confirm deletion of stale runtime directories', false)
|
||||
.action(async (options: { dryRun?: boolean; yes?: boolean }) => {
|
||||
await runRuntimeArgs(context, {
|
||||
command: 'prune',
|
||||
cliVersion: context.packageInfo.version,
|
||||
dryRun: options.dryRun === true,
|
||||
yes: options.yes === true,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte
|
|||
sl.command('list')
|
||||
.description('List semantic-layer sources')
|
||||
.option('--connection-id <id>', 'KTX connection id')
|
||||
.option('--query <text>', 'Search source names and descriptions')
|
||||
.addOption(
|
||||
new Option('--output <mode>', 'Output mode: pretty (default in TTY), plain (TSV), or json').choices([
|
||||
'pretty',
|
||||
|
|
@ -59,26 +60,34 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte
|
|||
]),
|
||||
)
|
||||
.option('--json', 'Shortcut for --output=json (overrides --output)', false)
|
||||
.action(async (options: { connectionId?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, command) => {
|
||||
.action(
|
||||
async (
|
||||
options: { connectionId?: string; query?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean },
|
||||
command,
|
||||
) => {
|
||||
await runSlArgs(context, {
|
||||
command: 'list',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
connectionId: options.connectionId,
|
||||
query: options.query,
|
||||
output: options.output,
|
||||
json: options.json,
|
||||
});
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
sl.command('read')
|
||||
.description('Read a semantic-layer source')
|
||||
.argument('<sourceName>', 'Semantic-layer source name')
|
||||
.requiredOption('--connection-id <id>', 'KTX connection id')
|
||||
.action(async (sourceName: string, options: { connectionId: string }, command) => {
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.action(async (sourceName: string, options: { connectionId: string; json?: boolean }, command) => {
|
||||
await runSlArgs(context, {
|
||||
command: 'read',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
connectionId: options.connectionId,
|
||||
sourceName,
|
||||
json: options.json,
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -113,6 +122,7 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte
|
|||
sl.command('query')
|
||||
.description('Compile or execute a semantic-layer query')
|
||||
.option('--connection-id <id>', 'KTX connection id')
|
||||
.option('--query-file <path>', 'JSON semantic-layer query file')
|
||||
.option('--measure <measure>', 'Measure to query; repeatable', collectOption, [])
|
||||
.option('--dimension <dimension>', 'Dimension to include; repeatable', collectOption, [])
|
||||
.option('--filter <filter>', 'Filter expression; repeatable', collectOption, [])
|
||||
|
|
@ -126,22 +136,26 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte
|
|||
.option('--no-input', 'Disable interactive managed runtime installation')
|
||||
.option('--max-rows <n>', 'Maximum rows to return when executing', parsePositiveIntegerOption)
|
||||
.action(async (options, command) => {
|
||||
if (options.measure.length === 0) {
|
||||
if (options.measure.length === 0 && !options.queryFile) {
|
||||
throw new Error('sl query requires at least one --measure');
|
||||
}
|
||||
const args = slQueryCommandSchema.parse({
|
||||
command: 'query',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
connectionId: options.connectionId,
|
||||
query: {
|
||||
measures: options.measure,
|
||||
dimensions: options.dimension,
|
||||
...(options.filter.length > 0 ? { filters: options.filter } : {}),
|
||||
...(options.segment.length > 0 ? { segments: options.segment } : {}),
|
||||
...(options.orderBy.length > 0 ? { order_by: options.orderBy } : {}),
|
||||
...(options.limit !== undefined ? { limit: options.limit } : {}),
|
||||
...(options.includeEmpty === true ? { include_empty: true } : {}),
|
||||
},
|
||||
...(options.queryFile
|
||||
? { queryFile: options.queryFile }
|
||||
: {
|
||||
query: {
|
||||
measures: options.measure,
|
||||
dimensions: options.dimension,
|
||||
...(options.filter.length > 0 ? { filters: options.filter } : {}),
|
||||
...(options.segment.length > 0 ? { segments: options.segment } : {}),
|
||||
...(options.orderBy.length > 0 ? { order_by: options.orderBy } : {}),
|
||||
...(options.limit !== undefined ? { limit: options.limit } : {}),
|
||||
...(options.includeEmpty === true ? { include_empty: true } : {}),
|
||||
},
|
||||
}),
|
||||
format: options.format,
|
||||
execute: options.execute === true,
|
||||
cliVersion: context.packageInfo.version,
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@ describe('dev Commander tree', () => {
|
|||
for (const argv of [
|
||||
['dev', 'doctor', 'setup'],
|
||||
['dev', 'runtime', 'doctor'],
|
||||
['dev', 'runtime', 'prune', '--dry-run'],
|
||||
['dev', 'scan', 'warehouse'],
|
||||
['dev', 'ingest', 'run'],
|
||||
['dev', 'mapping', 'list'],
|
||||
|
|
@ -126,7 +127,7 @@ describe('dev Commander tree', () => {
|
|||
it.each([
|
||||
{
|
||||
argv: ['dev', 'runtime', '--help'],
|
||||
expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status', 'prune'],
|
||||
expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status'],
|
||||
},
|
||||
{
|
||||
argv: ['scan', '--help'],
|
||||
|
|
@ -147,6 +148,10 @@ describe('dev Commander tree', () => {
|
|||
for (const text of expected) {
|
||||
expect(io.stdout()).toContain(text);
|
||||
}
|
||||
if (argv.join(' ') === 'dev runtime --help') {
|
||||
expect(io.stdout()).not.toContain('prune');
|
||||
expect(io.stdout()).not.toContain('doctor');
|
||||
}
|
||||
expect(io.stderr()).toBe('');
|
||||
expect(doctor).not.toHaveBeenCalled();
|
||||
expect(ingest).not.toHaveBeenCalled();
|
||||
|
|
|
|||
|
|
@ -73,26 +73,27 @@ describe('standalone local warehouse example', () => {
|
|||
const projectDir = await copyExampleProject(tempDir);
|
||||
const sourceDir = join(projectDir, 'source');
|
||||
|
||||
const knowledgeList = await runBuiltCli(['agent', 'wiki', 'search', 'revenue', '--json', '--project-dir', projectDir]);
|
||||
const knowledgeList = await runBuiltCli(['wiki', 'search', 'revenue', '--json', '--project-dir', projectDir]);
|
||||
expect(knowledgeList).toMatchObject({ code: 0, stderr: '' });
|
||||
expect(parseJsonOutput<{ results: Array<{ key: string; summary: string }> }>(knowledgeList.stdout).results).toContainEqual(
|
||||
expect.objectContaining({ key: 'revenue', summary: 'Paid order value after refunds' }),
|
||||
);
|
||||
expect(
|
||||
parseJsonOutput<{ data: { items: Array<{ key: string; summary: string }> } }>(knowledgeList.stdout).data.items,
|
||||
).toContainEqual(expect.objectContaining({ key: 'revenue', summary: 'Paid order value after refunds' }));
|
||||
|
||||
const knowledgeRead = await runBuiltCli(['agent', 'wiki', 'read', 'revenue', '--json', '--project-dir', projectDir]);
|
||||
const knowledgeRead = await runBuiltCli(['wiki', 'read', 'revenue', '--json', '--project-dir', projectDir]);
|
||||
expect(knowledgeRead).toMatchObject({ code: 0, stderr: '' });
|
||||
expect(parseJsonOutput<{ content: string }>(knowledgeRead.stdout).content).toContain(
|
||||
expect(parseJsonOutput<{ data: { content: string } }>(knowledgeRead.stdout).data.content).toContain(
|
||||
'Revenue is paid order amount after refund adjustments.',
|
||||
);
|
||||
|
||||
const slList = await runBuiltCli(['agent', 'sl', 'list', '--json', '--project-dir', projectDir, '--connection-id', 'warehouse']);
|
||||
const slList = await runBuiltCli(['sl', 'list', '--json', '--project-dir', projectDir, '--connection-id', 'warehouse']);
|
||||
expect(slList).toMatchObject({ code: 0, stderr: '' });
|
||||
expect(parseJsonOutput<{ sources: Array<{ connectionId: string; name: string; columnCount: number }> }>(slList.stdout).sources).toContainEqual(
|
||||
expect.objectContaining({ connectionId: 'warehouse', name: 'orders', columnCount: 3 }),
|
||||
);
|
||||
expect(
|
||||
parseJsonOutput<{ data: { items: Array<{ connectionId: string; name: string; columnCount: number }> } }>(
|
||||
slList.stdout,
|
||||
).data.items,
|
||||
).toContainEqual(expect.objectContaining({ connectionId: 'warehouse', name: 'orders', columnCount: 3 }));
|
||||
|
||||
const slRead = await runBuiltCli([
|
||||
'agent',
|
||||
'sl',
|
||||
'read',
|
||||
'orders',
|
||||
|
|
@ -103,7 +104,7 @@ describe('standalone local warehouse example', () => {
|
|||
projectDir,
|
||||
]);
|
||||
expect(slRead).toMatchObject({ code: 0, stderr: '' });
|
||||
expect(parseJsonOutput<{ yaml: string }>(slRead.stdout).yaml).toContain('name: orders');
|
||||
expect(parseJsonOutput<{ data: { yaml: string } }>(slRead.stdout).data.yaml).toContain('name: orders');
|
||||
|
||||
const ingest = await runBuiltCli([
|
||||
'ingest',
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { createRequire } from 'node:module';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
|
|
@ -159,7 +159,7 @@ describe('runKtxCli', () => {
|
|||
await expect(runKtxCli(['dev', 'runtime', 'stop'], stopIo.io, { runtime })).resolves.toBe(0);
|
||||
await expect(runKtxCli(['dev', 'runtime', 'stop', '--all'], stopAllIo.io, { runtime })).resolves.toBe(0);
|
||||
await expect(runKtxCli(['dev', 'runtime', 'status', '--json'], statusIo.io, { runtime })).resolves.toBe(0);
|
||||
await expect(runKtxCli(['dev', 'runtime', 'prune', '--dry-run'], pruneIo.io, { runtime })).resolves.toBe(0);
|
||||
await expect(runKtxCli(['dev', 'runtime', 'prune', '--dry-run'], pruneIo.io, { runtime })).resolves.toBe(1);
|
||||
|
||||
expect(runtime).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
|
|
@ -208,19 +208,11 @@ describe('runKtxCli', () => {
|
|||
},
|
||||
statusIo.io,
|
||||
);
|
||||
expect(runtime).toHaveBeenNthCalledWith(
|
||||
6,
|
||||
{
|
||||
command: 'prune',
|
||||
cliVersion: '0.0.0-private',
|
||||
dryRun: true,
|
||||
yes: false,
|
||||
},
|
||||
pruneIo.io,
|
||||
);
|
||||
for (const io of [installIo, startIo, stopIo, stopAllIo, statusIo, pruneIo]) {
|
||||
expect(runtime).toHaveBeenCalledTimes(5);
|
||||
for (const io of [installIo, startIo, stopIo, stopAllIo, statusIo]) {
|
||||
expect(io.stderr()).toBe('');
|
||||
}
|
||||
expect(pruneIo.stderr()).toMatch(/unknown command|error:/);
|
||||
});
|
||||
|
||||
it('prints the resolved project directory for ordinary project commands', async () => {
|
||||
|
|
@ -1149,136 +1141,28 @@ describe('runKtxCli', () => {
|
|||
expect(setupIo.stderr()).toContain('Choose only one Historic SQL action');
|
||||
});
|
||||
|
||||
it('registers hidden agent help and tools discovery without showing agent in root help', async () => {
|
||||
const helpIo = makeIo();
|
||||
const toolsIo = makeIo();
|
||||
const agent = vi.fn(async () => 0);
|
||||
it('rejects the removed hidden agent command', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(runKtxCli(['agent', '--help'], helpIo.io, { agent })).resolves.toBe(0);
|
||||
await expect(
|
||||
runKtxCli(['--project-dir', tempDir, 'agent', 'tools', '--json'], toolsIo.io, { agent }),
|
||||
).resolves.toBe(0);
|
||||
await expect(runKtxCli(['agent'], io.io)).resolves.toBe(1);
|
||||
|
||||
expect(helpIo.stdout()).toContain('Usage: ktx agent');
|
||||
expect(toolsIo.stderr()).toBe('');
|
||||
expect(agent).toHaveBeenCalledWith({ command: 'tools', projectDir: tempDir, json: true }, toolsIo.io);
|
||||
expect(io.stderr()).toContain("unknown command 'agent'");
|
||||
expect(io.stdout()).toBe('');
|
||||
});
|
||||
|
||||
it('dispatches full hidden agent commands without exposing agent in root help', async () => {
|
||||
const agent = vi.fn(async () => 0);
|
||||
const cases = [
|
||||
{
|
||||
argv: ['--project-dir', tempDir, 'agent', 'context', '--json'],
|
||||
args: { command: 'context', projectDir: tempDir, json: true },
|
||||
},
|
||||
{
|
||||
argv: [
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'agent',
|
||||
'sl',
|
||||
'list',
|
||||
'--json',
|
||||
'--connection-id',
|
||||
'warehouse',
|
||||
'--query',
|
||||
'orders',
|
||||
],
|
||||
args: { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'warehouse', query: 'orders' },
|
||||
},
|
||||
{
|
||||
argv: ['--project-dir', tempDir, 'agent', 'sl', 'read', 'orders', '--json', '--connection-id', 'warehouse'],
|
||||
args: { command: 'sl-read', projectDir: tempDir, json: true, sourceName: 'orders', connectionId: 'warehouse' },
|
||||
},
|
||||
{
|
||||
argv: [
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'agent',
|
||||
'sl',
|
||||
'query',
|
||||
'--json',
|
||||
'--connection-id',
|
||||
'warehouse',
|
||||
'--query-file',
|
||||
'/tmp/query.json',
|
||||
'--execute',
|
||||
'--max-rows',
|
||||
'100',
|
||||
],
|
||||
args: {
|
||||
command: 'sl-query',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
queryFile: '/tmp/query.json',
|
||||
execute: true,
|
||||
maxRows: 100,
|
||||
cliVersion: '0.0.0-private',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: ['--project-dir', tempDir, 'agent', 'wiki', 'search', 'revenue', '--json', '--limit', '5'],
|
||||
args: { command: 'wiki-search', projectDir: tempDir, json: true, query: 'revenue', limit: 5 },
|
||||
},
|
||||
{
|
||||
argv: ['--project-dir', tempDir, 'agent', 'wiki', 'read', 'page-1', '--json'],
|
||||
args: { command: 'wiki-read', projectDir: tempDir, json: true, pageId: 'page-1' },
|
||||
},
|
||||
{
|
||||
argv: [
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'agent',
|
||||
'sql',
|
||||
'execute',
|
||||
'--json',
|
||||
'--connection-id',
|
||||
'warehouse',
|
||||
'--sql-file',
|
||||
'/tmp/query.sql',
|
||||
'--max-rows',
|
||||
'100',
|
||||
],
|
||||
args: {
|
||||
command: 'sql-execute',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
sqlFile: '/tmp/query.sql',
|
||||
maxRows: 100,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
for (const entry of cases) {
|
||||
const io = makeIo();
|
||||
await expect(runKtxCli(entry.argv, io.io, { agent })).resolves.toBe(0);
|
||||
expect(agent).toHaveBeenLastCalledWith(entry.args, io.io);
|
||||
expect(io.stderr()).toBe('');
|
||||
}
|
||||
|
||||
const helpIo = makeIo();
|
||||
await expect(runKtxCli(['--help'], helpIo.io, { agent })).resolves.toBe(0);
|
||||
expect(helpIo.stdout()).not.toContain('agent ');
|
||||
});
|
||||
|
||||
it('routes hidden agent SL query managed runtime policies', async () => {
|
||||
it('routes public SL query files with managed runtime policies', async () => {
|
||||
const autoIo = makeIo();
|
||||
const neverIo = makeIo();
|
||||
const conflictIo = makeIo();
|
||||
const agent = vi.fn(async () => 0);
|
||||
const sl = vi.fn(async () => 0);
|
||||
|
||||
await expect(
|
||||
runKtxCli(
|
||||
[
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'agent',
|
||||
'sl',
|
||||
'query',
|
||||
'--json',
|
||||
'--connection-id',
|
||||
'warehouse',
|
||||
'--query-file',
|
||||
|
|
@ -1286,7 +1170,7 @@ describe('runKtxCli', () => {
|
|||
'--yes',
|
||||
],
|
||||
autoIo.io,
|
||||
{ agent },
|
||||
{ sl },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
|
|
@ -1295,10 +1179,8 @@ describe('runKtxCli', () => {
|
|||
[
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'agent',
|
||||
'sl',
|
||||
'query',
|
||||
'--json',
|
||||
'--connection-id',
|
||||
'warehouse',
|
||||
'--query-file',
|
||||
|
|
@ -1306,7 +1188,7 @@ describe('runKtxCli', () => {
|
|||
'--no-input',
|
||||
],
|
||||
neverIo.io,
|
||||
{ agent },
|
||||
{ sl },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
|
|
@ -1315,10 +1197,8 @@ describe('runKtxCli', () => {
|
|||
[
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'agent',
|
||||
'sl',
|
||||
'query',
|
||||
'--json',
|
||||
'--connection-id',
|
||||
'warehouse',
|
||||
'--query-file',
|
||||
|
|
@ -1327,33 +1207,33 @@ describe('runKtxCli', () => {
|
|||
'--no-input',
|
||||
],
|
||||
conflictIo.io,
|
||||
{ agent },
|
||||
{ sl },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(agent).toHaveBeenNthCalledWith(
|
||||
expect(sl).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
{
|
||||
command: 'sl-query',
|
||||
command: 'query',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
queryFile: '/tmp/query.json',
|
||||
execute: false,
|
||||
format: 'json',
|
||||
cliVersion: '0.0.0-private',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
},
|
||||
autoIo.io,
|
||||
);
|
||||
expect(agent).toHaveBeenNthCalledWith(
|
||||
expect(sl).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
{
|
||||
command: 'sl-query',
|
||||
command: 'query',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
queryFile: '/tmp/query.json',
|
||||
execute: false,
|
||||
format: 'json',
|
||||
cliVersion: '0.0.0-private',
|
||||
runtimeInstallPolicy: 'never',
|
||||
},
|
||||
|
|
@ -1362,112 +1242,6 @@ describe('runKtxCli', () => {
|
|||
expect(conflictIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input');
|
||||
});
|
||||
|
||||
it('prints semantic-layer hybrid search metadata from the hidden agent sl list command', async () => {
|
||||
const agent = vi.fn(async (args, io) => {
|
||||
expect(args).toEqual({
|
||||
command: 'sl-list',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
connectionId: 'warehouse',
|
||||
query: 'paid',
|
||||
});
|
||||
io.stdout.write(
|
||||
`${JSON.stringify(
|
||||
{
|
||||
sources: [
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
connectionName: 'warehouse',
|
||||
name: 'orders',
|
||||
columnCount: 2,
|
||||
measureCount: 1,
|
||||
joinCount: 0,
|
||||
score: 0.03278688524590164,
|
||||
matchReasons: ['dictionary'],
|
||||
dictionaryMatches: [{ column: 'status', values: ['paid'] }],
|
||||
},
|
||||
],
|
||||
totalSources: 1,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
);
|
||||
return 0;
|
||||
});
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxCli(
|
||||
['--project-dir', tempDir, 'agent', 'sl', 'list', '--json', '--connection-id', 'warehouse', '--query', 'paid'],
|
||||
io.io,
|
||||
{ agent },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(io.stdout())).toEqual({
|
||||
sources: [
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
name: 'orders',
|
||||
matchReasons: ['dictionary'],
|
||||
dictionaryMatches: [{ column: 'status', values: ['paid'] }],
|
||||
}),
|
||||
],
|
||||
totalSources: 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('prints wiki hybrid search metadata from the hidden agent wiki search command', async () => {
|
||||
const agent = vi.fn(async (args, io) => {
|
||||
expect(args).toEqual({
|
||||
command: 'wiki-search',
|
||||
projectDir: tempDir,
|
||||
json: true,
|
||||
query: 'paid order',
|
||||
limit: 5,
|
||||
});
|
||||
io.stdout.write(
|
||||
`${JSON.stringify(
|
||||
{
|
||||
results: [
|
||||
{
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Revenue metric definition',
|
||||
score: 0.02459016393442623,
|
||||
matchReasons: ['lexical', 'token'],
|
||||
},
|
||||
],
|
||||
totalFound: 1,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
);
|
||||
return 0;
|
||||
});
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxCli(['--project-dir', tempDir, 'agent', 'wiki', 'search', 'paid order', '--json', '--limit', '5'], io.io, {
|
||||
agent,
|
||||
}),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(io.stdout())).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({
|
||||
key: 'metrics-revenue',
|
||||
path: 'knowledge/global/metrics-revenue.md',
|
||||
matchReasons: ['lexical', 'token'],
|
||||
}),
|
||||
],
|
||||
totalFound: 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('dispatches public connection subcommands through the existing connection implementation', async () => {
|
||||
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-connection-dispatch-'));
|
||||
const connection = vi.fn(async () => 0);
|
||||
|
|
|
|||
|
|
@ -9,17 +9,6 @@ export {
|
|||
type KtxCliIo,
|
||||
type KtxCliPackageInfo,
|
||||
} from './cli-runtime.js';
|
||||
export { runKtxAgent, type KtxAgentArgs } from './agent.js';
|
||||
export {
|
||||
KTX_AGENT_MAX_ROWS_CAP,
|
||||
createKtxAgentRuntime,
|
||||
parseAgentMaxRows,
|
||||
readAgentJsonFile,
|
||||
writeAgentJson,
|
||||
writeAgentJsonError,
|
||||
type KtxAgentRuntime,
|
||||
type KtxAgentRuntimeDeps,
|
||||
} from './agent-runtime.js';
|
||||
export { runKtxSetup, type KtxSetupArgs, type KtxSetupStatus } from './setup.js';
|
||||
export type {
|
||||
KtxSetupDatabaseDriver,
|
||||
|
|
|
|||
86
packages/cli/src/ingest-query-executor.test.ts
Normal file
86
packages/cli/src/ingest-query-executor.test.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import { createKtxConnectorCapabilities, type KtxScanConnector } from '@ktx/context/scan';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
|
||||
/**
 * Test fixture: a project object with a fixed `projectDir` and a config that
 * declares a single `warehouse` postgres connection.
 *
 * Only these fields are populated; the literal is cast through `unknown` to
 * `KtxLocalProject` rather than being checked against the full type.
 */
function project(): KtxLocalProject {
|
||||
return {
|
||||
projectDir: '/tmp/ktx-query-project',
|
||||
config: {
|
||||
project: 'warehouse',
|
||||
connections: {
|
||||
warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
|
||||
},
|
||||
},
|
||||
} as unknown as KtxLocalProject;
|
||||
}
|
||||
|
||||
/**
 * Test fixture: a `warehouse`/`postgres` scan connector whose capabilities are
 * built with `readOnlySql: true`.
 *
 * `executeReadOnly` and `cleanup` are `vi.fn` mocks so tests can assert on their
 * calls; `introspect` throws because these tests never use it.
 *
 * @param overrides Fields spread last, so they replace the defaults above.
 */
function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
|
||||
return {
|
||||
id: 'warehouse',
|
||||
driver: 'postgres',
|
||||
capabilities: createKtxConnectorCapabilities({ readOnlySql: true }),
|
||||
async introspect() {
|
||||
throw new Error('introspect is not used by this test');
|
||||
},
|
||||
// Canned single-row result returned for every read-only query.
executeReadOnly: vi.fn(async () => ({
|
||||
headers: ['answer'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
rowCount: 1,
|
||||
})),
|
||||
cleanup: vi.fn(async () => {}),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// Unit tests for createKtxCliIngestQueryExecutor using an injected connector factory.
describe('createKtxCliIngestQueryExecutor', () => {
|
||||
// Happy path: the connector's result is returned with command 'SELECT' added,
// and the connector is cleaned up once.
it('executes read-only SQL through the scan connector and cleans it up', async () => {
|
||||
const scanConnector = connector();
|
||||
const createConnector = vi.fn(async () => scanConnector);
|
||||
const executor = createKtxCliIngestQueryExecutor(project(), { createConnector });
|
||||
|
||||
await expect(
|
||||
executor.execute({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
|
||||
projectDir: '/tmp/ktx-query-project',
|
||||
sql: 'select 1',
|
||||
maxRows: 5,
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
headers: ['answer'],
|
||||
rows: [[1]],
|
||||
totalRows: 1,
|
||||
command: 'SELECT',
|
||||
rowCount: 1,
|
||||
});
|
||||
|
||||
expect(createConnector).toHaveBeenCalledWith(project(), 'warehouse');
|
||||
// Only connectionId, sql and maxRows are forwarded, together with a fixed runId context.
expect(scanConnector.executeReadOnly).toHaveBeenCalledWith(
|
||||
{ connectionId: 'warehouse', sql: 'select 1', maxRows: 5 },
|
||||
{ runId: 'ingest-sql-execution' },
|
||||
);
|
||||
expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Failure path: a connector without read-only SQL support is rejected,
// and cleanup still runs once.
it('rejects connectors without read-only SQL support', async () => {
|
||||
const scanConnector = connector({
|
||||
capabilities: createKtxConnectorCapabilities({ readOnlySql: false }),
|
||||
executeReadOnly: undefined,
|
||||
});
|
||||
const executor = createKtxCliIngestQueryExecutor(project(), {
|
||||
createConnector: vi.fn(async () => scanConnector),
|
||||
});
|
||||
|
||||
await expect(
|
||||
executor.execute({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'postgres' },
|
||||
projectDir: '/tmp/ktx-query-project',
|
||||
sql: 'select 1',
|
||||
}),
|
||||
).rejects.toThrow('Connection "warehouse" driver "postgres" does not support read-only SQL execution.');
|
||||
expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
49
packages/cli/src/ingest-query-executor.ts
Normal file
49
packages/cli/src/ingest-query-executor.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import type { KtxScanConnector, KtxScanContext } from '@ktx/context/scan';
|
||||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
|
||||
type CreateConnector = typeof createKtxCliScanConnector;
|
||||
|
||||
/**
 * Optional dependency overrides accepted by `createKtxCliIngestQueryExecutor`.
 */
export interface KtxCliIngestQueryExecutorDeps {
|
||||
/** Connector factory used instead of `createKtxCliScanConnector` when provided. */
createConnector?: CreateConnector;
|
||||
}
|
||||
|
||||
/**
 * Calls the connector's optional `cleanup` hook and awaits it.
 * Does nothing when `connector` is `null` or has no `cleanup` method.
 */
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
|
||||
await connector?.cleanup?.();
|
||||
}
|
||||
|
||||
/**
 * Creates a SQL query executor that runs each query through a scan connector.
 *
 * Every `execute` call creates a connector for `input.connectionId`, runs the
 * SQL via the connector's `executeReadOnly`, and cleans the connector up
 * afterwards, whether the call succeeded or threw.
 *
 * @param project Project passed to the connector factory.
 * @param deps Optional overrides; `deps.createConnector` replaces the default
 *   `createKtxCliScanConnector` factory.
 * @returns An executor whose `execute` resolves to the connector's headers,
 *   rows, totalRows and rowCount, with `command` fixed to `'SELECT'`.
 * @throws Error from `execute` when the connector does not report
 *   `capabilities.readOnlySql` or has no `executeReadOnly` method.
 */
export function createKtxCliIngestQueryExecutor(
|
||||
project: KtxLocalProject,
|
||||
deps: KtxCliIngestQueryExecutorDeps = {},
|
||||
): KtxSqlQueryExecutorPort {
|
||||
const createConnector = deps.createConnector ?? createKtxCliScanConnector;
|
||||
return {
|
||||
async execute(input: KtxSqlQueryExecutionInput) {
|
||||
// Stays null if connector creation fails, so the finally block has nothing to clean up.
let connector: KtxScanConnector | null = null;
|
||||
try {
|
||||
connector = await createConnector(project, input.connectionId);
|
||||
// Both the capability flag and the method itself must be present.
if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
|
||||
throw new Error(
|
||||
`Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`,
|
||||
);
|
||||
}
|
||||
|
||||
// Every query from this executor uses the same fixed run id.
const ctx: KtxScanContext = { runId: 'ingest-sql-execution' };
|
||||
const result = await connector.executeReadOnly(
|
||||
{ connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows },
|
||||
ctx,
|
||||
);
|
||||
return {
|
||||
headers: result.headers,
|
||||
rows: result.rows,
|
||||
totalRows: result.totalRows,
|
||||
// The command is always reported as SELECT; it is not taken from the connector result.
command: 'SELECT',
|
||||
rowCount: result.rowCount,
|
||||
};
|
||||
} finally {
|
||||
await cleanupConnector(connector);
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -1,10 +1,8 @@
|
|||
import { EventEmitter } from 'node:events';
|
||||
import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent';
|
||||
import {
|
||||
LocalLookerRuntimeStore,
|
||||
KtxYamlMetabaseSourceStateReader,
|
||||
LocalMetabaseDiscoveryCache,
|
||||
MetabaseSourceAdapter,
|
||||
|
|
@ -13,12 +11,10 @@ import {
|
|||
type FetchContext,
|
||||
type IngestReportSnapshot,
|
||||
type LocalIngestResult,
|
||||
type LocalMetabaseFanoutProgress,
|
||||
type LookerMappingClient,
|
||||
type LookerRuntimeClient,
|
||||
type LookerTableIdentifierParser,
|
||||
type MemoryFlowEventSink,
|
||||
type MemoryFlowReplayInput,
|
||||
type MetabaseCard,
|
||||
type MetabaseCardSummary,
|
||||
type MetabaseClientFactory,
|
||||
|
|
@ -29,7 +25,7 @@ import {
|
|||
} from '@ktx/context/ingest';
|
||||
import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project';
|
||||
import { expect, vi } from 'vitest';
|
||||
import { type KtxIngestArgs, runKtxIngest } from './ingest.js';
|
||||
import { runKtxIngest } from './ingest.js';
|
||||
|
||||
export function makeIo(
|
||||
options: {
|
||||
|
|
@ -266,6 +262,18 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
|
||||
) {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'cli-looker-verification-ledger', messages: [] },
|
||||
);
|
||||
const slWrite = params.toolSet.sl_write_source;
|
||||
if (!slWrite?.execute) {
|
||||
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
|
||||
|
|
|
|||
|
|
@ -4,10 +4,8 @@ import { join } from 'node:path';
|
|||
import {
|
||||
LocalLookerRuntimeStore,
|
||||
LocalMetabaseDiscoveryCache,
|
||||
getLocalIngestStatus,
|
||||
type LocalIngestResult,
|
||||
type LocalMetabaseFanoutProgress,
|
||||
type MemoryFlowReplayInput,
|
||||
type RunLocalIngestOptions,
|
||||
type SourceAdapter,
|
||||
} from '@ktx/context/ingest';
|
||||
|
|
@ -20,7 +18,6 @@ import {
|
|||
CliMetabaseAgentRunner,
|
||||
CliMetabaseSourceAdapter,
|
||||
completedLocalBundleRun,
|
||||
emitLiveLocalMemoryFlow,
|
||||
failedLocalBundleRun,
|
||||
localFakeBundleReport,
|
||||
makeCliLookerParser,
|
||||
|
|
@ -28,7 +25,6 @@ import {
|
|||
makeIo,
|
||||
persistLocalBundleReport,
|
||||
runPublicMetabaseSyncModeCase,
|
||||
writeBundleReportFile,
|
||||
writeMetabaseConfig,
|
||||
writeWarehouseConfig,
|
||||
} from './ingest.test-utils.js';
|
||||
|
|
@ -803,6 +799,44 @@ describe('runKtxIngest', () => {
|
|||
expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ llmDebugRequestFile: debugFile }));
|
||||
});
|
||||
|
||||
// Verifies that the executor produced by deps.createQueryExecutor is handed to
// runLocalIngest as the `queryExecutor` option.
it('supplies a scan-connector query executor to local ingest runs', async () => {
|
||||
const io = makeIo();
|
||||
const projectDir = join(tempDir, 'query-executor-project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
// Stub executor; the test only checks that this exact object is forwarded.
const queryExecutor = {
|
||||
execute: vi.fn(async () => ({
|
||||
headers: [],
|
||||
rows: [],
|
||||
totalRows: 0,
|
||||
command: 'SELECT',
|
||||
rowCount: 0,
|
||||
})),
|
||||
};
|
||||
const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> =>
|
||||
completedLocalBundleRun(input, 'query-executor-run'),
|
||||
);
|
||||
|
||||
await expect(
|
||||
runKtxIngest(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir,
|
||||
connectionId: 'warehouse',
|
||||
adapter: 'fake',
|
||||
outputMode: 'json',
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
runLocalIngest,
|
||||
createAdapters: () => [],
|
||||
createQueryExecutor: () => queryExecutor,
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ queryExecutor }));
|
||||
});
|
||||
|
||||
it('passes daemon database introspection URL to default local ingest adapters', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -16,7 +16,9 @@ import {
|
|||
runLocalMetabaseIngest,
|
||||
savedMemoryCountsForReport,
|
||||
} from '@ktx/context/ingest';
|
||||
import { loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
|
||||
import { createCliOperationalLogger } from './io/logger.js';
|
||||
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
|
||||
|
|
@ -69,6 +71,7 @@ interface KtxIngestDeps {
|
|||
jobIdFactory?: () => string;
|
||||
now?: () => Date;
|
||||
createAdapters?: typeof createKtxCliLocalIngestAdapters;
|
||||
createQueryExecutor?: (project: KtxLocalProject) => KtxSqlQueryExecutorPort;
|
||||
runLocalIngest?: typeof runLocalIngest;
|
||||
runLocalMetabaseIngest?: typeof runLocalMetabaseIngest;
|
||||
readReportFile?: typeof readIngestReportSnapshotFile;
|
||||
|
|
@ -532,6 +535,9 @@ export async function runKtxIngest(
|
|||
...(args.adapter === 'historic-sql' ? { historicSqlConnectionId: args.connectionId } : {}),
|
||||
logger: operationalLogger,
|
||||
};
|
||||
const queryExecutor =
|
||||
localIngestOptions.queryExecutor ??
|
||||
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project);
|
||||
if (args.adapter === 'metabase' && args.sourceDir) {
|
||||
throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter');
|
||||
}
|
||||
|
|
@ -544,6 +550,7 @@ export async function runKtxIngest(
|
|||
adapters: createAdapters(project, adapterOptions),
|
||||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
...(progress ? { progress } : {}),
|
||||
|
|
@ -604,6 +611,7 @@ export async function runKtxIngest(
|
|||
trigger: 'manual_resync',
|
||||
jobId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
pullConfigOptions: adapterOptions,
|
||||
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
||||
...(memoryFlow ? { memoryFlow } : {}),
|
||||
|
|
|
|||
|
|
@ -93,6 +93,65 @@ describe('runKtxKnowledge', () => {
|
|||
expect(searchIo.stdout()).toContain('metrics-revenue');
|
||||
});
|
||||
|
||||
// Writes one page, then checks that `list`, `search` and `read` with `json: true`
// print a JSON envelope with `kind` and `data` (plus `meta.command` for list/search).
it('prints wiki list, search, and read as public JSON envelopes', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
||||
// Seed a single page for the following commands to find.
await expect(
|
||||
runKtxKnowledge(
|
||||
{
|
||||
command: 'write',
|
||||
projectDir,
|
||||
key: 'metrics-revenue',
|
||||
scope: 'GLOBAL',
|
||||
userId: 'local',
|
||||
summary: 'Revenue',
|
||||
content: 'Revenue is paid order value.',
|
||||
tags: ['finance'],
|
||||
refs: [],
|
||||
slRefs: ['orders'],
|
||||
},
|
||||
makeIo().io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const listIo = makeIo();
|
||||
await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local', json: true }, listIo.io)).resolves.toBe(
|
||||
0,
|
||||
);
|
||||
expect(JSON.parse(listIo.stdout())).toMatchObject({
|
||||
kind: 'list',
|
||||
data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] },
|
||||
meta: { command: 'wiki list' },
|
||||
});
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge(
|
||||
{ command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, limit: 5 },
|
||||
searchIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
expect(JSON.parse(searchIo.stdout())).toMatchObject({
|
||||
kind: 'list',
|
||||
data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] },
|
||||
meta: { command: 'wiki search' },
|
||||
});
|
||||
|
||||
const readIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local', json: true }, readIo.io),
|
||||
).resolves.toBe(0);
|
||||
// A single page is reported with kind 'wiki.page' rather than 'list'.
expect(JSON.parse(readIo.stdout())).toMatchObject({
|
||||
kind: 'wiki.page',
|
||||
data: {
|
||||
key: 'metrics-revenue',
|
||||
summary: 'Revenue',
|
||||
content: 'Revenue is paid order value.',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('rejects slash-delimited write keys with a flat-key suggestion', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -11,11 +11,12 @@ import {
|
|||
searchLocalKnowledgePages,
|
||||
writeLocalKnowledgePage,
|
||||
} from '@ktx/context/wiki';
|
||||
import { writeJsonResult } from './io/print-list.js';
|
||||
|
||||
export type KtxKnowledgeArgs =
|
||||
| { command: 'list'; projectDir: string; userId: string }
|
||||
| { command: 'read'; projectDir: string; key: string; userId: string }
|
||||
| { command: 'search'; projectDir: string; query: string; userId: string }
|
||||
| { command: 'list'; projectDir: string; userId: string; json?: boolean }
|
||||
| { command: 'read'; projectDir: string; key: string; userId: string; json?: boolean }
|
||||
| { command: 'search'; projectDir: string; query: string; userId: string; json?: boolean; limit?: number }
|
||||
| {
|
||||
command: 'write';
|
||||
projectDir: string;
|
||||
|
|
@ -61,6 +62,14 @@ export async function runKtxKnowledge(
|
|||
const project = await loadKtxProject({ projectDir: args.projectDir });
|
||||
if (args.command === 'list') {
|
||||
const pages = await listLocalKnowledgePages(project, { userId: args.userId });
|
||||
if (args.json) {
|
||||
writeJsonResult(io, {
|
||||
kind: 'list',
|
||||
data: { items: pages },
|
||||
meta: { command: 'wiki list' },
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
for (const page of pages) {
|
||||
io.stdout.write(`${page.scope}\t${page.key}\t${page.summary}\n`);
|
||||
}
|
||||
|
|
@ -71,6 +80,14 @@ export async function runKtxKnowledge(
|
|||
if (!page) {
|
||||
throw new Error(`Knowledge page "${args.key}" was not found`);
|
||||
}
|
||||
if (args.json) {
|
||||
writeJsonResult(io, {
|
||||
kind: 'wiki.page',
|
||||
data: page,
|
||||
meta: { command: 'wiki read' },
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
io.stdout.write(`# ${page.key}\n\n`);
|
||||
io.stdout.write(`Scope: ${page.scope}\n`);
|
||||
io.stdout.write(`Summary: ${page.summary}\n\n`);
|
||||
|
|
@ -82,7 +99,16 @@ export async function runKtxKnowledge(
|
|||
query: args.query,
|
||||
userId: args.userId,
|
||||
embeddingService: wikiSearchEmbeddingService(project, deps),
|
||||
limit: args.limit,
|
||||
});
|
||||
if (args.json) {
|
||||
writeJsonResult(io, {
|
||||
kind: 'list',
|
||||
data: { items: results },
|
||||
meta: { command: 'wiki search' },
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
if (results.length === 0) {
|
||||
const pages = await listLocalKnowledgePages(project, { userId: args.userId });
|
||||
if (pages.length === 0) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from 'node:fs/promises';
|
||||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
|
@ -8,7 +8,6 @@ import {
|
|||
doctorManagedPythonRuntime,
|
||||
installManagedPythonRuntime,
|
||||
managedPythonRuntimeLayout,
|
||||
pruneManagedPythonRuntimes,
|
||||
readManagedPythonRuntimeStatus,
|
||||
verifyRuntimeAsset,
|
||||
type ManagedPythonRuntimeExec,
|
||||
|
|
@ -471,41 +470,3 @@ describe('doctorManagedPythonRuntime', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises pruneManagedPythonRuntimes against a throwaway runtime root on disk.
describe('pruneManagedPythonRuntimes', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-runtime-prune-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('removes stale version directories and keeps the current version', async () => {
    const runtimeRoot = join(tempDir, 'runtime');
    await mkdir(join(runtimeRoot, '0.1.0'), { recursive: true });
    await mkdir(join(runtimeRoot, '0.2.0'), { recursive: true });
    // A plain file in the root must be ignored by the prune (only directories count).
    await writeFile(join(runtimeRoot, 'README.txt'), 'not a runtime directory\n');

    const result = await pruneManagedPythonRuntimes({ cliVersion: '0.2.0', runtimeRoot });

    expect(result.removed).toEqual([join(runtimeRoot, '0.1.0')]);
    expect(result.kept).toEqual([join(runtimeRoot, '0.2.0')]);
    await expect(stat(join(runtimeRoot, '0.1.0'))).rejects.toThrow();
    // readdir does not document an ordering, so sort before comparing.
    expect((await readdir(runtimeRoot)).sort()).toEqual(['0.2.0', 'README.txt']);
  });

  it('supports dry-run without deleting stale directories', async () => {
    const runtimeRoot = join(tempDir, 'runtime');
    await mkdir(join(runtimeRoot, '0.1.0'), { recursive: true });
    await mkdir(join(runtimeRoot, '0.2.0'), { recursive: true });

    const result = await pruneManagedPythonRuntimes({ cliVersion: '0.2.0', runtimeRoot, dryRun: true });

    expect(result.removed).toEqual([]);
    expect(result.stale).toEqual([join(runtimeRoot, '0.1.0')]);
    // Sorted for the same reason as above: the listing order is not guaranteed.
    expect((await readdir(runtimeRoot)).sort()).toEqual(['0.1.0', '0.2.0']);
  });
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { execFile } from 'node:child_process';
|
||||
import { createHash } from 'node:crypto';
|
||||
import { access, appendFile, mkdir, readFile, readdir, rm, stat, writeFile } from 'node:fs/promises';
|
||||
import { access, appendFile, mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { homedir } from 'node:os';
|
||||
import { basename, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
|
@ -107,13 +107,6 @@ export interface ManagedPythonRuntimeDoctorCheck {
|
|||
fix?: string;
|
||||
}
|
||||
|
||||
/**
 * Outcome of pruning managed Python runtime directories under a runtime root.
 */
export interface ManagedPythonRuntimePruneResult {
|
||||
/** Runtime root that was scanned, echoed back from the caller's options. */
runtimeRoot: string;
|
||||
/** Paths of directories under the root whose name differs from the current CLI version. */
stale: string[];
|
||||
/** Paths of directories under the root whose name equals the current CLI version. */
kept: string[];
|
||||
/** Stale paths that were actually deleted; stays empty on a dry run. */
removed: string[];
|
||||
}
|
||||
|
||||
export const MISSING_UV_RUNTIME_INSTALL_MESSAGE =
|
||||
'uv is required to install the KTX Python runtime. KTX does not download uv automatically. Install uv, make sure it is on PATH, and retry: ktx dev runtime install --yes';
|
||||
|
||||
|
|
@ -441,36 +434,3 @@ export async function doctorManagedPythonRuntime(
|
|||
);
|
||||
return checks;
|
||||
}
|
||||
|
||||
/**
 * Classifies the directories directly under `runtimeRoot` as current or stale and,
 * unless this is a dry run, deletes the stale ones.
 *
 * A directory is "kept" when its name equals `cliVersion`; every other directory is
 * "stale". Non-directory entries are ignored.
 *
 * @param options.cliVersion - Directory name that identifies the runtime to keep.
 * @param options.runtimeRoot - Directory whose children are inspected.
 * @param options.dryRun - When `true`, nothing is deleted and `removed` stays empty.
 * @returns The scanned root plus the stale, kept, and removed paths (sorted by entry name).
 * @throws Any filesystem error other than an entry vanishing while it is being inspected.
 */
export async function pruneManagedPythonRuntimes(options: {
  cliVersion: string;
  runtimeRoot: string;
  dryRun?: boolean;
}): Promise<ManagedPythonRuntimePruneResult> {
  const { cliVersion, runtimeRoot } = options;
  if (!(await pathExists(runtimeRoot))) {
    return { runtimeRoot, stale: [], kept: [], removed: [] };
  }

  // Sort so the reported paths do not depend on the platform's directory listing order.
  const entries = (await readdir(runtimeRoot)).sort();
  const stale: string[] = [];
  const kept: string[] = [];
  for (const entry of entries) {
    const path = join(runtimeRoot, entry);
    let isDirectory: boolean;
    try {
      isDirectory = (await stat(path)).isDirectory();
    } catch (error: unknown) {
      // A dangling symlink or an entry removed since readdir must not abort the whole prune.
      if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
        continue;
      }
      throw error;
    }
    if (!isDirectory) {
      continue;
    }
    (entry === cliVersion ? kept : stale).push(path);
  }

  const removed: string[] = [];
  if (options.dryRun !== true) {
    // Deleted one at a time so `removed` only lists paths whose rm call completed.
    for (const path of stale) {
      await rm(path, { recursive: true, force: true });
      removed.push(path);
    }
  }
  return { runtimeRoot, stale, kept, removed };
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* @jsxImportSource react */
|
||||
import type { MemoryFlowEvent, MemoryFlowReplayInput } from '@ktx/context/ingest/memory-flow';
|
||||
import { Box, Text } from 'ink';
|
||||
import React, { type ReactNode } from 'react';
|
||||
import { type ReactNode } from 'react';
|
||||
import { buildDemoMetrics, formatCost, formatDuration } from './demo-metrics.js';
|
||||
import { formatNextStepLines } from './next-steps.js';
|
||||
import { profileMark } from './startup-profile.js';
|
||||
|
|
@ -38,45 +38,6 @@ function isPrepopulatedDemoReplay(input: MemoryFlowReplayInput): boolean {
|
|||
return input.metadata?.origin === 'packaged' || input.metadata?.timing === 'prebuilt';
|
||||
}
|
||||
|
||||
/**
 * Renders a horizontal rule, optionally with a shaded pulse travelling along it.
 *
 * @param width - Number of cells to render; negative or NaN widths render as an empty string.
 * @param frame - Animation frame counter; each frame moves the pulse two cells to the right.
 * @param active - When `false`, a plain rule is returned and `frame` is ignored.
 * @returns The rendered line.
 */
function flowLine(width: number, frame: number, active: boolean): string {
  // String.prototype.repeat throws a RangeError for negative counts, whereas the
  // animated path simply renders nothing; clamp so both paths agree.
  const cells = width > 0 ? width : 0;
  if (!active) return '━'.repeat(cells);

  const pulse = ['░', '▒', '▓', '█', '█', '█', '▓', '▒', '░'];
  const pw = pulse.length;
  // The pulse occupies cells [offset - pw, offset) and wraps every (cells + pw) steps.
  const offset = (frame * 2) % (cells + pw);
  let line = '';
  for (let i = 0; i < cells; i += 1) {
    // Any index outside the pulse yields undefined and falls back to the plain rule glyph.
    line += pulse[i - offset + pw] ?? '━';
  }
  return line;
}
|
||||
|
||||
/**
 * Builds a row of Braille pattern characters whose dot density grows from left to right.
 *
 * Each character is U+2800 plus a dot bitmask
 * (dot 1=0x01, 2=0x02, 3=0x04, 4=0x08, 5=0x10, 6=0x20, 7=0x40, 8=0x80).
 *
 * @param width - Number of characters to produce.
 * @param frame - Animation frame counter; shifts the phase of every column.
 * @returns The rendered row.
 */
function brailleFlow(width: number, frame: number): string {
  // [phase shift, modulus, density multiplier, dot bit] — one rule per Braille dot.
  const dotRules: ReadonlyArray<readonly [number, number, number, number]> = [
    [0, 4, 4, 0x01], // dot 1
    [1, 5, 4, 0x08], // dot 4
    [2, 4, 3, 0x02], // dot 2
    [3, 5, 3, 0x10], // dot 5
    [4, 4, 2.5, 0x04], // dot 3
    [5, 5, 2.5, 0x20], // dot 6
    [1, 6, 2, 0x40], // dot 7
    [3, 6, 2, 0x80], // dot 8
  ];

  let row = '';
  for (let column = 0; column < width; column += 1) {
    // Density rises linearly to 1 at the last column, so more rules fire on the right.
    const density = (column + 1) / width;
    const phase = (column * 3 + frame * 2) % 12;
    let mask = 0;
    for (const [shift, modulus, scale, bit] of dotRules) {
      if ((phase + shift) % modulus < density * scale) {
        mask |= bit;
      }
    }
    row += String.fromCharCode(0x2800 + mask);
  }
  return row;
}
|
||||
|
||||
function progressBarOverall(
|
||||
finishedCount: number,
|
||||
activeCount: number,
|
||||
|
|
@ -104,43 +65,6 @@ function progressBarOverall(
|
|||
return finished + activeChars.join('') + '░'.repeat(queuedWidth);
|
||||
}
|
||||
|
||||
/**
 * Renders one row of a "wipe" animation: block noise ahead of a sweeping edge,
 * a shaded gradient at the edge, and sparkle glyphs behind it.
 *
 * @param width - Number of cells to render.
 * @param frame - Animation frame counter; advances the sweep two cells per frame.
 * @param row - Row index; offsets the sweep position and the pseudo-random patterns.
 * @returns The cell glyphs joined into a single string.
 */
function sparkleWipe(width: number, frame: number, row: number): string {
  const bigSparkle = '✨';
  const filledStar = '✦';
  const hollowStar = '✧';
  const dot = '·';
  const gradient = ['░', '▒', '▓', '█'];
  const messy = ['░', '▒', '▓', '▒', '░'];
  const sweepPos = (frame * 2 + row * 6) % (width + 8);

  // Chooses the glyph for one cell from its signed distance to the sweep position.
  const glyphAt = (cell: number): string => {
    const dist = cell - sweepPos;
    if (dist < -6) {
      // Far behind the sweep: mostly blank with occasional sparkles.
      const t = (cell * 11 + row * 5 + frame * 3) % 10;
      if (t === 0) return bigSparkle;
      if (t === 3) return filledStar;
      if (t === 7) return hollowStar;
      return ' ';
    }
    if (dist < -3) {
      // Just behind the sweep: a dense three-glyph sparkle cycle.
      const t = (cell + frame) % 3;
      if (t === 0) return filledStar;
      if (t === 1) return hollowStar;
      return dot;
    }
    if (dist <= 0) {
      return gradient[Math.min(3, dist + 3)] ?? '█';
    }
    if (dist <= 2) {
      return dist === 1 ? '▓' : '▒';
    }
    // Ahead of the sweep: deterministic block noise.
    const noise = (cell * 31 + row * 17 + frame * 3) % 5;
    return messy[noise] ?? '▒';
  };

  let line = '';
  for (let cell = 0; cell < width; cell += 1) {
    line += glyphAt(cell);
  }
  return line;
}
|
||||
|
||||
/**
 * Renders a sine wave as a row of eighth-block bar glyphs.
 *
 * @param width - Number of bars to render.
 * @param frame - Animation frame counter; shifts the wave by one step per frame.
 * @param offset - Additional phase shift, worth five steps per unit.
 * @returns The rendered wave.
 */
function activityWave(width: number, frame: number, offset: number): string {
  const bars = '▁▂▃▄▅▆▇█';
  const topIndex = bars.length - 1;
  let rendered = '';
  for (let column = 0; column < width; column += 1) {
    const angle = ((column * 2 + frame + offset * 5) * Math.PI) / 6;
    // Map sin's [-1, 1] range onto a bar index in [0, topIndex].
    const level = Math.round(((Math.sin(angle) + 1) / 2) * topIndex);
    rendered += bars[level] ?? '▁';
  }
  return rendered;
}
|
||||
|
||||
/**
 * Turns a slash-delimited key into a readable topic name: keeps the last path
 * segment, drops a trailing `.md`, and replaces underscores and hyphens with spaces.
 */
function topicName(key: string): string {
  const segments = key.split('/');
  const lastSegment = segments.pop();
  const baseName = lastSegment == null ? key : lastSegment.replace(/\.md$/, '');
  return baseName.replace(/[_-]/g, ' ');
}
|
||||
|
|
@ -155,18 +79,9 @@ function humanizeInsight(key: string, target: 'sl' | 'wiki', summary: string | u
|
|||
return target === 'sl' ? `Query definition: ${name}` : `Knowledge page: ${name}`;
|
||||
}
|
||||
|
||||
// Adapter name prefixes that are stripped from unit keys before display.
const ADAPTER_PREFIXES = [
  'live_database_',
  'metabase_',
  'looker_',
  'lookml_',
  'metricflow_',
  'notion_',
  'historic_sql_',
  'dbt_descriptions_',
];
const INTERNAL_DEMO_CONNECTION_ID = 'orbit_demo';
const PUBLIC_DEMO_SOURCE_LABEL = 'Orbit Demo';

/**
 * Converts a work-unit key into display text: hyphens are treated as underscores,
 * the first matching adapter prefix (if any) is removed, and the remaining
 * underscores become spaces.
 */
function humanizeUnitKey(unitKey: string): string {
  const normalized = unitKey.replace(/-/g, '_');
  const matchedPrefix = ADAPTER_PREFIXES.find((prefix) => normalized.startsWith(prefix));
  const withoutPrefix = matchedPrefix === undefined ? normalized : normalized.slice(matchedPrefix.length);
  return withoutPrefix.replace(/_/g, ' ');
}
|
||||
|
||||
interface SourceInfo {
|
||||
type: string;
|
||||
name: string;
|
||||
|
|
@ -224,13 +139,6 @@ function sourceDescription(input: MemoryFlowReplayInput): SourceInfo {
|
|||
return { type: info.type, name: conn, sourceCount: count, itemNounPlural: info.plural, readingVerb: info.verb, ingestDescription: info.description };
|
||||
}
|
||||
|
||||
/**
 * Returns the last entry reported by `activeWorkUnits` for this replay input,
 * or `null` when there are none.
 */
function activeWorkUnit(
  input: MemoryFlowReplayInput,
): { unitKey: string; stepIndex: number; stepBudget: number } | null {
  const latest = activeWorkUnits(input).at(-1);
  return latest ?? null;
}
|
||||
|
||||
function activeWorkUnits(
|
||||
input: MemoryFlowReplayInput,
|
||||
): Array<{ unitKey: string; stepIndex: number; stepBudget: number }> {
|
||||
|
|
@ -299,22 +207,6 @@ function finishedUnits(input: MemoryFlowReplayInput): Array<{ unitKey: string; a
|
|||
return units;
|
||||
}
|
||||
|
||||
/**
 * Tallies `candidate_action` events by target: events targeting `'sl'` count
 * towards `sl`, every other `candidate_action` counts towards `wiki`.
 */
function artifactCounts(input: MemoryFlowReplayInput): { sl: number; wiki: number } {
  const totals = { sl: 0, wiki: 0 };
  for (const event of input.events) {
    if (event.type !== 'candidate_action') {
      continue;
    }
    if (event.target === 'sl') {
      totals.sl += 1;
    } else {
      totals.wiki += 1;
    }
  }
  return totals;
}
|
||||
|
||||
/**
 * Right-pads `str` with spaces until it is at least `width` UTF-16 code units long.
 * Strings that are already that long are returned unchanged.
 */
function pad(str: string, width: number): string {
  // padEnd is the standard-library equivalent of the manual length check + repeat.
  return str.padEnd(width, ' ');
}
|
||||
|
||||
const KTX_LOGO_SMALL = [
|
||||
'██╗ ██╗████████╗██╗ ██╗',
|
||||
'██║ ██╔╝╚══██╔══╝╚██╗██╔╝',
|
||||
|
|
@ -344,12 +236,7 @@ export function Hud(props: {
|
|||
width: number;
|
||||
now?: () => number;
|
||||
}): ReactNode {
|
||||
const isRunning = props.input.status === 'running';
|
||||
const isDone = props.input.status === 'done';
|
||||
const isFlowing = isRunning && hasWorkStarted(props.input);
|
||||
|
||||
const src = sourceDescription(props.input);
|
||||
const counts = artifactCounts(props.input);
|
||||
const metrics = buildDemoMetrics(props.input, props.now ? { now: props.now } : {});
|
||||
const workStarted = hasWorkStarted(props.input);
|
||||
|
||||
|
|
@ -358,11 +245,6 @@ export function Hud(props: {
|
|||
|
||||
const innerWidth = Math.max(60, props.width - 6);
|
||||
|
||||
const actives = activeWorkUnits(props.input);
|
||||
const reconEvent = props.input.events.find((e) => e.type === 'reconciliation_finished');
|
||||
const allAnalyzed = isFlowing && actives.length === 0;
|
||||
const isReconciling = allAnalyzed && !reconEvent && !isDone;
|
||||
|
||||
const hLine = '─'.repeat(innerWidth);
|
||||
|
||||
const elapsed = formatDuration(metrics.elapsedMs);
|
||||
|
|
@ -429,7 +311,6 @@ export function ActivityFeed(props: {
|
|||
|
||||
const workStarted = hasWorkStarted(props.input);
|
||||
const totalChunks = planEvent?.chunkCount ?? 0;
|
||||
const finishedWithArtifacts = finished.filter((u) => u.artifactCount > 0);
|
||||
const finishedAreas = totalChunks > 0 ? Math.min(finished.length, totalChunks) : finished.length;
|
||||
const allWorkDone = workStarted && actives.length === 0 && queued.length === 0;
|
||||
const isReconciling = allWorkDone && !reconEvent && !isDone && !isError;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import {
|
|||
startLiveMemoryFlowTui,
|
||||
type KtxMemoryFlowTuiIo,
|
||||
type MemoryFlowInkInstance,
|
||||
type MemoryFlowInkRenderOptions,
|
||||
} from './memory-flow-tui.js';
|
||||
|
||||
function replayInput(): MemoryFlowReplayInput {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
/* @jsxImportSource react */
|
||||
import {
|
||||
buildMemoryFlowViewModel,
|
||||
buildMemoryFlowVisualModel,
|
||||
createInitialMemoryFlowInteractionState,
|
||||
findMemoryFlowSearchMatches,
|
||||
type MemoryFlowColumnId,
|
||||
|
|
@ -14,8 +13,7 @@ import {
|
|||
selectedMemoryFlowDetails,
|
||||
} from '@ktx/context/ingest';
|
||||
import { Box, Text, render as renderInkRuntime, useApp, useInput } from 'ink';
|
||||
import React, { type ReactNode, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import { buildDemoMetrics } from './demo-metrics.js';
|
||||
import { type ReactNode, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
ActivityFeed,
|
||||
Hud,
|
||||
|
|
@ -201,14 +199,6 @@ function stageLabel(columnId: MemoryFlowColumnId): string {
|
|||
return STAGE_LABELS[columnId];
|
||||
}
|
||||
|
||||
/**
 * Maps a status string to its short display tag; unrecognized statuses map to `'WAIT'`.
 */
function statusLabel(status: string): 'OK' | 'RUN' | 'WARN' | 'FAIL' | 'WAIT' {
  switch (status) {
    case 'complete':
      return 'OK';
    case 'active':
      return 'RUN';
    case 'warning':
      return 'WARN';
    case 'failed':
      return 'FAIL';
    default:
      return 'WAIT';
  }
}
|
||||
|
||||
/**
 * Short display label for the current filter: `'issues'` for the
 * `failed_or_flagged` filter, `'all'` for anything else.
 */
function filterLabel(filter: MemoryFlowInteractionState['filter']): string {
  if (filter === 'failed_or_flagged') {
    return 'issues';
  }
  return 'all';
}
|
||||
|
|
@ -325,7 +315,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode {
|
|||
const view = useMemo(() => buildMemoryFlowViewModel(pacedInput), [pacedInput]);
|
||||
const [state, setState] = useState<MemoryFlowInteractionState>(() => createInitialMemoryFlowInteractionState(view));
|
||||
const [frame, setFrame] = useState(0);
|
||||
const [thoughtFrame, setThoughtFrame] = useState(0);
|
||||
const [completionFrame, setCompletionFrame] = useState(0);
|
||||
const [holdComplete, setHoldComplete] = useState(false);
|
||||
const [userHasNavigated, setUserHasNavigated] = useState(false);
|
||||
|
|
@ -346,7 +335,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode {
|
|||
useEffect(() => {
|
||||
const timer = setInterval(() => {
|
||||
setFrame((current) => current + 1);
|
||||
setThoughtFrame((current) => current + 1);
|
||||
}, props.frameMs ?? DEFAULT_TUI_TIMING.frameMs);
|
||||
return () => clearInterval(timer);
|
||||
}, [props.frameMs]);
|
||||
|
|
@ -354,7 +342,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode {
|
|||
useEffect(() => {
|
||||
if (lastEventCountRef.current !== pacedInput.events.length) {
|
||||
lastEventCountRef.current = pacedInput.events.length;
|
||||
setThoughtFrame(0);
|
||||
}
|
||||
}, [pacedInput.events.length]);
|
||||
|
||||
|
|
@ -409,10 +396,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode {
|
|||
});
|
||||
|
||||
const isComplete = pacedInput.status === 'done' || pacedInput.status === 'error';
|
||||
const completionMetrics = useMemo(
|
||||
() => buildDemoMetrics(pacedInput, pacedNow ? { now: pacedNow } : {}),
|
||||
[pacedInput, pacedNow],
|
||||
);
|
||||
|
||||
const termWidth = props.terminalWidth ?? 80;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,12 +25,8 @@ describe('KTX demo next steps', () => {
|
|||
it('uses supported final public commands', () => {
|
||||
expect(KTX_NEXT_STEP_COMMANDS).toEqual([
|
||||
{
|
||||
command: 'ktx agent context --json',
|
||||
description: 'Verify the project context your agent can read',
|
||||
},
|
||||
{
|
||||
command: 'ktx agent tools --json',
|
||||
description: 'List direct CLI tools available to agents',
|
||||
command: 'ktx status --json',
|
||||
description: 'Verify project setup and context readiness',
|
||||
},
|
||||
{
|
||||
command: 'ktx sl list',
|
||||
|
|
@ -46,8 +42,8 @@ describe('KTX demo next steps', () => {
|
|||
it('uses only the direct CLI route for agent verification', () => {
|
||||
const commands = KTX_NEXT_STEP_COMMANDS.map((step) => step.command);
|
||||
|
||||
expect(commands).toContain('ktx agent context --json');
|
||||
expect(commands).toContain('ktx agent tools --json');
|
||||
expect(commands).not.toContain('ktx agent context --json');
|
||||
expect(commands).toContain('ktx status --json');
|
||||
expect(commands).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
});
|
||||
|
||||
|
|
@ -64,8 +60,8 @@ describe('KTX demo next steps', () => {
|
|||
it('does not advertise removed Commander migration commands', () => {
|
||||
const rendered = formatNextStepLines().join('\n');
|
||||
|
||||
expect(rendered).toContain('ktx agent tools --json');
|
||||
expect(rendered).toContain('ktx agent context --json');
|
||||
expect(rendered).toContain('ktx status --json');
|
||||
expect(rendered).not.toContain('ktx agent');
|
||||
expect(rendered).toContain('ktx sl list');
|
||||
expect(rendered).toContain('ktx wiki list');
|
||||
|
||||
|
|
@ -109,7 +105,8 @@ describe('KTX demo next steps', () => {
|
|||
}).join('\n');
|
||||
|
||||
expect(rendered).toContain('KTX context is ready for agents.');
|
||||
expect(rendered).toContain('ktx agent context --json');
|
||||
expect(rendered).toContain('ktx status --json');
|
||||
expect(rendered).not.toContain('ktx agent');
|
||||
expect(rendered).not.toContain('ktx serve --mcp stdio --user-id local');
|
||||
expect(rendered).not.toContain('Build KTX context next.');
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,12 +11,8 @@ export const KTX_CONTEXT_BUILD_COMMANDS = [
|
|||
|
||||
export const KTX_NEXT_STEP_DIRECT_COMMANDS = [
|
||||
{
|
||||
command: 'ktx agent context --json',
|
||||
description: 'Verify the project context your agent can read',
|
||||
},
|
||||
{
|
||||
command: 'ktx agent tools --json',
|
||||
description: 'List direct CLI tools available to agents',
|
||||
command: 'ktx status --json',
|
||||
description: 'Verify project setup and context readiness',
|
||||
},
|
||||
{
|
||||
command: 'ktx sl list',
|
||||
|
|
|
|||
|
|
@ -35,8 +35,7 @@ describe('project directory defaults', () => {
|
|||
const ingest = vi.fn(async () => 0);
|
||||
const scan = vi.fn(async () => 0);
|
||||
const setup = vi.fn(async () => 0);
|
||||
const agent = vi.fn(async () => 0);
|
||||
const deps: KtxCliDeps = { agent, connection, doctor, ingest, scan, setup };
|
||||
const deps: KtxCliDeps = { connection, doctor, ingest, scan, setup };
|
||||
|
||||
const cases: Array<{
|
||||
argv: string[];
|
||||
|
|
@ -74,12 +73,6 @@ describe('project directory defaults', () => {
|
|||
expected: { command: 'run', projectDir: '/tmp/ktx-env-project', connectionId: 'warehouse' },
|
||||
expectedStderr: 'Project: /tmp/ktx-env-project\n',
|
||||
},
|
||||
{
|
||||
argv: ['agent', 'tools', '--json'],
|
||||
spy: agent,
|
||||
expected: { command: 'tools', projectDir: '/tmp/ktx-env-project' },
|
||||
expectedStderr: '',
|
||||
},
|
||||
];
|
||||
|
||||
for (const item of cases) {
|
||||
|
|
|
|||
|
|
@ -1,5 +0,0 @@
|
|||
import { resolve } from 'node:path';
|
||||
|
||||
/**
 * Resolves the project directory to an absolute path.
 *
 * @param projectDir - Directory to resolve; when omitted, `fallback` is used instead.
 * @param fallback - Directory used when `projectDir` is not provided (defaults to `'.'`).
 * @returns The absolute path produced by `path.resolve`.
 */
export function resolveProjectDir(projectDir?: string, fallback = '.'): string {
  const target = projectDir == null ? fallback : projectDir;
  return resolve(target);
}
|
||||
|
|
@ -6,9 +6,9 @@ import { profileMark } from './startup-profile.js';
|
|||
|
||||
profileMark('module:public-ingest');
|
||||
|
||||
export type KtxPublicIngestStepName = 'scan' | 'source-ingest' | 'enrich' | 'memory-update';
|
||||
export type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run';
|
||||
export type KtxPublicIngestInputMode = 'auto' | 'disabled';
|
||||
type KtxPublicIngestStepName = 'scan' | 'source-ingest' | 'enrich' | 'memory-update';
|
||||
type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run';
|
||||
type KtxPublicIngestInputMode = 'auto' | 'disabled';
|
||||
|
||||
export type KtxPublicIngestArgs =
|
||||
| {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import type {
|
|||
ManagedPythonDaemonStopResult,
|
||||
} from './managed-python-daemon.js';
|
||||
import type {
|
||||
ManagedPythonRuntimeDoctorCheck,
|
||||
ManagedPythonRuntimeInstallResult,
|
||||
ManagedPythonRuntimeStatus,
|
||||
} from './managed-python-runtime.js';
|
||||
|
|
@ -256,7 +257,7 @@ describe('runKtxRuntime', () => {
|
|||
expect(io.stderr()).toContain('process scan: ps failed');
|
||||
});
|
||||
|
||||
it('prints runtime status as JSON', async () => {
|
||||
it('prints runtime status and doctor checks as JSON with doctor-style exit status', async () => {
|
||||
const io = makeIo();
|
||||
const deps: KtxRuntimeDeps = {
|
||||
readStatus: vi.fn(async (): Promise<ManagedPythonRuntimeStatus> => ({
|
||||
|
|
@ -278,38 +279,41 @@ describe('runKtxRuntime', () => {
|
|||
daemonStderrPath: '/runtime/0.2.0/daemon.stderr.log',
|
||||
},
|
||||
})),
|
||||
doctorRuntime: vi.fn(async (): Promise<ManagedPythonRuntimeDoctorCheck[]> => [
|
||||
{ id: 'uv', label: 'uv', status: 'pass', detail: 'uv 0.9.5' },
|
||||
{ id: 'asset', label: 'Bundled Python wheel', status: 'pass', detail: '/assets/python/runtime.whl' },
|
||||
{
|
||||
id: 'runtime',
|
||||
label: 'Managed Python runtime',
|
||||
status: 'fail',
|
||||
detail: 'No runtime manifest at /runtime/0.2.0/manifest.json',
|
||||
fix: 'Run: ktx dev runtime install --yes',
|
||||
},
|
||||
]),
|
||||
};
|
||||
|
||||
await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: true }, io.io, deps)).resolves.toBe(0);
|
||||
await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: true }, io.io, deps)).resolves.toBe(1);
|
||||
|
||||
expect(JSON.parse(io.stdout())).toMatchObject({
|
||||
kind: 'missing',
|
||||
detail: 'No runtime manifest at /runtime/0.2.0/manifest.json',
|
||||
layout: { runtimeRoot: '/runtime' },
|
||||
checks: [
|
||||
{ id: 'uv', status: 'pass' },
|
||||
{ id: 'asset', status: 'pass' },
|
||||
{ id: 'runtime', status: 'fail' },
|
||||
],
|
||||
});
|
||||
expect(deps.readStatus).toHaveBeenCalledWith({ cliVersion: '0.2.0' });
|
||||
expect(deps.doctorRuntime).toHaveBeenCalledWith({ cliVersion: '0.2.0' });
|
||||
});
|
||||
|
||||
it('requires --yes before pruning stale runtime directories', async () => {
|
||||
const io = makeIo();
|
||||
const deps: KtxRuntimeDeps = {
|
||||
pruneRuntime: vi.fn(async () => {
|
||||
throw new Error('should not prune without --yes');
|
||||
}),
|
||||
};
|
||||
|
||||
await expect(runKtxRuntime({ command: 'prune', cliVersion: '0.2.0', dryRun: false, yes: false }, io.io, deps))
|
||||
.resolves.toBe(1);
|
||||
|
||||
expect(io.stderr()).toContain('Refusing to prune without --yes');
|
||||
expect(deps.pruneRuntime).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('prints stale directories during prune dry-run', async () => {
|
||||
it('prints runtime status and doctor checks in plain output', async () => {
|
||||
const io = makeIo();
|
||||
const deps: KtxRuntimeDeps = {
|
||||
readStatus: vi.fn(async (): Promise<ManagedPythonRuntimeStatus> => ({
|
||||
kind: 'missing',
|
||||
detail: 'No runtime manifest at /runtime/0.2.0/manifest.json',
|
||||
kind: 'ready',
|
||||
detail: 'Runtime ready at /runtime/0.2.0',
|
||||
layout: {
|
||||
cliVersion: '0.2.0',
|
||||
runtimeRoot: '/runtime',
|
||||
|
|
@ -325,19 +329,43 @@ describe('runKtxRuntime', () => {
|
|||
daemonStdoutPath: '/runtime/0.2.0/daemon.stdout.log',
|
||||
daemonStderrPath: '/runtime/0.2.0/daemon.stderr.log',
|
||||
},
|
||||
manifest: {
|
||||
schemaVersion: 1,
|
||||
cliVersion: '0.2.0',
|
||||
installedAt: '2026-05-11T00:00:00.000Z',
|
||||
asset: {
|
||||
schemaVersion: 1,
|
||||
distributionName: 'kaelio-ktx',
|
||||
normalizedName: 'kaelio_ktx',
|
||||
version: '0.1.0',
|
||||
wheel: {
|
||||
file: 'kaelio_ktx-0.1.0-py3-none-any.whl',
|
||||
sha256: 'a'.repeat(64),
|
||||
bytes: 10,
|
||||
},
|
||||
},
|
||||
features: ['core'],
|
||||
python: {
|
||||
executable: '/runtime/0.2.0/.venv/bin/python',
|
||||
daemonExecutable: '/runtime/0.2.0/.venv/bin/ktx-daemon',
|
||||
},
|
||||
installLog: '/runtime/0.2.0/install.log',
|
||||
},
|
||||
})),
|
||||
pruneRuntime: vi.fn(async () => ({
|
||||
runtimeRoot: '/runtime',
|
||||
stale: ['/runtime/0.1.0'],
|
||||
kept: ['/runtime/0.2.0'],
|
||||
removed: [],
|
||||
})),
|
||||
doctorRuntime: vi.fn(async (): Promise<ManagedPythonRuntimeDoctorCheck[]> => [
|
||||
{ id: 'uv', label: 'uv', status: 'pass', detail: 'uv 0.9.5' },
|
||||
{ id: 'asset', label: 'Bundled Python wheel', status: 'pass', detail: '/assets/python/runtime.whl' },
|
||||
{ id: 'runtime', label: 'Managed Python runtime', status: 'pass', detail: 'Runtime ready at /runtime/0.2.0' },
|
||||
]),
|
||||
};
|
||||
|
||||
await expect(runKtxRuntime({ command: 'prune', cliVersion: '0.2.0', dryRun: true, yes: false }, io.io, deps))
|
||||
.resolves.toBe(0);
|
||||
await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: false }, io.io, deps)).resolves.toBe(0);
|
||||
|
||||
expect(io.stdout()).toContain('Stale KTX Python runtimes');
|
||||
expect(io.stdout()).toContain('/runtime/0.1.0');
|
||||
expect(io.stdout()).toContain('KTX Python runtime');
|
||||
expect(io.stdout()).toContain('status: ready');
|
||||
expect(io.stdout()).toContain('KTX Python runtime checks');
|
||||
expect(io.stdout()).toContain('PASS uv: uv 0.9.5');
|
||||
expect(io.stdout()).toContain('PASS Managed Python runtime: Runtime ready at /runtime/0.2.0');
|
||||
expect(io.stderr()).toBe('');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -8,14 +8,14 @@ import {
|
|||
type ManagedPythonDaemonStopResult,
|
||||
} from './managed-python-daemon.js';
|
||||
import {
|
||||
doctorManagedPythonRuntime,
|
||||
installManagedPythonRuntime,
|
||||
pruneManagedPythonRuntimes,
|
||||
readManagedPythonRuntimeStatus,
|
||||
type KtxRuntimeFeature,
|
||||
type ManagedPythonRuntimeDoctorCheck,
|
||||
type ManagedPythonRuntimeInstallOptions,
|
||||
type ManagedPythonRuntimeInstallResult,
|
||||
type ManagedPythonRuntimeLayoutOptions,
|
||||
type ManagedPythonRuntimePruneResult,
|
||||
type ManagedPythonRuntimeStatus,
|
||||
} from './managed-python-runtime.js';
|
||||
|
||||
|
|
@ -23,8 +23,7 @@ export type KtxRuntimeArgs =
|
|||
| { command: 'install'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean }
|
||||
| { command: 'start'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean }
|
||||
| { command: 'stop'; cliVersion: string; all: boolean }
|
||||
| { command: 'status'; cliVersion: string; json: boolean }
|
||||
| { command: 'prune'; cliVersion: string; dryRun: boolean; yes: boolean };
|
||||
| { command: 'status'; cliVersion: string; json: boolean };
|
||||
|
||||
export interface KtxRuntimeDeps {
|
||||
installRuntime?: (options: ManagedPythonRuntimeInstallOptions) => Promise<ManagedPythonRuntimeInstallResult>;
|
||||
|
|
@ -36,11 +35,7 @@ export interface KtxRuntimeDeps {
|
|||
stopDaemon?: (options: { cliVersion: string }) => Promise<ManagedPythonDaemonStopResult>;
|
||||
stopAllDaemons?: (options: { cliVersion: string }) => Promise<ManagedPythonDaemonStopAllResult>;
|
||||
readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise<ManagedPythonRuntimeStatus>;
|
||||
pruneRuntime?: (options: {
|
||||
cliVersion: string;
|
||||
runtimeRoot: string;
|
||||
dryRun?: boolean;
|
||||
}) => Promise<ManagedPythonRuntimePruneResult>;
|
||||
doctorRuntime?: (options: ManagedPythonRuntimeLayoutOptions) => Promise<ManagedPythonRuntimeDoctorCheck[]>;
|
||||
}
|
||||
|
||||
function writeJson(io: KtxCliIo, value: unknown): void {
|
||||
|
|
@ -145,17 +140,20 @@ function writeStatus(io: KtxCliIo, status: ManagedPythonRuntimeStatus): void {
|
|||
}
|
||||
}
|
||||
|
||||
function writePrune(io: KtxCliIo, result: ManagedPythonRuntimePruneResult, dryRun: boolean): void {
|
||||
if (result.stale.length === 0) {
|
||||
io.stdout.write(`No stale KTX Python runtimes found under ${result.runtimeRoot}\n`);
|
||||
return;
|
||||
}
|
||||
io.stdout.write(dryRun ? 'Stale KTX Python runtimes\n' : 'Removed stale KTX Python runtimes\n');
|
||||
for (const path of dryRun ? result.stale : result.removed) {
|
||||
io.stdout.write(`${path}\n`);
|
||||
/**
 * Prints the runtime doctor report to stdout.
 *
 * Emits a fixed heading, then one `STATUS label: detail` line per check
 * (status upper-cased), followed by a `Fix:` line when the check carries a
 * non-empty remediation hint.
 *
 * @param io - CLI streams; only `stdout` is written to.
 * @param checks - Doctor results, rendered in the order given.
 */
function writeRuntimeChecks(io: KtxCliIo, checks: ManagedPythonRuntimeDoctorCheck[]): void {
  const out = io.stdout;
  out.write('KTX Python runtime checks\n');
  for (const { status, label, detail, fix } of checks) {
    out.write(`${status.toUpperCase()} ${label}: ${detail}\n`);
    if (!fix) {
      // No remediation hint for this check; nothing more to print.
      continue;
    }
    out.write(` Fix: ${fix}\n`);
  }
}
|
||||
|
||||
/**
 * Reports whether any doctor check has the status `'fail'`.
 *
 * @param checks - Doctor results to inspect.
 * @returns `true` as soon as one failing check is found, otherwise `false`
 *   (including for an empty list).
 */
function hasRuntimeCheckFailures(checks: ManagedPythonRuntimeDoctorCheck[]): boolean {
  for (const { status } of checks) {
    if (status === 'fail') {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
export async function runKtxRuntime(
|
||||
args: KtxRuntimeArgs,
|
||||
io: KtxCliIo = process,
|
||||
|
|
@ -196,27 +194,19 @@ export async function runKtxRuntime(
|
|||
}
|
||||
if (args.command === 'status') {
|
||||
const readStatus = deps.readStatus ?? readManagedPythonRuntimeStatus;
|
||||
const doctorRuntime = deps.doctorRuntime ?? doctorManagedPythonRuntime;
|
||||
const status = await readStatus({ cliVersion: args.cliVersion });
|
||||
const checks = await doctorRuntime({ cliVersion: args.cliVersion });
|
||||
if (args.json) {
|
||||
writeJson(io, status);
|
||||
writeJson(io, { ...status, checks });
|
||||
} else {
|
||||
writeStatus(io, status);
|
||||
writeRuntimeChecks(io, checks);
|
||||
}
|
||||
return 0;
|
||||
return hasRuntimeCheckFailures(checks) ? 1 : 0;
|
||||
}
|
||||
if (!args.dryRun && !args.yes) {
|
||||
io.stderr.write('Refusing to prune without --yes. Preview with: ktx dev runtime prune --dry-run\n');
|
||||
return 1;
|
||||
}
|
||||
const status = await (deps.readStatus ?? readManagedPythonRuntimeStatus)({ cliVersion: args.cliVersion });
|
||||
const pruneRuntime = deps.pruneRuntime ?? pruneManagedPythonRuntimes;
|
||||
const result = await pruneRuntime({
|
||||
cliVersion: args.cliVersion,
|
||||
runtimeRoot: status.layout.runtimeRoot,
|
||||
dryRun: args.dryRun,
|
||||
});
|
||||
writePrune(io, result, args.dryRun);
|
||||
return 0;
|
||||
const _exhaustive: never = args;
|
||||
return _exhaustive;
|
||||
} catch (error) {
|
||||
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -84,7 +84,10 @@ describe('setup agents', () => {
|
|||
const skill = await readFile(join(tempDir, '.agents/skills/ktx/SKILL.md'), 'utf-8');
|
||||
expect(skill).toContain(`--project-dir ${tempDir}`);
|
||||
expect(skill).toContain('must not print secrets');
|
||||
expect(skill).toContain('agent sql execute');
|
||||
expect(skill).toContain('status --json');
|
||||
expect(skill).toContain('sl list --json');
|
||||
expect(skill).not.toContain('agent ');
|
||||
expect(skill).not.toContain('sql execute');
|
||||
expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({
|
||||
version: 1,
|
||||
projectDir: tempDir,
|
||||
|
|
@ -115,8 +118,9 @@ describe('setup agents', () => {
|
|||
|
||||
const skill = await readFile(join(tempDir, '.agents/skills/ktx/SKILL.md'), 'utf-8');
|
||||
expect(skill).not.toContain('`ktx agent');
|
||||
expect(skill).toContain('agent context --json');
|
||||
expect(skill).toContain('agent sql execute');
|
||||
expect(skill).toContain('status --json');
|
||||
expect(skill).toContain('sl query');
|
||||
expect(skill).not.toContain('sql execute');
|
||||
});
|
||||
|
||||
it('removes only manifest-listed files', async () => {
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun
|
|||
return [
|
||||
'---',
|
||||
'name: ktx',
|
||||
'description: Use local KTX semantic context, wiki knowledge, and safe SQL execution for this project.',
|
||||
'description: Use local KTX semantic context and wiki knowledge for this project.',
|
||||
'---',
|
||||
'',
|
||||
'# KTX Local Context',
|
||||
|
|
@ -136,11 +136,11 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun
|
|||
'',
|
||||
'Available commands:',
|
||||
'',
|
||||
`- \`${ktxCommandLine(input.launcher, ['agent', 'context', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['agent', 'sl', 'list', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['agent', 'sl', 'read', '<sourceName>', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['status', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs, '--query', '<text>'])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['sl', 'read', '<sourceName>', ...projectDirArgs, '--connection-id', '<id>'])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, [
|
||||
'agent',
|
||||
'sl',
|
||||
'query',
|
||||
...projectDirArgs,
|
||||
|
|
@ -152,29 +152,17 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun
|
|||
'--max-rows',
|
||||
'100',
|
||||
])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['agent', 'wiki', 'search', '<query>', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['agent', 'wiki', 'read', '<pageId>', ...projectDirArgs])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, [
|
||||
'agent',
|
||||
'sql',
|
||||
'execute',
|
||||
...projectDirArgs,
|
||||
'--connection-id',
|
||||
'<id>',
|
||||
'--sql-file',
|
||||
'<path>',
|
||||
'--max-rows',
|
||||
'100',
|
||||
])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '<query>', ...projectDirArgs, '--limit', '10'])}\``,
|
||||
`- \`${ktxCommandLine(input.launcher, ['wiki', 'read', '<pageId>', ...projectDirArgs])}\``,
|
||||
'',
|
||||
'SQL execution is read-only, requires an explicit row limit, and should use the smallest useful limit.',
|
||||
'Use semantic-layer queries before direct database access. Do not print secrets or credential references.',
|
||||
'',
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
function ruleInstructionContent(input: { projectDir: string }): string {
|
||||
return [
|
||||
`Use the \`ktx\` CLI to query local semantic context, wiki knowledge, and execute safe SQL for this project (\`--project-dir ${input.projectDir}\`).`,
|
||||
`Use the \`ktx\` CLI to query local semantic context and wiki knowledge for this project (\`--project-dir ${input.projectDir}\`).`,
|
||||
'',
|
||||
'Use when the user asks about data schemas, metrics, dimensions, database structure, or wants to run SQL queries.',
|
||||
'',
|
||||
|
|
|
|||
|
|
@ -472,16 +472,6 @@ async function markContextComplete(projectDir: string): Promise<void> {
|
|||
await markKtxSetupStateStepComplete(projectDir, 'context');
|
||||
}
|
||||
|
||||
/**
 * Prints the banner for a context build run to stdout.
 *
 * The banner shows the run id, the resolved project directory, the detach
 * hint, and the watch/status command lines returned by
 * `contextBuildCommands` for this project and run.
 *
 * @param projectDir - Project directory; printed after `resolve()`.
 * @param runId - Identifier of the build run.
 * @param io - CLI streams; only `stdout` is written to.
 */
function writeBuildHeader(projectDir: string, runId: string, io: KtxCliIo): void {
  const { watch, status } = contextBuildCommands(projectDir, runId);
  const bannerChunks = [
    '\nKTX context build\n',
    `Run: ${runId}\n`,
    `Project: ${resolve(projectDir)}\n\n`,
    'Detach: press d to leave this running.\n',
    `Resume: ${watch}\n`,
    `Status: ${status}\n\n`,
  ];
  // One write per chunk, in banner order.
  for (const chunk of bannerChunks) {
    io.stdout.write(chunk);
  }
}
|
||||
|
||||
function writeMissingCapabilities(missing: string[], io: KtxCliIo): void {
|
||||
io.stderr.write('KTX cannot build agent-ready context yet.\n\n');
|
||||
io.stderr.write('Missing:\n');
|
||||
|
|
|
|||
|
|
@ -84,6 +84,71 @@ describe('runKtxSl', () => {
|
|||
expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0');
|
||||
});
|
||||
|
||||
it('prints semantic-layer reads and searched lists as public JSON envelopes', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
||||
await expect(
|
||||
runKtxSl(
|
||||
{
|
||||
command: 'write',
|
||||
projectDir,
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: [
|
||||
'name: orders',
|
||||
'table: public.orders',
|
||||
'description: Paid order facts',
|
||||
'grain: [order_id]',
|
||||
'columns:',
|
||||
' - name: order_id',
|
||||
' type: string',
|
||||
'',
|
||||
].join('\n'),
|
||||
},
|
||||
makeIo().io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const readIo = makeIo();
|
||||
await expect(
|
||||
runKtxSl(
|
||||
{ command: 'read', projectDir, connectionId: 'warehouse', sourceName: 'orders', json: true },
|
||||
readIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
expect(JSON.parse(readIo.stdout())).toMatchObject({
|
||||
kind: 'sl.source',
|
||||
data: {
|
||||
connectionId: 'warehouse',
|
||||
name: 'orders',
|
||||
yaml: expect.stringContaining('name: orders'),
|
||||
},
|
||||
});
|
||||
|
||||
const listIo = makeIo();
|
||||
await expect(
|
||||
runKtxSl(
|
||||
{ command: 'list', projectDir, connectionId: 'warehouse', query: 'paid', json: true },
|
||||
listIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
expect(JSON.parse(listIo.stdout())).toMatchObject({
|
||||
kind: 'list',
|
||||
data: {
|
||||
items: [
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
name: 'orders',
|
||||
score: expect.any(Number),
|
||||
matchReasons: expect.arrayContaining(['token']),
|
||||
}),
|
||||
],
|
||||
},
|
||||
meta: { command: 'sl list' },
|
||||
});
|
||||
});
|
||||
|
||||
it('fails validation when a table-backed source declares columns absent from a matching warehouse manifest', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
@ -191,6 +256,73 @@ joins: []
|
|||
expect(stderr.write).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('runs sl query from a JSON query file', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
project.config.connections.warehouse = { driver: 'postgres', readonly: true };
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
`name: orders
|
||||
table: public.orders
|
||||
grain: [id]
|
||||
columns:
|
||||
- name: id
|
||||
type: number
|
||||
measures:
|
||||
- name: order_count
|
||||
expr: count(*)
|
||||
joins: []
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add orders source',
|
||||
);
|
||||
const queryFile = join(tempDir, 'query.json');
|
||||
await writeFile(queryFile, '{"measures":["orders.order_count"],"dimensions":[]}', 'utf-8');
|
||||
|
||||
const stdout = { write: vi.fn() };
|
||||
const stderr = { write: vi.fn() };
|
||||
const query = vi.fn(async () => ({
|
||||
sql: 'select count(*) as order_count from public.orders',
|
||||
dialect: 'postgres',
|
||||
columns: [{ name: 'orders.order_count' }],
|
||||
plan: {},
|
||||
}));
|
||||
const createSemanticLayerCompute = vi.fn(() => ({
|
||||
query,
|
||||
validateSources: vi.fn(),
|
||||
generateSources: vi.fn(),
|
||||
}));
|
||||
|
||||
await expect(
|
||||
runKtxSl(
|
||||
{
|
||||
command: 'query',
|
||||
projectDir,
|
||||
connectionId: 'warehouse',
|
||||
queryFile,
|
||||
format: 'json',
|
||||
execute: false,
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
},
|
||||
{ stdout, stderr },
|
||||
{ createSemanticLayerCompute },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(query).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
query: { measures: ['orders.order_count'], dimensions: [] },
|
||||
}),
|
||||
);
|
||||
expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({
|
||||
sql: 'select count(*) as order_count from public.orders',
|
||||
plan: { execution: { mode: 'compile_only' } },
|
||||
});
|
||||
expect(stderr.write).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('creates default sl query compute through the managed runtime helper', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
|
||||
|
|
|
|||
|
|
@ -1,14 +1,22 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { createDefaultLocalQueryExecutor, type KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import {
|
||||
createLocalKtxEmbeddingProviderFromConfig,
|
||||
KtxIngestEmbeddingPortAdapter,
|
||||
type KtxEmbeddingPort,
|
||||
} from '@ktx/context';
|
||||
import type { KtxSemanticLayerComputePort } from '@ktx/context/daemon';
|
||||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
import {
|
||||
compileLocalSlQuery,
|
||||
listLocalSlSources,
|
||||
readLocalSlSource,
|
||||
searchLocalSlSources,
|
||||
validateLocalSlSource,
|
||||
writeLocalSlSource,
|
||||
type SemanticLayerQueryInput,
|
||||
} from '@ktx/context/sl';
|
||||
import { writeJsonResult } from './io/print-list.js';
|
||||
import {
|
||||
createManagedPythonSemanticLayerComputePort,
|
||||
type KtxManagedPythonInstallPolicy,
|
||||
|
|
@ -20,15 +28,16 @@ profileMark('module:sl');
|
|||
type SlQueryFormat = 'json' | 'sql';
|
||||
|
||||
export type KtxSlArgs =
|
||||
| { command: 'list'; projectDir: string; connectionId?: string; output?: string; json?: boolean }
|
||||
| { command: 'read'; projectDir: string; connectionId: string; sourceName: string }
|
||||
| { command: 'list'; projectDir: string; connectionId?: string; query?: string; output?: string; json?: boolean }
|
||||
| { command: 'read'; projectDir: string; connectionId: string; sourceName: string; json?: boolean }
|
||||
| { command: 'validate'; projectDir: string; connectionId: string; sourceName: string }
|
||||
| { command: 'write'; projectDir: string; connectionId: string; sourceName: string; yaml: string }
|
||||
| {
|
||||
command: 'query';
|
||||
projectDir: string;
|
||||
connectionId?: string;
|
||||
query: SemanticLayerQueryInput;
|
||||
query?: SemanticLayerQueryInput;
|
||||
queryFile?: string;
|
||||
format: SlQueryFormat;
|
||||
execute: boolean;
|
||||
maxRows?: number;
|
||||
|
|
@ -43,6 +52,8 @@ interface KtxSlIo {
|
|||
|
||||
interface KtxSlDeps {
|
||||
loadProject?: typeof loadKtxProject;
|
||||
embeddingService?: KtxEmbeddingPort | null;
|
||||
createEmbeddingProvider?: typeof createLocalKtxEmbeddingProviderFromConfig;
|
||||
createSemanticLayerCompute?: () => KtxSemanticLayerComputePort;
|
||||
createManagedSemanticLayerCompute?: (options: {
|
||||
cliVersion: string;
|
||||
|
|
@ -52,11 +63,35 @@ interface KtxSlDeps {
|
|||
createQueryExecutor?: () => KtxSqlQueryExecutorPort;
|
||||
}
|
||||
|
||||
/**
 * Chooses the embedding service used for semantic-layer search.
 *
 * When `deps` has an `embeddingService` key at all, that value is used
 * (with `undefined` normalised to `null`), so callers can explicitly disable
 * embeddings. Otherwise a provider is built from
 * `project.config.ingest.embeddings`, using `deps.createEmbeddingProvider`
 * when supplied, and wrapped in a `KtxIngestEmbeddingPortAdapter`.
 *
 * @param project - Loaded project whose ingest embedding config is read.
 * @param deps - Optional overrides for the service or the provider factory.
 * @returns The embedding port, or `null` when none is available.
 */
function slSearchEmbeddingService(project: KtxLocalProject, deps: KtxSlDeps): KtxEmbeddingPort | null {
  const hasInjectedService = 'embeddingService' in deps;
  if (!hasInjectedService) {
    const buildProvider = deps.createEmbeddingProvider ?? createLocalKtxEmbeddingProviderFromConfig;
    const provider = buildProvider(project.config.ingest.embeddings);
    if (!provider) {
      return null;
    }
    return new KtxIngestEmbeddingPortAdapter(provider);
  }
  return deps.embeddingService ?? null;
}
|
||||
|
||||
/**
 * Loads a semantic-layer query definition from a JSON file.
 *
 * The file is read as UTF-8 and must contain a single JSON object; arrays,
 * primitives and `null` are rejected. The object's fields are not validated
 * here.
 *
 * @param path - Location of the JSON query file.
 * @returns The parsed object, typed as a semantic-layer query input.
 * @throws SyntaxError when the file is not valid JSON; the message names the
 *   file so the CLI error is actionable.
 * @throws Error when the JSON value is not an object, or when the file cannot
 *   be read (the rejection from `readFile` propagates unchanged).
 */
async function readSlQueryFile(path: string): Promise<SemanticLayerQueryInput> {
  const raw = await readFile(path, 'utf-8');
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // JSON.parse errors carry no file context; add it while keeping the SyntaxError type.
    const detail = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`${path} does not contain valid JSON: ${detail}`);
  }
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`${path} must contain a JSON object.`);
  }
  return parsed as SemanticLayerQueryInput;
}
|
||||
|
||||
export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise<number> {
|
||||
try {
|
||||
const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir });
|
||||
if (args.command === 'list') {
|
||||
const sources = await listLocalSlSources(project, { connectionId: args.connectionId });
|
||||
const sources = args.query
|
||||
? await searchLocalSlSources(project, {
|
||||
connectionId: args.connectionId,
|
||||
query: args.query,
|
||||
embeddingService: slSearchEmbeddingService(project, deps),
|
||||
})
|
||||
: await listLocalSlSources(project, { connectionId: args.connectionId });
|
||||
const { resolveOutputMode } = await import('./io/mode.js');
|
||||
const { printList } = await import('./io/print-list.js');
|
||||
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
|
||||
|
|
@ -86,6 +121,14 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
|
|||
if (!source) {
|
||||
throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`);
|
||||
}
|
||||
if (args.json) {
|
||||
writeJsonResult(io, {
|
||||
kind: 'sl.source',
|
||||
data: source,
|
||||
meta: { command: 'sl read' },
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
io.stdout.write(source.yaml);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -108,6 +151,10 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
|
|||
return 0;
|
||||
}
|
||||
if (args.command === 'query') {
|
||||
const query = args.query ?? (args.queryFile ? await readSlQueryFile(args.queryFile) : undefined);
|
||||
if (!query) {
|
||||
throw new Error('sl query requires query input from --query-file or at least one --measure');
|
||||
}
|
||||
const compute = deps.createSemanticLayerCompute
|
||||
? deps.createSemanticLayerCompute()
|
||||
: await (deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort)({
|
||||
|
|
@ -118,7 +165,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
|
|||
const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined;
|
||||
const result = await compileLocalSlQuery(project as KtxLocalProject, {
|
||||
connectionId: args.connectionId,
|
||||
query: args.query,
|
||||
query,
|
||||
compute,
|
||||
execute: args.execute,
|
||||
maxRows: args.maxRows,
|
||||
|
|
|
|||
|
|
@ -126,10 +126,6 @@ async function writeSqliteScanConfig(projectDir: string, dbPath: string, enrich
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Parses captured CLI stdout as JSON and returns it typed as `T`.
 *
 * The cast is unchecked: the parsed value's shape is not verified.
 *
 * @param stdout - Raw text containing one JSON document.
 * @returns The parsed value, asserted to be `T`.
 */
function parseJsonOutput<T>(stdout: string): T {
  const decoded: unknown = JSON.parse(stdout);
  return decoded as T;
}
|
||||
|
||||
function expectProjectStderr(result: CliResult, projectDir: string): void {
|
||||
expect(result).toMatchObject({ code: 0, stderr: `Project: ${projectDir}\n` });
|
||||
}
|
||||
|
|
@ -190,49 +186,21 @@ describe('standalone built ktx CLI smoke', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('prints guided JSON for agent semantic-layer search outside a project through the built binary', async () => {
|
||||
const projectDir = join(tempDir, 'missing-search-project');
|
||||
await mkdir(projectDir, { recursive: true });
|
||||
|
||||
const result = await runBuiltCli([
|
||||
'agent',
|
||||
'sl',
|
||||
'list',
|
||||
'--json',
|
||||
'--query',
|
||||
'revenue',
|
||||
'--project-dir',
|
||||
projectDir,
|
||||
]);
|
||||
it('rejects the removed agent command through the built binary', async () => {
|
||||
const result = await runBuiltCli(['agent']);
|
||||
|
||||
expect(result.code).toBe(1);
|
||||
expect(result.stdout).toBe('');
|
||||
const errorJson = parseJsonOutput<{
|
||||
ok: false;
|
||||
error: { code: string; message: string; nextSteps: string[] };
|
||||
}>(result.stderr);
|
||||
expect(errorJson).toEqual({
|
||||
ok: false,
|
||||
error: {
|
||||
code: 'agent_sl_search_missing_project',
|
||||
message: `Semantic-layer search needs an initialized KTX project at ${projectDir}.`,
|
||||
nextSteps: [
|
||||
`ktx setup --project-dir ${projectDir}`,
|
||||
`ktx status --project-dir ${projectDir}`,
|
||||
'ktx ingest run --connection-id <connection> --adapter <adapter>',
|
||||
`ktx agent sl list --json --query "revenue" --project-dir ${projectDir}`,
|
||||
],
|
||||
},
|
||||
});
|
||||
expect(result.stderr).toContain("unknown command 'agent'");
|
||||
});
|
||||
|
||||
it('runs doctor setup through the built binary', async () => {
|
||||
const result = await runBuiltCli(['status', '--no-input']);
|
||||
|
||||
expect(result.stdout).toContain('KTX setup doctor');
|
||||
expect(result.stdout).toMatch(/KTX (setup|project) doctor/);
|
||||
expect(result.stdout).toContain('Node 22+');
|
||||
expect(result.stdout).toContain('Workspace-local CLI');
|
||||
expect(result.stderr).toBe('');
|
||||
expect(result.stderr === '' || result.stderr.startsWith('Project: ')).toBe(true);
|
||||
expect([0, 1]).toContain(result.code);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t
|
|||
|
||||
<scope>
|
||||
All wiki writes are GLOBAL (same as Stage 3). SL writes target the same session worktree Stage 3 used.
|
||||
Wiki keys must be flat slugs, not directory paths. If a Stage 3 page used a path-like key and a flat retry exists, treat the flat key as the canonical page.
|
||||
</scope>
|
||||
|
||||
<do_not>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
<role>
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and searchable via `wiki_sl_search`.
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`.
|
||||
</role>
|
||||
|
||||
<stance>
|
||||
|
|
@ -8,9 +8,9 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
|
|||
|
||||
<workflow>
|
||||
1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files.
|
||||
2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. The triage skill tells you how to react when `wiki_sl_search` reveals that a prior WU already wrote something overlapping.
|
||||
2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping.
|
||||
3. If the system prompt includes `<canonical_pins>`, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `wiki_sl_search` for each candidate name to find prior-WU writes; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
|
||||
5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit.
|
||||
6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit_source` over `sl_write_source` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag.
|
||||
7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written.
|
||||
|
|
@ -19,12 +19,13 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
|
|||
|
||||
<scope>
|
||||
All wiki writes go to the GLOBAL scope. Bundle ingests are not personal. The `wiki_write` tool selects scope automatically for this caller.
|
||||
Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths like `historic-sql/paid-order-lifecycle`. Use `tags`, `source`, and page content to group related pages.
|
||||
</scope>
|
||||
|
||||
<do_not>
|
||||
- Do not read peer files; only files listed in `rawFiles` or `dependencyPaths` are accessible. `read_raw_file` will reject everything else.
|
||||
- Do not invent measures/joins/rules not declared in the raw files.
|
||||
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`wiki_sl_search`, `sl_discover`, `sl_describe_table`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
|
||||
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
|
||||
- Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture.
|
||||
- Do not duplicate an artifact that prior provenance says you already produced; update it.
|
||||
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
|
||||
|
|
|
|||
27
packages/context/skills/_shared/identifier-verification.md
Normal file
27
packages/context/skills/_shared/identifier-verification.md
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
|
@ -12,16 +12,16 @@ Use this skill for **uploaded** dbt projects (`dbt_project.yml` at stage root, `
|
|||
|
||||
| dbt | KTX | Notes |
|
||||
|-----|--------|--------|
|
||||
| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `wiki_sl_search` / `sl_describe_table`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. |
|
||||
| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `discover_data` / `entity_details`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. |
|
||||
| `sources:` → `tables:` | Same as models; use `identifier` when present instead of logical `name`. | Schema + name must match how the connection sees tables. |
|
||||
| Column `description` | `descriptions.user` or merged `descriptions` map on the column | Do not overwrite `dbt` description keys from sync. |
|
||||
| `data_tests: not_null` / `unique` | Short hint in column `descriptions` or notes: “dbt: not null”, “dbt: unique” | Full structured metadata lands in manifest via **sync**; the skill keeps bundle-time SL text useful for the agent. |
|
||||
| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `wiki_sl_search` / filters. |
|
||||
| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `wiki_sl_search` / `sl_describe_table` | If the ref cannot be resolved, capture the intent in a wiki page instead. |
|
||||
| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `discover_data` / filters. |
|
||||
| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `discover_data` / `entity_details` | If the ref cannot be resolved, capture the intent in a wiki page instead. |
|
||||
|
||||
## Physical schema grounding
|
||||
|
||||
dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `wiki_sl_search`, `sl_discover`, or `sl_describe_table` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`.
|
||||
dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `discover_data`, `sl_discover`, or `entity_details` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`.
|
||||
|
||||
For dbt context-source ingest, the dbt connection is usually not the warehouse connection. Call `sl_discover` without `connectionId` first, then write overlays to the connection that owns the matching manifest-backed source (for example `postgres-warehouse`), not to the dbt connection (for example `dbt-main`). If no matching manifest-backed source is visible on any warehouse connection, do not call `sl_write_source`; record `emit_unmapped_fallback` and keep the fact wiki-only.
|
||||
|
||||
|
|
@ -31,6 +31,34 @@ Include `rawPaths` on every `wiki_write`, `sl_write_source`, and `sl_edit_source
|
|||
|
||||
After every `sl_write_source`, call `sl_validate`. A validation error saying a declared column or measure reference is absent from the physical table is a hard stop: re-read the warehouse-backed source and rewrite with confirmed names, or remove the invalid SL fields.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## 1.1 test hints (descriptions / meta)
|
||||
|
||||
When YAML shows `accepted_values` or `not_null`, add **short** hints into `columns[].descriptions` (e.g. under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible.
|
||||
|
|
|
|||
|
|
@ -18,6 +18,37 @@ Use this skill when the WorkUnit raw file is a `patterns-input/part-0001.json` s
|
|||
6. Set each evidence object's `rawPath` to the exact raw file path read in step 3.
|
||||
7. Stop after all pattern evidence has been emitted.
|
||||
|
||||
Every join column mentioned in pattern descriptions must be verified via
|
||||
`entity_details` for both sides of the join.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Evidence Shape
|
||||
|
||||
Each call to `emit_historic_sql_evidence` must use this shape:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,34 @@ Use this skill when the WorkUnit raw file is one `tables/<schema>.<name>.json` f
|
|||
5. Call `emit_historic_sql_evidence` exactly once with `kind: "table_usage"`.
|
||||
6. Stop after the evidence tool succeeds.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Evidence Shape
|
||||
|
||||
Call `emit_historic_sql_evidence` with this shape:
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ If nothing is worth capturing, respond without calling any tool.
|
|||
|
||||
1. Read the wiki index (provided in the prompt) and decide whether the turn introduces durable knowledge.
|
||||
2. **Before writing**, search for related content so cross-references are accurate:
|
||||
- `discover_data` first when a page relates to data or SL concepts — find
|
||||
existing wiki pages, SL sources, and raw warehouse schema together.
|
||||
- `wiki_search` with the topic — find related wiki pages to populate `refs`.
|
||||
- `sl_discover` with the concept — if the page defines a metric (revenue, churn, retention, LTV, ARR, MRR, CAC, attribution, etc.), find matching SL sources or measures to populate `sl_refs`. If no matches, pass `sl_refs: []` so future readers know you checked.
|
||||
3. If updating an existing page, `wiki_read` it first. Use the returned `structured.content` or markdown body as the exact stored text for targeted replacements; current tags, refs, and sl_refs are returned in structured metadata.
|
||||
|
|
@ -48,6 +50,34 @@ If nothing is worth capturing, respond without calling any tool.
|
|||
|
||||
For bundle/external ingest, include `rawPaths` on every `wiki_write`/`wiki_remove` call with only the raw files that directly support that wiki action. This keeps ingest provenance tied to the actual source file, not every file in the WorkUnit.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Keys, summaries, and content
|
||||
|
||||
- **Keys** are short kebab-case topic identifiers: `leads-source-filter`, `revenue-definition`, `churn-calculation`. No namespacing, no prefixes.
|
||||
|
|
@ -70,6 +100,10 @@ The `wiki_write` tool accepts three array fields that go into the page frontmatt
|
|||
- **`refs`**: keys of related wiki pages. Add when the new page materially depends on concepts from another (e.g., a churn definition that uses the paid-orders filter from a revenue definition). Don't add refs just because pages share a topic area.
|
||||
- **`sl_refs`**: names of SL sources or measures the page relates to. Format: `"source_name"` or `"source_name.measure_name"`. Discover via `sl_discover` → inspect with `sl_read_source` → include the confirmed matches.
|
||||
|
||||
Wiki page keys must be flat slugs. Use `large-contract-requesters`, not
|
||||
`historic-sql/large-contract-requesters`. Use `tags`, `source`, and content
|
||||
headings for grouping.
|
||||
|
||||
### Replace semantics
|
||||
|
||||
All three fields use REPLACE semantics on update:
|
||||
|
|
|
|||
|
|
@ -24,6 +24,37 @@ Use this skill when the ingest work unit contains raw files under
|
|||
or column comments.
|
||||
9. Run `sl_validate` for the table source before the work unit completes.
|
||||
|
||||
Sample values come from the scan record; do not invent values not present in
|
||||
`relationship-profile.json`.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Source shape
|
||||
|
||||
For a raw table with this shape:
|
||||
|
|
|
|||
|
|
@ -21,6 +21,37 @@ Looker runtime ingest turns API-staged dashboards, Looks, and explores into dura
|
|||
9. Write SL from Looker runtime evidence only through the staged warehouse target contract. For explores and inherited dashboard/Look queries, branch on `targetTable.ok`; when it is true, write on `targetWarehouseConnectionId` and use `targetTable.canonicalTable` as `source.table`. When it is false or missing, write wiki knowledge candidates and record `emit_unmapped_fallback` with the staged reason.
|
||||
10. Run `sl_validate` after every SL write. If validation fails, fix the source or roll it back before the WorkUnit ends.
|
||||
|
||||
For every Looker field reference, call `entity_details` on the underlying
|
||||
`schema.table.column` before promoting it to `sl_refs` or quoting it in the wiki body.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Explore WorkUnits
|
||||
|
||||
Explore WUs have raw files like `explores/<model>/<explore>.json` and usually depend on `lookml_models.json`.
|
||||
|
|
|
|||
|
|
@ -51,10 +51,47 @@ LookML's `dimension_group: date { type: time; timeframes: [raw, date, week, mont
|
|||
|
||||
A prior replay hallucinated `date_date`, `date_week` into `sql:`, `columns:`, and `grain:` across 4+ standalones; every measure on each affected source returned `400 Unrecognized name: date_date` at query time. Preventable.
|
||||
|
||||
Verify each `sql_table_name` from the LookML view with `entity_details` before
|
||||
mapping it to an SL source.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
**Required flow before writing any overlay or standalone**:
|
||||
|
||||
1. Call `sl_discover(<tableName>)` for each base table you're about to touch. That returns the real columns.
|
||||
2. If the table isn't in the manifest, fall back to `sql_execution({ sql: "SELECT column_name FROM <dataset>.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '<table>'" })` (session shape — a connection is already pinned by the ingest session).
|
||||
2. If the table isn't in the manifest, use the warehouse `connectionName`
|
||||
returned by `discover_data` or the target connection chosen from
|
||||
`sl_discover`, then call a dialect-appropriate SQL probe with that
|
||||
connection name, for example:
|
||||
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
|
||||
Replace `warehouse`, `analytics`, and `orders` with the verified connection,
|
||||
schema or dataset, and table from the WorkUnit evidence.
|
||||
3. Use only those names in `sql:`, `columns:`, and `grain:`. Map each `dimension_group` to ONE `{ name: <physical_col>, type: time, role: time }` entry — never one per timeframe.
|
||||
|
||||
| LookML input | KTX `columns:` entry |
|
||||
|
|
|
|||
|
|
@ -44,6 +44,37 @@ Use `resultMetadata` to:
|
|||
- `lastRunAt`: ISO timestamp of the card's last execution. If null or very old, the card may be dead; prefer skipping over creating a source.
|
||||
- `dashboardCount`: number of dashboards referencing the card. Cards with `dashboardCount: 0` and a stale `lastRunAt` are strong skip signals.
|
||||
|
||||
Before writing a wiki page derived from a Metabase question's SQL, verify each
|
||||
`schema.table.column` it mentions with `entity_details`.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Decision tree
|
||||
|
||||
For each card:
|
||||
|
|
|
|||
|
|
@ -29,6 +29,37 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to
|
|||
|
||||
Type map: MetricFlow `time` to KTX `time`; `categorical` to `string`; `number` to `number`; `boolean` to `boolean`. Follow `expr` over `name` when both differ — `expr` is the physical column.
|
||||
|
||||
Verify each MetricFlow model's source table with `entity_details` before making
|
||||
the corresponding `sl_write_source` call.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Flattening `extends:`
|
||||
|
||||
Within one WorkUnit, multiple semantic_models linked by `extends:` are guaranteed to be present (the chunker groups them). Resolve inheritance **before** writing:
|
||||
|
|
@ -49,7 +80,13 @@ The `model:` field on a semantic_model is a string like `ref('table_name')`, `so
|
|||
- `source('s','t')` → table name `t`. Verify via `sl_discover(t)`.
|
||||
- Literal (no `ref(...)` / `source(...)`) → treat as the table name directly.
|
||||
|
||||
If `sl_discover` errors (no such table), fall back to `sql_execution({ sql: "SELECT column_name FROM <dataset>.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '<x>'" })` (session shape — a connection is already pinned by the ingest session). **Never invent column names** — every column in `columns:`, `grain:`, and `sql:` must be sourced from a real probe.
|
||||
If `sl_discover` errors because no such table exists, use `discover_data` and
|
||||
`entity_details` to find the warehouse target. If a SQL probe is still needed,
|
||||
call `sql_execution` with the same warehouse connection name, for example:
|
||||
`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`.
|
||||
**Never invent column names** - every column in `columns:`, `grain:`, and
|
||||
`sql:` must be sourced from raw files, `entity_details`, or a successful SQL
|
||||
probe.
|
||||
|
||||
After every `sl_write_source`, call `sl_validate`. The warehouse will reject invented columns with `Unrecognized name: <name>` — treat as a hard failure and re-read the schema.
|
||||
|
||||
|
|
|
|||
|
|
@ -67,10 +67,38 @@ Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter and
|
|||
- Do not create SL sources under the Notion connection just because a page mentions a warehouse, dbt, Looker, or Metabase object. Use the mapped warehouse/source connection after discovery, or emit an unmapped fallback and write wiki-only.
|
||||
- Distinguish fallback reasons precisely: if a non-Notion warehouse/dbt connection exists but `sl_discover` cannot find the named table/source, use `no_physical_table`; reserve `no_connection_mapping` for cases where there is no plausible non-Notion target connection at all.
|
||||
- If `sl_discover` resolves the table/source, do not call `emit_unmapped_fallback` for that table. Use the resolved source for `sl_refs`, overlay edits, or wiki-only documentation.
|
||||
- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "orbit_analytics.customer"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`.
|
||||
- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "<schema>.<table>"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Tools
|
||||
|
||||
Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`.
|
||||
Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `discover_data`, `entity_details`, `sql_execution`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`.
|
||||
|
||||
Not allowed: `context_candidate_write`, `context_candidate_mark`.
|
||||
|
|
|
|||
|
|
@ -13,6 +13,10 @@ This skill covers two parts:
|
|||
|
||||
Capture (when and how to add new patterns to the SL) is a separate concern handled by the memory-agent — see the `sl_capture` skill if you are running in capture mode. The research agent **reads** and **queries** the SL via the tools described here; it does not write to it.
|
||||
|
||||
For capture-time identifier verification, load `sl_capture`. Synthesis writer
|
||||
skills must verify warehouse identifiers with `discover_data`,
|
||||
`entity_details`, and `sql_execution` before emitting table or column names.
|
||||
|
||||
---
|
||||
|
||||
## Part 1 — Schema reference
|
||||
|
|
|
|||
|
|
@ -174,6 +174,37 @@ Wiki-only is correct when the user is documenting *about* the measure
|
|||
(definition in business terms, owner, policy, glossary, examples of when to
|
||||
use it) without changing its SQL expression or filters.
|
||||
|
||||
Before sl_write_source, call entity_details on the target table to confirm
|
||||
column names and types match the YAML being written.
|
||||
|
||||
## Identifier Verification Protocol
|
||||
|
||||
Before writing a wiki page or SL source on any topic:
|
||||
|
||||
1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw
|
||||
tables already exist. Prefer updating existing pages over creating new ones.
|
||||
|
||||
Before emitting any `schema.table` or `schema.table.column` into a wiki body,
|
||||
SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`:
|
||||
|
||||
2. `entity_details({connectionName, targets: [{display: "<identifier>"}]})` -
|
||||
confirm the identifier resolves; inspect native types, FK/PK, and
|
||||
sampleValues.
|
||||
3. For literal values from the source, such as status codes or plan tiers,
|
||||
check whether they appear in `entity_details` sampleValues for the relevant
|
||||
column. If sampleValues is short or the sample may have missed real values,
|
||||
run a `sql_execution` probe with the same warehouse connection name:
|
||||
`sql_execution({connectionName, sql: "SELECT DISTINCT <col> FROM <ref> LIMIT 50"})`.
|
||||
4. If the candidate identifier still does not resolve, do one of:
|
||||
- Use `sql_execution({connectionName, sql: "SELECT 1 FROM <ref> LIMIT 0"})`.
|
||||
If it errors, the identifier is fictional.
|
||||
- Wrap the identifier in `[unverified - from <rawPath>]` in the wiki body,
|
||||
citing the exact raw path that mentioned it.
|
||||
- When recording `emit_unmapped_fallback` with `no_physical_table`, include
|
||||
the failing probe error in `clarification`.
|
||||
5. Never copy `<schema>.<table>` placeholder strings from these instructions
|
||||
into output.
|
||||
|
||||
## Tool sequence
|
||||
|
||||
1. `sl_discover` — see what source files exist.
|
||||
|
|
@ -181,7 +212,7 @@ use it) without changing its SQL expression or filters.
|
|||
3. `sl_read_source({ sourceName })` — read the raw YAML before editing.
|
||||
4. For modifications: `sl_edit_source({ sourceName, old_string, new_string })` with exact-string replacements. `old_string` must match exactly and be unique in the file.
|
||||
5. For new sources or full rewrites: `sl_write_source({ sourceName, content })` with the full YAML content.
|
||||
6. For join discovery: `sql_execution({ sql })` to verify the join key exists in both tables and assess cardinality before declaring the join.
|
||||
6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join.
|
||||
7. Cross-reference knowledge: author the edge once on the **wiki** side via `sl_refs: [source_name]` in the page's front-matter. The reverse edge (wiki pages that cite an SL source) is derived automatically by the reconciler — do not add a `knowledge_refs:` field to SL YAMLs.
|
||||
8. `sl_validate` — run after writing or editing to surface schema issues, duplicate measure names, and cross-source validation errors. Read-only; the writes are already committed (the squash-at-end flow will collapse them into one commit).
|
||||
|
||||
|
|
@ -248,7 +279,8 @@ Prior turn: user asked to correlate LTV with protocol count; assistant joined `f
|
|||
sl_read_source({ sourceName: "fct_orders" })
|
||||
→ no joins section yet
|
||||
sql_execution({
|
||||
sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM fct_orders a JOIN fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1"
|
||||
connectionName: "warehouse",
|
||||
sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM public.fct_orders a JOIN public.fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1"
|
||||
})
|
||||
→ confirms cardinality (many orders per MAU row = many_to_one)
|
||||
sl_edit_source({
|
||||
|
|
|
|||
30
packages/context/src/connections/dialects.test.ts
Normal file
30
packages/context/src/connections/dialects.test.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { getDialectForDriver } from './dialects.js';
|
||||
|
||||
/**
 * Pins the table-name formatting produced by each supported driver's dialect
 * and the error raised for an unsupported driver.
 */
describe('getDialectForDriver', () => {
  // Drivers whose expected output carries a leading catalog segment.
  const driversWithCatalog: readonly string[] = ['snowflake', 'bigquery', 'sqlserver'];
  // SQLite dialects render only the bare table name, so no schema is supplied.
  const sqliteDrivers: readonly string[] = ['sqlite', 'sqlite3'];

  it.each([
    ['postgres', '"public"."orders"'],
    ['postgresql', '"public"."orders"'],
    ['mysql', '`public`.`orders`'],
    ['clickhouse', '`public`.`orders`'],
    ['sqlite', '"orders"'],
    ['sqlite3', '"orders"'],
    ['snowflake', '"analytics"."public"."orders"'],
    ['bigquery', '`analytics`.`public`.`orders`'],
    ['sqlserver', '[analytics].[public].[orders]'],
  ] as const)('formats table names for %s', (driver, expected) => {
    const dialect = getDialectForDriver(driver);
    expect(
      dialect.formatTableName({
        catalog: driversWithCatalog.includes(driver) ? 'analytics' : null,
        db: sqliteDrivers.includes(driver) ? null : 'public',
        name: 'orders',
      }),
    ).toBe(expected);
  });

  it('throws with a supported-driver list for unknown drivers', () => {
    expect(() => getDialectForDriver('oracle')).toThrow(
      'Unsupported warehouse driver "oracle". Supported drivers: bigquery, clickhouse, mysql, postgres, postgresql, sqlite, sqlite3, snowflake, sqlserver',
    );
  });
});
|
||||
102
packages/context/src/connections/dialects.ts
Normal file
102
packages/context/src/connections/dialects.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
|
||||
|
||||
/**
 * Driver names that have a registered SQL dialect in this module.
 * `postgres`/`postgresql` and `sqlite`/`sqlite3` are accepted as separate spellings.
 */
export type SupportedDriver =
  | 'postgres'
  | 'postgresql'
  | 'mysql'
  | 'sqlserver'
  | 'snowflake'
  | 'bigquery'
  | 'clickhouse'
  | 'sqlite'
  | 'sqlite3';
|
||||
|
||||
/**
 * SQL dialect helpers for one warehouse driver: identifier quoting,
 * table-name rendering, and native-type classification.
 */
export interface KtxDialect {
  /** Driver name this dialect was registered under. */
  readonly type: SupportedDriver;
  /** Wraps a single identifier in the dialect's quote characters. */
  quoteIdentifier(identifier: string): string;
  /** Renders a table reference as a quoted, dot-separated name. */
  formatTableName(table: KtxTableRef): string;
  /** Classifies a native column type name as a schema dimension type. */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
}
|
||||
|
||||
/**
 * Driver names listed in the "unsupported driver" error message.
 * The order is part of that message, so keep it stable.
 */
const supportedDrivers: readonly SupportedDriver[] = [
  'bigquery',
  'clickhouse',
  'mysql',
  'postgres',
  'postgresql',
  'sqlite',
  'sqlite3',
  'snowflake',
  'sqlserver',
];
|
||||
|
||||
/**
 * Wraps an identifier in double quotes, doubling any embedded double quote.
 *
 * @param identifier - Raw, unquoted identifier.
 * @returns The quoted identifier.
 */
function doubleQuoted(identifier: string): string {
  const escaped = identifier.replace(/"/g, '""');
  return `"${escaped}"`;
}
|
||||
|
||||
/**
 * Wraps an identifier in backticks, doubling any embedded backtick.
 *
 * @param identifier - Raw, unquoted identifier.
 * @returns The quoted identifier.
 */
function backtickQuoted(identifier: string): string {
  const escaped = identifier.replace(/`/g, '``');
  return '`' + escaped + '`';
}
|
||||
|
||||
/**
 * Wraps an identifier in backticks using backslash escapes.
 *
 * Both backticks and backslashes are escaped. Escaping only the backtick
 * would let an identifier ending in `\` swallow the closing backtick,
 * because the backslash is itself the escape character.
 *
 * @param identifier - Raw, unquoted identifier.
 * @returns The quoted identifier.
 */
function bigQueryQuoted(identifier: string): string {
  // `$&` re-inserts the matched character after the literal backslash.
  const escaped = identifier.replace(/[\\`]/g, '\\$&');
  return '`' + escaped + '`';
}
|
||||
|
||||
/**
 * Wraps an identifier in square brackets, doubling any embedded `]`.
 *
 * @param identifier - Raw, unquoted identifier.
 * @returns The quoted identifier.
 */
function bracketQuoted(identifier: string): string {
  const escaped = identifier.replace(/\]/g, ']]');
  return `[${escaped}]`;
}
|
||||
|
||||
/**
 * Classifies a native column type name as `time`, `number`, `boolean`, or
 * `string` using case-insensitive substring checks.
 *
 * Enumerated types are classified as `string` before any other check.
 * Otherwise `enum(...)` would match the `num` substring, and labels inside
 * the enum definition could match `date`/`time`.
 *
 * @param nativeType - Type name as reported by the warehouse.
 * @returns The inferred dimension type; `string` when nothing else matches.
 */
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
  const normalized = nativeType.toLowerCase().trim();

  if (/\benum/.test(normalized)) {
    return 'string';
  }

  if (normalized.includes('date') || normalized.includes('time')) {
    return 'time';
  }

  // NOTE(review): substring matching also catches names such as `interval`
  // and `point` via `int`; confirm whether those should stay numeric.
  const numericHints = ['int', 'num', 'dec', 'float', 'double', 'real'];
  if (numericHints.some((hint) => normalized.includes(hint))) {
    return 'number';
  }

  if (normalized.includes('bool') || normalized === 'bit') {
    return 'boolean';
  }

  return 'string';
}
|
||||
|
||||
/**
 * Renders a table reference as quoted segments joined by `.`.
 *
 * @param table - Table reference; empty or missing catalog/db segments are dropped.
 * @param quote - Quoting function applied to every emitted segment.
 * @param sqlite - When true, only the table name is emitted.
 * @returns The quoted, dot-separated table name.
 */
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
  const parts = sqlite
    ? [table.name]
    : [table.catalog, table.db, table.name].filter((part): part is string => !!part);
  return parts.map(quote).join('.');
}
|
||||
|
||||
/**
 * Builds a dialect from a driver name and a quoting function.
 *
 * @param type - Driver name stored on the dialect.
 * @param quote - Function used for both identifier quoting and table-name segments.
 * @param sqlite - When true, table names are rendered without catalog/db segments.
 * @returns The assembled dialect.
 */
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
  return {
    type,
    quoteIdentifier: quote,
    formatTableName: (table) => formatWithParts(table, quote, sqlite),
    mapToDimensionType: inferDimensionType,
  };
}
|
||||
|
||||
/**
 * Registry of dialects keyed by driver name. Alias spellings
 * (`postgres`/`postgresql`, `sqlite`/`sqlite3`) get their own entries so each
 * dialect's `type` matches the key it is registered under.
 */
const dialects: Record<SupportedDriver, KtxDialect> = {
  postgres: createDialect('postgres', doubleQuoted),
  postgresql: createDialect('postgresql', doubleQuoted),
  mysql: createDialect('mysql', backtickQuoted),
  clickhouse: createDialect('clickhouse', backtickQuoted),
  sqlite: createDialect('sqlite', doubleQuoted, true),
  sqlite3: createDialect('sqlite3', doubleQuoted, true),
  snowflake: createDialect('snowflake', doubleQuoted),
  bigquery: createDialect('bigquery', bigQueryQuoted),
  sqlserver: createDialect('sqlserver', bracketQuoted),
};
|
||||
|
||||
/**
 * Looks up the dialect registered for a driver name.
 *
 * The name is lower-cased and trimmed before lookup. Only own keys of the
 * registry are accepted, so inherited names such as `constructor` or
 * `toString` are rejected like any other unknown driver.
 *
 * @param driver - Driver name as configured by the caller.
 * @returns The matching dialect.
 * @throws Error listing the supported drivers when the name is not registered.
 */
export function getDialectForDriver(driver: string): KtxDialect {
  const normalized = driver.toLowerCase().trim();
  if (Object.prototype.hasOwnProperty.call(dialects, normalized)) {
    return dialects[normalized as SupportedDriver];
  }
  throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}
|
||||
|
|
@ -3,7 +3,9 @@ export type {
|
|||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
export type { KtxDialect, SupportedDriver } from './dialects.js';
|
||||
export { createDefaultLocalQueryExecutor, type DefaultLocalQueryExecutorOptions } from './local-query-executor.js';
|
||||
export { getDialectForDriver } from './dialects.js';
|
||||
export { normalizeQueryRows } from './query-executor.js';
|
||||
export { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
export { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js';
|
||||
|
|
|
|||
|
|
@ -379,5 +379,37 @@ describe('GitService', () => {
|
|||
await service.removeWorktree(wtDir).catch(() => undefined);
|
||||
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
|
||||
});
|
||||
|
||||
// An untracked file in the main checkout that collides with a path committed on
// the session branch must be reported as a conflict and left in place.
it('reports untracked files that would be overwritten by the squash merge', async () => {
  const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
  const parent = await realpath(join(tempDir, '..'));
  const wtDir = join(parent, `wt-${Date.now()}-untracked`);
  await service.addWorktree(wtDir, 'session/untracked', baseSha);

  try {
    // Commit knowledge.md on the session branch, then create an untracked
    // file at the same path in the main working tree.
    const scoped = service.forWorktree(wtDir);
    await writeFile(join(wtDir, 'knowledge.md'), 'session version\n', 'utf-8');
    await scoped.commitFile('knowledge.md', 'session write', 'System User', 'system@example.com');
    await writeFile(join(tempDir, 'knowledge.md'), 'untracked local version\n', 'utf-8');

    const result = await service.squashMergeIntoMain(
      'session/untracked',
      'System User',
      'system@example.com',
      'Memory capture: 1 file [chat=untracked]',
    );

    expect(result.ok).toBe(false);
    if (result.ok) {
      throw new Error('unreachable');
    }
    expect(result.conflict).toBe(true);
    expect(result.conflictPaths).toEqual(['knowledge.md']);

    // The local file must still be present and untracked after the aborted merge.
    const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status();
    expect(status.not_added).toContain('knowledge.md');
  } finally {
    // Clean up even when an assertion above fails, so the worktree does not leak.
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  }
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -31,6 +31,40 @@ export type SquashMergeResult =
|
|||
| { ok: true; squashSha: string; touchedPaths: string[] }
|
||||
| { ok: false; conflict: true; conflictPaths: string[] };
|
||||
|
||||
/**
 * Converts a caught value into a message string.
 *
 * @param error - Any thrown value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function mergeErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}
|
||||
|
||||
/**
 * Extracts the file paths listed in git's "untracked working tree files would
 * be overwritten by merge" error.
 *
 * Paths are read from the lines between that marker and the "Please move or
 * remove them before you merge." line, or to the end of the message when the
 * closing line is absent.
 *
 * @param message - Error text produced by the failed merge.
 * @returns The listed paths, or an empty array when the marker is not present.
 */
function extractUntrackedOverwritePaths(message: string): string[] {
  const marker = 'The following untracked working tree files would be overwritten by merge:';
  const markerIndex = message.indexOf(marker);
  if (markerIndex === -1) {
    return [];
  }

  const afterMarker = message.slice(markerIndex + marker.length);
  const abortIndex = afterMarker.indexOf('Please move or remove them before you merge.');
  const pathBlock = abortIndex === -1 ? afterMarker : afterMarker.slice(0, abortIndex);

  return (
    pathBlock
      .split('\n')
      .map((line) => line.trim())
      // "Aborting" can trail the list when the closing line is missing.
      .filter((line) => line.length > 0 && line !== 'Aborting')
      // NOTE(review): only the surrounding quotes are stripped; any escape
      // sequences inside a quoted path are left as-is.
      .map((line) => line.replace(/^"(.+)"$/, '$1'))
  );
}
|
||||
|
||||
/**
 * Combines unmerged paths with any untracked-overwrite paths named in the
 * merge error, without duplicates.
 *
 * @param unmergedPaths - Paths already known to be in conflict.
 * @param mergeError - Value thrown by the merge, or `null` when it did not throw.
 * @returns De-duplicated paths, unmerged paths first.
 */
function mergeConflictPaths(unmergedPaths: string[], mergeError: unknown): string[] {
  const paths = new Set(unmergedPaths);
  if (mergeError !== null) {
    const overwritten = extractUntrackedOverwritePaths(mergeErrorMessage(mergeError));
    for (const path of overwritten) {
      paths.add(path);
    }
  }
  return [...paths];
}
|
||||
|
||||
export class GitService {
|
||||
private static readonly mutationQueues = new Map<string, Promise<void>>();
|
||||
|
||||
|
|
@ -639,10 +673,11 @@ export class GitService {
|
|||
}
|
||||
|
||||
const unmergedOut = await this.git.raw(['diff', '--name-only', '--diff-filter=U']).catch(() => '');
|
||||
const conflictPaths = unmergedOut
|
||||
const unmergedPaths = unmergedOut
|
||||
.split('\n')
|
||||
.map((l) => l.trim())
|
||||
.filter(Boolean);
|
||||
const conflictPaths = mergeConflictPaths(unmergedPaths, mergeError);
|
||||
|
||||
if (conflictPaths.length > 0 || mergeError !== null) {
|
||||
// `merge --abort` only works for an in-progress merge; squash sets MERGE_MSG but not
|
||||
|
|
@ -651,7 +686,7 @@ export class GitService {
|
|||
await this.git.raw(['reset', '--hard', 'HEAD']).catch(() => undefined);
|
||||
this.logger.warn(
|
||||
`squashMergeIntoMain: conflict merging ${branch} — aborted. conflictPaths=${conflictPaths.join(',')}` +
|
||||
(mergeError ? ` error=${mergeError instanceof Error ? mergeError.message : String(mergeError)}` : ''),
|
||||
(mergeError ? ` error=${mergeErrorMessage(mergeError)}` : ''),
|
||||
);
|
||||
return { ok: false, conflict: true, conflictPaths };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -277,7 +277,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
|
|||
|
||||
await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves
|
||||
.toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.');
|
||||
await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql/paid-order-lifecycle.md'), 'utf-8'))
|
||||
await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql-paid-order-lifecycle.md'), 'utf-8'))
|
||||
.resolves.toContain('Paid Order Lifecycle');
|
||||
|
||||
const reloaded = await loadKtxProject({ projectDir: project.projectDir });
|
||||
|
|
@ -295,7 +295,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
|
|||
searchLocalKnowledgePages(reloaded, { query: 'paid order lifecycle', userId: 'local', limit: 5 }),
|
||||
).resolves.toEqual([
|
||||
expect.objectContaining({
|
||||
key: 'historic-sql/paid-order-lifecycle',
|
||||
key: 'historic-sql-paid-order-lifecycle',
|
||||
summary: 'Paid Order Lifecycle',
|
||||
matchReasons: expect.arrayContaining(['lexical']),
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ async function commitProjectionChanges(workdir: string): Promise<void> {
|
|||
const status = await git.status();
|
||||
const paths = status.files
|
||||
.map((file) => file.path)
|
||||
.filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql/'));
|
||||
.filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql'));
|
||||
if (paths.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/old-order-lifecycle.md',
|
||||
'knowledge/global/historic-sql-old-order-lifecycle.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -127,7 +127,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
);
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/retired-pattern.md',
|
||||
'knowledge/global/historic-sql-retired-pattern.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -164,15 +164,15 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
|
||||
|
||||
expect(result.patternPagesWritten).toBe(1);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
|
||||
'Order Lifecycle Analysis',
|
||||
);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
'stale_since: "2026-05-11T00:00:00.000Z"',
|
||||
);
|
||||
});
|
||||
|
||||
it('writes a reappearing pattern to the active slug instead of reusing an archived page key', async () => {
|
||||
it('rewrites a reappearing archived pattern at the flat slug', async () => {
|
||||
const workdir = await tempWorkdir();
|
||||
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
|
||||
source: 'historic-sql',
|
||||
|
|
@ -192,7 +192,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md',
|
||||
'knowledge/global/historic-sql-order-lifecycle-analysis.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -230,15 +230,10 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
|
||||
|
||||
expect(result.patternPagesWritten).toBe(1);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain(
|
||||
'Order Lifecycle Analysis',
|
||||
);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain(
|
||||
'Archived body',
|
||||
);
|
||||
await expect(
|
||||
readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/order-lifecycle-analysis.md'), 'utf-8'),
|
||||
).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
const page = await readFile(join(workdir, 'knowledge/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8');
|
||||
expect(page).toContain('Analysts compare order status with customer segment again.');
|
||||
expect(page).not.toContain('Archived body');
|
||||
expect(page).not.toContain('archived');
|
||||
});
|
||||
|
||||
it('leaves already archived pattern pages stable when they are still absent', async () => {
|
||||
|
|
@ -259,7 +254,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
});
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/_archived/retired-pattern.md',
|
||||
'knowledge/global/historic-sql-retired-pattern.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -284,12 +279,9 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
|
||||
expect(result.archivedPatternPages).toBe(0);
|
||||
expect(result.stalePatternPagesMarked).toBe(0);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
'Archived retired body',
|
||||
);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/retired-pattern.md'), 'utf-8')).rejects.toMatchObject({
|
||||
code: 'ENOENT',
|
||||
});
|
||||
});
|
||||
|
||||
it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => {
|
||||
|
|
@ -330,7 +322,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
});
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/legacy-template.md',
|
||||
'knowledge/global/historic-sql-legacy-template.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -365,7 +357,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
commonJoins: [],
|
||||
staleSince: '2026-05-11T00:00:00.000Z',
|
||||
});
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/legacy-template.md'), 'utf-8')).rejects.toMatchObject({
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-legacy-template.md'), 'utf-8')).rejects.toMatchObject({
|
||||
code: 'ENOENT',
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ interface HistoricSqlPatternPage {
|
|||
}
|
||||
|
||||
/**
 * Normalises a value into a flat slug: lower-cased, with every run of
 * characters outside `a-z`, `0-9`, `_`, and `-` collapsed to a single `-`,
 * and leading/trailing dashes removed.
 *
 * `/` is not allowed, so the result never introduces a sub-directory.
 *
 * @param value - Arbitrary text to slugify.
 * @returns The slug; may be empty when no allowed characters remain.
 */
function safeKnowledgeSlug(value: string): string {
  return value
    .toLowerCase()
    .replace(/[^a-z0-9_-]+/g, '-')
    .replace(/^-+|-+$/g, '');
}
|
||||
|
||||
async function pathExists(path: string): Promise<boolean> {
|
||||
|
|
@ -159,7 +159,7 @@ function isLegacyQueryPage(page: HistoricSqlPatternPage): boolean {
|
|||
|
||||
/**
 * Reports whether a pattern page is archived, i.e. its front-matter `tags`
 * array contains `archived`.
 *
 * @param page - Parsed pattern page.
 * @returns True when the page carries the `archived` tag; false when `tags`
 *   is missing or not an array.
 */
function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean {
  const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : [];
  return tags.includes('archived');
}
|
||||
|
||||
function stringArray(value: unknown): string[] {
|
||||
|
|
@ -191,6 +191,9 @@ async function loadPatternPages(root: string): Promise<HistoricSqlPatternPage[]>
|
|||
const files = await walkFiles(root);
|
||||
const pages: HistoricSqlPatternPage[] = [];
|
||||
for (const file of files.filter((candidate) => candidate.endsWith('.md'))) {
|
||||
if (file.includes('/')) {
|
||||
continue;
|
||||
}
|
||||
const key = file.replace(/\.md$/, '');
|
||||
const path = join(root, file);
|
||||
const page = parseMarkdownPage(key, path, await readFile(path, 'utf-8'));
|
||||
|
|
@ -201,6 +204,10 @@ async function loadPatternPages(root: string): Promise<HistoricSqlPatternPage[]>
|
|||
return pages;
|
||||
}
|
||||
|
||||
/**
 * Builds the flat page key for a historic-SQL pattern by prefixing the
 * sanitised slug with `historic-sql-`.
 *
 * @param slug - Pattern slug; sanitised with `safeKnowledgeSlug` before use.
 * @returns The flat page key.
 */
function historicSqlFlatKey(slug: string): string {
  const safeSlug = safeKnowledgeSlug(slug);
  return `historic-sql-${safeSlug}`;
}
|
||||
|
||||
async function currentStagedTables(rawDir: string): Promise<Set<string>> {
|
||||
const tablesRoot = join(rawDir, 'tables');
|
||||
const files = await walkFiles(tablesRoot);
|
||||
|
|
@ -276,7 +283,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
}
|
||||
}
|
||||
|
||||
const wikiRoot = join(input.workdir, 'knowledge/global/historic-sql');
|
||||
const wikiRoot = join(input.workdir, 'knowledge/global');
|
||||
await mkdir(wikiRoot, { recursive: true });
|
||||
const allPages = await loadPatternPages(wikiRoot);
|
||||
const activePages = allPages.filter((page) => !isArchivedPatternPage(page));
|
||||
|
|
@ -286,7 +293,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
for (const pattern of patternEvidence) {
|
||||
const incomingSignals = [...pattern.pattern.tablesInvolved, ...pattern.pattern.constituentTemplateIds];
|
||||
const reusable = patternPages.find((page) => overlapRatio(incomingSignals, existingPageSignals(page)) >= 0.6);
|
||||
const key = reusable?.key ?? safeKnowledgeSlug(pattern.pattern.slug);
|
||||
const key = reusable?.key ?? historicSqlFlatKey(pattern.pattern.slug);
|
||||
const pagePath = join(wikiRoot, `${key}.md`);
|
||||
const frontmatter = {
|
||||
summary: pattern.pattern.title,
|
||||
|
|
@ -308,11 +315,12 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
for (const page of patternPages) {
|
||||
if (writtenKeys.has(page.key)) continue;
|
||||
if (shouldArchive(page.frontmatter.stale_since, manifest.fetchedAt, manifest.staleArchiveAfterDays)) {
|
||||
const archivePath = join(wikiRoot, '_archived', `${page.key}.md`);
|
||||
const tags = [...new Set([...stringArray(page.frontmatter.tags), 'archived'])];
|
||||
await mkdir(dirname(archivePath), { recursive: true });
|
||||
await writeFile(archivePath, renderMarkdownPage({ ...page.frontmatter, tags }, page.content), 'utf-8');
|
||||
await rm(page.path, { force: true });
|
||||
await writeFile(
|
||||
page.path,
|
||||
renderMarkdownPage({ ...page.frontmatter, tags, archived_since: manifest.fetchedAt }, page.content),
|
||||
'utf-8',
|
||||
);
|
||||
result.archivedPatternPages += 1;
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,18 @@ describe('LookmlSourceAdapter validation sidecars', () => {
|
|||
|
||||
afterEach(async () => rm(tmpRoot, { recursive: true, force: true }));
|
||||
|
||||
// Configured ids come back de-duplicated and sorted, whatever order they were given in.
it('returns configured target warehouse connection ids', async () => {
  const adapter = new LookmlSourceAdapter({
    homeDir: join(tmpRoot, 'home'),
    targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
  });

  await expect(adapter.listTargetConnectionIds?.(join(tmpRoot, 'staged'))).resolves.toEqual([
    'analytics',
    'warehouse',
  ]);
});
|
||||
|
||||
it('writes a partial fetch report and marks mismatched chunks as SL-disallowed', async () => {
|
||||
const originRoot = join(tmpRoot, 'origin-src');
|
||||
await mkdir(join(originRoot, 'views'), { recursive: true });
|
||||
|
|
|
|||
|
|
@ -14,6 +14,11 @@ import { parseLookmlPullConfig } from './pull-config.js';
|
|||
|
||||
/** Dependencies supplied when constructing a `LookmlSourceAdapter`. */
export interface LookmlSourceAdapterDeps {
  /** Home directory path handed to the adapter. */
  homeDir: string;
  /**
   * Connection ids reported by `listTargetConnectionIds`, which returns them
   * de-duplicated and sorted. Omitting this yields an empty list.
   */
  targetConnectionIds?: string[];
}
|
||||
|
||||
/**
 * Returns the distinct values in `localeCompare` order.
 *
 * @param values - Values to de-duplicate; `undefined` is treated as empty.
 * @returns A new sorted array; the input is not modified.
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const distinct = [...new Set(values ?? [])];
  return distinct.sort((left, right) => left.localeCompare(right));
}
|
||||
|
||||
export class LookmlSourceAdapter implements SourceAdapter {
|
||||
|
|
@ -43,6 +48,10 @@ export class LookmlSourceAdapter implements SourceAdapter {
|
|||
return readLookmlFetchReport(stagedDir);
|
||||
}
|
||||
|
||||
/**
 * Lists the target connection ids configured on this adapter, de-duplicated
 * and sorted.
 *
 * @param _stagedDir - Unused; the ids come from the adapter's dependencies.
 * @returns The configured ids, or an empty array when none were configured.
 */
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
  return uniqueSorted(this.deps.targetConnectionIds);
}
|
||||
|
||||
async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
|
||||
const project = await parseLookmlStagedDir(stagedDir);
|
||||
const mismatchedModelNames = await readLookmlMismatchedModelNames(stagedDir);
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue