Merge origin/main into npx-ktx-python-daemon

This commit is contained in:
Andrey Avtomonov 2026-05-11 15:07:35 +02:00
commit 88a65bbdc7
148 changed files with 14743 additions and 3508 deletions

View file

@ -39,6 +39,12 @@ jobs:
- name: Run TypeScript checks
run: pnpm run check
- name: Run slow TypeScript tests
run: pnpm run test:slow
- name: Run CLI smoke tests
run: pnpm run smoke
- name: Setup Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:

3
.gitignore vendored
View file

@ -30,6 +30,8 @@ htmlcov/
# Node
node_modules/
.next/
.source/
.npm/
.pnpm-store/
*.tsbuildinfo
@ -50,6 +52,7 @@ yarn-error.log*
# Private local agent overlays
.agents/
.claude/
.superpowers/
# Editors and OS files
.idea/

View file

@ -12,7 +12,7 @@ artifacts. You can inspect them, commit them, and serve them to any MCP client.
- Durable warehouse memory with semantic-layer sources and knowledge pages.
- Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server,
BigQuery, Snowflake, and PostHog.
BigQuery, and Snowflake.
- Agentic ingest with provenance links, tool transcripts, and replay metadata.
- Local semantic-layer query planning and optional query execution.
- A stdio MCP server with tools for connections, knowledge, semantic-layer
@ -258,7 +258,6 @@ The MCP server exposes `connection_list`, `knowledge_search`,
- `packages/connector-clickhouse`: ClickHouse scan connector.
- `packages/connector-mysql`: MySQL scan connector.
- `packages/connector-postgres`: Postgres scan connector.
- `packages/connector-posthog`: PostHog scan connector.
- `packages/connector-snowflake`: Snowflake scan connector.
- `packages/connector-sqlite`: SQLite scan connector.
- `packages/connector-sqlserver`: SQL Server scan connector.

7
conductor.json Normal file
View file

@ -0,0 +1,7 @@
{
"scripts": {
"setup": "bash scripts/conductor-setup.sh",
"run": "bash scripts/conductor-run.sh"
},
"runScriptMode": "nonconcurrent"
}

View file

@ -0,0 +1,7 @@
import { HomeLayout } from "fumadocs-ui/layouts/home";
import type { ReactNode } from "react";
import { baseOptions } from "@/app/layout.config";
/**
 * Layout component that renders its children inside the Fumadocs
 * `HomeLayout`, configured with the shared `baseOptions`.
 */
export default function Layout({ children }: { children: ReactNode }) {
  return (
    <HomeLayout {...baseOptions}>
      {children}
    </HomeLayout>
  );
}

5
docs/app/(home)/page.tsx Normal file
View file

@ -0,0 +1,5 @@
import { redirect } from "next/navigation";
/**
 * Home page component: renders nothing of its own and immediately redirects
 * to the introduction page of the docs.
 */
export default function HomePage() {
  const introductionPath = "/docs/getting-started/introduction";
  redirect(introductionPath);
}

View file

@ -0,0 +1,47 @@
import { source } from "@/lib/source";
import {
DocsPage,
DocsBody,
DocsTitle,
DocsDescription,
} from "fumadocs-ui/page";
import { notFound } from "next/navigation";
import defaultMdxComponents from "fumadocs-ui/mdx";
import { CodeBlock } from "@/components/code-block";
/**
 * Renders a single documentation page.
 *
 * Awaits the route params, looks the page up in `source` by its slug and
 * calls `notFound()` when no page matches. The page's MDX body is rendered
 * with the default Fumadocs MDX components, except that `pre` is replaced by
 * the project's `CodeBlock`.
 */
export default async function Page(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const { body: MDX, toc, title, description } = page.data;
  // Route fenced code blocks through the custom CodeBlock component.
  const mdxComponents = { ...defaultMdxComponents, pre: CodeBlock };

  return (
    <DocsPage toc={toc}>
      <DocsTitle>{title}</DocsTitle>
      <DocsDescription>{description}</DocsDescription>
      <DocsBody>
        <MDX components={mdxComponents} />
      </DocsBody>
    </DocsPage>
  );
}
/** Returns the route params produced by `source.generateParams()`. */
export function generateStaticParams() {
  const staticParams = source.generateParams();
  return staticParams;
}
/**
 * Builds the metadata (title and description) for a documentation page.
 *
 * Resolves the page from the awaited route params and calls `notFound()`
 * when the slug does not match any page.
 */
export async function generateMetadata(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const { title, description } = page.data;
  return { title, description };
}

12
docs/app/docs/layout.tsx Normal file
View file

@ -0,0 +1,12 @@
import { source } from "@/lib/source";
import { DocsLayout } from "fumadocs-ui/layouts/docs";
import type { ReactNode } from "react";
import { baseOptions } from "@/app/layout.config";
/**
 * Layout component that renders its children inside the Fumadocs
 * `DocsLayout`, using the page tree from `source` and the shared
 * `baseOptions`.
 */
export default function Layout({ children }: { children: ReactNode }) {
  const { pageTree } = source;
  // `tree` is passed before the spread, exactly as before, so prop precedence is unchanged.
  return (
    <DocsLayout tree={pageTree} {...baseOptions}>
      {children}
    </DocsLayout>
  );
}

977
docs/app/global.css Normal file
View file

@ -0,0 +1,977 @@
@import "tailwindcss";
@import "fumadocs-ui/css/neutral.css";
@import "fumadocs-ui/css/preset.css";
@theme inline {
--font-sans: var(--font-inter);
--font-display: var(--font-outfit);
--font-mono: var(--font-geist-mono);
}
/*
KTX Light Theme — Warm Cream & Taupe
*/
:root {
--color-fd-background: #faf9f6;
--color-fd-foreground: #1b1b18;
--color-fd-muted: #f3f1ec;
--color-fd-muted-foreground: #6b6560;
--color-fd-popover: #ffffff;
--color-fd-popover-foreground: #1b1b18;
--color-fd-card: #ffffff;
--color-fd-card-foreground: #1b1b18;
--color-fd-border: #e2dfd9;
--color-fd-primary: #0e7490;
--color-fd-primary-foreground: #ffffff;
--color-fd-secondary: #f3f1ec;
--color-fd-secondary-foreground: #44403c;
--color-fd-accent: rgba(14, 116, 144, 0.06);
--color-fd-accent-foreground: #0e7490;
--color-fd-ring: #0e7490;
/* Extended brand tokens */
--ktx-cream: #faf9f6;
--ktx-cream-deep: #f3f1ec;
--ktx-ink: #1b1b18;
--ktx-ink-soft: #57534e;
--ktx-ink-muted: #8c857f;
--ktx-teal: #0e7490;
--ktx-teal-soft: #cffafe;
--ktx-coral: #c2897a;
--ktx-ease: cubic-bezier(0.16, 1, 0.3, 1);
}
/*
KTX Dark Theme — Deep Ocean Slate
*/
.dark {
--color-fd-background: #0f1719;
--color-fd-foreground: #e8e4df;
--color-fd-muted: #1a2429;
--color-fd-muted-foreground: #8a9da6;
--color-fd-popover: #182228;
--color-fd-popover-foreground: #e8e4df;
--color-fd-card: #16202570;
--color-fd-card-foreground: #e8e4df;
--color-fd-border: rgba(255, 255, 255, 0.07);
--color-fd-primary: #22d3ee;
--color-fd-primary-foreground: #0c1518;
--color-fd-secondary: #1c2a31;
--color-fd-secondary-foreground: #c8c3bc;
--color-fd-accent: rgba(34, 211, 238, 0.08);
--color-fd-accent-foreground: #22d3ee;
--color-fd-ring: #22d3ee;
}
.dark #nd-sidebar {
--color-fd-muted: #14202559;
--color-fd-secondary: #1a262c;
--color-fd-muted-foreground: #7a8d96;
}
html, body {
overflow-x: clip;
}
body {
-webkit-font-smoothing: antialiased;
text-rendering: optimizeLegibility;
}
/*
Typography — Outfit display, Inter body
*/
h1, h2, h3, h4 {
font-family: var(--font-display), var(--font-sans), sans-serif;
letter-spacing: -0.02em;
}
h1 {
font-weight: 700;
letter-spacing: -0.03em;
}
h2 {
font-weight: 650;
}
/*
Prose & Content Refinements
*/
/* Inline code */
:not(pre) > code {
background: var(--color-fd-muted) !important;
border: 1px solid var(--color-fd-border) !important;
border-radius: 5px !important;
padding: 0.15em 0.4em !important;
font-size: 0.875em !important;
font-weight: 450 !important;
}
.dark :not(pre) > code {
background: rgba(255, 255, 255, 0.05) !important;
border-color: rgba(255, 255, 255, 0.08) !important;
}
/* Code blocks — give them a subtle traffic-light feel */
figure[data-rehype-pretty-code-figure],
figure:has(> pre) {
position: relative;
border-radius: 12px;
overflow: hidden;
border: 1px solid var(--color-fd-border);
background: var(--color-fd-card);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.03),
0 8px 24px -12px rgba(27, 27, 24, 0.06);
transition: box-shadow 0.3s var(--ktx-ease), border-color 0.3s ease;
}
figure[data-rehype-pretty-code-figure]:hover,
figure:has(> pre):hover {
border-color: color-mix(in oklch, var(--color-fd-primary) 30%, var(--color-fd-border));
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.04),
0 14px 32px -12px rgba(14, 116, 144, 0.12);
}
.dark figure[data-rehype-pretty-code-figure],
.dark figure:has(> pre) {
background: #0c1417;
border-color: rgba(255, 255, 255, 0.06);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.2),
0 12px 32px -12px rgba(0, 0, 0, 0.4);
}
.dark figure[data-rehype-pretty-code-figure]:hover,
.dark figure:has(> pre):hover {
border-color: rgba(34, 211, 238, 0.2);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.2),
0 14px 32px -12px rgba(34, 211, 238, 0.12);
}
pre {
border: 0 !important;
border-radius: 0 !important;
background: transparent !important;
font-size: 13.5px !important;
line-height: 1.7 !important;
}
.dark pre {
background: transparent !important;
}
/*
Code blocks — context-aware modes
*/
/* Shared wrapper base */
.ktx-code {
border-radius: 14px;
overflow: hidden;
margin: 1.25rem 0;
font-family: var(--font-mono), ui-monospace, SFMono-Regular, monospace;
transition: box-shadow 0.3s var(--ktx-ease), border-color 0.3s ease;
}
.ktx-code-body {
margin: 0 !important;
padding: 14px 18px !important;
font-size: 13.5px !important;
line-height: 1.7 !important;
overflow-x: auto;
border: 0 !important;
border-radius: 0 !important;
}
/* Neutralize the outer figure styling that our wrapper now owns */
figure:has(> .ktx-code),
figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
border: 0 !important;
background: transparent !important;
box-shadow: none !important;
border-radius: 0 !important;
margin: 0;
}
/* ── Mode A: Terminal ─────────────────────── */
.ktx-code-terminal {
background: #0c1417;
border: 1px solid rgba(255, 255, 255, 0.08);
color: #c8c3bc;
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 12px 32px -16px rgba(0, 0, 0, 0.3);
}
.ktx-code-terminal:hover {
border-color: rgba(34, 211, 238, 0.2);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 14px 32px -12px rgba(34, 211, 238, 0.18);
}
.ktx-code-terminal-head {
display: flex;
align-items: center;
gap: 6px;
padding: 10px 12px;
border-bottom: 1px solid rgba(255, 255, 255, 0.06);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.03), transparent);
}
.ktx-tl-dot {
width: 11px;
height: 11px;
border-radius: 999px;
flex-shrink: 0;
}
.ktx-code-terminal-label {
margin-left: 8px;
font-size: 11px;
font-weight: 500;
letter-spacing: 0.02em;
color: rgba(255, 255, 255, 0.4);
}
.ktx-code-body-terminal {
background: transparent !important;
color: #c8c3bc !important;
}
/* ── Mode B: VS Code tab (filename) ───────── */
.ktx-code-tab {
background: var(--color-fd-card);
border: 1px solid var(--color-fd-border);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.03),
0 8px 24px -12px rgba(27, 27, 24, 0.06);
}
.dark .ktx-code-tab {
background: #0c1417;
border-color: rgba(255, 255, 255, 0.06);
}
.ktx-code-tab:hover {
border-color: rgba(14, 116, 144, 0.4);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.04),
0 14px 32px -12px rgba(14, 116, 144, 0.14);
}
.dark .ktx-code-tab:hover {
border-color: rgba(34, 211, 238, 0.25);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.2),
0 14px 32px -12px rgba(34, 211, 238, 0.14);
}
.ktx-code-tab-head {
display: flex;
align-items: center;
gap: 8px;
padding: 8px 10px 8px 14px;
border-bottom: 1px solid var(--color-fd-border);
background: linear-gradient(180deg, var(--color-fd-muted), transparent);
}
.dark .ktx-code-tab-head {
border-bottom-color: rgba(255, 255, 255, 0.05);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.02), transparent);
}
.ktx-file-glyph {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 999px;
background: var(--color-fd-muted-foreground);
flex-shrink: 0;
}
.ktx-file-glyph[data-lang="yaml"],
.ktx-file-glyph[data-lang="yml"] { background: #fbbf24; }
.ktx-file-glyph[data-lang="ts"],
.ktx-file-glyph[data-lang="tsx"],
.ktx-file-glyph[data-lang="typescript"] { background: #3b82f6; }
.ktx-file-glyph[data-lang="js"],
.ktx-file-glyph[data-lang="jsx"],
.ktx-file-glyph[data-lang="javascript"] { background: #facc15; }
.ktx-file-glyph[data-lang="json"] { background: #84cc16; }
.ktx-file-glyph[data-lang="md"],
.ktx-file-glyph[data-lang="mdx"] { background: #a3a3a3; }
.ktx-file-glyph[data-lang="sql"] { background: #f97316; }
.ktx-file-glyph[data-lang="py"],
.ktx-file-glyph[data-lang="python"] { background: #22d3ee; }
.ktx-code-tab-filename {
font-family: var(--font-mono), ui-monospace, monospace;
font-size: 12.5px;
color: var(--color-fd-foreground);
}
.ktx-lang-pill {
margin-left: 4px;
padding: 1px 6px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.04em;
color: var(--color-fd-muted-foreground);
border: 1px solid var(--color-fd-border);
border-radius: 4px;
background: var(--color-fd-card);
font-family: var(--font-display), var(--font-sans), sans-serif;
}
.ktx-code-body-tab {
background: transparent !important;
}
/* ── Mode C: Minimal default ──────────────── */
.ktx-code-minimal {
background: var(--color-fd-card);
border: 1px solid var(--color-fd-border);
position: relative;
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.03),
0 8px 24px -12px rgba(27, 27, 24, 0.06);
}
.dark .ktx-code-minimal {
background: #0c1417;
border-color: rgba(255, 255, 255, 0.06);
}
.ktx-code-minimal:hover {
border-color: rgba(14, 116, 144, 0.3);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.04),
0 14px 32px -12px rgba(14, 116, 144, 0.12);
}
.dark .ktx-code-minimal:hover {
border-color: rgba(34, 211, 238, 0.2);
}
.ktx-code-minimal-lang {
position: absolute;
top: 8px;
left: 14px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--color-fd-muted-foreground);
font-family: var(--font-display), var(--font-sans), sans-serif;
opacity: 0;
transition: opacity 0.2s var(--ktx-ease);
pointer-events: none;
z-index: 1;
}
.ktx-code-minimal:hover .ktx-code-minimal-lang {
opacity: 0.5;
}
.ktx-code-minimal-copy {
position: absolute !important;
top: 6px !important;
right: 6px !important;
opacity: 0;
transform: translateY(-4px);
transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease);
z-index: 2;
}
.ktx-code-minimal:hover .ktx-code-minimal-copy {
opacity: 0.7;
transform: translateY(0);
}
.ktx-code-minimal-copy:hover {
opacity: 1 !important;
}
.ktx-code-body-minimal {
background: transparent !important;
}
/* Tables */
table {
border-radius: 8px;
overflow: hidden;
}
th {
font-family: var(--font-display), var(--font-sans), sans-serif !important;
font-weight: 600 !important;
font-size: 0.78rem !important;
letter-spacing: 0.02em;
text-transform: uppercase;
color: var(--color-fd-muted-foreground) !important;
}
/*
Sidebar — typographic sections + active rail
*/
#nd-sidebar {
border-right: 1px solid var(--color-fd-border);
}
.dark #nd-sidebar {
border-right-color: rgba(255, 255, 255, 0.05);
background: rgba(15, 23, 25, 0.6);
backdrop-filter: blur(10px);
}
/* Section folder trigger — uppercase tracked label.
Fumadocs 15 section wrappers are bare <div data-state> (no class, no id);
content panels and other Radix collapsibles always carry a class attribute,
so :not([class]) tightly scopes these rules to section triggers only. */
#nd-sidebar div[data-state]:not([class]) > button[data-state] {
font-family: var(--font-display), var(--font-sans), sans-serif !important;
font-size: 11px !important;
font-weight: 600 !important;
letter-spacing: 0.08em !important;
text-transform: uppercase !important;
color: var(--color-fd-muted-foreground) !important;
padding: 14px 12px 8px !important;
margin-top: 8px !important;
border-top: 1px solid var(--color-fd-border);
width: 100%;
display: flex;
align-items: center;
justify-content: space-between;
text-align: left;
background: transparent;
cursor: pointer;
transition: color 0.15s ease;
}
#nd-sidebar div[data-state]:not([class]) > button[data-state]:hover {
color: var(--color-fd-foreground) !important;
}
#nd-sidebar div[data-state]:not([class]) > button[data-state]:focus-visible {
outline: 2px solid var(--color-fd-primary);
outline-offset: 2px;
border-radius: 4px;
}
/* Remove top border from the first section in the sidebar */
#nd-sidebar div[data-state]:not([class]):first-child > button[data-state] {
border-top: none;
margin-top: 0 !important;
padding-top: 4px !important;
}
/* Chevron rotation on toggle */
#nd-sidebar div[data-state]:not([class]) > button[data-state] svg {
transition: transform 0.2s cubic-bezier(0.16, 1, 0.3, 1);
opacity: 0.7;
}
/* Page link items */
#nd-sidebar a[data-active] {
font-size: 14px;
padding: 6px 12px;
border-radius: 6px;
margin-left: 0;
border-left: 2px solid transparent;
transition: background 0.15s ease, color 0.15s ease, border-color 0.15s ease;
}
#nd-sidebar a[data-active="false"]:hover {
background: var(--color-fd-accent);
color: var(--color-fd-foreground);
}
#nd-sidebar a[data-active="true"] {
background: color-mix(in oklch, var(--color-fd-primary) 8%, transparent) !important;
border-left-color: var(--color-fd-primary) !important;
color: var(--color-fd-primary) !important;
font-weight: 500;
}
#nd-sidebar a[data-active]:focus-visible {
outline: 2px solid var(--color-fd-primary);
outline-offset: 2px;
border-radius: 6px;
}
.dark #nd-sidebar a[data-active="true"] {
background: color-mix(in oklch, var(--color-fd-primary) 12%, transparent) !important;
}
/*
Cards — refined with multi-layer shadow & lift
*/
[data-card="true"] {
border-radius: 12px !important;
border: 1px solid var(--color-fd-border) !important;
background: var(--color-fd-card) !important;
position: relative;
overflow: hidden;
transition:
transform 0.4s var(--ktx-ease),
box-shadow 0.4s var(--ktx-ease),
border-color 0.3s ease !important;
box-shadow: 0 1px 2px rgba(27, 27, 24, 0.02);
}
[data-card="true"]::before {
content: "";
position: absolute;
inset: 0;
border-radius: inherit;
padding: 1px;
background: linear-gradient(
135deg,
rgba(14, 116, 144, 0) 0%,
rgba(14, 116, 144, 0) 70%,
rgba(14, 116, 144, 0.3) 100%
);
mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0);
-webkit-mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0);
mask-composite: exclude;
-webkit-mask-composite: xor;
opacity: 0;
transition: opacity 0.4s var(--ktx-ease);
pointer-events: none;
}
[data-card="true"]:hover {
border-color: color-mix(in oklch, var(--color-fd-primary) 40%, var(--color-fd-border)) !important;
transform: translateY(-2px);
box-shadow:
0 18px 36px -16px rgba(14, 116, 144, 0.18),
0 2px 6px rgba(27, 27, 24, 0.04) !important;
}
[data-card="true"]:hover::before {
opacity: 1;
}
.dark [data-card="true"]:hover {
border-color: rgba(34, 211, 238, 0.3) !important;
box-shadow:
0 18px 36px -16px rgba(34, 211, 238, 0.18),
0 2px 6px rgba(0, 0, 0, 0.3) !important;
}
/*
Callouts / Admonitions
*/
[data-callout] {
border-radius: 12px !important;
border-left-width: 3px !important;
}
/*
Nav & Header
*/
#nd-nav {
backdrop-filter: blur(14px) saturate(1.5);
-webkit-backdrop-filter: blur(14px) saturate(1.5);
}
:root #nd-nav {
background: rgba(250, 249, 246, 0.78) !important;
border-bottom: 1px solid var(--color-fd-border);
}
.dark #nd-nav {
background: rgba(15, 23, 25, 0.7) !important;
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
}
/*
Page title area — give docs pages a hero feel
*/
[data-page-header] h1,
article > h1:first-of-type {
font-size: 2.25rem !important;
font-weight: 750 !important;
letter-spacing: -0.035em !important;
line-height: 1.1 !important;
background: linear-gradient(
180deg,
var(--color-fd-foreground) 0%,
color-mix(in oklch, var(--color-fd-foreground) 85%, var(--color-fd-primary)) 100%
);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
-webkit-text-fill-color: transparent;
}
[data-page-header] p,
article > h1:first-of-type + p {
font-size: 1.075rem !important;
color: var(--color-fd-muted-foreground) !important;
line-height: 1.6 !important;
max-width: 640px;
}
/*
Links
*/
article a:not([data-card]) {
text-decoration-thickness: 1px !important;
text-underline-offset: 3px !important;
transition: color 0.15s ease, text-decoration-color 0.15s ease;
}
article a:not([data-card]):hover {
text-decoration-color: var(--color-fd-primary) !important;
}
/*
Background atmosphere — gradient blobs (subtle)
*/
body::before {
content: "";
position: fixed;
inset: 0;
pointer-events: none;
z-index: 0;
background:
radial-gradient(
ellipse 60% 40% at 10% 0%,
rgba(14, 116, 144, 0.05) 0%,
transparent 60%
),
radial-gradient(
ellipse 70% 50% at 100% 100%,
rgba(194, 137, 122, 0.04) 0%,
transparent 65%
);
}
.dark body::before {
background:
radial-gradient(
ellipse 60% 40% at 10% 0%,
rgba(34, 211, 238, 0.06) 0%,
transparent 60%
),
radial-gradient(
ellipse 70% 50% at 100% 100%,
rgba(124, 58, 237, 0.04) 0%,
transparent 65%
);
}
/* Noise texture overlay (above atmosphere, below content) */
body::after {
content: "";
position: fixed;
inset: 0;
pointer-events: none;
z-index: 1;
opacity: 0.02;
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.85' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E");
background-repeat: repeat;
background-size: 220px 220px;
mix-blend-mode: multiply;
}
.dark body::after {
opacity: 0.035;
mix-blend-mode: overlay;
}
/* Make sure content stays above background */
body > * {
position: relative;
z-index: 2;
}
/*
TOC refinement
*/
[data-toc] a {
font-size: 0.8rem !important;
transition: color 0.15s ease !important;
}
[data-toc] a[data-active="true"] {
color: var(--color-fd-primary) !important;
font-weight: 500 !important;
}
/*
Scrollbar (dark mode)
*/
.dark ::-webkit-scrollbar {
width: 6px;
height: 6px;
}
.dark ::-webkit-scrollbar-track {
background: transparent;
}
.dark ::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.12);
border-radius: 3px;
}
.dark ::-webkit-scrollbar-thumb:hover {
background: rgba(255, 255, 255, 0.2);
}
/*
Selection color
*/
::selection {
background: rgba(14, 116, 144, 0.18);
color: inherit;
}
.dark ::selection {
background: rgba(34, 211, 238, 0.22);
}
/*
Landing page utilities
*/
/* Hero gradient text */
.gradient-text {
background: linear-gradient(
135deg,
var(--color-fd-foreground) 0%,
var(--color-fd-primary) 100%
);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
-webkit-text-fill-color: transparent;
}
/* Pill badge */
.pill-badge {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.375rem 0.875rem;
border-radius: 999px;
font-size: 0.75rem;
font-weight: 500;
letter-spacing: 0.01em;
background: var(--color-fd-muted);
border: 1px solid var(--color-fd-border);
color: var(--color-fd-muted-foreground);
backdrop-filter: blur(8px);
}
.pill-badge .pill-dot {
width: 6px;
height: 6px;
border-radius: 999px;
background: var(--color-fd-primary);
box-shadow: 0 0 8px var(--color-fd-primary);
animation: pill-pulse 2.4s ease-in-out infinite;
}
@keyframes pill-pulse {
0%, 100% { opacity: 1; transform: scale(1); }
50% { opacity: 0.65; transform: scale(0.9); }
}
/* Dot grid */
.dot-grid {
background-image: radial-gradient(
circle,
color-mix(in oklch, var(--color-fd-foreground) 8%, transparent) 1px,
transparent 1px
);
background-size: 24px 24px;
}
.dot-grid-fade {
-webkit-mask-image: radial-gradient(ellipse 60% 60% at center, black, transparent);
mask-image: radial-gradient(ellipse 60% 60% at center, black, transparent);
}
/* Card lift (use on custom landing cards) */
.card-lift {
transition:
transform 0.4s var(--ktx-ease),
box-shadow 0.4s var(--ktx-ease),
border-color 0.3s ease;
}
.card-lift:hover {
transform: translateY(-3px);
box-shadow:
0 20px 40px -12px rgba(27, 49, 57, 0.1),
0 0 0 1px rgba(14, 116, 144, 0.08);
}
.dark .card-lift:hover {
box-shadow:
0 20px 40px -12px rgba(0, 0, 0, 0.5),
0 0 0 1px rgba(34, 211, 238, 0.15);
}
/* Reveal animations on scroll */
.rv {
opacity: 0;
transform: translateY(24px);
transition:
opacity 0.7s var(--ktx-ease),
transform 0.7s var(--ktx-ease);
}
.rv.visible {
opacity: 1;
transform: translateY(0);
}
.rv-stagger > .rv:nth-child(1) { transition-delay: 0ms; }
.rv-stagger > .rv:nth-child(2) { transition-delay: 80ms; }
.rv-stagger > .rv:nth-child(3) { transition-delay: 160ms; }
.rv-stagger > .rv:nth-child(4) { transition-delay: 240ms; }
.rv-stagger > .rv:nth-child(5) { transition-delay: 320ms; }
.rv-stagger > .rv:nth-child(6) { transition-delay: 400ms; }
.rv-stagger > .rv:nth-child(7) { transition-delay: 480ms; }
.rv-stagger > .rv:nth-child(8) { transition-delay: 560ms; }
/* Float animation */
@keyframes float {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-6px); }
}
.anim-float { animation: float 6s ease-in-out infinite; }
/* Sheen across surfaces */
.sheen {
position: relative;
overflow: hidden;
}
.sheen::after {
content: "";
position: absolute;
top: 0;
left: -100%;
width: 50%;
height: 100%;
background: linear-gradient(
110deg,
transparent 20%,
rgba(255, 255, 255, 0.12) 50%,
transparent 80%
);
mix-blend-mode: screen;
animation: sheen-slide 4s ease-in-out infinite;
animation-delay: 1s;
}
@keyframes sheen-slide {
0% { left: -100%; }
100% { left: 200%; }
}
/* Glow text — use sparingly on hero key phrase */
.glow-text {
position: relative;
color: var(--color-fd-primary);
}
.glow-text::after {
content: attr(data-text);
position: absolute;
inset: 0;
color: var(--color-fd-primary);
filter: blur(14px);
opacity: 0.35;
z-index: -1;
}
/* Terminal frame for landing page code preview */
.terminal-frame {
background: #0c1417;
border-radius: 14px;
border: 1px solid rgba(255, 255, 255, 0.08);
overflow: hidden;
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 20px 50px -20px rgba(14, 116, 144, 0.4),
0 50px 100px -40px rgba(0, 0, 0, 0.5);
font-family: var(--font-mono), ui-monospace, SFMono-Regular, monospace;
font-size: 13px;
line-height: 1.65;
}
.terminal-frame .terminal-head {
display: flex;
align-items: center;
gap: 6px;
padding: 10px 14px;
border-bottom: 1px solid rgba(255, 255, 255, 0.06);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.03), rgba(255, 255, 255, 0));
}
.terminal-frame .terminal-dot {
width: 11px;
height: 11px;
border-radius: 999px;
}
.terminal-frame .terminal-body {
padding: 16px 18px;
color: #c8c3bc;
}
.terminal-frame .term-prompt { color: #22d3ee; }
.terminal-frame .term-cmd { color: #e8e4df; }
.terminal-frame .term-comment { color: #6b7280; }
.terminal-frame .term-ok { color: #4ade80; }
.terminal-frame .term-info { color: #fbbf24; }
.terminal-frame .term-dim { color: #71717a; }
.terminal-frame .term-key { color: #c2897a; }
/* Cursor blink */
.term-cursor {
display: inline-block;
width: 8px;
height: 1em;
vertical-align: text-bottom;
background: #22d3ee;
animation: cursor-blink 1.1s steps(2) infinite;
}
@keyframes cursor-blink {
0%, 50% { opacity: 1; }
51%, 100% { opacity: 0; }
}
/*
Reduced motion
*/
/* Switch off decorative animations and transitions for users who ask for
   reduced motion. */
@media (prefers-reduced-motion: reduce) {
  /* Reveal-on-scroll elements are shown immediately in their final state. */
  .rv { transition: none; opacity: 1; transform: none; }
  .anim-float { animation: none; }
  .sheen::after { animation: none; }
  .term-cursor { animation: none; }
  .pill-badge .pill-dot { animation: none; }
  .card-lift { transition: none; }
  /* Cards get the same treatment as .card-lift. Their base transition is
     declared with !important, so the override needs !important as well. */
  [data-card="true"],
  [data-card="true"]::before {
    transition: none !important;
  }
  .ktx-code,
  .ktx-code-minimal-copy,
  .ktx-code-minimal-lang {
    transition: none;
  }
  #nd-sidebar div[data-state]:not([class]) > button[data-state] svg {
    transition: none;
  }
}

View file

@ -0,0 +1,10 @@
import type { BaseLayoutProps } from "fumadocs-ui/layouts/shared";
import { Logo } from "@/components/logo";
// Layout options shared by the layout components that spread `baseOptions`
// into their Fumadocs layout.
export const baseOptions: BaseLayoutProps = {
  nav: {
    // Brand mark used as the navigation title.
    title: <Logo />,
    // NOTE(review): presumably keeps the nav bar transparent while the page
    // is scrolled to the top — confirm against the Fumadocs documentation.
    transparentMode: "top",
  },
  // Repository URL handed to the layout.
  githubUrl: "https://github.com/kaelio/ktx",
};

44
docs/app/layout.tsx Normal file
View file

@ -0,0 +1,44 @@
import "./global.css";
import { RootProvider } from "fumadocs-ui/provider";
import { Outfit, Inter, Geist_Mono } from "next/font/google";
import type { ReactNode } from "react";
import type { Metadata } from "next";
// Outfit font, registered under the --font-outfit CSS variable
// (global.css maps --font-display to it).
const outfit = Outfit({
  variable: "--font-outfit",
  subsets: ["latin"],
  weight: ["400", "500", "600", "700", "800"],
});
// Inter font, registered under the --font-inter CSS variable
// (global.css maps --font-sans to it).
const inter = Inter({
  variable: "--font-inter",
  subsets: ["latin"],
});
// Geist Mono font, registered under the --font-geist-mono CSS variable
// (global.css maps --font-mono to it).
const geistMono = Geist_Mono({
  variable: "--font-geist-mono",
  subsets: ["latin"],
});
// Site-wide metadata: a default title plus a "%s | KTX Docs" template for
// page-specific titles, and a description.
export const metadata: Metadata = {
  title: {
    template: "%s | KTX Docs",
    default: "KTX Docs",
  },
  description:
    "Open-source context infrastructure that makes agentic analytics reliable.",
};
/**
 * Root layout: renders the `<html>` and `<body>` shell, attaches the three
 * font variable classes to `<html>`, and wraps the app in the Fumadocs
 * `RootProvider`.
 */
export default function RootLayout({ children }: { children: ReactNode }) {
  // Space-separated class names that expose the font CSS variables.
  const fontVariables = `${outfit.variable} ${inter.variable} ${geistMono.variable}`;

  return (
    <html lang="en" className={fontVariables} suppressHydrationWarning>
      <body>
        <RootProvider>{children}</RootProvider>
      </body>
    </html>
  );
}

View file

@ -0,0 +1,110 @@
"use client";
import {
type ReactNode,
type ReactElement,
isValidElement,
} from "react";
import { CopyButton } from "./copy-button";
// Props accepted by CodeBlock, which replaces the MDX `pre` element.
type Props = {
  // Code content; also the source of the text handed to the copy button.
  children?: ReactNode;
  // Incoming class name; read for a `language-*` hint but not forwarded to the rendered <pre>.
  className?: string;
  // Optional title; shown as the terminal label or as the tab filename.
  title?: string;
  // rehype-pretty-code adds data attributes such as data-language; capture them via index signature
  [key: string]: unknown;
};
// Languages that are rendered with the terminal-style frame.
const TERMINAL_LANGS = new Set(["bash", "sh", "shell", "zsh"]);
// Matches text whose first non-whitespace character is one of these glyphs;
// such blocks also get the terminal frame.
// NOTE(review): presumably these are the markers printed by the CLI wizard — confirm.
const WIZARD_GLYPHS = /^\s*[◆◇◯◐○●]/;
/**
 * Flattens a React node tree into the plain text it contains.
 *
 * Strings are returned as-is, numbers are stringified, arrays are
 * concatenated in order, and elements contribute the text of their
 * `children` prop. Any other node yields an empty string.
 */
function extractText(node: ReactNode): string {
  switch (typeof node) {
    case "string":
      return node;
    case "number":
      return String(node);
  }

  if (Array.isArray(node)) {
    let combined = "";
    for (const child of node) {
      combined += extractText(child);
    }
    return combined;
  }

  if (!isValidElement(node)) return "";

  const element = node as ReactElement<{ children?: ReactNode }>;
  return extractText(element.props.children);
}
/**
 * Works out the language of a code block.
 *
 * Sources are tried in order: a non-empty string `data-language` prop, a
 * `language-*` class on the block itself, then a `language-*` class on the
 * child element. Returns `null` when none of them yields a language.
 */
function detectLanguage(props: Props, children: ReactNode): string | null {
  const fromAttribute = props["data-language"];
  if (typeof fromAttribute === "string" && fromAttribute.length > 0) {
    return fromAttribute;
  }

  // Pulls "<lang>" out of a "language-<lang>" class; non-strings never match.
  const languageFromClass = (value: unknown): string | null => {
    if (typeof value !== "string") return null;
    const found = /language-([\w-]+)/.exec(value);
    return found ? found[1] : null;
  };

  const ownLanguage = languageFromClass(props.className);
  if (ownLanguage !== null) return ownLanguage;

  if (!isValidElement(children)) return null;

  const child = children as ReactElement<{ className?: string }>;
  return languageFromClass(child.props.className);
}
/**
 * Replacement for the MDX `pre` element that renders code in one of three
 * frames:
 *
 * - Terminal: the language is in `TERMINAL_LANGS` or the text matches
 *   `WIZARD_GLYPHS`.
 * - Tab: a non-empty `title` is present (shown as the filename).
 * - Minimal: everything else.
 *
 * The incoming `className` is dropped; all remaining props are forwarded to
 * the rendered `<pre>`.
 */
export function CodeBlock(props: Props) {
  const { children, title, className: _discarded, ...preProps } = props;

  const language = detectLanguage(props, children);
  const codeText = extractText(children);
  const hasTitle = typeof title === "string" && title.length > 0;
  const usesShellLanguage = language !== null && TERMINAL_LANGS.has(language);
  const isTerminal = usesShellLanguage || WIZARD_GLYPHS.test(codeText);

  // All three modes share the same <pre>; only its class list differs.
  const renderBody = (bodyClassName: string) => (
    <pre {...preProps} className={bodyClassName}>
      {children}
    </pre>
  );

  // Mode A — Terminal
  if (isTerminal) {
    const label = hasTitle ? title : "zsh";
    return (
      <div className="ktx-code ktx-code-terminal group">
        <div className="ktx-code-terminal-head">
          <span className="ktx-tl-dot" style={{ background: "#ff5f57" }} />
          <span className="ktx-tl-dot" style={{ background: "#febc2e" }} />
          <span className="ktx-tl-dot" style={{ background: "#28c840" }} />
          <span className="ktx-code-terminal-label">{label}</span>
          <CopyButton text={codeText} className="ml-auto text-white/80" />
        </div>
        {renderBody("ktx-code-body ktx-code-body-terminal")}
      </div>
    );
  }

  // Mode B — VS Code tab (filename present)
  if (hasTitle) {
    return (
      <div className="ktx-code ktx-code-tab group">
        <div className="ktx-code-tab-head">
          <span className="ktx-file-glyph" data-lang={language ?? ""} />
          <span className="ktx-code-tab-filename">{title}</span>
          {language && <span className="ktx-lang-pill">{language}</span>}
          <CopyButton text={codeText} className="ml-auto" />
        </div>
        {renderBody("ktx-code-body ktx-code-body-tab")}
      </div>
    );
  }

  // Mode C — Minimal default
  return (
    <div className="ktx-code ktx-code-minimal group relative">
      {language && <span className="ktx-code-minimal-lang">{language}</span>}
      <CopyButton text={codeText} className="ktx-code-minimal-copy" />
      {renderBody("ktx-code-body ktx-code-body-minimal")}
    </div>
  );
}

View file

@ -0,0 +1,64 @@
"use client";
import { useState } from "react";
// Props accepted by CopyButton.
type Props = {
  // Text written to the clipboard when the button is clicked.
  text: string;
  // Extra classes appended to the button's base classes; defaults to "".
  className?: string;
};
/**
 * Icon button that copies `text` to the clipboard and shows a check mark
 * for a short time after a successful copy.
 *
 * @param text - String written to the clipboard on click.
 * @param className - Extra classes appended to the button's base classes.
 */
export function CopyButton({ text, className = "" }: Props) {
  // How long the "copied" check mark stays visible.
  const feedbackMs = 1500;
  // Identity of the latest successful copy, or null while idle. A unique
  // token per click lets a stale timer tell that a newer click has taken
  // over, so it does not clear the newer click's feedback early.
  const [copiedToken, setCopiedToken] = useState<symbol | null>(null);
  const copied = copiedToken !== null;

  const onClick = async () => {
    try {
      await navigator.clipboard.writeText(text);
    } catch {
      // Older browsers or denied permission — fail silently
      return;
    }
    const token = Symbol("copied");
    setCopiedToken(token);
    setTimeout(() => {
      // Reset only if no later click has replaced this one.
      setCopiedToken((current) => (current === token ? null : current));
    }, feedbackMs);
  };

  return (
    <button
      type="button"
      onClick={onClick}
      aria-label={copied ? "Copied" : "Copy code"}
      className={`inline-flex items-center justify-center w-7 h-7 rounded-md transition-all hover:bg-white/5 ${className}`}
    >
      {copied ? (
        <svg
          width="14"
          height="14"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="2.4"
          strokeLinecap="round"
          strokeLinejoin="round"
          className="text-emerald-400"
          aria-hidden="true"
        >
          <polyline points="20 6 9 17 4 12" />
        </svg>
      ) : (
        <svg
          width="13"
          height="13"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="2"
          strokeLinecap="round"
          strokeLinejoin="round"
          className="opacity-70"
          aria-hidden="true"
        >
          <rect x="9" y="9" width="13" height="13" rx="2" ry="2" />
          <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
        </svg>
      )}
    </button>
  );
}

56
docs/components/logo.tsx Normal file
View file

@ -0,0 +1,56 @@
/**
 * KTX docs logo: a three-layer stacked-diamond mark followed by the "KTX"
 * wordmark and a "Docs" label. The mark tilts slightly when the group is
 * hovered.
 */
export function Logo() {
  // Both text labels use the same display font stack.
  const labelFont = {
    fontFamily: "var(--font-display), var(--font-sans), sans-serif",
  };

  // Both gradients run corner to corner across the 24x24 viewBox.
  const diagonal = {
    x1: "0",
    y1: "0",
    x2: "24",
    y2: "24",
    gradientUnits: "userSpaceOnUse",
  } as const;

  // Diamond layers in paint order: bottom, middle, top. The top layer is
  // drawn at full opacity, so it carries no opacity attribute.
  const layers: { d: string; fill: string; opacity?: string }[] = [
    { d: "M3 17 L12 21.5 L21 17 L12 12.5 Z", fill: "url(#ktx-grad-a)", opacity: "0.4" },
    { d: "M3 12 L12 16.5 L21 12 L12 7.5 Z", fill: "url(#ktx-grad-b)", opacity: "0.7" },
    { d: "M3 7 L12 11.5 L21 7 L12 2.5 Z", fill: "var(--color-fd-primary)" },
  ];

  return (
    <div className="flex items-center gap-2 group">
      <div className="relative flex items-center justify-center transition-transform duration-300 ease-out group-hover:rotate-[-4deg]">
        <svg
          width="22"
          height="22"
          viewBox="0 0 24 24"
          fill="none"
          xmlns="http://www.w3.org/2000/svg"
          aria-hidden="true"
        >
          <defs>
            <linearGradient id="ktx-grad-a" {...diagonal}>
              <stop offset="0%" stopColor="var(--color-fd-primary)" />
              <stop offset="100%" stopColor="var(--color-fd-primary)" stopOpacity="0.55" />
            </linearGradient>
            <linearGradient id="ktx-grad-b" {...diagonal}>
              <stop offset="0%" stopColor="var(--color-fd-primary)" stopOpacity="0.85" />
              <stop offset="100%" stopColor="var(--color-fd-primary)" stopOpacity="0.4" />
            </linearGradient>
          </defs>
          {layers.map((layer) => (
            <path key={layer.d} d={layer.d} fill={layer.fill} opacity={layer.opacity} />
          ))}
        </svg>
      </div>
      <span
        className="text-[15px] font-semibold text-fd-foreground tracking-tight"
        style={labelFont}
      >
        KTX
      </span>
      <span
        className="text-[13px] font-medium text-fd-muted-foreground/80 tracking-tight border-l border-fd-border pl-2 ml-0.5"
        style={labelFont}
      >
        Docs
      </span>
    </div>
  );
}

View file

@ -0,0 +1,58 @@
"use client";
import { useEffect, useRef, type ReactNode } from "react";
/** Props accepted by `ScrollReveal`. */
type Props = {
  /** Content rendered inside the wrapper div. */
  children: ReactNode;
  /** Extra class names appended to the wrapper div. */
  className?: string;
  /**
   * When true, the wrapper itself is observed and every `.rv` descendant is
   * revealed together with it. When false, each `.rv` descendant is observed
   * and revealed individually.
   */
  stagger?: boolean;
  /** Value passed to the IntersectionObserver as its `threshold` option. */
  threshold?: number;
};

/**
 * Wrapper that adds the `visible` class to elements once they intersect the
 * viewport, then stops observing them. The bottom of the observation area is
 * inset by 40px via `rootMargin`.
 */
export function ScrollReveal({
  children,
  className = "",
  stagger = false,
  threshold = 0.1,
}: Props) {
  const wrapperRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const wrapper = wrapperRef.current;
    if (wrapper === null) return;

    const markVisible = (el: Element) => {
      el.classList.add("visible");
    };

    const observer = new IntersectionObserver(
      (entries) => {
        entries
          .filter((entry) => entry.isIntersecting)
          .forEach(({ target }) => {
            markVisible(target);
            if (stagger) {
              target.querySelectorAll(".rv").forEach((el) => markVisible(el));
            }
            // Reveal is one-shot: once shown, the element is no longer watched.
            observer.unobserve(target);
          });
      },
      { rootMargin: "0px 0px -40px 0px", threshold }
    );

    // Stagger mode watches only the wrapper; otherwise each `.rv` descendant
    // is watched on its own.
    const targets: Element[] = stagger
      ? [wrapper]
      : Array.from(wrapper.querySelectorAll(".rv"));
    targets.forEach((target) => observer.observe(target));

    return () => observer.disconnect();
  }, [stagger, threshold]);

  const wrapperClass = [stagger ? "rv rv-stagger" : "", className].join(" ");

  return (
    <div ref={wrapperRef} className={wrapperClass}>
      {children}
    </div>
  );
}

View file

@ -0,0 +1,56 @@
/**
 * Static mock of a terminal session showing `ktx setup` output followed by a
 * `ktx serve` prompt with a cursor. Purely presentational: no props, no state.
 */
export function TerminalPreview() {
  // Background colors of the three window-control dots, left to right.
  const dotColors = ["#ff5f57", "#febc2e", "#28c840"];

  // Rows of the setup summary: a label and the status text shown next to it.
  const summaryRows = [
    { label: "LLM", status: " claude-sonnet-4-6" },
    { label: "Embeddings", status: " openai · text-embedding-3-small" },
    { label: "Database", status: " postgres-warehouse · 42 tables" },
    { label: "Sources", status: " dbt-main · 218 models" },
  ];

  return (
    <div className="terminal-frame sheen w-full max-w-[560px]">
      <div className="terminal-head">
        {dotColors.map((color) => (
          <span key={color} className="terminal-dot" style={{ background: color }} />
        ))}
        <span className="ml-2 text-[11px] text-zinc-500 font-medium tracking-wide">
          ~/analytics
        </span>
      </div>
      <div className="terminal-body">
        <div>
          <span className="term-prompt">$</span>{" "}
          <span className="term-cmd">ktx setup</span>
        </div>
        <div className="h-2" />
        <div className="term-dim"> Welcome to KTX setup</div>
        <div className="term-dim"></div>
        {summaryRows.map(({ label, status }) => (
          <div key={label}>
            <span className="term-dim"></span>{" "}
            <span className="term-key">{label}</span>{" "}
            <span className="term-ok">{status}</span>
          </div>
        ))}
        <div className="h-2" />
        <div className="term-info"> Building context for agents</div>
        <div className="pl-3 text-[12px] term-dim">
          enriching schema · detecting relationships · ingesting dbt
        </div>
        <div className="h-2" />
        <div className="term-ok"> KTX context is ready for agents.</div>
        <div className="h-2" />
        <div>
          <span className="term-prompt">$</span>{" "}
          <span className="term-cmd">ktx serve</span>
          <span className="term-cursor ml-1" />
        </div>
      </div>
    </div>
  );
}

View file

@ -0,0 +1,152 @@
---
title: Link Detection
description: How KTX's relationship detection performs on real-world schemas.
---
KTX infers foreign key relationships between tables even when the database declares no primary keys or foreign key constraints. This is critical for analytics warehouses, where constraints are rarely enforced. This page documents the methodology, scoring pipeline, and a reproducible benchmark you can run yourself.
## What this measures
Most analytics warehouses — Snowflake, BigQuery, Redshift — don't enforce referential integrity constraints. Tables like `fct_product_events` reference `dim_accounts` by convention (`account_id` → `id`), but nothing in the schema says so.
KTX's relationship detection discovers these links automatically. The benchmark measures how accurately it recovers known foreign key relationships from a schema with **all declared constraints removed** — the hardest operating mode.
Metrics tracked:
- **Accepted** — relationships scored above the accept threshold (default 0.85) and written to the project manifest
- **Review** — relationships scored between the review threshold (0.55) and accept threshold, flagged for human review
- **Rejected** — relationships scored below the review threshold
- **Skipped** — relationships not evaluated (e.g., filtered by candidate limits)
## Methodology
### Detection pipeline
Relationship detection runs as a multi-stage pipeline during `ktx dev scan`:
1. **Candidate generation** — scans the schema for potential FK relationships using multiple heuristics: exact column name matches, normalized table name matching, name inflection (singular/plural), column suffix patterns (`_id`, `_key`, `_code`, `_uuid`), self-references (`parent_id`, `manager_id`), and optionally embedding similarity and LLM proposals.
2. **Column profiling** — samples up to 10,000 rows per column (configurable via `profile_sample_rows`) to collect statistics: row counts, null rates, distinct value counts, uniqueness ratios, sample values, and text length ranges.
3. **Validation** — tests each candidate relationship against actual data by measuring target uniqueness, source coverage, violation ratio, and value overlap between child and parent columns.
4. **Scoring** — combines 7 weighted signals into a confidence score:
| Signal | Weight | What it captures |
|--------|--------|-----------------|
| Name similarity | 0.24 | How closely column/table names match FK conventions |
| Value overlap | 0.22 | What percentage of FK values exist in the PK column |
| Profile uniqueness | 0.22 | How unique the target column values are |
| Type compatibility | 0.10 | Whether data types are compatible (hard gate — score is 0 if incompatible) |
| Embedding similarity | 0.10 | Semantic similarity between column names |
| Profile null rate | 0.08 | Presence of non-null values |
| Structural prior | 0.04 | Baseline structural hints from schema conventions |
Each signal is normalized to \[0, 1\], multiplied by its weight, and summed. The final confidence is `0.56 + (weighted_sum × 0.65)`, clamped to \[0, 1\].
5. **Graph resolution** — resolves conflicts when multiple candidates target the same column, detects primary keys (by name pattern and validation), and classifies each relationship into `accepted`, `review`, or `rejected` based on thresholds.
### Threshold configuration
```yaml
scan:
relationships:
accept_threshold: 0.85
review_threshold: 0.55
```
Relationships scoring above `accept_threshold` are automatically accepted into the project manifest. Those between `review_threshold` and `accept_threshold` are flagged for analyst review. Below `review_threshold`, they're rejected.
### Test fixture
The benchmark uses the **Orbit-style product warehouse** — a synthetic schema modeled after a real SaaS analytics warehouse with all declared constraints removed. The fixture is a SQLite database with 6 tables:
| Table | Role | Estimated rows |
|-------|------|---------------|
| `dim_accounts` | Dimension | 3 |
| `dim_users` | Dimension | 4 |
| `dim_workspaces` | Dimension | 4 |
| `fct_product_events` | Fact | 5 |
| `fct_invoices` | Fact | 3 |
| `support_tickets` | Fact | 4 |
**Ground truth:** 6 primary keys (one `id` column per table) and 9 foreign key relationships, all `many_to_one`:
| Source column | Target |
|--------------|--------|
| `dim_users.account_id` | `dim_accounts.id` |
| `dim_workspaces.account_id` | `dim_accounts.id` |
| `dim_workspaces.user_id` | `dim_users.id` |
| `fct_product_events.account_id` | `dim_accounts.id` |
| `fct_product_events.user_id` | `dim_users.id` |
| `fct_product_events.workspace_id` | `dim_workspaces.id` |
| `fct_invoices.account_id` | `dim_accounts.id` |
| `support_tickets.account_id` | `dim_accounts.id` |
| `support_tickets.user_id` | `dim_users.id` |
The fixture runs in multiple modes to isolate the contribution of each pipeline stage: with LLM disabled, profiling disabled, validation disabled, and embeddings disabled.
## Results
Results for the default configuration will be added after the benchmark run is finalized.
## Reproducing the benchmark
### Prerequisites
- Node.js 22+
- pnpm
- The KTX repository cloned and dependencies installed (`pnpm install`)
### Running
From the repository root:
```bash
pnpm run relationships:verify-orbit
```
This runs `ktx dev scan` against the bundled SQLite fixture with enrichment disabled, then generates a verification report at:
```text
examples/orbit-relationship-verification/reports/orbit-verification.md
```
The report includes the full relationship summary, enrichment details, artifact paths, and any warnings.
### Custom project
To run verification against your own database (e.g., a local Orbit project):
```bash
KTX_ORBIT_PROJECT_DIR=/path/to/your-project pnpm run relationships:verify-orbit
```
### Configuration
The benchmark project configuration lives at `examples/orbit-relationship-verification/ktx.yaml`:
```yaml
scan:
enrichment:
backend: none
relationships:
enabled: true
llm_proposals: false
accept_threshold: 0.85
review_threshold: 0.55
profile_sample_rows: 10000
validation_concurrency: 4
```
Adjust `accept_threshold` and `review_threshold` to see how threshold changes affect the accepted/review/rejected distribution. Lower thresholds accept more relationships (higher recall, lower precision); higher thresholds are more conservative.
## Broader benchmark suite
Beyond the Orbit fixture, KTX includes a full benchmark corpus at `packages/context/test/fixtures/relationship-benchmarks/` with fixtures across multiple tiers:
- **Unit** — minimal schemas testing individual heuristics
- **Row-bearing** — small schemas with data for validation testing
- **Product** — full warehouse schemas like the Orbit fixture
Fixtures from public datasets (Chinook, Sakila, AdventureWorks, Northwind) supplement the synthetic fixtures. The benchmark runner measures precision, recall, and F1 for both primary key and foreign key detection across all fixtures and modes.

View file

@ -0,0 +1,5 @@
{
"title": "Benchmarks",
"defaultOpen": true,
"pages": ["link-detection"]
}

View file

@ -0,0 +1,126 @@
---
title: "ktx agent"
description: "Machine-readable commands for coding agents."
---
Hidden commands that provide machine-readable JSON output for coding agents. These are the commands that agent integrations (Claude Code, Cursor, Codex, OpenCode) call under the hood — you typically won't use them directly.
All `ktx agent` subcommands require `--json` and produce structured JSON output on stdout.
## Usage
```bash
ktx agent <subcommand> --json [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `tools` | Print available agent-facing KTX tools |
| `context` | Print project context for agent planning |
| `sl list` | List semantic-layer sources |
| `sl read <sourceName>` | Read one semantic-layer source |
| `sl query` | Run a semantic-layer query from a JSON file |
| `wiki search <query>` | Search KTX wiki pages |
| `wiki read <pageId>` | Read one KTX wiki page |
| `sql execute` | Execute read-only SQL with a row limit |
## Options
### `agent tools`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
### `agent context`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
### `agent sl list`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Filter by connection id | — |
| `--query <text>` | Search source names and descriptions | — |
### `agent sl read`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Connection id containing the source | — |
### `agent sl query`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Connection id for execution (required) | — |
| `--query-file <path>` | JSON semantic-layer query file (required) | — |
| `--execute` | Execute the compiled query against the connection | `false` |
| `--max-rows <number>` | Maximum rows to return when executing (1-1000) | — |
### `agent wiki search`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--limit <number>` | Maximum search results | `10` |
### `agent wiki read`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
### `agent sql execute`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Connection id for execution (required) | — |
| `--sql-file <path>` | SQL file to execute (required) | — |
| `--max-rows <number>` | Maximum rows to return, 1-1000 (required) | — |
## Examples
```bash
# List available tools
ktx agent tools --json
# Get project context for planning
ktx agent context --json
# List semantic sources
ktx agent sl list --json
# Search semantic sources by name
ktx agent sl list --json --query "revenue"
# Read a semantic source
ktx agent sl read orders --json --connection-id my-warehouse
# Run a semantic-layer query from a file
ktx agent sl query --json \
--connection-id my-warehouse \
--query-file /tmp/query.json \
--execute \
--max-rows 100
# Search wiki pages
ktx agent wiki search "churn definition" --json
# Read a specific wiki page
ktx agent wiki read page-abc123 --json
# Execute read-only SQL
ktx agent sql execute --json \
--connection-id my-warehouse \
--sql-file /tmp/query.sql \
--max-rows 500
```

View file

@ -0,0 +1,149 @@
---
title: "ktx connection"
description: "Add, list, test, and map data sources."
---
Manage database and source connections in your KTX project. Connections define how KTX reaches your data warehouse, BI tools, and context sources.
## Usage
```bash
ktx connection <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `list` | List configured connections |
| `test <connectionId>` | Test a configured connection |
| `add <driver> <connectionId>` | Add or replace a configured connection |
| `remove <connectionId>` | Remove a configured connection from `ktx.yaml` |
| `map <sourceConnectionId>` | Refresh and validate BI-to-warehouse mappings |
| `mapping list <connectionId>` | List Metabase database mappings |
| `mapping set <connectionId> <field> <assignment>` | Set a Metabase or Looker warehouse mapping |
| `mapping apply-bulk <connectionId>` | Apply mappings from JSON |
| `mapping set-sync-enabled <connectionId> <dbId>` | Enable or disable sync for one Metabase database |
| `mapping sync-state get <connectionId>` | Read sync-state selection |
| `mapping sync-state set <connectionId>` | Write sync-state selection |
| `mapping refresh <connectionId>` | Refresh Metabase database mappings |
| `mapping validate <connectionId>` | Validate Metabase database mappings |
| `mapping clear <connectionId> [dbId]` | Clear Metabase database mappings |
| `metabase setup` | Guided setup for a Metabase connection |
| `notion pick <connectionId>` | Pick Notion root pages for a configured Notion connection |
## Options
### `connection add`
| Flag | Description | Default |
|------|-------------|---------|
| `--url <url>` | Connection URL, `env:NAME`, or `file:/path` reference | — |
| `--schema <schema>` | Schema to include; repeatable | — |
| `--readonly` | Mark the connection as read-only | `false` |
| `--force` | Replace an existing connection | `false` |
| `--allow-literal-credentials` | Allow writing a literal credential URL to `ktx.yaml` | `false` |
#### Notion-specific options for `connection add`
| Flag | Description | Default |
|------|-------------|---------|
| `--token-env <name>` | Environment variable containing Notion auth token | — |
| `--token-file <path>` | File containing Notion auth token | — |
| `--crawl-mode <mode>` | Notion crawl mode (`all_accessible` or `selected_roots`) | `selected_roots` |
| `--root-page-id <id>` | Root page to crawl; repeatable | — |
| `--root-database-id <id>` | Root database to crawl; repeatable | — |
| `--root-data-source-id <id>` | Root data source to crawl; repeatable | — |
| `--max-pages <n>` | Maximum pages per run | — |
| `--max-knowledge-creates <n>` | Maximum knowledge creates per run | — |
| `--max-knowledge-updates <n>` | Maximum knowledge updates per run | — |
### `connection remove`
| Flag | Description | Default |
|------|-------------|---------|
| `--force` | Remove without prompting | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `connection map`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
### `connection mapping` subcommands
| Flag | Subcommand | Description | Default |
|------|-----------|-------------|---------|
| `--json` | `list`, `sync-state get` | Print JSON output | `false` |
| `--file <path>` | `apply-bulk` | JSON mapping file (required) | — |
| `--enabled <value>` | `set-sync-enabled` | `true` or `false` (required) | — |
| `--mode <mode>` | `sync-state set` | `ALL`, `ONLY`, or `EXCEPT` (required) | — |
| `--collections <ids>` | `sync-state set` | Comma-separated collection ids | — |
| `--items <ids>` | `sync-state set` | Comma-separated item ids | — |
| `--tag-names <names>` | `sync-state set` | Comma-separated tag names | — |
| `--auto-accept` | `refresh` | Accept refresh changes without prompting | `false` |
### `connection metabase setup`
| Flag | Description | Default |
|------|-------------|---------|
| `--id <connectionId>` | KTX connection id to write | — |
| `--url <url>` | Metabase API URL | — |
| `--api-key <key>` | Metabase API key | — |
| `--mint-api-key` | Mint a Metabase API key with credentials | `false` |
| `--username <email>` | Metabase admin username for API-key minting | — |
| `--password <password>` | Metabase admin password for API-key minting | — |
| `--map <id=target>` | Assign a Metabase database id to a warehouse connection; repeatable | — |
| `--sync <metabaseDatabaseId>` | Enable sync for a discovered database; repeatable | — |
| `--sync-mode <mode>` | Metabase sync selection mode (`ALL`, `ONLY`, or `EXCEPT`) | `ALL` |
| `--run-ingest` | Run ingest after setup | `false` |
| `--yes` | Confirm and apply setup changes without prompting | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `connection notion pick`
| Flag | Description | Default |
|------|-------------|---------|
| `--no-input` | Disable interactive terminal input | — |
| `--root-page-id <id>` | Root page UUID to crawl; repeatable (required with `--no-input`) | — |
## Examples
```bash
# List all configured connections
ktx connection list
# Add a Postgres connection using an environment variable
ktx connection add postgres my-warehouse --url "env:DATABASE_URL"
# Add a Postgres connection with specific schemas
ktx connection add postgres analytics --url "env:PG_URL" --schema public --schema analytics
# Add a read-only Snowflake connection
ktx connection add snowflake sf-prod --url "env:SNOWFLAKE_URL" --readonly
# Test a connection
ktx connection test my-warehouse
# Remove a connection
ktx connection remove old-warehouse
# Add a Notion source connection
ktx connection add notion my-notion \
--token-env NOTION_TOKEN \
--crawl-mode selected_roots \
--root-page-id abc123def456...
# Run guided Metabase setup
ktx connection metabase setup --url https://metabase.example.com
# Map a BI database to a warehouse connection
ktx connection mapping set metabase-prod databaseMappings 1=my-warehouse
# Refresh Metabase mappings
ktx connection mapping refresh metabase-prod --auto-accept
# Pick Notion root pages interactively
ktx connection notion pick my-notion
```

View file

@ -0,0 +1,147 @@
---
title: "ktx dev"
description: "Low-level diagnostics, scans, adapter commands, and mapping tools."
---
Hidden commands for low-level project management, diagnostics, direct adapter control, and shell completion. Most users interact with these through higher-level commands like [`ktx ingest`](/docs/cli-reference/ktx-ingest) and [`ktx setup`](/docs/cli-reference/ktx-setup), but `ktx dev` provides direct access when you need fine-grained control.
## Usage
```bash
ktx dev <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `init [directory]` | Initialize a Git-backed KTX project directory |
| `doctor` | Check KTX setup, project, and demo readiness |
| `doctor setup` | Check KTX install, build, and local runtime readiness |
| `scan` | Run or inspect standalone connection scans |
| `ingest run` | Run local ingest for one configured connection and source adapter |
| `ingest status [runId]` | Print status for a stored local ingest run |
| `ingest watch [runId]` | Open a stored ingest visual report |
| `ingest replay <runId>` | Replay a stored ingest run through memory-flow output |
| `mapping` | Manage Metabase warehouse mappings (same as `ktx connection mapping`) |
| `completion zsh` | Generate zsh completion script |
## Options
### `dev init`
| Flag | Description | Default |
|------|-------------|---------|
| `--name <name>` | Project name written to `ktx.yaml` | — |
| `--force` | Rewrite `ktx.yaml` and scaffold files in an existing project | `false` |
### `dev doctor`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `dev doctor setup`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `dev scan`
See [`ktx scan`](/docs/cli-reference/ktx-scan) for the full scan command reference.
### `dev ingest run`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <connectionId>` | KTX connection id (required) | — |
| `--adapter <adapter>` | Ingest source adapter name (required) | — |
| `--source-dir <path>` | Directory containing source files | — |
| `--database-introspection-url <url>` | Daemon URL for live-database introspection | — |
| `--debug-llm-request-file <path>` | Write sanitized LLM request structure to a JSONL file | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output | `false` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev ingest status`
| Flag | Description | Default |
|------|-------------|---------|
| `--report-file <path>` | Bundle ingest report JSON file to render | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output | `false` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev ingest watch`
| Flag | Description | Default |
|------|-------------|---------|
| `--report-file <path>` | Bundle ingest report JSON file to render | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output (the default unless `--plain` or `--json` is set) | `true` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev ingest replay`
| Flag | Description | Default |
|------|-------------|---------|
| `--report-file <path>` | Bundle ingest report JSON file to render | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output | `false` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev completion zsh`
| Flag | Description | Default |
|------|-------------|---------|
| `--install` | Install zsh completion into `~/.zfunc` and update `~/.zshrc` | `false` |
## Examples
```bash
# Initialize a new KTX project
ktx dev init
# Initialize in a specific directory with a project name
ktx dev init ./my-project --name "Analytics Context"
# Re-initialize an existing project
ktx dev init --force
# Check project readiness
ktx dev doctor
# Check CLI install readiness
ktx dev doctor setup
# Run a low-level ingest with a specific adapter
ktx dev ingest run --connection-id my-dbt --adapter dbt
# Run ingest from a specific source directory
ktx dev ingest run \
--connection-id my-dbt \
--adapter dbt \
--source-dir ./dbt-project
# View ingest status with the visual TUI
ktx dev ingest watch run-abc123
# Replay a stored ingest session
ktx dev ingest replay run-abc123
# View ingest status from a report file
ktx dev ingest status --report-file /tmp/ingest-report.json
# Generate zsh completions
ktx dev completion zsh
# Install zsh completions
ktx dev completion zsh --install
```

View file

@ -0,0 +1,70 @@
---
title: "ktx ingest"
description: "Build and refresh context from configured sources."
---
Ingest context from your configured sources — dbt, Looker, Metabase, MetricFlow, LookML, or Notion. The ingest process extracts metadata from your tools, then uses an LLM agent to reconcile it with existing context, writing semantic sources and knowledge pages to your project.
## Usage
```bash
ktx ingest [connectionId] [options]
ktx ingest <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `status [runId]` | Print status for the latest or selected public ingest run |
| `watch [runId]` | Open the latest or selected public ingest visual report |
## Options
### `ingest` (run)
| Flag | Description | Default |
|------|-------------|---------|
| `--all` | Ingest every eligible configured source | `false` |
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `ingest status`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `ingest watch`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output instead of the visual report | `false` |
| `--no-input` | Disable interactive terminal input | — |
## Examples
```bash
# Ingest from a specific connection
ktx ingest my-dbt-source
# Ingest from all eligible sources
ktx ingest --all
# Check the status of the latest ingest
ktx ingest status
# Check the status of a specific ingest run
ktx ingest status run-abc123
# Watch the latest ingest report
ktx ingest watch
# Get ingest status as JSON
ktx ingest status --json
```
## Low-level ingest commands
For adapter-level control, use `ktx dev ingest`. See [`ktx dev`](/docs/cli-reference/ktx-dev) for the full low-level ingest surface including `run`, `status`, `watch`, and `replay` with output mode options (`--plain`, `--json`, `--viz`).

View file

@ -0,0 +1,145 @@
---
title: "ktx scan"
description: "Run or inspect database scans."
---
Discover your database schema — tables, columns, types, constraints, and relationships. Scanning is the first step in building context: KTX needs to understand your warehouse structure before it can build semantic sources.
Scan commands live under `ktx dev scan`. See also the [Building Context](/docs/guides/building-context) guide for a walkthrough.
## Usage
```bash
ktx dev scan <connectionId> [options]
ktx dev scan <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `status <runId>` | Print status for a local scan run |
| `report <runId>` | Print a local scan report |
| `relationships <runId>` | Print relationship artifacts for a local scan run |
| `relationship-apply <runId>` | Apply accepted relationship review decisions as manual manifest joins |
| `relationship-feedback` | Export persisted relationship review decisions as calibration labels |
| `relationship-calibration` | Summarize relationship feedback labels against current score thresholds |
| `relationship-thresholds` | Evaluate relationship feedback labels for offline threshold advice |
## Options
### `scan` (run)
| Flag | Description | Default |
|------|-------------|---------|
| `--mode <mode>` | Scan mode: `structural`, `enriched`, or `relationships` | `structural` |
| `--dry-run` | Run without writing scan results | `false` |
| `--database-introspection-url <url>` | Daemon URL for live-database introspection | — |
### `scan report`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print the raw scan report JSON | `false` |
### `scan relationships`
| Flag | Description | Default |
|------|-------------|---------|
| `--status <status>` | Filter by status: `accepted`, `review`, `rejected`, `skipped`, or `all` | `review` |
| `--limit <count>` | Maximum relationships to print per status | `25` |
| `--accept <candidateId>` | Record an accepted decision for a relationship candidate | — |
| `--reject <candidateId>` | Record a rejected decision for a relationship candidate | — |
| `--note <text>` | Attach a note when recording a relationship review decision | — |
| `--reviewer <name>` | Reviewer name for a relationship review decision | — |
| `--json` | Print relationship artifacts as JSON | `false` |
### `scan relationship-apply`
| Flag | Description | Default |
|------|-------------|---------|
| `--all-accepted` | Apply all accepted relationship review decisions for the scan run | `false` |
| `--candidate <candidateId>` | Apply one accepted relationship review decision; repeatable | — |
| `--dry-run` | Preview relationships that would be written without rewriting manifest shards | `false` |
| `--json` | Print the apply result as JSON | `false` |
### `scan relationship-feedback`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection <connectionId>` | Only export labels for one KTX connection | — |
| `--decision <decision>` | Filter: `accepted`, `rejected`, or `all` | `all` |
| `--json` | Print the export as JSON | `false` |
| `--jsonl` | Print labels as newline-delimited JSON | `false` |
### `scan relationship-calibration`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection <connectionId>` | Only calibrate labels for one KTX connection | — |
| `--decision <decision>` | Filter: `accepted`, `rejected`, or `all` | `all` |
| `--accept-threshold <value>` | Score threshold treated as predicted accepted (0–1) | `0.85` |
| `--review-threshold <value>` | Score threshold treated as predicted review (0–1) | `0.55` |
| `--json` | Print the calibration report as JSON | `false` |
### `scan relationship-thresholds`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection <connectionId>` | Only evaluate labels for one KTX connection | — |
| `--min-total-labels <count>` | Minimum scored labels before advice can be ready | `20` |
| `--min-accepted-labels <count>` | Minimum accepted labels before advice can be ready | `5` |
| `--min-rejected-labels <count>` | Minimum rejected labels before advice can be ready | `5` |
| `--json` | Print the threshold advice report as JSON | `false` |
## Examples
```bash
# Run a structural scan of a connection
ktx dev scan my-warehouse
# Run a scan with LLM enrichment
ktx dev scan my-warehouse --mode enriched
# Run a scan with relationship detection
ktx dev scan my-warehouse --mode relationships
# Dry-run a scan (don't write results)
ktx dev scan my-warehouse --dry-run
# Check the status of a scan run
ktx dev scan status run-abc123
# View the scan report
ktx dev scan report run-abc123
# View scan report as JSON
ktx dev scan report run-abc123 --json
# List relationship candidates pending review
ktx dev scan relationships run-abc123
# List all relationships regardless of status
ktx dev scan relationships run-abc123 --status all
# Accept a relationship candidate
ktx dev scan relationships run-abc123 --accept candidate-xyz
# Reject a relationship candidate with a note
ktx dev scan relationships run-abc123 --reject candidate-xyz --note "false positive"
# Apply all accepted relationships to the manifest
ktx dev scan relationship-apply run-abc123 --all-accepted
# Preview what would be applied
ktx dev scan relationship-apply run-abc123 --all-accepted --dry-run
# Export relationship feedback as calibration labels
ktx dev scan relationship-feedback --json
# Calibrate relationship detection thresholds
ktx dev scan relationship-calibration --accept-threshold 0.9 --review-threshold 0.6
# Get threshold advice based on review decisions
ktx dev scan relationship-thresholds
```

View file

@ -0,0 +1,51 @@
---
title: "ktx serve"
description: "Run the MCP stdio server."
---
Start a Model Context Protocol (MCP) server that exposes your KTX project's context to coding agents. The server runs over stdio and provides tools for querying semantic sources, searching knowledge, managing connections, and running ingests.
## Usage
```bash
ktx serve --mcp stdio [options]
```
## Options
| Flag | Description | Default |
|------|-------------|---------|
| `--mcp <mode>` | MCP transport mode (required; only `stdio` is supported) | — |
| `--user-id <id>` | Local user id | `local` |
| `--semantic-compute` | Enable semantic-layer compute | `false` |
| `--semantic-compute-url <url>` | HTTP semantic-layer compute URL | — |
| `--database-introspection-url <url>` | Daemon URL for live-database introspection | — |
| `--execute-queries` | Allow semantic-layer query execution (requires `--semantic-compute`) | `false` |
| `--memory-capture` | Enable memory capture | `false` |
| `--memory-model <model>` | Memory capture model | — |
## Examples
```bash
# Start the MCP server over stdio
ktx serve --mcp stdio
# Start with semantic-layer compute enabled
ktx serve --mcp stdio --semantic-compute
# Start with query execution enabled
ktx serve --mcp stdio --semantic-compute --execute-queries
# Start with a remote semantic compute backend
ktx serve --mcp stdio --semantic-compute-url http://localhost:8080
# Start with memory capture
ktx serve --mcp stdio --memory-capture
# Use a specific project directory
ktx serve --mcp stdio --project-dir /path/to/my-project
```
## Agent integration
The MCP server is typically configured through `ktx setup --agents` rather than started manually. See the [Serving Agents](/docs/guides/serving-agents) guide and [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool configuration.

View file

@ -0,0 +1,174 @@
---
title: "ktx setup"
description: "Set up or resume a local KTX project."
---
Run an interactive wizard that walks you through configuring LLM credentials, embeddings, database connections, context sources, and agent integrations. When run without flags in a directory that has no `ktx.yaml`, the wizard launches the full guided flow. When run in an existing project, it resumes from the first incomplete step.
## Usage
```bash
ktx setup [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `setup demo` | Run the packaged KTX demo from setup |
| `setup demo init` | Initialize the packaged demo project |
| `setup demo reset` | Reset the packaged demo project |
| `setup demo replay` | Replay the packaged demo memory-flow |
| `setup demo scan` | Run the packaged demo scan |
| `setup demo inspect` | Inspect packaged demo outputs |
| `setup demo doctor` | Check packaged demo readiness |
| `setup demo ingest` | Run packaged demo ingest |
| `setup context build` | Build agent-ready KTX context for setup |
| `setup context watch [runId]` | Watch a setup-managed context build |
| `setup context status [runId]` | Print setup-managed context build status |
| `setup context stop [runId]` | Request a pause for a setup-managed context build |
| `setup remove` | Remove setup-managed local integrations |
| `setup status` | Show setup readiness for the resolved KTX project |
## Options
### General
| Flag | Description | Default |
|------|-------------|---------|
| `--project-dir <path>` | KTX project directory | `KTX_PROJECT_DIR`, nearest `ktx.yaml`, or cwd |
| `--new` | Create a new KTX project before setup | `false` |
| `--existing` | Use an existing KTX project | `false` |
| `--yes` | Accept safe defaults in non-interactive setup | `false` |
| `--no-input` | Disable interactive terminal input | — |
### Agent Integration
| Flag | Description | Default |
|------|-------------|---------|
| `--agents` | Install agent integration only | `false` |
| `--target <target>` | Agent target (`claude-code`, `codex`, `cursor`, `opencode`, `universal`) | — |
| `--agent-scope <scope>` | Agent install scope (`project` or `global`) | `project` |
| `--agent-install-mode <mode>` | Agent install mode (`cli`, `mcp`, or `both`) | `cli` |
| `--project` | Install agent integration into the project scope | `false` |
| `--global` | Install agent integration into the global target scope (Claude Code and Codex only) | `false` |
| `--skip-agents` | Leave agent integration incomplete for now | `false` |
### LLM Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--anthropic-api-key-env <name>` | Environment variable containing the Anthropic API key | — |
| `--anthropic-api-key-file <path>` | File containing the Anthropic API key | — |
| `--anthropic-model <model>` | Anthropic model ID to validate and save | — |
| `--skip-llm` | Leave LLM setup incomplete for now | `false` |
### Embedding Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--embedding-backend <backend>` | Embedding backend (`openai` or `sentence-transformers`) | — |
| `--embedding-api-key-env <name>` | Environment variable containing the embedding provider API key | — |
| `--embedding-api-key-file <path>` | File containing the embedding provider API key | — |
| `--skip-embeddings` | Leave embedding setup incomplete for now | `false` |
### Database Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--database <driver>` | Database driver to configure; repeatable (`sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake`) | — |
| `--database-connection-id <id>` | Existing or new connection id; repeatable | — |
| `--new-database-connection-id <id>` | Connection id for one new database connection | — |
| `--database-url <url>` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection | — |
| `--database-schema <schema>` | Database schema to include; repeatable | — |
| `--skip-databases` | Leave database setup incomplete | `false` |
### Historic SQL
| Flag | Description | Default |
|------|-------------|---------|
| `--enable-historic-sql` | Enable Historic SQL when the selected database supports it | `false` |
| `--disable-historic-sql` | Disable Historic SQL for the selected database | `false` |
| `--historic-sql-window-days <number>` | Historic SQL query-history window in days | — |
| `--historic-sql-min-calls <number>` | Postgres `pg_stat_statements` minimum calls floor | — |
| `--historic-sql-service-account-pattern <pattern>` | Historic SQL service-account regex; repeatable | — |
| `--historic-sql-redaction-pattern <pattern>` | Regex for redacting SQL literals in Historic SQL; repeatable | — |
### Context Source Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--source <type>` | Source connector type (`dbt`, `metricflow`, `metabase`, `looker`, `lookml`, `notion`) | — |
| `--source-connection-id <id>` | Connection id for source setup | — |
| `--source-path <path>` | Local source path for dbt, MetricFlow, or LookML | — |
| `--source-git-url <url>` | Git URL for dbt, MetricFlow, or LookML | — |
| `--source-branch <branch>` | Git branch for source setup | — |
| `--source-subpath <path>` | Repo subpath for source setup | — |
| `--source-auth-token-ref <ref>` | `env:` or `file:` credential ref for source repo auth | — |
| `--source-url <url>` | Source service URL for Metabase or Looker | — |
| `--source-api-key-ref <ref>` | `env:` or `file:` API key ref for Metabase or Notion | — |
| `--source-client-id <id>` | Looker client id | — |
| `--source-client-secret-ref <ref>` | `env:` or `file:` Looker client secret ref | — |
| `--source-warehouse-connection-id <id>` | Mapped warehouse connection id | — |
| `--source-project-name <name>` | dbt project name override | — |
| `--source-profiles-path <path>` | dbt profiles path | — |
| `--source-target <target>` | dbt target or source-specific mapping target | — |
| `--metabase-database-id <id>` | Metabase database id to map | — |
| `--notion-crawl-mode <mode>` | Notion crawl mode (`all_accessible` or `selected_roots`) | — |
| `--notion-root-page-id <id>` | Notion root page id; repeatable | — |
| `--skip-initial-source-ingest` | Validate source setup without building source context during setup | `false` |
| `--skip-sources` | Mark optional source setup complete with no sources | `false` |
### Subcommand Options
| Flag | Subcommand | Description | Default |
|------|-----------|-------------|---------|
| `--json` | `status`, `context status` | Print JSON output | `false` |
| `--no-input` | `context build`, `context watch` | Disable interactive terminal input | — |
| `--force` | `context stop` | Request the pause without interactive confirmation | `false` |
| `--agents` | `remove` | Remove setup-managed agent integration files | `false` |
| `--mode <mode>` | `demo` | Demo mode: `seeded`, `replay`, or `full` | `seeded` |
| `--plain` | `demo` | Print plain text output | `false` |
## Examples
```bash
# Run the interactive setup wizard
ktx setup
# Create a new project and run setup
ktx setup --new
# Resume setup in an existing project
ktx setup --existing
# Non-interactive setup with Anthropic key from environment
ktx setup --yes --anthropic-api-key-env ANTHROPIC_API_KEY
# Set up a Postgres connection
ktx setup --database postgres --database-url "env:DATABASE_URL"
# Install agent integration for Claude Code only
ktx setup --agents --target claude-code
# Install agent integration globally for Codex
ktx setup --agents --target codex --global
# Add a dbt source from a local path
ktx setup --source dbt --source-path ./my-dbt-project
# Skip optional steps for a minimal setup
ktx setup --skip-sources --skip-agents
# Check setup readiness
ktx setup status
# Build context after setup
ktx setup context build
# Watch a running context build
ktx setup context watch
# Run the packaged demo
ktx setup demo
```

View file

@ -0,0 +1,122 @@
---
title: "ktx sl"
description: "List, read, validate, query, or write semantic-layer sources."
---
Interact with your project's semantic layer. Semantic sources are YAML definitions that describe your tables, columns, measures, joins, and grain — the vocabulary agents use to generate correct SQL.
## Usage
```bash
ktx sl <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `list` | List semantic-layer sources |
| `read <sourceName>` | Read a semantic-layer source |
| `validate <sourceName>` | Validate a semantic-layer source against the database schema |
| `write <sourceName>` | Write a semantic-layer source |
| `query` | Compile or execute a semantic-layer query |
## Options
### `sl list`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | Filter by KTX connection id | — |
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
### `sl read`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id (required) | — |
### `sl validate`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id (required) | — |
### `sl write`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id (required) | — |
| `--yaml <yaml>` | Semantic-layer source YAML content (required) | — |
### `sl query`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id | — |
| `--measure <measure>` | Measure to query; repeatable (at least one required) | — |
| `--dimension <dimension>` | Dimension to include; repeatable | — |
| `--filter <filter>` | Filter expression; repeatable | — |
| `--segment <segment>` | Segment to include; repeatable | — |
| `--order-by <field[:direction]>` | Order field, optionally suffixed with `:asc` or `:desc`; repeatable | — |
| `--limit <n>` | Query limit | — |
| `--include-empty` | Include empty rows | `false` |
| `--format <format>` | Output format: `json` or `sql` | `json` |
| `--execute` | Execute the compiled query against the database | `false` |
| `--max-rows <n>` | Maximum rows to return when executing | — |
## Examples
```bash
# List all semantic sources
ktx sl list
# List sources for a specific connection
ktx sl list --connection-id my-warehouse
# List sources as JSON
ktx sl list --json
# Read a source definition
ktx sl read orders --connection-id my-warehouse
# Validate a source against the live schema
ktx sl validate orders --connection-id my-warehouse
# Write a new source from YAML
ktx sl write customers --connection-id my-warehouse --yaml "$(cat sources/customers.yaml)"
# Compile a query and view the generated SQL
ktx sl query \
--connection-id my-warehouse \
--measure orders.total_revenue \
--dimension orders.created_date \
--format sql
# Execute a query with filters
ktx sl query \
--connection-id my-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--filter "orders.created_date >= '2024-01-01'" \
--execute \
--max-rows 100
# Query with ordering and limit
ktx sl query \
--connection-id my-warehouse \
--measure orders.total_revenue \
--dimension customers.country \
--order-by total_revenue:desc \
--limit 10 \
--execute
# Execute and cap the result set
ktx sl query \
--connection-id my-warehouse \
--measure orders.count \
--dimension orders.created_date \
--execute \
--max-rows 1000
```

View file

@ -0,0 +1,28 @@
---
title: "ktx status"
description: "Show current project status."
---
Print the current setup status of your KTX project — which steps are complete, which need attention, and whether the project is ready for agents.
## Usage
```bash
ktx status [options]
```
## Options
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
## Examples
```bash
# Show project status
ktx status
# Get status as JSON (useful for scripting)
ktx status --json
```

View file

@ -0,0 +1,92 @@
---
title: "ktx wiki"
description: "List, read, search, or write knowledge pages."
---
Manage knowledge pages in your KTX project. Knowledge pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data.
## Usage
```bash
ktx wiki <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `list` | List local wiki pages |
| `read <key>` | Read one local wiki page |
| `search <query>` | Search local wiki pages |
| `write <key>` | Write one local wiki page |
## Options
### `wiki list`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
### `wiki read`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
### `wiki search`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
### `wiki write`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
| `--scope <scope>` | Scope: `global` or `user` | `global` |
| `--summary <summary>` | Wiki page summary (required) | — |
| `--content <content>` | Wiki page content (required) | — |
| `--tag <tag>` | Wiki tag; repeatable | — |
| `--ref <ref>` | Wiki ref; repeatable | — |
| `--sl-ref <ref>` | Semantic-layer ref; repeatable | — |
## Examples
```bash
# List all wiki pages
ktx wiki list
# Read a specific wiki page
ktx wiki read revenue-definitions
# Search wiki pages
ktx wiki search "monthly recurring revenue"
# Write a global knowledge page
ktx wiki write revenue-definitions \
--summary "Canonical revenue metric definitions" \
--content $'## MRR\nMonthly Recurring Revenue is calculated as...'
# Write a user-scoped knowledge page
ktx wiki write my-notes \
--scope user \
--summary "Personal analysis notes" \
--content "Things to check when revenue numbers look off..."
# Write a page with tags and references
ktx wiki write churn-rules \
--summary "Churn calculation business rules" \
--content "A customer is considered churned when..." \
--tag finance \
--tag retention \
--sl-ref customers \
--sl-ref subscriptions
# Write a page with external references
ktx wiki write data-freshness \
--summary "Data pipeline SLAs and freshness guarantees" \
--content "The orders table refreshes every 15 minutes..." \
--ref "https://wiki.example.com/data-pipelines"
```

View file

@ -0,0 +1,16 @@
{
"title": "CLI Reference",
"defaultOpen": true,
"pages": [
"ktx-setup",
"ktx-connection",
"ktx-scan",
"ktx-ingest",
"ktx-sl",
"ktx-wiki",
"ktx-serve",
"ktx-status",
"ktx-agent",
"ktx-dev"
]
}

View file

@ -0,0 +1,222 @@
---
title: Contributing
description: How to contribute to KTX.
---
KTX is an open-source project and welcomes contributions — bug fixes, new connectors, documentation improvements, and feature proposals. This page covers how to set up a development environment, navigate the repository, run tests, and submit changes.
## Development setup
### Prerequisites
- **Node.js 22+** and **pnpm** — for the TypeScript workspace
- **Python 3.11+** and **uv** — for the Python semantic layer and daemon
- **Git** — for version control
### Clone and install
```bash
git clone https://github.com/kaelio/ktx.git
cd ktx
pnpm install
uv sync --all-groups
```
`pnpm install` sets up all TypeScript packages in the workspace. `uv sync --all-groups` installs Python dependencies for the semantic layer and daemon, including dev and test groups.
### Build
```bash
pnpm run build
```
This builds all TypeScript packages. You can also build individual packages:
```bash
pnpm --filter @ktx/cli run build
pnpm --filter @ktx/context run build
```
### Link the CLI for local testing
```bash
pnpm run setup:dev
pnpm run link:dev
```
This makes the `ktx` command available globally, pointing at your local build.
## Repository structure
KTX is a pnpm + uv workspace. TypeScript packages live in `packages/`, Python projects in `python/`.
```text
packages/
cli/ # CLI entry point and commands
context/ # Core context engine (scan, ingest, MCP, semantic layer)
llm/ # LLM client abstraction
connector-postgres/ # PostgreSQL connector
connector-snowflake/ # Snowflake connector
connector-bigquery/ # BigQuery connector
connector-clickhouse/ # ClickHouse connector
connector-mysql/ # MySQL connector
connector-sqlserver/ # SQL Server connector
connector-sqlite/ # SQLite connector
connector-posthog/ # PostHog connector
python/
ktx-sl/ # Semantic layer — grain-aware query planning and SQL generation
ktx-daemon/ # Daemon — portable API server around the semantic layer
examples/ # Example projects and fixtures
scripts/ # Workspace scripts (benchmarks, verification, release)
docs/ # Documentation site (Fumadocs)
```
All TypeScript packages are ESM (`"type": "module"`) and use `NodeNext` module resolution. The Python projects use `pyproject.toml` for dependency management.
## Running tests
### TypeScript
```bash
# Run all tests
pnpm run test
# Run tests for a specific package
pnpm --filter @ktx/cli run test
pnpm --filter @ktx/context run test
# Type-check all packages
pnpm run type-check
# Type-check a specific package
pnpm --filter @ktx/context run type-check
# CLI smoke test
pnpm --filter @ktx/cli run smoke
```
### Python
```bash
# Run all Python tests
uv run pytest -q
# Semantic layer tests
uv run pytest python/ktx-sl/tests -q
# Daemon tests
uv run pytest python/ktx-daemon/tests -q
```
### Pre-commit checks
After modifying Python files, run pre-commit on the changed files:
```bash
uv run pre-commit run --files python/ktx-sl/src/changed_file.py
```
### Full verification
For cross-cutting changes that affect package exports or shared contracts:
```bash
pnpm run build
pnpm run type-check
pnpm run test
uv run pytest -q
```
## Adding a connector
Database connectors live in `packages/connector-<name>/`. Each connector implements the `KtxScanConnector` interface from `@ktx/context`.
### Step 1: Scaffold the package
Create a new directory at `packages/connector-<name>/` with:
```text
packages/connector-<name>/
package.json
tsconfig.json
src/
index.ts # Public exports
connector.ts # KtxScanConnector implementation
dialect.ts # SQL dialect handling
```
The `package.json` should follow the pattern of existing connectors:
```json
{
"name": "@ktx/connector-<name>",
"private": true,
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js"
}
},
"dependencies": {
"@ktx/context": "workspace:*"
}
}
```
### Step 2: Implement the connector
Your connector class must implement `KtxScanConnector`, which requires:
- **`id`** — a string identifier, typically `"<driver>:<connectionId>"`
- **`driver`** — the `KtxConnectionDriver` value for your database
- **`capabilities`** — a `KtxConnectorCapabilities` object declaring what your connector supports: `tableSampling`, `columnSampling`, `columnStats`, `readOnlySql`, `nestedAnalysis`, `eventStreamDiscovery`, `formalForeignKeys`, `estimatedRowCounts`
- **`introspect()`** — discovers tables, columns, types, and constraints, returning a `KtxSchemaSnapshot`
Optional methods for richer scanning:
- **`sampleColumn()`** — sample values from a specific column
- **`sampleTable()`** — sample rows from a table
- **`columnStats()`** — compute column statistics
- **`executeReadOnly()`** — execute arbitrary read-only SQL
### Step 3: Add a dialect
The dialect class handles database-specific concerns: identifier quoting, type mapping (native types to normalized types), and query generation for sampling and statistics.
### Step 4: Wire it up
Register the new connector driver in `packages/context` so the CLI and scan engine can instantiate it. Look at how existing connectors are registered for the pattern.
### Step 5: Test
```bash
pnpm --filter @ktx/connector-<name> run build
pnpm --filter @ktx/connector-<name> run type-check
pnpm --filter @ktx/connector-<name> run test
```
Use `packages/connector-sqlite/` as a minimal reference and `packages/connector-postgres/` as a full-featured one.
## Code conventions
- **TypeScript**: strict types, no `any`, no `as unknown as`. Use `zod` schemas for runtime validation at CLI and config boundaries. Follow the `camelCaseSchema` / `PascalCaseType` naming convention for Zod schemas and inferred types.
- **Python**: type hints on all new code, `pathlib` over `os.path`, explicit exception types over broad `except Exception`, `logger.exception()` for caught exceptions. Use `sqlglot` for SQL parsing — never regex.
- **Dependencies**: `pnpm` for Node packages (never `npm` or `bun`), `uv` for Python (never `pip`).
- **Dead code**: remove it. Don't leave commented-out code, unused wrappers, or empty directories.
## PR guidelines
Before submitting a pull request:
1. **Run the relevant checks** — at minimum, `pnpm run type-check` and `pnpm run test` for TypeScript changes, `uv run pytest -q` and `uv run pre-commit run --files [FILES]` for Python changes.
2. **Build if you changed exports** — run `pnpm run build` to verify package exports and `dist/` expectations still align.
3. **Keep changes focused** — one logical change per PR. Don't bundle unrelated refactors.
4. **Follow existing patterns** — match the style and conventions of surrounding code. The codebase favors explicit over clever.
5. **Don't commit artifacts** — `node_modules/`, `.venv/`, `dist/`, coverage output, and local databases should not be committed.
For larger features or architectural changes, open an issue first to discuss the approach.

View file

@ -0,0 +1,5 @@
{
"title": "Community",
"defaultOpen": true,
"pages": ["contributing"]
}

View file

@ -0,0 +1,82 @@
---
title: Context as Code
description: Treat analytics context like code — version it, review it, merge it.
---
## The idea
dbt proved that analytics transformations belong in version control. Before dbt, SQL lived in BI tools, scheduling systems, and spreadsheets — scattered, unreviewed, impossible to audit. "Analytics as code" changed that: put your models in git, review them in PRs, deploy them by merging.
KTX applies the same principle to analytics context. Metric definitions, business rules, join relationships, knowledge pages — these are artifacts that determine whether an agent produces correct results. They change over time. They need review. They need history. They need to be treated like code.
A KTX project is a git repository. Semantic sources are YAML files. Knowledge pages are Markdown files. Changes are commits. Updates are pull requests. Deployment is a merge. The entire lifecycle of your analytics context follows the same workflow your team already uses for dbt models, application code, and infrastructure.
## Auto-ingestion
Most analytics context already exists — it's in your dbt manifests, LookML models, Metabase questions, and team Notion pages. KTX pulls from these sources automatically through adapters.
An ingestion run works like this:
1. **Adapters extract metadata.** Each configured source — dbt, LookML, Metabase, MetricFlow, Notion, or your live database — provides structured metadata about models, metrics, dimensions, questions, and documentation.
2. **The LLM agent reconciles.** KTX doesn't blindly overwrite existing context. An LLM agent compares incoming metadata against your current semantic sources and knowledge pages. It decides what to create, what to update, and what to leave alone. If your dbt project added a new model, the agent writes a new semantic source. If a Metabase question references a metric you've already defined, the agent skips the duplicate.
3. **Files are written.** New and updated YAML sources and Markdown knowledge pages are written to the project directory. Every decision is recorded in the session transcript.
This reconciliation step is what separates auto-ingestion from a simple sync. A naive import would overwrite your hand-tuned metric definitions every time dbt's manifest changes. KTX's agent-driven approach merges intelligently: it respects your edits, fills gaps, and flags conflicts for human review.
## The git workflow
Auto-ingestion is designed to plug into a PR-based workflow. Run ingestion on a branch, review the changed YAML and Markdown files, and merge them the same way you merge dbt models or application code.
```
dbt / Looker / Metabase KTX project repo
┌──────────────┐ ┌──────────────────────┐
│ Metadata │───ingestion──▶│ Branch: ingest/... │
│ changes │ │ │
└──────────────┘ │ + 3 new sources │
│ ~ 2 updated joins │
│ + 1 knowledge page │
│ │
│ ──── Open PR ──── │
│ │
│ Review semantic diff │
│ Approve & merge │
└──────────────────────┘
Agents see updated
context immediately
```
A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge.
Once merged, agents querying through KTX's MCP server or CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request.
This workflow gives you the same review guarantees you have for dbt models. No semantic source reaches production without a human approving it. But unlike maintaining context manually, the heavy lifting — discovering new tables, drafting source definitions, extracting business rules from documentation — is done by the ingestion agent. You review and approve. You don't write from scratch.
## Feedback loops
Context improves over time through three feedback channels.
**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a knowledge page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest.
**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, source definitions can be tightened with better filters or grain declarations, and relationship thresholds can be adjusted.
**Relationship calibration.** KTX infers foreign key relationships between tables automatically, even when the database has no declared constraints. It does this by analyzing column names, types, and value distributions, and by asking the LLM for proposals. Each inferred relationship gets a confidence score. You control two thresholds: `acceptThreshold` (relationships above this score are accepted automatically, default 0.85) and `reviewThreshold` (relationships scoring between `reviewThreshold` and `acceptThreshold` are flagged for human review, default 0.55). As you accept or reject proposals, the system learns which patterns match your schema conventions.
Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Relationship calibration tunes the discovery process to your warehouse's conventions. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration.
## Deterministic replay
Every ingestion session in KTX produces a full transcript: every tool call the LLM agent made, every response it received, every source it created or modified, and the reasoning behind each decision.
This matters for three reasons.
**Debugging.** When a semantic source looks wrong — the grain is off, a join points to the wrong table, a measure formula doesn't match the business definition — you can trace it back to the ingestion session that created it. The transcript shows exactly which adapter provided the input, how the LLM interpreted it, and why it made the decision it did. You don't have to guess.
**Trust.** Analytics teams need to trust the context that agents consume. Deterministic replay means you can verify any part of the context layer by re-examining the session that produced it. If a stakeholder asks "where did this revenue definition come from?", you have a complete audit trail — from the dbt manifest entry, through the LLM's reconciliation logic, to the YAML file that was written.
**Reproducibility.** Because ingestion sessions are recorded as structured transcripts (tool calls and responses, not just logs), they can be replayed for testing and validation. If you change your ingestion configuration or upgrade the LLM, you can replay previous sessions to see how the output would differ. This gives you a safety net for changes that affect how context is generated.
The transcript is stored with local ingest run state and can be reviewed or replayed when you need to audit a decision. Commit the resulting YAML and Markdown changes; commit reports or transcripts only when they are part of your team's review workflow.

View file

@ -0,0 +1,5 @@
{
"title": "Concepts",
"defaultOpen": true,
"pages": ["the-context-layer", "context-as-code"]
}

View file

@ -0,0 +1,147 @@
---
title: The Context Layer
description: What a context layer is, why agents need one, and how KTX compares to other semantic layers.
---
## The problem
Give an agent access to your database and it will generate SQL. It might even produce a decent chart. But ask it a real analytics question — "what's our net revenue trend by segment?" — and things fall apart.
The agent doesn't know that `orders.amount` includes refunds and needs a status filter. It doesn't know that `customers` should join to `orders` on `customer_id`, not `id`. It doesn't know that your team stopped using `legacy_segments` six months ago, or that "enterprise" means contracts over $100k, not just big logos. It sees column names and types. It doesn't see your business.
This isn't a model capability problem. GPT-4, Claude, and Gemini can all write correct SQL — when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision.
Analytics engineers already know this pain. It's the same reason you write dbt tests, maintain a data dictionary, and spend half of standup explaining why someone's dashboard number doesn't match the board deck. The difference is that agents make decisions at machine speed, so the wrong context propagates faster than a human can catch it.
## Three waves of AI analytics
The industry has moved through three distinct approaches to getting AI and data to work together.
**Wave one: database access.** Connect an LLM to a database, let it generate SQL. This works for simple lookups — "how many orders last week?" — but breaks on anything that requires business knowledge. The agent guesses at joins, invents metrics, and hallucinates table relationships. Every query is a coin flip.
**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but it's still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter.
**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, data quality gotchas, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer.
## What a context layer is
A context layer is the infrastructure that gives agents the business knowledge they need to produce correct analytics artifacts. It includes a semantic layer — that's a critical component — but it's not the whole thing.
KTX organizes context into four pillars:
**Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives.
```yaml
name: orders
table: public.orders
grain: [id]
columns:
- name: id
type: number
- name: customer_id
type: number
- name: amount
type: number
- name: status
type: string
- name: created_at
type: time
role: time
joins:
- to: customers
"on": customer_id = customers.id
relationship: many_to_one
measures:
- name: revenue
expr: sum(amount)
filter: "status != 'refunded'"
description: Net revenue excluding refunds
- name: order_count
expr: count(id)
```
**Knowledge pages** are Markdown documents that capture business definitions, rules, and gotchas — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it.
```markdown
---
summary: Gross-to-net revenue reconciles paid invoices, credits, and refunds.
tags:
- finance
- revenue
refs:
- arr-contract-first
sl_refs:
- warehouse.invoices
usage_mode: auto
---
Gross revenue starts from paid invoice activity. Net revenue subtracts
credits and successful refunds in the month they are recorded.
Exclude unpaid, void, draft, and test-account invoice activity from
canonical revenue reporting.
```
**Scan artifacts** are the raw output of KTX's database scanner: table and column metadata, inferred foreign key relationships (even without declared constraints), column statistics, and enrichment reports. They form the foundation that semantic sources are built on.
**Provenance** is the record of how context was created and changed. Every ingestion session records a full transcript — which adapter ran, what the LLM decided, which sources were created or updated, and why. This is what makes the system auditable: you can trace any semantic source back to the ingestion decision that created it.
Together, these four pillars give agents enough context to produce analytics artifacts that match what your team would produce — not just syntactically valid SQL, but the right query for the question.
## How KTX compares
KTX is a context layer, and its structured core is an agent-native semantic layer. That matters. MetricFlow, Cube, and Malloy all give teams ways to model metrics, dimensions, joins, and generated SQL. KTX covers that same semantic-layer job, then adds the surrounding context agents need to use it well: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools.
The primary user is different. MetricFlow is centered on dbt-style metric definitions. Cube is centered on a governed semantic runtime for BI, applications, and agents. Malloy is centered on an expressive modeling and query language. KTX is centered on agents that need to read a semantic model, change it, validate it, inspect the generated SQL, and leave a reviewable git diff.
| | KTX semantic layer | MetricFlow | Cube | Malloy |
|---|---|---|---|---|
| **Design center** | Agent-native semantic modeling inside a broader context layer | Metric definitions and dbt semantic models | Governed serving layer for BI, embedded analytics, APIs, and agents | Semantic modeling and analytical query language |
| **Model surface** | Plain YAML sources plus Markdown knowledge pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations |
| **What it models** | Sources, columns, measures, segments, joins, grain, filters, default time dimensions, and context references | Semantic models, entities, dimensions, measures, metrics, time grains, and metric types | Cubes, views, measures, dimensions, segments, joins, hierarchies, policies, and rollups | Sources, joins, dimensions, measures, calculations, nested results, and query pipelines |
| **Agent edit loop** | First-class. Agents can patch small files, save imperfect drafts, run validation, query through MCP, inspect SQL, and refine in the same workflow | Possible, but the interface is a dbt/metric workflow rather than an agent context workflow | Possible through code-first models and platform APIs, but changes are tied to runtime deployment and governance concerns | Possible, but agents must operate in Malloy's language and compiler model |
| **Fan-out safety** | Explicit `grain` plus relationship metadata. KTX detects `one_to_many` fan-out, identifies chasm traps, pre-aggregates independent fact measures into CTEs, and rejects unsafe filters | Dataflow query planning for metric requests, multi-hop joins, metric time, and metric types | Runtime planner, modeled joins, primary keys, views, multi-fact views, and pre-aggregations | Symmetric aggregates and path-based aggregation in the language |
| **SQL generation** | Structured semantic query to canonical SQL, then dialect transpilation with sqlglot | Metric request to optimized query plan, then engine-specific SQL | REST, GraphQL, Postgres-compatible SQL, Semantic SQL, and cached/pre-aggregated execution | Malloy source/query to dialect-specific SQL and result metadata |
| **Context around semantics** | Built in: knowledge pages, scan artifacts, relationship inference, ingest transcripts, replay, and agent-facing MCP tools | Primarily metric and dbt project context | Descriptions and `meta.ai_context` inside the semantic model, plus platform agent features | Annotations/tags can carry metadata; surrounding context depends on the application |
| **Best fit** | Agents maintaining analytics code, metrics, joins, SQL, docs, and semantic definitions | Teams standardizing metrics inside dbt workflows | Production semantic APIs, BI integrations, access control, caching, and concurrency | Expressive modeling and exploratory analysis above SQL |
**Agent-native by design.** KTX's advantage is not just that the files are YAML. The whole loop is shaped for agents: sources are small, overlays can add measures or computed columns without copying entire generated schemas, writes are permissive so an agent can save a draft, and validation/query tools give immediate feedback. An agent can move from "this metric is wrong" to "here is the semantic diff, generated SQL, and supporting context" without leaving the project.
**A semantic layer plus the context to use it.** Traditional semantic layers define what to calculate. KTX also stores why the definition exists, where it came from, what schema evidence supports it, and what an agent did when it changed. A measure can live next to a knowledge page about exclusions, a scan artifact that proves the join path, and an ingest transcript that explains the source of the definition. That is the difference between giving an agent a metric catalog and giving it operational memory.
**Fan-out handling is explicit and reviewable.** KTX asks model authors and agents to declare grain and relationship direction. The planner uses that metadata to avoid silent row multiplication: it detects `one_to_many` fan-out paths, separates independent fact measures into aggregate-locality CTEs, and refuses filters that would be unsafe to apply after pre-aggregation. Cube, MetricFlow, and Malloy all have strong approaches to this class of problem, but KTX's approach is deliberately inspectable in the files and in the generated plan.
**Where other systems are stronger.** KTX draws a clear product boundary around agent-native context and semantic modeling. Cube is stronger when you need a production semantic API with access policies, pre-aggregations, refresh workers, and high-concurrency serving. MetricFlow is stronger when your primary workflow is dbt-native metric standardization. Malloy is stronger when you want a full analytical language with nested query shapes. KTX is strongest when the semantic layer is the substrate agents will read, edit, validate, and extend as part of day-to-day analytics engineering.
**When KTX replaces your semantic layer vs. works beside it.** If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow, LookML, Looker, Metabase, dbt, or Notion, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against.
## The plain-files philosophy
A KTX project is a directory of plain files. No server to run, no database to manage, no proprietary store to back up. Everything is YAML, Markdown, and SQLite — formats you can read, diff, and version-control with tools you already use.
```
my-project/
├── ktx.yaml # Project configuration
├── semantic-layer/
│ └── warehouse/
│ ├── orders.yaml # Semantic source definitions
│ ├── customers.yaml
│ └── order_items.yaml
├── knowledge/
│ ├── global/
│ │ ├── revenue.md # Business definitions and rules
│ │ └── segment-classification.md
│ └── user/
│ └── local/
│ └── data-quality-notes.md
├── raw-sources/
│ └── warehouse/
│ └── live-database/ # Scan artifacts and reports
└── .ktx/
├── db.sqlite # Local state (git-ignored)
└── cache/ # Runtime cache (git-ignored)
```
Semantic sources and knowledge pages are committed to git. The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run.
This means your analytics context travels with your code. You can fork it, branch it, review it in a PR, and merge it with the same tools you use for dbt models. There's no sync problem between a remote server and your local state. There's no migration to run. The files are the source of truth.

View file

@ -0,0 +1,59 @@
---
title: Introduction
description: What KTX is and who it's for.
---
Data agents can write SQL. The hard part is making sure they write the SQL your analytics team would have written.
KTX is the agent-native context layer for analytics engineering. At its core is a semantic layer: YAML sources that define tables, columns, measures, joins, grain, filters, segments, and computed fields. Around that core, KTX adds the context analytics agents need to work safely: warehouse scans, knowledge pages, ingestion from existing tools, provenance, validation, and MCP access.
KTX projects are plain files — YAML and Markdown that you commit to git and review in PRs, just like dbt models, plus a local, git-ignored SQLite database for runtime state. Agents can read the committed files, edit them, validate them, query through them, and leave behind a diff your team can review.
## Who KTX is for
KTX is built for analytics engineers and data teams who want data agents to work on real analytics systems, not just generate one-off SQL.
Use KTX when you want agents to:
- Generate SQL from approved measures, dimensions, and joins
- Repair or extend semantic definitions through reviewable git diffs
- Explain where a metric definition came from and what business rules shape it
- Use warehouse scans and relationship evidence instead of guessing join paths
- Work alongside **dbt**, **LookML**, **MetricFlow**, **Looker**, **Metabase**, **Notion**, and BI platforms
- Work with warehouses like **PostgreSQL**, **Snowflake**, **BigQuery**, **ClickHouse**, **MySQL**, or **SQL Server**
If you've ever watched an agent confidently generate a query that joins on the wrong key or invents a metric that doesn't exist, KTX is the fix.
## What KTX gives agents
- **A semantic layer they can edit** — plain YAML sources with measures, dimensions, joins, grain, segments, filters, and computed columns
- **Safe query planning** — grain-aware SQL generation, fan-out detection, chasm-trap handling, and dialect transpilation
- **Business context** — Markdown knowledge pages for definitions, rules, exceptions, and data quality notes
- **Schema evidence** — warehouse scans with table metadata, column stats, constraints, and inferred relationships
- **Provenance** — ingest transcripts and replay metadata that explain where context came from and why it changed
- **An agent-facing API** — MCP and CLI tools for reading, writing, validating, searching, and querying context
## How these docs are organized
<Cards>
<Card title="Quickstart" href="/docs/getting-started/quickstart">
Set up KTX and build your first context in under 10 minutes.
</Card>
<Card title="Concepts" href="/docs/concepts/the-context-layer">
Understand what a context layer is, why agents need one, and how KTX compares to other semantic layers.
</Card>
<Card title="Guides" href="/docs/guides/building-context">
Hands-on workflows for scanning, ingesting, writing semantic sources, and serving agents.
</Card>
<Card title="Integrations" href="/docs/integrations/primary-sources">
Setup details for every supported database, context source, and agent client.
</Card>
<Card title="CLI Reference" href="/docs/cli-reference/ktx-setup">
Exhaustive flag and subcommand reference for every KTX command.
</Card>
</Cards>
## Next steps
- **Get hands-on** — follow the [Quickstart](/docs/getting-started/quickstart) to set up KTX with your own database in under 10 minutes.
- **Understand the theory** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why schema access alone breaks on real analytics and how KTX addresses it.

View file

@ -0,0 +1,5 @@
{
"title": "Getting Started",
"defaultOpen": true,
"pages": ["introduction", "quickstart"]
}

View file

@ -0,0 +1,255 @@
---
title: Quickstart
description: Set up KTX and build your first context in under 10 minutes.
---
This guide walks you through `ktx setup` — an interactive wizard that configures your LLM provider and embeddings, connects your database, optionally ingests from your existing tools, builds context, and installs agent integration.
## Prerequisites
- **Node.js 22+** and **pnpm**
- An **Anthropic API key** for LLM-powered enrichment and ingestion
- A **database connection** — PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite
- Optionally, a **dbt project**, **LookML repo**, **Metabase instance**, or other context source
- Optionally, an **OpenAI API key** if you choose OpenAI embeddings (local embeddings need no key)
## Install and run setup
KTX is currently used from a local checkout or linked workspace CLI. Build and link the CLI first:
```bash
git clone https://github.com/kaelio/ktx.git
cd ktx
pnpm install
pnpm run setup:dev
pnpm run link:dev
```
Then run the setup wizard in the directory where you want your KTX project:
```bash
ktx setup
```
The wizard walks through six steps. You can go back at any point, and if you exit early, running `ktx setup` again resumes where you left off.
## Step 1: Configure LLM
KTX uses an Anthropic model to enrich schema descriptions, generate semantic sources during ingestion, and reconcile metadata from your tools.
The wizard asks how to find your API key:
```
◆ How should KTX find your Anthropic API key?
│ ○ Use ANTHROPIC_API_KEY from the environment
│ ○ Paste a key and save it as a local secret file
```
If you choose to paste a key, KTX saves it in `.ktx/secrets/anthropic-api-key` with local file permissions. Your `ktx.yaml` stores a `file:` reference, never the raw key.
Next, choose a model:
```
◆ Which Anthropic model should KTX use?
│ ○ Claude Sonnet 4.6 (recommended)
│ ○ Claude Opus 4.6
│ ○ Claude Haiku 4.5
│ ○ Enter a model ID manually
```
KTX runs a health check to verify your key and model work before saving.
## Step 2: Configure embeddings
KTX uses embeddings for semantic search over sources, knowledge pages, schema metadata, and relationship evidence.
```
◆ Which embedding option should KTX use?
│ ○ Local sentence-transformers embeddings
│ ○ OpenAI embeddings (recommended)
```
**OpenAI embeddings** use `text-embedding-3-small` (1536 dimensions) and require an `OPENAI_API_KEY`.
**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX Python daemon. No API key is needed. If you run the daemon as a long-lived HTTP service, start it with:
```bash
ktx-daemon serve-http --host 127.0.0.1 --port 8765
```
## Step 3: Connect a database
Select one or more databases for KTX to scan. The wizard supports SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake.
For PostgreSQL, you can enter connection details field by field or paste a connection URL:
```
◆ How do you want to connect to PostgreSQL?
│ ○ Enter connection details (host, port, database, user)
│ ○ Paste a connection URL
```
If your URL contains credentials, KTX saves it to `.ktx/secrets/` and writes a `file:` reference in `ktx.yaml`. You can also use `env:DATABASE_URL` to reference an environment variable.
After connecting, KTX automatically runs a connection test and a structural scan:
```
◇ Testing postgres-warehouse
│ ✓ Connection test passed
│ Driver: PostgreSQL · Tables: 42
◇ Scanning postgres-warehouse
│ ✓ Structural scan completed
│ Changes: 42 new tables
◇ Primary source ready
│ postgres-warehouse · PostgreSQL · structural scan complete
```
For Snowflake and BigQuery, the wizard offers **Historic SQL** configuration for query history views. For PostgreSQL, enable Historic SQL with `--enable-historic-sql` when `pg_stat_statements` is configured.
## Step 4: Add context sources
Context sources let KTX ingest metadata from your existing analytics tools. This step is optional — you can skip it and add sources later.
```
◆ Which context sources should KTX ingest?
│ ◻ dbt
│ ◻ MetricFlow
│ ◻ Metabase
│ ◻ Looker
│ ◻ LookML
│ ◻ Notion
```
For **dbt**, point KTX at a local path or git URL. KTX reads your `dbt_project.yml` and schema files to extract model metadata:
```
◆ dbt source location
│ ○ Local path
│ ○ Git URL
```
For **Metabase** and **Looker**, you provide an API URL and credentials. KTX maps BI databases to your KTX primary source connections so it knows which warehouse tables the BI metadata refers to.
Context sources are saved to `ktx.yaml` and built during the next step.
## Step 5: Build context
This is where KTX does the heavy lifting. It runs an enriched scan of your database (generating AI-powered column and table descriptions) and ingests metadata from any configured context sources.
```
◆ Build KTX context for agents?
│ ○ Build context now (recommended)
│ ○ Leave context unbuilt and exit setup
```
The build scans each primary source with LLM enrichment, detects table relationships, and runs ingestion agents that reconcile metadata from your context sources into semantic-layer YAML files and knowledge pages.
For a small database (under 50 tables), this takes a few minutes. Larger warehouses can take longer. You can press <kbd>d</kbd> to detach and let it run in the background:
```
KTX context build
Run: setup-context-local-abc123
Project: /home/user/analytics
Detach: press d to leave this running.
Resume: ktx setup context watch setup-context-local-abc123
Status: ktx setup context status setup-context-local-abc123
```
When the build completes, KTX verifies that agent-ready context was produced:
```
KTX context is ready for agents.
Primary sources:
postgres-warehouse: enriched scan complete
Context sources:
dbt-main: memory update complete
Verification:
Agent context: ready
Semantic search: ready
```
## Step 6: Install agent integration
The final step connects KTX to your coding agent. Choose how agents should access the project:
```
◆ How should agents use this KTX project?
│ ○ CLI tools and skills
│ ○ MCP server config
│ ○ Both
```
Then select which agents to install for:
```
◆ Which agent targets should KTX install?
│ ◻ Claude Code
│ ◻ Codex
│ ◻ Cursor
│ ◻ OpenCode
```
**CLI mode** writes a skill file (e.g., `.claude/skills/ktx/SKILL.md`) that teaches the agent to call KTX commands directly.
**MCP mode** writes an MCP server configuration (e.g., `.mcp.json`) that lets the agent call KTX tools like `sl_query`, `knowledge_search`, and `sl_write_source` over the Model Context Protocol.
## Verify it worked
Check your project status:
```bash
ktx status
```
```
KTX project: /home/user/analytics
Project ready: yes
LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Primary sources configured: yes (postgres-warehouse)
Context sources configured: yes (dbt-main)
KTX context built: yes
Agent integration ready: yes (claude-code:project)
```
List your semantic sources:
```bash
ktx sl list
```
Query through the semantic layer:
```bash
ktx sl query \
--connection-id postgres-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--order-by orders.total_revenue:desc \
--limit 5 \
--format sql
```
This outputs the generated SQL. Add `--execute` to run it against your warehouse:
```bash
ktx sl query \
--connection-id postgres-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--order-by orders.total_revenue:desc \
--limit 5 \
--execute --max-rows 10
```
## Next steps
- **Build more context** — learn about [scanning](/docs/guides/building-context), relationship detection, and ingestion workflows in the Building Context guide.
- **Refine your semantic layer** — the [Writing Context](/docs/guides/writing-context) guide covers source YAML, measures, joins, and knowledge pages.
- **Understand the architecture** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why a context layer is more than a semantic layer.
- **Connect more agents** — see the [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool setup details.

View file

@ -0,0 +1,241 @@
---
title: Building Context
description: Scan your database schema and ingest context from dbt, Looker, Metabase, and more.
---
Building context is a two-step process. First, you **scan** your database to discover its structure — tables, columns, types, constraints, and relationships. Then you **ingest** from your existing tools to enrich that structure with semantic meaning — metric definitions, business descriptions, join logic, and knowledge that agents need to generate correct analytics.
## Scanning
Scanning connects to your database and extracts structural metadata. KTX stores the results locally so agents can understand your schema without querying the database directly.
### Running a scan
```bash
ktx dev scan <connection-id>
```
This runs a structural scan by default. You can control what the scan does with the `--mode` flag:
| Mode | What it does |
|------|-------------|
| `structural` | Tables, columns, types, constraints, row counts (default) |
| `enriched` | Structural scan plus LLM-generated column descriptions |
| `relationships` | Structural scan plus foreign key relationship detection |
```bash
# Scan with relationship detection
ktx dev scan my-postgres --mode relationships
# Preview without writing results
ktx dev scan my-postgres --dry-run
```
### Checking scan status
Every scan produces a run ID. Use it to check progress or review results:
```bash
# Check status of a scan run
ktx dev scan status <run-id>
# Print the full scan report
ktx dev scan report <run-id>
# Get the report as JSON for scripting
ktx dev scan report <run-id> --json
```
### Relationship detection
Many databases lack declared foreign keys. KTX infers relationships by scoring column pairs across seven signals — name similarity, type compatibility, value overlap, embedding similarity, profile uniqueness, null rate, and structural priors. The weighted score determines each candidate's status:
| Score range | Status | Meaning |
|-------------|--------|---------|
| &ge; 0.85 | `accepted` | High confidence — applied automatically |
| &ge; 0.55 and &lt; 0.85 | `review` | Plausible — needs human review |
| &lt; 0.55 | `rejected` | Low confidence — not applied |
After a relationship scan, review the candidates:
```bash
# Show candidates pending review (default)
ktx dev scan relationships <run-id>
# Show all candidates regardless of status
ktx dev scan relationships <run-id> --status all
# Accept a specific candidate
ktx dev scan relationships <run-id> --accept <candidate-id>
# Reject a candidate with a note
ktx dev scan relationships <run-id> --reject <candidate-id> --note "These columns share a name but are unrelated"
```
Once you've reviewed candidates, apply the accepted ones as joins in your semantic layer:
```bash
# Apply all accepted relationships
ktx dev scan relationship-apply <run-id> --all-accepted
# Preview what would be applied
ktx dev scan relationship-apply <run-id> --all-accepted --dry-run
# Apply a specific candidate
ktx dev scan relationship-apply <run-id> --candidate <candidate-id>
```
### Calibrating thresholds
As you review more relationships, KTX can evaluate whether the default thresholds (0.85 accept, 0.55 review) are optimal for your schema:
```bash
# See how your feedback aligns with current thresholds
ktx dev scan relationship-calibration --connection my-postgres
# Get threshold recommendations (needs 20+ labels, 5+ accepted, 5+ rejected)
ktx dev scan relationship-thresholds --connection my-postgres
# Export your review decisions as calibration labels
ktx dev scan relationship-feedback --connection my-postgres
```
## Ingestion
Ingestion pulls semantic context from your existing analytics tools — dbt projects, Looker models, Metabase questions, and more — and writes it into your KTX project as semantic sources and knowledge pages.
### How it works
Each ingest run follows this flow:
1. An **adapter** extracts metadata from your tool (dbt manifest, LookML files, Metabase API, etc.)
2. An **LLM agent** reconciles the extracted metadata with your existing context — it merges intelligently rather than overwriting
3. **Semantic sources** (YAML) and **knowledge pages** (Markdown) are written to your project directory
### Running an ingest
```bash
# Ingest one configured context source
ktx ingest my-dbt-source
# Ingest every configured context source
ktx ingest --all
```
The public `ktx ingest` command uses the source configuration in `ktx.yaml`, including the source `driver` and any adapter-specific paths or credentials.
For adapter-level debugging, use the low-level `ktx dev ingest run` command:
```bash
ktx dev ingest run --connection-id my-dbt-source --adapter dbt
```
Useful low-level flags:
| Flag | Description |
|------|-------------|
| `--source-dir <path>` | Directory containing source files (e.g., your dbt project) |
| `--viz` | Render the memory-flow TUI for real-time progress |
| `--json` | Output as JSON |
| `--plain` | Plain text output |
### Watching progress
```bash
# Check status of the latest ingest
ktx ingest status
# Check a specific run
ktx ingest status <run-id>
# Open the visual ingest report (TUI)
ktx ingest watch
# Replay a past ingest run
ktx dev ingest replay <run-id>
```
The `watch` command opens an interactive TUI that shows the memory-flow output — every tool call, LLM decision, and artifact written during the ingest.
### Available adapters
| Adapter | Source | What gets ingested |
|---------|--------|--------------------|
| `dbt` | dbt project | Model definitions, column descriptions, tests, tags |
| `metricflow` | MetricFlow semantic models | Metrics, dimensions, entities, semantic joins |
| `lookml` | LookML files | Views, explores, dimensions, measures, joins |
| `looker` | Looker API | Explores, looks, dashboard metadata |
| `metabase` | Metabase API | Questions, dashboards, table metadata |
| `notion` | Notion API | Database pages, knowledge articles |
| `historic-sql` | Query history | Frequent queries, usage patterns, runtime stats |
| `live-database` | Direct DB connection | Live schema introspection |
See [Context Sources](/docs/integrations/context-sources) for adapter-specific setup and auth configuration.
### What gets generated
A typical dbt ingest produces semantic sources and knowledge pages in your project:
**Semantic source** (`semantic-layer/my-postgres/orders.yaml`):
```yaml title="semantic-layer/my-postgres/orders.yaml"
name: orders
table: public.orders
grain:
- order_id
columns:
- name: order_id
type: string
description: Unique order identifier
- name: customer_id
type: string
description: Foreign key to customers table
- name: order_date
type: time
role: time
description: Date the order was placed
- name: total_amount
type: number
description: Total order value in USD
measures:
- name: total_revenue
expr: SUM(total_amount)
description: Sum of all order values
- name: order_count
expr: COUNT(DISTINCT order_id)
description: Number of distinct orders
joins:
- to: customers
"on": orders.customer_id = customers.customer_id
relationship: many_to_one
```
**Knowledge page** (`knowledge/global/order-status-definitions.md`):
```markdown
---
summary: Business definitions for order status values
tags: [orders, definitions]
sl_refs: [orders]
---
## Order Statuses
- **pending**: Order placed but not yet processed
- **confirmed**: Payment received, awaiting fulfillment
- **shipped**: Order dispatched to carrier
- **delivered**: Order received by customer
- **cancelled**: Order cancelled before shipment
Orders in "pending" status for more than 48 hours are flagged for review.
```
### Deterministic replay
Every ingest session records a full transcript — tool calls, LLM responses, and write decisions. You can replay any session to debug why a source was written a certain way:
```bash
ktx dev ingest replay <run-id> --viz
```
This opens the same TUI view as the original run, letting you step through the agent's reasoning.

View file

@ -0,0 +1,5 @@
{
"title": "Guides",
"defaultOpen": true,
"pages": ["building-context", "writing-context", "serving-agents"]
}

View file

@ -0,0 +1,207 @@
---
title: Serving Agents
description: Expose your context to Claude Code, Cursor, Codex, and other coding agents.
---
Once you've built and refined your context, the final step is exposing it to coding agents. KTX provides two channels: an **MCP server** for persistent integration with tools like Claude Code and Cursor, and **CLI commands** for direct terminal access.
## MCP Server
The MCP (Model Context Protocol) server gives agents structured access to your entire context layer — semantic sources, knowledge pages, scans, and ingestion — through a standard tool-calling interface.
### Starting the server
```bash
ktx serve --mcp stdio
```
This starts an MCP server on stdio, which is how Claude Code, Cursor, and other MCP-compatible tools communicate with KTX. You typically don't run this manually — your agent's configuration handles it.
### Configuration options
| Flag | Description | Default |
|------|-------------|---------|
| `--mcp <mode>` | MCP transport mode (currently `stdio`) | Required |
| `--user-id <id>` | User identifier for knowledge scoping | `local` |
| `--semantic-compute` | Enable semantic layer planning and query execution | `false` |
| `--semantic-compute-url <url>` | URL for the semantic compute daemon | &mdash; |
| `--database-introspection-url <url>` | Daemon URL for live database access | &mdash; |
| `--execute-queries` | Allow agents to execute SQL queries | `false` |
| `--memory-capture` | Enable memory capture from conversations | `false` |
| `--memory-model <model>` | LLM model for memory capture | &mdash; |
### Available tools
When an agent connects via MCP, it can call these tools:
**Connections**
| Tool | Description |
|------|-------------|
| `connection_list` | List configured data connections |
| `connection_test` | Test a connection through the scan connector |
**Semantic Layer**
| Tool | Description |
|------|-------------|
| `sl_list_sources` | List sources, optionally filtered by connection or search query |
| `sl_read_source` | Read a source YAML by connection and name |
| `sl_write_source` | Create, replace, or delete a source |
| `sl_validate` | Validate sources against the database schema |
| `sl_query` | Execute a semantic query — returns rows, SQL, and query plan |
**Knowledge**
| Tool | Description |
|------|-------------|
| `knowledge_search` | Search knowledge pages by query, returns ranked summaries |
| `knowledge_read` | Read a knowledge page by key |
| `knowledge_write` | Create or replace a knowledge page |
**Scanning**
| Tool | Description |
|------|-------------|
| `scan_trigger` | Run a structural, enriched, or relationship scan |
| `scan_status` | Check the status of a running scan |
| `scan_report` | Read a completed scan report |
| `scan_list_artifacts` | List files produced by a scan run |
| `scan_read_artifact` | Read a scan artifact by path |
**Ingestion**
| Tool | Description |
|------|-------------|
| `ingest_trigger` | Trigger an ingest run for an adapter and connection |
| `ingest_status` | Check ingest progress, including diff and work-unit summaries |
| `ingest_report` | Read a stored ingest report |
| `ingest_replay` | Read the memory-flow replay for a past ingest |
**Memory**
| Tool | Description |
|------|-------------|
| `memory_capture` | Capture knowledge and semantic updates from a conversation |
| `memory_capture_status` | Check the status of a memory capture run |
### How agents use these tools
A typical agent interaction flows like this:
1. Agent calls `connection_list` to see available databases
2. Agent calls `sl_list_sources` to discover what semantic sources exist
3. Agent calls `knowledge_search` to find business context relevant to the user's question
4. Agent calls `sl_query` with measures, dimensions, and filters to get data
5. Agent presents results with the business context it found
Agents should use the semantic layer for analytics questions because it enforces correct joins, grain-aware aggregation, and consistent metric definitions. If SQL execution is enabled, KTX only allows read-only SQL with row limits.
## CLI Commands
For agents that work through the terminal rather than MCP, KTX provides a set of machine-readable commands under `ktx agent`. These return JSON output designed for programmatic consumption.
### Available commands
```bash
# List available tools and their descriptions
ktx agent tools --json
# Get project context for planning
ktx agent context --json
```
**Semantic layer:**
```bash
# List sources
ktx agent sl list --json
ktx agent sl list --json --connection-id my-postgres
# Read a source
ktx agent sl read orders --json --connection-id my-postgres
# Run a query from a JSON file
ktx agent sl query --json \
--connection-id my-postgres \
--query-file query.json \
--execute \
--max-rows 100
```
**Knowledge:**
```bash
# Search knowledge pages
ktx agent wiki search "revenue recognition" --json --limit 10
# Read a specific page
ktx agent wiki read order-status-definitions --json
```
**SQL execution:**
```bash
# Execute read-only SQL with a row limit
ktx agent sql execute --json \
--connection-id my-postgres \
--sql-file query.sql \
--max-rows 500
```
### When to use CLI vs MCP
| | MCP | CLI |
|---|-----|-----|
| **Best for** | Persistent agent integrations | Terminal-based workflows, scripting |
| **Protocol** | Structured tool calls over stdio | Shell commands with JSON output |
| **Used by** | Claude Code, Cursor, Codex | Shell scripts, custom agents, debugging |
| **State** | Server runs continuously | Stateless per invocation |
Most users should set up MCP — it gives agents richer context and a more natural interaction model. The CLI commands are useful for scripting, debugging, and agents that operate through terminal tools.
## Setting Up Your Agent
The fastest way to connect an agent is through the setup wizard:
```bash
ktx setup
```
The agents step auto-detects installed tools and generates the right configuration. For manual setup or per-tool details, see the [Agent Clients](/docs/integrations/agent-clients) integration page.
### Quick manual setup
**Claude Code** — add to `.mcp.json` at your project root:
```json
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": ["serve", "--mcp", "stdio", "--semantic-compute", "--execute-queries"],
"env": {
"KTX_PROJECT_DIR": "/path/to/your/ktx/project"
}
}
}
}
```
**Cursor** — add to `.cursor/mcp.json`:
```json
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": ["serve", "--mcp", "stdio", "--semantic-compute", "--execute-queries"],
"env": {
"KTX_PROJECT_DIR": "/path/to/your/ktx/project"
}
}
}
}
```
After configuration, the agent can immediately start calling KTX tools — listing sources, searching knowledge, and querying your semantic layer.

View file

@ -0,0 +1,273 @@
---
title: Writing Context
description: Write and refine semantic sources and knowledge pages.
---
After building context through scanning and ingestion, you'll want to refine it — edit semantic sources to match your business logic, add knowledge pages that capture tribal knowledge, and query your data through the semantic layer to verify everything works.
## Semantic Sources
Semantic sources are YAML files that describe your tables, columns, measures, and joins. They're the core of the context layer — the structured definitions that agents use to generate correct SQL.
### Listing sources
```bash
# List all sources across connections
ktx sl list
# List sources for a specific connection
ktx sl list --connection-id my-postgres
# Output as JSON
ktx sl list --json
```
### Reading a source
```bash
ktx sl read orders --connection-id my-postgres
```
This prints the full YAML definition for the source.
### The source schema
A semantic source defines a single queryable entity — usually a table or a SQL expression. Here's a fully annotated example:
```yaml
name: orders
description: Customer orders with line-item totals
table: public.orders # or use `sql:` for a custom SQL expression
grain:
- order_id # columns that uniquely identify a row
columns:
- name: order_id
type: string # string | number | time | boolean
description: Unique order identifier
- name: order_date
type: time
role: time # marks this as the default time dimension
description: Date the order was placed
- name: status
type: string
visibility: public # public (default) | internal | hidden
description: Current order status
- name: _etl_loaded_at
type: time
visibility: hidden # hidden columns are excluded from agent queries
description: Internal ETL timestamp
- name: total_amount
type: number
description: Order total in USD
measures:
- name: total_revenue
expr: SUM(total_amount)
description: Sum of all order values
- name: order_count
expr: COUNT(DISTINCT order_id)
description: Number of distinct orders
- name: avg_order_value
expr: AVG(total_amount)
description: Average order value
- name: high_value_revenue
expr: SUM(total_amount)
filter: total_amount > 100
description: Revenue from orders over $100
segments:
- name: us_orders
expr: country = 'US'
description: Orders from US customers
joins:
- to: customers
on: orders.customer_id = customers.customer_id
relationship: many_to_one # many_to_one | one_to_many | one_to_one
- to: order_items
on: orders.order_id = order_items.order_id
relationship: one_to_many
alias: items # optional alias for the joined source
```
Key fields:
| Field | Required | Description |
|-------|----------|-------------|
| `name` | Yes | Source identifier (lowercase, underscores) |
| `table` or `sql` | Yes | Database table or custom SQL expression (exactly one) |
| `grain` | Yes | Columns that define row uniqueness |
| `columns` | No | Column definitions with type, role, visibility |
| `measures` | No | Aggregation expressions (SUM, COUNT, AVG, etc.) |
| `joins` | No | Relationships to other sources |
| `segments` | No | Named filter conditions |
| `inherits_columns_from` | No | Inherit column metadata from a manifest entry |
Column visibility controls what agents see:
| Visibility | Behavior |
|------------|----------|
| `public` | Included in agent queries and listings (default) |
| `internal` | Available for joins and measures but not shown to agents |
| `hidden` | Excluded entirely — useful for ETL columns |
### Writing a source
```bash
ktx sl write orders --connection-id my-postgres --yaml '
name: orders
table: public.orders
grain: [order_id]
columns:
- name: order_id
type: string
- name: total_amount
type: number
measures:
- name: total_revenue
expr: SUM(total_amount)
'
```
You can also edit source files directly — they live at `semantic-layer/<connection-id>/<source-name>.yaml` in your project directory.
### Validating sources
Validation checks a source definition against the actual database schema:
```bash
ktx sl validate orders --connection-id my-postgres
```
This catches mismatches — columns that don't exist in the table, type mismatches, invalid join targets — before an agent tries to use the source.
### Querying
The semantic layer compiles your measures and dimensions into SQL, optionally executing it against the database:
```bash
# Compile a query to SQL
ktx sl query \
--connection-id my-postgres \
--measure total_revenue \
--measure order_count \
--dimension "order_date" \
--filter "status = 'completed'" \
--order-by order_date:desc \
--limit 10 \
--format sql
```
This outputs the compiled SQL without executing it. To run the query:
```bash
# Execute and return results
ktx sl query \
--connection-id my-postgres \
--measure total_revenue \
--dimension "order_date" \
--execute \
--max-rows 100
```
Query flags:
| Flag | Description |
|------|-------------|
| `--measure <name>` | Measure to query (repeatable, at least one required) |
| `--dimension <name>` | Dimension to group by (repeatable) |
| `--filter <expr>` | Filter expression (repeatable) |
| `--segment <name>` | Named segment to apply (repeatable) |
| `--order-by <field[:dir]>` | Sort field, optionally with `:asc` or `:desc` (repeatable) |
| `--limit <n>` | Maximum rows in the compiled query |
| `--format <mode>` | Output format: `json` (default) or `sql` |
| `--execute` | Execute the query against the database |
| `--max-rows <n>` | Maximum rows to return when executing |
| `--include-empty` | Include empty/null rows in results |
The query planner is grain-aware — it understands the cardinality of joins and avoids chasm traps (double-counting caused by many-to-many fan-outs). When you query measures that span multiple sources, KTX generates sub-queries at the correct grain before joining.
## Knowledge Pages
Knowledge pages are Markdown files that capture business context — definitions, rules, gotchas, and anything an agent needs to understand beyond what the schema tells it.
### What they are
When an agent asks "what counts as an active user?" or "why do revenue numbers differ between the dashboard and the SQL query?", the answer isn't in the schema. It's tribal knowledge that lives in Slack threads, Notion pages, or someone's head. Knowledge pages make that context searchable and available to agents.
### Organization
Knowledge pages are organized by scope:
```
knowledge/
├── global/ # Cross-cutting definitions
│ ├── order-status-definitions.md
│ ├── revenue-recognition-rules.md
│ └── data-freshness-sla.md
└── user/
└── local/ # User-scoped context
├── schema-conventions.md
└── known-data-issues.md
```
- **Global pages** apply across all connections — business definitions, metric standards, company terminology.
- **User-scoped pages** are private to a user ID — personal notes, local gotchas, or context you do not want shared globally.
### Writing pages
```bash
ktx wiki write order-status-definitions \
--scope global \
--summary "Business definitions for order status values" \
--content "## Order Statuses
- **pending**: Order placed but not yet processed
- **confirmed**: Payment received, awaiting fulfillment
- **shipped**: Order dispatched to carrier
- **delivered**: Order received by customer
- **cancelled**: Order cancelled before shipment
Orders in pending status for more than 48 hours are flagged for review." \
--tag orders \
--tag definitions \
--sl-ref orders
```
Write flags:
| Flag | Description |
|------|-------------|
| `--scope <scope>` | `global` (default) or `user` |
| `--summary <text>` | Short description for search results (required) |
| `--content <text>` | Full Markdown content (required) |
| `--tag <tag>` | Categorization tag (repeatable) |
| `--ref <ref>` | Reference to external resources (repeatable) |
| `--sl-ref <ref>` | Link to a semantic source (repeatable) |
You can also create and edit knowledge pages directly as Markdown files in the `knowledge/` directory.
### Listing pages
```bash
ktx wiki list
```
### Reading a page
```bash
ktx wiki read order-status-definitions
```
### Searching
```bash
ktx wiki search "revenue recognition"
```
Search uses both full-text matching and semantic similarity — it finds relevant pages even when the exact terms don't match. Agents call this automatically when they need business context to answer a question.

View file

@ -0,0 +1,279 @@
---
title: Agent Clients
description: Set up KTX with Claude Code, Cursor, Codex, and OpenCode.
---
KTX integrates with coding agents through two channels that can be used independently or together:
- **MCP server** — A persistent Model Context Protocol server that exposes KTX tools (semantic queries, knowledge search, SQL execution) directly to the agent
- **CLI skills** — Command definitions that teach the agent how to invoke KTX via the terminal
Run `ktx setup` and select your agent targets, or configure manually using the snippets below.
## Claude Code
### Install via `ktx setup`
During setup, select **Claude Code** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI skills | `.claude/skills/ktx/SKILL.md` |
| MCP server | `.mcp.json` (under `mcpServers.ktx`) |
Both project-scoped and global installations are supported. Global installs write to `~/.claude/skills/ktx/SKILL.md`.
### Manual MCP configuration
Add KTX to `.mcp.json` at your project root:
```json title=".mcp.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
Replace `/path/to/ktx-project` with your KTX project directory. For a pinned local checkout, set `command` to the absolute path of the built CLI and use the arguments generated by `ktx setup`.
### Manual CLI skills configuration
Create `.claude/skills/ktx/SKILL.md`:
```markdown title=".claude/skills/ktx/SKILL.md"
---
name: ktx
description: Use local KTX semantic context, wiki knowledge, and safe SQL execution for this project.
---
Available commands:
- `ktx agent context --json --project-dir /path/to/project`
- `ktx agent sl list --json --project-dir /path/to/project`
- `ktx agent sl read '<sourceName>' --json --project-dir /path/to/project`
- `ktx agent sl query --json --project-dir /path/to/project --connection-id '<id>' --query-file '<path>' --execute --max-rows 100`
- `ktx agent wiki search '<query>' --json --project-dir /path/to/project`
- `ktx agent wiki read '<pageId>' --json --project-dir /path/to/project`
- `ktx agent sql execute --json --project-dir /path/to/project --connection-id '<id>' --sql-file '<path>' --max-rows 100`
```
### Workflow tips
- Claude Code discovers skills automatically from `.claude/skills/` — no restart needed after setup
- The MCP server starts on-demand when Claude Code first calls a KTX tool
- Use `--semantic-compute` to enable query planning and execution
- Global installation (`~/.claude/skills/ktx/SKILL.md`) makes KTX available in all projects without per-project setup
---
## Cursor
### Install via `ktx setup`
During setup, select **Cursor** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI rules | `.cursor/rules/ktx.mdc` |
| MCP server | `.cursor/mcp.json` (under `mcpServers.ktx`) |
Cursor supports project-scoped installation only.
### Manual MCP configuration
Create or edit `.cursor/mcp.json`:
```json title=".cursor/mcp.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
### Manual CLI rules configuration
Create `.cursor/rules/ktx.mdc` with the same content structure as the Claude Code SKILL.md file — Cursor rules use the `.mdc` extension but support the same markdown format with command definitions.
### Workflow tips
- After adding MCP config, restart Cursor or reload the window for the server to connect
- Cursor rules in `.cursor/rules/` are automatically loaded into agent context
- MCP tools appear in Cursor's tool list once the server is running
- Project-scoped only — no global installation option
---
## Codex
### Install via `ktx setup`
During setup, select **Codex** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI skills | `.agents/skills/ktx/SKILL.md` |
| MCP server | `.agents/mcp/ktx.json` (under `mcpServers.ktx`) |
Both project-scoped and global installations are supported. Global installs write to `$CODEX_HOME/skills/ktx/SKILL.md` (defaults to `~/.codex/skills/ktx/SKILL.md`).
### Manual MCP configuration
Create or edit `.agents/mcp/ktx.json`:
```json title=".agents/mcp/ktx.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
### Manual CLI skills configuration
Create `.agents/skills/ktx/SKILL.md` with the same content structure as Claude Code's SKILL.md.
### Workflow tips
- Set `CODEX_HOME` environment variable to customize the global installation directory
- Codex shares the `.agents/` directory structure with the universal format — one install covers both
- Global installation makes KTX available across all Codex sessions
---
## OpenCode
### Install via `ktx setup`
During setup, select **OpenCode** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI commands | `.opencode/commands/ktx.md` |
| MCP server | `.opencode/mcp.json` (under `mcpServers.ktx`) |
OpenCode supports project-scoped installation only.
### Manual MCP configuration
Create or edit `.opencode/mcp.json`:
```json title=".opencode/mcp.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
### Manual CLI commands configuration
Create `.opencode/commands/ktx.md` with the same command definitions as Claude Code's SKILL.md.
### Workflow tips
- OpenCode reads commands from `.opencode/commands/` on startup
- Project-scoped only — no global installation option
- Commands file uses standard markdown format (`.md` extension)
---
## MCP server reference
All agent clients connect to the same KTX MCP server. The server exposes these tools:
| Tool | Description |
|------|-------------|
| `connection_list` | List configured database connections |
| `connection_test` | Test a database connection |
| `knowledge_search` | Semantic + full-text search across knowledge pages |
| `knowledge_read` | Read a specific knowledge page |
| `knowledge_write` | Write or update a knowledge page |
| `sl_list_sources` | List semantic layer sources |
| `sl_read_source` | Read a semantic source definition |
| `sl_write_source` | Write or update a semantic source |
| `sl_validate` | Validate a source against the database schema |
| `sl_query` | Execute a semantic layer query |
| `ingest_trigger` | Trigger an ingestion run |
| `ingest_status` | Check ingestion status |
| `ingest_report` | View an ingestion report |
| `ingest_replay` | Read the replay of a past ingestion session |
| `scan_trigger` | Trigger a structural, enriched, or relationship scan |
| `scan_status` | Check scan status |
| `scan_report` | View a completed scan report |
| `scan_list_artifacts` | List artifacts produced by a scan |
| `scan_read_artifact` | Read a scan artifact |
| `memory_capture` | Capture reusable context from an agent conversation when memory capture is enabled |
| `memory_capture_status` | Check a memory capture run |
### Server flags
| Flag | Description | Default |
|------|-------------|---------|
| `--project-dir` | KTX project directory; otherwise KTX uses `KTX_PROJECT_DIR`, the nearest `ktx.yaml`, or the current directory | Auto-detected |
| `--mcp stdio` | Transport mode (stdio only) | Required |
| `--semantic-compute` | Enable semantic layer queries | `false` |
| `--execute-queries` | Allow read-only SQL execution | `false` |
| `--semantic-compute-url` | Remote compute endpoint URL | — |
| `--database-introspection-url` | Live schema introspection endpoint | — |
| `--memory-capture` | Record agent interactions | `false` |
| `--memory-model` | LLM model for memory processing | — |
### Security constraints
- SQL execution is always read-only
- Agent CLI SQL execution requires an explicit `--max-rows` limit from 1 to 1000; MCP semantic queries default to a 1000-row cap
- Secrets and credentials are never exposed in tool responses
- The server runs as a child process of the agent client (no network exposure)
---
## Comparison
| | Claude Code | Cursor | Codex | OpenCode |
|---|---|---|---|---|
| MCP support | Yes | Yes | Yes | Yes |
| CLI skills | Yes | Yes (.mdc) | Yes | Yes |
| Global install | Yes | No | Yes | No |
| Config location | `.mcp.json` | `.cursor/mcp.json` | `.agents/mcp/ktx.json` | `.opencode/mcp.json` |
| Skills location | `.claude/skills/` | `.cursor/rules/` | `.agents/skills/` | `.opencode/commands/` |

View file

@ -0,0 +1,353 @@
---
title: Context Sources
description: Ingest semantic context from dbt, MetricFlow, LookML, Metabase, Looker, and Notion.
---
Context sources feed your existing analytics tooling into KTX. During ingestion, KTX extracts metadata from each source and uses an LLM agent to reconcile it with your existing semantic layer and knowledge base — merging intelligently rather than overwriting.
All context sources are configured in `ktx.yaml` under `connections` with their respective `driver` value.
## dbt
Ingests schema definitions, model descriptions, column metadata, and test coverage from a dbt project.
### What it provides
- Model and source definitions from `schema.yml` files
- Column descriptions and types
- Test coverage signals
- Semantic model references (if using dbt semantic layer)
- Data lineage between models
### Connection config
```yaml title="ktx.yaml"
connections:
my-dbt:
driver: dbt
source_dir: /path/to/dbt/project
readonly: true
```
For a Git-hosted project:
```yaml title="ktx.yaml"
connections:
my-dbt:
driver: dbt
repo_url: https://github.com/org/dbt-repo
branch: main
path: analytics/dbt # For monorepos
auth_token_ref: env:GITHUB_TOKEN
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Local path | `source_dir: /absolute/path/to/dbt/project` |
| Public repo | `repo_url: https://github.com/org/repo` |
| Private repo | `repo_url` + `auth_token_ref: env:GITHUB_TOKEN` |
**Optional fields:**
| Field | Description |
|-------|-------------|
| `profiles_path` | Path to `profiles.yml` (if non-standard location) |
| `target` | dbt target name (e.g., `dev`, `prod`) |
| `project_name` | Override auto-detected project name |
### What gets ingested
- YAML semantic sources generated from dbt schema files
- One work unit per model file for projects with more than 25 YAML files; smaller projects are ingested all at once
- Column descriptions, tests, and relationships are preserved
---
## MetricFlow
Ingests MetricFlow semantic models and metric definitions. Useful when your team defines metrics in MetricFlow's YAML format.
### What it provides
- Semantic model definitions (entities, dimensions, measures)
- Cross-model metric definitions
- Dimension and entity relationships between models
### Connection config
```yaml title="ktx.yaml"
connections:
my-metricflow:
driver: metricflow
metricflow:
repoUrl: https://github.com/org/metricflow-repo
branch: main
path: dbt_metrics # Subdirectory for monorepos
auth_token_ref: env:GITHUB_TOKEN
readonly: true
```
For a local path:
```yaml
metricflow:
repoUrl: file:///absolute/path/to/project
```
### Authentication
| Method | Config |
|--------|--------|
| Public repo | `repoUrl: https://github.com/org/repo` |
| Private repo | `repoUrl` + `auth_token_ref: env:GITHUB_TOKEN` |
| Local path | `repoUrl: file:///path/to/project` |
### What gets ingested
- Semantic models with their entities, dimensions, and measures
- Metric definitions with their expressions and filters
- Work units organized by connected component (metrics + related semantic models grouped together)
---
## LookML
Ingests LookML view and model definitions from a Git repository. Extracts field definitions, SQL table references, and join relationships.
### What it provides
- View definitions (dimensions, measures, derived tables)
- Model explore definitions and joins
- SQL table name references
- Field-level descriptions and labels
### Connection config
```yaml title="ktx.yaml"
connections:
my-lookml:
driver: lookml
repoUrl: https://github.com/org/lookml-repo
branch: main
path: analytics # Subdirectory for monorepos
auth_token_ref: env:GITHUB_TOKEN
readonly: true
```
For a local path:
```yaml
repoUrl: file:///absolute/path/to/lookml
```
### Authentication
| Method | Config |
|--------|--------|
| Public repo | `repoUrl: https://github.com/org/repo` |
| Private repo | `repoUrl` + `auth_token_ref: env:GITHUB_TOKEN` |
| Local path | `repoUrl: file:///path/to/project` |
### What gets ingested
- View and model definitions organized by connected component
- LookML field types mapped to semantic layer column types
- Join definitions and relationship cardinalities
- SQL table references for warehouse mapping validation
### Warehouse mapping
Optionally validate that LookML references match your expected Looker connection:
```yaml
mappings:
expectedLookerConnectionName: postgres_connection
```
This validates that LookML model `connection:` declarations match expectations, flagging mismatches during ingestion.
---
## Metabase
Ingests dashboards, questions, and their underlying SQL queries from a Metabase instance. Maps Metabase databases to your KTX warehouse connections.
### What it provides
- Dashboard metadata and organization
- Question/query definitions (native SQL and structured queries)
- Table and column usage patterns from queries
- Database-to-warehouse relationship mapping
### Connection config
```yaml title="ktx.yaml"
connections:
my-metabase:
driver: metabase
api_url: https://metabase.company.com
api_key_ref: env:METABASE_API_KEY
mappings:
databaseMappings:
"3": postgres-main # Metabase DB ID → KTX connection
syncEnabled:
"3": true
syncMode: ONLY # Only ingest mapped databases
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| API key | `api_key_ref: env:METABASE_API_KEY` |
Generate an API key in Metabase: **Admin > Settings > Authentication > API Keys**.
### What gets ingested
- Semantic sources generated from SQL queries in questions
- Knowledge pages for dashboards (purpose, key metrics, relationships)
- Work units per dashboard and per question
### Warehouse mapping
Metabase databases must be mapped to KTX connections so ingested context links to the correct warehouse:
```yaml
mappings:
databaseMappings:
"<metabase_db_id>": "<ktx_connection_id>"
syncEnabled:
"<metabase_db_id>": true
syncMode: ONLY # ONLY = restrict to mapped DBs
```
Find Metabase database IDs in **Admin > Databases** — the ID is in the URL when editing a database.
---
## Looker
Ingests explores, looks, and dashboards from a Looker instance via the Looker API. Maps Looker connections to your KTX warehouse connections.
### What it provides
- Explore definitions and field metadata
- Dashboard and look configurations
- Query patterns and usage signals
- Looker folder structure for organization context
### Connection config
```yaml title="ktx.yaml"
connections:
my-looker:
driver: looker
base_url: https://looker.company.com
client_id: your-looker-client-id
client_secret_ref: env:LOOKER_CLIENT_SECRET
mappings:
connectionMappings:
postgres_connection: postgres-main # Looker conn → KTX conn
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| OAuth client credentials | `client_id` + `client_secret_ref: env:LOOKER_CLIENT_SECRET` |
Generate API credentials in Looker: **Admin > Users > Edit > API Keys**.
### What gets ingested
- Semantic sources from explore field definitions
- Knowledge pages for dashboards (purpose, audience, key metrics)
- Triage signals for automated content classification
- Work units per explore and per dashboard
### Warehouse mapping
Map Looker connection names to KTX connections so explores link to the correct warehouse:
```yaml
mappings:
connectionMappings:
"<looker_connection_name>": "<ktx_connection_id>"
```
Find Looker connection names in **Admin > Database > Connections**.
---
## Notion
Ingests pages and databases from a Notion workspace as knowledge pages. Useful for capturing business definitions, data dictionaries, and team documentation that agents need for context.
### What it provides
- Knowledge pages synthesized from Notion content
- Page hierarchy and relationships
- Database schemas (when Notion databases describe data sources)
- Semantic clustering for organized ingestion
### Connection config
```yaml title="ktx.yaml"
connections:
my-notion:
driver: notion
auth_token_ref: env:NOTION_TOKEN
crawl_mode: selected_roots
root_page_ids:
- "abc123def456..."
readonly: true
```
For crawling all accessible pages:
```yaml title="ktx.yaml"
connections:
my-notion:
driver: notion
auth_token_ref: env:NOTION_TOKEN
crawl_mode: all_accessible
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Internal integration token | `auth_token_ref: env:NOTION_TOKEN` |
Create an integration at [notion.so/my-integrations](https://www.notion.so/my-integrations), then share target pages with the integration.
### Configuration options
| Field | Description | Default |
|-------|-------------|---------|
| `crawl_mode` | `all_accessible` or `selected_roots` | — |
| `root_page_ids` | Page IDs to crawl from (for `selected_roots`) | `[]` |
| `root_database_ids` | Database IDs to include | `[]` |
| `max_pages_per_run` | Pages processed per sync | `1000` |
| `max_knowledge_creates_per_run` | New pages created per sync | `5` |
| `max_knowledge_updates_per_run` | Pages updated per sync | `20` |
### What gets ingested
- Knowledge pages synthesized from Notion content (not raw copies)
- Domain context extracted and organized by topic
- Triage signals for classifying page relevance
- Work units clustered by semantic similarity for efficient processing
### Notes
- Notion is knowledge-only — it does not produce semantic layer sources
- Rate limits apply; large workspaces may require multiple ingestion runs
- `last_successful_cursor` is auto-managed for incremental sync

View file

@ -0,0 +1,5 @@
{
"title": "Integrations",
"defaultOpen": true,
"pages": ["primary-sources", "context-sources", "agent-clients"]
}

View file

@ -0,0 +1,469 @@
---
title: Primary Sources
description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite.
---
KTX connects to your data warehouse or database to scan schemas, discover relationships, and execute semantic layer queries. Each connection is defined in `ktx.yaml` under the `connections` key.
All connectors share these conventions:
- Sensitive values support `env:VAR_NAME` (read from environment) and `file:/path/to/secret` (read from file) references
- Connections are read-only — KTX never writes to your database
- Schema scanning discovers tables, columns, types, and constraints automatically
## PostgreSQL
The most full-featured connector. Supports schema introspection, foreign key detection, column statistics, and historic SQL via `pg_stat_statements`.
### Connection config
```yaml title="ktx.yaml"
connections:
my-postgres:
driver: postgres
url: postgresql://user:password@host:5432/database
schema: public
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-postgres:
driver: postgres
host: localhost
port: 5432
database: analytics
username: ktx_reader
password: env:PG_PASSWORD
schemas:
- public
- analytics
ssl: true
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Password | `password: env:PG_PASSWORD` or `password: file:/path/to/secret` |
| Connection URL | `url: env:DATABASE_URL` |
| SSL | `ssl: true`, optionally `rejectUnauthorized: false` for self-signed certs |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `pg_catalog` |
| Primary keys | Yes | Via `information_schema.table_constraints` |
| Foreign keys | Yes | Full constraint detection |
| Row count estimates | Yes | Via `pg_class.reltuples` |
| Column statistics | Yes | Requires `pg_read_all_stats` role |
| Historic SQL | Yes | Via `pg_stat_statements` extension |
| Table sampling | Yes | `TABLESAMPLE SYSTEM` |
### Historic SQL
PostgreSQL Historic SQL mines real query patterns from `pg_stat_statements`. This is the most mature local Historic SQL path and helps KTX understand how your team actually queries the data.
**Requirements:**
- `pg_stat_statements` extension enabled
- `pg_read_all_stats` role granted to the KTX user
**Config options:**
```yaml
historicSql:
minCalls: 5 # Minimum call count to include a query template
maxTemplatesPerRun: 5000
```
### Dialect notes
- SQL generation uses `LIMIT/OFFSET` pagination
- Named parameters converted to positional (`$1`, `$2`, ...)
- Supports `COUNT(*) FILTER (WHERE ...)` for null analysis
- Full support for PostgreSQL types: `uuid`, `jsonb`, `timestamptz`, `numeric`, `text[]`, etc.
---
## Snowflake
Connects via the Snowflake SDK. Supports multi-schema scanning, RSA key authentication, and Historic SQL configuration for Snowflake query history.
### Connection config
```yaml title="ktx.yaml"
connections:
my-snowflake:
driver: snowflake
account: xy12345
warehouse: ANALYTICS_WH
database: PROD
schema_name: PUBLIC
username: KTX_SERVICE
password: env:SNOWFLAKE_PASSWORD
role: ANALYST
readonly: true
```
For multiple schemas:
```yaml
schema_names:
- PUBLIC
- ANALYTICS
- STAGING
```
### Authentication
| Method | Config |
|--------|--------|
| Password | `password: env:SNOWFLAKE_PASSWORD` |
| RSA key pair | `authMethod: rsa`, `privateKey: file:~/.ssh/snowflake_key.pem`, optional `passphrase` |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `INFORMATION_SCHEMA.TABLES` |
| Primary keys | Yes | Via table constraints |
| Foreign keys | No | Not scanned; Snowflake foreign key constraints are informational only (not enforced) |
| Row count estimates | Yes | From `INFORMATION_SCHEMA.TABLES.ROW_COUNT` |
| Column statistics | No | — |
| Historic SQL | Configurable | Query-history settings can be stored; local CLI Historic SQL ingest currently uses the Postgres path |
| Table sampling | Yes | — |
### Historic SQL
Snowflake Historic SQL settings describe how query history should be sampled when that runtime path is available.
```yaml
historicSql:
windowDays: 90
redactionPatterns: []
serviceAccountUserPatterns: []
```
### Dialect notes
- Unquoted identifiers resolve to uppercase by default (matching is case-insensitive)
- Connection context set per query (`USE ROLE`, `USE WAREHOUSE`, `USE DATABASE`, `USE SCHEMA`)
- Parameter binding uses positional `?` placeholders
- Date values normalized to ISO 8601 strings
---
## BigQuery
Authenticates via GCP service account credentials. Supports multi-dataset scanning and Historic SQL configuration for `INFORMATION_SCHEMA.JOBS_BY_PROJECT`.
### Connection config
```yaml title="ktx.yaml"
connections:
my-bigquery:
driver: bigquery
credentials_json: file:~/.config/gcloud/bq-service-account.json
dataset_id: analytics
location: US
readonly: true
```
For multiple datasets:
```yaml
dataset_ids:
- analytics
- marketing
- finance
```
### Authentication
| Method | Config |
|--------|--------|
| Service account JSON | `credentials_json: file:/path/to/key.json` |
| Environment variable | `credentials_json: env:GCP_CREDENTIALS_JSON` |
The project ID is extracted automatically from the service account JSON file.
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Including materialized views and external tables |
| Primary keys | No | — |
| Foreign keys | No | Not scanned; BigQuery key constraints are unenforced metadata |
| Row count estimates | Yes | From table metadata |
| Column statistics | No | — |
| Historic SQL | Configurable | Query-history settings can be stored; local CLI Historic SQL ingest currently uses the Postgres path |
| Table sampling | Yes | — |
### Historic SQL
BigQuery Historic SQL settings describe how `INFORMATION_SCHEMA.JOBS_BY_PROJECT` should be sampled when that runtime path is available.
```yaml
historicSql:
windowDays: 90
redactionPatterns: []
serviceAccountUserPatterns: []
```
### Dialect notes
- Parameter binding uses named `@param` syntax
- Arrays flattened to comma-separated strings in results
- Location specified at query execution time
- Supports `maxBytesBilled` and `jobTimeoutMs` limits
---
## ClickHouse
Connects over HTTP (port 8123) or HTTPS (port 8443). Supports the ClickHouse native type system including `Nullable`, `LowCardinality`, and `Array` wrappers.
### Connection config
```yaml title="ktx.yaml"
connections:
my-clickhouse:
driver: clickhouse
url: http://localhost:8123/analytics
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-clickhouse:
driver: clickhouse
host: clickhouse.internal
port: 8123
database: analytics
username: default
password: env:CH_PASSWORD
ssl: false
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Basic auth | `username` + `password` (HTTP basic auth) |
| No auth | Default user `default` with no password |
| HTTPS | Set `ssl: true` (uses port 8443 by default) |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `system.tables`, engine-based detection |
| Primary keys | Yes | Via `system.columns` |
| Foreign keys | No | Not a ClickHouse concept |
| Row count estimates | Yes | Via `system.parts` aggregation |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | — |
### Dialect notes
- Parameter binding uses `{param:Type}` syntax (e.g., `{database:String}`)
- Detects views vs. tables by engine name (`View`, `MaterializedView`)
- Handles `Nullable(T)` and `LowCardinality(Nullable(T))` type wrappers
- Dictionary tables are excluded from scanning
- Results returned in JSONCompact or JSONEachRow format
---
## MySQL
Standard MySQL/MariaDB connector with full foreign key support and schema introspection.
### Connection config
```yaml title="ktx.yaml"
connections:
my-mysql:
driver: mysql
url: mysql://user:password@host:3306/database
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-mysql:
driver: mysql
host: mysql.internal
port: 3306
database: analytics
username: ktx_reader
password: env:MYSQL_PASSWORD
ssl: true
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Password | `password: env:MYSQL_PASSWORD` or `password: file:/path/to/secret` |
| SSL | `ssl: true` or `ssl: { rejectUnauthorized: false }` |
| URL parameters | `?ssl=true` or `?sslmode=required` in connection URL |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `INFORMATION_SCHEMA.TABLES` |
| Primary keys | Yes | Via `KEY_COLUMN_USAGE` |
| Foreign keys | Yes | Via `REFERENTIAL_CONSTRAINTS` |
| Row count estimates | Yes | From `TABLE_ROWS` (InnoDB estimate) |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | Uses `RAND()` filter |
### Dialect notes
- Parameter binding uses positional `?` placeholders
- Uses `LIMIT X OFFSET Y` for pagination
- Single database per connection (no multi-schema)
- Supports 20+ MySQL types including `enum`, `json`, `datetime`, `decimal`
- Table comments extracted with InnoDB metadata prefix stripping
---
## SQL Server
Connects to Microsoft SQL Server and Azure SQL. Supports multi-schema scanning with `dbo` as the default schema.
### Connection config
```yaml title="ktx.yaml"
connections:
my-sqlserver:
driver: sqlserver
url: mssql://user:password@host:1433/database?trustServerCertificate=true
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-sqlserver:
driver: sqlserver
host: sql.internal
port: 1433
database: Analytics
username: ktx_reader
password: env:MSSQL_PASSWORD
schema: dbo
trustServerCertificate: true
readonly: true
```
For multiple schemas:
```yaml
schemas:
- dbo
- analytics
- staging
```
### Authentication
| Method | Config |
|--------|--------|
| SQL Server auth | `username` + `password` |
| Encrypted connection | Always enabled, `trustServerCertificate: true` for self-signed |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `INFORMATION_SCHEMA.TABLES` |
| Primary keys | Yes | Via `TABLE_CONSTRAINTS` and `KEY_COLUMN_USAGE` |
| Foreign keys | Yes | Via `REFERENTIAL_CONSTRAINTS` |
| Row count estimates | Yes | Via `sys.dm_db_partition_stats` |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | — |
| Nested analysis | No | — |
### Dialect notes
- Parameter binding uses `@paramName` syntax
- Row limiting uses `SELECT TOP N * FROM (query) AS ktx_query_result`
- Encryption is always required; certificate validation is optional
- Multi-schema support with per-schema isolation
---
## SQLite
File-based connector using `better-sqlite3`. Ideal for local development, embedded analytics, or testing.
### Connection config
```yaml title="ktx.yaml"
connections:
my-sqlite:
driver: sqlite
path: ./data/warehouse.sqlite
readonly: true
```
Path supports multiple formats:
```yaml
# Relative path (resolved against project directory)
path: ./warehouse.sqlite
# Absolute path
path: /var/data/analytics.db
# Home directory expansion
path: ~/data/warehouse.sqlite
# Environment variable
path: env:SQLITE_DB_PATH
# URL format
url: sqlite:///path/to/db.sqlite
```
### Authentication
No authentication required — SQLite is file-based. The file must be readable by the process running KTX.
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `sqlite_master` |
| Primary keys | Yes | Via `PRAGMA table_info()` |
| Foreign keys | Yes | Via `PRAGMA foreign_key_list()` (works regardless of the `PRAGMA foreign_keys` enforcement setting) |
| Row count estimates | Yes | Exact count via `SELECT COUNT(*)` |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | — |
| Nested analysis | No | — |
### Dialect notes
- Synchronous query execution (no connection pooling)
- Parameter binding uses `:paramName` syntax
- Uses `LIMIT X OFFSET Y` for pagination
- SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB`
- Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON`
- In-memory databases supported with `path: ":memory:"` (for testing)

View file

@ -0,0 +1,13 @@
{
"root": true,
"title": "KTX",
"pages": [
"getting-started",
"concepts",
"guides",
"integrations",
"benchmarks",
"cli-reference",
"community"
]
}

7
docs/lib/source.ts Normal file
View file

@ -0,0 +1,7 @@
import { docs } from "@/.source";
import { loader } from "fumadocs-core/source";
/**
 * Content source for the docs site.
 *
 * Wraps the `docs` collection imported from "@/.source" in a fumadocs-core
 * `loader`, with "/docs" as the base URL.
 * NOTE(review): "@/.source" looks like build-generated output (`.source/` is
 * git-ignored) — confirm it is produced before type-checking this file.
 */
export const source = loader({
source: docs.toFumadocsSource(),
baseUrl: "/docs",
});

6
docs/next-env.d.ts vendored Normal file
View file

@ -0,0 +1,6 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
/// <reference path="./.next/types/routes.d.ts" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.

8
docs/next.config.mjs Normal file
View file

@ -0,0 +1,8 @@
import { createMDX } from "fumadocs-mdx/next";
// Config wrapper returned by fumadocs-mdx's `createMDX`, called with no options.
const withMDX = createMDX();
// Base Next.js config; intentionally empty, so only what `withMDX` adds applies.
/** @type {import('next').NextConfig} */
const config = {};
// Export the base config after passing it through the MDX wrapper.
export default withMDX(config);

26
docs/package.json Normal file
View file

@ -0,0 +1,26 @@
{
"name": "ktx-docs",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start"
},
"dependencies": {
"fumadocs-core": "15.7.13",
"fumadocs-mdx": "11.10.1",
"fumadocs-ui": "15.7.13",
"next": "^15",
"react": "19.2.6",
"react-dom": "19.2.6"
},
"devDependencies": {
"@types/react": "^19",
"@types/react-dom": "^19",
"typescript": "^5.9",
"@tailwindcss/postcss": "^4",
"tailwindcss": "^4"
}
}

7
docs/postcss.config.mjs Normal file
View file

@ -0,0 +1,7 @@
// PostCSS configuration for the docs app: registers the "@tailwindcss/postcss"
// plugin with an empty options object and no other plugins.
const config = {
plugins: {
"@tailwindcss/postcss": {},
},
};
export default config;

9
docs/source.config.ts Normal file
View file

@ -0,0 +1,9 @@
import { defineDocs, defineConfig } from "fumadocs-mdx/config";
// Docs collection definition: content is read from the "content/docs" directory.
// NOTE(review): the path is presumably resolved relative to the docs package root — confirm.
export const docs = defineDocs({
dir: "content/docs",
});
// Global fumadocs-mdx configuration; `mdxOptions` is left empty (no overrides).
export default defineConfig({
mdxOptions: {},
});

41
docs/tsconfig.json Normal file
View file

@ -0,0 +1,41 @@
{
"compilerOptions": {
"target": "ESNext",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "ESNext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": [
"./*"
]
}
},
"include": [
"**/*.ts",
"**/*.tsx",
".source/**/*.ts",
"next-env.d.ts",
".next/types/**/*.ts"
],
"exclude": [
"node_modules"
]
}

View file

@ -32,6 +32,7 @@
"relationships:verify-orbit": "node scripts/relationship-orbit-verification.mjs",
"smoke": "pnpm run build && pnpm --filter @ktx/cli run smoke",
"test": "node --test scripts/*.test.mjs && pnpm --filter './packages/*' run test",
"test:slow": "pnpm --filter @ktx/context run test:slow && pnpm --filter @ktx/cli run test:slow",
"type-check": "pnpm --filter './packages/*' run type-check"
},
"devDependencies": {

View file

@ -28,7 +28,8 @@
"assets:demo": "node scripts/build-demo-assets.mjs",
"build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs",
"smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000",
"test": "vitest run",
"test": "vitest run --exclude src/standalone-smoke.test.ts --exclude src/example-smoke.test.ts --exclude src/setup-databases.test.ts --exclude src/scan.test.ts --exclude src/commands/connection-metabase-setup.test.ts --exclude src/setup-models.test.ts --exclude src/setup-sources.test.ts --exclude src/setup.test.ts --exclude src/connection.test.ts --exclude src/setup-embeddings.test.ts --exclude src/ingest.test.ts --exclude src/commands/connection-mapping.test.ts --exclude src/ingest-viz.test.ts --exclude src/demo.test.ts --exclude src/setup-project.test.ts --exclude src/sl.test.ts --exclude src/local-scan-connectors.test.ts --exclude src/commands/connection-notion.test.ts",
"test:slow": "vitest run src/setup-databases.test.ts src/scan.test.ts src/commands/connection-metabase-setup.test.ts src/setup-models.test.ts src/setup-sources.test.ts src/setup.test.ts src/connection.test.ts src/setup-embeddings.test.ts src/ingest.test.ts src/commands/connection-mapping.test.ts src/ingest-viz.test.ts src/demo.test.ts src/setup-project.test.ts src/sl.test.ts src/local-scan-connectors.test.ts src/commands/connection-notion.test.ts --testTimeout 30000",
"type-check": "tsc -p tsconfig.json --noEmit"
},
"dependencies": {
@ -38,7 +39,6 @@
"@ktx/connector-clickhouse": "workspace:*",
"@ktx/connector-mysql": "workspace:*",
"@ktx/connector-postgres": "workspace:*",
"@ktx/connector-posthog": "workspace:*",
"@ktx/connector-snowflake": "workspace:*",
"@ktx/connector-sqlite": "workspace:*",
"@ktx/connector-sqlserver": "workspace:*",

View file

@ -27,7 +27,7 @@ export interface KtxCliPackageInfo {
}
export interface KtxCliIo {
stdout: { isTTY?: boolean; write(chunk: string): void };
stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void };
stderr: { write(chunk: string): void };
}

View file

@ -3,20 +3,23 @@ import { describe, expect, it, vi } from 'vitest';
import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js';
import {
extractProgressMessage,
createRepainter,
initViewState,
parseIngestSummary,
parseScanSummary,
renderContextBuildView,
runContextBuild,
viewStateFromSourceProgress,
} from './context-build-view.js';
function makeIo(options: { isTTY?: boolean } = {}) {
function makeIo(options: { isTTY?: boolean; columns?: number } = {}) {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: options.isTTY,
columns: options.columns,
write: (chunk: string) => {
stdout += chunk;
},
@ -98,7 +101,7 @@ describe('parseScanSummary', () => {
describe('parseIngestSummary', () => {
it('extracts work units and saved memory', () => {
expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('5 work units · 3 wiki, 2 SL');
expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('3 wiki, 2 SL');
});
it('extracts work units alone when no saved memory', () => {
@ -127,10 +130,18 @@ describe('initViewState', () => {
expect(state.contextSources[0].target.connectionId).toBe('dbt-main');
expect(state.frame).toBe(0);
});
it('initializes global timing fields', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
expect(state.startedAt).toBeNull();
expect(state.totalElapsedMs).toBe(0);
});
});
describe('renderContextBuildView', () => {
it('renders all-queued state', () => {
it('renders all-queued state with ○ icon and progress counter', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
{ connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] },
@ -138,6 +149,8 @@ describe('renderContextBuildView', () => {
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('Building KTX context');
expect(output).toContain('(0/2)');
expect(output).toContain('○');
expect(output).toContain('Primary sources:');
expect(output).toContain('warehouse');
expect(output).toContain('queued');
@ -145,6 +158,29 @@ describe('renderContextBuildView', () => {
expect(output).toContain('dbt-main');
});
it('renders header with total elapsed time when set', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.totalElapsedMs = 65000;
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('(0/1 · 1m05s)');
});
it('renders dynamic separator matching header width', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.totalElapsedMs = 120000;
const output = renderContextBuildView(state, { styled: false });
const lines = output.split('\n');
const headerLine = lines.find((l) => l.includes('Building KTX context'))!;
const separatorLine = lines.find((l) => /^─+$/.test(l))!;
expect(separatorLine.length).toBeGreaterThanOrEqual(headerLine.length);
});
it('renders completed state with summary', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
@ -156,6 +192,74 @@ describe('renderContextBuildView', () => {
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('42 tables');
expect(output).toContain('1m12s');
expect(output).toContain('(1/1)');
});
it('renders running target with elapsed time', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.primarySources[0].status = 'running';
state.primarySources[0].elapsedMs = 30000;
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('scanning...');
expect(output).toContain('(30s)');
});
it('renders running target with progress bar when percentage is available', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.primarySources[0].status = 'running';
state.primarySources[0].detailLine = '[50%] Scanning tables...';
state.primarySources[0].elapsedMs = 15000;
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('██████░░░░░░');
expect(output).toContain('50%');
expect(output).toContain('Scanning tables...');
expect(output).toContain('(15s)');
});
it('renders completion summary when all targets are done', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
{ connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] },
]);
state.primarySources[0].status = 'done';
state.primarySources[0].elapsedMs = 72000;
state.contextSources[0].status = 'done';
state.contextSources[0].elapsedMs = 34000;
state.totalElapsedMs = 106000;
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('Done in 1m46s · 2 sources processed');
});
it('renders singular source label in completion summary', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.primarySources[0].status = 'done';
state.primarySources[0].elapsedMs = 5000;
state.totalElapsedMs = 5000;
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('Done in 5s · 1 source processed');
});
it('does not render completion summary while targets are still active', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
{ connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] },
]);
state.primarySources[0].status = 'done';
state.contextSources[0].status = 'running';
state.totalElapsedMs = 30000;
const output = renderContextBuildView(state, { styled: false });
expect(output).not.toContain('Done in');
});
it('renders failed state', () => {
@ -178,6 +282,54 @@ describe('renderContextBuildView', () => {
expect(output).not.toContain('Primary sources:');
expect(output).toContain('Context sources:');
});
it('preserves detach hint while targets are active', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.primarySources[0].status = 'running';
const output = renderContextBuildView(state, { styled: false, showHint: true, projectDir: '/tmp/project' });
expect(output).toContain('d to detach');
expect(output).toContain('ktx setup --project-dir /tmp/project');
expect(output).toContain('to resume');
});
it('omits detach hint when all targets are done', () => {
const state = initViewState([
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.primarySources[0].status = 'done';
state.totalElapsedMs = 5000;
const output = renderContextBuildView(state, { styled: false, showHint: true });
expect(output).not.toContain('d to detach');
});
});
// Tests for createRepainter: how far the cursor is moved back before each repaint.
describe('createRepainter', () => {
it('moves up visual rows, not just newline count, when content wraps', () => {
// A 5-column terminal forces the frames below to wrap.
const io = makeIo({ isTTY: true, columns: 5 });
const repainter = createRepainter(io.io);
// 11 characters at 5 columns span 3 visual rows; 'updated' (7 chars) spans 2.
repainter.paint('abcdefghijk\n');
repainter.paint('updated\n');
repainter.paint('done\n');
// Collect every "cursor up N" escape followed by a carriage return, in emission order.
const cursorMoves = [...io.stdout().matchAll(/\u001b\[(\d+)A\r/g)].map((match) => Number(match[1]));
// Second paint rewinds over the 3-row first frame, third paint over the 2-row second frame.
expect(cursorMoves).toEqual([3, 2]);
});
it('returns to the start of a single-line frame without moving up when content has no newline', () => {
const io = makeIo({ isTTY: true, columns: 80 });
const repainter = createRepainter(io.io);
repainter.paint('hello');
repainter.paint('bye');
// With no newline in the previous frame, a carriage return alone precedes the repaint.
expect(io.stdout()).toContain('\rbye');
// ...and no "cursor up 1" escape is emitted before it.
expect(io.stdout()).not.toContain('\u001b[1A\rbye');
});
});
describe('runContextBuild', () => {
@ -298,6 +450,135 @@ describe('runContextBuild', () => {
expect(mockExit).toHaveBeenCalledWith(0);
expect(io.stdout()).toContain('Context build continuing in the background.');
expect(io.stdout()).toContain('Resume: ktx setup --project-dir /tmp/project');
expect(io.stdout()).toContain('Status: ktx setup context status --project-dir /tmp/project');
mockExit.mockRestore();
});
it('calls onSourceProgress when sources start and finish', async () => {
const io = makeIo();
const project = projectWithConnections({
warehouse: { driver: 'postgres' },
dbt_main: { driver: 'dbt' },
});
const progressUpdates: Array<Array<{ connectionId: string; status: string }>> = [];
const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation));
await runContextBuild(
project,
{ projectDir: '/tmp/project', inputMode: 'disabled' },
io.io,
{
executeTarget,
now: () => 1000,
onSourceProgress: (sources) => {
progressUpdates.push(sources.map((s) => ({ connectionId: s.connectionId, status: s.status })));
},
},
);
expect(progressUpdates).toHaveLength(4);
expect(progressUpdates[0]).toEqual([
{ connectionId: 'warehouse', status: 'running' },
{ connectionId: 'dbt_main', status: 'queued' },
]);
expect(progressUpdates[1]).toEqual([
{ connectionId: 'warehouse', status: 'done' },
{ connectionId: 'dbt_main', status: 'queued' },
]);
expect(progressUpdates[2]).toEqual([
{ connectionId: 'warehouse', status: 'done' },
{ connectionId: 'dbt_main', status: 'running' },
]);
expect(progressUpdates[3]).toEqual([
{ connectionId: 'warehouse', status: 'done' },
{ connectionId: 'dbt_main', status: 'done' },
]);
});
it('returns report IDs and artifact paths parsed from target output', async () => {
const io = makeIo();
const project = projectWithConnections({
warehouse: { driver: 'postgres' },
dbt_main: { driver: 'dbt' },
});
const executeTarget = vi.fn(async (target, _args, targetIo) => {
if (target.operation === 'scan') {
targetIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n');
targetIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n');
} else {
targetIo.stdout.write('Report: report-dbt-1\n');
targetIo.stdout.write('Saved memory: 2 wiki, 3 SL\n');
}
return successResult(target.connectionId, target.driver, target.operation);
});
const result = await runContextBuild(
project,
{ projectDir: '/tmp/project', inputMode: 'disabled' },
io.io,
{ executeTarget, now: () => 1000 },
);
expect(result).toMatchObject({
exitCode: 0,
detached: false,
reportIds: ['report-dbt-1'],
artifactPaths: [
'raw-sources/warehouse/live-database/sync-1/scan-report.json',
'raw-sources/warehouse/live-database/sync-1',
],
});
});
});
// Tests for viewStateFromSourceProgress: rebuilding a renderable view state from
// per-source progress records.
describe('viewStateFromSourceProgress', () => {
it('partitions sources into primary and context groups', () => {
// Arguments as exercised here: progress records, current time (1000), total elapsed ms (500).
const state = viewStateFromSourceProgress(
[
{ connectionId: 'warehouse', operation: 'scan', status: 'running', startedAtMs: 900 },
{ connectionId: 'dbt-main', operation: 'source-ingest', status: 'queued' },
],
1000,
500,
);
// The 'scan' record lands in primarySources.
expect(state.primarySources).toHaveLength(1);
expect(state.primarySources[0].target.connectionId).toBe('warehouse');
expect(state.primarySources[0].status).toBe('running');
// Running elapsed time is current time minus startedAtMs: 1000 - 900.
expect(state.primarySources[0].elapsedMs).toBe(100);
// The 'source-ingest' record lands in contextSources.
expect(state.contextSources).toHaveLength(1);
expect(state.contextSources[0].target.connectionId).toBe('dbt-main');
expect(state.contextSources[0].status).toBe('queued');
// The third argument is surfaced unchanged as totalElapsedMs.
expect(state.totalElapsedMs).toBe(500);
});
it('uses stored elapsedMs for completed sources', () => {
// The current time (99999) must not override the recorded elapsedMs of a finished source.
const state = viewStateFromSourceProgress(
[{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }],
99999,
);
expect(state.primarySources[0].elapsedMs).toBe(72000);
expect(state.primarySources[0].summaryText).toBe('42 tables');
});
it('renders the same view format as the foreground build', () => {
const state = viewStateFromSourceProgress(
[
{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' },
{ connectionId: 'dbt-main', operation: 'source-ingest', status: 'running', startedAtMs: 900 },
],
1000,
500,
);
// The reconstructed state must be accepted by the regular renderer (plain, unstyled output).
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('Building KTX context');
expect(output).toContain('Primary sources:');
expect(output).toContain('warehouse');
expect(output).toContain('42 tables');
expect(output).toContain('Context sources:');
expect(output).toContain('dbt-main');
// A running source-ingest target with no detail line shows the default label.
expect(output).toContain('ingesting...');
});
});

View file

@ -30,6 +30,8 @@ export interface ContextBuildViewState {
primarySources: ContextBuildTargetState[];
contextSources: ContextBuildTargetState[];
frame: number;
startedAt: number | null;
totalElapsedMs: number;
}
export interface ContextBuildArgs {
@ -42,6 +44,17 @@ export interface ContextBuildArgs {
export interface ContextBuildResult {
exitCode: number;
detached: boolean;
reportIds?: string[];
artifactPaths?: string[];
}
/**
 * Progress record for a single source in a context build.
 * An array of these is the argument of `ContextBuildDeps.onSourceProgress`.
 */
export interface ContextBuildSourceProgressUpdate {
/** Identifier of the connection this record describes. */
connectionId: string;
/** Operation run for the connection: a scan or a source ingest. */
operation: 'scan' | 'source-ingest';
/** Lifecycle state of the source. */
status: 'queued' | 'running' | 'done' | 'failed';
/** Start time in milliseconds — presumably on the same clock as the `now` dependency; confirm. */
startedAtMs?: number;
/** Elapsed time in milliseconds — NOTE(review): appears to be recorded for finished sources; confirm. */
elapsedMs?: number;
/** Short human-readable result summary (for example a table count). */
summaryText?: string;
}
export interface ContextBuildDeps {
@ -49,6 +62,7 @@ export interface ContextBuildDeps {
now?: () => number;
setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null;
onDetach?: () => void;
onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void;
}
// --- Rendering ---
@ -79,7 +93,7 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st
case 'running':
return SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋';
default:
return '·';
return '';
}
}
switch (status) {
@ -90,10 +104,27 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st
case 'running':
return cyan(SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋');
default:
return dim('·');
return dim('');
}
}
/**
 * Reads the leading `[NN%]` marker from a progress detail line.
 *
 * @param detailLine Latest detail text for a target, or `null` when there is none.
 * @returns The integer inside the marker, or `null` when the line is empty/absent
 *   or does not begin with a `[digits%]` marker.
 */
function extractPercent(detailLine: string | null): number | null {
  const captured = detailLine ? /^\[(\d+)%\]/.exec(detailLine) : null;
  if (captured === null) {
    return null;
  }
  return Number(captured[1]);
}
/** Total number of cells in the progress bar. */
const BAR_WIDTH = 12;
/** Glyph used for the completed portion of the bar. */
const BAR_FILLED = '█';
/** Glyph used for the remaining portion of the bar. */
const BAR_EMPTY = '░';
/**
 * Renders a fixed-width (`BAR_WIDTH` cells) textual progress bar.
 *
 * @param percent Completion percentage. Values outside 0–100 are clamped and
 *   non-finite values are treated as 0, so the bar is always exactly
 *   `BAR_WIDTH` cells wide.
 * @param styled When true the bar is wrapped with the `cyan` colour helper.
 * @returns The bar string, e.g. `██████░░░░░░` for 50%.
 */
function renderProgressBar(percent: number, styled: boolean): string {
  // The percentage is parsed from free-form "[NN%]" progress text, so it can
  // exceed 100. Without clamping the empty-cell count goes negative and
  // String.prototype.repeat throws a RangeError, crashing the render loop.
  const clamped = Number.isFinite(percent) ? Math.min(100, Math.max(0, percent)) : 0;
  const filled = Math.round((clamped / 100) * BAR_WIDTH);
  const bar = `${BAR_FILLED.repeat(filled)}${BAR_EMPTY.repeat(BAR_WIDTH - filled)}`;
  return styled ? cyan(bar) : bar;
}
function targetDetail(target: ContextBuildTargetState, styled: boolean): string {
if (target.status === 'done') {
const parts: string[] = [];
@ -105,7 +136,17 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string
return styled ? red('failed') : 'failed';
}
if (target.status === 'running') {
return target.detailLine ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...');
const percent = extractPercent(target.detailLine);
const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '')
?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...');
const elapsed = target.elapsedMs > 0 ? `(${formatDuration(target.elapsedMs)})` : null;
const parts: string[] = [];
if (percent !== null) {
parts.push(`${renderProgressBar(percent, styled)} ${percent}%`);
}
parts.push(progressText);
if (elapsed) parts.push(styled ? dim(elapsed) : elapsed);
return parts.join(' ');
}
return styled ? dim('queued') : 'queued';
}
@ -136,23 +177,46 @@ function resumeCommand(projectDir?: string): string {
export function renderContextBuildView(
state: ContextBuildViewState,
options: { styled?: boolean; showHint?: boolean; projectDir?: string } = {},
options: { styled?: boolean; showHint?: boolean; hintText?: string; projectDir?: string } = {},
): string {
const styled = options.styled ?? true;
const width = columnWidth(state);
const allTargets = [...state.primarySources, ...state.contextSources];
const doneCount = allTargets.filter((t) => t.status === 'done' || t.status === 'failed').length;
const totalCount = allTargets.length;
const hasActive = allTargets.some((t) => t.status === 'running' || t.status === 'queued');
const allDone = totalCount > 0 && !hasActive;
const headerParts = ['Building KTX context'];
if (totalCount > 0) {
const progressParts: string[] = [`${doneCount}/${totalCount}`];
if (state.totalElapsedMs > 0) progressParts.push(formatDuration(state.totalElapsedMs));
const progress = `(${progressParts.join(' · ')})`;
headerParts.push(styled ? dim(progress) : progress);
}
const header = headerParts.join(' ');
const headerPlainLength = header.replace(/\x1b\[[0-9;]*m/g, '').length;
const separator = '─'.repeat(Math.max(21, headerPlainLength));
const lines: string[] = [
'',
'Building KTX context',
'─────────────────────',
header,
separator,
...renderTargetGroup('Primary sources', state.primarySources, state.frame, styled, width),
...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width),
'',
];
const hasActive = [...state.primarySources, ...state.contextSources].some(
(t) => t.status === 'running' || t.status === 'queued',
);
if (allDone && state.totalElapsedMs > 0) {
const sourcesLabel = totalCount === 1 ? '1 source' : `${totalCount} sources`;
const summary = ` Done in ${formatDuration(state.totalElapsedMs)} · ${sourcesLabel} processed`;
lines.push(styled ? green(summary) : summary);
lines.push('');
}
if (options.showHint && hasActive) {
const hint = ` d to detach · ${resumeCommand(options.projectDir)} to resume`;
const hintContent = options.hintText ?? `d to detach · ${resumeCommand(options.projectDir)} to resume`;
const hint = ` ${hintContent}`;
lines.push(styled ? dim(hint) : hint);
lines.push('');
}
@ -162,6 +226,7 @@ export function renderContextBuildView(
// --- IO Capture ---
// Matches every `ESC[K` (erase-to-end-of-line) sequence. ESC is regex-escaped first so the
// pattern stays literal whatever characters the ESC constant contains.
const ESC_K_RE = new RegExp(`${ESC.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\[K`, 'g');
// Matches SGR sequences of the form `\x1b[<digits and semicolons>m` (colors/styles), so they
// can be stripped before measuring how many characters a line really shows.
const ANSI_RE = /\x1b\[[0-9;]*m/g;
export function extractProgressMessage(chunk: string): string | null {
const cleaned = chunk.replace(/^\r/, '').replace(ESC_K_RE, '').replace(/\n$/, '').trim();
@ -175,12 +240,41 @@ export function parseScanSummary(output: string): string | null {
}
/**
 * Derives a short summary from captured ingest output by looking for a `Saved memory: …`
 * line and a `Work units: N` line.
 *
 * @param output - Text captured from an ingest run.
 * @returns The summary text, or `null` when neither line is present.
 *
 * NOTE(review): this listing shows two alternative bodies back to back: a `parts`-joining
 * variant that ends at the first `return`, followed by an early-return variant that
 * re-declares `workUnits`. As listed, everything after the first `return` is unreachable;
 * confirm which variant is meant to remain.
 */
export function parseIngestSummary(output: string): string | null {
const parts: string[] = [];
const workUnits = output.match(/Work units: (\d+)/);
if (workUnits) parts.push(`${workUnits[1]} work units`);
const savedMemory = output.match(/Saved memory: (.+)/);
if (savedMemory) parts.push(savedMemory[1]);
return parts.length > 0 ? parts.join(' · ') : null;
if (savedMemory) return savedMemory[1];
const workUnits = output.match(/Work units: (\d+)/);
if (workUnits) return `${workUnits[1]} work units`;
return null;
}
/**
 * Scans captured target output for report identifiers and artifact paths.
 *
 * Each line is trimmed and then checked for:
 * - `Report: <value>`: an artifact path for `scan` targets, a report id otherwise;
 * - `Raw sources: <value>`: always an artifact path;
 * - `report=<token>` occurrences: report ids, only for `source-ingest` targets.
 * Empty values and the literal `none` are ignored for the two labelled forms.
 *
 * @param output - Text captured while the target ran.
 * @param operation - Kind of target that produced the output.
 * @returns De-duplicated report ids and artifact paths in first-seen order.
 */
function collectOutputMetadata(
  output: string,
  operation: KtxPublicIngestPlanTarget['operation'],
): { reportIds: string[]; artifactPaths: string[] } {
  const seenReports = new Set<string>();
  const seenArtifacts = new Set<string>();
  const isScan = operation === 'scan';
  const isSourceIngest = operation === 'source-ingest';

  // Returns the trimmed capture of a `Label: value` line, or null when the line does not
  // match or carries no usable value.
  const labelledValue = (text: string, pattern: RegExp): string | null => {
    const hit = pattern.exec(text);
    if (hit === null) {
      return null;
    }
    const value = hit[1].trim();
    return value !== '' && value !== 'none' ? value : null;
  };

  output.split(/\r?\n/).forEach((rawLine) => {
    const text = rawLine.trim();

    const report = labelledValue(text, /^Report:\s*(.+)$/);
    if (report !== null) {
      (isScan ? seenArtifacts : seenReports).add(report);
    }

    const rawSources = labelledValue(text, /^Raw sources:\s*(.+)$/);
    if (rawSources !== null) {
      seenArtifacts.add(rawSources);
    }

    if (isSourceIngest) {
      for (const [, reportId] of text.matchAll(/\breport=([^\s]+)/g)) {
        seenReports.add(reportId);
      }
    }
  });

  return { reportIds: Array.from(seenReports), artifactPaths: Array.from(seenArtifacts) };
}
interface CapturedIo {
@ -210,19 +304,84 @@ function createCaptureIo(onProgress: (message: string) => void, isTTY: boolean):
};
}
// --- Source progress helpers ---
/**
 * Converts per-target view state into the compact progress records passed to
 * `onSourceProgress`.
 *
 * Optional fields are only set when they carry information: `startedAtMs` once the target
 * has a start time, `elapsedMs` when it is greater than zero, and `summaryText` when it is
 * non-empty.
 *
 * @param targets - Target states in display order.
 * @returns One progress record per target, in the same order.
 */
function collectSourceProgress(targets: ContextBuildTargetState[]): ContextBuildSourceProgressUpdate[] {
  const updates: ContextBuildSourceProgressUpdate[] = [];
  for (const entry of targets) {
    const update: ContextBuildSourceProgressUpdate = {
      connectionId: entry.target.connectionId,
      operation: entry.target.operation,
      status: entry.status,
    };
    if (entry.startedAt !== null) {
      update.startedAtMs = entry.startedAt;
    }
    if (entry.elapsedMs > 0) {
      update.elapsedMs = entry.elapsedMs;
    }
    if (entry.summaryText) {
      update.summaryText = entry.summaryText;
    }
    updates.push(update);
  }
  return updates;
}
/**
 * Rebuilds a renderable view state from persisted per-source progress records.
 *
 * Running sources get their elapsed time recomputed against `now`; all other sources keep
 * the recorded `elapsedMs` (or 0). Start times are tested against `undefined` rather than
 * for truthiness, so a start time of `0` (for example from an injected clock) counts as a
 * real timestamp, matching the `?? null` handling of `startedAt`.
 *
 * @param sources - Progress records; 'scan' entries become primary sources and
 *                  'source-ingest' entries become context sources.
 * @param now - Current time in ms, used to derive elapsed durations.
 * @param startedAtMs - Time in ms at which the whole build started, when known.
 * @returns A view state with `frame` reset to 0.
 */
export function viewStateFromSourceProgress(
  sources: ContextBuildSourceProgressUpdate[],
  now: number,
  startedAtMs?: number,
): ContextBuildViewState {
  const makeTarget = (s: ContextBuildSourceProgressUpdate): ContextBuildTargetState => {
    // Only a running source with a known start has a live, still-growing duration.
    const liveStart = s.status === 'running' ? s.startedAtMs : undefined;
    return {
      // Driver, debug command and steps are not part of a progress record, so they are
      // filled with empty placeholders.
      target: { connectionId: s.connectionId, driver: '', operation: s.operation, debugCommand: '', steps: [] },
      status: s.status,
      detailLine: null,
      summaryText: s.summaryText ?? null,
      startedAt: s.startedAtMs ?? null,
      elapsedMs: liveStart !== undefined ? now - liveStart : (s.elapsedMs ?? 0),
    };
  };
  return {
    primarySources: sources.filter((s) => s.operation === 'scan').map(makeTarget),
    contextSources: sources.filter((s) => s.operation === 'source-ingest').map(makeTarget),
    frame: 0,
    startedAt: startedAtMs ?? null,
    totalElapsedMs: startedAtMs !== undefined ? now - startedAtMs : 0,
  };
}
// --- Repaint ---
/**
 * Builds a painter whose `paint(content)` redraws a multi-line block in place: it moves the
 * cursor back up over the previously painted block, returns to column 0, writes the new
 * content and then clears whatever is left below the cursor.
 *
 * NOTE(review): this listing shows two variants interleaved: a header without `export`
 * that tracks `lastLineCount` (a plain newline count), and an exported header that tracks
 * `hasPainted` / `lastCursorUpRows` (rows after accounting for line wrapping). The same
 * applies inside `paint`. Confirm which variant is meant to remain.
 */
function createRepainter(io: KtxCliIo) {
let lastLineCount = 0;
export function createRepainter(io: KtxCliIo) {
let hasPainted = false;
let lastCursorUpRows = 0;
// Terminal width: the first positive finite value among the injected stdout and the process
// stdout, falling back to 80 columns.
const terminalColumns = () => {
for (const columns of [io.stdout.columns, process.stdout.columns]) {
if (typeof columns === 'number' && Number.isFinite(columns) && columns > 0) return columns;
}
return 80;
};
// Rows a single logical line occupies once wrapped at `columns`, measured on the text with
// SGR color codes stripped; an empty line still occupies one row.
const visualRows = (line: string, columns: number) => {
const plainLength = line.replace(ANSI_RE, '').length;
return Math.max(1, Math.ceil(plainLength / columns));
};
// Rows the cursor has to travel upwards to get back to the first row of `content` after it
// has been written.
const cursorUpRowsAfterWrite = (content: string) => {
const columns = terminalColumns();
const endsWithNewline = content.endsWith('\n');
const lines = content.split('\n');
return lines.reduce((sum, line, index) => {
// The last split segment is the row the cursor ends on: it adds nothing when the content
// ends with a newline, otherwise only its wrapped overflow rows count.
if (index === lines.length - 1) {
return endsWithNewline ? sum : sum + Math.max(0, visualRows(line, columns) - 1);
}
return sum + visualRows(line, columns);
}, 0);
};
return {
paint(content: string) {
if (lastLineCount > 0) {
io.stdout.write(`${ESC}[${lastLineCount}A\r`);
if (hasPainted) {
// Skip the cursor-up sequence when there are no rows to climb.
if (lastCursorUpRows > 0) {
io.stdout.write(`${ESC}[${lastCursorUpRows}A`);
}
io.stdout.write('\r');
}
io.stdout.write(content);
// ESC[K before each newline erases leftovers of a longer previous line on that row.
io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`));
// ESC[J erases everything below the cursor, for when the new block is shorter.
io.stdout.write(`${ESC}[J`);
lastLineCount = (content.match(/\n/g) ?? []).length;
hasPainted = true;
lastCursorUpRows = cursorUpRowsAfterWrite(content);
},
};
}
@ -258,7 +417,7 @@ function spawnBackgroundBuild(projectDir: string): { logPath: string } | null {
// --- Keystroke handling ---
function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null {
export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null {
const stdin = process.stdin;
if (!stdin.isTTY || typeof stdin.setRawMode !== 'function') {
return null;
@ -289,6 +448,8 @@ export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuil
primarySources: targets.filter((t) => t.operation === 'scan').map(makeTargetState),
contextSources: targets.filter((t) => t.operation === 'source-ingest').map(makeTargetState),
frame: 0,
startedAt: null,
totalElapsedMs: 0,
};
}
@ -303,6 +464,8 @@ export async function runContextBuild(
const isTTY = io.stdout.isTTY === true;
const nowFn = deps.now ?? (() => Date.now());
state.startedAt = nowFn();
const repainter = isTTY ? createRepainter(io) : null;
const viewOpts = { styled: true, projectDir: args.projectDir };
const paint = (hint: boolean) => repainter?.paint(renderContextBuildView(state, { ...viewOpts, showHint: hint }));
@ -312,6 +475,9 @@ export async function runContextBuild(
if (repainter) {
spinnerInterval = setInterval(() => {
state.frame++;
if (state.startedAt !== null) {
state.totalElapsedMs = nowFn() - state.startedAt;
}
for (const t of [...state.primarySources, ...state.contextSources]) {
if (t.status === 'running' && t.startedAt !== null) {
t.elapsedMs = nowFn() - t.startedAt;
@ -323,6 +489,8 @@ export async function runContextBuild(
const orderedTargets = [...state.primarySources, ...state.contextSources];
const execTarget = deps.executeTarget ?? executePublicIngestTarget;
const reportIds = new Set<string>();
const artifactPaths = new Set<string>();
let detached = false;
let cleanupKeystroke: (() => void) | null = null;
@ -339,8 +507,8 @@ export async function runContextBuild(
const bg = spawnBackgroundBuild(args.projectDir);
io.stdout.write('\n\nContext build continuing in the background.\n');
if (bg) io.stdout.write(`Log: ${bg.logPath}\n`);
io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`);
io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`);
process.exit(0);
},
() => {
@ -370,6 +538,7 @@ export async function runContextBuild(
targetState.status = 'running';
targetState.startedAt = nowFn();
paint(true);
deps.onSourceProgress?.(collectSourceProgress(orderedTargets));
const capture = createCaptureIo(
(message) => {
@ -386,20 +555,29 @@ export async function runContextBuild(
targetState.status = failed ? 'failed' : 'done';
targetState.detailLine = null;
if (!failed) {
const capturedOutput = capture.captured();
const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation);
for (const reportId of metadata.reportIds) reportIds.add(reportId);
for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath);
targetState.summaryText =
targetState.target.operation === 'scan'
? parseScanSummary(capture.captured())
: parseIngestSummary(capture.captured());
? parseScanSummary(capturedOutput)
: parseIngestSummary(capturedOutput);
}
if (failed) hasFailure = true;
paint(true);
deps.onSourceProgress?.(collectSourceProgress(orderedTargets));
}
} finally {
if (spinnerInterval) clearInterval(spinnerInterval);
cleanupKeystroke?.();
}
if (state.startedAt !== null) {
state.totalElapsedMs = nowFn() - state.startedAt;
}
if (detached) {
return { exitCode: 0, detached: true };
}
@ -410,5 +588,10 @@ export async function runContextBuild(
paint(false);
}
return { exitCode: hasFailure ? 1 : 0, detached: false };
return {
exitCode: hasFailure ? 1 : 0,
detached: false,
...(reportIds.size > 0 ? { reportIds: [...reportIds] } : {}),
...(artifactPaths.size > 0 ? { artifactPaths: [...artifactPaths] } : {}),
};
}

View file

@ -0,0 +1,863 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
type LocalIngestResult,
type MemoryFlowReplayInput,
type RunLocalIngestOptions,
} from '@ktx/context/ingest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runKtxIngest } from './ingest.js';
import {
completedLocalBundleRun,
emitLiveLocalMemoryFlow,
localFakeBundleReport,
makeIo,
persistLocalBundleReport,
writeBundleReportFile,
writeWarehouseConfig,
} from './ingest.test-utils.js';
import { resetVizFallbackWarningsForTest } from './viz-fallback.js';
describe('runKtxIngest viz and replay', () => {
let tempDir: string;
let originalTerm: string | undefined;
beforeEach(async () => {
resetVizFallbackWarningsForTest();
originalTerm = process.env.TERM;
process.env.TERM = 'xterm-256color';
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-ingest-'));
});
afterEach(async () => {
if (originalTerm === undefined) {
delete process.env.TERM;
} else {
process.env.TERM = originalTerm;
}
await rm(tempDir, { recursive: true, force: true });
});
it('renders live memory-flow frames for run --viz when stdout is interactive', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 });
input.memoryFlow?.update({ syncId: 'sync-live-1' });
input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 });
input.memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 });
input.memoryFlow?.update({
plannedWorkUnits: [
{
unitKey: 'fake-orders',
rawFiles: ['orders/orders.json'],
peerFileCount: 0,
dependencyCount: 0,
},
],
});
input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
input.memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' });
input.memoryFlow?.finish('done');
return completedLocalBundleRun(input, 'live-viz-run');
});
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null);
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
},
io.io,
{
runLocalIngest: runLocal,
startLiveMemoryFlow,
jobIdFactory: () => 'live-viz-run',
now: () => new Date('2026-04-30T14:00:00.000Z'),
},
),
).resolves.toBe(0);
expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.any(Object) }));
expect(io.stdout()).toContain('\u001b[2J\u001b[H');
expect((io.stdout().match(/KTX memory flow/g) ?? []).length).toBeGreaterThan(1);
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
expect(io.stdout()).toContain('fake-orders');
expect(io.stderr()).toBe('');
});
it('uses the TUI live session for run --viz when stdin and stdout are interactive', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
emitLiveLocalMemoryFlow(input.memoryFlow);
return completedLocalBundleRun(input, 'live-viz-run');
});
const liveSession = {
update: vi.fn(),
close: vi.fn(),
isClosed: vi.fn(() => false),
};
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession);
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
},
io.io,
{
runLocalIngest: runLocal,
startLiveMemoryFlow,
jobIdFactory: () => 'live-viz-run',
now: () => new Date('2026-04-30T14:00:00.000Z'),
},
),
).resolves.toBe(0);
expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1);
expect(startLiveMemoryFlow.mock.calls[0]?.[0]).toMatchObject({
runId: 'live-viz-run',
connectionId: 'warehouse',
adapter: 'fake',
status: 'running',
});
expect(liveSession.update).toHaveBeenCalled();
expect(liveSession.close).toHaveBeenCalledTimes(1);
expect(io.stdout()).not.toContain('\u001b[2J\u001b[H');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toBe('');
});
it('prints a final plain summary after live viz completes', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
const liveSession = {
update: vi.fn(),
close: vi.fn(),
isClosed: vi.fn(() => false),
};
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession);
const runLocal = vi.fn(async (input: RunLocalIngestOptions) => {
emitLiveLocalMemoryFlow(input.memoryFlow);
return completedLocalBundleRun(input, 'live-summary');
});
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
outputMode: 'viz',
},
io.io,
{ runLocalIngest: runLocal, startLiveMemoryFlow },
),
).resolves.toBe(0);
expect(liveSession.close).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('Memory-flow summary: done');
expect(io.stdout()).toContain('Connection: warehouse');
});
it('falls back to text live rendering when the TUI live session is unavailable', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
emitLiveLocalMemoryFlow(input.memoryFlow);
return completedLocalBundleRun(input, 'live-viz-run');
});
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null);
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
},
io.io,
{
runLocalIngest: runLocal,
startLiveMemoryFlow,
jobIdFactory: () => 'live-viz-run',
},
),
).resolves.toBe(0);
expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('\u001b[2J\u001b[H');
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
});
it('falls back to text live rendering when TUI startup fails with a redacted warning', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
emitLiveLocalMemoryFlow(input.memoryFlow);
return completedLocalBundleRun(input, 'live-viz-run');
});
const startLiveMemoryFlow = vi.fn(
async (_input: MemoryFlowReplayInput, ioArg: { stderr: { write(chunk: string): void } }) => {
ioArg.stderr.write('TUI visualization unavailable: Failed [redacted-url] [redacted]; using text renderer.\n');
return null;
},
);
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
},
io.io,
{
runLocalIngest: runLocal,
startLiveMemoryFlow,
jobIdFactory: () => 'live-viz-run',
},
),
).resolves.toBe(0);
expect(io.stderr()).toContain('TUI visualization unavailable: Failed [redacted-url] [redacted]');
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
expect(io.stdout()).toContain('\u001b[2J\u001b[H');
});
it('does not start live TUI when run --viz disables input', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
return completedLocalBundleRun(input, 'no-input-live-viz-run');
});
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({
update: vi.fn(),
close: vi.fn(),
isClosed: vi.fn(() => false),
}));
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
inputMode: 'disabled',
},
io.io,
{ runLocalIngest: runLocal, startLiveMemoryFlow },
),
).resolves.toBe(0);
expect(startLiveMemoryFlow).not.toHaveBeenCalled();
expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() }));
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
});
it('does not attach a live memory-flow sink for plain run output', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'plain-run'));
const io = makeIo({ isTTY: true });
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'plain',
},
io.io,
{ runLocalIngest: runLocal },
),
).resolves.toBe(0);
expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() }));
expect(io.stdout()).toContain('Job: plain-run');
expect(io.stdout()).not.toContain('KTX memory flow');
});
it('falls back to plain run output for run --viz when stdout is not interactive', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const io = makeIo({ isTTY: false });
const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'non-tty-viz-run'));
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
},
io.io,
{
runLocalIngest: runLocal,
jobIdFactory: () => 'non-tty-viz-run',
},
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Job: non-tty-viz-run');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but stdout is not an interactive terminal; printing plain output.',
);
});
it('falls back to plain run output for run --viz when stdin raw mode is unavailable', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 });
const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'raw-missing-viz-run'));
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({
update: vi.fn(),
close: vi.fn(),
isClosed: vi.fn(() => false),
}));
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'fake',
sourceDir,
outputMode: 'viz',
},
io.io,
{
runLocalIngest: runLocal,
startLiveMemoryFlow,
jobIdFactory: () => 'raw-missing-viz-run',
},
),
).resolves.toBe(0);
expect(startLiveMemoryFlow).not.toHaveBeenCalled();
expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() }));
expect(io.stdout()).toContain('Job: raw-missing-viz-run');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but stdin raw mode is unavailable; printing plain output.',
);
});
it('returns an error code for missing status', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const io = makeIo();
await expect(
runKtxIngest({ command: 'status', projectDir, runId: 'missing-run', outputMode: 'plain' }, io.io),
).resolves.toBe(1);
expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found');
});
it('uses the latest local ingest report when status has no run id', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
await persistLocalBundleReport(projectDir, localFakeBundleReport('older-run'));
await persistLocalBundleReport(projectDir, localFakeBundleReport('newer-run'));
const io = makeIo();
await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(0);
expect(io.stdout()).toContain('Run: run-newer-run');
expect(io.stdout()).toContain('Job: newer-run');
expect(io.stderr()).toBe('');
});
it('renders the latest local ingest report through watch when run id is omitted', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
await persistLocalBundleReport(projectDir, localFakeBundleReport('watch-latest'));
const io = makeIo({ isTTY: true });
await expect(
runKtxIngest({ command: 'watch', projectDir, outputMode: 'viz', inputMode: 'disabled' }, io.io),
).resolves.toBe(0);
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
expect(io.stdout()).toContain('Run: run-watch-latest');
expect(io.stderr()).toBe('');
});
it('renders report-file replay through the memory-flow TUI', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const reportFile = await writeBundleReportFile(tempDir);
const io = makeIo({ isTTY: true });
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'job-1',
reportFile,
outputMode: 'viz',
inputMode: 'disabled',
},
io.io,
),
).resolves.toBe(0);
expect(io.stdout()).toContain('KTX memory flow warehouse/metabase done');
expect(io.stdout()).toContain('Saved 2 memories from 2 raw files');
expect(io.stdout()).toContain('Commit: abc12345 Run: run-1 Report: report-1');
expect(io.stdout()).toContain('SOURCE');
expect(io.stdout()).toContain('ACTIONS');
expect(io.stdout()).toContain('SAVED');
expect(io.stderr()).toBe('');
});
it('prints report-file JSON without looking up local ingest status', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const reportFile = await writeBundleReportFile(tempDir);
const io = makeIo();
await expect(
runKtxIngest({ command: 'status', projectDir, runId: 'report-1', reportFile, outputMode: 'json' }, io.io),
).resolves.toBe(0);
const parsed = JSON.parse(io.stdout());
expect(parsed).toMatchObject({
id: 'report-1',
runId: 'run-1',
jobId: 'job-1',
connectionId: 'warehouse',
sourceKey: 'metabase',
});
expect(io.stderr()).toBe('');
});
it('routes interactive report-file replay through the stored TUI renderer', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const reportFile = await writeBundleReportFile(tempDir);
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true);
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'run-1',
reportFile,
outputMode: 'viz',
},
io.io,
{ renderStoredMemoryFlow },
),
).resolves.toBe(0);
expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1);
expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({
runId: 'run-1',
reportId: 'report-1',
connectionId: 'warehouse',
adapter: 'metabase',
});
expect(io.stdout()).toBe('');
expect(io.stderr()).toBe('');
});
it('rejects report-file replay when the requested id does not match the report', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const reportFile = await writeBundleReportFile(tempDir);
const io = makeIo();
await expect(
runKtxIngest({ command: 'replay', projectDir, runId: 'unrelated-id', reportFile, outputMode: 'plain' }, io.io),
).resolves.toBe(1);
expect(io.stderr()).toContain(
`Report file ${reportFile} does not match ingest replay id "unrelated-id"; expected one of report-1, run-1, job-1`,
);
expect(io.stdout()).toBe('');
});
it('renders memory-flow snapshot for status --viz when stdout is interactive', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-1'));
const io = makeIo({ isTTY: true });
await expect(
runKtxIngest(
{ command: 'status', projectDir, runId: 'viz-run-1', outputMode: 'viz', inputMode: 'disabled' },
io.io,
),
).resolves.toBe(0);
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
expect(io.stdout()).toContain('SOURCE');
expect(io.stdout()).toContain('CHUNKS');
expect(io.stdout()).toContain('WORKUNITS');
expect(io.stdout()).toContain('Saved 2 memories from 2 raw files');
expect(io.stderr()).toBe('');
});
it('uses the TUI renderer for stored status --viz when stdin and stdout are interactive', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-viz-run'));
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true);
await expect(
runKtxIngest(
{
command: 'status',
projectDir,
runId: 'tui-viz-run',
outputMode: 'viz',
},
io.io,
{ renderStoredMemoryFlow },
),
).resolves.toBe(0);
expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1);
expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({
runId: 'run-tui-viz-run',
connectionId: 'warehouse',
adapter: 'fake',
});
expect(io.stdout()).toBe('');
expect(io.stderr()).toBe('');
});
// Same interactive setup, but the injected renderer resolves to false
// ("declined"); the test expects the textual memory-flow output on stdout instead.
it('falls back to the text renderer when TUI declines stored status --viz', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-fallback-run'));
// A queued `q` keypress is replayed to any keypress listener the command registers.
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120, keypresses: [{ name: 'q' }] });
const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => false);
await expect(
runKtxIngest(
{
command: 'status',
projectDir,
runId: 'tui-fallback-run',
outputMode: 'viz',
},
io.io,
{ renderStoredMemoryFlow },
),
).resolves.toBe(0);
expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
});
// `replay --viz` with `inputMode: 'disabled'`: even on an interactive terminal the
// injected TUI renderer must not be called; the text memory-flow output is expected.
it('does not use TUI for stored --viz when input is disabled', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-no-input-run'));
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true);
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'tui-no-input-run',
outputMode: 'viz',
inputMode: 'disabled',
},
io.io,
{ renderStoredMemoryFlow },
),
).resolves.toBe(0);
expect(renderStoredMemoryFlow).not.toHaveBeenCalled();
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
});
// `replay --viz` on a TTY whose stdin has no `setRawMode` (`rawMode: false`):
// the test expects plain Run/Job status lines, no memory-flow view, no TUI call,
// and an explanatory warning on stderr.
it('falls back to plain status for stored --viz when stdin raw mode is unavailable', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('raw-missing-stored-viz-run'));
const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 });
const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true);
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'raw-missing-stored-viz-run',
outputMode: 'viz',
},
io.io,
{ renderStoredMemoryFlow },
),
).resolves.toBe(0);
expect(renderStoredMemoryFlow).not.toHaveBeenCalled();
expect(io.stdout()).toContain('Run: run-raw-missing-stored-viz-run');
expect(io.stdout()).toContain('Job: raw-missing-stored-viz-run');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but stdin raw mode is unavailable; printing plain output.',
);
});
// With input disabled and a TTY stdout, `replay --viz` should print a single
// memory-flow snapshot: no ESC[2J ESC[H (clear screen + cursor home) sequence
// and nothing on stderr.
it('keeps stored --viz snapshot-only when input is disabled', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('no-input-viz-run'));
const io = makeIo({ isTTY: true, columns: 120 });
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'no-input-viz-run',
outputMode: 'viz',
inputMode: 'disabled',
},
io.io,
),
).resolves.toBe(0);
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
expect(io.stdout()).not.toContain('\u001b[2J\u001b[H');
expect(io.stderr()).toBe('');
});
// Input disabled AND stdin without raw-mode support: the missing raw mode must not
// trigger the plain-output fallback or its stderr warning; the snapshot is still printed.
it('keeps disabled-input stored --viz snapshot output even when stdin raw mode is unavailable', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('disabled-raw-missing-viz-run'));
const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 });
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'disabled-raw-missing-viz-run',
outputMode: 'viz',
inputMode: 'disabled',
},
io.io,
),
).resolves.toBe(0);
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
expect(io.stdout()).not.toContain('\u001b[2J\u001b[H');
expect(io.stderr()).toBe('');
});
// Non-TTY stdout (`isTTY: false`) with input disabled: the visualization is expected
// to degrade to plain Run/Job status lines, with a warning on stderr.
it('degrades stored --viz snapshots to plain status when stdout is redirected even when input is disabled', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('redirected-no-input-viz-run'));
const io = makeIo({ isTTY: false });
await expect(
runKtxIngest(
{
command: 'replay',
projectDir,
runId: 'redirected-no-input-viz-run',
outputMode: 'viz',
inputMode: 'disabled',
},
io.io,
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Run: run-redirected-no-input-viz-run');
expect(io.stdout()).toContain('Job: redirected-no-input-viz-run');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but stdout is not an interactive terminal; printing plain output.',
);
});
// A TTY stdout but `TERM=dumb` in the injected environment: the test expects plain
// status output and a stderr warning that names the TERM value.
it('degrades ingest replay --viz to plain status when TERM is dumb', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('dumb-terminal-viz-run'));
const io = makeIo({ isTTY: true });
await expect(
runKtxIngest(
{ command: 'replay', projectDir, runId: 'dumb-terminal-viz-run', outputMode: 'viz' },
io.io,
// Override only TERM; every other variable comes from the real process environment.
{ env: { ...process.env, TERM: 'dumb' } },
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Run: run-dumb-terminal-viz-run');
expect(io.stdout()).toContain('Job: dumb-terminal-viz-run');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.',
);
});
// `replay --viz` with default input mode and a non-TTY stdout: plain status lines
// are expected on stdout and the "not an interactive terminal" warning on stderr.
it('falls back to plain status for --viz when stdout is not interactive', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-2'));
const io = makeIo({ isTTY: false });
await expect(
runKtxIngest({ command: 'replay', projectDir, runId: 'viz-run-2', outputMode: 'viz' }, io.io),
).resolves.toBe(0);
expect(io.stdout()).toContain('Run: run-viz-run-2');
expect(io.stdout()).toContain('Job: viz-run-2');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but stdout is not an interactive terminal; printing plain output.',
);
});
// `status` with `outputMode: 'json'`: stdout must be a single parseable JSON document
// carrying the stored report's identifiers, and stderr must stay empty.
it('prints JSON for status --json', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const sourceDir = join(tempDir, 'source');
await mkdir(join(sourceDir, 'orders'), { recursive: true });
await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
await persistLocalBundleReport(projectDir, localFakeBundleReport('json-run-1'));
const io = makeIo();
await expect(
runKtxIngest({ command: 'status', projectDir, runId: 'json-run-1', outputMode: 'json' }, io.io),
).resolves.toBe(0);
// JSON.parse throws if stdout holds anything besides one JSON value.
expect(JSON.parse(io.stdout())).toMatchObject({
runId: 'run-json-run-1',
jobId: 'json-run-1',
sourceKey: 'fake',
connectionId: 'warehouse',
});
expect(io.stderr()).toBe('');
});
});

View file

@ -0,0 +1,746 @@
import { EventEmitter } from 'node:events';
import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent';
import {
LocalLookerRuntimeStore,
LocalMetabaseSourceStateReader,
MetabaseSourceAdapter,
getLocalIngestStatus,
type ChunkResult,
type FetchContext,
type IngestReportSnapshot,
type LocalIngestResult,
type LocalMetabaseFanoutProgress,
type LookerMappingClient,
type LookerRuntimeClient,
type LookerTableIdentifierParser,
type MemoryFlowEventSink,
type MemoryFlowReplayInput,
type MetabaseCard,
type MetabaseCardSummary,
type MetabaseClientFactory,
type MetabaseRuntimeClient,
type RunLocalIngestOptions,
type SourceAdapter,
type SqliteBundleIngestStore,
} from '@ktx/context/ingest';
import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project';
import { expect, vi } from 'vitest';
import { type KtxIngestArgs, runKtxIngest } from './ingest.js';
/**
 * Builds an in-memory stand-in for the CLI's stdin/stdout/stderr triple.
 *
 * - `isTTY` / `columns` are exposed verbatim on the fake stdout.
 * - `stdinIsTTY` sets `isTTY` on the fake stdin (defaults to `false`).
 * - `rawMode: false` removes `setRawMode` from the fake stdin entirely.
 * - `keypresses` are replayed, one microtask each, to every listener that
 *   subscribes to the `keypress` event.
 *
 * @returns `io` (the fake streams) plus `stdout()` / `stderr()` accessors that
 *   return everything written so far.
 */
export function makeIo(
  options: {
    isTTY?: boolean;
    stdinIsTTY?: boolean;
    columns?: number;
    rawMode?: boolean;
    keypresses?: { name?: string; ctrl?: boolean }[];
  } = {},
) {
  type TestKey = { name?: string; ctrl?: boolean };
  type KeyListener = (chunk: string, key: TestKey) => void;

  // Text written to the fake output streams accumulates here.
  const captured = { out: '', err: '' };

  class TestStdin extends EventEmitter {
    isTTY = options.stdinIsTTY ?? false;
    isRaw = false;
    // When raw mode is declared unavailable the method itself is missing.
    setRawMode =
      options.rawMode !== false
        ? (value: boolean): void => {
            this.isRaw = value;
          }
        : undefined;

    resume(): void {
      // Intentionally a no-op.
    }

    pause(): void {
      // Intentionally a no-op.
    }

    override on(eventName: string | symbol, listener: KeyListener): this {
      super.on(eventName, listener);
      if (eventName !== 'keypress') {
        return this;
      }
      // Replay the scripted keys asynchronously, in declaration order.
      (options.keypresses ?? []).forEach((key) => {
        queueMicrotask(() => listener('', key));
      });
      return this;
    }

    override off(eventName: string | symbol, listener: KeyListener): this {
      return super.off(eventName, listener);
    }

    override removeListener(eventName: string | symbol, listener: KeyListener): this {
      return super.removeListener(eventName, listener);
    }
  }

  const io = {
    stdin: new TestStdin(),
    stdout: {
      isTTY: options.isTTY,
      columns: options.columns,
      write: (chunk: string) => {
        captured.out += chunk;
      },
    },
    stderr: {
      write: (chunk: string) => {
        captured.err += chunk;
      },
    },
  };

  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
/**
 * Creates `projectDir` (recursively) and writes a `ktx.yaml` into it that declares
 * project `warehouse`, a `prod-metabase` connection (driver `metabase`), a
 * `warehouse_a` connection (driver `postgres`) and the `fake` ingest adapter.
 *
 * @param projectDir Directory that will contain the generated `ktx.yaml`.
 */
export async function writeWarehouseConfig(projectDir: string): Promise<void> {
  const yamlLines = [
    'project: warehouse',
    'connections:',
    ' prod-metabase:',
    ' driver: metabase',
    ' warehouse_a:',
    ' driver: postgres',
    'ingest:',
    ' adapters:',
    ' - fake',
  ];
  // Every line, including the last one, is newline-terminated.
  const yamlText = yamlLines.map((line) => `${line}\n`).join('');

  await mkdir(projectDir, { recursive: true });
  const configPath = join(projectDir, 'ktx.yaml');
  await writeFile(configPath, yamlText, 'utf-8');
}
/**
 * Creates `projectDir` (recursively) and writes a `ktx.yaml` into it that declares
 * project `warehouse`, a single `warehouse` postgres connection, the `metabase`
 * ingest adapter and a `deterministic` embeddings backend.
 *
 * @param projectDir Directory that will contain the generated `ktx.yaml`.
 */
export async function writeMetabaseConfig(projectDir: string): Promise<void> {
  const yamlLines = [
    'project: warehouse',
    'connections:',
    ' warehouse:',
    ' driver: postgres',
    'ingest:',
    ' adapters:',
    ' - metabase',
    ' embeddings:',
    ' backend: deterministic',
  ];
  // Every line, including the last one, is newline-terminated.
  const yamlText = yamlLines.map((line) => `${line}\n`).join('');

  await mkdir(projectDir, { recursive: true });
  const configPath = join(projectDir, 'ktx.yaml');
  await writeFile(configPath, yamlText, 'utf-8');
}
/**
 * Returns a freshly built, fixed ingest report fixture: one successful `cards`
 * work unit that produced a wiki page and a semantic-layer update, with matching
 * provenance rows and one tool-transcript entry. Every call yields new objects.
 */
export function bundleReportSnapshot(): IngestReportSnapshot {
  const wikiKey = 'knowledge/global/revenue.md';
  const slKey = 'warehouse.orders';
  const firstCard = 'cards/1.json';
  const secondCard = 'cards/2.json';

  const body: IngestReportSnapshot['body'] = {
    syncId: 'sync-1',
    diffSummary: { added: 2, modified: 0, deleted: 0, unchanged: 0 },
    commitSha: 'abc12345',
    workUnits: [
      {
        unitKey: 'cards',
        rawFiles: [firstCard, secondCard],
        status: 'success',
        actions: [
          { target: 'wiki', type: 'created', key: wikiKey, detail: 'Revenue overview' },
          { target: 'sl', type: 'updated', key: slKey, detail: 'Added order amount measure' },
        ],
        touchedSlSources: [{ connectionId: 'warehouse', sourceName: slKey }],
      },
    ],
    failedWorkUnits: [],
    reconciliationSkipped: false,
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
    evictionInputs: [],
    unresolvedCards: [],
    supersededBy: null,
    overrideOf: null,
    provenanceRows: [
      { rawPath: firstCard, artifactKind: 'wiki', artifactKey: wikiKey, actionType: 'wiki_written' },
      { rawPath: secondCard, artifactKind: 'sl', artifactKey: slKey, actionType: 'measure_added' },
    ],
    toolTranscripts: [
      {
        unitKey: 'cards',
        path: 'tool-transcripts/cards.jsonl',
        toolCallCount: 4,
        errorCount: 0,
        toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'],
      },
    ],
  };

  return {
    id: 'report-1',
    runId: 'run-1',
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-04-30T12:00:00.000Z',
    body,
  };
}
/**
 * Builds the result a successful local ingest run would return for `jobId`.
 * The report is the fake bundle report re-labelled with fixed `report-live-1` /
 * `run-live-1` ids and the caller's connection id and adapter; the summary
 * fields in `result` are all derived from that report.
 */
export function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult {
  const { connectionId, adapter } = input;
  const report = localFakeBundleReport(jobId, {
    id: 'report-live-1',
    runId: 'run-live-1',
    connectionId,
    sourceKey: adapter,
  });
  const { body } = report;

  return {
    result: {
      jobId,
      runId: report.runId,
      syncId: body.syncId,
      diffSummary: body.diffSummary,
      workUnitCount: body.workUnits.length,
      failedWorkUnits: body.failedWorkUnits,
      artifactsWritten: body.provenanceRows.length,
      commitSha: body.commitSha,
    },
    report,
  };
}
/**
 * Builds the result of a local ingest run whose only work unit failed.
 * The first work unit of the snapshot fixture is copied, marked `failed` with
 * reason `writer tool failed`, and stripped of its actions and touched sources;
 * the report lists that unit key under `failedWorkUnits`.
 */
export function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult {
  const templateUnit = bundleReportSnapshot().body.workUnits[0];
  const failedWorkUnit = {
    ...templateUnit,
    status: 'failed' as const,
    reason: 'writer tool failed',
    actions: [],
    touchedSlSources: [],
  };

  const { connectionId, adapter } = input;
  const report = localFakeBundleReport(jobId, {
    id: 'report-failed-1',
    runId: 'run-failed-1',
    connectionId,
    sourceKey: adapter,
    body: {
      workUnits: [failedWorkUnit],
      failedWorkUnits: [failedWorkUnit.unitKey],
    },
  });
  const { body } = report;

  return {
    result: {
      jobId,
      runId: report.runId,
      syncId: body.syncId,
      diffSummary: body.diffSummary,
      workUnitCount: body.workUnits.length,
      failedWorkUnits: body.failedWorkUnits,
      artifactsWritten: body.provenanceRows.length,
      commitSha: body.commitSha,
    },
    report,
  };
}
/**
 * Agent runner double whose mocked `runLoop` writes one semantic-layer source
 * through the `sl_write_source` tool, but only for the
 * `looker-explore-ecommerce-orders` work unit of an `ingest-bundle-wu` operation.
 * Every other invocation finishes immediately with a natural stop.
 */
export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
  constructor() {
    super({ llmProvider: { getModel: () => ({}) as never } as never });
  }

  override runLoop = vi.fn(async (params: RunLoopParams) => {
    const tags = params.telemetryTags;
    const isLookerOrdersUnit =
      tags?.operationName === 'ingest-bundle-wu' && tags?.unitKey === 'looker-explore-ecommerce-orders';
    if (!isLookerOrdersUnit) {
      return { stopReason: 'natural' as const };
    }

    const slWrite = params.toolSet.sl_write_source;
    if (!slWrite?.execute) {
      throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
    }

    const outcome = await slWrite.execute(
      {
        connectionId: 'prod-warehouse',
        sourceName: 'looker__ecommerce__orders',
        source: {
          name: 'looker__ecommerce__orders',
          table: 'public.orders',
          grain: ['id'],
          columns: [
            { name: 'id', type: 'number' },
            { name: 'revenue', type: 'number' },
          ],
          measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
        },
      },
      { toolCallId: 'cli-looker-sl-write', messages: [] },
    );
    // Surface a failed tool call as a test failure, using the tool's own message.
    if (!outcome.structured.success) {
      throw new Error(outcome.markdown);
    }

    return { stopReason: 'natural' as const };
  });
}
/**
 * Agent runner double for Metabase CLI tests: `runLoop` is a mock that performs
 * no work and always resolves with a natural stop.
 */
export class CliMetabaseAgentRunner extends AgentRunnerService {
  constructor() {
    // The model provider is a stub; the mocked loop below never uses it.
    super({ llmProvider: { getModel: () => ({}) as never } as never });
  }

  override runLoop = vi.fn(async () => {
    return { stopReason: 'natural' as const };
  });
}
/**
 * Minimal `metabase` source adapter used by CLI tests.
 *
 * `fetch` records each call in `fetchCalls`, remembers which Metabase database id
 * was staged into which directory, and writes one `cards/<id>.json` plus one
 * `databases/<id>.json` file. `chunk` turns a previously fetched staged directory
 * into a single work unit.
 */
export class CliMetabaseSourceAdapter implements SourceAdapter {
  readonly source = 'metabase';
  readonly skillNames: string[] = [];
  /** One entry per `fetch` call, in call order. */
  readonly fetchCalls: Array<{ metabaseConnectionId: string; metabaseDatabaseId: number; connectionId: string }> = [];
  /** Staged directory -> Metabase database id recorded by `fetch`. */
  private readonly databaseByStagedDir = new Map<string, number>();

  /** Always reports that the source is present. */
  detect(): Promise<boolean> {
    return Promise.resolve(true);
  }

  /**
   * Stages the fake card and database files for one Metabase database.
   *
   * @param pullConfig Read as `{ metabaseConnectionId, metabaseDatabaseId }`.
   * @param stagedDir Directory to write the staged files into.
   * @param ctx Fetch context; only `connectionId` is used.
   */
  async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number };
    this.fetchCalls.push({
      metabaseConnectionId: config.metabaseConnectionId,
      metabaseDatabaseId: config.metabaseDatabaseId,
      connectionId: ctx.connectionId,
    });
    this.databaseByStagedDir.set(stagedDir, config.metabaseDatabaseId);
    await mkdir(join(stagedDir, 'cards'), { recursive: true });
    await mkdir(join(stagedDir, 'databases'), { recursive: true });
    await writeFile(
      join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`),
      JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }),
      'utf-8',
    );
    await writeFile(
      join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`),
      JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }),
      'utf-8',
    );
  }

  /**
   * Produces the single work unit for a directory previously passed to `fetch`.
   *
   * @throws Error when `stagedDir` was never staged by `fetch`.
   */
  async chunk(stagedDir: string): Promise<ChunkResult> {
    const databaseId = this.databaseByStagedDir.get(stagedDir);
    // Compare against undefined explicitly: a falsy check would wrongly reject
    // a directory that was staged with database id 0.
    if (databaseId === undefined) {
      throw new Error(`Missing Metabase database id for staged dir ${stagedDir}`);
    }
    return {
      workUnits: [
        {
          unitKey: `metabase-db-${databaseId}`,
          rawFiles: [`cards/${databaseId}.json`],
          peerFileIndex: [],
          dependencyPaths: [`databases/${databaseId}.json`],
        },
      ],
    };
  }
}
/**
 * Three native-SQL question cards on database 1: two in collection 12 and one in
 * collection 13. Each card's SQL is `select <id> as id`. The cards differ only
 * in id, name and collection, so they are generated from a small table.
 */
const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = (
  [
    [101, 'Collection 12 Revenue', 12],
    [102, 'Collection 12 Margin', 12],
    [103, 'Collection 13 Pipeline', 13],
  ] as const
).map(
  ([id, name, collectionId]): MetabaseCard => ({
    id,
    name,
    description: null,
    type: 'question',
    query_type: 'native',
    database_id: 1,
    collection_id: collectionId,
    archived: false,
    result_metadata: [],
    dataset_query: { type: 'native', database: 1, native: { query: `select ${id} as id` } },
    parameters: [],
    dashboard_count: 0,
  }),
);
/**
 * Projects a full card onto its summary form, keeping only the id, name,
 * archived flag, database id and collection id.
 */
function metabaseCardSummary(card: MetabaseCard): MetabaseCardSummary {
  const { id, name, archived, database_id, collection_id } = card;
  return { id, name, archived, database_id, collection_id };
}
/**
 * Builds an in-memory Metabase client backed by `SYNC_MODE_METABASE_CARDS`.
 * It exposes one postgres database (`Warehouse A`, id 1) and two root
 * collections (12 "Selected Collection", 13 "Other Collection"); card SQL is
 * read straight from each card's native query.
 */
function createSyncModeMetabaseClient(): MetabaseRuntimeClient {
  const client: MetabaseRuntimeClient = {
    testConnection: async () => ({ success: true }),
    getCurrentUser: async () => ({ id: 1, email: 'local@example.test' }),
    getDatabases: async () => [{ id: 1, name: 'Warehouse A', engine: 'postgres' }],
    getDatabase: async (id) => ({ id, name: 'Warehouse A', engine: 'postgres' }),
    getCollectionTree: async () => [
      { id: 12, name: 'Selected Collection', parent_id: 'root', children: [] },
      { id: 13, name: 'Other Collection', parent_id: 'root', children: [] },
    ],
    getCollection: async (id) => {
      // Any id other than 12 is reported as the "other" collection.
      const name = id === 12 ? 'Selected Collection' : 'Other Collection';
      return { id, name, parent_id: 'root', children: [] };
    },
    getCollectionItems: async (collectionId) => {
      const members = SYNC_MODE_METABASE_CARDS.filter((card) => card.collection_id === collectionId);
      return members.map(({ id, name, collection_id, database_id }) => ({
        id,
        model: 'card',
        name,
        collection_id,
        database_id,
      }));
    },
    getCard: async (id) => {
      const match = SYNC_MODE_METABASE_CARDS.find((card) => card.id === id);
      if (!match) {
        throw new Error(`unexpected card ${id}`);
      }
      return match;
    },
    getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary),
    convertMbqlToNative: async () => ({ query: 'select 1' }),
    getNativeSql: (card) => card.dataset_query?.native?.query ?? null,
    getTemplateTags: () => ({}),
    getCardSql: async (card) => card.dataset_query?.native?.query ?? null,
    getResolvedSql: async (card) => {
      // Fall back to a synthetic query when the card carries no native SQL.
      const resolvedSql = card.dataset_query?.native?.query ?? `select ${card.id} as id`;
      return { resolvedSql, templateTags: [], resolutionStatus: 'resolved' };
    },
    cleanup: async () => undefined,
  };
  return client;
}
/**
 * Client factory that always hands back the one client instance it was
 * constructed with.
 */
export class StaticMetabaseClientFactory implements MetabaseClientFactory {
  private readonly client: MetabaseRuntimeClient;

  constructor(client: MetabaseRuntimeClient) {
    this.client = client;
  }

  /** Returns the shared client; no new instance is created. */
  createClient(): MetabaseRuntimeClient {
    return this.client;
  }
}
/** One scenario for `runPublicMetabaseSyncModeCase`. */
type SyncModeCase = {
// Used in the project directory name, the project name, and the child job id.
name: string;
// Stored as the Metabase source state's sync mode before the ingest runs.
syncMode: 'ALL' | 'ONLY' | 'EXCEPT';
// Stored as the source state's selections (collections or individual items).
selections: Array<{ selectionType: 'collection' | 'item'; metabaseObjectId: number }>;
// Compared with the report's raw files after sorting, so list these in sorted order.
expectedRawFiles: string[];
// Compared with the report's work-unit keys after sorting, so list these in sorted order.
expectedWorkUnitKeys: string[];
};
/**
 * Runs one end-to-end Metabase sync-mode scenario through `runKtxIngest`.
 *
 * Steps: write a project config under `tempDir`, store the scenario's sync mode,
 * selections and a single database mapping in the local Metabase source state,
 * run `ingest run` with an adapter backed by the in-memory Metabase client, then
 * assert on the CLI output and on the persisted report's work units and raw files.
 *
 * @param tempDir Parent directory for the scenario's project directory.
 * @param input Scenario definition and expected results.
 */
export async function runPublicMetabaseSyncModeCase(tempDir: string, input: SyncModeCase): Promise<void> {
  const caseSlug = `metabase-sync-mode-${input.name}`;
  const projectDir = join(tempDir, caseSlug);
  const ktxYaml = [
    `project: ${caseSlug}`,
    'connections:',
    ' prod-metabase:',
    ' driver: metabase',
    ' api_url: https://metabase.example.test',
    ' api_key: literal-test-key',
    ' warehouse_a:',
    ' driver: postgres',
    ' url: postgresql://readonly@db.example.test/warehouse_a',
    'ingest:',
    ' adapters:',
    ' - metabase',
    ' embeddings:',
    ' backend: deterministic',
    '',
  ].join('\n');

  await mkdir(projectDir, { recursive: true });
  await writeFile(join(projectDir, 'ktx.yaml'), ktxYaml, 'utf-8');

  // Seed the local source state the adapter will read during the run.
  const project = await loadKtxProject({ projectDir });
  const sourceState = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) });
  await sourceState.replaceSourceState({
    connectionId: 'prod-metabase',
    syncMode: input.syncMode,
    defaultTagNames: ['sync-mode-smoke'],
    selections: input.selections,
    mappings: [
      {
        metabaseDatabaseId: 1,
        metabaseDatabaseName: 'Warehouse A',
        metabaseEngine: 'postgres',
        metabaseHost: 'db.example.test',
        metabaseDbName: 'warehouse_a',
        targetConnectionId: 'warehouse_a',
        syncEnabled: true,
        source: 'refresh',
      },
    ],
  });

  const clientFactory = new StaticMetabaseClientFactory(createSyncModeMetabaseClient());
  const adapter = new MetabaseSourceAdapter({ clientFactory, sourceStateReader: sourceState });
  const jobId = `${caseSlug}-child`;
  const io = makeIo();

  const exitCode = runKtxIngest(
    {
      command: 'run',
      projectDir,
      connectionId: 'prod-metabase',
      adapter: 'metabase',
      outputMode: 'plain',
    },
    io.io,
    {
      createAdapters: vi.fn(() => [adapter]),
      jobIdFactory: () => jobId,
      localIngestOptions: {
        agentRunner: new CliMetabaseAgentRunner(),
      },
    },
  );
  await expect(exitCode).resolves.toBe(0);

  expect(io.stderr()).toBe('');
  const printed = io.stdout();
  expect(printed).toContain('Metabase fan-out: all_succeeded');
  expect(printed).toContain(`target=warehouse_a database=1 status=done job=${jobId}`);

  // The persisted report must match the scenario's expectations (order-insensitive).
  const report = await getLocalIngestStatus(project, jobId);
  expect(report).not.toBeNull();
  const unitKeys = report?.body.workUnits.map((wu) => wu.unitKey).sort();
  const rawFiles = report?.body.workUnits.flatMap((wu) => wu.rawFiles).sort();
  expect(unitKeys).toEqual(input.expectedWorkUnitKeys);
  expect(rawFiles).toEqual(input.expectedRawFiles);
}
/**
 * Looker runtime client as used by the CLI test doubles: the runtime client
 * surface plus `listLookerConnections` from the mapping client, with `cleanup`
 * narrowed to a vitest mock so tests can assert on its calls.
 */
type CliLookerRuntimeClient = LookerRuntimeClient &
Pick<LookerMappingClient, 'listLookerConnections'> & {
cleanup: ReturnType<typeof vi.fn<NonNullable<LookerRuntimeClient['cleanup']>>>;
};
/**
 * Creates a fully mocked Looker client describing a tiny instance: one
 * `analytics` postgres connection, one dashboard (`10`), one look (`20`), one
 * folder, user and group, and an `ecommerce` model whose `orders` explore joins
 * `users`. Every method is a vitest mock resolving to a fixed value.
 */
export function makeCliLookerRuntimeClient(): CliLookerRuntimeClient {
  const dashboardUpdatedAt = '2026-05-05T08:00:00.000Z';
  const lookUpdatedAt = '2026-05-05T08:10:00.000Z';

  const connections = [
    {
      name: 'analytics',
      host: 'db.example.test',
      database: 'analytics',
      schema: null,
      dialect: 'postgres',
    },
  ];
  const dashboard = {
    lookerId: '10',
    title: 'Revenue Overview',
    description: 'Revenue dashboard',
    folderId: '7',
    ownerId: '3',
    updatedAt: dashboardUpdatedAt,
    tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }],
  };
  const look = {
    lookerId: '20',
    title: 'Revenue Look',
    description: null,
    folderId: '7',
    ownerId: '3',
    updatedAt: lookUpdatedAt,
    query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] },
  };
  const signals = {
    dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }],
    lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }],
    scheduledPlans: [
      { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 },
    ],
    favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }],
  };

  return {
    listLookerConnections: vi.fn().mockResolvedValue(connections),
    listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: dashboardUpdatedAt }]),
    getDashboard: vi.fn().mockResolvedValue(dashboard),
    listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: lookUpdatedAt }]),
    getLook: vi.fn().mockResolvedValue(look),
    listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }),
    listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]),
    listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]),
    listLookmlModels: vi.fn().mockResolvedValue({
      source: 'looker',
      fetchedAt: '2026-05-05T00:00:00.000Z',
      models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }],
    }),
    getExplore: vi.fn().mockResolvedValue({
      source: 'looker',
      modelName: 'ecommerce',
      exploreName: 'orders',
      label: 'Orders',
      description: null,
      connectionName: 'analytics',
      viewName: 'orders',
      rawSqlTableName: 'public.orders',
      fields: {
        dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }],
        measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }],
      },
      joins: [
        {
          name: 'users',
          type: 'left_outer',
          relationship: 'many_to_one',
          rawSqlTableName: 'public.users',
          // LookML-style placeholders: a plain string, not a template literal.
          sqlOn: '${orders.user_id} = ${users.id}',
          from: null,
          targetTable: null,
        },
      ],
      targetWarehouseConnectionId: null,
      targetTable: null,
    }),
    getSignals: vi.fn().mockResolvedValue(signals),
    cleanup: vi.fn<NonNullable<LookerRuntimeClient['cleanup']>>().mockResolvedValue(undefined),
  };
}
/**
 * Table-identifier parser whose `parse` method is a vitest mock, so tests can
 * both stub its result and inspect how it was called.
 */
interface TestLookerTableIdentifierParser extends LookerTableIdentifierParser {
parse: ReturnType<typeof vi.fn<LookerTableIdentifierParser['parse']>>;
}
/**
 * Creates a parser double whose mocked `parse` always resolves to successful
 * results for two keys: `ecommerce.orders` -> `public.orders` and
 * `ecommerce.orders.users` -> `public.users`.
 */
export function makeCliLookerParser(): TestLookerTableIdentifierParser {
  // Both entries are un-cataloged tables in the `public` schema.
  const publicTable = (name: string) => ({
    ok: true as const,
    catalog: null,
    schema: 'public',
    name,
    canonical_table: `public.${name}`,
  });

  const parse = vi.fn<LookerTableIdentifierParser['parse']>().mockResolvedValue({
    'ecommerce.orders': publicTable('orders'),
    'ecommerce.orders.users': publicTable('users'),
  });

  return { parse };
}
/**
 * Derives a `fake`-adapter report for `jobId` from the snapshot fixture.
 *
 * Defaults: `id` is `report-<jobId>`, `runId` is `run-<jobId>`, connection
 * `warehouse`, source key `fake`, and body `syncId` `sync-live-1`. Top-level
 * fields in `overrides` replace those defaults; `overrides.body` is merged
 * shallowly over the fixture body (after the `syncId` default).
 */
export function localFakeBundleReport(
  jobId: string,
  overrides: Partial<Omit<IngestReportSnapshot, 'body'>> & { body?: Partial<IngestReportSnapshot['body']> } = {},
): IngestReportSnapshot {
  const base = bundleReportSnapshot();
  const { body: bodyOverrides = {}, ...headerOverrides } = overrides;

  const body = {
    ...base.body,
    syncId: 'sync-live-1',
    ...bodyOverrides,
  };

  return {
    ...base,
    id: `report-${jobId}`,
    runId: `run-${jobId}`,
    jobId,
    connectionId: 'warehouse',
    sourceKey: 'fake',
    ...headerOverrides,
    body,
  };
}
/**
 * Opens the project's local SQLite ingest store with a scripted id factory.
 * The factory hands out `ids[0]`, then `ids[1]`; once those are used up it
 * returns `generated-<n>`, where `n` is the 1-based number of the request.
 *
 * @param projectDir Project whose local state database should be opened.
 * @param ids The first two ids the store will be given.
 */
export async function localBundleStore(projectDir: string, ids: [string, string]): Promise<SqliteBundleIngestStore> {
  const ingest = await import('@ktx/context/ingest');
  const project = await loadKtxProject({ projectDir });

  let issued = 0;
  const idFactory = (): string => {
    const scripted = ids[issued];
    issued += 1;
    return scripted ?? `generated-${issued}`;
  };

  return new ingest.SqliteBundleIngestStore({
    dbPath: ktxLocalStateDbPath(project),
    idFactory,
  });
}
/**
 * Persists `report` into the project's local ingest store as a completed run.
 * The store is opened with `[report.runId, report.id]` as its scripted ids; a
 * run record is created and marked completed with the report's diff summary,
 * then the report body is stored against that run.
 *
 * @param projectDir Project whose local state database receives the records.
 * @param report Report to persist; defaults to the snapshot fixture.
 */
export async function persistLocalBundleReport(projectDir: string, report = bundleReportSnapshot()): Promise<void> {
  const { jobId, connectionId, sourceKey, body } = report;
  const identity = { jobId, connectionId, sourceKey };

  // Order matters: the first `create` consumes the run id, the second the report id.
  const store = await localBundleStore(projectDir, [report.runId, report.id]);
  const run = await store.create({
    ...identity,
    syncId: body.syncId,
    trigger: 'manual_resync',
  });
  await store.markCompleted(run.id, body.diffSummary);
  await store.create({
    runId: run.id,
    ...identity,
    body,
  });
}
/**
 * Serializes `report` as 2-space-indented JSON with a trailing newline into
 * `<tempDir>/bundle-report.json`.
 *
 * @param tempDir Existing directory to write into.
 * @param report Report to serialize; defaults to the snapshot fixture.
 * @returns The path of the written file.
 */
export async function writeBundleReportFile(tempDir: string, report = bundleReportSnapshot()): Promise<string> {
  const target = join(tempDir, 'bundle-report.json');
  const serialized = JSON.stringify(report, null, 2);
  await writeFile(target, serialized + '\n', 'utf-8');
  return target;
}
/**
 * Replays a fixed, minimal live-ingest event sequence into `memoryFlow`:
 * source acquired, raw snapshot written, diff computed, one planned work unit
 * (`fake-orders`), chunks planned, report created, then `finish('done')`.
 * Does nothing when no sink is supplied.
 */
export function emitLiveLocalMemoryFlow(memoryFlow: MemoryFlowEventSink | undefined): void {
  if (memoryFlow == null) {
    return;
  }

  const syncId = 'sync-live-1';

  memoryFlow.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 });
  memoryFlow.update({ syncId });
  memoryFlow.emit({ type: 'raw_snapshot_written', syncId, rawFileCount: 1 });
  memoryFlow.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 });
  memoryFlow.update({
    plannedWorkUnits: [
      {
        unitKey: 'fake-orders',
        rawFiles: ['orders/orders.json'],
        peerFileCount: 0,
        dependencyCount: 0,
      },
    ],
  });
  memoryFlow.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
  memoryFlow.emit({ type: 'report_created', runId: 'live-viz-run' });
  memoryFlow.finish('done');
}

File diff suppressed because it is too large Load diff

View file

@ -114,6 +114,16 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void
}
function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIngestIo): void {
const counts = result.children.reduce(
(acc, child) => {
const childCounts = reportActionCounts(child.report);
return {
wikiCount: acc.wikiCount + childCounts.wikiCount,
slCount: acc.slCount + childCounts.slCount,
};
},
{ wikiCount: 0, slCount: 0 },
);
io.stdout.write(`Metabase fan-out: ${result.status}\n`);
io.stdout.write(`Source: ${result.metabaseConnectionId}\n`);
io.stdout.write(`Children: ${result.children.length}\n`);
@ -121,10 +131,11 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng
io.stdout.write(`Work units: ${result.totals.workUnits}\n`);
io.stdout.write(`Failed work units: ${result.totals.failedWorkUnits}\n`);
}
io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`);
for (const child of result.children) {
const status = reportStatus(child.report);
io.stdout.write(
`- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId}\n`,
`- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`,
);
}
}
@ -345,7 +356,7 @@ export async function runKtxIngest(
} else {
writeMetabaseFanoutStatus(result, io);
}
return 0;
return result.status === 'all_succeeded' ? 0 : 1;
}
const jobId = deps.jobIdFactory?.();
@ -397,14 +408,14 @@ export async function runKtxIngest(
liveTui?.close();
liveTui = null;
io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot));
return 0;
return reportStatus(result.report) === 'done' ? 0 : 1;
}
await writeReportRecord(result.report, runOutputMode, io, {
interactive: (args.inputMode ?? 'auto') === 'auto',
renderStoredMemoryFlow: deps.renderStoredMemoryFlow,
env,
});
return 0;
return reportStatus(result.report) === 'done' ? 0 : 1;
} finally {
liveTui?.close();
}

View file

@ -95,29 +95,6 @@ describe('createKtxCliScanConnector', () => {
]);
});
it('does not create a standalone PostHog scan connector', async () => {
await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'project: warehouse',
'connections:',
' product:',
' driver: posthog',
' api_key: phx_test',
' project_id: "157881"',
' readonly: true',
'',
].join('\n'),
'utf-8',
);
const project = await loadKtxProject({ projectDir: tempDir });
await expect(createKtxCliScanConnector(project, 'product')).rejects.toThrow(
'Connection "product" uses driver "posthog", which has no native standalone KTX scan connector',
);
});
it('throws for structural daemon-only fallback configs', async () => {
await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' });
await writeFile(

View file

@ -59,7 +59,7 @@ function commandLines(commands: ReadonlyArray<{ command: string; description: st
export function formatNextStepLines(indent = ' '): string[] {
return [
`${indent}KTX context is ready for agents.`,
`${indent}Preferred route: CLI + Skills; installed rules call \`ktx agent ...\` directly, so no MCP server is required.`,
`${indent}Preferred route: CLI + Skills; installed rules call the pinned local CLI directly, so no MCP server is required.`,
`${indent}Direct CLI checks:`,
...commandLines(KTX_NEXT_STEP_DIRECT_COMMANDS, indent),
`${indent}Optional MCP:`,

View file

@ -80,13 +80,6 @@ describe('buildPublicIngestPlan', () => {
);
});
it('does not plan PostHog connections as CLI ingest targets', () => {
const project = projectWithConnections({ product: { driver: 'posthog' } });
expect(() =>
buildPublicIngestPlan(project, { projectDir: '/tmp/project', targetConnectionId: 'product', all: false }),
).toThrow('Connection "product" uses unsupported public ingest driver "posthog"');
});
});
describe('runKtxPublicIngest', () => {

View file

@ -3,6 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
formatInstallSummary,
plannedKtxAgentFiles,
readKtxAgentInstallManifest,
removeKtxAgentInstall,
@ -37,11 +38,13 @@ describe('setup agents', () => {
it('plans project-scoped CLI and MCP files for every target', () => {
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'both' })).toEqual([
{ kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md') },
{ kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' },
{ kind: 'json-key', path: join(tempDir, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] },
]);
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([
{ kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md') },
{ kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(tempDir, '.codex/instructions/ktx.md'), role: 'rule' },
]);
expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'mcp' })).toEqual([
{ kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] },
@ -83,7 +86,7 @@ describe('setup agents', () => {
const skill = await readFile(join(tempDir, '.agents/skills/ktx/SKILL.md'), 'utf-8');
expect(skill).toContain(`--project-dir ${tempDir}`);
expect(skill).toContain('must not print secrets');
expect(skill).toContain('ktx agent sql execute');
expect(skill).toContain('agent sql execute');
expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({
version: 1,
projectDir: tempDir,
@ -93,6 +96,47 @@ describe('setup agents', () => {
expect(io.stderr()).toBe('');
});
it('writes PATH-independent launcher commands for skills and MCP configs', async () => {
const io = makeIo();
await expect(
runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'universal',
scope: 'project',
mode: 'both',
skipAgents: false,
},
io.io,
),
).resolves.toMatchObject({ status: 'ready' });
const skill = await readFile(join(tempDir, '.agents/skills/ktx/SKILL.md'), 'utf-8');
expect(skill).not.toContain('`ktx agent');
expect(skill).toContain('agent context --json');
expect(skill).toContain('agent sql execute');
const mcp = JSON.parse(await readFile(join(tempDir, '.agents/mcp/ktx.json'), 'utf-8')) as {
mcpServers?: { ktx?: { command?: string; args?: string[] } };
};
expect(mcp.mcpServers?.ktx?.command).toBe(process.execPath);
expect(mcp.mcpServers?.ktx?.args?.[0]).toMatch(/packages\/cli\/(src|dist)\/bin\.(ts|js)$/);
expect(mcp.mcpServers?.ktx?.args).toEqual([
expect.stringMatching(/packages\/cli\/(src|dist)\/bin\.(ts|js)$/),
'--project-dir',
tempDir,
'serve',
'--mcp',
'stdio',
'--semantic-compute',
'--execute-queries',
]);
});
it('removes only manifest-listed files and JSON keys', async () => {
const io = makeIo();
await runKtxSetupAgentsStep(
@ -113,6 +157,7 @@ describe('setup agents', () => {
await expect(removeKtxAgentInstall(tempDir, io.io)).resolves.toBe(0);
await expect(stat(join(tempDir, '.claude/skills/ktx/SKILL.md'))).rejects.toThrow();
await expect(stat(join(tempDir, '.claude/rules/ktx.md'))).rejects.toThrow();
await expect(stat(join(tempDir, '.claude/skills/ktx/keep.txt'))).resolves.toBeDefined();
await expect(readKtxAgentInstallManifest(tempDir)).resolves.toEqual(null);
});
@ -173,4 +218,71 @@ describe('setup agents', () => {
}),
);
});
it('prints per-agent install summary after successful installation', async () => {
const io = makeIo();
await runKtxSetupAgentsStep(
{
projectDir: tempDir,
inputMode: 'disabled',
yes: true,
agents: true,
target: 'claude-code',
scope: 'project',
mode: 'both',
skipAgents: false,
},
io.io,
);
const output = io.stdout();
expect(output).toContain('Agent integration complete');
expect(output).toContain('Claude Code');
expect(output).toContain('+ Skill installed');
expect(output).toContain('.claude/skills/ktx/SKILL.md');
expect(output).toContain('+ Rule installed');
expect(output).toContain('.claude/rules/ktx.md');
expect(output).toContain('+ MCP config added');
expect(output).toContain('.mcp.json');
});
it('formats summary with relative paths for project scope', () => {
const summary = formatInstallSummary(
[{ target: 'cursor', scope: 'project', mode: 'both' }],
[
{ kind: 'file', path: join(tempDir, '.cursor/rules/ktx.mdc') },
{ kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] },
],
tempDir,
);
expect(summary).toContain('Cursor');
expect(summary).toContain('+ Rule installed');
expect(summary).toContain('.cursor/rules/ktx.mdc');
expect(summary).toContain('+ MCP config added');
expect(summary).toContain('.cursor/mcp.json');
expect(summary).not.toContain(tempDir);
});
it('formats summary with multiple agent targets', () => {
const summary = formatInstallSummary(
[
{ target: 'claude-code', scope: 'project', mode: 'cli' },
{ target: 'codex', scope: 'project', mode: 'mcp' },
],
[
{ kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
{ kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' },
{ kind: 'json-key', path: join(tempDir, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] },
],
tempDir,
);
expect(summary).toContain('Claude Code');
expect(summary).toContain('+ Skill installed');
expect(summary).toContain('+ Rule installed');
expect(summary).toContain('Codex');
expect(summary).toContain('+ MCP config added');
});
});

View file

@ -1,5 +1,6 @@
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
import { dirname, join, resolve } from 'node:path';
import { dirname, join, relative, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { cancel, isCancel, multiselect, select } from '@clack/prompts';
import { loadKtxProject, markKtxSetupStepComplete, serializeKtxProjectConfig } from '@ktx/context/project';
import type { KtxCliIo } from './cli-runtime.js';
@ -37,11 +38,19 @@ export interface KtxAgentInstallManifest {
projectDir: string;
installedAt: string;
installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>;
entries: Array<{ kind: 'file'; path: string } | { kind: 'json-key'; path: string; jsonPath: string[] }>;
entries: Array<
| { kind: 'file'; path: string; role?: 'skill' | 'rule' }
| { kind: 'json-key'; path: string; jsonPath: string[] }
>;
}
type InstallEntry = KtxAgentInstallManifest['entries'][number];
/**
 * Describes how to start the KTX CLI without relying on a `ktx` binary in PATH.
 * The launcher is rendered into generated skill files and MCP server configs.
 */
interface KtxCliLauncher {
/** Executable to run; `ktxCliLauncher()` fills this with `process.execPath`. */
command: string;
/** Arguments placed before any KTX sub-command; `ktxCliLauncher()` supplies the CLI entry-point path. */
args: string[];
}
/**
 * Returns the absolute path of the agent install manifest for a project.
 *
 * @param projectDir Project directory; resolved against the current working directory when relative.
 * @returns Path to `.ktx/agents/install-manifest.json` inside the resolved project directory.
 */
export function agentInstallManifestPath(projectDir: string): string {
  const projectRoot = resolve(projectDir);
  const manifestRelativePath = '.ktx/agents/install-manifest.json';
  return join(projectRoot, manifestRelativePath);
}
@ -54,11 +63,17 @@ export function plannedKtxAgentFiles(input: {
}): InstallEntry[] {
if (input.scope === 'global') {
if (input.target === 'claude-code') {
return [{ kind: 'file', path: join(process.env.HOME ?? '', '.claude/skills/ktx/SKILL.md') }];
const home = process.env.HOME ?? '';
return [
{ kind: 'file', path: join(home, '.claude/skills/ktx/SKILL.md'), role: 'skill' as const },
{ kind: 'file', path: join(home, '.claude/rules/ktx.md'), role: 'rule' as const },
];
}
if (input.target === 'codex') {
const codexHome = process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex');
return [
{ kind: 'file', path: join(process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex'), 'skills/ktx/SKILL.md') },
{ kind: 'file', path: join(codexHome, 'skills/ktx/SKILL.md'), role: 'skill' as const },
{ kind: 'file', path: join(codexHome, 'instructions/ktx.md'), role: 'rule' as const },
];
}
throw new Error(`Global ${input.target} installation is not supported; use --project.`);
@ -66,12 +81,16 @@ export function plannedKtxAgentFiles(input: {
const root = resolve(input.projectDir);
const cliEntries: Partial<Record<KtxAgentTarget, InstallEntry>> = {
'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md') },
codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') },
'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' },
codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' },
cursor: { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') },
opencode: { kind: 'file', path: join(root, '.opencode/commands/ktx.md') },
universal: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') },
};
const ruleEntries: Partial<Record<KtxAgentTarget, InstallEntry>> = {
'claude-code': { kind: 'file', path: join(root, '.claude/rules/ktx.md'), role: 'rule' },
codex: { kind: 'file', path: join(root, '.codex/instructions/ktx.md'), role: 'rule' },
};
const mcpEntries: Record<KtxAgentTarget, InstallEntry> = {
'claude-code': { kind: 'json-key', path: join(root, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] },
codex: { kind: 'json-key', path: join(root, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] },
@ -80,12 +99,31 @@ export function plannedKtxAgentFiles(input: {
universal: { kind: 'json-key', path: join(root, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] },
};
return [
...(input.mode === 'cli' || input.mode === 'both' ? [cliEntries[input.target]] : []),
...(input.mode === 'cli' || input.mode === 'both' ? [cliEntries[input.target], ruleEntries[input.target]] : []),
...(input.mode === 'mcp' || input.mode === 'both' ? [mcpEntries[input.target]] : []),
].filter((entry): entry is InstallEntry => entry !== undefined);
}
function cliInstructionContent(input: { projectDir: string; target: KtxAgentTarget }): string {
/**
 * Builds the launcher that generated agent files use to invoke this CLI.
 *
 * The command is the currently running executable (`process.execPath`) and the
 * single argument is the filesystem path of `bin.js` resolved next to this module.
 */
function ktxCliLauncher(): KtxCliLauncher {
  const binUrl = new URL('./bin.js', import.meta.url);
  const binPath = fileURLToPath(binUrl);
  return { command: process.execPath, args: [binPath] };
}
/**
 * Quotes a single argument for display in a POSIX-style shell command line.
 *
 * Values made only of characters in the safe set are returned unchanged.
 * Everything else (including the empty string) is wrapped in single quotes,
 * with each embedded single quote rewritten as `'\''`.
 *
 * @param value Raw argument text.
 * @returns The argument, quoted only when needed.
 */
function shellQuote(value: string): string {
  const isSafeBareWord = /^[A-Za-z0-9_/:=.,@%+-]+$/.test(value);
  return isSafeBareWord ? value : "'" + value.replaceAll("'", "'\\''") + "'";
}
/**
 * Renders a full command line: launcher command, launcher arguments, then `args`.
 * Each token is passed through `shellQuote` and the tokens are joined with single spaces.
 *
 * @param launcher Command and leading arguments that start the CLI.
 * @param args Sub-command and flags to append after the launcher arguments.
 * @returns One space-separated command line string.
 */
function ktxCommandLine(launcher: KtxCliLauncher, args: string[]): string {
  const tokens = [launcher.command].concat(launcher.args, args);
  return tokens.map((token) => shellQuote(token)).join(' ');
}
function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLauncher }): string {
const projectDirArgs = ['--json', '--project-dir', input.projectDir];
return [
'---',
'name: ktx',
@ -95,28 +133,64 @@ function cliInstructionContent(input: { projectDir: string; target: KtxAgentTarg
'# KTX Local Context',
'',
`Use this project with \`--project-dir ${input.projectDir}\`.`,
'Commands are pinned to the local KTX CLI path that created this file, so agents do not need `ktx` in PATH.',
'If the CLI path no longer exists after moving this checkout or reinstalling KTX, rerun `ktx setup --agents`.',
'',
'Agents must not print secrets, credential references, environment variable values, or file contents from `.ktx/secrets`.',
'',
'Available commands:',
'',
`- \`ktx agent context --json --project-dir ${input.projectDir}\``,
`- \`ktx agent sl list --json --project-dir ${input.projectDir}\``,
`- \`ktx agent sl read <sourceName> --json --project-dir ${input.projectDir}\``,
`- \`ktx agent sl query --json --project-dir ${input.projectDir} --connection-id <id> --query-file <path> --execute --max-rows 100\``,
`- \`ktx agent wiki search <query> --json --project-dir ${input.projectDir}\``,
`- \`ktx agent wiki read <pageId> --json --project-dir ${input.projectDir}\``,
`- \`ktx agent sql execute --json --project-dir ${input.projectDir} --connection-id <id> --sql-file <path> --max-rows 100\``,
`- \`${ktxCommandLine(input.launcher, ['agent', 'context', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['agent', 'sl', 'list', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['agent', 'sl', 'read', '<sourceName>', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, [
'agent',
'sl',
'query',
...projectDirArgs,
'--connection-id',
'<id>',
'--query-file',
'<path>',
'--execute',
'--max-rows',
'100',
])}\``,
`- \`${ktxCommandLine(input.launcher, ['agent', 'wiki', 'search', '<query>', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['agent', 'wiki', 'read', '<pageId>', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, [
'agent',
'sql',
'execute',
...projectDirArgs,
'--connection-id',
'<id>',
'--sql-file',
'<path>',
'--max-rows',
'100',
])}\``,
'',
'SQL execution is read-only, requires an explicit row limit, and should use the smallest useful limit.',
'',
].join('\n');
}
function mcpConfig(projectDir: string): Record<string, unknown> {
/**
 * Produces the text of the agent "rule" file for a project.
 *
 * The result is three paragraphs separated by blank lines and ends with a
 * trailing newline: what the CLI is for (including the `--project-dir` value),
 * when to use it, and when not to use it.
 *
 * @param input.projectDir Project directory embedded verbatim in the first paragraph.
 * @returns The rule file content.
 */
function ruleInstructionContent(input: { projectDir: string }): string {
  const purpose = `Use the \`ktx\` CLI to query local semantic context, wiki knowledge, and execute safe SQL for this project (\`--project-dir ${input.projectDir}\`).`;
  const whenToUse =
    'Use when the user asks about data schemas, metrics, dimensions, database structure, or wants to run SQL queries.';
  const whenNotToUse = 'Do not use for general programming, code review, or tasks unrelated to data and analytics.';
  return `${purpose}\n\n${whenToUse}\n\n${whenNotToUse}\n`;
}
function mcpConfig(projectDir: string, launcher: KtxCliLauncher): Record<string, unknown> {
return {
command: 'ktx',
args: ['--project-dir', projectDir, 'serve', '--mcp', 'stdio', '--semantic-compute', '--execute-queries'],
command: launcher.command,
args: [...launcher.args, '--project-dir', projectDir, 'serve', '--mcp', 'stdio', '--semantic-compute', '--execute-queries'],
env: {},
};
}
@ -245,6 +319,55 @@ function createPromptAdapter(): KtxSetupAgentsPromptAdapter {
};
}
/** Human-readable heading printed for each agent target in the install summary. */
const targetDisplayNames: Record<KtxAgentTarget, string> = {
'claude-code': 'Claude Code',
codex: 'Codex',
cursor: 'Cursor',
opencode: 'OpenCode',
universal: 'Universal .agents',
};
/**
 * Summary label for a target's file entry when the entry is not tagged with role `rule`
 * (entries with role `rule` are always labelled "Rule installed" by `formatInstallSummary`).
 */
const fileEntryLabels: Record<KtxAgentTarget, string> = {
'claude-code': 'Skill installed',
codex: 'Skill installed',
cursor: 'Rule installed',
opencode: 'Command installed',
universal: 'Skill installed',
};
/**
 * Formats the per-agent summary printed after agent integration is installed.
 *
 * `entries` is expected to be the concatenation, in `installs` order, of the
 * entries written for each install. The number of entries belonging to each
 * install is derived from `plannedKtxAgentFiles`, and the matching slice is
 * rendered under that install's heading.
 *
 * Each install is rendered from its own slice as it is visited. No lookup keyed
 * by target is used, so two installs that share a target (for example with
 * different scope or mode) keep their own entries instead of the later one
 * overwriting the earlier one.
 *
 * @param installs Installs that were performed, in the order their entries were collected.
 * @param entries Flat list of written entries, grouped by install in the same order.
 * @param projectDir Project directory; project-scoped paths are shown relative to it,
 *   global-scoped paths are shown as-is.
 * @returns Summary lines joined with `\n` (no trailing newline).
 * @throws Whatever `plannedKtxAgentFiles` throws for an unsupported install combination.
 */
export function formatInstallSummary(
  installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>,
  entries: InstallEntry[],
  projectDir: string,
): string {
  const lines: string[] = [];
  let offset = 0;
  for (const install of installs) {
    // The planner is the source of truth for how many entries this install contributed.
    const plannedCount = plannedKtxAgentFiles({ projectDir, ...install }).length;
    const installEntries = entries.slice(offset, offset + plannedCount);
    offset += plannedCount;

    lines.push(` ${targetDisplayNames[install.target]}`);
    for (const entry of installEntries) {
      const displayPath = install.scope === 'global' ? entry.path : relative(projectDir, entry.path);
      if (entry.kind === 'file') {
        // Rule files always read "Rule installed"; other files use the target-specific label.
        const label = entry.role === 'rule' ? 'Rule installed' : fileEntryLabels[install.target];
        lines.push(` + ${label}`);
      } else {
        lines.push(` + MCP config added`);
      }
      lines.push(` ${displayPath}`);
    }
  }
  return lines.join('\n');
}
async function installTarget(input: {
projectDir: string;
target: KtxAgentTarget;
@ -252,12 +375,17 @@ async function installTarget(input: {
mode: KtxAgentInstallMode;
}): Promise<InstallEntry[]> {
const entries = plannedKtxAgentFiles(input);
const launcher = ktxCliLauncher();
for (const entry of entries) {
if (entry.kind === 'file') {
const content =
entry.role === 'rule'
? ruleInstructionContent({ projectDir: input.projectDir })
: cliInstructionContent({ projectDir: input.projectDir, launcher });
await mkdir(dirname(entry.path), { recursive: true });
await writeFile(entry.path, cliInstructionContent({ projectDir: input.projectDir, target: input.target }), 'utf-8');
await writeFile(entry.path, content, 'utf-8');
} else {
await writeJsonKey(entry.path, entry.jsonPath, mcpConfig(input.projectDir));
await writeJsonKey(entry.path, entry.jsonPath, mcpConfig(input.projectDir, launcher));
}
}
return entries;
@ -311,7 +439,6 @@ export async function runKtxSetupAgentsStep(
{ value: 'cursor', label: 'Cursor' },
{ value: 'opencode', label: 'OpenCode' },
{ value: 'universal', label: 'Universal .agents' },
{ value: 'back', label: 'Back' },
],
required: true,
})) as KtxAgentTarget[]);
@ -327,7 +454,7 @@ export async function runKtxSetupAgentsStep(
for (const install of installs) entries.push(...(await installTarget({ projectDir: args.projectDir, ...install })));
await writeManifest(args.projectDir, mergeManifest(args.projectDir, await readKtxAgentInstallManifest(args.projectDir), installs, entries));
await markAgentsComplete(args.projectDir);
io.stdout.write(`Agent integration installed for ${installs.map((install) => install.target).join(', ')}.\n`);
io.stdout.write(`\nAgent integration complete\n\n${formatInstallSummary(installs, entries, args.projectDir)}\n`);
return { status: 'ready', projectDir: args.projectDir, installs };
} catch (error) {
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);

View file

@ -166,7 +166,12 @@ describe('setup context build state', () => {
it('runs setup context build, verifies readiness, and marks context complete', async () => {
await writeReadyProject(tempDir);
const io = makeIo();
const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false }));
const runContextBuildMock = vi.fn(async () => ({
exitCode: 0,
detached: false,
reportIds: ['report-docs-1'],
artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'],
}));
const verifyContextReady = vi.fn(async () => ({
ready: true,
agentContextReady: true,
@ -204,6 +209,8 @@ describe('setup context build state', () => {
runId: 'setup-context-local-abc123',
status: 'completed',
completedAt: '2026-05-09T10:00:00.000Z',
reportIds: ['report-docs-1'],
artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'],
});
expect(io.stdout()).toContain('KTX context is ready for agents.');
});
@ -340,6 +347,207 @@ describe('setup context build state', () => {
expect(io.stderr()).toContain('No primary or context sources are configured for a KTX context build.');
});
it('watches an already-running setup context build from the resume prompt', async () => {
await writeReadyProject(tempDir);
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-resume-watch',
status: 'detached',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: ['docs'],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'),
});
const io = makeIo();
const completeRun = async () => {
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-resume-watch',
status: 'completed',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:02:00.000Z',
completedAt: '2026-05-09T10:02:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: ['docs'],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'),
});
};
const select = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => {
expect(options.options.map((option) => option.label)).toContain('Watch progress');
return 'watch';
});
await expect(
runKtxSetupContextStep(
{ projectDir: tempDir, inputMode: 'auto' },
io.io,
{
prompts: { select, cancel: vi.fn() },
sleep: completeRun,
watchIntervalMs: 1,
},
),
).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-resume-watch' });
expect(io.stdout()).toContain('KTX context built: detached');
expect(io.stdout()).toContain('KTX context built: yes');
});
it('auto-watches a running build without prompting when autoWatch is true', async () => {
await writeReadyProject(tempDir);
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-auto-watch',
status: 'detached',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: [],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'),
});
const io = makeIo();
const completeRun = async () => {
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-auto-watch',
status: 'completed',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:02:00.000Z',
completedAt: '2026-05-09T10:02:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: [],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'),
});
};
const select = vi.fn(async () => {
throw new Error('should not prompt when autoWatch is true');
});
await expect(
runKtxSetupContextStep(
{ projectDir: tempDir, inputMode: 'auto', autoWatch: true },
io.io,
{
prompts: { select, cancel: vi.fn() },
sleep: completeRun,
watchIntervalMs: 1,
},
),
).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-auto-watch' });
expect(select).not.toHaveBeenCalled();
expect(io.stdout()).toContain('KTX context built: yes');
});
it('renders the progress view when watching a build with sourceProgress', async () => {
await writeReadyProject(tempDir);
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-progress',
status: 'detached',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: ['docs'],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-progress'),
sourceProgress: [
{ connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 },
{ connectionId: 'docs', operation: 'source-ingest' as const, status: 'running' as const, startedAtMs: Date.now() - 5000 },
],
});
const io = makeIo();
const completeRun = async () => {
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-progress',
status: 'completed',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:02:00.000Z',
completedAt: '2026-05-09T10:02:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: ['docs'],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-progress'),
sourceProgress: [
{ connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 },
{ connectionId: 'docs', operation: 'source-ingest' as const, status: 'done' as const, elapsedMs: 60000 },
],
});
};
const select = vi.fn(async () => 'watch');
await expect(
runKtxSetupContextStep(
{ projectDir: tempDir, inputMode: 'auto' },
io.io,
{
prompts: { select, cancel: vi.fn() },
sleep: completeRun,
watchIntervalMs: 1,
},
),
).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-progress' });
const output = io.stdout();
expect(output).toContain('Building KTX context');
expect(output).toContain('Primary sources:');
expect(output).toContain('warehouse');
expect(output).toContain('Context sources:');
expect(output).toContain('docs');
expect(output).not.toContain('KTX context built: detached');
});
it('supports d to detach from the progress watch view', async () => {
await writeReadyProject(tempDir);
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-detach',
status: 'running',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: [],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-detach'),
sourceProgress: [
{ connectionId: 'warehouse', operation: 'scan' as const, status: 'running' as const, startedAtMs: Date.now() },
],
});
const io = makeIo();
let triggerDetach: (() => void) | null = null;
await expect(
runKtxSetupContextStep(
{ projectDir: tempDir, inputMode: 'auto', autoWatch: true },
io.io,
{
sleep: async () => { triggerDetach?.(); },
watchIntervalMs: 1,
setupKeystroke: (onDetach) => {
triggerDetach = onDetach;
return () => {};
},
},
),
).resolves.toMatchObject({ status: 'detached' });
const output = io.stdout();
expect(output).toContain('Building KTX context');
expect(output).toContain('Context build continuing in the background.');
expect(output).toContain('Resume: ktx setup --project-dir');
});
it('prints JSON setup context command status with watch and resume commands', async () => {
await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true });
await writeKtxSetupContextState(tempDir, {
@ -372,6 +580,48 @@ describe('setup context build state', () => {
});
});
it('watches setup context command status until the run reaches a terminal state', async () => {
await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true });
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-watch',
status: 'running',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: ['docs'],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-watch'),
});
const io = makeIo();
const completeRun = async () => {
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-watch',
status: 'completed',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:02:00.000Z',
completedAt: '2026-05-09T10:02:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: ['docs'],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-watch'),
});
};
await expect(
runKtxSetupContextCommand(
{ command: 'watch', projectDir: tempDir, runId: 'setup-context-local-watch', inputMode: 'disabled' },
io.io,
{ sleep: completeRun, watchIntervalMs: 1 },
),
).resolves.toBe(0);
expect(io.stdout()).toContain('KTX context built: running');
expect(io.stdout()).toContain('KTX context built: yes');
});
it('runs direct build commands without asking for setup confirmation first', async () => {
await writeReadyProject(tempDir);
const io = makeIo();

View file

@ -10,7 +10,14 @@ import {
} from '@ktx/context/project';
import type { KtxCliIo } from './cli-runtime.js';
import { buildPublicIngestPlan } from './public-ingest.js';
import { runContextBuild } from './context-build-view.js';
import {
type ContextBuildSourceProgressUpdate,
createRepainter,
defaultSetupKeystroke,
renderContextBuildView,
runContextBuild,
viewStateFromSourceProgress,
} from './context-build-view.js';
import { withMenuOptionsSpacing } from './prompt-navigation.js';
import { withSetupInterruptConfirmation } from './setup-interrupt.js';
@ -45,6 +52,7 @@ export interface KtxSetupContextState {
retryableFailedTargets: string[];
commands: KtxSetupContextCommands;
failureReason?: string;
sourceProgress?: ContextBuildSourceProgressUpdate[];
}
export interface KtxSetupContextStatusSummary {
@ -80,6 +88,7 @@ export interface KtxSetupContextStepArgs {
forcePrompt?: boolean;
allowEmpty?: boolean;
prompt?: boolean;
autoWatch?: boolean;
}
export type KtxSetupContextCommandArgs =
@ -99,6 +108,9 @@ export interface KtxSetupContextDeps {
now?: () => Date;
runContextBuild?: typeof runContextBuild;
verifyContextReady?: (projectDir: string) => Promise<KtxSetupContextReadiness>;
sleep?: (ms: number) => Promise<void>;
watchIntervalMs?: number;
setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null;
}
interface KtxSetupContextTargets {
@ -109,6 +121,7 @@ interface KtxSetupContextTargets {
const SETUP_CONTEXT_STATE_PATH = ['.ktx', 'setup', 'context-build.json'] as const;
const LIVE_DATABASE_ADAPTER = 'live-database';
const SCAN_REPORT_FILE = 'scan-report.json';
const DEFAULT_WATCH_INTERVAL_MS = 2_000;
function createPromptAdapter(): KtxSetupContextPromptAdapter {
return {
@ -193,9 +206,34 @@ function normalizeState(projectDir: string, value: unknown): KtxSetupContextStat
: [],
commands: contextBuildCommands(projectDir, runId),
...(typeof record.failureReason === 'string' ? { failureReason: record.failureReason } : {}),
...(normalizeSourceProgress(record.sourceProgress) ? { sourceProgress: normalizeSourceProgress(record.sourceProgress) } : {}),
};
}
const VALID_SOURCE_OPERATIONS = new Set(['scan', 'source-ingest']);
const VALID_SOURCE_STATUSES = new Set(['queued', 'running', 'done', 'failed']);

/**
 * Validates untrusted `sourceProgress` data (for example from a persisted state file)
 * and returns only the well-formed entries.
 *
 * An entry is kept when it is a plain object whose `connectionId` is a string,
 * whose `operation` is one of the known operations, and whose `status` is one of
 * the known statuses. `operation` and `status` must be actual strings: values such
 * as `['scan']` are rejected rather than coerced to text, because coercion would let
 * a non-string through under a string literal type. Optional `startedAtMs`,
 * `elapsedMs` (numbers) and `summaryText` (string) are copied only when they have
 * the expected type; all other properties are dropped.
 *
 * @param value Candidate value of unknown shape.
 * @returns The valid entries, or `undefined` when `value` is not an array or no entry is valid.
 */
function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpdate[] | undefined {
  if (!Array.isArray(value)) return undefined;
  const entries: ContextBuildSourceProgressUpdate[] = [];
  for (const item of value) {
    if (typeof item !== 'object' || item === null || Array.isArray(item)) continue;
    const rec = item as Record<string, unknown>;
    const { connectionId, operation, status } = rec;
    if (typeof connectionId !== 'string') continue;
    // Require real strings before the set lookup; String(x) would accept ['scan'] as 'scan'.
    if (typeof operation !== 'string' || !VALID_SOURCE_OPERATIONS.has(operation)) continue;
    if (typeof status !== 'string' || !VALID_SOURCE_STATUSES.has(status)) continue;
    entries.push({
      connectionId,
      operation: operation as 'scan' | 'source-ingest',
      status: status as 'queued' | 'running' | 'done' | 'failed',
      ...(typeof rec.startedAtMs === 'number' ? { startedAtMs: rec.startedAtMs } : {}),
      ...(typeof rec.elapsedMs === 'number' ? { elapsedMs: rec.elapsedMs } : {}),
      ...(typeof rec.summaryText === 'string' ? { summaryText: rec.summaryText } : {}),
    });
  }
  return entries.length > 0 ? entries : undefined;
}
export async function readKtxSetupContextState(projectDir: string): Promise<KtxSetupContextState> {
const filePath = statePath(projectDir);
if (!(await pathExists(filePath))) {
@ -514,6 +552,7 @@ async function runBuild(
};
await writeKtxSetupContextState(args.projectDir, runningState);
let lastSourceProgress: ContextBuildSourceProgressUpdate[] | undefined;
const contextBuild = deps.runContextBuild ?? runContextBuild;
const buildResult = await contextBuild(
project,
@ -532,14 +571,39 @@ async function runBuild(
...runningState,
status: 'detached',
updatedAt: new Date().toISOString(),
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});
writeFileSync(statePath(resolvedDir), `${JSON.stringify(detachedState, null, 2)}\n`);
},
onSourceProgress: (sources) => {
lastSourceProgress = sources;
try {
const resolvedDir = resolve(args.projectDir);
mkdirSync(join(resolvedDir, '.ktx', 'setup'), { recursive: true });
const progressState = normalizeState(resolvedDir, {
...runningState,
sourceProgress: sources,
updatedAt: new Date().toISOString(),
});
writeFileSync(statePath(resolvedDir), `${JSON.stringify(progressState, null, 2)}\n`);
} catch {
// Progress reporting is supplementary — don't crash the build
}
},
},
);
const completedReportIds = buildResult.reportIds ?? [];
const completedArtifactPaths = buildResult.artifactPaths ?? [];
if (buildResult.detached) {
const updatedAt = now().toISOString();
await writeKtxSetupContextState(args.projectDir, { ...runningState, status: 'detached', updatedAt });
await writeKtxSetupContextState(args.projectDir, {
...runningState,
status: 'detached',
updatedAt,
reportIds: completedReportIds,
artifactPaths: completedArtifactPaths,
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});
return { status: 'detached', projectDir: args.projectDir, runId };
}
if (buildResult.exitCode !== 0) {
@ -548,8 +612,11 @@ async function runBuild(
...runningState,
status: 'failed',
updatedAt,
reportIds: completedReportIds,
artifactPaths: completedArtifactPaths,
retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds],
failureReason: 'Context build failed.',
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});
return { status: 'failed', projectDir: args.projectDir };
}
@ -561,8 +628,11 @@ async function runBuild(
...runningState,
status: 'failed',
updatedAt,
reportIds: completedReportIds,
artifactPaths: completedArtifactPaths,
retryableFailedTargets: readiness.failedTargets ?? [],
failureReason: readiness.details.join(' '),
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});
io.stderr.write('KTX context build did not pass agent-readiness verification.\n');
for (const detail of readiness.details) {
@ -578,7 +648,10 @@ async function runBuild(
status: 'completed',
updatedAt: completedAt,
completedAt,
reportIds: completedReportIds,
artifactPaths: completedArtifactPaths,
retryableFailedTargets: [],
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});
writeSuccess(readiness, targets, io);
return { status: 'ready', projectDir: args.projectDir, runId };
@ -632,17 +705,46 @@ export async function runKtxSetupContextStep(
(existingState.status === 'running' || existingState.status === 'detached') &&
args.inputMode !== 'disabled'
) {
if (args.autoWatch) {
const watched = await watchContextStatus(
{
command: 'watch',
projectDir: args.projectDir,
...(existingState.runId ? { runId: existingState.runId } : {}),
inputMode: args.inputMode,
},
existingState,
io,
deps,
);
return setupResultFromWatchedState(args.projectDir, watched.state);
}
const prompts = deps.prompts ?? createPromptAdapter();
const choice = await prompts.select({
message:
'A context build is running in the background.\n\n' +
'You can wait for it to finish, check its status, or start a fresh build.',
'You can watch it until it finishes, check its status once, or start a fresh build.',
options: [
{ value: 'watch', label: 'Watch progress' },
{ value: 'status', label: 'Check status' },
{ value: 'rebuild', label: 'Start a fresh context build' },
{ value: 'back', label: 'Back' },
],
});
if (choice === 'watch') {
const watched = await watchContextStatus(
{
command: 'watch',
projectDir: args.projectDir,
...(existingState.runId ? { runId: existingState.runId } : {}),
inputMode: args.inputMode,
},
existingState,
io,
deps,
);
return setupResultFromWatchedState(args.projectDir, watched.state);
}
if (choice === 'status') {
const commands = contextBuildCommands(args.projectDir, existingState.runId);
io.stdout.write(`\nRun: ${commands.status}\n`);
@ -698,6 +800,18 @@ function stateMatchesRunId(state: KtxSetupContextState, runId: string | undefine
return !runId || state.runId === runId;
}
/** Reports whether a context build status means the build is still in flight (`running` or `detached`). */
function isActiveStatus(status: KtxSetupContextBuildStatus): boolean {
  switch (status) {
    case 'running':
    case 'detached':
      return true;
    default:
      return false;
  }
}
/** Maps a build status to the watch command's exit code: 1 for `failed`, `interrupted`, or `stale`; 0 otherwise. */
function watchExitCode(status: KtxSetupContextBuildStatus): number {
  const failingStatuses: readonly string[] = ['failed', 'interrupted', 'stale'];
  return failingStatuses.includes(status) ? 1 : 0;
}
/** Returns a promise that resolves (with no value) after `ms` milliseconds, using `setTimeout`. */
function defaultSleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/** Builds the status summary for a state, flagging the step as completed only when the state's status is `completed`. */
function statusPayload(state: KtxSetupContextState): KtxSetupContextStatusSummary {
  const completedStep = state.status === 'completed';
  return setupContextStatusFromState(state, { completedStep });
}
@ -714,6 +828,149 @@ function writeContextStatus(state: KtxSetupContextState, io: KtxCliIo): void {
}
}
/**
 * Picks the watch renderer for a context build: the progress view when the
 * initial state carries at least one source-progress entry, the plain text
 * watcher otherwise.
 */
async function watchContextStatus(
  args: Extract<KtxSetupContextCommandArgs, { command: 'watch' }>,
  initialState: KtxSetupContextState,
  io: KtxCliIo,
  deps: KtxSetupContextDeps,
): Promise<{ exitCode: number; state: KtxSetupContextState }> {
  const hasSourceProgress = (initialState.sourceProgress?.length ?? 0) > 0;
  const watcher = hasSourceProgress ? watchContextStatusWithProgressView : watchContextStatusText;
  return watcher(args, initialState, io, deps);
}
/**
 * Polls the setup-context state file and prints the status as text whenever it changes.
 *
 * A status is reprinted only when its status, `updatedAt`, `completedAt`, or
 * `failureReason` differs from the last printed one. Polling stops when the
 * status is no longer active (exit code from `watchExitCode`) or when the state
 * read from disk no longer matches `args.runId` (exit code 1).
 */
async function watchContextStatusText(
  args: Extract<KtxSetupContextCommandArgs, { command: 'watch' }>,
  initialState: KtxSetupContextState,
  io: KtxCliIo,
  deps: KtxSetupContextDeps,
): Promise<{ exitCode: number; state: KtxSetupContextState }> {
  const pause = deps.sleep ?? defaultSleep;
  const pollEveryMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS;
  // Fingerprint of the fields that make a status worth printing again.
  const fingerprintOf = (snapshot: KtxSetupContextState): string =>
    `${snapshot.status}:${snapshot.updatedAt ?? ''}:${snapshot.completedAt ?? ''}:${snapshot.failureReason ?? ''}`;

  let current = initialState;
  let printedFingerprint = '';
  io.stdout.write('KTX context build\n');

  for (;;) {
    const currentFingerprint = fingerprintOf(current);
    if (currentFingerprint !== printedFingerprint) {
      writeContextStatus(current, io);
      printedFingerprint = currentFingerprint;
    }

    if (!isActiveStatus(current.status)) {
      return { exitCode: watchExitCode(current.status), state: current };
    }

    await pause(pollEveryMs);
    current = await readKtxSetupContextState(args.projectDir);
    if (stateMatchesRunId(current, args.runId)) {
      continue;
    }
    io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`);
    return { exitCode: 1, state: current };
  }
}
/**
 * Watches a background context build and renders per-source progress until the
 * build leaves an active status, the watched run can no longer be found, or the
 * keystroke handler requests a detach.
 *
 * When stdout is a TTY the view is repainted in place on a 140 ms timer;
 * otherwise the view is appended to stdout only when the list of source
 * statuses changes or the build reaches a non-active status.
 *
 * @param args - Watch arguments; `projectDir` locates the state file and
 *   `runId` is matched against every state read from disk.
 * @param initialState - State snapshot rendered before the first poll.
 * @param io - CLI streams used for the view, the detach notice, and errors.
 * @param deps - Optional overrides: `sleep`, `watchIntervalMs`, `setupKeystroke`.
 * @returns The last state read plus an exit code: `watchExitCode(status)` once
 *   the build is inactive, 1 when the run no longer matches, 0 after a detach.
 */
async function watchContextStatusWithProgressView(
  args: Extract<KtxSetupContextCommandArgs, { command: 'watch' }>,
  initialState: KtxSetupContextState,
  io: KtxCliIo,
  deps: KtxSetupContextDeps,
): Promise<{ exitCode: number; state: KtxSetupContextState }> {
  const sleep = deps.sleep ?? defaultSleep;
  const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS;
  const isTTY = io.stdout.isTTY === true;
  const repainter = isTTY ? createRepainter(io) : null;
  const projectDir = resolve(args.projectDir);
  const viewOpts = { styled: isTTY, showHint: true, projectDir };

  // Single place that turns a persisted state snapshot into a view state.
  const buildViewState = (snapshot: KtxSetupContextState) =>
    viewStateFromSourceProgress(
      snapshot.sourceProgress ?? [],
      Date.now(),
      snapshot.startedAt ? new Date(snapshot.startedAt).getTime() : undefined,
    );

  let state = initialState;
  let lastProgressKey = '';
  let detached = false;
  let viewState = buildViewState(state);

  // Both callbacks handed to the keystroke handler request a detach.
  // NOTE(review): which keys trigger each callback is decided by the handler — confirm there.
  const cleanupKeystroke = (isTTY || deps.setupKeystroke)
    ? (deps.setupKeystroke ?? defaultSetupKeystroke)(
        () => { detached = true; },
        () => { detached = true; },
      )
    : null;

  let spinnerInterval: ReturnType<typeof setInterval> | null = null;
  if (repainter) {
    repainter.paint(renderContextBuildView(viewState, viewOpts));
    // Advance the spinner frame and elapsed timers between polls of the state file.
    spinnerInterval = setInterval(() => {
      viewState.frame++;
      const now = Date.now();
      viewState.totalElapsedMs = viewState.startedAt !== null ? now - viewState.startedAt : 0;
      for (const t of [...viewState.primarySources, ...viewState.contextSources]) {
        if (t.status === 'running' && t.startedAt !== null) {
          t.elapsedMs = now - t.startedAt;
        }
      }
      repainter.paint(renderContextBuildView(viewState, viewOpts));
    }, 140);
  }

  try {
    while (true) {
      if (!repainter) {
        // Non-TTY: append a fresh view only when the source statuses changed,
        // and always once more when the build has stopped being active.
        const currentKey = JSON.stringify((state.sourceProgress ?? []).map((s) => s.status));
        if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) {
          io.stdout.write(renderContextBuildView(viewState, viewOpts));
          lastProgressKey = currentKey;
        }
      }

      if (!isActiveStatus(state.status)) {
        // The view state was rebuilt synchronously just before this check, so the
        // spinner timer has had no chance to paint it; paint the final frame here
        // or the terminal keeps showing the last in-progress frame.
        repainter?.paint(renderContextBuildView(viewState, viewOpts));
        return { exitCode: watchExitCode(state.status), state };
      }

      if (detached) break;
      await sleep(intervalMs);
      if (detached) break;

      try {
        state = await readKtxSetupContextState(args.projectDir);
      } catch {
        // Best effort: a failed read keeps the previous snapshot and retries on the next poll.
        continue;
      }

      if (!stateMatchesRunId(state, args.runId)) {
        io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`);
        return { exitCode: 1, state };
      }

      viewState = buildViewState(state);
    }
  } finally {
    if (spinnerInterval) clearInterval(spinnerInterval);
    cleanupKeystroke?.();
  }

  // Reached only via a detach request: the build keeps running in the background.
  io.stdout.write('\n\nContext build continuing in the background.\n');
  io.stdout.write(`Resume: ktx setup --project-dir ${projectDir}\n`);
  io.stdout.write(`Status: ktx setup context status --project-dir ${projectDir}\n`);
  return { exitCode: 0, state };
}
/**
 * Translates the state a watch ended on into a setup-step result.
 *
 * `completed` becomes `ready` (falling back to the run id
 * `'setup-context-completed'`), `paused` stays `paused`, `running`/`detached`
 * become `detached` (both with an empty-string run id fallback), and any other
 * status becomes `failed`.
 */
function setupResultFromWatchedState(projectDir: string, state: KtxSetupContextState): KtxSetupContextResult {
  switch (state.status) {
    case 'completed':
      return { status: 'ready', projectDir, runId: state.runId ?? 'setup-context-completed' };
    case 'paused':
      return { status: 'paused', projectDir, runId: state.runId ?? '' };
    case 'running':
    case 'detached':
      return { status: 'detached', projectDir, runId: state.runId ?? '' };
    default:
      return { status: 'failed', projectDir };
  }
}
export async function runKtxSetupContextCommand(
args: KtxSetupContextCommandArgs,
io: KtxCliIo,
@ -744,9 +1001,7 @@ export async function runKtxSetupContextCommand(
}
if (args.command === 'watch') {
io.stdout.write('KTX context build\n');
writeContextStatus(state, io);
return 0;
return (await watchContextStatus(args, state, io, deps)).exitCode;
}
const updatedAt = new Date().toISOString();

View file

@ -962,10 +962,95 @@ describe('setup databases step', () => {
});
});
// Interactive ('auto' input mode) Postgres setup with no schemas passed in:
// the step is expected to call listSchemas, offer every discovered schema in a
// multiselect with the non-public ones preselected, and persist the chosen
// schemas to ktx.yaml before the scan runs.
it('prompts for discovered Postgres schemas before the first scan', async () => {
const io = makeIo();
// Queued prompt answers; the multiselect answer picks the two non-public schemas.
const prompts = makePromptAdapter({
selectValues: ['url'],
textValues: ['', 'env:DATABASE_URL'],
multiselectValues: [['orbit_analytics', 'orbit_raw']],
});
const testConnection = vi.fn(async () => 0);
// The scan stub re-reads ktx.yaml to prove the schemas were written before scanning.
const scanConnection = vi.fn(async asyncScanProjectDir => {
const config = parseKtxProjectConfig(await readFile(join(asyncScanProjectDir, 'ktx.yaml'), 'utf-8'));
expect(config.connections['postgres-warehouse']).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw'],
});
return 0;
});
const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
const result = await runKtxSetupDatabasesStep(
{
projectDir: tempDir,
inputMode: 'auto',
databaseDrivers: ['postgres'],
databaseSchemas: [],
skipDatabases: false,
},
io.io,
{ prompts, testConnection, scanConnection, listSchemas },
);
expect(result.status).toBe('ready');
expect(listSchemas).toHaveBeenCalledWith(tempDir, 'postgres-warehouse');
// All three schemas are offered; 'public' is offered but not preselected.
expect(prompts.multiselect).toHaveBeenCalledWith({
message: expect.stringContaining('PostgreSQL schemas to scan'),
options: [
{ value: 'orbit_analytics', label: 'orbit_analytics' },
{ value: 'orbit_raw', label: 'orbit_raw' },
{ value: 'public', label: 'public' },
],
initialValues: ['orbit_analytics', 'orbit_raw'],
required: true,
});
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
expect(config.connections['postgres-warehouse']).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw'],
});
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw');
});
// Non-interactive (inputMode 'disabled') Postgres setup with no schemas passed in:
// every schema returned by listSchemas, including 'public', is expected to be
// written to ktx.yaml without showing a multiselect prompt.
it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => {
const io = makeIo();
const prompts = makePromptAdapter({});
const testConnection = vi.fn(async () => 0);
// The scan stub re-reads ktx.yaml to prove the schemas were written before scanning.
const scanConnection = vi.fn(async asyncScanProjectDir => {
const config = parseKtxProjectConfig(await readFile(join(asyncScanProjectDir, 'ktx.yaml'), 'utf-8'));
expect(config.connections.warehouse).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw', 'public'],
});
return 0;
});
const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
const result = await runKtxSetupDatabasesStep(
{
projectDir: tempDir,
inputMode: 'disabled',
databaseDrivers: ['postgres'],
databaseConnectionId: 'warehouse',
databaseUrl: 'env:DATABASE_URL',
databaseSchemas: [],
skipDatabases: false,
},
io.io,
{ prompts, testConnection, scanConnection, listSchemas },
);
expect(result.status).toBe('ready');
expect(prompts.multiselect).not.toHaveBeenCalled();
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
expect(config.connections.warehouse).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw', 'public'],
});
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw, public');
});
it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => {
const io = makeIo();
const testConnection = vi.fn(async () => 0);
const scanConnection = vi.fn(async () => 0);
const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
const result = await runKtxSetupDatabasesStep(
{
@ -978,10 +1063,11 @@ describe('setup databases step', () => {
skipDatabases: false,
},
io.io,
{ testConnection, scanConnection },
{ testConnection, scanConnection, listSchemas },
);
expect(result.status).toBe('ready');
expect(listSchemas).not.toHaveBeenCalled();
expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));

View file

@ -52,6 +52,7 @@ export interface KtxSetupDatabasesPromptAdapter {
message: string;
options: Array<{ value: string; label: string }>;
required?: boolean;
initialValues?: string[];
}): Promise<string[]>;
select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise<string>;
text(options: { message: string; placeholder?: string; initialValue?: string }): Promise<string | undefined>;
@ -76,6 +77,7 @@ export interface KtxSetupDatabasesDeps {
prompts?: KtxSetupDatabasesPromptAdapter;
testConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise<number>;
scanConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise<number>;
listSchemas?: (projectDir: string, connectionId: string) => Promise<string[]>;
historicSqlProbe?: KtxSetupHistoricSqlProbe;
}
@ -255,6 +257,21 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr
}
}
/**
 * Default schema discovery: loads the project, and when the named connection is
 * a Postgres connection config, asks a Postgres scan connector for its schemas.
 *
 * Returns an empty list for any connection that is not a Postgres config. The
 * connector's `cleanup()` is always awaited, even when `listSchemas()` throws.
 */
async function defaultListSchemas(projectDir: string, connectionId: string): Promise<string[]> {
  const { config } = await loadKtxProject({ projectDir });
  const candidate = config.connections[connectionId];
  // Loaded on demand so the Postgres connector is only imported when discovery runs.
  const postgres = await import('@ktx/connector-postgres');
  if (!postgres.isKtxPostgresConnectionConfig(candidate)) {
    return [];
  }

  const scanner = new postgres.KtxPostgresScanConnector({ connectionId, connection: candidate });
  try {
    return await scanner.listSchemas();
  } finally {
    await scanner.cleanup();
  }
}
function existingConnectionIdsByDriver(
connections: Record<string, KtxProjectConnectionConfig>,
driver: KtxSetupDatabaseDriver,
@ -814,6 +831,113 @@ async function writeConnectionConfig(input: {
}
}
/**
 * Reads the schema scope already present on a connection config.
 *
 * When `schemas` is an array, returns its non-blank string entries, trimmed
 * (the singular `schema` field is then ignored, even if the array yields
 * nothing). Otherwise returns the trimmed `schema` string as a one-element
 * list, or an empty list when it is missing or blank.
 */
function configuredSchemas(connection: KtxProjectConnectionConfig | undefined): string[] {
  if (!connection) return [];

  const listed = connection.schemas;
  if (Array.isArray(listed)) {
    const cleaned: string[] = [];
    for (const entry of listed) {
      if (typeof entry !== 'string') continue;
      const trimmedEntry = entry.trim();
      if (trimmedEntry.length > 0) cleaned.push(trimmedEntry);
    }
    return cleaned;
  }

  const legacy = connection.schema;
  if (typeof legacy !== 'string') return [];
  const trimmedLegacy = legacy.trim();
  return trimmedLegacy.length > 0 ? [trimmedLegacy] : [];
}
/**
 * Chooses which schemas to preselect: every schema except `public`, or the
 * input list itself when excluding `public` would leave nothing.
 */
function defaultSchemaSelection(schemas: string[]): string[] {
  const withoutPublic: string[] = [];
  for (const name of schemas) {
    if (name !== 'public') withoutPublic.push(name);
  }
  return withoutPublic.length === 0 ? schemas : withoutPublic;
}
/**
 * Persists a schema list onto an existing connection.
 *
 * Reloads the project, drops the singular `schema` key from the connection,
 * and writes the connection back with `schemas` set to the de-duplicated
 * input. Does nothing when the connection id is not present in the config.
 */
async function writeConnectionSchemas(input: {
  projectDir: string;
  connectionId: string;
  schemas: string[];
}): Promise<void> {
  const { projectDir, connectionId, schemas } = input;
  const { config } = await loadKtxProject({ projectDir });
  const existing = config.connections[connectionId];
  if (!existing) return;

  // Strip the singular `schema` key so only `schemas` remains on the written config.
  const { schema: _legacySchema, ...withoutLegacySchema } = existing;
  await writeConnectionConfig({
    projectDir,
    connectionId,
    connection: { ...withoutLegacySchema, schemas: unique(schemas) },
  });
}
/**
 * Ensures a Postgres connection has a schema scope before it is scanned.
 *
 * Resolution order:
 * 1. Non-Postgres connections and connections that already have schemas are left alone.
 * 2. Schemas passed in `args.databaseSchemas` are written as-is.
 * 3. Otherwise schemas are discovered; a discovery failure is reported on stderr
 *    and the existing scope is kept.
 * 4. With input disabled or exactly one discovered schema, all discovered schemas
 *    are selected; otherwise the user picks from a multiselect.
 *
 * @returns `false` only when the multiselect answer contains `'back'`; `true` in every other case.
 */
async function maybeConfigurePostgresSchemas(input: {
  projectDir: string;
  connectionId: string;
  args: KtxSetupDatabasesArgs;
  prompts: KtxSetupDatabasesPromptAdapter;
  deps: KtxSetupDatabasesDeps;
  io: KtxCliIo;
}): Promise<boolean> {
  const { projectDir, connectionId, args, io } = input;

  const project = await loadKtxProject({ projectDir });
  const connection = project.config.connections[connectionId];
  const isPostgres = normalizeDriver(connection?.driver) === 'postgres';
  if (!isPostgres || configuredSchemas(connection).length > 0) {
    return true;
  }

  if (args.databaseSchemas.length > 0) {
    await writeConnectionSchemas({ projectDir, connectionId, schemas: args.databaseSchemas });
    return true;
  }

  let discovered: string[];
  try {
    const listSchemas = input.deps.listSchemas ?? defaultListSchemas;
    discovered = unique(await listSchemas(projectDir, connectionId));
  } catch (error) {
    // Discovery is optional: report the failure and keep whatever scope exists.
    const reason = error instanceof Error ? error.message : String(error);
    io.stderr.write(
      `Could not discover PostgreSQL schemas for ${connectionId}; continuing with existing schema scope. Pass --database-schema to set it explicitly. ${reason}\n`,
    );
    return true;
  }
  if (discovered.length === 0) {
    return true;
  }

  let selected = discovered;
  const shouldPrompt = args.inputMode !== 'disabled' && discovered.length !== 1;
  if (shouldPrompt) {
    const preselected = defaultSchemaSelection(discovered);
    const picked = await input.prompts.multiselect({
      message: withMultiselectNavigation(
        'PostgreSQL schemas to scan\nKTX found multiple non-system schemas. Select every schema agents should use.',
      ),
      options: discovered.map((schema) => ({ value: schema, label: schema })),
      initialValues: preselected,
      required: true,
    });
    if (picked.includes('back')) {
      return false;
    }
    // An empty answer falls back to the preselected schemas.
    selected = picked.length > 0 ? picked : preselected;
  }

  await writeConnectionSchemas({ projectDir, connectionId, schemas: selected });
  writeSetupSection(io, `Selecting schemas for ${connectionId}`, [`Schemas: ${selected.join(', ')}`]);
  return true;
}
async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise<void> {
const project = await loadKtxProject({ projectDir });
if (project.config.ingest.adapters.includes('historic-sql')) {
@ -902,6 +1026,8 @@ async function validateAndScanConnection(input: {
connectionId: string;
io: KtxCliIo;
deps: KtxSetupDatabasesDeps;
args: KtxSetupDatabasesArgs;
prompts: KtxSetupDatabasesPromptAdapter;
}): Promise<boolean> {
const testConnection = input.deps.testConnection ?? defaultTestConnection;
const scanConnection = input.deps.scanConnection ?? defaultScanConnection;
@ -923,6 +1049,10 @@ async function validateAndScanConnection(input: {
testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`);
writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines);
if (!(await maybeConfigurePostgresSchemas(input))) {
return false;
}
await maybeRunHistoricSqlSetupProbe({
projectDir: input.projectDir,
connectionId: input.connectionId,
@ -1069,7 +1199,7 @@ export async function runKtxSetupDatabasesStep(
prompts,
});
if (historicSqlResult === 'back') return { status: 'back', projectDir: args.projectDir };
if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps }))) {
if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps, args, prompts }))) {
return { status: 'failed', projectDir: args.projectDir };
}
selectedConnectionIds.push(connectionId);
@ -1209,6 +1339,8 @@ export async function runKtxSetupDatabasesStep(
connectionId: connectionChoice.connectionId,
io,
deps,
args,
prompts,
}))
) {
if (args.inputMode === 'disabled') return { status: 'failed', projectDir: args.projectDir };

View file

@ -1,5 +1,5 @@
import { describe, expect, it, vi } from 'vitest';
import { isKtxSetupReady, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js';
import { isKtxPreAgentSetupReady, isKtxSetupReady, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js';
import type { KtxSetupStatus } from './setup.js';
const readyStatus: KtxSetupStatus = {
@ -20,6 +20,13 @@ describe('setup ready menu', () => {
expect(isKtxSetupReady({ ...readyStatus, agents: [] })).toBe(false);
});
// isKtxPreAgentSetupReady must ignore the agents list entirely (an empty list is
// still ready) while still failing when embeddings or context are not ready.
it('recognizes pre-agent readiness without requiring agents', () => {
expect(isKtxPreAgentSetupReady(readyStatus)).toBe(true);
expect(isKtxPreAgentSetupReady({ ...readyStatus, agents: [] })).toBe(true);
expect(isKtxPreAgentSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false);
expect(isKtxPreAgentSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false);
});
it('maps ready-project menu choices to setup sections', async () => {
const prompts = { select: vi.fn(async () => 'agents'), cancel: vi.fn() };

View file

@ -14,18 +14,21 @@ export interface KtxSetupReadyMenuDeps {
prompts?: KtxSetupReadyMenuPromptAdapter;
}
export function isKtxSetupReady(status: KtxSetupStatus): boolean {
export function isKtxPreAgentSetupReady(status: KtxSetupStatus): boolean {
return (
status.project.ready &&
status.llm.ready &&
status.embeddings.ready &&
status.databases.every((database) => database.ready) &&
status.sources.every((source) => source.ready) &&
status.context.ready &&
status.agents.some((agent) => agent.ready)
status.context.ready
);
}
/** Full readiness: everything `isKtxPreAgentSetupReady` requires, plus at least one ready agent. */
export function isKtxSetupReady(status: KtxSetupStatus): boolean {
  if (!isKtxPreAgentSetupReady(status)) {
    return false;
  }
  return status.agents.some((candidate) => candidate.ready);
}
function createPromptAdapter(): KtxSetupReadyMenuPromptAdapter {
return {
async select(options) {

View file

@ -1,4 +1,4 @@
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
@ -8,6 +8,7 @@ import {
serializeKtxProjectConfig,
} from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxCliIo } from './cli-runtime.js';
import {
runKtxSetupSourcesStep,
type KtxSetupSourcesDeps,
@ -41,14 +42,17 @@ function prompts(values: {
multiselect?: string[][];
select?: string[];
text?: Array<string | undefined>;
password?: Array<string | undefined>;
}): KtxSetupSourcesPromptAdapter {
const multiselectValues = [...(values.multiselect ?? [])];
const selectValues = [...(values.select ?? [])];
const textValues = [...(values.text ?? [])];
const passwordValues = [...(values.password ?? [])];
return {
multiselect: vi.fn(async () => multiselectValues.shift() ?? []),
select: vi.fn(async () => selectValues.shift() ?? 'skip'),
text: vi.fn(async () => (textValues.length > 0 ? textValues.shift() : '')),
password: vi.fn(async () => (passwordValues.length > 0 ? passwordValues.shift() : undefined)),
cancel: vi.fn(),
log: vi.fn(),
};
@ -201,12 +205,199 @@ describe('setup sources step', () => {
mappings: {
databaseMappings: { '1': 'warehouse' },
syncEnabled: { '1': true },
syncMode: 'ONLY',
syncMode: 'ALL',
},
});
expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io);
});
// Table-driven check for Metabase and Looker: after addPrimarySource() the
// expected mappings all point at the `warehouse` connection, and the step is
// expected to pick it without asking a 'Mapped warehouse' text prompt (nor, for
// Metabase, a 'Metabase database id' text prompt).
// NOTE(review): addPrimarySource is defined outside this view; presumably it registers `warehouse` as the only warehouse connection — confirm.
it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => {
await addPrimarySource();
const cases: Array<{
source: 'metabase' | 'looker';
text: string[];
deps: KtxSetupSourcesDeps;
expectedConnection: Record<string, unknown>;
}> = [
{
source: 'metabase',
text: ['metabase-main', 'https://metabase.example.com'],
deps: {
discoverMetabaseDatabases: vi.fn(async () => [
{ id: 1, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' },
]),
validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })),
runMapping: vi.fn(async () => 0),
},
expectedConnection: {
driver: 'metabase',
mappings: { databaseMappings: { '1': 'warehouse' } },
},
},
{
source: 'looker',
text: ['looker-main', 'https://looker.example.com', 'client-id', ''],
deps: {
validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })),
runMapping: vi.fn(async () => 0),
},
expectedConnection: {
driver: 'looker',
mappings: { connectionMappings: { warehouse: 'warehouse' } },
},
},
];
for (const testCase of cases) {
// Each case gets a fresh prompt queue; answers are consumed in call order.
const testPrompts = prompts({
multiselect: [[testCase.source]],
select: ['env', 'done'],
text: testCase.text,
});
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
makeIo().io,
{
prompts: testPrompts,
...testCase.deps,
},
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] });
// No text prompt may have asked for the mapped warehouse.
expect(
vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Mapped warehouse')),
).toBe(false);
if (testCase.source === 'metabase') {
expect(
vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Metabase database id')),
).toBe(false);
}
expect((await readConfig()).connections[`${testCase.source}-main`]).toMatchObject(testCase.expectedConnection);
}
});
// Table-driven check for Metabase and Looker once a second warehouse
// (`analytics_warehouse`, Snowflake) has been added: the step is expected to ask
// which warehouse to map through a select prompt and, for Metabase with two
// discovered databases, which Metabase database to use through another select.
it('prompts for the mapped warehouse when interactive Metabase and Looker source setup has multiple choices', async () => {
await addPrimarySource();
await addConnection('analytics_warehouse', {
driver: 'snowflake',
account: 'acme',
database: 'analytics',
readonly: true,
});
const cases: Array<{
source: 'metabase' | 'looker';
text: string[];
deps: KtxSetupSourcesDeps;
expectedConnection: Record<string, unknown>;
}> = [
{
source: 'metabase',
text: ['metabase-main', 'https://metabase.example.com'],
deps: {
discoverMetabaseDatabases: vi.fn(async () => [
{ id: 1, name: 'Finance', engine: 'postgres', host: 'db.example.com', dbName: 'finance' },
{ id: 2, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' },
]),
validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })),
runMapping: vi.fn(async () => 0),
},
expectedConnection: {
driver: 'metabase',
mappings: { databaseMappings: { '2': 'analytics_warehouse' } },
},
},
{
source: 'looker',
text: ['looker-main', 'https://looker.example.com', 'client-id', 'analytics'],
deps: {
validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })),
runMapping: vi.fn(async () => 0),
},
expectedConnection: {
driver: 'looker',
mappings: { connectionMappings: { analytics: 'analytics_warehouse' } },
},
},
];
for (const testCase of cases) {
// The Metabase queue carries one extra select answer ('2') for the database picker.
const testPrompts = prompts({
multiselect: [[testCase.source]],
select: testCase.source === 'metabase' ? ['env', 'analytics_warehouse', '2', 'done'] : ['env', 'analytics_warehouse', 'done'],
text: testCase.text,
});
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
makeIo().io,
{
prompts: testPrompts,
...testCase.deps,
},
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] });
// The warehouse picker lists both connections with their driver labels, plus 'Back'.
expect(testPrompts.select).toHaveBeenCalledWith({
message: 'Mapped warehouse connection',
options: [
{ value: 'analytics_warehouse', label: 'analytics_warehouse (SNOWFLAKE)' },
{ value: 'warehouse', label: 'warehouse (POSTGRESQL)' },
{ value: 'back', label: 'Back' },
],
});
if (testCase.source === 'metabase') {
expect(testPrompts.select).toHaveBeenCalledWith({
message: 'Metabase database',
options: [
{ value: '1', label: '1: Finance (postgres)' },
{ value: '2', label: '2: Analytics (postgres)' },
{ value: 'back', label: 'Back' },
],
});
// The database is chosen via select, never via a free-text id prompt.
expect(
vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Metabase database id')),
).toBe(false);
}
expect((await readConfig()).connections[`${testCase.source}-main`]).toMatchObject(testCase.expectedConnection);
}
});
// When the injected runMapping writes its own error to stderr and returns 1,
// the step is expected to fail, to pass that stderr text through, and not to
// add a separate 'Metabase mapping validation failed' message.
it('lets visible Metabase mapping surface refresh and validation failures', async () => {
await addPrimarySource();
// Mapping stub that reports a mismatch on stderr and signals failure with exit code 1.
const runMapping = vi.fn(async (_projectDir: string, _connectionId: string, io: KtxCliIo) => {
io.stderr.write('1: Metabase database does not match KTX connection database\n');
return 1;
});
const io = makeIo();
const testPrompts = prompts({
multiselect: [['metabase']],
select: ['env'],
text: ['metabase-main', 'https://metabase.example.com'],
});
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
io.io,
{
prompts: testPrompts,
discoverMetabaseDatabases: vi.fn(async () => [
{ id: 1, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' },
]),
runMapping,
},
),
).resolves.toEqual({ status: 'failed', projectDir });
expect(runMapping).toHaveBeenCalledWith(projectDir, 'metabase-main', io.io);
expect(io.stderr()).toContain('1: Metabase database does not match KTX connection database');
expect(io.stderr()).not.toContain('Metabase mapping validation failed');
});
it('does not mark sources complete when validation fails', async () => {
await addPrimarySource();
const io = makeIo();
@ -253,7 +444,6 @@ describe('setup sources step', () => {
);
const options = vi.mocked(testPrompts.multiselect).mock.calls[0]?.[0].options ?? [];
expect(options).toContainEqual({ value: 'notion', label: 'Notion' });
expect(options).not.toContainEqual({ value: 'posthog', label: 'PostHog' });
});
it('uses a source-specific editable connection name for new interactive connections', async () => {
@ -333,8 +523,8 @@ describe('setup sources step', () => {
const io = makeIo();
const testPrompts = prompts({
multiselect: [['dbt']],
select: ['git'],
text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', '', 'env:GITHUB_TOKEN'],
select: ['git', 'env'],
text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', ''],
});
await expect(
@ -350,19 +540,16 @@ describe('setup sources step', () => {
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] });
expect(testGitRepo).toHaveBeenCalledWith({ repoUrl: 'https://github.com/acme-org/private-repo' });
expect(testPrompts.text).toHaveBeenNthCalledWith(5, {
message: textInputPrompt(
[
'This repo requires authentication.',
'Generate a token at: https://github.com/settings/tokens/new',
'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).',
'Or use file:/absolute/path if the token is stored in a file.',
'Press Enter to skip and try without authentication anyway.',
].join('\n'),
),
placeholder: 'env:GITHUB_TOKEN',
expect(testPrompts.select).toHaveBeenCalledWith({
message: 'This repo requires authentication.',
options: [
{ value: 'env', label: 'Use GITHUB_TOKEN from the environment' },
{ value: 'paste', label: 'Paste a token and save it as a local secret file' },
{ value: 'skip', label: 'Skip — try without authentication' },
{ value: 'back', label: 'Back' },
],
});
expect(testPrompts.text).toHaveBeenCalledTimes(5);
expect(testPrompts.text).toHaveBeenCalledTimes(4);
});
it('enables the dbt adapter when adding a dbt source connection', async () => {
@ -520,7 +707,7 @@ describe('setup sources step', () => {
mappings: {
databaseMappings: { '1': 'warehouse' },
syncEnabled: { '1': true },
syncMode: 'ONLY',
syncMode: 'ALL',
},
},
deps: {
@ -692,13 +879,11 @@ describe('setup sources step', () => {
},
{
source: 'metabase',
select: ['back', 'env'],
text: [
'metabase-main',
'https://old-metabase.example.com',
undefined,
'https://metabase.example.com',
'env:METABASE_API_KEY',
'warehouse',
'1',
],
deps: {
@ -709,14 +894,13 @@ describe('setup sources step', () => {
},
{
source: 'looker',
select: ['env'],
text: [
'looker-main',
'https://old-looker.example.com',
undefined,
'https://looker.example.com',
'client-id',
'env:LOOKER_CLIENT_SECRET',
'warehouse',
'',
],
deps: {
@ -727,10 +911,10 @@ describe('setup sources step', () => {
},
{
source: 'notion',
select: ['back', 'all_accessible'],
text: ['notion-main', 'env:NOTION_TOKEN', 'env:NOTION_TOKEN'],
select: ['env', 'back', 'env', 'all_accessible'],
text: ['notion-main'],
deps: { validateNotion: vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })) },
repeatedTextMessage: textInputPrompt('Notion token ref'),
repeatedSelectMessage: 'How should KTX find your Notion integration token?',
},
];
@ -787,4 +971,102 @@ describe('setup sources step', () => {
expect(io.stdout()).toContain('Connect a primary source before adding context sources.');
expect((await readConfig()).setup?.completed_steps ?? []).not.toContain('sources');
});
// A local dbt directory with dbt_project.yml at its root: only two text prompts
// are expected (the queued answers are the connection name and the directory),
// and the saved connection has `source_dir` set but no `path` key.
it('auto-detects dbt_project.yml at the root of a local path', async () => {
await addPrimarySource();
// Fixture: <tempDir>/dbt-repo/dbt_project.yml
const dbtDir = join(tempDir, 'dbt-repo');
await mkdir(dbtDir, { recursive: true });
await writeFile(join(dbtDir, 'dbt_project.yml'), 'name: analytics\n');
const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' }));
const io = makeIo();
const testPrompts = prompts({
multiselect: [['dbt']],
select: ['path'],
text: ['dbt-main', dbtDir],
});
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
io.io,
{ prompts: testPrompts, validateDbt },
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] });
expect(testPrompts.text).toHaveBeenCalledTimes(2);
const config = await readConfig();
expect(config.connections['dbt-main']).toMatchObject({ driver: 'dbt', source_dir: dbtDir });
expect(config.connections['dbt-main']).not.toHaveProperty('path');
});
// A single dbt_project.yml nested below the chosen directory should be picked up
// automatically, logged, and stored as the connection's relative `path`.
it('auto-detects dbt_project.yml in a subdirectory of a local path', async () => {
await addPrimarySource();
// Fixture: <tempDir>/monorepo/analytics/dbt/dbt_project.yml
const dbtDir = join(tempDir, 'monorepo');
await mkdir(join(dbtDir, 'analytics', 'dbt'), { recursive: true });
await writeFile(join(dbtDir, 'analytics', 'dbt', 'dbt_project.yml'), 'name: analytics\n');
const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' }));
const io = makeIo();
const testPrompts = prompts({
multiselect: [['dbt']],
select: ['path'],
text: ['dbt-main', dbtDir],
});
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
io.io,
{ prompts: testPrompts, validateDbt },
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] });
// Two text prompts only; the detected location is reported through the prompt log.
expect(testPrompts.text).toHaveBeenCalledTimes(2);
expect(testPrompts.log).toHaveBeenCalledWith('Found dbt_project.yml in analytics/dbt/');
const config = await readConfig();
expect(config.connections['dbt-main']).toMatchObject({
driver: 'dbt',
source_dir: dbtDir,
path: 'analytics/dbt',
});
});
// When more than one dbt_project.yml exists below the chosen directory, the step
// should ask which project to use and store the selected one as `path`.
it('shows a picker when multiple dbt projects are found in a local path', async () => {
await addPrimarySource();
// Fixture: two sibling projects, multi-dbt/analytics and multi-dbt/staging.
const dbtDir = join(tempDir, 'multi-dbt');
await mkdir(join(dbtDir, 'analytics'), { recursive: true });
await mkdir(join(dbtDir, 'staging'), { recursive: true });
await writeFile(join(dbtDir, 'analytics', 'dbt_project.yml'), 'name: analytics\n');
await writeFile(join(dbtDir, 'staging', 'dbt_project.yml'), 'name: staging\n');
const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' }));
const io = makeIo();
// The second select answer ('staging') is the reply to the project picker.
const testPrompts = prompts({
multiselect: [['dbt']],
select: ['path', 'staging'],
text: ['dbt-main', dbtDir],
});
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
io.io,
{ prompts: testPrompts, validateDbt },
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] });
expect(testPrompts.select).toHaveBeenCalledWith(
expect.objectContaining({
message: 'Multiple dbt projects found — which one should KTX use?',
}),
);
expect(testPrompts.text).toHaveBeenCalledTimes(2);
const config = await readConfig();
expect(config.connections['dbt-main']).toMatchObject({
driver: 'dbt',
source_dir: dbtDir,
path: 'staging',
});
});
});

View file

@ -1,14 +1,18 @@
import { mkdtemp, readdir, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { join, relative, resolve } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { cancel, isCancel, log, multiselect, select, text } from '@clack/prompts';
import { resolveNotionAuthToken } from '@ktx/context/connections';
import { cancel, isCancel, log, multiselect, password, select, text } from '@clack/prompts';
import { localConnectionTypeForConfig, resolveNotionAuthToken } from '@ktx/context/connections';
import { resolveKtxConfigReference } from '@ktx/context/core';
import {
cloneOrPull,
DEFAULT_METABASE_CLIENT_CONFIG,
discoverMetabaseDatabases,
type DiscoveredMetabaseDatabase,
loadDbtSchemaFiles,
loadProjectInfo,
MetabaseClient,
type NotionApi,
NotionClient,
parseLookmlStagedDir,
@ -28,6 +32,7 @@ import { runKtxConnection } from './connection.js';
import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js';
import { runKtxPublicIngest } from './public-ingest.js';
import { withSetupInterruptConfirmation } from './setup-interrupt.js';
import { writeProjectLocalSecretReference } from './setup-secrets.js';
export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion';
@ -71,6 +76,7 @@ export interface KtxSetupSourcesPromptAdapter {
}): Promise<string[]>;
select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise<string>;
text(options: { message: string; placeholder?: string; initialValue?: string }): Promise<string | undefined>;
password(options: { message: string }): Promise<string | undefined>;
cancel(message: string): void;
log?(message: string): void;
}
@ -86,6 +92,11 @@ export interface KtxSetupSourcesDeps {
validateLooker?: (projectDir: string, connectionId: string) => Promise<SourceValidationResult>;
validateLookml?: (connection: KtxProjectConnectionConfig) => Promise<SourceValidationResult>;
validateNotion?: (connection: KtxProjectConnectionConfig) => Promise<SourceValidationResult>;
discoverMetabaseDatabases?: (args: {
sourceUrl: string;
sourceApiKeyRef: string;
sourceConnectionId: string;
}) => Promise<DiscoveredMetabaseDatabase[]>;
runMapping?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise<number>;
runInitialIngest?: (
projectDir: string,
@ -143,6 +154,12 @@ function createPromptAdapter(): KtxSetupSourcesPromptAdapter {
);
return isCancel(value) ? undefined : String(value);
},
// Masked-input prompt: same navigation hint and interrupt handling as the text prompt;
// a cancelled prompt is reported as `undefined`.
async password(options) {
  const answer = await withSetupInterruptConfirmation(() => {
    const message = withTextInputNavigation(options.message);
    return password({ ...options, message });
  });
  if (isCancel(answer)) {
    return undefined;
  }
  return String(answer);
},
cancel(message) {
cancel(message);
},
@ -172,17 +189,6 @@ function connectionNamePrompt(label: string): string {
return `Name this ${label} connection\nKTX will use this short name in commands and config. You can rename it now.`;
}
/**
 * Build the multi-line text prompt shown after a git repository could not be reached
 * without credentials.
 *
 * @param source Source type; every type except `dbt` has its label added to the first line.
 * @returns Newline-joined instructions for supplying a token reference or skipping.
 */
function gitAuthAfterFailurePrompt(source: KtxSetupSourceType): string {
  let subject = 'This';
  if (source !== 'dbt') {
    subject = `This ${sourceLabel(source)}`;
  }
  const headline = `${subject} repo requires authentication.`;
  const guidance = [
    'Generate a token at: https://github.com/settings/tokens/new',
    'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).',
    'Or use file:/absolute/path if the token is stored in a file.',
    'Press Enter to skip and try without authentication anyway.',
  ];
  return [headline, ...guidance].join('\n');
}
function sourceSubpathPrompt(source: KtxSetupSourceType): string {
if (source === 'dbt') {
return [
@ -198,6 +204,21 @@ function sourceSubpathPrompt(source: KtxSetupSourceType): string {
].join('\n');
}
/** Directory names that are never descended into while looking for dbt projects. */
const SCAN_SKIP_DIRS = new Set(['.git', 'node_modules', '.venv', 'target', 'dbt_packages', 'dbt_modules', '__pycache__']);

/**
 * Find every directory at or below `rootDir` that contains a `dbt_project.yml` or
 * `dbt_project.yaml` file.
 *
 * Directories named in `SCAN_SKIP_DIRS` are pruned before they are read, so large
 * dependency or VCS trees are never enumerated. Symbolic links are not followed.
 *
 * @param rootDir Directory to scan; it is always scanned itself, whatever its name.
 * @returns Sorted, de-duplicated paths relative to `rootDir`, always `/`-separated.
 *          A project file directly in `rootDir` is reported as `''`.
 * @throws Whatever `readdir` rejects with for a directory that cannot be read.
 */
async function findDbtProjectSubpaths(rootDir: string): Promise<string[]> {
  const isWindows = process.platform === 'win32';
  const found = new Set<string>();
  const pending: string[] = [rootDir];
  for (let dir = pending.pop(); dir !== undefined; dir = pending.pop()) {
    const entries = await readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (entry.isDirectory()) {
        if (!SCAN_SKIP_DIRS.has(entry.name)) {
          pending.push(join(dir, entry.name));
        }
        continue;
      }
      if (!entry.isFile()) continue;
      if (entry.name !== 'dbt_project.yml' && entry.name !== 'dbt_project.yaml') continue;
      const relDir = relative(rootDir, dir);
      // path.relative uses backslashes on Windows; normalise so the stored subpath
      // is the same on every platform.
      found.add(isWindows ? relDir.split('\\').join('/') : relDir);
    }
  }
  return [...found].sort();
}
async function promptText(
prompts: KtxSetupSourcesPromptAdapter,
options: { message: string; placeholder?: string; initialValue?: string },
@ -222,6 +243,75 @@ function credentialRef(value: string | undefined, label: string): string {
return ref;
}
/**
 * Ask how KTX should find a source credential and return a config reference to it.
 *
 * The menu offers: use `input.envName` from the environment, paste the secret (saved
 * through `writeProjectLocalSecretReference` as `input.secretFileName`), or go back.
 * A cancelled or blank paste shows the menu again.
 *
 * @param input.label Human-readable credential name, used in the menu message and as
 *   the masked prompt's message.
 * @returns `'back'`, `env:<envName>`, or the reference returned by
 *   `writeProjectLocalSecretReference`.
 */
async function chooseSourceCredentialRef(input: {
  prompts: KtxSetupSourcesPromptAdapter;
  projectDir: string;
  label: string;
  envName: string;
  secretFileName: string;
}): Promise<string | 'back'> {
  while (true) {
    const choice = await input.prompts.select({
      message: `How should KTX find your ${input.label}?`,
      options: [
        { value: 'env', label: `Use ${input.envName} from the environment` },
        { value: 'paste', label: 'Paste a key and save it as a local secret file' },
        { value: 'back', label: 'Back' },
      ],
    });
    if (choice === 'back') return 'back';
    if (choice === 'paste') {
      const pasted = await input.prompts.password({ message: input.label });
      // Pasted secrets often carry surrounding whitespace or a trailing newline from
      // the clipboard; store the trimmed value so the saved secret is exactly the key.
      const secret = pasted?.trim();
      if (!secret) continue;
      const ref = await writeProjectLocalSecretReference({
        projectDir: input.projectDir,
        fileName: input.secretFileName,
        value: secret,
      });
      input.prompts.log?.(`Saved to .ktx/secrets/${input.secretFileName}`);
      return ref;
    }
    return `env:${input.envName}`;
  }
}
/**
 * Ask how to authenticate against a git repository that could not be reached
 * anonymously, and return a config reference to the token.
 *
 * The menu offers: use `GITHUB_TOKEN` from the environment, paste a token (saved
 * through `writeProjectLocalSecretReference` as `<connectionId>-auth-token`), skip
 * authentication, or go back. A cancelled or blank paste shows the menu again.
 *
 * @returns `'back'`, `undefined` when authentication is skipped, `env:GITHUB_TOKEN`,
 *   or the reference returned by `writeProjectLocalSecretReference`.
 */
async function chooseGitAuthCredentialRef(input: {
  prompts: KtxSetupSourcesPromptAdapter;
  projectDir: string;
  source: KtxSetupSourceType;
  connectionId: string;
}): Promise<string | undefined | 'back'> {
  const label = input.source === 'dbt' ? 'This' : `This ${sourceLabel(input.source)}`;
  while (true) {
    const choice = await input.prompts.select({
      message: `${label} repo requires authentication.`,
      options: [
        { value: 'env', label: 'Use GITHUB_TOKEN from the environment' },
        { value: 'paste', label: 'Paste a token and save it as a local secret file' },
        { value: 'skip', label: 'Skip — try without authentication' },
        { value: 'back', label: 'Back' },
      ],
    });
    if (choice === 'back') return 'back';
    if (choice === 'skip') return undefined;
    if (choice === 'paste') {
      const pasted = await input.prompts.password({ message: 'Git access token' });
      // Store the trimmed token: clipboard whitespace or a trailing newline would
      // otherwise become part of the saved secret.
      const token = pasted?.trim();
      if (!token) continue;
      const fileName = `${input.connectionId}-auth-token`;
      const ref = await writeProjectLocalSecretReference({
        projectDir: input.projectDir,
        fileName,
        value: token,
      });
      input.prompts.log?.(`Saved to .ktx/secrets/${fileName}`);
      return ref;
    }
    return 'env:GITHUB_TOKEN';
  }
}
function repoOrLocalSource(args: KtxSetupSourcesArgs): { sourceDir?: string; repoUrl?: string } {
if (args.sourcePath && args.sourceGitUrl) {
throw new Error('Choose only one source location: --source-path or --source-git-url.');
@ -373,7 +463,7 @@ function buildMetabaseConnection(args: KtxSetupSourcesArgs): KtxProjectConnectio
mappings: {
databaseMappings: { [String(args.metabaseDatabaseId)]: args.sourceWarehouseConnectionId },
syncEnabled: { [String(args.metabaseDatabaseId)]: true },
syncMode: 'ONLY',
syncMode: 'ALL',
},
};
}
@ -512,16 +602,6 @@ async function defaultValidateMetricflow(connection: KtxProjectConnectionConfig)
};
}
/**
 * Validate a Metabase connection by running the `map` connection command with all
 * output discarded, and translate its exit code into a validation result.
 *
 * @returns `ok: true` when the command exits with 0, otherwise a failure message.
 */
async function defaultValidateMetabase(projectDir: string, connectionId: string): Promise<SourceValidationResult> {
  const exitCode = await runKtxConnection(
    { command: 'map', projectDir, sourceConnectionId: connectionId, json: true },
    // Both streams are sinks: only the exit code matters here.
    { stdout: { write() {} }, stderr: { write() {} } },
  );
  if (exitCode !== 0) {
    return { ok: false, message: 'Metabase mapping validation failed' };
  }
  return { ok: true, detail: 'mapping validated' };
}
async function defaultValidateLooker(projectDir: string, connectionId: string): Promise<SourceValidationResult> {
const code = await runKtxConnectionMapping(
{ command: 'refresh', projectDir, connectionId, autoAccept: true },
@ -559,8 +639,37 @@ async function defaultValidateNotion(connection: KtxProjectConnectionConfig): Pr
return { ok: true, detail: `roots=${roots.length}` };
}
/**
 * Shape assumed for the JSON document captured from the `map` connection command
 * when it is run with `json: true`.
 *
 * In the code visible here only `mappings` is read (its length is reported).
 * NOTE(review): the meaning of the other fields is inferred from their names —
 * confirm against the command that produces the payload.
 */
interface MappingJsonOutput {
connectionId: string;
refresh: { ok: boolean; output: string[] };
validation: { ok: boolean; output: string[] };
mappings: unknown[];
}
/**
 * Render the one-line summary printed after a successful mapping run.
 *
 * @param parsed Parsed mapping payload; only the number of `mappings` is used.
 * @returns A sentence with the mapping count, using the singular noun for exactly one.
 */
function summarizeMappingResult(parsed: MappingJsonOutput): string {
  const { length: total } = parsed.mappings;
  return `Mapping validated — ${total} ${total === 1 ? 'mapping' : 'mappings'} configured`;
}
/**
 * Run the `map` connection command in JSON mode and print a one-line summary
 * instead of the raw payload.
 *
 * stdout of the command is captured; stderr is passed straight through to `io`.
 * When the command fails, its exit code is returned and nothing is written to stdout.
 * When the captured output is not JSON with a `mappings` array, it is forwarded
 * unchanged so nothing the command printed is lost.
 *
 * @returns The command's exit code (0 on success).
 */
async function defaultRunMapping(projectDir: string, connectionId: string, io: KtxCliIo): Promise<number> {
  let captured = '';
  const captureIo: KtxCliIo = {
    stdout: {
      write(chunk: string) {
        captured += chunk;
      },
    },
    stderr: io.stderr,
  };
  const code = await runKtxConnection(
    { command: 'map', projectDir, sourceConnectionId: connectionId, json: true },
    captureIo,
  );
  if (code !== 0) return code;

  let summary: string | undefined;
  try {
    const parsed: unknown = JSON.parse(captured.trim());
    // Only summarise when the payload really has the array the summary counts.
    if (
      typeof parsed === 'object' &&
      parsed !== null &&
      Array.isArray((parsed as { mappings?: unknown }).mappings)
    ) {
      summary = summarizeMappingResult(parsed as MappingJsonOutput);
    }
  } catch {
    // Not JSON — fall back to the raw output below.
  }
  io.stdout.write(summary !== undefined ? `${summary}\n` : captured);
  return 0;
}
async function defaultRunInitialIngest(
@ -634,6 +743,11 @@ type SourcePromptState = KtxSetupSourcesArgs & {
type SourcePromptStep = (state: SourcePromptState) => Promise<'next' | 'back'>;
/** One configured warehouse connection offered when mapping a context source to a warehouse. */
interface WarehouseConnectionChoice {
// Connection id: the key of the entry under `connections` in the project config.
id: string;
// Value returned by `localConnectionTypeForConfig` for this connection; shown in
// parentheses after the id in the picker label.
connectionType: string;
}
type InteractiveSourceConnectionChoice =
| { kind: 'existing'; connectionId: string; connection: KtxProjectConnectionConfig }
| { kind: 'new'; args: KtxSetupSourcesArgs }
@ -672,6 +786,107 @@ function resetRepoLocationFields(state: SourcePromptState): void {
delete state.sourceProjectName;
}
/**
 * List the project's connections whose driver is a primary-source (warehouse) driver.
 *
 * @param config Loaded project configuration.
 * @returns One choice per matching connection, ordered by id using `localeCompare`.
 */
function warehouseConnectionChoices(config: KtxProjectConfig): WarehouseConnectionChoice[] {
  const choices: WarehouseConnectionChoice[] = [];
  for (const [id, connection] of Object.entries(config.connections)) {
    // Driver names are compared case-insensitively; a missing driver never matches.
    const driver = String(connection.driver ?? '').toLowerCase();
    if (!PRIMARY_SOURCE_DRIVERS.has(driver)) continue;
    choices.push({ id, connectionType: localConnectionTypeForConfig(id, connection) });
  }
  choices.sort((a, b) => a.id.localeCompare(b.id));
  return choices;
}
/**
 * Decide which warehouse connection a context source should be mapped to.
 *
 * - No warehouse connection configured: ask for the id as free text.
 * - Exactly one: use it without prompting.
 * - Several: show a picker labelled `<id> (<connection type>)` with a Back entry.
 *
 * @returns The chosen connection id, or `'back'` when the user backs out.
 */
async function chooseMappedWarehouseConnectionId(input: {
  projectDir: string;
  prompts: KtxSetupSourcesPromptAdapter;
}): Promise<string | 'back'> {
  const { config } = await loadKtxProject({ projectDir: input.projectDir });
  const choices = warehouseConnectionChoices(config);

  if (choices.length === 0) {
    const entered = await promptText(input.prompts, { message: 'Mapped warehouse connection id' });
    if (entered === undefined) {
      return 'back';
    }
    return entered;
  }
  if (choices.length === 1) {
    return choices[0].id;
  }

  const options = choices.map(({ id, connectionType }) => ({
    value: id,
    label: `${id} (${connectionType})`,
  }));
  options.push({ value: 'back', label: 'Back' });
  // The Back entry's value is the literal 'back', so the answer can be returned as-is.
  return await input.prompts.select({ message: 'Mapped warehouse connection', options });
}
/**
 * Discover the databases of a Metabase instance through a short-lived client.
 *
 * The API key reference is resolved against `process.env`; the client is cleaned up
 * whether or not discovery succeeds.
 *
 * @throws Error when the API key reference does not resolve to a value.
 */
async function defaultDiscoverMetabaseDatabases(input: {
  sourceUrl: string;
  sourceApiKeyRef: string;
}): Promise<DiscoveredMetabaseDatabase[]> {
  const { sourceUrl, sourceApiKeyRef } = input;
  const apiKey = resolveKtxConfigReference(sourceApiKeyRef, process.env);
  if (!apiKey) {
    throw new Error('Metabase API key ref could not be resolved');
  }

  const connectionInfo = { apiUrl: sourceUrl, apiKey };
  const client = new MetabaseClient(connectionInfo, DEFAULT_METABASE_CLIENT_CONFIG);
  try {
    const databases = await discoverMetabaseDatabases(client);
    return databases;
  } finally {
    await client.cleanup();
  }
}
/**
 * Format a discovered Metabase database for the picker.
 *
 * @returns `<id>: <name>`, followed by ` (<engine>)` when an engine is present.
 */
function metabaseDatabaseLabel(database: DiscoveredMetabaseDatabase): string {
  const base = `${database.id}: ${database.name}`;
  if (!database.engine) {
    return base;
  }
  return `${base} (${database.engine})`;
}
/**
 * Determine which Metabase database id the connection should sync.
 *
 * When both the Metabase URL and API key reference are known, the databases are
 * discovered first: a single database is used directly, several are offered in a
 * picker sorted by id. If discovery fails or finds nothing, the id is asked for as
 * free text and the question is repeated until a whole number is entered.
 *
 * @returns The database id, or `'back'` when the user backs out of a prompt.
 */
async function chooseMetabaseDatabaseId(input: {
  state: SourcePromptState;
  prompts: KtxSetupSourcesPromptAdapter;
  deps: KtxSetupSourcesDeps;
}): Promise<number | 'back'> {
  const { sourceUrl, sourceApiKeyRef } = input.state;
  if (sourceUrl && sourceApiKeyRef) {
    let discovered: DiscoveredMetabaseDatabase[] = [];
    try {
      discovered = await (input.deps.discoverMetabaseDatabases ?? defaultDiscoverMetabaseDatabases)({
        sourceUrl,
        sourceApiKeyRef,
        sourceConnectionId: input.state.sourceConnectionId ?? 'metabase-main',
      });
    } catch {
      // Discovery is a convenience. Fall back to the raw id prompt when credentials
      // are unavailable locally or the Metabase API cannot be reached yet.
      // Only discovery is guarded: errors raised by the prompts below propagate.
    }
    const [onlyDatabase] = discovered;
    if (discovered.length === 1 && onlyDatabase) {
      return onlyDatabase.id;
    }
    if (discovered.length > 1) {
      const selected = await input.prompts.select({
        message: 'Metabase database',
        options: [
          ...discovered
            .slice()
            .sort((left, right) => left.id - right.id)
            .map((database) => ({
              value: String(database.id),
              label: metabaseDatabaseLabel(database),
            })),
          { value: 'back', label: 'Back' },
        ],
      });
      return selected === 'back' ? 'back' : Number.parseInt(selected, 10);
    }
  }
  while (true) {
    const entered = await promptText(input.prompts, { message: 'Metabase database id' });
    if (entered === undefined) return 'back';
    const candidate = entered.trim();
    // Reject blank or non-numeric input instead of storing NaN as the database id.
    if (/^\d+$/.test(candidate)) {
      return Number.parseInt(candidate, 10);
    }
    input.prompts.log?.('Metabase database id must be a whole number.');
  }
}
function connectionIdPromptSteps(
args: KtxSetupSourcesArgs,
source: KtxSetupSourceType,
@ -703,6 +918,7 @@ async function promptForInteractiveSource(
prompts: KtxSetupSourcesPromptAdapter,
defaultConnectionId = `${source}-main`,
testGitRepo: KtxSetupSourcesDeps['testGitRepo'] = testRepoConnection,
discoverMetabaseDatabaseList?: KtxSetupSourcesDeps['discoverMetabaseDatabases'],
): Promise<KtxSetupSourcesArgs | 'back'> {
const initialState: SourcePromptState = { ...args, source };
if (args.sourceConnectionId) {
@ -757,23 +973,6 @@ async function promptForInteractiveSource(
},
]
: []),
...(state.sourceLocation
? [
async (currentState: SourcePromptState) => {
const subpath = await promptText(prompts, {
message: sourceSubpathPrompt(source),
placeholder: 'optional',
});
if (subpath === undefined) return 'back';
if (subpath) {
currentState.sourceSubpath = subpath;
} else {
delete currentState.sourceSubpath;
}
return 'next';
},
]
: []),
...(state.sourceLocation === 'git'
? [
async (currentState: SourcePromptState) => {
@ -783,11 +982,13 @@ async function promptForInteractiveSource(
prompts.log?.('Repository connected.');
return 'next';
}
const authRef = await promptText(prompts, {
message: gitAuthAfterFailurePrompt(source),
placeholder: 'env:GITHUB_TOKEN',
const authRef = await chooseGitAuthCredentialRef({
prompts,
projectDir: args.projectDir,
source,
connectionId: currentState.sourceConnectionId ?? `${source}-main`,
});
if (authRef === undefined) return 'back';
if (authRef === 'back') return 'back';
if (authRef) {
currentState.sourceAuthTokenRef = authRef;
} else {
@ -797,6 +998,79 @@ async function promptForInteractiveSource(
},
]
: []),
...(state.sourceLocation
? [
// Prompt step that decides the source subpath.
// For dbt sources the location is scanned first: a local path is scanned in place,
// a git URL is cloned into a temporary directory. One detected project is used
// directly, several are offered in a picker, and anything else falls back to the
// manual subpath prompt.
async (currentState: SourcePromptState) => {
  if (source === 'dbt') {
    // Temporary clone used only for scanning; removed in the `finally` below.
    let scanCloneDir: string | undefined;
    try {
      let scanDir: string | undefined;
      if (currentState.sourceLocation === 'path' && currentState.sourcePath) {
        scanDir = currentState.sourcePath;
      } else if (currentState.sourceLocation === 'git' && currentState.sourceGitUrl) {
        try {
          scanCloneDir = await mkdtemp(join(tmpdir(), 'ktx-setup-dbt-scan-'));
          const authToken = currentState.sourceAuthTokenRef
            ? resolveKtxConfigReference(currentState.sourceAuthTokenRef, process.env)
            : null;
          await cloneOrPull({
            repoUrl: currentState.sourceGitUrl,
            authToken,
            cacheDir: scanCloneDir,
            branch: currentState.sourceBranch ?? 'main',
          });
          scanDir = scanCloneDir;
        } catch {
          // Clone failed — fall through to manual prompt
        }
      }

      let subpaths: string[] = [];
      if (scanDir) {
        try {
          subpaths = await findDbtProjectSubpaths(scanDir);
        } catch {
          // Directory unreadable — fall through to manual prompt
        }
      }

      if (subpaths.length === 1) {
        const [found] = subpaths;
        if (found) {
          currentState.sourceSubpath = found;
          prompts.log?.(`Found dbt_project.yml in ${found}/`);
        } else {
          // Empty subpath: the project sits at the root of the scanned location.
          delete currentState.sourceSubpath;
        }
        return 'next';
      }
      if (subpaths.length > 1) {
        // Only the scan is guarded above; an error raised by this prompt propagates
        // instead of silently switching to the manual prompt.
        const selected = await prompts.select({
          message: 'Multiple dbt projects found — which one should KTX use?',
          options: [
            ...subpaths.map((p) => ({ value: p || '.', label: p || '(project root)' })),
            { value: 'back', label: 'Back' },
          ],
        });
        if (selected === 'back') return 'back';
        const subpath = selected === '.' ? '' : selected;
        if (subpath) {
          currentState.sourceSubpath = subpath;
        } else {
          delete currentState.sourceSubpath;
        }
        return 'next';
      }
    } finally {
      if (scanCloneDir) {
        try {
          await rm(scanCloneDir, { recursive: true, force: true });
        } catch {
          // Best-effort cleanup of the temporary clone; a leftover directory must
          // not fail the setup step.
        }
      }
    }
  }
  const subpath = await promptText(prompts, {
    message: sourceSubpathPrompt(source),
    placeholder: 'optional',
  });
  if (subpath === undefined) return 'back';
  if (subpath) {
    currentState.sourceSubpath = subpath;
  } else {
    delete currentState.sourceSubpath;
  }
  return 'next';
},
]
: []),
]);
}
@ -810,24 +1084,34 @@ async function promptForInteractiveSource(
return 'next';
},
async (state) => {
const sourceApiKeyRef = await promptText(prompts, {
message: 'Metabase API key ref',
placeholder: 'env:METABASE_API_KEY',
const ref = await chooseSourceCredentialRef({
prompts,
projectDir: args.projectDir,
label: 'Metabase API key',
envName: 'METABASE_API_KEY',
secretFileName: `${state.sourceConnectionId ?? 'metabase-main'}-api-key`,
});
if (sourceApiKeyRef === undefined) return 'back';
state.sourceApiKeyRef = sourceApiKeyRef;
if (ref === 'back') return 'back';
state.sourceApiKeyRef = ref;
return 'next';
},
async (state) => {
const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' });
if (sourceWarehouseConnectionId === undefined) return 'back';
const sourceWarehouseConnectionId = await chooseMappedWarehouseConnectionId({
projectDir: args.projectDir,
prompts,
});
if (sourceWarehouseConnectionId === 'back') return 'back';
state.sourceWarehouseConnectionId = sourceWarehouseConnectionId;
return 'next';
},
async (state) => {
const databaseId = await promptText(prompts, { message: 'Metabase database id' });
if (databaseId === undefined) return 'back';
state.metabaseDatabaseId = Number.parseInt(databaseId, 10);
const databaseId = await chooseMetabaseDatabaseId({
state,
prompts,
deps: { discoverMetabaseDatabases: discoverMetabaseDatabaseList },
});
if (databaseId === 'back') return 'back';
state.metabaseDatabaseId = databaseId;
return 'next';
},
]);
@ -849,17 +1133,23 @@ async function promptForInteractiveSource(
return 'next';
},
async (state) => {
const sourceClientSecretRef = await promptText(prompts, {
message: 'Looker client secret ref',
placeholder: 'env:LOOKER_CLIENT_SECRET',
const ref = await chooseSourceCredentialRef({
prompts,
projectDir: args.projectDir,
label: 'Looker client secret',
envName: 'LOOKER_CLIENT_SECRET',
secretFileName: `${state.sourceConnectionId ?? 'looker-main'}-client-secret`,
});
if (sourceClientSecretRef === undefined) return 'back';
state.sourceClientSecretRef = sourceClientSecretRef;
if (ref === 'back') return 'back';
state.sourceClientSecretRef = ref;
return 'next';
},
async (state) => {
const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' });
if (sourceWarehouseConnectionId === undefined) return 'back';
const sourceWarehouseConnectionId = await chooseMappedWarehouseConnectionId({
projectDir: args.projectDir,
prompts,
});
if (sourceWarehouseConnectionId === 'back') return 'back';
state.sourceWarehouseConnectionId = sourceWarehouseConnectionId;
return 'next';
},
@ -882,12 +1172,15 @@ async function promptForInteractiveSource(
return await runSourcePromptSteps(initialState, (state) => [
...connectionSteps,
async (currentState) => {
const sourceApiKeyRef = await promptText(prompts, {
message: 'Notion token ref',
placeholder: 'env:NOTION_TOKEN',
const ref = await chooseSourceCredentialRef({
prompts,
projectDir: args.projectDir,
label: 'Notion integration token',
envName: 'NOTION_TOKEN',
secretFileName: `${currentState.sourceConnectionId ?? 'notion-main'}-token`,
});
if (sourceApiKeyRef === undefined) return 'back';
currentState.sourceApiKeyRef = sourceApiKeyRef;
if (ref === 'back') return 'back';
currentState.sourceApiKeyRef = ref;
return 'next';
},
async (currentState) => {
@ -956,13 +1249,21 @@ async function chooseInteractiveSourceConnection(input: {
connections: Record<string, KtxProjectConnectionConfig>;
prompts: KtxSetupSourcesPromptAdapter;
testGitRepo?: KtxSetupSourcesDeps['testGitRepo'];
discoverMetabaseDatabases?: KtxSetupSourcesDeps['discoverMetabaseDatabases'];
}): Promise<InteractiveSourceConnectionChoice> {
const existingIds = existingConnectionIdsBySource(input.connections, input.source);
const defaultConnectionId = defaultConnectionIdForSource(input.connections, input.source);
const label = sourceLabel(input.source);
if (existingIds.length === 0) {
const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo);
const sourceArgs = await promptForInteractiveSource(
input.args,
input.source,
input.prompts,
defaultConnectionId,
input.testGitRepo,
input.discoverMetabaseDatabases,
);
return sourceArgs === 'back' ? 'back' : { kind: 'new', args: sourceArgs };
}
@ -987,7 +1288,14 @@ async function chooseInteractiveSourceConnection(input: {
}
continue;
}
const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo);
const sourceArgs = await promptForInteractiveSource(
input.args,
input.source,
input.prompts,
defaultConnectionId,
input.testGitRepo,
input.discoverMetabaseDatabases,
);
if (sourceArgs === 'back') {
continue;
}
@ -1026,7 +1334,9 @@ async function validateSource(
return await (deps.validateMetricflow ?? defaultValidateMetricflow)(args.connection);
}
if (source === 'metabase') {
return await (deps.validateMetabase ?? defaultValidateMetabase)(args.projectDir, args.connectionId);
return deps.validateMetabase
? await deps.validateMetabase(args.projectDir, args.connectionId)
: { ok: true, detail: 'mapping validation runs after the connection is saved' };
}
if (source === 'looker') {
return await (deps.validateLooker ?? defaultValidateLooker)(args.projectDir, args.connectionId);
@ -1097,6 +1407,7 @@ export async function runKtxSetupSourcesStep(
connections: (await loadKtxProject({ projectDir: args.projectDir })).config.connections,
prompts,
testGitRepo: deps.testGitRepo,
discoverMetabaseDatabases: deps.discoverMetabaseDatabases,
});
if (sourceChoice === 'back') {
if (args.source) {
@ -1126,6 +1437,7 @@ export async function runKtxSetupSourcesStep(
return { status: 'failed', projectDir: args.projectDir };
}
if (source === 'metabase' || source === 'looker') {
prompts.log?.(`Validating ${sourceLabel(source)} mapping…`);
const mappingCode = await (deps.runMapping ?? defaultRunMapping)(args.projectDir, connectionId, io);
if (mappingCode !== 0) {
await rollback?.();

View file

@ -1367,6 +1367,142 @@ describe('setup status', () => {
expect(calls).toEqual(['context']);
});
// With a persisted context-build state of `running`, setup should go straight to the
// context step and must not run the database step.
it('resumes an active context build before prompting for earlier setup steps', async () => {
const io = makeIo();
// Project with one configured warehouse connection.
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'project: revenue',
'setup:',
' database_connection_ids:',
' - warehouse',
'connections:',
' warehouse:',
' driver: postgres',
' url: env:DATABASE_URL',
'',
].join('\n'),
'utf-8',
);
// Persisted state marks a context build as still running.
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-active',
status: 'running',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: [],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-active'),
});
const context = vi.fn(async () => ({
status: 'detached' as const,
projectDir: tempDir,
runId: 'setup-context-local-active',
}));
// Guard: the test fails loudly if the database step is reached.
const databases = vi.fn(async () => {
throw new Error('database setup should not run while context build is active');
});
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'existing',
agents: false,
inputMode: 'auto',
yes: false,
cliVersion: '0.2.0',
skipLlm: false,
skipEmbeddings: false,
skipDatabases: false,
skipSources: false,
skipAgents: false,
databaseSchemas: [],
},
io.io,
{ context, databases },
),
).resolves.toBe(0);
// No `autoWatch` flag is expected here; `showEntryMenu` was not requested.
expect(context).toHaveBeenCalledWith(
{ projectDir: tempDir, inputMode: 'auto', allowEmpty: true },
io.io,
);
expect(databases).not.toHaveBeenCalled();
});
// With `showEntryMenu: true` and a persisted context-build state of `detached`, the
// entry menu must not be shown and the context step is invoked with `autoWatch: true`.
it('skips entry menu and auto-watches when context build is active and showEntryMenu is true', async () => {
const io = makeIo();
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'project: revenue',
'setup:',
' database_connection_ids:',
' - warehouse',
'connections:',
' warehouse:',
' driver: postgres',
' url: env:DATABASE_URL',
'',
].join('\n'),
'utf-8',
);
// Persisted state marks a context build as detached.
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-active',
status: 'detached',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:00:00.000Z',
primarySourceConnectionIds: ['warehouse'],
contextSourceConnectionIds: [],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-active'),
});
const context = vi.fn(async () => ({
status: 'detached' as const,
projectDir: tempDir,
runId: 'setup-context-local-active',
}));
// Would answer 'exit' if the entry menu were shown; asserted below to stay unused.
const entryMenuSelect = vi.fn(async () => 'exit');
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'existing',
agents: false,
inputMode: 'auto',
yes: false,
cliVersion: '0.2.0',
skipLlm: false,
skipEmbeddings: false,
skipDatabases: false,
skipSources: false,
skipAgents: false,
databaseSchemas: [],
showEntryMenu: true,
},
io.io,
{
context,
entryMenuDeps: { prompts: { select: entryMenuSelect, cancel: vi.fn() } },
},
),
).resolves.toBe(0);
expect(entryMenuSelect).not.toHaveBeenCalled();
expect(context).toHaveBeenCalledWith(
{ projectDir: tempDir, inputMode: 'auto', allowEmpty: true, autoWatch: true },
io.io,
);
});
it('routes a ready project menu selection to agent setup', async () => {
const calls: string[] = [];
const io = makeIo();
@ -1479,6 +1615,103 @@ describe('setup status', () => {
expect(calls).toEqual(['agents']);
});
// When every step up to `context` is recorded as completed and the context build state
// is `completed`, setup should not show the ready menu, should pass skip flags to the
// earlier steps, and should run agent setup.
it('skips to agent setup when context is ready but agents are not configured', async () => {
const calls: string[] = [];
const io = makeIo();
// `completed_steps` lists everything except an agents step.
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'project: revenue',
'setup:',
' completed_steps:',
' - project',
' - llm',
' - embeddings',
' - sources',
' - context',
' database_connection_ids: []',
'connections: {}',
'llm:',
' provider:',
' backend: anthropic',
' models:',
' default: claude-sonnet-4-6',
'ingest:',
' embeddings:',
' backend: openai',
' model: text-embedding-3-small',
' dimensions: 1536',
'',
].join('\n'),
'utf-8',
);
await writeKtxSetupContextState(tempDir, {
runId: 'setup-context-local-ready',
status: 'completed',
startedAt: '2026-05-09T10:00:00.000Z',
updatedAt: '2026-05-09T10:02:00.000Z',
completedAt: '2026-05-09T10:02:00.000Z',
primarySourceConnectionIds: [],
contextSourceConnectionIds: [],
reportIds: [],
artifactPaths: [],
retryableFailedTargets: [],
commands: contextBuildCommands(tempDir, 'setup-context-local-ready'),
});
const readyMenuSelect = vi.fn();
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'existing',
agents: false,
inputMode: 'auto',
yes: false,
cliVersion: '0.2.0',
skipLlm: false,
skipEmbeddings: false,
skipDatabases: false,
skipSources: false,
skipAgents: false,
databaseSchemas: [],
},
io.io,
{
readyMenuDeps: { prompts: { select: readyMenuSelect, cancel: vi.fn() } },
// Each earlier step asserts it was invoked with its skip flag switched on,
// even though the caller passed `false` for all of them.
model: async (args) => {
expect(args.skipLlm).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
embeddings: async (args) => {
expect(args.skipEmbeddings).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
databases: async (args) => {
expect(args.skipDatabases).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
sources: async (args) => {
expect(args.skipSources).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
agents: async () => {
calls.push('agents');
return {
status: 'ready',
projectDir: tempDir,
installs: [{ target: 'codex', scope: 'project', mode: 'cli' }],
};
},
},
),
).resolves.toBe(0);
expect(readyMenuSelect).not.toHaveBeenCalled();
expect(calls).toEqual(['agents']);
});
it('runs only project resolution, context gate, and agent setup in --agents mode', async () => {
const io = makeIo();
const context = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-local-test' }));

View file

@ -24,7 +24,12 @@ import {
import { type KtxSetupEmbeddingsDeps, runKtxSetupEmbeddingsStep } from './setup-embeddings.js';
import { type KtxSetupModelDeps, runKtxSetupAnthropicModelStep } from './setup-models.js';
import { type KtxSetupProjectDeps, runKtxSetupProjectStep } from './setup-project.js';
import { isKtxSetupReady, type KtxSetupReadyMenuDeps, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js';
import {
isKtxPreAgentSetupReady,
isKtxSetupReady,
type KtxSetupReadyMenuDeps,
runKtxSetupReadyChangeMenu,
} from './setup-ready-menu.js';
import { type KtxSetupSourcesDeps, type KtxSetupSourceType, runKtxSetupSourcesStep } from './setup-sources.js';
import { withMenuOptionsSpacing } from './prompt-navigation.js';
import {
@ -392,6 +397,10 @@ function setupContextReady(status: KtxSetupStatus): boolean {
return status.context.ready;
}
/**
 * Reports whether a context build is currently in flight.
 *
 * A build counts as active when its status is `running` or `detached`.
 */
function setupContextActive(status: KtxSetupStatus): boolean {
  const contextStatus = status.context.status;
  if (contextStatus === 'running') {
    return true;
  }
  return contextStatus === 'detached';
}
function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void {
io.stderr.write('KTX context is not ready for agents.\n\n');
io.stderr.write(`Build context first:\n ktx setup context build --project-dir ${resolve(projectDir)}\n\n`);
@ -462,22 +471,27 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
args.inputMode !== 'disabled' &&
!args.agents &&
(io.stdout.isTTY === true || deps.entryMenuDeps?.prompts !== undefined);
let autoWatchActiveBuild = false;
setupLoop: while (true) {
entryAction = undefined;
if (canShowEntryMenu) {
const status = await readKtxSetupStatus(args.projectDir);
entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action;
if (entryAction === 'exit') {
(deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.');
return 0;
}
if (entryAction === 'status') {
io.stdout.write(formatKtxSetupStatus(status));
return 0;
}
if (entryAction === 'demo') {
return await runKtxSetupDemoFromEntryMenu(args, io, deps);
if (setupContextActive(status)) {
autoWatchActiveBuild = true;
} else {
entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action;
if (entryAction === 'exit') {
(deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.');
return 0;
}
if (entryAction === 'status') {
io.stdout.write(formatKtxSetupStatus(status));
return 0;
}
if (entryAction === 'demo') {
return await runKtxSetupDemoFromEntryMenu(args, io, deps);
}
}
}
@ -505,9 +519,38 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
const agentsRequested = args.agents || entryAction === 'agents';
const currentStatus = await readKtxSetupStatus(projectResult.projectDir);
let readyAction: string | undefined;
if (args.inputMode !== 'disabled' && !agentsRequested && isKtxSetupReady(currentStatus)) {
readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action;
if (readyAction === 'exit') return 0;
if (args.inputMode !== 'disabled' && !agentsRequested && setupContextActive(currentStatus)) {
const contextRunner =
deps.context ?? ((contextArgs, contextIo) => runKtxSetupContextStep(contextArgs, contextIo, deps.contextDeps));
const contextResult = await contextRunner(
{
projectDir: projectResult.projectDir,
inputMode: args.inputMode,
allowEmpty: true,
...(autoWatchActiveBuild ? { autoWatch: true } : {}),
},
io,
);
autoWatchActiveBuild = false;
if (contextResult.status === 'back') {
continue;
}
if (contextResult.status === 'failed' || contextResult.status === 'missing-input') {
return 1;
}
if (contextResult.status !== 'ready') {
return 0;
}
}
if (args.inputMode !== 'disabled' && !agentsRequested) {
if (isKtxSetupReady(currentStatus)) {
readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action;
if (readyAction === 'exit') return 0;
} else if (isKtxPreAgentSetupReady(currentStatus)) {
readyAction = 'agents';
}
}
const runOnly = readyAction;

View file

@ -1,46 +0,0 @@
{
"name": "@ktx/connector-posthog",
"version": "0.0.0-private",
"description": "PostHog connector package for KTX scan interfaces",
"private": true,
"type": "module",
"engines": {
"node": ">=22.0.0"
},
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"default": "./dist/index.js"
},
"./package.json": "./package.json"
},
"files": [
"dist"
],
"scripts": {
"build": "tsc -p tsconfig.json",
"test": "vitest run",
"type-check": "tsc -p tsconfig.json --noEmit"
},
"dependencies": {
"@ktx/context": "workspace:*"
},
"devDependencies": {
"@types/node": "^24.3.0",
"typescript": "^5.9.3",
"vitest": "^4.0.18"
},
"license": "Apache-2.0",
"repository": {
"type": "git",
"url": "git+https://github.com/kaelio/ktx.git",
"directory": "packages/connector-posthog"
},
"bugs": {
"url": "https://github.com/kaelio/ktx/issues"
},
"homepage": "https://github.com/kaelio/ktx#readme"
}

View file

@ -1,400 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import {
createPostHogLiveDatabaseIntrospection,
isKtxPostHogConnectionConfig,
KtxPostHogScanConnector,
postHogConnectionConfigFromConfig,
type KtxPostHogConnectionConfig,
type KtxPostHogFetch,
} from './index.js';
/**
 * Builds a minimal `Response` stand-in carrying a JSON body.
 *
 * Only `ok`, `status`, `json()` and `text()` are provided; `ok` is true for
 * 2xx status codes.
 */
function jsonResponse(body: unknown, status = 200): Response {
  const isSuccess = status >= 200 && status < 300;
  const stub = {
    ok: isSuccess,
    status,
    json: async () => body,
    text: async () => JSON.stringify(body),
  };
  return stub as Response;
}
/**
 * Creates a vitest mock that stands in for the connector's fetch function.
 *
 * The mock parses the JSON request body, records every non-empty HogQL string
 * into `queries`, and answers with a canned payload chosen by the query kind
 * or by substrings of the SQL text. The checks run top to bottom, so the more
 * specific patterns must stay ahead of the generic ones.
 *
 * @param queries Array that receives each non-empty SQL string the mock sees.
 * @returns A mock usable wherever a `KtxPostHogFetch` is expected.
 */
function fakeFetch(queries: string[] = []): KtxPostHogFetch {
return vi.fn(async (_url: string, init?: RequestInit) => {
const body = JSON.parse(String(init?.body ?? '{}')) as { query?: { kind?: string; query?: string } };
const sql = body.query?.query ?? '';
if (sql) {
queries.push(sql);
}
// Schema introspection: two tables, one of which (`query_log`) the connector excludes.
if (body.query?.kind === 'DatabaseSchemaQuery') {
return jsonResponse({
tables: {
events: {
id: 'events',
name: 'events',
type: 'posthog',
row_count: 42,
fields: {
uuid: {
name: 'uuid',
type: 'uuid',
hogql_value: 'uuid',
schema_valid: true,
table: 'events',
fields: null,
chain: null,
id: 'uuid',
},
event: {
name: 'event',
type: 'string',
hogql_value: 'event',
schema_valid: true,
table: 'events',
fields: null,
chain: null,
id: 'event',
},
timestamp: {
name: 'timestamp',
type: 'datetime',
hogql_value: 'timestamp',
schema_valid: true,
table: 'events',
fields: null,
chain: null,
id: 'timestamp',
},
properties: {
name: 'properties',
type: 'json',
hogql_value: 'properties',
schema_valid: true,
table: 'events',
fields: null,
chain: null,
id: 'properties',
},
// A `virtual_table` field, which the connector skips when extracting columns.
virtual: {
name: 'virtual',
type: 'virtual_table',
hogql_value: 'virtual',
schema_valid: true,
table: null,
fields: null,
chain: null,
id: 'virtual',
},
},
},
query_log: {
id: 'query_log',
name: 'query_log',
type: 'posthog',
row_count: 1,
fields: {},
},
},
joins: [],
});
}
// Hidden-table probe: only `person_distinct_ids` exists in this fixture.
if (sql.includes('SELECT * FROM person_distinct_ids LIMIT 0')) {
return jsonResponse({
results: [],
columns: ['distinct_id', 'person_id'],
types: [
['distinct_id', 'String'],
['person_id', 'UUID'],
],
error: null,
hogql: sql,
});
}
// Any other `LIMIT 0` probe is answered with a query-level error.
if (sql.includes('LIMIT 0')) {
return jsonResponse({ results: null, columns: null, types: null, error: 'Table not found', hogql: sql });
}
if (sql.includes('SELECT 1 AS test')) {
return jsonResponse({ results: [[1]], columns: ['test'], types: [['test', 'Int64']], error: null, hogql: sql });
}
if (sql.includes('count() AS cnt')) {
return jsonResponse({ results: [[42]], columns: ['cnt'], types: [['cnt', 'Int64']], error: null, hogql: sql });
}
if (sql.includes('GROUP BY event')) {
return jsonResponse({
results: [['$pageview', 9]],
columns: ['event', 'cnt'],
types: [
['event', 'String'],
['cnt', 'Int64'],
],
error: null,
hogql: sql,
});
}
if (sql.includes('arrayJoin(JSONExtractKeys')) {
return jsonResponse({
results: [['$browser', 7]],
columns: ['key', 'cnt'],
types: [
['key', 'String'],
['cnt', 'Int64'],
],
error: null,
hogql: sql,
});
}
if (sql.includes('uniq(JSONExtractString') || sql.includes('uniq(val) AS cardinality')) {
return jsonResponse({
results: [[2]],
columns: ['cardinality'],
types: [['cardinality', 'Int64']],
error: null,
hogql: sql,
});
}
if (sql.includes('DISTINCT JSONExtractString') || sql.includes('SELECT DISTINCT toString(')) {
return jsonResponse({
results: [['Chrome'], ['Safari']],
columns: ['value'],
types: [['value', 'String']],
error: null,
hogql: sql,
});
}
// Fallback for every other query: a single `$pageview` row.
return jsonResponse({ results: [['$pageview']], columns: ['event'], types: [['event', 'String']], error: null, hogql: sql });
}) as KtxPostHogFetch;
}
// Shared fixtures for the connector tests below.
// NOTE(review): the env-var name, token, and `api_key` property name are assembled
// from fragments rather than written literally — presumably to keep secret
// scanners from flagging the fixture; confirm before simplifying.
const posthogApiKeyEnv = ['POSTHOG', 'API', 'KEY'].join('_');
const fixtureToken = ['phx', 'fixture'].join('_');
// Environment handed to the connector so the `env:` reference below resolves.
const env = { [posthogApiKeyEnv]: fixtureToken };
// Read-only PostHog connection whose API key is an `env:` reference.
const connection: KtxPostHogConnectionConfig & { driver: string } = {
driver: 'posthog',
['api_' + 'key']: `env:${posthogApiKeyEnv}`,
project_id: '157881',
region: 'us',
readonly: true,
};
describe('KtxPostHogScanConnector', () => {
// Covers driver detection, `env:` reference resolution, the default US base URL,
// and the refusal to run when `readonly` is not true.
it('resolves configuration safely', () => {
expect(isKtxPostHogConnectionConfig(connection)).toBe(true);
expect(isKtxPostHogConnectionConfig({ driver: 'mysql' })).toBe(false);
const resolved = postHogConnectionConfigFromConfig({
connectionId: 'product',
connection,
env,
});
expect(resolved).toMatchObject({ projectId: '157881', baseUrl: 'https://us.posthog.com' });
// The resolved key field name is built dynamically, like the fixture above.
const tokenField = ['api', 'Key'].join('') as keyof typeof resolved;
expect(resolved[tokenField]).toBe(fixtureToken);
expect(() =>
postHogConnectionConfigFromConfig({
connectionId: 'product',
connection: { ...connection, readonly: false },
}),
).toThrow('Native PostHog connector requires connections.product.readonly: true');
});
// Verifies the snapshot built from the fake schema response: `query_log` is
// excluded, the probed `person_distinct_ids` table is added, and the
// `virtual_table` field is dropped from the `events` columns.
it('introspects schema metadata, hidden tables, descriptions, primary keys, and normalized types', async () => {
const connector = new KtxPostHogScanConnector({
connectionId: 'product',
connection,
env,
fetch: fakeFetch(),
sleep: async () => {},
now: () => new Date('2026-04-29T19:00:00.000Z'),
});
const snapshot = await connector.introspect({ connectionId: 'product', driver: 'posthog' }, { runId: 'scan-run-1' });
expect(snapshot).toMatchObject({
connectionId: 'product',
driver: 'posthog',
extractedAt: '2026-04-29T19:00:00.000Z',
scope: { catalogs: ['157881'] },
metadata: {
project_id: '157881',
table_count: 2,
total_columns: 6,
},
});
// Tables come back sorted by name.
expect(snapshot.tables.map((table) => table.name)).toEqual(['events', 'person_distinct_ids']);
expect(snapshot.tables[0]).toMatchObject({
catalog: '157881',
db: null,
name: 'events',
kind: 'event_stream',
estimatedRows: 42,
comment: expect.stringContaining('PostHog event stream'),
foreignKeys: [],
});
expect(snapshot.tables[0]?.columns).toEqual([
{
name: 'uuid',
nativeType: 'UUID',
normalizedType: 'UUID',
dimensionType: 'string',
nullable: false,
primaryKey: true,
comment: 'Unique identifier for this specific event.',
},
{
name: 'event',
nativeType: 'String',
normalizedType: 'VARCHAR',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: expect.stringContaining('Event name'),
},
{
name: 'timestamp',
nativeType: 'DateTime64',
normalizedType: 'TIMESTAMP',
dimensionType: 'time',
nullable: false,
primaryKey: false,
comment: expect.stringContaining('UTC timestamp'),
},
{
name: 'properties',
nativeType: 'JSON',
normalizedType: 'JSON',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: expect.stringContaining('JSON object'),
},
]);
});
// Exercises every query-issuing entry point against the fake fetch and checks
// both the mapped results and, via `queries`, fragments of the generated SQL.
it('runs samples, read-only SQL, event-stream discovery, row counts, and cleanup', async () => {
const queries: string[] = [];
const connector = new KtxPostHogScanConnector({
connectionId: 'product',
connection,
env,
fetch: fakeFetch(queries),
sleep: async () => {},
});
await expect(connector.testConnection()).resolves.toEqual({ success: true });
await expect(
connector.sampleTable(
{
connectionId: 'product',
table: { catalog: '157881', db: null, name: 'events' },
columns: ['event'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ values: ['$pageview'], nullCount: null, distinctCount: null });
await expect(
connector.executeReadOnly({ connectionId: 'product', sql: 'select event from events', maxRows: 1 }, { runId: 'scan-run-1' }),
).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1, rowCount: 1 });
// Non-SELECT statements must be rejected before any request is made.
await expect(
connector.executeReadOnly({ connectionId: 'product', sql: 'delete from events' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('events')).resolves.toBe(42);
await expect(
connector.getColumnDistinctValues({ catalog: '157881', db: null, name: 'events' }, 'properties.$browser', {
maxCardinality: 5,
limit: 10,
sampleSize: 100,
}),
).resolves.toEqual({ values: ['Chrome', 'Safari'], cardinality: 2 });
await expect(
connector.eventStreamDiscovery.listEventTypes(
{
connectionId: 'product',
table: { catalog: '157881', db: null, name: 'events' },
eventColumn: 'event',
limit: 10,
minCount: 30,
lookbackDays: 14,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual([{ value: '$pageview', count: 9 }]);
// `minCount` and `lookbackDays` must reach the generated SQL.
expect(queries.some((query) => query.includes('HAVING cnt >= 30'))).toBe(true);
expect(queries.some((query) => query.includes('INTERVAL 14 DAY'))).toBe(true);
await expect(
connector.eventStreamDiscovery.listPropertyKeys(
{
connectionId: 'product',
table: { catalog: '157881', db: null, name: 'events' },
jsonColumn: 'properties',
sampleSize: 1000,
limit: 10,
lookbackDays: 7,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual([{ key: '$browser', count: 7 }]);
await expect(
connector.eventStreamDiscovery.listPropertyValues(
{
connectionId: 'product',
table: { catalog: '157881', db: null, name: 'events' },
jsonColumn: 'properties',
propertyKey: '$browser',
limit: 10,
maxCardinality: 1000,
lookbackDays: 30,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
values: ['Chrome', 'Safari'],
cardinality: 2,
});
// Column statistics are not supported by this connector.
await expect(
connector.columnStats(
{ connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
// Checks that the live-database introspection adapter exposes the same
// `events` table and `uuid` column that the connector's own snapshot reports.
it('adapts native snapshots to live-database introspection snapshots', async () => {
const introspection = createPostHogLiveDatabaseIntrospection({
connections: { product: connection },
env,
fetch: fakeFetch(),
sleep: async () => {},
now: () => new Date('2026-04-29T19:00:00.000Z'),
});
await expect(introspection.extractSchema('product')).resolves.toMatchObject({
connectionId: 'product',
metadata: { project_id: '157881' },
tables: expect.arrayContaining([
expect.objectContaining({
catalog: '157881',
db: null,
name: 'events',
columns: expect.arrayContaining([
{
name: 'uuid',
nativeType: 'UUID',
normalizedType: 'UUID',
dimensionType: 'string',
nullable: false,
primaryKey: true,
comment: 'Unique identifier for this specific event.',
},
]),
}),
]),
});
});
});

View file

@ -1,609 +0,0 @@
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { assertReadOnlySql, limitSqlForExecution } from '@ktx/context/connections';
import {
createKtxConnectorCapabilities,
type KtxColumnSampleInput,
type KtxColumnSampleResult,
type KtxColumnStatsInput,
type KtxColumnStatsResult,
type KtxEventPropertyDiscovery,
type KtxEventPropertyDiscoveryInput,
type KtxEventPropertyValuesInput,
type KtxEventPropertyValuesResult,
type KtxEventStreamDiscoveryPort,
type KtxEventTypeDiscovery,
type KtxEventTypeDiscoveryInput,
type KtxQueryResult,
type KtxReadOnlyQueryInput,
type KtxScanConnector,
type KtxScanContext,
type KtxScanInput,
type KtxSchemaColumn,
type KtxSchemaSnapshot,
type KtxSchemaTable,
type KtxTableRef,
type KtxTableSampleInput,
type KtxTableSampleResult,
} from '@ktx/context/scan';
import { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js';
import { getKtxPostHogColumnDescription, getKtxPostHogTableDescription } from './schema-descriptions.js';
/**
 * Raw PostHog connection settings as written in project configuration.
 *
 * The API key and project id are each accepted under a snake_case and a
 * camelCase name; when both are present the snake_case one is used. The API
 * key, project id, and host may be literal values or `env:NAME` / `file:PATH`
 * references.
 */
export interface KtxPostHogConnectionConfig {
/** Must equal `posthog` (case-insensitive) for this connector to accept the connection. */
driver?: string;
api_key?: string;
apiKey?: string;
project_id?: string;
projectId?: string;
/** Selects the PostHog cloud host when `host` is not set; treated as `us` when omitted. */
region?: 'us' | 'eu';
/** Explicit base URL; takes precedence over `region`. */
host?: string;
/** Must be exactly `true`, otherwise the connector refuses to run. */
readonly?: boolean;
[key: string]: unknown;
}
/** Connection settings after references are resolved and the base URL is chosen. */
export interface KtxPostHogResolvedConnectionConfig {
/** Sent as the bearer token on every API request. */
apiKey: string;
/** Used in the API path and as the catalog name of scanned tables. */
projectId: string;
/** API origin with a single trailing slash removed. */
baseUrl: string;
}
/** Fetch-compatible function the connector uses for HTTP calls; injectable for tests. */
export type KtxPostHogFetch = (url: string, init?: RequestInit) => Promise<Response>;
/** Construction options for {@link KtxPostHogScanConnector}. */
export interface KtxPostHogScanConnectorOptions {
connectionId: string;
connection: KtxPostHogConnectionConfig | undefined;
/** Environment used to resolve `env:` references; `process.env` when omitted. */
env?: NodeJS.ProcessEnv;
/** HTTP implementation; the global `fetch` when omitted. */
fetch?: KtxPostHogFetch;
/** Delay function used while waiting out rate limits; a `setTimeout` wrapper when omitted. */
sleep?: (ms: number) => Promise<void>;
/** Clock used for the snapshot timestamp; the current time when omitted. */
now?: () => Date;
}
/** Read-only query input extended with optional named parameter values. */
export interface KtxPostHogReadOnlyQueryInput extends KtxReadOnlyQueryInput {
params?: Record<string, unknown>;
}
/** Tuning knobs for `getColumnDistinctValues`. */
export interface KtxPostHogColumnDistinctValuesOptions {
/** Above this cardinality the values themselves are not fetched. */
maxCardinality: number;
/** Maximum number of distinct values to return. */
limit: number;
/** Sample size for the cardinality estimate; 10000 when omitted. */
sampleSize?: number;
}
/** Outcome of `getColumnDistinctValues`. */
export interface KtxPostHogColumnDistinctValuesResult {
/** `null` when the cardinality exceeded `maxCardinality`. */
values: string[] | null;
cardinality: number;
}
/** One field entry of a table in the `DatabaseSchemaQuery` response. */
interface PostHogSchemaField {
name: string;
/** PostHog field type, e.g. `string`, `datetime`, `virtual_table`. */
type: string;
hogql_value: string;
schema_valid: boolean;
table: string | null;
fields: string[] | null;
chain: string[] | null;
id: string | null;
}
/** One table entry of the `DatabaseSchemaQuery` response. */
interface PostHogSchemaTable {
id: string;
name: string;
/** Table category; only types listed in `allowedTableTypes` are scanned. */
type: string;
row_count: number | null;
/** Fields keyed by field name. */
fields: Record<string, PostHogSchemaField>;
}
/** Body of a `DatabaseSchemaQuery` response. */
interface PostHogSchemaResponse {
/** Tables keyed by table name. */
tables: Record<string, PostHogSchemaTable>;
joins: unknown[];
}
/** Body of a `HogQLQuery` response. */
interface PostHogQueryResponse {
results: unknown[][] | null;
columns: string[] | null;
/** Pairs whose second element is read as the column's type. */
types: [string, string][] | null;
/** Query-level error message, or `null` on success. */
error: string | null;
hogql: string | null;
}
// Table `type` values from the schema response that introspection keeps.
const allowedTableTypes = new Set(['posthog', 'system']);
// Table names dropped from the snapshot even when their type is allowed.
const excludedTables = new Set([
'query_log',
'system.teams',
'system.exports',
'system.ingestion_warnings',
'system.insight_variables',
'system.data_warehouse_sources',
'system.groups',
'system.group_type_mappings',
]);
// Tables probed individually with `SELECT * ... LIMIT 0` and added when the probe succeeds.
const hiddenTablesToProbe = ['person_distinct_ids', 'cohort_people', 'static_cohort_people'];
/**
 * Tells whether a connection entry targets the PostHog driver.
 *
 * The comparison ignores case; a missing connection or driver yields `false`.
 */
export function isKtxPostHogConnectionConfig(connection: KtxPostHogConnectionConfig | undefined): boolean {
  const driverName = String(connection?.driver ?? '');
  return driverName.toLowerCase() === 'posthog';
}
/**
 * Resolves a configuration value that may be an indirect reference.
 *
 * - `env:NAME` returns the value of `NAME` in `env`, or an empty string when unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file at `PATH`; a
 *   leading `~` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value Literal value or `env:` / `file:` reference.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws If a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separator after `~` before joining: `resolve(home, '/x')` treats
    // '/x' as absolute and would discard the home directory entirely.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Reads one string setting from a connection and resolves any reference in it.
 *
 * @param connection Connection entry, possibly undefined.
 * @param key Setting to read.
 * @param env Environment forwarded to the reference resolver.
 * @returns The resolved value, or `undefined` when the setting is absent, not
 *   a string, or blank after trimming.
 */
function stringConfigValue(
  connection: KtxPostHogConnectionConfig | undefined,
  key: keyof KtxPostHogConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
/**
 * Validates a PostHog connection entry and resolves it into request settings.
 *
 * The API key and project id are looked up under their snake_case name first
 * and their camelCase name second. The base URL is the configured `host` with
 * one trailing slash removed, otherwise the EU or US cloud host by `region`.
 *
 * @param input Connection id (used in error messages), the raw connection, and
 *   an optional environment that defaults to `process.env`.
 * @returns The resolved API key, project id, and base URL.
 * @throws If the driver is not PostHog, `readonly` is not `true`, or the API
 *   key or project id is missing.
 */
export function postHogConnectionConfigFromConfig(input: {
  connectionId: string;
  connection: KtxPostHogConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxPostHogResolvedConnectionConfig {
  const { connectionId, connection } = input;
  if (!isKtxPostHogConnectionConfig(connection)) {
    throw new Error(`Native PostHog connector cannot run driver "${connection?.driver ?? 'unknown'}"`);
  }
  if (connection?.readonly !== true) {
    throw new Error(`Native PostHog connector requires connections.${connectionId}.readonly: true`);
  }

  const env = input.env ?? process.env;
  const firstConfigured = (
    primary: keyof KtxPostHogConnectionConfig,
    fallback: keyof KtxPostHogConnectionConfig,
  ): string | undefined => stringConfigValue(connection, primary, env) ?? stringConfigValue(connection, fallback, env);

  const apiKey = firstConfigured('api_key', 'apiKey');
  const projectId = firstConfigured('project_id', 'projectId');
  if (!apiKey) {
    throw new Error(`Native PostHog connector requires connections.${connectionId}.api_key`);
  }
  if (!projectId) {
    throw new Error(`Native PostHog connector requires connections.${connectionId}.project_id`);
  }

  const host = stringConfigValue(connection, 'host', env);
  const region = connection?.region ?? 'us';
  let baseUrl: string;
  if (host) {
    baseUrl = host.replace(/\/$/, '');
  } else if (region === 'eu') {
    baseUrl = 'https://eu.posthog.com';
  } else {
    baseUrl = 'https://us.posthog.com';
  }
  return { apiKey, projectId, baseUrl };
}
export class KtxPostHogScanConnector implements KtxScanConnector {
/** Connector identifier, set in the constructor to `posthog:<connectionId>`. */
readonly id: string;
readonly driver = 'posthog' as const;
/** Features this connector declares; column statistics and formal foreign keys are off. */
readonly capabilities = createKtxConnectorCapabilities({
tableSampling: true,
columnSampling: true,
columnStats: false,
readOnlySql: true,
nestedAnalysis: true,
eventStreamDiscovery: true,
formalForeignKeys: false,
estimatedRowCounts: true,
});
/** Event-stream discovery port; each entry forwards to the private method of the same name. */
readonly eventStreamDiscovery: KtxEventStreamDiscoveryPort = {
listEventTypes: (input, ctx) => this.listEventTypes(input, ctx),
listPropertyKeys: (input, ctx) => this.listPropertyKeys(input, ctx),
listPropertyValues: (input, ctx) => this.listPropertyValues(input, ctx),
};
private readonly connectionId: string;
/** API key, project id, and base URL resolved once at construction. */
private readonly resolved: KtxPostHogResolvedConnectionConfig;
private readonly fetchImpl: KtxPostHogFetch;
private readonly sleep: (ms: number) => Promise<void>;
private readonly now: () => Date;
/** Builds identifiers, table names, and the sampling/cardinality queries. */
private readonly dialect = new KtxPostHogDialect();
/**
 * Resolves the connection settings and wires in the injectable collaborators.
 *
 * @param options Connection id and config plus optional `env`, `fetch`,
 *   `sleep`, and `now` overrides.
 * @throws If the connection config is rejected during resolution.
 */
constructor(options: KtxPostHogScanConnectorOptions) {
  const { connectionId, connection, env } = options;
  const defaultSleep = (ms: number): Promise<void> =>
    new Promise((wake) => setTimeout(wake, ms));
  const systemClock = (): Date => new Date();

  this.connectionId = connectionId;
  this.resolved = postHogConnectionConfigFromConfig({ connectionId, connection, env });
  this.fetchImpl = options.fetch ?? fetch;
  this.sleep = options.sleep ?? defaultSleep;
  this.now = options.now ?? systemClock;
  this.id = `posthog:${connectionId}`;
}
/**
 * Checks that the PostHog API is reachable and the credentials work by
 * running `SELECT 1 AS test`.
 *
 * Failures are always reported through the returned object: a query-level
 * error from PostHog and a thrown HTTP or network error both produce
 * `{ success: false, error }` rather than a rejected promise.
 *
 * @returns `{ success: true }` on success, otherwise the failure message.
 */
async testConnection(): Promise<{ success: boolean; error?: string }> {
  try {
    const response = await this.query('SELECT 1 AS test');
    return response.error ? { success: false, error: response.error } : { success: true };
  } catch (error: unknown) {
    // The request layer throws on non-2xx responses and fetch rejects on
    // network failure; surface both in the result shape callers expect.
    return { success: false, error: error instanceof Error ? error.message : String(error) };
  }
}
/**
 * Builds a schema snapshot for the configured PostHog project.
 *
 * Tables from the schema response are kept when their type is allowed and
 * their name is not excluded; successfully probed hidden tables are appended,
 * and the combined list is sorted by name.
 *
 * @param input Scan input; its connection id is checked against this connector.
 * @param _ctx Unused.
 * @returns The snapshot, with the project id as its only catalog.
 */
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
  this.assertConnection(input.connectionId);
  const schema = await this.makeRequest<PostHogSchemaResponse>('/query', { query: { kind: 'DatabaseSchemaQuery' } });

  const listedTables = Object.entries(schema.tables ?? {})
    .filter(([tableName, tableInfo]) => allowedTableTypes.has(tableInfo.type) && !excludedTables.has(tableName))
    .map(([tableName, tableInfo]) => this.toSchemaTable(tableName, tableInfo));
  const hiddenTables = await this.discoverHiddenTables();
  const tables: KtxSchemaTable[] = [...listedTables, ...hiddenTables];
  tables.sort((a, b) => a.name.localeCompare(b.name));

  let totalColumns = 0;
  for (const table of tables) {
    totalColumns += table.columns.length;
  }

  const projectId = this.resolved.projectId;
  return {
    connectionId: this.connectionId,
    driver: 'posthog',
    extractedAt: this.now().toISOString(),
    scope: { catalogs: [projectId] },
    metadata: {
      project_id: projectId,
      table_count: tables.length,
      total_columns: totalColumns,
    },
    tables,
  };
}
/**
 * Fetches sample rows from a table.
 *
 * When `columnMetadata` is supplied the metadata-aware sample query is used;
 * otherwise the query is built from the plain column list.
 *
 * @returns Headers, rows, and row count of the sample.
 */
async sampleTable(
  input: KtxTableSampleInput & { columnMetadata?: KtxPostHogSampleColumnInfo[] },
  _ctx: KtxScanContext,
): Promise<KtxTableSampleResult> {
  this.assertConnection(input.connectionId);
  const tableName = this.qTableName(input.table);
  let sql: string;
  if (input.columnMetadata) {
    sql = this.dialect.generateSampleQueryWithMetadata(tableName, input.limit, input.columnMetadata);
  } else {
    sql = this.dialect.generateSampleQuery(tableName, input.limit, input.columns);
  }
  const { headers, rows, totalRows } = await this.query(sql);
  return { headers, rows, totalRows };
}
/**
 * Fetches sample values for a single column.
 *
 * Empty rows and rows whose first cell is `null` are dropped. Null and
 * distinct counts are not computed and are returned as `null`.
 */
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const sql = this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit);
  const { rows } = await this.query(sql);
  const values = rows.flatMap((row) => (row.length > 0 && row[0] !== null ? [row[0]] : []));
  return { values, nullCount: null, distinctCount: null };
}
/**
 * Column statistics are not provided by this connector.
 *
 * @returns Always `null`.
 */
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  const noStats = null;
  return noStats;
}
/**
 * Runs caller-supplied SQL after enforcing read-only and row-limit rules.
 *
 * @param input SQL text, optional `maxRows`, and optional named parameters.
 * @returns The query result, with `rowCount` set to the number of rows returned.
 * @throws If the SQL is rejected by the read-only check.
 */
async executeReadOnly(input: KtxPostHogReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
  this.assertConnection(input.connectionId);
  const readOnlySql = assertReadOnlySql(input.sql);
  const boundedSql = limitSqlForExecution(readOnlySql, input.maxRows);
  const { sql, params } = this.dialect.prepareQuery(boundedSql, input.params);
  const outcome = await this.query(sql, params);
  return { ...outcome, rowCount: outcome.rows.length };
}
/**
 * Counts the rows of a table with `count()`.
 *
 * @returns The count, or 0 when the query returns no rows (including when the
 *   query reports an error).
 */
async getTableRowCount(tableName: string): Promise<number> {
  const quotedTable = this.dialect.quoteIdentifier(tableName);
  const { rows } = await this.query(`SELECT count() AS cnt FROM ${quotedTable}`);
  const firstCell = rows[0]?.[0];
  return Number(firstCell ?? 0);
}
/**
 * Estimates a column's cardinality and, when it is small enough, lists its
 * distinct values.
 *
 * @param table Table to inspect.
 * @param columnName Column (or property path) to inspect.
 * @param options Cardinality ceiling, value limit, and optional sample size
 *   (10000 when omitted).
 * @returns `null` when either query fails or the cardinality is not a finite
 *   number; `values: null` when the cardinality exceeds `maxCardinality`;
 *   otherwise the stringified non-null values with the cardinality.
 */
async getColumnDistinctValues(
  table: KtxTableRef,
  columnName: string,
  options: KtxPostHogColumnDistinctValuesOptions,
): Promise<KtxPostHogColumnDistinctValuesResult | null> {
  const sampleSize = options.sampleSize ?? 10000;
  const tableName = this.qTableName(table);

  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(tableName, columnName, sampleSize);
  const estimate = await this.query(cardinalitySql);
  if (estimate.error || estimate.rows.length === 0) {
    return null;
  }
  const cardinality = Number(estimate.rows[0]?.[0]);
  if (!Number.isFinite(cardinality)) {
    return null;
  }
  if (cardinality === 0) {
    return { values: [], cardinality: 0 };
  }
  if (cardinality > options.maxCardinality) {
    return { values: null, cardinality };
  }

  const valuesSql = this.dialect.generateDistinctValuesQuery(tableName, columnName, options.limit);
  const listing = await this.query(valuesSql);
  if (listing.error) {
    return null;
  }
  const values: string[] = [];
  for (const row of listing.rows) {
    if (row[0] !== null) {
      values.push(String(row[0]));
    }
  }
  return { values, cardinality };
}
/**
 * Lists the most frequent event names within a lookback window.
 *
 * `lookbackDays` defaults to 30 and `minCount` to 0; a `HAVING` clause is
 * emitted only when `minCount` is above zero.
 *
 * @returns Event names with counts, most frequent first; null and blank names
 *   are dropped. An empty list when the query reports an error.
 */
private async listEventTypes(
  input: KtxEventTypeDiscoveryInput,
  _ctx: KtxScanContext,
): Promise<KtxEventTypeDiscovery[]> {
  this.assertConnection(input.connectionId);
  const limit = this.positiveInteger(input.limit, 'limit');
  const lookbackDays = this.positiveInteger(input.lookbackDays ?? 30, 'lookbackDays');
  const minCount = this.positiveInteger(input.minCount ?? 0, 'minCount');
  const eventColumn = this.dialect.quoteIdentifier(input.eventColumn);
  const tableName = this.qTableName(input.table);
  let havingClause = '';
  if (minCount > 0) {
    havingClause = `HAVING cnt >= ${minCount}`;
  }
  const sql = `
SELECT ${eventColumn} AS event, count() as cnt
FROM ${tableName}
WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY
GROUP BY event
${havingClause}
ORDER BY cnt DESC
LIMIT ${limit}
`;
  const result = await this.query(sql);
  if (result.error) {
    return [];
  }
  const eventTypes: KtxEventTypeDiscovery[] = [];
  for (const row of result.rows) {
    if (row[0] != null && String(row[0]).trim() !== '') {
      eventTypes.push({ value: String(row[0]), count: Number(row[1]) });
    }
  }
  return eventTypes;
}
/**
 * Lists the most common keys found in a JSON column.
 *
 * Keys are expanded from the column with a row cap of `sampleSize`, then
 * grouped and ranked by occurrence. The time filter is applied only when
 * `lookbackDays` is given.
 *
 * @returns Keys with counts, most frequent first; an empty list when the
 *   query reports an error.
 */
private async listPropertyKeys(
  input: KtxEventPropertyDiscoveryInput,
  _ctx: KtxScanContext,
): Promise<KtxEventPropertyDiscovery[]> {
  this.assertConnection(input.connectionId);
  const sampleSize = this.positiveInteger(input.sampleSize, 'sampleSize');
  const limit = this.positiveInteger(input.limit, 'limit');
  const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays');
  const tableName = this.qTableName(input.table);
  const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn);
  let whereClause = '';
  if (lookbackDays !== null) {
    whereClause = `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`;
  }
  const sql = `
SELECT key, count() as cnt
FROM (
SELECT arrayJoin(JSONExtractKeys(${jsonColumn})) AS key
FROM ${tableName}
${whereClause}
LIMIT ${sampleSize}
)
GROUP BY key
ORDER BY cnt DESC
LIMIT ${limit}
`;
  const result = await this.query(sql);
  if (result.error) {
    return [];
  }
  const propertyKeys: KtxEventPropertyDiscovery[] = [];
  for (const row of result.rows) {
    propertyKeys.push({ key: String(row[0]), count: Number(row[1]) });
  }
  return propertyKeys;
}
/**
 * Lists the distinct values of one property inside a JSON column.
 *
 * A cardinality query runs first; the values query runs only when the
 * cardinality is finite and no greater than `maxCardinality` (1000 when
 * omitted). Blank values and the literal strings `[]`, `{}`, and `null` are
 * removed from the result.
 *
 * @returns The values with the cardinality, or `null` when either query
 *   fails or the cardinality check does not pass.
 */
private async listPropertyValues(
  input: KtxEventPropertyValuesInput,
  _ctx: KtxScanContext,
): Promise<KtxEventPropertyValuesResult | null> {
  this.assertConnection(input.connectionId);
  const limit = this.positiveInteger(input.limit, 'limit');
  const maxCardinality = this.positiveInteger(input.maxCardinality ?? 1000, 'maxCardinality');
  const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays');
  const tableName = this.qTableName(input.table);
  const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn);
  const escapedKey = this.escapeHogQLString(input.propertyKey);

  // The same window is needed as a standalone WHERE and as an extra AND condition.
  const window = lookbackDays === null ? null : `timestamp > now() - INTERVAL ${lookbackDays} DAY`;
  const timeFilter = window === null ? '' : `WHERE ${window}`;
  const andTimeFilter = window === null ? '' : `AND ${window}`;
  const extracted = `JSONExtractString(${jsonColumn}, '${escapedKey}')`;

  const cardinalityResult = await this.query(`
SELECT uniq(${extracted}) as cardinality
FROM ${tableName}
${timeFilter}
LIMIT 1000000
`);
  if (cardinalityResult.error || cardinalityResult.rows.length === 0) {
    return null;
  }
  const cardinality = Number(cardinalityResult.rows[0]?.[0]);
  if (!Number.isFinite(cardinality) || cardinality > maxCardinality) {
    return null;
  }

  const valuesResult = await this.query(`
SELECT DISTINCT ${extracted} as value
FROM ${tableName}
WHERE ${extracted} IS NOT NULL
AND ${extracted} != ''
${andTimeFilter}
ORDER BY value
LIMIT ${limit}
`);
  if (valuesResult.error) {
    return null;
  }

  const placeholders = ['', '[]', '{}', 'null'];
  const values: string[] = [];
  for (const row of valuesResult.rows) {
    const value = row[0] != null ? String(row[0]) : '';
    if (!placeholders.includes(value.trim())) {
      values.push(value);
    }
  }
  return { values, cardinality };
}
/** Does nothing; present so callers can invoke cleanup on this connector. */
async cleanup(): Promise<void> {
  return;
}
/** Formats a table reference as the name to use in queries, via the dialect. */
qTableName(table: Pick<KtxTableRef, 'name'>): string {
  const formatted = this.dialect.formatTableName(table);
  return formatted;
}
/** Quotes an identifier for use in queries, via the dialect. */
quoteIdentifier(identifier: string): string {
  const quoted = this.dialect.quoteIdentifier(identifier);
  return quoted;
}
/**
 * Converts one schema-response table into a snapshot table.
 *
 * The `events` table is tagged as an event stream; every other table is a
 * plain table. No foreign keys are reported.
 */
private toSchemaTable(tableName: string, tableInfo: PostHogSchemaTable): KtxSchemaTable {
  const kind = tableName === 'events' ? 'event_stream' : 'table';
  const comment = getKtxPostHogTableDescription(tableName) ?? null;
  const estimatedRows = tableInfo.row_count ?? null;
  const columns = this.extractColumns(tableName, tableInfo.fields);
  return {
    catalog: this.resolved.projectId,
    db: null,
    name: tableName,
    kind,
    comment,
    estimatedRows,
    columns,
    foreignKeys: [],
  };
}
/**
 * Probes each table in `hiddenTablesToProbe` with `SELECT * ... LIMIT 0`.
 *
 * Tables whose probe reports an error are skipped. For the rest, columns come
 * from the returned headers and are all reported as nullable strings. Probes
 * run one at a time, in list order.
 */
private async discoverHiddenTables(): Promise<KtxSchemaTable[]> {
  const discovered: KtxSchemaTable[] = [];
  for (const tableName of hiddenTablesToProbe) {
    const probe = await this.query(`SELECT * FROM ${tableName} LIMIT 0`);
    if (!probe.error) {
      discovered.push({
        catalog: this.resolved.projectId,
        db: null,
        name: tableName,
        kind: 'table',
        comment: getKtxPostHogTableDescription(tableName) ?? null,
        estimatedRows: null,
        columns: probe.headers.map((columnName) => ({
          name: columnName,
          nativeType: 'String',
          normalizedType: 'VARCHAR',
          dimensionType: 'string',
          nullable: true,
          primaryKey: false,
          comment: getKtxPostHogColumnDescription(tableName, columnName) ?? null,
        })),
        foreignKeys: [],
      });
    }
  }
  return discovered;
}
/**
 * Converts a table's schema fields into snapshot columns.
 *
 * Fields of type `lazy_table`, `virtual_table`, `field_traverser`, and
 * `expression` are left out. Field order follows the input object.
 */
private extractColumns(tableName: string, fields: Record<string, PostHogSchemaField>): KtxSchemaColumn[] {
  const nonColumnTypes = ['lazy_table', 'virtual_table', 'field_traverser', 'expression'];
  const columns: KtxSchemaColumn[] = [];
  Object.entries(fields).forEach(([fieldName, fieldInfo]) => {
    if (nonColumnTypes.includes(fieldInfo.type)) {
      return;
    }
    const nativeType = this.normalizeFieldType(fieldInfo.type);
    columns.push({
      name: fieldName,
      nativeType,
      normalizedType: this.dialect.mapDataType(nativeType),
      dimensionType: this.dialect.mapToDimensionType(nativeType),
      nullable: this.isNullableField(tableName, fieldName, fieldInfo.type),
      primaryKey: this.isPrimaryKeyField(tableName, fieldName),
      comment: getKtxPostHogColumnDescription(tableName, fieldName) ?? null,
    });
  });
  return columns;
}
/**
 * Maps a PostHog schema field type to the native type name used in snapshots.
 *
 * Matching ignores case. Types without a mapping are returned as given.
 */
private normalizeFieldType(posthogType: string): string {
  const nativeTypeByFieldType: Record<string, string> = {
    string: 'String',
    integer: 'Int64',
    datetime: 'DateTime64',
    boolean: 'UInt8',
    bool: 'Boolean',
    json: 'JSON',
    array: 'Array(String)',
    uuid: 'UUID',
    event: 'String',
  };
  const lookupKey = posthogType.toLowerCase();
  const mapped = nativeTypeByFieldType[lookupKey];
  return mapped ?? posthogType;
}
/**
 * Decides whether a column is reported as nullable.
 *
 * On the `events` table the `uuid`, `event`, `timestamp`, and `distinct_id`
 * fields are never nullable. Otherwise a column is nullable unless its
 * lower-cased field type is one of those same four strings.
 *
 * NOTE(review): the fallback compares the field *type* against a list of
 * field names — confirm this is intended.
 */
private isNullableField(tableName: string, fieldName: string, fieldType: string): boolean {
  const required = ['uuid', 'event', 'timestamp', 'distinct_id'];
  const isRequiredEventField = tableName === 'events' && required.includes(fieldName);
  if (isRequiredEventField) {
    return false;
  }
  const typeIsRequired = required.includes(fieldType.toLowerCase());
  return !typeIsRequired;
}
/**
 * Reports whether a field is the known primary key of its table.
 *
 * Known keys: `events.uuid`, `persons.id`, `sessions.session_id`, `groups.key`.
 */
private isPrimaryKeyField(tableName: string, fieldName: string): boolean {
  switch (tableName) {
    case 'events':
      return fieldName === 'uuid';
    case 'persons':
      return fieldName === 'id';
    case 'sessions':
      return fieldName === 'session_id';
    case 'groups':
      return fieldName === 'key';
    default:
      return false;
  }
}
/**
 * Runs a HogQL statement through the project's `/query` endpoint.
 *
 * @param sql HogQL text to execute.
 * @param params Optional placeholder values; sent as `values` only when non-empty.
 * @returns Headers and rows from the response. When the response carries an
 *   `error`, an empty result with that `error` is returned instead of throwing.
 */
private async query(sql: string, params?: Record<string, unknown>): Promise<KtxQueryResult & { error?: string }> {
  const hasValues = params ? Object.keys(params).length > 0 : false;
  const hogqlQuery: Record<string, unknown> = { kind: 'HogQLQuery', query: sql };
  if (hasValues) {
    hogqlQuery.values = params;
  }
  const response = await this.makeRequest<PostHogQueryResponse>('/query', { query: hogqlQuery });
  if (response.error) {
    return { headers: [], rows: [], totalRows: 0, rowCount: null, error: response.error };
  }
  const headers = response.columns ?? [];
  const rows = response.results ?? [];
  // Each `types` entry is a pair; the second element is kept as the header type.
  const headerTypes = response.types?.map((pair) => pair[1]);
  const includeHeaderTypes = headerTypes !== undefined && headerTypes.length > 0;
  return {
    headers,
    rows,
    totalRows: rows.length,
    rowCount: rows.length,
    ...(includeHeaderTypes ? { headerTypes } : {}),
  };
}
/**
 * POSTs `body` as JSON to a project-scoped PostHog endpoint and returns the parsed response.
 *
 * Retry policy:
 * - 429: waits for the interval derived from the error message, then retries
 *   (up to `maxRetries` retries).
 * - other 4xx: thrown immediately — re-sending an identical request cannot
 *   turn a client error into a success.
 * - anything else (e.g. 5xx): retried up to `maxRetries` times, as before.
 *
 * @param endpoint Path appended to `/api/projects/<projectId>`.
 * @param body JSON-serializable request payload.
 * @param maxRetries Number of retries after the first attempt.
 * @throws Error carrying the HTTP status and the parsed error message.
 */
private async makeRequest<T>(endpoint: string, body: Record<string, unknown>, maxRetries = 3): Promise<T> {
  const url = `${this.resolved.baseUrl}/api/projects/${this.resolved.projectId}${endpoint}`;
  // The payload does not change between attempts, so serialize it once.
  const payload = JSON.stringify(body);
  let lastError: Error | null = null;
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
    const response = await this.fetchImpl(url, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.resolved.apiKey}`,
        'Content-Type': 'application/json',
      },
      body: payload,
    });
    if (response.ok) {
      return response.json() as Promise<T>;
    }
    const errorText = await response.text();
    const errorMessage = this.parseErrorMessage(errorText);
    if (response.status === 429 && attempt < maxRetries) {
      await this.sleep(this.parseRateLimitWaitTime(errorMessage) * 1000);
      continue;
    }
    lastError = new Error(`PostHog API error (${response.status}): ${errorMessage}`);
    // Client errors are deterministic; a 429 only reaches this point once retries are exhausted.
    if (response.status >= 400 && response.status < 500) {
      throw lastError;
    }
  }
  throw lastError ?? new Error('PostHog API request failed after retries');
}
/**
 * Extracts a human-readable message from an error response body.
 *
 * Prefers the JSON `detail` field, then `error`. Falls back to the raw text
 * when the body is not a JSON object or carries neither field. Non-string
 * values are JSON-encoded so structured payloads are not rendered as
 * "[object Object]".
 */
private parseErrorMessage(errorText: string): string {
  try {
    const parsed: unknown = JSON.parse(errorText);
    if (typeof parsed !== 'object' || parsed === null) {
      return errorText;
    }
    const { detail, error } = parsed as { detail?: unknown; error?: unknown };
    const message = detail ?? error;
    if (message === undefined || message === null) {
      return errorText;
    }
    return typeof message === 'string' ? message : JSON.stringify(message);
  } catch {
    // Body was not valid JSON; surface it verbatim.
    return errorText;
  }
}
/**
 * Derives how many seconds to wait from a rate-limit error message.
 *
 * Recognizes "Expected available in N second(s)" and "retry after N second(s)"
 * (case-insensitive) and returns N + 2; returns 30 when neither phrase is present.
 */
private parseRateLimitWaitTime(errorMessage: string): number {
  const hint = /(?:Expected available in|retry after) (\d+) seconds?/i.exec(errorMessage);
  if (!hint) {
    return 30;
  }
  const advertisedSeconds = Number.parseInt(hint[1] ?? '30', 10);
  return advertisedSeconds + 2;
}
/**
 * Escapes a value for embedding inside a single-quoted HogQL string literal:
 * backslashes are doubled first, then single quotes are doubled.
 */
private escapeHogQLString(value: string): string {
  const backslashesDoubled = value.split('\\').join('\\\\');
  return backslashesDoubled.split("'").join("''");
}
/**
 * Validates that `value` is an integer greater than or equal to zero and returns it.
 *
 * @param name Option name used in the error message.
 * @throws Error when `value` is not an integer or is negative.
 */
private positiveInteger(value: number, name: string): number {
  const isNonNegativeInteger = Number.isInteger(value) && value >= 0;
  if (isNonNegativeInteger) {
    return value;
  }
  throw new Error(`PostHog event-stream discovery requires ${name} to be a non-negative integer`);
}
/**
 * Guards against using this connector for a connection other than its own.
 *
 * @throws Error when `connectionId` differs from the connector's connection id.
 */
private assertConnection(connectionId: string): void {
  const ownConnectionId = this.connectionId;
  if (connectionId === ownConnectionId) {
    return;
  }
  throw new Error(`PostHog connector ${ownConnectionId} cannot scan connection ${connectionId}`);
}
}

View file

@ -1,48 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxPostHogDialect } from './dialect.js';
// Unit tests for KtxPostHogDialect: every assertion checks a pure string or
// type-mapping result, so no PostHog API access is involved.
describe('KtxPostHogDialect', () => {
const dialect = new KtxPostHogDialect();
it('quotes identifiers, formats table names, maps types, and prepares HogQL params', () => {
// Backticks inside an identifier are escaped with a backslash.
expect(dialect.quoteIdentifier('weird`name')).toBe('`weird\\`name`');
// Only the table name is emitted; catalog and db are ignored.
expect(dialect.formatTableName({ name: 'events', catalog: '157881', db: null })).toBe('`events`');
// The Nullable(...) wrapper and DateTime64 parameters are stripped before mapping.
expect(dialect.mapDataType('Nullable(DateTime64(6, UTC))')).toBe('TIMESTAMP');
expect(dialect.mapDataType('Array(String)')).toBe('JSON');
expect(dialect.mapToDimensionType('UInt8')).toBe('number');
expect(dialect.mapToDimensionType('Boolean')).toBe('boolean');
// `:name` placeholders are rewritten to `{name}`; parameter values pass through unchanged.
expect(dialect.prepareQuery('SELECT * FROM events WHERE event = :event', { event: '$pageview' })).toEqual({
sql: 'SELECT * FROM events WHERE event = {event}',
params: { event: '$pageview' },
});
});
it('builds sample and virtual-property queries without app dependencies', () => {
expect(dialect.generateSampleQuery('`events`', 5, ['event', 'timestamp'])).toBe(
'SELECT `event`, `timestamp` FROM `events` ORDER BY rand() LIMIT 5',
);
// Columns with a parentColumnId are read through JSONExtractString and aliased to their dotted name.
expect(
dialect.generateSampleQueryWithMetadata('`events`', 3, [
{ name: 'event', parentColumnId: null },
{ name: 'properties.$browser', parentColumnId: 'properties' },
]),
).toBe(
"SELECT `event`, JSONExtractString(properties, '$browser') AS `properties.$browser` FROM `events` ORDER BY rand() LIMIT 3",
);
expect(dialect.generateColumnSampleQuery('`events`', 'properties.$browser', 10)).toBe(
"SELECT JSONExtractString(properties, '$browser') FROM `events` WHERE JSONExtractString(properties, '$browser') IS NOT NULL ORDER BY rand() LIMIT 10",
);
});
it('builds data-dictionary and time helper SQL', () => {
expect(dialect.generateCardinalitySampleQuery('events', 'properties.$browser', 100)).toContain(
"JSONExtractString(properties, '$browser') AS val",
);
expect(dialect.generateDistinctValuesQuery('events', 'event', 20)).toContain('SELECT DISTINCT toString(`event`) AS val');
expect(dialect.getNullCountExpression('event')).toBe('countIf(event IS NULL)');
expect(dialect.getDistinctCountExpression('event')).toBe('uniq(event)');
expect(dialect.getTimeTruncExpression('timestamp', 'week', 'UTC')).toBe("DATE_TRUNC('week', toTimeZone(timestamp, 'UTC'))");
expect(dialect.parseIntervalToSql('7 day')).toBe('INTERVAL 7 DAY');
// This dialect has no column-statistics query.
expect(dialect.generateColumnStatisticsQuery('', 'events')).toBeNull();
});
});

View file

@ -1,258 +0,0 @@
import type { KtxSchemaDimensionType, KtxTableRef } from '@ktx/context/scan';
type PostHogTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
/**
 * Column descriptor accepted by `generateSampleQueryWithMetadata`.
 */
export interface KtxPostHogSampleColumnInfo {
// Column name; for nested properties this is the dotted name, e.g. `properties.$browser`.
name: string;
// When set (truthy), the column is selected through a JSON-extraction expression and
// aliased to `name`; when null the column is selected directly by its quoted name.
parentColumnId: string | null;
}
/**
 * SQL-generation and type-mapping helper for the PostHog (HogQL) connector.
 *
 * Every method is a pure string/type transformation. Native type names are
 * handled as ClickHouse-style names such as `Nullable(DateTime64(6, UTC))`.
 */
export class KtxPostHogDialect {
  readonly type = 'posthog';

  /** Lower-cased native type name -> dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    datetime64: 'time',
    datetime: 'time',
    date: 'time',
    int64: 'number',
    int32: 'number',
    int16: 'number',
    int8: 'number',
    uint64: 'number',
    uint32: 'number',
    uint16: 'number',
    uint8: 'number',
    float64: 'number',
    float32: 'number',
    decimal: 'number',
    integer: 'number',
    string: 'string',
    uuid: 'string',
    json: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /** Upper-cased native type name -> normalized SQL type name. */
  private readonly dataTypeMappings: Record<string, string> = {
    STRING: 'VARCHAR',
    UUID: 'UUID',
    INT64: 'BIGINT',
    INT32: 'INTEGER',
    INT16: 'SMALLINT',
    INT8: 'TINYINT',
    UINT64: 'BIGINT',
    UINT32: 'INTEGER',
    UINT16: 'SMALLINT',
    UINT8: 'TINYINT',
    FLOAT64: 'DOUBLE',
    FLOAT32: 'FLOAT',
    DATETIME64: 'TIMESTAMP',
    DATETIME: 'TIMESTAMP',
    DATE: 'DATE',
    JSON: 'JSON',
    ARRAY: 'JSON',
    BOOLEAN: 'BOOLEAN',
    BOOL: 'BOOLEAN',
  };

  /** Fixed unit lengths in seconds (month = 30 days, quarter = 90 days, year = 365 days). */
  private readonly secondsByUnit: Record<string, number> = {
    second: 1,
    minute: 60,
    hour: 3600,
    day: 86400,
    week: 604800,
    month: 2592000,
    quarter: 7776000,
    year: 31536000,
  };

  /**
   * Wraps an identifier in backticks. Backslashes are escaped before backticks
   * so a trailing backslash cannot swallow the closing quote.
   */
  quoteIdentifier(identifier: string): string {
    const escaped = identifier.replace(/\\/g, '\\\\').replace(/`/g, '\\`');
    return '`' + escaped + '`';
  }

  /** Returns the quoted table name; catalog and db are not part of the output. */
  formatTableName(table: PostHogTableNameRef): string {
    return this.quoteIdentifier(table.name);
  }

  /** Maps a native type to a normalized SQL type; unknown types come back cleaned and upper-cased. */
  mapDataType(nativeType: string): string {
    const cleanType = this.cleanType(nativeType);
    return this.lookupOwn(this.dataTypeMappings, cleanType) ?? cleanType;
  }

  /**
   * Maps a native type to a dimension type. Exact matches win; otherwise the
   * type name is matched by substring (date/time -> time, int/float/decimal/num
   * -> number), defaulting to `string`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const cleanType = this.cleanType(nativeType).toLowerCase();
    const exact = this.lookupOwn(this.typeMappings, cleanType);
    if (exact) {
      return exact;
    }
    if (cleanType.includes('date') || cleanType.includes('time')) {
      return 'time';
    }
    if (cleanType.includes('int') || cleanType.includes('float') || cleanType.includes('decimal') || cleanType.includes('num')) {
      return 'number';
    }
    // `bool` and `boolean` are already covered by the exact-match table above.
    return 'string';
  }

  /** Builds a random-order sample query; `tableName` must already be formatted/quoted. */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`;
  }

  /**
   * Like `generateSampleQuery`, but columns that have a parent column are read
   * through a JSON-extraction expression and aliased to their dotted name.
   */
  generateSampleQueryWithMetadata(tableName: string, limit: number, columnMetadata?: KtxPostHogSampleColumnInfo[]): string {
    if (!columnMetadata || columnMetadata.length === 0) {
      return this.generateSampleQuery(tableName, limit);
    }
    const columnList = columnMetadata
      .map((column) => {
        if (!column.parentColumnId) {
          return this.quoteIdentifier(column.name);
        }
        const expression = this.formatColumnExpression(column.name);
        return `${expression} AS ${this.quoteIdentifier(column.name)}`;
      })
      .join(', ');
    return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`;
  }

  /** Builds a random-order sample of the non-null values of one column. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `SELECT ${colExpr} FROM ${tableName} WHERE ${colExpr} IS NOT NULL ORDER BY rand() LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to HogQL `{name}` placeholders for every key in `params`.
   *
   * Keys are regex-escaped and inserted through a replacer function, so keys
   * containing characters such as `$` or `.` are matched and emitted literally.
   * Returns `params: undefined` when no parameters were supplied or the object is empty.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let processedSql = sql;
    const processedParams: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(params)) {
      // `(?!\w)` keeps `:id` from matching inside `:identifier`.
      const placeholder = new RegExp(`:${this.escapeRegExp(key)}(?!\\w)`, 'g');
      processedSql = processedSql.replace(placeholder, () => `{${key}}`);
      processedParams[key] = value;
    }
    return {
      sql: processedSql,
      params: Object.keys(processedParams).length > 0 ? processedParams : undefined,
    };
  }

  /** Returns a `rand() < pct` predicate, or an empty string when `samplePct` is outside (0, 1). */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `rand() < ${samplePct}`;
  }

  /** This dialect emits no table-sample clause. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Builds a LIMIT clause, adding OFFSET only for a positive offset. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting NULLs; `column` is inserted verbatim. */
  getNullCountExpression(column: string): string {
    return `countIf(${column} IS NULL)`;
  }

  /** Aggregate expression for the distinct count; `column` is inserted verbatim. */
  getDistinctCountExpression(column: string): string {
    return `uniq(${column})`;
  }

  /** Cardinality over the first `sampleSize` non-null values of a column. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `
SELECT uniq(val) AS cardinality
FROM (
SELECT ${colExpr} AS val
FROM ${tableName}
WHERE ${colExpr} IS NOT NULL
LIMIT ${sampleSize}
)
`;
  }

  /** Sorted distinct non-null values of a column, rendered as strings. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `
SELECT DISTINCT toString(${colExpr}) AS val
FROM ${tableName}
WHERE ${colExpr} IS NOT NULL
ORDER BY val
LIMIT ${limit}
`;
  }

  /** This dialect has no column-statistics query; always returns null. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Cardinality over a random sample of `sampleSize` non-null values of a column. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `
SELECT uniq(val) AS cardinality
FROM (
SELECT ${colExpr} AS val
FROM ${tableName}
WHERE ${colExpr} IS NOT NULL
ORDER BY rand()
LIMIT ${sampleSize}
)
`;
  }

  /** Truncates `column` to a calendar granularity, optionally after a timezone shift. */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = timezone ? `toTimeZone(${column}, '${this.escapeStringLiteral(timezone)}')` : column;
    return `DATE_TRUNC('${granularity}', ${col})`;
  }

  /**
   * Buckets `column` into fixed-width intervals (e.g. `'15 minute'`) counted from `origin`.
   *
   * @throws Error when the interval does not resolve to a positive number of
   *   seconds, which would otherwise emit `NaN` or a division by zero in SQL.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `toTimeZone(${column}, '${this.escapeStringLiteral(timezone)}')` : column;
    const [amount, unit] = interval.trim().split(/\s+/);
    const seconds = Number(amount) * this.getUnitSeconds(unit ?? 'day');
    if (!Number.isFinite(seconds) || seconds <= 0) {
      throw new Error(`Invalid time-truncation interval: ${interval}`);
    }
    const originExpr = origin ? `toDateTime('${this.escapeStringLiteral(origin)}')` : `toDateTime('1970-01-01')`;
    return `${originExpr} + toIntervalSecond(intDiv(toUnixTimestamp(${col}) - toUnixTimestamp(${originExpr}), ${seconds}) * ${seconds})`;
  }

  /** Converts `'<amount> <unit>'` to `INTERVAL <amount> <UNIT>`; the unit defaults to DAY. */
  parseIntervalToSql(interval: string): string {
    const [amount, unit] = interval.trim().split(/\s+/);
    return `INTERVAL ${amount} ${unit?.toUpperCase() ?? 'DAY'}`;
  }

  /**
   * Renders a column reference. `properties.<key>` and `person.properties.<key>`
   * become `JSONExtractString(<parent>, '<key>')`; anything else is a quoted identifier.
   */
  private formatColumnExpression(columnName: string): string {
    const rawName = columnName.replace(/^`|`$/g, '');
    const propertyMatch = rawName.match(/^(properties|person\.properties)\.(.+)$/);
    const parentCol = propertyMatch?.[1];
    const propertyKey = propertyMatch?.[2];
    if (parentCol && propertyKey) {
      return `JSONExtractString(${parentCol}, '${this.escapeStringLiteral(propertyKey)}')`;
    }
    return this.quoteIdentifier(rawName);
  }

  /**
   * Upper-cases a native type and strips `Nullable(...)` / `LowCardinality(...)`
   * wrappers and type parameters: `Array(...)` -> `ARRAY`,
   * `DateTime64(...)` -> `DATETIME64`, `DateTime(...)` -> `DATETIME`.
   */
  private cleanType(nativeType: string): string {
    let cleanType = nativeType.toUpperCase().trim();
    for (;;) {
      const wrapped = cleanType.match(/^(?:NULLABLE|LOWCARDINALITY)\((.+)\)$/);
      if (!wrapped?.[1]) {
        break;
      }
      cleanType = wrapped[1].trim();
    }
    if (cleanType.startsWith('ARRAY(')) {
      return 'ARRAY';
    }
    if (cleanType.startsWith('DATETIME64')) {
      return 'DATETIME64';
    }
    if (cleanType.startsWith('DATETIME(')) {
      return 'DATETIME';
    }
    return cleanType;
  }

  /** Seconds in one `unit` (case-insensitive); unknown units count as a day. */
  private getUnitSeconds(unit: string): number {
    return this.lookupOwn(this.secondsByUnit, unit.toLowerCase()) ?? 86400;
  }

  /** Escapes a value for a single-quoted string literal: doubles backslashes, then single quotes. */
  private escapeStringLiteral(value: string): string {
    return value.replace(/\\/g, '\\\\').replace(/'/g, "''");
  }

  /** Escapes regex metacharacters so `value` matches literally. */
  private escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Reads an own property only, so inherited names like `constructor` are never returned. */
  private lookupOwn<V>(table: Record<string, V>, key: string): V | undefined {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
  }
}

View file

@ -1,19 +0,0 @@
export { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js';
export {
getKtxPostHogColumnDescription,
getKtxPostHogPropertyDescription,
getKtxPostHogTableDescription,
} from './schema-descriptions.js';
export {
isKtxPostHogConnectionConfig,
KtxPostHogScanConnector,
postHogConnectionConfigFromConfig,
type KtxPostHogColumnDistinctValuesOptions,
type KtxPostHogColumnDistinctValuesResult,
type KtxPostHogConnectionConfig,
type KtxPostHogFetch,
type KtxPostHogReadOnlyQueryInput,
type KtxPostHogResolvedConnectionConfig,
type KtxPostHogScanConnectorOptions,
} from './connector.js';
export { createPostHogLiveDatabaseIntrospection } from './live-database-introspection.js';

View file

@ -1,34 +0,0 @@
import type { LiveDatabaseIntrospectionPort } from '@ktx/context/ingest';
import type { KtxProjectConnectionConfig } from '@ktx/context/project';
import { KtxPostHogScanConnector, type KtxPostHogConnectionConfig, type KtxPostHogFetch } from './connector.js';
/**
 * Options for `createPostHogLiveDatabaseIntrospection`. Everything except
 * `connections` is forwarded unchanged to each `KtxPostHogScanConnector`.
 */
interface CreatePostHogLiveDatabaseIntrospectionOptions {
// Project connections keyed by connection id; looked up on every extractSchema call.
connections: Record<string, KtxProjectConnectionConfig>;
// Environment passed to the connector — presumably for resolving credentials; confirm in connector.ts.
env?: NodeJS.ProcessEnv;
// Fetch implementation override passed to the connector.
fetch?: KtxPostHogFetch;
// Sleep override passed to the connector.
sleep?: (ms: number) => Promise<void>;
// Clock override passed to the connector.
now?: () => Date;
}
/**
 * Builds a live-database introspection port backed by the PostHog scan connector.
 *
 * Each `extractSchema` call creates a fresh connector for the requested
 * connection, introspects it with driver `posthog` and run id
 * `posthog-<connectionId>`, and always cleans the connector up afterwards,
 * including when introspection throws.
 */
export function createPostHogLiveDatabaseIntrospection(
  options: CreatePostHogLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const connector = new KtxPostHogScanConnector({
      connectionId,
      // May be undefined for an unknown id; the connector receives it as-is.
      connection: options.connections[connectionId] as KtxPostHogConnectionConfig | undefined,
      env: options.env,
      fetch: options.fetch,
      sleep: options.sleep,
      now: options.now,
    });
    try {
      const schema = await connector.introspect(
        { connectionId, driver: 'posthog' },
        { runId: `posthog-${connectionId}` },
      );
      return schema;
    } finally {
      await connector.cleanup();
    }
  };
  return { extractSchema };
}

View file

@ -1,11 +0,0 @@
import { describe, expect, it } from 'vitest';
import * as posthog from './index.js';
// Smoke test for the package entry point: checks that the main runtime exports
// exist and that one description lookup resolves through the re-export.
describe('@ktx/connector-posthog package exports', () => {
it('exports the connector, dialect, descriptions, and live-database adapter', () => {
// Classes and factory functions are all reported as 'function' by typeof.
expect(posthog.KtxPostHogDialect).toBeTypeOf('function');
expect(posthog.KtxPostHogScanConnector).toBeTypeOf('function');
expect(posthog.createPostHogLiveDatabaseIntrospection).toBeTypeOf('function');
expect(posthog.getKtxPostHogPropertyDescription('$browser')).toBe('User browser name.');
});
});

View file

@ -1,99 +0,0 @@
// Human-readable descriptions keyed by PostHog table name. Entries under the
// `system.` prefix are keyed by their dotted name.
const TABLE_DESCRIPTIONS: Record<string, string> = {
events:
'PostHog event stream containing all tracked user interactions. Each row represents a single event with properties, timestamp, and user identifier.',
persons:
'PostHog persons table containing unique users, identifiers, and user properties for segmentation and cohort analysis.',
sessions:
'PostHog sessions table grouping events into user sessions with duration, entry and exit URLs, and device details.',
groups:
'PostHog groups table for B2B and team-based analytics. Contains group identifiers and group properties.',
person_distinct_ids: 'PostHog identity resolution table mapping distinct_ids to person_ids.',
cohort_people: 'PostHog dynamic cohort membership table.',
static_cohort_people: 'PostHog static cohort membership table.',
'system.cohorts': 'PostHog cohort definitions table.',
'system.feature_flags': 'PostHog feature flag definitions table.',
'system.experiments': 'PostHog A/B test and experiment definitions table.',
'system.surveys': 'PostHog survey definitions table.',
'system.dashboards': 'PostHog dashboard metadata table.',
'system.insights': 'PostHog saved insight and chart definitions table.',
};
// Human-readable column descriptions keyed by `<table>.<column>`.
const COLUMN_DESCRIPTIONS: Record<string, string> = {
'events.uuid': 'Unique identifier for this specific event.',
'events.event': 'Event name such as $pageview, $autocapture, $identify, or a custom event.',
'events.distinct_id': 'User identifier that links events to persons.',
'events.timestamp': 'UTC timestamp when the event occurred.',
'events.created_at': 'Timestamp when the event was ingested into PostHog.',
'events.properties': 'JSON object containing event-specific properties.',
'events.person_id': 'Internal PostHog person UUID.',
'events.$session_id': 'Session identifier linking this event to sessions.',
'persons.id': 'Internal PostHog person UUID.',
'persons.distinct_id': 'Primary user identifier for joins with events.',
'persons.properties': 'JSON object containing user properties.',
'persons.created_at': 'Timestamp when this person was first seen in PostHog.',
'persons.is_identified': 'Whether the person has been explicitly identified.',
'sessions.session_id': 'Unique session identifier.',
'sessions.distinct_id': 'User identifier for this session.',
'sessions.$start_timestamp': 'Timestamp when the session started.',
'sessions.$end_timestamp': 'Timestamp when the session ended.',
'sessions.$session_duration': 'Total session duration in seconds.',
'groups.index': 'Index identifying the configured PostHog group type.',
'groups.key': 'Unique identifier for this group.',
'groups.properties': 'JSON object containing group properties.',
'groups.created_at': 'Timestamp when this group was first seen.',
'person_distinct_ids.distinct_id': 'Device or browser identifier for a person.',
'person_distinct_ids.person_id': 'Internal PostHog person UUID mapped to the distinct_id.',
'cohort_people.person_id': 'Person UUID belonging to the cohort.',
'cohort_people.cohort_id': 'Cohort identifier.',
'static_cohort_people.person_id': 'Person UUID belonging to the static cohort.',
'static_cohort_people.cohort_id': 'Static cohort identifier.',
'system.cohorts.id': 'Unique cohort identifier.',
'system.cohorts.name': 'Human-readable cohort name.',
'system.feature_flags.id': 'Unique feature flag identifier.',
'system.feature_flags.key': 'Feature flag key used in code.',
'system.experiments.id': 'Unique experiment identifier.',
'system.experiments.name': 'Experiment name.',
'system.surveys.id': 'Unique survey identifier.',
'system.surveys.name': 'Survey name.',
'system.dashboards.id': 'Unique dashboard identifier.',
'system.dashboards.name': 'Dashboard name.',
'system.insights.id': 'Unique insight identifier.',
'system.insights.name': 'Insight or chart name.',
};
// Human-readable descriptions keyed by property name, all `$`-prefixed.
const PROPERTY_DESCRIPTIONS: Record<string, string> = {
$browser: 'User browser name.',
$browser_version: 'User browser version.',
$os: 'Operating system.',
$os_version: 'Operating system version.',
$device: 'Device name.',
$device_type: 'Device type.',
$current_url: 'Full URL of the current page.',
$pathname: 'Path portion of the current URL.',
$host: 'Hostname of the current page.',
$referrer: 'Referrer URL.',
$referring_domain: 'Referrer domain.',
$utm_source: 'UTM source parameter.',
$utm_medium: 'UTM medium parameter.',
$utm_campaign: 'UTM campaign parameter.',
$utm_content: 'UTM content parameter.',
$utm_term: 'UTM term parameter.',
$lib: 'PostHog library name used to capture the event.',
$lib_version: 'PostHog library version.',
$insert_id: 'Unique identifier for event deduplication.',
$active_feature_flags: 'List of active feature flags for this user or event.',
$feature_flag: 'Feature flag name for flag-related events.',
$feature_flag_response: 'Feature flag value or variant.',
};
/**
 * Returns the description for a PostHog table, or `undefined` when none is known.
 *
 * Only own entries of the table are consulted, so names inherited from
 * `Object.prototype` (e.g. `constructor`) yield `undefined` rather than a function.
 */
export function getKtxPostHogTableDescription(tableName: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(TABLE_DESCRIPTIONS, tableName)
    ? TABLE_DESCRIPTIONS[tableName]
    : undefined;
}
/**
 * Returns the description for a column, looked up under `<tableName>.<columnName>`,
 * or `undefined` when none is known.
 */
export function getKtxPostHogColumnDescription(tableName: string, columnName: string): string | undefined {
  const lookupKey = [tableName, columnName].join('.');
  return COLUMN_DESCRIPTIONS[lookupKey];
}
/**
 * Returns the description for a property key, or `null` when none is known.
 *
 * Only own entries of the table are consulted: a property named `constructor`
 * or `toString` must yield `null`, not a function inherited from `Object.prototype`.
 */
export function getKtxPostHogPropertyDescription(propertyKey: string): string | null {
  if (!Object.prototype.hasOwnProperty.call(PROPERTY_DESCRIPTIONS, propertyKey)) {
    return null;
  }
  return PROPERTY_DESCRIPTIONS[propertyKey] ?? null;
}

View file

@ -1,9 +0,0 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src"
},
"include": ["src/**/*.ts"],
"exclude": ["dist", "node_modules"]
}

View file

@ -120,10 +120,12 @@
"scripts": {
"build": "tsc -p tsconfig.json",
"relationships:benchmarks": "pnpm --silent run build && node scripts/relationship-benchmark-report.mjs",
"relationships:benchmarks:test": "KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts",
"search:pglite-spike": "node scripts/pglite-hybrid-search-spike.mjs",
"search:pglite-owner-prototype": "node scripts/pglite-owner-process-prototype.mjs",
"search:pglite-sl-prototype": "node scripts/pglite-sl-search-prototype.mjs",
"test": "vitest run",
"test": "vitest run --exclude src/scan/relationship-benchmarks.test.ts --exclude src/scan/local-scan.test.ts --exclude src/mcp/local-project-ports.test.ts --exclude src/ingest/local-stage-ingest.test.ts --exclude src/sl/pglite-sl-search-prototype.test.ts --exclude src/core/git.service.test.ts --exclude src/ingest/local-adapters.test.ts --exclude src/ingest/local-bundle-ingest.test.ts --exclude src/ingest/local-metabase-ingest.test.ts --exclude src/sl/local-sl.test.ts --exclude src/search/pglite-owner-process.test.ts --exclude src/scan/local-enrichment-artifacts.test.ts --exclude src/search/pglite-spike.test.ts --exclude src/wiki/local-knowledge.test.ts --exclude src/sl/local-query.test.ts --exclude src/scan/relationship-review-decisions.test.ts --exclude src/scan/relationship-profiling.test.ts",
"test:slow": "vitest run src/scan/local-scan.test.ts src/mcp/local-project-ports.test.ts src/ingest/local-stage-ingest.test.ts src/sl/pglite-sl-search-prototype.test.ts src/core/git.service.test.ts src/ingest/local-adapters.test.ts src/ingest/local-bundle-ingest.test.ts src/ingest/local-metabase-ingest.test.ts src/sl/local-sl.test.ts src/search/pglite-owner-process.test.ts src/scan/local-enrichment-artifacts.test.ts src/search/pglite-spike.test.ts src/wiki/local-knowledge.test.ts src/sl/local-query.test.ts src/scan/relationship-review-decisions.test.ts src/scan/relationship-profiling.test.ts --testTimeout 30000",
"type-check": "tsc -p tsconfig.json --noEmit"
},
"dependencies": {

View file

@ -18,7 +18,6 @@ export const connectionTypeSchema = z.enum([
'METABASE',
'LOOKER',
'NOTION',
'POSTHOG',
'MYSQL',
'CLICKHOUSE',
'PLAIN',

View file

@ -256,6 +256,31 @@ describe('GitService', () => {
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
// Regression test: two GitService instances scoped to the same worktree commit
// different files at the same time; both commits must succeed.
it('serializes concurrent commits from scoped services targeting the same worktree', async () => {
const { commitHash } = await writeAndCommit('seed.md', 'seed');
// The worktree is created next to the temp repo, not inside it.
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-fw-concurrent`);
await service.addWorktree(wtDir, 'session/concurrent', commitHash);
// Two independently created scoped services pointing at the same worktree path.
const first = service.forWorktree(wtDir);
const second = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8');
await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8');
// Start both commits without awaiting in between so they overlap.
const [a, b] = await Promise.all([
first.commitFile('a.md', 'add a', 'System User', 'system@example.com'),
second.commitFile('b.md', 'add b', 'System User', 'system@example.com'),
]);
// Each call must yield a full 40-hex commit hash and the committed content must be readable.
expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/);
expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/);
await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n');
await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n');
// Best-effort cleanup; failures here are ignored.
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
});
describe('squashMergeIntoMain', () => {

View file

@ -32,6 +32,8 @@ export type SquashMergeResult =
| { ok: false; conflict: true; conflictPaths: string[] };
export class GitService {
private static readonly mutationQueues = new Map<string, Promise<void>>();
private readonly logger: KtxLogger;
private git!: SimpleGit;
private configDir: string;
@ -92,6 +94,15 @@ export class GitService {
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
return this.withMutationQueue(() => this.commitFileUnlocked(filePath, commitMessage, author, authorEmail));
}
private async commitFileUnlocked(
filePath: string,
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
try {
// Stage the file
@ -166,6 +177,15 @@ export class GitService {
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
return this.withMutationQueue(() => this.commitFilesUnlocked(filePaths, commitMessage, author, authorEmail));
}
private async commitFilesUnlocked(
filePaths: string[],
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
try {
for (const filePath of filePaths) {
@ -231,6 +251,10 @@ export class GitService {
if (filePaths.length === 0) {
return;
}
return this.withMutationQueue(() => this.checkoutFilesUnlocked(filePaths));
}
private async checkoutFilesUnlocked(filePaths: string[]): Promise<void> {
try {
await this.git.checkout(['--', ...filePaths]);
} catch (error) {
@ -292,6 +316,10 @@ export class GitService {
if (!trimmed) {
return;
}
return this.withMutationQueue(() => this.addNoteUnlocked(commitHash, trimmed));
}
private async addNoteUnlocked(commitHash: string, trimmed: string): Promise<void> {
try {
await this.git.raw(['notes', 'add', '-f', '-m', trimmed, commitHash]);
} catch (error) {
@ -343,6 +371,15 @@ export class GitService {
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
return this.withMutationQueue(() => this.deleteFileUnlocked(filePath, commitMessage, author, authorEmail));
}
private async deleteFileUnlocked(
filePath: string,
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
try {
// Remove the file from git
@ -485,6 +522,13 @@ export class GitService {
async squashTo(
preHead: string,
options: { message: string; author: string; authorEmail: string; expectedAuthor?: string },
): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> {
return this.withMutationQueue(() => this.squashToUnlocked(preHead, options));
}
private async squashToUnlocked(
preHead: string,
options: { message: string; author: string; authorEmail: string; expectedAuthor?: string },
): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> {
const { message, author, authorEmail } = options;
const expectedAuthor = options.expectedAuthor ?? author;
@ -560,6 +604,15 @@ export class GitService {
author: string,
authorEmail: string,
commitMessage: string,
): Promise<SquashMergeResult> {
return this.withMutationQueue(() => this.squashMergeIntoMainUnlocked(branch, author, authorEmail, commitMessage));
}
private async squashMergeIntoMainUnlocked(
branch: string,
author: string,
authorEmail: string,
commitMessage: string,
): Promise<SquashMergeResult> {
// Diff of HEAD..branch (two dots) lists commits/files reachable from `branch` that
// aren't on HEAD — i.e. exactly what the squash would apply. Three dots (HEAD...branch)
@ -615,7 +668,7 @@ export class GitService {
* range, which can pause the sequencer on conflicts.
*/
async resetHardTo(targetSha: string): Promise<void> {
await this.git.raw(['reset', '--hard', targetSha]);
await this.withMutationQueue(() => this.git.raw(['reset', '--hard', targetSha]));
}
/**
@ -667,6 +720,10 @@ export class GitService {
* Used by the memory agent to isolate per-session writes from interactive saves on main.
*/
async addWorktree(path: string, branch: string, startSha: string): Promise<void> {
await this.withMutationQueue(() => this.addWorktreeUnlocked(path, branch, startSha));
}
private async addWorktreeUnlocked(path: string, branch: string, startSha: string): Promise<void> {
try {
await this.git.raw(['worktree', 'add', '-b', branch, path, startSha]);
} catch (error) {
@ -679,6 +736,10 @@ export class GitService {
* worktrees are ktx-internal a clean working tree is not required.
*/
async removeWorktree(path: string): Promise<void> {
await this.withMutationQueue(() => this.removeWorktreeUnlocked(path));
}
private async removeWorktreeUnlocked(path: string): Promise<void> {
try {
await this.git.raw(['worktree', 'remove', '--force', path]);
} catch (error) {
@ -724,7 +785,7 @@ export class GitService {
}
async deleteBranch(branch: string, force = false): Promise<void> {
await this.git.raw(['branch', force ? '-D' : '-d', branch]);
await this.withMutationQueue(() => this.git.raw(['branch', force ? '-D' : '-d', branch]));
}
/**
@ -745,6 +806,15 @@ export class GitService {
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
return this.withMutationQueue(() => this.deleteDirectoryUnlocked(directoryPath, commitMessage, author, authorEmail));
}
private async deleteDirectoryUnlocked(
directoryPath: string,
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
try {
// Remove the directory recursively from git
@ -795,6 +865,17 @@ export class GitService {
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
return this.withMutationQueue(() =>
this.deleteDirectoriesUnlocked(directoryPaths, commitMessage, author, authorEmail),
);
}
private async deleteDirectoriesUnlocked(
directoryPaths: string[],
commitMessage: string,
author: string,
authorEmail: string,
): Promise<GitCommitInfo> {
if (directoryPaths.length === 0) {
return {
@ -852,4 +933,27 @@ export class GitService {
created: true,
};
}
/**
 * Runs `operation` after every previously queued mutation for the same
 * `configDir` has finished, so mutations sharing that key never overlap.
 *
 * The queue is a promise chain stored per key in the static `mutationQueues`
 * map. A failure of `operation` propagates to the caller but does not block
 * later operations, and the map entry is removed once the last queued
 * operation completes.
 */
private async withMutationQueue<T>(operation: () => Promise<T>): Promise<T> {
  const key = this.configDir;
  const previous = GitService.mutationQueues.get(key) ?? Promise.resolve();
  const previousSettled = previous.catch(() => undefined);

  // Gate that stays pending until this operation is done; the next caller waits on it.
  let release: () => void = () => {};
  const gate = new Promise<void>((resolve) => {
    release = resolve;
  });
  const current = previousSettled.then(() => gate);
  GitService.mutationQueues.set(key, current);

  await previousSettled;
  try {
    return await operation();
  } finally {
    release();
    // Only drop the entry when no later caller has queued behind this one.
    if (GitService.mutationQueues.get(key) === current) {
      GitService.mutationQueues.delete(key);
    }
  }
}
}

View file

@ -284,6 +284,18 @@ describe('chunkMetabaseStagedDir — syncMode enum coverage', () => {
expect(allRawFiles).not.toContain('cards/200.json');
});
// A sync config with syncMode ONLY and an empty selections array must keep
// both fixture cards (100 and 200) in the resulting work units.
it('ONLY with no selections includes every matching card for old generated configs', async () => {
await writeInline(dir, 'sync-config.json', {
...BASE_SYNC,
syncMode: 'ONLY',
selections: [],
});
const result = await chunkMetabaseStagedDir(dir);
// Flatten the raw files of every work unit to check membership regardless of chunking.
const allRawFiles = result.workUnits.flatMap((wu) => wu.rawFiles);
expect(allRawFiles).toContain('cards/100.json');
expect(allRawFiles).toContain('cards/200.json');
});
it('EXCEPT excludes cards in selected collections; includes the rest', async () => {
await writeInline(dir, 'sync-config.json', {
...BASE_SYNC,

View file

@ -66,7 +66,7 @@ function cardMatchesSyncConfig(card: StagedCardFile, config: StagedSyncConfig):
if (card.archived) {
return false;
}
if (config.syncMode === 'ALL') {
if (config.syncMode === 'ALL' || (config.syncMode === 'ONLY' && config.selections.length === 0)) {
return true;
}
const selectedCollections = new Set(

View file

@ -327,6 +327,40 @@ describe('MetabaseClient.getResolvedSql', () => {
expect(result?.resolvedSql).toBe('SELECT * FROM (SELECT a, b FROM base) t ');
});
// A card whose only template tag is a snippet must be resolved by inlining the
// snippet content fetched from /api/native-query-snippet, without calling the
// custom-retry request helper.
it('inlines native-query snippets before checking for remaining variables', async () => {
// Stubbed snippet listing returned by the client's `request` method.
const requestSpy = vi.fn().mockResolvedValue([
{
id: 1,
name: 'account_join',
content: 'LEFT JOIN accounts a ON a.account_id = mart.account_id',
},
]);
const requestWithCustomRetrySpy = vi.fn();
// Replace both request helpers on the client instance with the spies.
const client = makeClient((client) => {
Reflect.set(client, 'request', requestSpy);
Reflect.set(client, 'requestWithCustomRetry', requestWithCustomRetrySpy);
});
const card = nativeCard('SELECT a.account_name FROM mart {{snippet: account_join}}', {
'snippet: account_join': {
id: 'snippet-tag',
name: 'snippet: account_join',
type: 'snippet',
'snippet-name': 'account_join',
'snippet-id': 1,
},
});
const result = await client.getResolvedSql(card);
expect(requestSpy).toHaveBeenCalledWith('GET', '/api/native-query-snippet');
expect(requestWithCustomRetrySpy).not.toHaveBeenCalled();
expect(result?.resolutionStatus).toBe('resolved');
// The snippet tag is replaced in place by the snippet content.
expect(result?.resolvedSql).toBe(
'SELECT a.account_name FROM mart LEFT JOIN accounts a ON a.account_id = mart.account_id',
);
expect(result?.resolvedSql).not.toContain('{{snippet:');
});
it('uses /api/dataset/native for naked variables and prepends a warning comment', async () => {
const requestSpy = vi.fn().mockResolvedValue({ query: "SELECT * WHERE id = 'placeholder' AND n = 1" });
const client = makeClient((client) => {

View file

@ -39,6 +39,13 @@ interface TemplateTagInfo {
dummyValue: string | null;
}
/**
 * A native-query snippet record as produced by `parseNativeQuerySnippets` from the response of
 * `GET /api/native-query-snippet`.
 */
interface NativeQuerySnippet {
/** Numeric snippet id; matched against a template tag's `snippet-id` when inlining. */
id: number;
/** Snippet name; compared after normalization (prefix stripped, trimmed, lower-cased). */
name: string;
/** SQL text substituted for the `{{snippet: name}}` placeholder. */
content: string;
/** When `true`, the snippet is skipped during inlining. Only boolean values are kept by the parser. */
archived?: boolean | null;
}
interface CreateCardParams {
name: string;
databaseId: number;
@ -100,6 +107,43 @@ function collectRemainingPlaceholderNames(sql: string): Set<string> {
return names;
}
/**
 * Gathers the distinct snippet names referenced by `{{snippet: name}}` placeholders in `sql`.
 *
 * The `snippet:` keyword is matched case-insensitively and whitespace around the name is dropped.
 *
 * @param sql - SQL text to scan.
 * @returns The trimmed snippet names, de-duplicated, in order of first appearance.
 */
function collectRemainingSnippetNames(sql: string): Set<string> {
  const placeholderPattern = /\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi;
  const found = new Set<string>();
  // A global regex advances `lastIndex` on every exec, so this walks all placeholders in order.
  let hit: RegExpExecArray | null = placeholderPattern.exec(sql);
  while (hit !== null) {
    found.add(hit[1].trim());
    hit = placeholderPattern.exec(sql);
  }
  return found;
}
/**
 * Produces the lookup key used to compare snippet names.
 *
 * A leading `snippet:` prefix (any case, followed by optional whitespace) is removed, then the
 * remainder is trimmed and lower-cased. `null` and `undefined` normalize to the empty string.
 *
 * @param name - Raw snippet or template-tag name.
 * @returns The normalized name.
 */
function normalizeSnippetName(name: string | null | undefined): string {
  const raw = name ?? '';
  // The prefix is only recognised at the very start of the raw value (before trimming).
  const withoutPrefix = raw.replace(/^snippet:\s*/i, '');
  const trimmed = withoutPrefix.trim();
  return trimmed.toLowerCase();
}
/**
 * Validates an untyped snippets response and keeps only well-formed snippet records.
 *
 * Accepts either a bare array or an object whose `data` property is an array; anything else
 * yields an empty list. An entry is kept only when it is a non-array object with a numeric `id`
 * and string `name` and `content`. `archived` is copied only when it is a boolean.
 *
 * @param value - Raw response payload.
 * @returns The snippet records that passed validation, in input order.
 */
function parseNativeQuerySnippets(value: unknown): NativeQuerySnippet[] {
  let candidates: unknown[] = [];
  if (Array.isArray(value)) {
    candidates = value;
  } else if (typeof value === 'object' && value !== null) {
    const wrapped = (value as { data?: unknown }).data;
    if (Array.isArray(wrapped)) {
      candidates = wrapped;
    }
  }

  return candidates.flatMap((candidate): NativeQuerySnippet[] => {
    if (typeof candidate !== 'object' || candidate === null || Array.isArray(candidate)) {
      return [];
    }
    const { id, name, content, archived } = candidate as Record<string, unknown>;
    if (typeof id !== 'number' || typeof name !== 'string' || typeof content !== 'string') {
      return [];
    }
    const parsed: NativeQuerySnippet = { id, name, content };
    // Non-boolean `archived` values (including null) are dropped rather than copied.
    if (typeof archived === 'boolean') {
      parsed.archived = archived;
    }
    return [parsed];
  });
}
function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): MetabaseDatasetQuery {
if (datasetQuery?.stages?.[0]?.native !== undefined) {
const stages = [...(datasetQuery.stages ?? [])];
@ -148,6 +192,7 @@ export class MetabaseClient implements MetabaseRuntimeClient {
private readonly logger: MetabaseClientLogger;
private readonly baseUrl: string;
private readonly config: MetabaseClientConfig;
private snippetCache: Promise<NativeQuerySnippet[]> | null = null;
constructor(
runtime: MetabaseClientRuntimeConfig,
@ -261,6 +306,63 @@ export class MetabaseClient implements MetabaseRuntimeClient {
return this.request<MetabaseCardSummary[]>('GET', '/api/card/?f=all');
}
/**
 * Returns the parsed snippet list, issuing `GET /api/native-query-snippet` only on the first
 * call; the resulting promise is stored in `snippetCache` and handed back on later calls.
 *
 * NOTE(review): the stored promise is reused even if it rejects — confirm that is intended.
 */
private getNativeQuerySnippets(): Promise<NativeQuerySnippet[]> {
  let pending = this.snippetCache;
  if (pending == null) {
    pending = this.request<unknown>('GET', '/api/native-query-snippet').then(parseNativeQuerySnippets);
    this.snippetCache = pending;
  }
  return pending;
}
/**
 * Replaces every `{{snippet: name}}` placeholder in `sql` with the content of the matching
 * native-query snippet.
 *
 * Lookup order per placeholder: the numeric `snippet-id` of a matching `snippet` template tag,
 * then the normalized snippet name. Archived snippets are never used. Placeholders with no
 * match are left in the SQL untouched and reported back.
 *
 * @param sql - SQL that may contain snippet placeholders.
 * @param templateTags - Template tags to consult; only those with `type === 'snippet'` are used.
 * @param cardId - Card id, used only in the warning logged when the snippet list cannot be loaded.
 * @returns The rewritten SQL plus the trimmed names of placeholders that could not be resolved.
 *   If loading the snippets fails, the SQL is returned unchanged with every referenced name
 *   listed as unresolved.
 */
private async inlineNativeQuerySnippets(
  sql: string,
  templateTags: MetabaseTemplateTag[],
  cardId: number,
): Promise<{ sql: string; unresolved: string[] }> {
  const referencedNames = collectRemainingSnippetNames(sql);
  if (referencedNames.size === 0) {
    // Nothing to inline, so the snippets endpoint is not contacted at all.
    return { sql, unresolved: [] };
  }

  let available: NativeQuerySnippet[];
  try {
    available = await this.getNativeQuerySnippets();
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    this.logger.warn(
      `[metabase] failed to load native query snippets for card ${cardId}; leaving snippet placeholders unresolved: ${reason}`,
    );
    return { sql, unresolved: Array.from(referencedNames) };
  }

  // Index the non-archived snippets by id and by normalized name.
  const byId = new Map<number, NativeQuerySnippet>();
  const byName = new Map<string, NativeQuerySnippet>();
  for (const candidate of available) {
    if (candidate.archived !== true) {
      byId.set(candidate.id, candidate);
      byName.set(normalizeSnippetName(candidate.name), candidate);
    }
  }

  const snippetTags = templateTags.filter((tag) => tag.type === 'snippet');
  const missing = new Set<string>();
  const rewritten = sql.replace(
    /\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi,
    (placeholder: string, capturedName: string) => {
      const key = normalizeSnippetName(capturedName);
      const matchingTag = snippetTags.find((tag) => {
        if (normalizeSnippetName(tag['snippet-name']) === key) {
          return true;
        }
        return normalizeSnippetName(tag.name) === key;
      });
      // Prefer the id recorded on the template tag; fall back to a lookup by name.
      const tagSnippetId = matchingTag?.['snippet-id'];
      const viaId = typeof tagSnippetId === 'number' ? byId.get(tagSnippetId) : undefined;
      const resolved = viaId ?? byName.get(key);
      if (resolved) {
        return resolved.content;
      }
      missing.add(capturedName.trim());
      return placeholder;
    },
  );

  return { sql: rewritten, unresolved: Array.from(missing) };
}
async convertMbqlToNative(datasetQuery: MetabaseDatasetQuery): Promise<MetabaseNativeQueryResult> {
return this.request<MetabaseNativeQueryResult>('POST', '/api/dataset/native', {
...datasetQuery,
@ -351,7 +453,18 @@ export class MetabaseClient implements MetabaseRuntimeClient {
// silently filter rows out — see incident with auction_seller_bidder_pair_suspicion).
let processedSql = stripOptionalClauses(nativeQuery);
// Step 2: inline {{#CARD_ID}} card references locally. Recursively strip optional
// Step 2: inline native-query snippets. Metabase's substitution endpoint does not
// always expand {{snippet: name}} for fetched card SQL, but the snippets API does.
const snippetResult = await this.inlineNativeQuerySnippets(processedSql, templateTagEntries, card.id);
processedSql = snippetResult.sql;
if (snippetResult.unresolved.length > 0) {
this.logger.warn(
`[metabase] card ${card.id} has unresolved SQL snippets: ${snippetResult.unresolved.join(', ')}`,
);
return { resolvedSql: processedSql, templateTags, resolutionStatus: 'fallback' };
}
// Step 3: inline {{#CARD_ID}} card references locally. Recursively strip optional
// clauses in referenced cards too — the same reasoning applies all the way down.
try {
processedSql = await expandCardReferences(processedSql, {
@ -361,7 +474,17 @@ export class MetabaseClient implements MetabaseRuntimeClient {
if (!referencedNative) {
throw new Error(`referenced card ${id} has no native query`);
}
return { native_query: stripOptionalClauses(referencedNative) };
const referencedSnippetResult = await this.inlineNativeQuerySnippets(
stripOptionalClauses(referencedNative),
Object.values(this.getTemplateTags(referenced)),
referenced.id,
);
if (referencedSnippetResult.unresolved.length > 0) {
throw new Error(
`referenced card ${id} has unresolved SQL snippets: ${referencedSnippetResult.unresolved.join(', ')}`,
);
}
return { native_query: referencedSnippetResult.sql };
},
});
} catch (err) {
@ -372,7 +495,7 @@ export class MetabaseClient implements MetabaseRuntimeClient {
throw err;
}
// Step 3: collect template tags that still appear in the SQL after strip + inline.
// Step 4: collect template tags that still appear in the SQL after strip + inline.
// Anything bracketed-only is gone now; anything card-referenced is inlined.
const remainingNames = collectRemainingPlaceholderNames(processedSql);
const remainingTags = templateTagEntries.filter((tag) => tag.type !== 'snippet' && remainingNames.has(tag.name));
@ -381,7 +504,7 @@ export class MetabaseClient implements MetabaseRuntimeClient {
return { resolvedSql: processedSql, templateTags, resolutionStatus: 'resolved' };
}
// Step 4: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's
// Step 5: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's
// substitution endpoint. Only required because we can't translate dimension-tag
// bindings to warehouse columns ourselves. Prepend a SQL comment listing every
// dummy substitution so downstream consumers (the metabase_ingest LLM) know which

View file

@ -57,13 +57,9 @@ describe('computeFetchScope', () => {
});
});
it('returns empty explicit scope for ONLY with no selections', () => {
it('treats generated ONLY with no selections as all', () => {
const scope = computeFetchScope({ ...BASE_CONFIG, syncMode: 'ONLY', selections: [] });
expect(scope).toEqual({
kind: 'explicit',
includeCardIds: new Set(),
includeCollectionIds: new Set(),
});
expect(scope).toEqual({ kind: 'all' });
});
});

View file

@ -11,7 +11,7 @@ export type FetchScope =
* union the fetcher switches on. Pure function; no I/O, no side effects.
*/
export function computeFetchScope(syncConfig: StagedSyncConfig): FetchScope {
if (syncConfig.syncMode === 'ALL') {
if (syncConfig.syncMode === 'ALL' || (syncConfig.syncMode === 'ONLY' && syncConfig.selections.length === 0)) {
return { kind: 'all' };
}
const cardIds = new Set<number>();

View file

@ -1,8 +1,21 @@
import { describe, expect, it } from 'vitest';
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxProjectConnectionConfig } from '../../../project/index.js';
import { metabaseRuntimeConfigFromLocalConnection } from './local-metabase.adapter.js';
describe('metabaseRuntimeConfigFromLocalConnection', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-runtime-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('resolves api_url and env-backed api_key_ref from a flat ktx.yaml connection', () => {
const connection: KtxProjectConnectionConfig = {
driver: 'metabase',
@ -20,6 +33,21 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => {
});
});
// Verifies that an `api_key_ref` of the form `file:<path>` is resolved by reading the file: the
// key file is written with a trailing newline and the resolved `apiKey` is expected without it.
it('resolves file-backed api_key_ref from pasted setup secrets', async () => {
const keyPath = join(tempDir, 'metabase-main-api-key');
await writeFile(keyPath, 'mb_file_key\n', 'utf-8'); // pragma: allowlist secret
const connection: KtxProjectConnectionConfig = {
driver: 'metabase',
api_url: 'https://metabase.example.com',
api_key_ref: `file:${keyPath}`,
};
expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({
apiUrl: 'https://metabase.example.com',
apiKey: 'mb_file_key', // pragma: allowlist secret
});
});
it('accepts url as the local api URL alias', () => {
const connection: KtxProjectConnectionConfig = {
driver: 'metabase',

View file

@ -1,5 +1,6 @@
import type { KtxLocalProject, KtxProjectConnectionConfig } from '../../../project/index.js';
import { ktxLocalStateDbPath } from '../../../project/index.js';
import { resolveKtxConfigReference } from '../../../core/config-reference.js';
import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './client.js';
import {
IngestMetabaseClientFactory,
@ -13,14 +14,6 @@ function stringField(value: unknown): string | null {
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null;
}
/**
 * Resolves an `env:NAME` reference against the given environment.
 *
 * @param ref - Reference string; only values starting with `env:` are handled.
 * @param env - Environment to read the variable from.
 * @returns The variable's value as filtered by `stringField`, or `null` when `ref` is not an
 *   `env:` reference.
 */
function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null {
  const prefix = 'env:';
  return ref.startsWith(prefix) ? stringField(env[ref.slice(prefix.length)]) : null;
}
/**
 * Tells whether the connection carries a proxy setting under either spelling of the key.
 *
 * @param connection - Connection config to inspect.
 * @returns `true` when `networkProxy` or `network_proxy` is neither `null` nor `undefined`.
 */
function hasNetworkProxy(connection: KtxProjectConnectionConfig): boolean {
  if (connection.networkProxy != null) {
    return true;
  }
  return connection.network_proxy != null;
}
@ -42,7 +35,7 @@ export function metabaseRuntimeConfigFromLocalConnection(
const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url);
const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey);
const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef);
const apiKey = literalApiKey ?? (apiKeyRef ? resolveEnvReference(apiKeyRef, env) : null);
const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null);
if (!apiUrl) {
throw new Error(`Connection "${connectionId}" is missing metabase api_url`);

View file

@ -79,6 +79,21 @@ function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['t
return actions.filter((action) => action.target === target).length;
}
/**
 * Detects a tool output that reports failure through its structured payload.
 *
 * @param output - Arbitrary tool output.
 * @returns `true` only when `output` is an object whose `structured` property is an object with
 *   `success` strictly equal to `false`; every other shape yields `false`.
 */
function isStructuredToolFailure(output: unknown): boolean {
  if (typeof output !== 'object' || output === null) {
    return false;
  }
  const { structured } = output as { structured?: unknown };
  if (typeof structured !== 'object' || structured === null) {
    return false;
  }
  return (structured as { success?: unknown }).success === false;
}
/**
 * Decides whether a logged tool call counts as a failure.
 *
 * A call fails when its `error` is truthy, or when the tool is `sl_write_source` or `wiki_write`
 * and its output is a structured failure (see `isStructuredToolFailure`).
 *
 * @param entry - Tool call log entry to classify.
 * @returns `true` when the call should be counted as failed.
 */
function isFailedToolCall(entry: ToolCallLogEntry): boolean {
  if (entry.error) {
    return true;
  }
  // Structured `success: false` payloads are only inspected for these two tools.
  const inspectsOutput = entry.toolName === 'sl_write_source' || entry.toolName === 'wiki_write';
  return inspectsOutput && isStructuredToolFailure(entry.output);
}
function reportIdFromCreateResult(result: unknown): string | undefined {
if (!result || typeof result !== 'object' || !('id' in result)) {
return undefined;
@ -344,7 +359,7 @@ export class IngestBundleRunner {
toolNames: new Set<string>(),
} satisfies MutableToolTranscriptSummary);
current.toolCallCount += 1;
current.errorCount += entry.error ? 1 : 0;
current.errorCount += isFailedToolCall(entry) ? 1 : 0;
current.toolNames.add(entry.toolName);
transcriptSummaries.set(entry.wuKey, current);
};
@ -712,6 +727,7 @@ export class IngestBundleRunner {
sourceKey: job.sourceKey,
connectionId: job.connectionId,
jobId: job.jobId,
toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.errorCount ?? 0,
onStepFinish: ({ stepIndex, stepBudget }) => {
memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget });
},

Some files were not shown because too many files have changed in this diff Show more