chore: move docs site workspace

This commit is contained in:
Andrey Avtomonov 2026-05-11 16:53:42 +02:00
parent 0ae9b6effd
commit a46563bb01
52 changed files with 3 additions and 3 deletions

View file

@ -1,7 +0,0 @@
import { HomeLayout } from "fumadocs-ui/layouts/home";
import type { ReactNode } from "react";
import { baseOptions } from "@/app/layout.config";
/**
 * Layout component that wraps its children in Fumadocs' `HomeLayout`,
 * configured with the shared `baseOptions`.
 */
export default function Layout(props: { children: ReactNode }) {
  const { children } = props;
  return (
    <HomeLayout {...baseOptions}>
      {children}
    </HomeLayout>
  );
}

View file

@ -1,5 +0,0 @@
import { redirect } from "next/navigation";
/**
 * Page with no content of its own: it immediately redirects to the docs
 * introduction page.
 */
export default function HomePage() {
  const introductionPath = "/docs/getting-started/introduction";
  redirect(introductionPath);
}

View file

@ -1,47 +0,0 @@
import { source } from "@/lib/source";
import {
DocsPage,
DocsBody,
DocsTitle,
DocsDescription,
} from "fumadocs-ui/page";
import { notFound } from "next/navigation";
import defaultMdxComponents from "fumadocs-ui/mdx";
import { CodeBlock } from "@/components/code-block";
/**
 * Renders a single documentation page.
 *
 * Resolves the (optional) slug segments from the awaited route params, looks
 * the page up in `source`, and calls `notFound()` when no page matches.
 * Fenced code (`pre`) is rendered through the custom `CodeBlock` component;
 * every other MDX element uses the Fumadocs defaults.
 */
export default async function Page(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const Content = page.data.body;
  // Override only `pre`; everything else keeps the stock Fumadocs renderer.
  const mdxComponents = { ...defaultMdxComponents, pre: CodeBlock };

  return (
    <DocsPage toc={page.data.toc}>
      <DocsTitle>{page.data.title}</DocsTitle>
      <DocsDescription>{page.data.description}</DocsDescription>
      <DocsBody>
        <Content components={mdxComponents} />
      </DocsBody>
    </DocsPage>
  );
}
/** Returns the route params produced by `source.generateParams()`. */
export function generateStaticParams() {
  const staticParams = source.generateParams();
  return staticParams;
}
/**
 * Builds the metadata (title and description) for a documentation page.
 *
 * Uses the same lookup as the page component: the slug comes from the awaited
 * route params, and `notFound()` is called when `source` has no matching page.
 */
export async function generateMetadata(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const { title, description } = page.data;
  return { title, description };
}

View file

@ -1,12 +0,0 @@
import { source } from "@/lib/source";
import { DocsLayout } from "fumadocs-ui/layouts/docs";
import type { ReactNode } from "react";
import { baseOptions } from "@/app/layout.config";
/**
 * Layout component that wraps its children in Fumadocs' `DocsLayout`, fed with
 * the page tree from `source` plus the shared `baseOptions`.
 */
export default function Layout(props: { children: ReactNode }) {
  const { children } = props;
  const tree = source.pageTree;
  return (
    <DocsLayout tree={tree} {...baseOptions}>
      {children}
    </DocsLayout>
  );
}

View file

@ -1,977 +0,0 @@
@import "tailwindcss";
@import "fumadocs-ui/css/neutral.css";
@import "fumadocs-ui/css/preset.css";
/* Point the theme's font tokens at the CSS variables named by the font setup
   in the root layout (--font-inter, --font-outfit, --font-geist-mono). */
@theme inline {
--font-sans: var(--font-inter);
--font-display: var(--font-outfit);
--font-mono: var(--font-geist-mono);
}
/*
KTX Light Theme — Warm Cream & Taupe
*/
:root {
--color-fd-background: #faf9f6;
--color-fd-foreground: #1b1b18;
--color-fd-muted: #f3f1ec;
--color-fd-muted-foreground: #6b6560;
--color-fd-popover: #ffffff;
--color-fd-popover-foreground: #1b1b18;
--color-fd-card: #ffffff;
--color-fd-card-foreground: #1b1b18;
--color-fd-border: #e2dfd9;
--color-fd-primary: #0e7490;
--color-fd-primary-foreground: #ffffff;
--color-fd-secondary: #f3f1ec;
--color-fd-secondary-foreground: #44403c;
--color-fd-accent: rgba(14, 116, 144, 0.06);
--color-fd-accent-foreground: #0e7490;
--color-fd-ring: #0e7490;
/* Extended brand tokens */
--ktx-cream: #faf9f6;
--ktx-cream-deep: #f3f1ec;
--ktx-ink: #1b1b18;
--ktx-ink-soft: #57534e;
--ktx-ink-muted: #8c857f;
--ktx-teal: #0e7490;
--ktx-teal-soft: #cffafe;
--ktx-coral: #c2897a;
--ktx-ease: cubic-bezier(0.16, 1, 0.3, 1);
}
/*
KTX Dark Theme — Deep Ocean Slate
*/
.dark {
--color-fd-background: #0f1719;
--color-fd-foreground: #e8e4df;
--color-fd-muted: #1a2429;
--color-fd-muted-foreground: #8a9da6;
--color-fd-popover: #182228;
--color-fd-popover-foreground: #e8e4df;
--color-fd-card: #16202570;
--color-fd-card-foreground: #e8e4df;
--color-fd-border: rgba(255, 255, 255, 0.07);
--color-fd-primary: #22d3ee;
--color-fd-primary-foreground: #0c1518;
--color-fd-secondary: #1c2a31;
--color-fd-secondary-foreground: #c8c3bc;
--color-fd-accent: rgba(34, 211, 238, 0.08);
--color-fd-accent-foreground: #22d3ee;
--color-fd-ring: #22d3ee;
}
.dark #nd-sidebar {
--color-fd-muted: #14202559;
--color-fd-secondary: #1a262c;
--color-fd-muted-foreground: #7a8d96;
}
html, body {
overflow-x: clip;
}
body {
-webkit-font-smoothing: antialiased;
text-rendering: optimizeLegibility;
}
/*
Typography — Outfit display, Inter body
*/
h1, h2, h3, h4 {
font-family: var(--font-display), var(--font-sans), sans-serif;
letter-spacing: -0.02em;
}
h1 {
font-weight: 700;
letter-spacing: -0.03em;
}
h2 {
font-weight: 650;
}
/*
Prose & Content Refinements
*/
/* Inline code */
:not(pre) > code {
background: var(--color-fd-muted) !important;
border: 1px solid var(--color-fd-border) !important;
border-radius: 5px !important;
padding: 0.15em 0.4em !important;
font-size: 0.875em !important;
font-weight: 450 !important;
}
.dark :not(pre) > code {
background: rgba(255, 255, 255, 0.05) !important;
border-color: rgba(255, 255, 255, 0.08) !important;
}
/* Code blocks — give them a subtle traffic-light feel */
figure[data-rehype-pretty-code-figure],
figure:has(> pre) {
position: relative;
border-radius: 12px;
overflow: hidden;
border: 1px solid var(--color-fd-border);
background: var(--color-fd-card);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.03),
0 8px 24px -12px rgba(27, 27, 24, 0.06);
transition: box-shadow 0.3s var(--ktx-ease), border-color 0.3s ease;
}
figure[data-rehype-pretty-code-figure]:hover,
figure:has(> pre):hover {
border-color: color-mix(in oklch, var(--color-fd-primary) 30%, var(--color-fd-border));
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.04),
0 14px 32px -12px rgba(14, 116, 144, 0.12);
}
.dark figure[data-rehype-pretty-code-figure],
.dark figure:has(> pre) {
background: #0c1417;
border-color: rgba(255, 255, 255, 0.06);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.2),
0 12px 32px -12px rgba(0, 0, 0, 0.4);
}
.dark figure[data-rehype-pretty-code-figure]:hover,
.dark figure:has(> pre):hover {
border-color: rgba(34, 211, 238, 0.2);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.2),
0 14px 32px -12px rgba(34, 211, 238, 0.12);
}
pre {
border: 0 !important;
border-radius: 0 !important;
background: transparent !important;
font-size: 13.5px !important;
line-height: 1.7 !important;
}
.dark pre {
background: transparent !important;
}
/*
Code blocks — context-aware modes
*/
/* Shared wrapper base */
.ktx-code {
border-radius: 14px;
overflow: hidden;
margin: 1.25rem 0;
font-family: var(--font-mono), ui-monospace, SFMono-Regular, monospace;
transition: box-shadow 0.3s var(--ktx-ease), border-color 0.3s ease;
}
.ktx-code-body {
margin: 0 !important;
padding: 14px 18px !important;
font-size: 13.5px !important;
line-height: 1.7 !important;
overflow-x: auto;
border: 0 !important;
border-radius: 0 !important;
}
/* Neutralize the outer figure styling that our wrapper now owns */
figure:has(> .ktx-code),
figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
border: 0 !important;
background: transparent !important;
box-shadow: none !important;
border-radius: 0 !important;
margin: 0;
}
/* ── Mode A: Terminal ─────────────────────── */
.ktx-code-terminal {
background: #0c1417;
border: 1px solid rgba(255, 255, 255, 0.08);
color: #c8c3bc;
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 12px 32px -16px rgba(0, 0, 0, 0.3);
}
.ktx-code-terminal:hover {
border-color: rgba(34, 211, 238, 0.2);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 14px 32px -12px rgba(34, 211, 238, 0.18);
}
.ktx-code-terminal-head {
display: flex;
align-items: center;
gap: 6px;
padding: 10px 12px;
border-bottom: 1px solid rgba(255, 255, 255, 0.06);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.03), transparent);
}
.ktx-tl-dot {
width: 11px;
height: 11px;
border-radius: 999px;
flex-shrink: 0;
}
.ktx-code-terminal-label {
margin-left: 8px;
font-size: 11px;
font-weight: 500;
letter-spacing: 0.02em;
color: rgba(255, 255, 255, 0.4);
}
.ktx-code-body-terminal {
background: transparent !important;
color: #c8c3bc !important;
}
/* ── Mode B: VS Code tab (filename) ───────── */
.ktx-code-tab {
background: var(--color-fd-card);
border: 1px solid var(--color-fd-border);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.03),
0 8px 24px -12px rgba(27, 27, 24, 0.06);
}
.dark .ktx-code-tab {
background: #0c1417;
border-color: rgba(255, 255, 255, 0.06);
}
.ktx-code-tab:hover {
border-color: rgba(14, 116, 144, 0.4);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.04),
0 14px 32px -12px rgba(14, 116, 144, 0.14);
}
.dark .ktx-code-tab:hover {
border-color: rgba(34, 211, 238, 0.25);
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.2),
0 14px 32px -12px rgba(34, 211, 238, 0.14);
}
.ktx-code-tab-head {
display: flex;
align-items: center;
gap: 8px;
padding: 8px 10px 8px 14px;
border-bottom: 1px solid var(--color-fd-border);
background: linear-gradient(180deg, var(--color-fd-muted), transparent);
}
.dark .ktx-code-tab-head {
border-bottom-color: rgba(255, 255, 255, 0.05);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.02), transparent);
}
.ktx-file-glyph {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 999px;
background: var(--color-fd-muted-foreground);
flex-shrink: 0;
}
.ktx-file-glyph[data-lang="yaml"],
.ktx-file-glyph[data-lang="yml"] { background: #fbbf24; }
.ktx-file-glyph[data-lang="ts"],
.ktx-file-glyph[data-lang="tsx"],
.ktx-file-glyph[data-lang="typescript"] { background: #3b82f6; }
.ktx-file-glyph[data-lang="js"],
.ktx-file-glyph[data-lang="jsx"],
.ktx-file-glyph[data-lang="javascript"] { background: #facc15; }
.ktx-file-glyph[data-lang="json"] { background: #84cc16; }
.ktx-file-glyph[data-lang="md"],
.ktx-file-glyph[data-lang="mdx"] { background: #a3a3a3; }
.ktx-file-glyph[data-lang="sql"] { background: #f97316; }
.ktx-file-glyph[data-lang="py"],
.ktx-file-glyph[data-lang="python"] { background: #22d3ee; }
.ktx-code-tab-filename {
font-family: var(--font-mono), ui-monospace, monospace;
font-size: 12.5px;
color: var(--color-fd-foreground);
}
.ktx-lang-pill {
margin-left: 4px;
padding: 1px 6px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.04em;
color: var(--color-fd-muted-foreground);
border: 1px solid var(--color-fd-border);
border-radius: 4px;
background: var(--color-fd-card);
font-family: var(--font-display), var(--font-sans), sans-serif;
}
.ktx-code-body-tab {
background: transparent !important;
}
/* ── Mode C: Minimal default ──────────────── */
.ktx-code-minimal {
background: var(--color-fd-card);
border: 1px solid var(--color-fd-border);
position: relative;
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.03),
0 8px 24px -12px rgba(27, 27, 24, 0.06);
}
.dark .ktx-code-minimal {
background: #0c1417;
border-color: rgba(255, 255, 255, 0.06);
}
.ktx-code-minimal:hover {
border-color: rgba(14, 116, 144, 0.3);
box-shadow:
0 1px 2px rgba(27, 27, 24, 0.04),
0 14px 32px -12px rgba(14, 116, 144, 0.12);
}
.dark .ktx-code-minimal:hover {
border-color: rgba(34, 211, 238, 0.2);
}
.ktx-code-minimal-lang {
position: absolute;
top: 8px;
left: 14px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--color-fd-muted-foreground);
font-family: var(--font-display), var(--font-sans), sans-serif;
opacity: 0;
transition: opacity 0.2s var(--ktx-ease);
pointer-events: none;
z-index: 1;
}
.ktx-code-minimal:hover .ktx-code-minimal-lang {
opacity: 0.5;
}
.ktx-code-minimal-copy {
position: absolute !important;
top: 6px !important;
right: 6px !important;
opacity: 0;
transform: translateY(-4px);
transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease);
z-index: 2;
}
.ktx-code-minimal:hover .ktx-code-minimal-copy {
opacity: 0.7;
transform: translateY(0);
}
.ktx-code-minimal-copy:hover {
opacity: 1 !important;
}
.ktx-code-body-minimal {
background: transparent !important;
}
/* Tables */
table {
border-radius: 8px;
overflow: hidden;
}
th {
font-family: var(--font-display), var(--font-sans), sans-serif !important;
font-weight: 600 !important;
font-size: 0.78rem !important;
letter-spacing: 0.02em;
text-transform: uppercase;
color: var(--color-fd-muted-foreground) !important;
}
/*
Sidebar — typographic sections + active rail
*/
#nd-sidebar {
border-right: 1px solid var(--color-fd-border);
}
.dark #nd-sidebar {
border-right-color: rgba(255, 255, 255, 0.05);
background: rgba(15, 23, 25, 0.6);
backdrop-filter: blur(10px);
}
/* Section folder trigger — uppercase tracked label.
Fumadocs 15 section wrappers are bare <div data-state> (no class, no id);
content panels and other Radix collapsibles always carry a class attribute,
so :not([class]) tightly scopes these rules to section triggers only. */
#nd-sidebar div[data-state]:not([class]) > button[data-state] {
font-family: var(--font-display), var(--font-sans), sans-serif !important;
font-size: 11px !important;
font-weight: 600 !important;
letter-spacing: 0.08em !important;
text-transform: uppercase !important;
color: var(--color-fd-muted-foreground) !important;
padding: 14px 12px 8px !important;
margin-top: 8px !important;
border-top: 1px solid var(--color-fd-border);
width: 100%;
display: flex;
align-items: center;
justify-content: space-between;
text-align: left;
background: transparent;
cursor: pointer;
transition: color 0.15s ease;
}
#nd-sidebar div[data-state]:not([class]) > button[data-state]:hover {
color: var(--color-fd-foreground) !important;
}
#nd-sidebar div[data-state]:not([class]) > button[data-state]:focus-visible {
outline: 2px solid var(--color-fd-primary);
outline-offset: 2px;
border-radius: 4px;
}
/* Remove top border from the first section in the sidebar */
#nd-sidebar div[data-state]:not([class]):first-child > button[data-state] {
border-top: none;
margin-top: 0 !important;
padding-top: 4px !important;
}
/* Chevron rotation on toggle */
#nd-sidebar div[data-state]:not([class]) > button[data-state] svg {
transition: transform 0.2s cubic-bezier(0.16, 1, 0.3, 1);
opacity: 0.7;
}
/* Page link items */
#nd-sidebar a[data-active] {
font-size: 14px;
padding: 6px 12px;
border-radius: 6px;
margin-left: 0;
border-left: 2px solid transparent;
transition: background 0.15s ease, color 0.15s ease, border-color 0.15s ease;
}
#nd-sidebar a[data-active="false"]:hover {
background: var(--color-fd-accent);
color: var(--color-fd-foreground);
}
#nd-sidebar a[data-active="true"] {
background: color-mix(in oklch, var(--color-fd-primary) 8%, transparent) !important;
border-left-color: var(--color-fd-primary) !important;
color: var(--color-fd-primary) !important;
font-weight: 500;
}
#nd-sidebar a[data-active]:focus-visible {
outline: 2px solid var(--color-fd-primary);
outline-offset: 2px;
border-radius: 6px;
}
.dark #nd-sidebar a[data-active="true"] {
background: color-mix(in oklch, var(--color-fd-primary) 12%, transparent) !important;
}
/*
Cards — refined with multi-layer shadow & lift
*/
[data-card="true"] {
border-radius: 12px !important;
border: 1px solid var(--color-fd-border) !important;
background: var(--color-fd-card) !important;
position: relative;
overflow: hidden;
transition:
transform 0.4s var(--ktx-ease),
box-shadow 0.4s var(--ktx-ease),
border-color 0.3s ease !important;
box-shadow: 0 1px 2px rgba(27, 27, 24, 0.02);
}
[data-card="true"]::before {
content: "";
position: absolute;
inset: 0;
border-radius: inherit;
padding: 1px;
background: linear-gradient(
135deg,
rgba(14, 116, 144, 0) 0%,
rgba(14, 116, 144, 0) 70%,
rgba(14, 116, 144, 0.3) 100%
);
mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0);
-webkit-mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0);
mask-composite: exclude;
-webkit-mask-composite: xor;
opacity: 0;
transition: opacity 0.4s var(--ktx-ease);
pointer-events: none;
}
[data-card="true"]:hover {
border-color: color-mix(in oklch, var(--color-fd-primary) 40%, var(--color-fd-border)) !important;
transform: translateY(-2px);
box-shadow:
0 18px 36px -16px rgba(14, 116, 144, 0.18),
0 2px 6px rgba(27, 27, 24, 0.04) !important;
}
[data-card="true"]:hover::before {
opacity: 1;
}
.dark [data-card="true"]:hover {
border-color: rgba(34, 211, 238, 0.3) !important;
box-shadow:
0 18px 36px -16px rgba(34, 211, 238, 0.18),
0 2px 6px rgba(0, 0, 0, 0.3) !important;
}
/*
Callouts / Admonitions
*/
[data-callout] {
border-radius: 12px !important;
border-left-width: 3px !important;
}
/*
Nav & Header
*/
#nd-nav {
backdrop-filter: blur(14px) saturate(1.5);
-webkit-backdrop-filter: blur(14px) saturate(1.5);
}
:root #nd-nav {
background: rgba(250, 249, 246, 0.78) !important;
border-bottom: 1px solid var(--color-fd-border);
}
.dark #nd-nav {
background: rgba(15, 23, 25, 0.7) !important;
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
}
/*
Page title area — give docs pages a hero feel
*/
[data-page-header] h1,
article > h1:first-of-type {
font-size: 2.25rem !important;
font-weight: 750 !important;
letter-spacing: -0.035em !important;
line-height: 1.1 !important;
background: linear-gradient(
180deg,
var(--color-fd-foreground) 0%,
color-mix(in oklch, var(--color-fd-foreground) 85%, var(--color-fd-primary)) 100%
);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
-webkit-text-fill-color: transparent;
}
[data-page-header] p,
article > h1:first-of-type + p {
font-size: 1.075rem !important;
color: var(--color-fd-muted-foreground) !important;
line-height: 1.6 !important;
max-width: 640px;
}
/*
Links
*/
article a:not([data-card]) {
text-decoration-thickness: 1px !important;
text-underline-offset: 3px !important;
transition: color 0.15s ease, text-decoration-color 0.15s ease;
}
article a:not([data-card]):hover {
text-decoration-color: var(--color-fd-primary) !important;
}
/*
Background atmosphere — gradient blobs (subtle)
*/
body::before {
content: "";
position: fixed;
inset: 0;
pointer-events: none;
z-index: 0;
background:
radial-gradient(
ellipse 60% 40% at 10% 0%,
rgba(14, 116, 144, 0.05) 0%,
transparent 60%
),
radial-gradient(
ellipse 70% 50% at 100% 100%,
rgba(194, 137, 122, 0.04) 0%,
transparent 65%
);
}
.dark body::before {
background:
radial-gradient(
ellipse 60% 40% at 10% 0%,
rgba(34, 211, 238, 0.06) 0%,
transparent 60%
),
radial-gradient(
ellipse 70% 50% at 100% 100%,
rgba(124, 58, 237, 0.04) 0%,
transparent 65%
);
}
/* Noise texture overlay (above atmosphere, below content) */
body::after {
content: "";
position: fixed;
inset: 0;
pointer-events: none;
z-index: 1;
opacity: 0.02;
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.85' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E");
background-repeat: repeat;
background-size: 220px 220px;
mix-blend-mode: multiply;
}
.dark body::after {
opacity: 0.035;
mix-blend-mode: overlay;
}
/* Make sure content stays above background */
body > * {
position: relative;
z-index: 2;
}
/*
TOC refinement
*/
[data-toc] a {
font-size: 0.8rem !important;
transition: color 0.15s ease !important;
}
[data-toc] a[data-active="true"] {
color: var(--color-fd-primary) !important;
font-weight: 500 !important;
}
/*
Scrollbar (dark mode)
*/
.dark ::-webkit-scrollbar {
width: 6px;
height: 6px;
}
.dark ::-webkit-scrollbar-track {
background: transparent;
}
.dark ::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.12);
border-radius: 3px;
}
.dark ::-webkit-scrollbar-thumb:hover {
background: rgba(255, 255, 255, 0.2);
}
/*
Selection color
*/
::selection {
background: rgba(14, 116, 144, 0.18);
color: inherit;
}
.dark ::selection {
background: rgba(34, 211, 238, 0.22);
}
/*
Landing page utilities
*/
/* Hero gradient text */
.gradient-text {
background: linear-gradient(
135deg,
var(--color-fd-foreground) 0%,
var(--color-fd-primary) 100%
);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
-webkit-text-fill-color: transparent;
}
/* Pill badge */
.pill-badge {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.375rem 0.875rem;
border-radius: 999px;
font-size: 0.75rem;
font-weight: 500;
letter-spacing: 0.01em;
background: var(--color-fd-muted);
border: 1px solid var(--color-fd-border);
color: var(--color-fd-muted-foreground);
backdrop-filter: blur(8px);
}
.pill-badge .pill-dot {
width: 6px;
height: 6px;
border-radius: 999px;
background: var(--color-fd-primary);
box-shadow: 0 0 8px var(--color-fd-primary);
animation: pill-pulse 2.4s ease-in-out infinite;
}
@keyframes pill-pulse {
0%, 100% { opacity: 1; transform: scale(1); }
50% { opacity: 0.65; transform: scale(0.9); }
}
/* Dot grid */
.dot-grid {
background-image: radial-gradient(
circle,
color-mix(in oklch, var(--color-fd-foreground) 8%, transparent) 1px,
transparent 1px
);
background-size: 24px 24px;
}
.dot-grid-fade {
-webkit-mask-image: radial-gradient(ellipse 60% 60% at center, black, transparent);
mask-image: radial-gradient(ellipse 60% 60% at center, black, transparent);
}
/* Card lift (use on custom landing cards) */
.card-lift {
transition:
transform 0.4s var(--ktx-ease),
box-shadow 0.4s var(--ktx-ease),
border-color 0.3s ease;
}
.card-lift:hover {
transform: translateY(-3px);
box-shadow:
0 20px 40px -12px rgba(27, 49, 57, 0.1),
0 0 0 1px rgba(14, 116, 144, 0.08);
}
.dark .card-lift:hover {
box-shadow:
0 20px 40px -12px rgba(0, 0, 0, 0.5),
0 0 0 1px rgba(34, 211, 238, 0.15);
}
/* Reveal animations on scroll */
.rv {
opacity: 0;
transform: translateY(24px);
transition:
opacity 0.7s var(--ktx-ease),
transform 0.7s var(--ktx-ease);
}
.rv.visible {
opacity: 1;
transform: translateY(0);
}
.rv-stagger > .rv:nth-child(1) { transition-delay: 0ms; }
.rv-stagger > .rv:nth-child(2) { transition-delay: 80ms; }
.rv-stagger > .rv:nth-child(3) { transition-delay: 160ms; }
.rv-stagger > .rv:nth-child(4) { transition-delay: 240ms; }
.rv-stagger > .rv:nth-child(5) { transition-delay: 320ms; }
.rv-stagger > .rv:nth-child(6) { transition-delay: 400ms; }
.rv-stagger > .rv:nth-child(7) { transition-delay: 480ms; }
.rv-stagger > .rv:nth-child(8) { transition-delay: 560ms; }
/* Float animation */
@keyframes float {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-6px); }
}
.anim-float { animation: float 6s ease-in-out infinite; }
/* Sheen across surfaces */
.sheen {
position: relative;
overflow: hidden;
}
.sheen::after {
content: "";
position: absolute;
top: 0;
left: -100%;
width: 50%;
height: 100%;
background: linear-gradient(
110deg,
transparent 20%,
rgba(255, 255, 255, 0.12) 50%,
transparent 80%
);
mix-blend-mode: screen;
animation: sheen-slide 4s ease-in-out infinite;
animation-delay: 1s;
}
@keyframes sheen-slide {
0% { left: -100%; }
100% { left: 200%; }
}
/* Glow text — use sparingly on hero key phrase */
.glow-text {
position: relative;
color: var(--color-fd-primary);
}
.glow-text::after {
content: attr(data-text);
position: absolute;
inset: 0;
color: var(--color-fd-primary);
filter: blur(14px);
opacity: 0.35;
z-index: -1;
}
/* Terminal frame for landing page code preview */
.terminal-frame {
background: #0c1417;
border-radius: 14px;
border: 1px solid rgba(255, 255, 255, 0.08);
overflow: hidden;
box-shadow:
0 1px 2px rgba(0, 0, 0, 0.1),
0 20px 50px -20px rgba(14, 116, 144, 0.4),
0 50px 100px -40px rgba(0, 0, 0, 0.5);
font-family: var(--font-mono), ui-monospace, SFMono-Regular, monospace;
font-size: 13px;
line-height: 1.65;
}
.terminal-frame .terminal-head {
display: flex;
align-items: center;
gap: 6px;
padding: 10px 14px;
border-bottom: 1px solid rgba(255, 255, 255, 0.06);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.03), rgba(255, 255, 255, 0));
}
.terminal-frame .terminal-dot {
width: 11px;
height: 11px;
border-radius: 999px;
}
.terminal-frame .terminal-body {
padding: 16px 18px;
color: #c8c3bc;
}
.terminal-frame .term-prompt { color: #22d3ee; }
.terminal-frame .term-cmd { color: #e8e4df; }
.terminal-frame .term-comment { color: #6b7280; }
.terminal-frame .term-ok { color: #4ade80; }
.terminal-frame .term-info { color: #fbbf24; }
.terminal-frame .term-dim { color: #71717a; }
.terminal-frame .term-key { color: #c2897a; }
/* Cursor blink */
.term-cursor {
display: inline-block;
width: 8px;
height: 1em;
vertical-align: text-bottom;
background: #22d3ee;
animation: cursor-blink 1.1s steps(2) infinite;
}
@keyframes cursor-blink {
0%, 50% { opacity: 1; }
51%, 100% { opacity: 0; }
}
/*
Reduced motion
*/
/* Turn off decorative animations and transform transitions for users who ask
   for reduced motion. Scroll-reveal content is shown immediately. */
@media (prefers-reduced-motion: reduce) {
  .rv { transition: none; opacity: 1; transform: none; }
  .anim-float { animation: none; }
  .sheen::after { animation: none; }
  .term-cursor { animation: none; }
  .pill-badge .pill-dot { animation: none; }
  .card-lift { transition: none; }
  /* The card's base rule declares its transition with !important, so the
     override has to be !important as well to take effect. */
  [data-card="true"] {
    transition: none !important;
  }
  [data-card="true"]::before {
    transition: none;
  }
  .ktx-code,
  .ktx-code-minimal-copy,
  .ktx-code-minimal-lang {
    transition: none;
  }
  #nd-sidebar div[data-state]:not([class]) > button[data-state] svg {
    transition: none;
  }
}

View file

@ -1,10 +0,0 @@
import type { BaseLayoutProps } from "fumadocs-ui/layouts/shared";
import { Logo } from "@/components/logo";
/**
 * Layout options shared by the layouts that spread `baseOptions`: the nav
 * title (the `Logo` component), the nav transparency mode and the GitHub
 * repository link.
 */
export const baseOptions: BaseLayoutProps = {
  githubUrl: "https://github.com/kaelio/ktx",
  nav: {
    transparentMode: "top",
    title: <Logo />,
  },
};

View file

@ -1,44 +0,0 @@
import "./global.css";
import { RootProvider } from "fumadocs-ui/provider";
import { Outfit, Inter, Geist_Mono } from "next/font/google";
import type { ReactNode } from "react";
import type { Metadata } from "next";
// Font loaders. Each call names the CSS variable (`variable`) under which the
// font is exposed; the stylesheet maps those variables onto its
// --font-display / --font-sans / --font-mono theme tokens.

// Display font (headings); the listed weights are requested explicitly.
const outfit = Outfit({
variable: "--font-outfit",
subsets: ["latin"],
weight: ["400", "500", "600", "700", "800"],
});
// Body font.
const inter = Inter({
variable: "--font-inter",
subsets: ["latin"],
});
// Monospace font.
const geistMono = Geist_Mono({
variable: "--font-geist-mono",
subsets: ["latin"],
});
/**
 * Site-wide metadata: a title template ("%s | KTX Docs") with "KTX Docs" as
 * the default title, plus the site description.
 */
export const metadata: Metadata = {
title: {
template: "%s | KTX Docs",
default: "KTX Docs",
},
description:
"Open-source context infrastructure that makes agentic analytics reliable.",
};
/**
 * Root layout: renders the `<html>` / `<body>` shell, attaches the three font
 * variable classes to `<html>`, and wraps the page in Fumadocs' `RootProvider`.
 */
export default function RootLayout(props: { children: ReactNode }) {
  // Space-separated class list exposing every font's CSS variable.
  const fontClasses = [
    outfit.variable,
    inter.variable,
    geistMono.variable,
  ].join(" ");

  return (
    <html lang="en" className={fontClasses} suppressHydrationWarning>
      <body>
        <RootProvider>{props.children}</RootProvider>
      </body>
    </html>
  );
}

View file

@ -1,110 +0,0 @@
"use client";
import {
type ReactNode,
type ReactElement,
isValidElement,
} from "react";
import { CopyButton } from "./copy-button";
/**
 * Props accepted by `CodeBlock`. Besides the named fields, arbitrary extra
 * attributes are accepted and forwarded to the rendered `<pre>`.
 */
type Props = {
children?: ReactNode;
className?: string;
title?: string;
// rehype-pretty-code adds data attributes such as data-language; capture them via index signature
[key: string]: unknown;
};
// Languages that are rendered with the terminal-style frame.
const TERMINAL_LANGS = new Set(["bash", "sh", "shell", "zsh"]);
// Matches code whose text begins (after optional whitespace) with one of these
// glyphs; such blocks are also rendered with the terminal-style frame.
const WIZARD_GLYPHS = /^\s*[◆◇◯◐○●]/;
/**
 * Flattens a React node tree into its plain text.
 *
 * Strings are returned as-is, numbers are stringified, arrays are concatenated
 * in order, and elements contribute the text of their `children`. Anything
 * else (null, undefined, booleans, ...) contributes an empty string.
 */
function extractText(node: ReactNode): string {
  if (Array.isArray(node)) {
    let text = "";
    for (const child of node) {
      text += extractText(child);
    }
    return text;
  }

  switch (typeof node) {
    case "string":
      return node;
    case "number":
      return String(node);
  }

  if (!isValidElement(node)) return "";

  const { children } = (node as ReactElement<{ children?: ReactNode }>).props;
  return extractText(children);
}
/**
 * Determines the language of a code block, trying in order:
 *   1. a non-empty string `data-language` prop,
 *   2. a `language-xxx` class on the block itself,
 *   3. a `language-xxx` class on the single child element.
 *
 * @returns the language name, or `null` when none of the above is present.
 */
function detectLanguage(props: Props, children: ReactNode): string | null {
  const languageClass = /language-([\w-]+)/;

  // Pulls `xxx` out of a `language-xxx` class list; non-string input never matches.
  const fromClassName = (value: unknown): string | null => {
    const classList = typeof value === "string" ? value : "";
    const match = classList.match(languageClass);
    return match ? match[1] : null;
  };

  const attribute = props["data-language"];
  if (typeof attribute === "string" && attribute) return attribute;

  const ownLanguage = fromClassName(props.className);
  if (ownLanguage !== null) return ownLanguage;

  if (!isValidElement(children)) return null;
  const childProps = (children as ReactElement<{ className?: string }>).props;
  return fromClassName(childProps.className);
}
/**
 * Renders a code block in one of three visual modes:
 *
 *   A. Terminal — the language is in `TERMINAL_LANGS`, or the code text starts
 *      with one of the `WIZARD_GLYPHS`. Shows three coloured dots and a label
 *      (the title, or "zsh" when there is none).
 *   B. File tab — a non-empty `title` is present. Shows a language-coloured
 *      glyph, the title and a language pill.
 *   C. Minimal — everything else. Language label and copy button are
 *      positioned inside the frame.
 *
 * The incoming `className` is dropped on purpose; all remaining props are
 * forwarded to the `<pre>`.
 */
export function CodeBlock(props: Props) {
  const { children, title, className: _ignored, ...preProps } = props;
  const lang = detectLanguage(props, children);
  const plainText = extractText(children);

  // Normalise the title: a non-empty string, or null.
  const heading = typeof title === "string" && title.length > 0 ? title : null;

  let terminal = WIZARD_GLYPHS.test(plainText);
  if (lang !== null && TERMINAL_LANGS.has(lang)) {
    terminal = true;
  }

  // Mode A — Terminal
  if (terminal) {
    return (
      <div className="ktx-code ktx-code-terminal group">
        <div className="ktx-code-terminal-head">
          <span className="ktx-tl-dot" style={{ background: "#ff5f57" }} />
          <span className="ktx-tl-dot" style={{ background: "#febc2e" }} />
          <span className="ktx-tl-dot" style={{ background: "#28c840" }} />
          <span className="ktx-code-terminal-label">{heading ?? "zsh"}</span>
          <CopyButton text={plainText} className="ml-auto text-white/80" />
        </div>
        <pre {...preProps} className="ktx-code-body ktx-code-body-terminal">
          {children}
        </pre>
      </div>
    );
  }

  // Mode C — Minimal default (no title to show)
  if (heading === null) {
    return (
      <div className="ktx-code ktx-code-minimal group relative">
        {lang && <span className="ktx-code-minimal-lang">{lang}</span>}
        <CopyButton text={plainText} className="ktx-code-minimal-copy" />
        <pre {...preProps} className="ktx-code-body ktx-code-body-minimal">
          {children}
        </pre>
      </div>
    );
  }

  // Mode B — VS Code tab (filename present)
  return (
    <div className="ktx-code ktx-code-tab group">
      <div className="ktx-code-tab-head">
        <span className="ktx-file-glyph" data-lang={lang ?? ""} />
        <span className="ktx-code-tab-filename">{heading}</span>
        {lang && <span className="ktx-lang-pill">{lang}</span>}
        <CopyButton text={plainText} className="ml-auto" />
      </div>
      <pre {...preProps} className="ktx-code-body ktx-code-body-tab">
        {children}
      </pre>
    </div>
  );
}

View file

@ -1,64 +0,0 @@
"use client";
import { useEffect, useRef, useState } from "react";
/** Props for `CopyButton`. */
type Props = {
// Text written to the clipboard when the button is clicked.
text: string;
// Extra classes appended to the button's own class list.
className?: string;
};
/**
 * Icon button that copies `text` to the clipboard.
 *
 * After a successful copy the icon switches to a check mark and the
 * aria-label to "Copied" for 1.5 s, then reverts. Clipboard failures
 * (unsupported browser, denied permission) are ignored on purpose.
 *
 * @param text      Text written to the clipboard on click.
 * @param className Extra classes appended to the button's class list.
 */
export function CopyButton({ text, className = "" }: Props) {
  const [copied, setCopied] = useState(false);
  // Handle of the pending "revert to idle" timer, or null when none is pending.
  const resetTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Cancel a pending revert on unmount so the callback never runs afterwards.
  useEffect(() => {
    return () => {
      if (resetTimer.current !== null) clearTimeout(resetTimer.current);
    };
  }, []);

  const onClick = async () => {
    try {
      await navigator.clipboard.writeText(text);
      setCopied(true);
      // Restart the countdown on every successful copy; otherwise an earlier
      // click's timer would clear the "Copied" state too soon.
      if (resetTimer.current !== null) clearTimeout(resetTimer.current);
      resetTimer.current = setTimeout(() => {
        resetTimer.current = null;
        setCopied(false);
      }, 1500);
    } catch {
      // Older browsers or denied permission — fail silently
    }
  };

  return (
    <button
      type="button"
      onClick={onClick}
      aria-label={copied ? "Copied" : "Copy code"}
      className={`inline-flex items-center justify-center w-7 h-7 rounded-md transition-all hover:bg-white/5 ${className}`}
    >
      {copied ? (
        <svg
          width="14"
          height="14"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="2.4"
          strokeLinecap="round"
          strokeLinejoin="round"
          className="text-emerald-400"
          aria-hidden="true"
        >
          <polyline points="20 6 9 17 4 12" />
        </svg>
      ) : (
        <svg
          width="13"
          height="13"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="2"
          strokeLinecap="round"
          strokeLinejoin="round"
          className="opacity-70"
          aria-hidden="true"
        >
          <rect x="9" y="9" width="13" height="13" rx="2" ry="2" />
          <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
        </svg>
      )}
    </button>
  );
}

View file

@ -1,56 +0,0 @@
/**
 * KTX docs logo: a three-layer stacked-diamond mark followed by the "KTX"
 * wordmark and a "Docs" suffix. The mark tilts slightly when the logo is
 * hovered.
 *
 * NOTE(review): the gradient ids (`ktx-grad-a`, `ktx-grad-b`) are fixed, so
 * rendering the logo more than once on a page yields duplicate ids — confirm
 * whether that can happen in the layouts that use it.
 */
export function Logo() {
  // Both text spans share the display font stack.
  const displayFont = {
    fontFamily: "var(--font-display), var(--font-sans), sans-serif",
  };

  const mark = (
    <svg
      width="22"
      height="22"
      viewBox="0 0 24 24"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
      aria-hidden="true"
    >
      <defs>
        <linearGradient id="ktx-grad-a" x1="0" y1="0" x2="24" y2="24" gradientUnits="userSpaceOnUse">
          <stop offset="0%" stopColor="var(--color-fd-primary)" />
          <stop offset="100%" stopColor="var(--color-fd-primary)" stopOpacity="0.55" />
        </linearGradient>
        <linearGradient id="ktx-grad-b" x1="0" y1="0" x2="24" y2="24" gradientUnits="userSpaceOnUse">
          <stop offset="0%" stopColor="var(--color-fd-primary)" stopOpacity="0.85" />
          <stop offset="100%" stopColor="var(--color-fd-primary)" stopOpacity="0.4" />
        </linearGradient>
      </defs>
      {/* Lowest diamond */}
      <path
        d="M3 17 L12 21.5 L21 17 L12 12.5 Z"
        fill="url(#ktx-grad-a)"
        opacity="0.4"
      />
      {/* Centre diamond */}
      <path
        d="M3 12 L12 16.5 L21 12 L12 7.5 Z"
        fill="url(#ktx-grad-b)"
        opacity="0.7"
      />
      {/* Uppermost diamond */}
      <path
        d="M3 7 L12 11.5 L21 7 L12 2.5 Z"
        fill="var(--color-fd-primary)"
      />
    </svg>
  );

  return (
    <div className="flex items-center gap-2 group">
      <div className="relative flex items-center justify-center transition-transform duration-300 ease-out group-hover:rotate-[-4deg]">
        {mark}
      </div>
      <span
        className="text-[15px] font-semibold text-fd-foreground tracking-tight"
        style={displayFont}
      >
        KTX
      </span>
      <span
        className="text-[13px] font-medium text-fd-muted-foreground/80 tracking-tight border-l border-fd-border pl-2 ml-0.5"
        style={displayFont}
      >
        Docs
      </span>
    </div>
  );
}

View file

@ -1,58 +0,0 @@
"use client";
import { useEffect, useRef, type ReactNode } from "react";
/** Props for `ScrollReveal`. */
type Props = {
children: ReactNode;
// Extra classes appended to the wrapper's class list.
className?: string;
// When true the wrapper itself is observed and all `.rv` descendants are
// revealed together; when false each `.rv` descendant is observed on its own.
stagger?: boolean;
// Passed to IntersectionObserver as its `threshold` option.
threshold?: number;
};
/**
 * Wrapper that reveals content as it scrolls into view by adding the
 * `visible` class to elements carrying the `rv` class.
 *
 * - `stagger = false` (default): every `.rv` descendant is observed on its own
 *   and revealed when it intersects the viewport.
 * - `stagger = true`: the wrapper gets the `rv rv-stagger` classes and is the
 *   only observed element; when it intersects, it and all `.rv` descendants
 *   are revealed together.
 *
 * Each element is revealed once and then unobserved. The observer is
 * disconnected on unmount or when `stagger` / `threshold` change.
 */
export function ScrollReveal({
  children,
  className = "",
  stagger = false,
  threshold = 0.1,
}: Props) {
  const wrapperRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const wrapper = wrapperRef.current;
    if (!wrapper) return;

    const markVisible = (el: Element) => {
      el.classList.add("visible");
    };

    const observer = new IntersectionObserver(
      (entries) => {
        entries
          .filter((entry) => entry.isIntersecting)
          .forEach(({ target }) => {
            markVisible(target);
            if (stagger) {
              target.querySelectorAll(".rv").forEach(markVisible);
            }
            // One-shot: a revealed element never needs to be watched again.
            observer.unobserve(target);
          });
      },
      { threshold, rootMargin: "0px 0px -40px 0px" }
    );

    const targets: Element[] = stagger
      ? [wrapper]
      : Array.from(wrapper.querySelectorAll(".rv"));
    targets.forEach((target) => observer.observe(target));

    return () => observer.disconnect();
  }, [stagger, threshold]);

  const staggerClasses = stagger ? "rv rv-stagger" : "";

  return (
    <div ref={wrapperRef} className={`${staggerClasses} ${className}`}>
      {children}
    </div>
  );
}

View file

@ -1,56 +0,0 @@
/**
 * Static mock of a terminal window showing a `ktx setup` run followed by a
 * `ktx serve` prompt with a cursor element. Purely presentational: no state,
 * no props.
 *
 * NOTE(review): several `term-dim` elements are empty and several values
 * start with a space; this looks like leading glyphs were lost somewhere.
 * The text below is reproduced exactly as found. Confirm against the
 * original file before changing it.
 */
export function TerminalPreview() {
  // Colors of the three window-control dots in the title bar, left to right.
  const windowDots = ["#ff5f57", "#febc2e", "#28c840"];

  // One summary row per configured component shown in the setup output.
  const setupRows = [
    { label: "LLM", value: " claude-sonnet-4-6" },
    { label: "Embeddings", value: " openai · text-embedding-3-small" },
    { label: "Database", value: " postgres-warehouse · 42 tables" },
    { label: "Sources", value: " dbt-main · 218 models" },
  ];

  // Shell prompt followed by a command, shared by the first and last lines.
  const promptLine = (command: string) => (
    <>
      <span className="term-prompt">$</span>{" "}
      <span className="term-cmd">{command}</span>
    </>
  );

  return (
    <div className="terminal-frame sheen w-full max-w-[560px]">
      <div className="terminal-head">
        {windowDots.map((color) => (
          <span key={color} className="terminal-dot" style={{ background: color }} />
        ))}
        <span className="ml-2 text-[11px] text-zinc-500 font-medium tracking-wide">
          ~/analytics
        </span>
      </div>
      <div className="terminal-body">
        <div>{promptLine("ktx setup")}</div>
        <div className="h-2" />
        <div className="term-dim"> Welcome to KTX setup</div>
        <div className="term-dim"></div>
        {setupRows.map(({ label, value }) => (
          <div key={label}>
            <span className="term-dim"></span>{" "}
            <span className="term-key">{label}</span>{" "}
            <span className="term-ok">{value}</span>
          </div>
        ))}
        <div className="h-2" />
        <div className="term-info"> Building context for agents</div>
        <div className="pl-3 text-[12px] term-dim">
          enriching schema · detecting relationships · ingesting dbt
        </div>
        <div className="h-2" />
        <div className="term-ok"> KTX context is ready for agents.</div>
        <div className="h-2" />
        <div>
          {promptLine("ktx serve")}
          <span className="term-cursor ml-1" />
        </div>
      </div>
    </div>
  );
}

View file

@ -1,152 +0,0 @@
---
title: Link Detection
description: How KTX's relationship detection performs on real-world schemas.
---
KTX infers foreign key relationships between tables even when the database declares no primary keys or foreign key constraints. This is critical for analytics warehouses, where constraints are rarely enforced. This page documents the methodology, scoring pipeline, and a reproducible benchmark you can run yourself.
## What this measures
Most analytics warehouses — Snowflake, BigQuery, Redshift — don't enforce referential integrity constraints. Tables like `fct_product_events` reference `dim_accounts` by convention (`account_id` → `id`), but nothing in the schema says so.
KTX's relationship detection discovers these links automatically. The benchmark measures how accurately it recovers known foreign key relationships from a schema with **all declared constraints removed** — the hardest operating mode.
Metrics tracked:
- **Accepted** — relationships scored above the accept threshold (default 0.85) and written to the project manifest
- **Review** — relationships scored between the review threshold (0.55) and accept threshold, flagged for human review
- **Rejected** — relationships scored below the review threshold
- **Skipped** — relationships not evaluated (e.g., filtered by candidate limits)
## Methodology
### Detection pipeline
Relationship detection runs as a multi-stage pipeline during `ktx dev scan`:
1. **Candidate generation** — scans the schema for potential FK relationships using multiple heuristics: exact column name matches, normalized table name matching, name inflection (singular/plural), column suffix patterns (`_id`, `_key`, `_code`, `_uuid`), self-references (`parent_id`, `manager_id`), and optionally embedding similarity and LLM proposals.
2. **Column profiling** — samples up to 10,000 rows per column (configurable via `profile_sample_rows`) to collect statistics: row counts, null rates, distinct value counts, uniqueness ratios, sample values, and text length ranges.
3. **Validation** — tests each candidate relationship against actual data by measuring target uniqueness, source coverage, violation ratio, and value overlap between child and parent columns.
4. **Scoring** — combines 7 weighted signals into a confidence score:
| Signal | Weight | What it captures |
|--------|--------|-----------------|
| Name similarity | 0.24 | How closely column/table names match FK conventions |
| Value overlap | 0.22 | What percentage of FK values exist in the PK column |
| Profile uniqueness | 0.22 | How unique the target column values are |
| Type compatibility | 0.10 | Whether data types are compatible (hard gate — score is 0 if incompatible) |
| Embedding similarity | 0.10 | Semantic similarity between column names |
| Profile null rate | 0.08 | Presence of non-null values |
| Structural prior | 0.04 | Baseline structural hints from schema conventions |
Each signal is normalized to \[0, 1\], multiplied by its weight, and summed. The final confidence is `0.56 + (weighted_sum × 0.65)`, clamped to \[0, 1\].
5. **Graph resolution** — resolves conflicts when multiple candidates target the same column, detects primary keys (by name pattern and validation), and classifies each relationship into `accepted`, `review`, or `rejected` based on thresholds.
### Threshold configuration
```yaml
scan:
relationships:
accept_threshold: 0.85
review_threshold: 0.55
```
Relationships scoring above `accept_threshold` are automatically accepted into the project manifest. Those between `review_threshold` and `accept_threshold` are flagged for analyst review. Below `review_threshold`, they're rejected.
### Test fixture
The benchmark uses the **Orbit-style product warehouse** — a synthetic schema modeled after a real SaaS analytics warehouse with all declared constraints removed. The fixture is a SQLite database with 6 tables:
| Table | Role | Estimated rows |
|-------|------|---------------|
| `dim_accounts` | Dimension | 3 |
| `dim_users` | Dimension | 4 |
| `dim_workspaces` | Dimension | 4 |
| `fct_product_events` | Fact | 5 |
| `fct_invoices` | Fact | 3 |
| `support_tickets` | Fact | 4 |
**Ground truth:** 6 primary keys (one `id` column per table) and 9 foreign key relationships, all `many_to_one`:
| Source column | Target |
|--------------|--------|
| `dim_users.account_id` | `dim_accounts.id` |
| `dim_workspaces.account_id` | `dim_accounts.id` |
| `dim_workspaces.user_id` | `dim_users.id` |
| `fct_product_events.account_id` | `dim_accounts.id` |
| `fct_product_events.user_id` | `dim_users.id` |
| `fct_product_events.workspace_id` | `dim_workspaces.id` |
| `fct_invoices.account_id` | `dim_accounts.id` |
| `support_tickets.account_id` | `dim_accounts.id` |
| `support_tickets.user_id` | `dim_users.id` |
The fixture runs in multiple modes to isolate the contribution of each pipeline stage: with LLM disabled, profiling disabled, validation disabled, and embeddings disabled.
## Results
Results for the default configuration will be added after the benchmark run is finalized.
## Reproducing the benchmark
### Prerequisites
- Node.js 22+
- pnpm
- The KTX repository cloned and dependencies installed (`pnpm install`)
### Running
From the repository root:
```bash
pnpm run relationships:verify-orbit
```
This runs `ktx dev scan` against the bundled SQLite fixture with enrichment disabled, then generates a verification report at:
```text
examples/orbit-relationship-verification/reports/orbit-verification.md
```
The report includes the full relationship summary, enrichment details, artifact paths, and any warnings.
### Custom project
To run verification against your own database (e.g., a local Orbit project):
```bash
KTX_ORBIT_PROJECT_DIR=/path/to/your-project pnpm run relationships:verify-orbit
```
### Configuration
The benchmark project configuration lives at `examples/orbit-relationship-verification/ktx.yaml`:
```yaml
scan:
enrichment:
backend: none
relationships:
enabled: true
llm_proposals: false
accept_threshold: 0.85
review_threshold: 0.55
profile_sample_rows: 10000
validation_concurrency: 4
```
Adjust `accept_threshold` and `review_threshold` to see how threshold changes affect the accepted/review/rejected distribution. Lower thresholds accept more relationships (higher recall, lower precision); higher thresholds are more conservative.
## Broader benchmark suite
Beyond the Orbit fixture, KTX includes a full benchmark corpus at `packages/context/test/fixtures/relationship-benchmarks/` with fixtures across multiple tiers:
- **Unit** — minimal schemas testing individual heuristics
- **Row-bearing** — small schemas with data for validation testing
- **Product** — full warehouse schemas like the Orbit fixture
Fixtures from public datasets (Chinook, Sakila, AdventureWorks, Northwind) supplement the synthetic fixtures. The benchmark runner measures precision, recall, and F1 for both primary key and foreign key detection across all fixtures and modes.

View file

@ -1,5 +0,0 @@
{
"title": "Benchmarks",
"defaultOpen": true,
"pages": ["link-detection"]
}

View file

@ -1,126 +0,0 @@
---
title: "ktx agent"
description: "Machine-readable commands for coding agents."
---
Hidden commands that provide machine-readable JSON output for coding agents. These are the commands that agent integrations (Claude Code, Cursor, Codex, OpenCode) call under the hood — you typically won't use them directly.
All `ktx agent` subcommands require `--json` and produce structured JSON output on stdout.
## Usage
```bash
ktx agent <subcommand> --json [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `tools` | Print available agent-facing KTX tools |
| `context` | Print project context for agent planning |
| `sl list` | List semantic-layer sources |
| `sl read <sourceName>` | Read one semantic-layer source |
| `sl query` | Run a semantic-layer query from a JSON file |
| `wiki search <query>` | Search KTX wiki pages |
| `wiki read <pageId>` | Read one KTX wiki page |
| `sql execute` | Execute read-only SQL with a row limit |
## Options
### `agent tools`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
### `agent context`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
### `agent sl list`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Filter by connection id | — |
| `--query <text>` | Search source names and descriptions | — |
### `agent sl read`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Connection id containing the source | — |
### `agent sl query`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Connection id for execution (required) | — |
| `--query-file <path>` | JSON semantic-layer query file (required) | — |
| `--execute` | Execute the compiled query against the connection | `false` |
| `--max-rows <number>` | Maximum rows to return when executing (1-1000) | — |
### `agent wiki search`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--limit <number>` | Maximum search results | `10` |
### `agent wiki read`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
### `agent sql execute`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output (required) | — |
| `--connection-id <id>` | Connection id for execution (required) | — |
| `--sql-file <path>` | SQL file to execute (required) | — |
| `--max-rows <number>` | Maximum rows to return, 1-1000 (required) | — |
## Examples
```bash
# List available tools
ktx agent tools --json
# Get project context for planning
ktx agent context --json
# List semantic sources
ktx agent sl list --json
# Search semantic sources by name
ktx agent sl list --json --query "revenue"
# Read a semantic source
ktx agent sl read orders --json --connection-id my-warehouse
# Run a semantic-layer query from a file
ktx agent sl query --json \
--connection-id my-warehouse \
--query-file /tmp/query.json \
--execute \
--max-rows 100
# Search wiki pages
ktx agent wiki search "churn definition" --json
# Read a specific wiki page
ktx agent wiki read page-abc123 --json
# Execute read-only SQL
ktx agent sql execute --json \
--connection-id my-warehouse \
--sql-file /tmp/query.sql \
--max-rows 500
```

View file

@ -1,149 +0,0 @@
---
title: "ktx connection"
description: "Add, list, test, and map data sources."
---
Manage database and source connections in your KTX project. Connections define how KTX reaches your data warehouse, BI tools, and context sources.
## Usage
```bash
ktx connection <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `list` | List configured connections |
| `test <connectionId>` | Test a configured connection |
| `add <driver> <connectionId>` | Add or replace a configured connection |
| `remove <connectionId>` | Remove a configured connection from `ktx.yaml` |
| `map <sourceConnectionId>` | Refresh and validate BI-to-warehouse mappings |
| `mapping list <connectionId>` | List Metabase database mappings |
| `mapping set <connectionId> <field> <assignment>` | Set a Metabase or Looker warehouse mapping |
| `mapping apply-bulk <connectionId>` | Apply mappings from JSON |
| `mapping set-sync-enabled <connectionId> <dbId>` | Enable or disable sync for one Metabase database |
| `mapping sync-state get <connectionId>` | Read sync-state selection |
| `mapping sync-state set <connectionId>` | Write sync-state selection |
| `mapping refresh <connectionId>` | Refresh Metabase database mappings |
| `mapping validate <connectionId>` | Validate Metabase database mappings |
| `mapping clear <connectionId> [dbId]` | Clear Metabase database mappings |
| `metabase setup` | Guided setup for a Metabase connection |
| `notion pick <connectionId>` | Pick Notion root pages for a configured Notion connection |
## Options
### `connection add`
| Flag | Description | Default |
|------|-------------|---------|
| `--url <url>` | Connection URL, `env:NAME`, or `file:/path` reference | — |
| `--schema <schema>` | Schema to include; repeatable | — |
| `--readonly` | Mark the connection as read-only | `false` |
| `--force` | Replace an existing connection | `false` |
| `--allow-literal-credentials` | Allow writing a literal credential URL to `ktx.yaml` | `false` |
#### Notion-specific options for `connection add`
| Flag | Description | Default |
|------|-------------|---------|
| `--token-env <name>` | Environment variable containing Notion auth token | — |
| `--token-file <path>` | File containing Notion auth token | — |
| `--crawl-mode <mode>` | Notion crawl mode (`all_accessible` or `selected_roots`) | `selected_roots` |
| `--root-page-id <id>` | Root page to crawl; repeatable | — |
| `--root-database-id <id>` | Root database to crawl; repeatable | — |
| `--root-data-source-id <id>` | Root data source to crawl; repeatable | — |
| `--max-pages <n>` | Maximum pages per run | — |
| `--max-knowledge-creates <n>` | Maximum knowledge creates per run | — |
| `--max-knowledge-updates <n>` | Maximum knowledge updates per run | — |
### `connection remove`
| Flag | Description | Default |
|------|-------------|---------|
| `--force` | Remove without prompting | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `connection map`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
### `connection mapping` subcommands
| Flag | Subcommand | Description | Default |
|------|-----------|-------------|---------|
| `--json` | `list`, `sync-state get` | Print JSON output | `false` |
| `--file <path>` | `apply-bulk` | JSON mapping file (required) | — |
| `--enabled <value>` | `set-sync-enabled` | `true` or `false` (required) | — |
| `--mode <mode>` | `sync-state set` | `ALL`, `ONLY`, or `EXCEPT` (required) | — |
| `--collections <ids>` | `sync-state set` | Comma-separated collection ids | — |
| `--items <ids>` | `sync-state set` | Comma-separated item ids | — |
| `--tag-names <names>` | `sync-state set` | Comma-separated tag names | — |
| `--auto-accept` | `refresh` | Accept refresh changes without prompting | `false` |
### `connection metabase setup`
| Flag | Description | Default |
|------|-------------|---------|
| `--id <connectionId>` | KTX connection id to write | — |
| `--url <url>` | Metabase API URL | — |
| `--api-key <key>` | Metabase API key | — |
| `--mint-api-key` | Mint a Metabase API key with credentials | `false` |
| `--username <email>` | Metabase admin username for API-key minting | — |
| `--password <password>` | Metabase admin password for API-key minting | — |
| `--map <id=target>` | Assign a Metabase database id to a warehouse connection; repeatable | — |
| `--sync <metabaseDatabaseId>` | Enable sync for a discovered database; repeatable | — |
| `--sync-mode <mode>` | Metabase sync selection mode (`ALL`, `ONLY`, or `EXCEPT`) | `ALL` |
| `--run-ingest` | Run ingest after setup | `false` |
| `--yes` | Confirm and apply setup changes without prompting | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `connection notion pick`
| Flag | Description | Default |
|------|-------------|---------|
| `--no-input` | Disable interactive terminal input | — |
| `--root-page-id <id>` | Root page UUID to crawl; repeatable (required with `--no-input`) | — |
## Examples
```bash
# List all configured connections
ktx connection list
# Add a Postgres connection using an environment variable
ktx connection add postgres my-warehouse --url "env:DATABASE_URL"
# Add a Postgres connection with specific schemas
ktx connection add postgres analytics --url "env:PG_URL" --schema public --schema analytics
# Add a read-only Snowflake connection
ktx connection add snowflake sf-prod --url "env:SNOWFLAKE_URL" --readonly
# Test a connection
ktx connection test my-warehouse
# Remove a connection
ktx connection remove old-warehouse
# Add a Notion source connection
ktx connection add notion my-notion \
--token-env NOTION_TOKEN \
--crawl-mode selected_roots \
--root-page-id abc123def456...
# Run guided Metabase setup
ktx connection metabase setup --url https://metabase.example.com
# Map a BI database to a warehouse connection
ktx connection mapping set metabase-prod databaseMappings 1=my-warehouse
# Refresh Metabase mappings
ktx connection mapping refresh metabase-prod --auto-accept
# Pick Notion root pages interactively
ktx connection notion pick my-notion
```

View file

@ -1,147 +0,0 @@
---
title: "ktx dev"
description: "Low-level diagnostics, scans, adapter commands, and mapping tools."
---
Hidden commands for low-level project management, diagnostics, direct adapter control, and shell completion. Most users interact with these through higher-level commands like [`ktx ingest`](/docs/cli-reference/ktx-ingest) and [`ktx setup`](/docs/cli-reference/ktx-setup), but `ktx dev` provides direct access when you need fine-grained control.
## Usage
```bash
ktx dev <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `init [directory]` | Initialize a Git-backed KTX project directory |
| `doctor` | Check KTX setup, project, and demo readiness |
| `doctor setup` | Check KTX install, build, and local runtime readiness |
| `scan` | Run or inspect standalone connection scans |
| `ingest run` | Run local ingest for one configured connection and source adapter |
| `ingest status [runId]` | Print status for a stored local ingest run |
| `ingest watch [runId]` | Open a stored ingest visual report |
| `ingest replay <runId>` | Replay a stored ingest run through memory-flow output |
| `mapping` | Manage Metabase warehouse mappings (same as `ktx connection mapping`) |
| `completion zsh` | Generate zsh completion script |
## Options
### `dev init`
| Flag | Description | Default |
|------|-------------|---------|
| `--name <name>` | Project name written to `ktx.yaml` | — |
| `--force` | Rewrite `ktx.yaml` and scaffold files in an existing project | `false` |
### `dev doctor`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `dev doctor setup`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `dev scan`
See [`ktx scan`](/docs/cli-reference/ktx-scan) for the full scan command reference.
### `dev ingest run`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <connectionId>` | KTX connection id (required) | — |
| `--adapter <adapter>` | Ingest source adapter name (required) | — |
| `--source-dir <path>` | Directory containing source files | — |
| `--database-introspection-url <url>` | Daemon URL for live-database introspection | — |
| `--debug-llm-request-file <path>` | Write sanitized LLM request structure to a JSONL file | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output | `false` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev ingest status`
| Flag | Description | Default |
|------|-------------|---------|
| `--report-file <path>` | Bundle ingest report JSON file to render | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output | `false` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev ingest watch`
| Flag | Description | Default |
|------|-------------|---------|
| `--report-file <path>` | Bundle ingest report JSON file to render | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output (the default unless `--plain` or `--json` is set) | `true` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev ingest replay`
| Flag | Description | Default |
|------|-------------|---------|
| `--report-file <path>` | Bundle ingest report JSON file to render | — |
| `--plain` | Print plain text output | `false` |
| `--json` | Print JSON output | `false` |
| `--viz` | Render memory-flow TUI output | `false` |
| `--no-input` | Disable interactive terminal input for visualization | — |
### `dev completion zsh`
| Flag | Description | Default |
|------|-------------|---------|
| `--install` | Install zsh completion into `~/.zfunc` and update `~/.zshrc` | `false` |
## Examples
```bash
# Initialize a new KTX project
ktx dev init
# Initialize in a specific directory with a project name
ktx dev init ./my-project --name "Analytics Context"
# Re-initialize an existing project
ktx dev init --force
# Check project readiness
ktx dev doctor
# Check CLI install readiness
ktx dev doctor setup
# Run a low-level ingest with a specific adapter
ktx dev ingest run --connection-id my-dbt --adapter dbt
# Run ingest from a specific source directory
ktx dev ingest run \
--connection-id my-dbt \
--adapter dbt \
--source-dir ./dbt-project
# Open a stored ingest run's visual report in the TUI
ktx dev ingest watch run-abc123
# Replay a stored ingest session
ktx dev ingest replay run-abc123
# View ingest status from a report file
ktx dev ingest status --report-file /tmp/ingest-report.json
# Generate zsh completions
ktx dev completion zsh
# Install zsh completions
ktx dev completion zsh --install
```

View file

@ -1,70 +0,0 @@
---
title: "ktx ingest"
description: "Build and refresh context from configured sources."
---
Ingest context from your configured sources — dbt, Looker, Metabase, MetricFlow, LookML, or Notion. The ingest process extracts metadata from your tools, then uses an LLM agent to reconcile it with existing context, writing semantic sources and knowledge pages to your project.
## Usage
```bash
ktx ingest [connectionId] [options]
ktx ingest <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `status [runId]` | Print status for the latest or selected public ingest run |
| `watch [runId]` | Open the latest or selected public ingest visual report |
## Options
### `ingest` (run)
| Flag | Description | Default |
|------|-------------|---------|
| `--all` | Ingest every eligible configured source | `false` |
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `ingest status`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
### `ingest watch`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output instead of the visual report | `false` |
| `--no-input` | Disable interactive terminal input | — |
## Examples
```bash
# Ingest from a specific connection
ktx ingest my-dbt-source
# Ingest from all eligible sources
ktx ingest --all
# Check the status of the latest ingest
ktx ingest status
# Check the status of a specific ingest run
ktx ingest status run-abc123
# Watch the latest ingest report
ktx ingest watch
# Get ingest status as JSON
ktx ingest status --json
```
## Low-level ingest commands
For adapter-level control, use `ktx dev ingest`. See [`ktx dev`](/docs/cli-reference/ktx-dev) for the full low-level ingest surface including `run`, `status`, `watch`, and `replay` with output mode options (`--plain`, `--json`, `--viz`).

View file

@ -1,145 +0,0 @@
---
title: "ktx scan"
description: "Run or inspect database scans."
---
Discover your database schema — tables, columns, types, constraints, and relationships. Scanning is the first step in building context: KTX needs to understand your warehouse structure before it can build semantic sources.
Scan commands live under `ktx dev scan`. See also the [Building Context](/docs/guides/building-context) guide for a walkthrough.
## Usage
```bash
ktx dev scan <connectionId> [options]
ktx dev scan <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `status <runId>` | Print status for a local scan run |
| `report <runId>` | Print a local scan report |
| `relationships <runId>` | Print relationship artifacts for a local scan run |
| `relationship-apply <runId>` | Apply accepted relationship review decisions as manual manifest joins |
| `relationship-feedback` | Export persisted relationship review decisions as calibration labels |
| `relationship-calibration` | Summarize relationship feedback labels against current score thresholds |
| `relationship-thresholds` | Evaluate relationship feedback labels for offline threshold advice |
## Options
### `scan` (run)
| Flag | Description | Default |
|------|-------------|---------|
| `--mode <mode>` | Scan mode: `structural`, `enriched`, or `relationships` | `structural` |
| `--dry-run` | Run without writing scan results | `false` |
| `--database-introspection-url <url>` | Daemon URL for live-database introspection | — |
### `scan report`
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print the raw scan report JSON | `false` |
### `scan relationships`
| Flag | Description | Default |
|------|-------------|---------|
| `--status <status>` | Filter by status: `accepted`, `review`, `rejected`, `skipped`, or `all` | `review` |
| `--limit <count>` | Maximum relationships to print per status | `25` |
| `--accept <candidateId>` | Record an accepted decision for a relationship candidate | — |
| `--reject <candidateId>` | Record a rejected decision for a relationship candidate | — |
| `--note <text>` | Attach a note when recording a relationship review decision | — |
| `--reviewer <name>` | Reviewer name for a relationship review decision | — |
| `--json` | Print relationship artifacts as JSON | `false` |
### `scan relationship-apply`
| Flag | Description | Default |
|------|-------------|---------|
| `--all-accepted` | Apply all accepted relationship review decisions for the scan run | `false` |
| `--candidate <candidateId>` | Apply one accepted relationship review decision; repeatable | — |
| `--dry-run` | Preview relationships that would be written without rewriting manifest shards | `false` |
| `--json` | Print the apply result as JSON | `false` |
### `scan relationship-feedback`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection <connectionId>` | Only export labels for one KTX connection | — |
| `--decision <decision>` | Filter: `accepted`, `rejected`, or `all` | `all` |
| `--json` | Print the export as JSON | `false` |
| `--jsonl` | Print labels as newline-delimited JSON | `false` |
### `scan relationship-calibration`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection <connectionId>` | Only calibrate labels for one KTX connection | — |
| `--decision <decision>` | Filter: `accepted`, `rejected`, or `all` | `all` |
| `--accept-threshold <value>` | Score threshold treated as predicted accepted (0–1) | `0.85` |
| `--review-threshold <value>` | Score threshold treated as predicted review (0–1) | `0.55` |
| `--json` | Print the calibration report as JSON | `false` |
### `scan relationship-thresholds`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection <connectionId>` | Only evaluate labels for one KTX connection | — |
| `--min-total-labels <count>` | Minimum scored labels before advice can be ready | `20` |
| `--min-accepted-labels <count>` | Minimum accepted labels before advice can be ready | `5` |
| `--min-rejected-labels <count>` | Minimum rejected labels before advice can be ready | `5` |
| `--json` | Print the threshold advice report as JSON | `false` |
## Examples
```bash
# Run a structural scan of a connection
ktx dev scan my-warehouse
# Run a scan with LLM enrichment
ktx dev scan my-warehouse --mode enriched
# Run a scan with relationship detection
ktx dev scan my-warehouse --mode relationships
# Dry-run a scan (don't write results)
ktx dev scan my-warehouse --dry-run
# Check the status of a scan run
ktx dev scan status run-abc123
# View the scan report
ktx dev scan report run-abc123
# View scan report as JSON
ktx dev scan report run-abc123 --json
# List relationship candidates pending review
ktx dev scan relationships run-abc123
# List all relationships regardless of status
ktx dev scan relationships run-abc123 --status all
# Accept a relationship candidate
ktx dev scan relationships run-abc123 --accept candidate-xyz
# Reject a relationship candidate with a note
ktx dev scan relationships run-abc123 --reject candidate-xyz --note "false positive"
# Apply all accepted relationships to the manifest
ktx dev scan relationship-apply run-abc123 --all-accepted
# Preview what would be applied
ktx dev scan relationship-apply run-abc123 --all-accepted --dry-run
# Export relationship feedback as calibration labels
ktx dev scan relationship-feedback --json
# Calibrate relationship detection thresholds
ktx dev scan relationship-calibration --accept-threshold 0.9 --review-threshold 0.6
# Get threshold advice based on review decisions
ktx dev scan relationship-thresholds
```

View file

@ -1,51 +0,0 @@
---
title: "ktx serve"
description: "Run the MCP stdio server."
---
Start a Model Context Protocol (MCP) server that exposes your KTX project's context to coding agents. The server runs over stdio and provides tools for querying semantic sources, searching knowledge, managing connections, and running ingests.
## Usage
```bash
ktx serve --mcp stdio [options]
```
## Options
| Flag | Description | Default |
|------|-------------|---------|
| `--mcp <mode>` | MCP transport mode (required; only `stdio` is supported) | — |
| `--user-id <id>` | Local user id | `local` |
| `--semantic-compute` | Enable semantic-layer compute | `false` |
| `--semantic-compute-url <url>` | HTTP semantic-layer compute URL | — |
| `--database-introspection-url <url>` | Daemon URL for live-database introspection | — |
| `--execute-queries` | Allow semantic-layer query execution (requires `--semantic-compute`) | `false` |
| `--memory-capture` | Enable memory capture | `false` |
| `--memory-model <model>` | Memory capture model | — |
## Examples
```bash
# Start the MCP server over stdio
ktx serve --mcp stdio
# Start with semantic-layer compute enabled
ktx serve --mcp stdio --semantic-compute
# Start with query execution enabled
ktx serve --mcp stdio --semantic-compute --execute-queries
# Start with a remote semantic compute backend
ktx serve --mcp stdio --semantic-compute-url http://localhost:8080
# Start with memory capture
ktx serve --mcp stdio --memory-capture
# Use a specific project directory
ktx serve --mcp stdio --project-dir /path/to/my-project
```
## Agent integration
The MCP server is typically configured through `ktx setup --agents` rather than started manually. See the [Serving Agents](/docs/guides/serving-agents) guide and [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool configuration.

View file

@ -1,174 +0,0 @@
---
title: "ktx setup"
description: "Set up or resume a local KTX project."
---
Interactive wizard that walks you through configuring LLM credentials, embeddings, database connections, context sources, and agent integrations. When run without flags in a directory that has no `ktx.yaml`, it launches the full guided flow. When run in an existing project, it resumes from the first incomplete step.
## Usage
```bash
ktx setup [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `setup demo` | Run the packaged KTX demo from setup |
| `setup demo init` | Initialize the packaged demo project |
| `setup demo reset` | Reset the packaged demo project |
| `setup demo replay` | Replay the packaged demo memory-flow |
| `setup demo scan` | Run the packaged demo scan |
| `setup demo inspect` | Inspect packaged demo outputs |
| `setup demo doctor` | Check packaged demo readiness |
| `setup demo ingest` | Run packaged demo ingest |
| `setup context build` | Build agent-ready KTX context for setup |
| `setup context watch [runId]` | Watch a setup-managed context build |
| `setup context status [runId]` | Print setup-managed context build status |
| `setup context stop [runId]` | Request a pause for a setup-managed context build |
| `setup remove` | Remove setup-managed local integrations |
| `setup status` | Show setup readiness for the resolved KTX project |
## Options
### General
| Flag | Description | Default |
|------|-------------|---------|
| `--project-dir <path>` | KTX project directory | `KTX_PROJECT_DIR`, nearest `ktx.yaml`, or cwd |
| `--new` | Create a new KTX project before setup | `false` |
| `--existing` | Use an existing KTX project | `false` |
| `--yes` | Accept safe defaults in non-interactive setup | `false` |
| `--no-input` | Disable interactive terminal input | — |
### Agent Integration
| Flag | Description | Default |
|------|-------------|---------|
| `--agents` | Install agent integration only | `false` |
| `--target <target>` | Agent target (`claude-code`, `codex`, `cursor`, `opencode`, `universal`) | — |
| `--agent-scope <scope>` | Agent install scope (`project` or `global`) | `project` |
| `--agent-install-mode <mode>` | Agent install mode (`cli`, `mcp`, or `both`) | `cli` |
| `--project` | Install agent integration into the project scope | `false` |
| `--global` | Install agent integration into the global target scope (Claude Code and Codex only) | `false` |
| `--skip-agents` | Leave agent integration incomplete for now | `false` |
### LLM Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--anthropic-api-key-env <name>` | Environment variable containing the Anthropic API key | — |
| `--anthropic-api-key-file <path>` | File containing the Anthropic API key | — |
| `--anthropic-model <model>` | Anthropic model ID to validate and save | — |
| `--skip-llm` | Leave LLM setup incomplete for now | `false` |
### Embedding Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--embedding-backend <backend>` | Embedding backend (`openai` or `sentence-transformers`) | — |
| `--embedding-api-key-env <name>` | Environment variable containing the embedding provider API key | — |
| `--embedding-api-key-file <path>` | File containing the embedding provider API key | — |
| `--skip-embeddings` | Leave embedding setup incomplete for now | `false` |
### Database Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--database <driver>` | Database driver to configure; repeatable (`sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake`) | — |
| `--database-connection-id <id>` | Existing or new connection id; repeatable | — |
| `--new-database-connection-id <id>` | Connection id for one new database connection | — |
| `--database-url <url>` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection | — |
| `--database-schema <schema>` | Database schema to include; repeatable | — |
| `--skip-databases` | Leave database setup incomplete | `false` |
### Historic SQL
| Flag | Description | Default |
|------|-------------|---------|
| `--enable-historic-sql` | Enable Historic SQL when the selected database supports it | `false` |
| `--disable-historic-sql` | Disable Historic SQL for the selected database | `false` |
| `--historic-sql-window-days <number>` | Historic SQL query-history window in days | — |
| `--historic-sql-min-calls <number>` | Postgres `pg_stat_statements` minimum calls floor | — |
| `--historic-sql-service-account-pattern <pattern>` | Historic SQL service-account regex; repeatable | — |
| `--historic-sql-redaction-pattern <pattern>` | Regex for redacting SQL literals in Historic SQL; repeatable | — |
### Context Source Configuration
| Flag | Description | Default |
|------|-------------|---------|
| `--source <type>` | Source connector type (`dbt`, `metricflow`, `metabase`, `looker`, `lookml`, `notion`) | — |
| `--source-connection-id <id>` | Connection id for source setup | — |
| `--source-path <path>` | Local source path for dbt, MetricFlow, or LookML | — |
| `--source-git-url <url>` | Git URL for dbt, MetricFlow, or LookML | — |
| `--source-branch <branch>` | Git branch for source setup | — |
| `--source-subpath <path>` | Repo subpath for source setup | — |
| `--source-auth-token-ref <ref>` | `env:` or `file:` credential ref for source repo auth | — |
| `--source-url <url>` | Source service URL for Metabase or Looker | — |
| `--source-api-key-ref <ref>` | `env:` or `file:` API key ref for Metabase or Notion | — |
| `--source-client-id <id>` | Looker client id | — |
| `--source-client-secret-ref <ref>` | `env:` or `file:` Looker client secret ref | — |
| `--source-warehouse-connection-id <id>` | Mapped warehouse connection id | — |
| `--source-project-name <name>` | dbt project name override | — |
| `--source-profiles-path <path>` | dbt profiles path | — |
| `--source-target <target>` | dbt target or source-specific mapping target | — |
| `--metabase-database-id <id>` | Metabase database id to map | — |
| `--notion-crawl-mode <mode>` | Notion crawl mode (`all_accessible` or `selected_roots`) | — |
| `--notion-root-page-id <id>` | Notion root page id; repeatable | — |
| `--skip-initial-source-ingest` | Validate source setup without building source context during setup | `false` |
| `--skip-sources` | Mark optional source setup complete with no sources | `false` |
### Subcommand Options
| Flag | Subcommand | Description | Default |
|------|-----------|-------------|---------|
| `--json` | `status`, `context status` | Print JSON output | `false` |
| `--no-input` | `context build`, `context watch` | Disable interactive terminal input | — |
| `--force` | `context stop` | Request the pause without interactive confirmation | `false` |
| `--agents` | `remove` | Remove setup-managed agent integration files | `false` |
| `--mode <mode>` | `demo` | Demo mode: `seeded`, `replay`, or `full` | `seeded` |
| `--plain` | `demo` | Print plain text output | `false` |
## Examples
```bash
# Run the interactive setup wizard
ktx setup
# Create a new project and run setup
ktx setup --new
# Resume setup in an existing project
ktx setup --existing
# Non-interactive setup with Anthropic key from environment
ktx setup --yes --anthropic-api-key-env ANTHROPIC_API_KEY
# Set up a Postgres connection
ktx setup --database postgres --database-url "env:DATABASE_URL"
# Install agent integration for Claude Code only
ktx setup --agents --target claude-code
# Install agent integration globally for Codex
ktx setup --agents --target codex --global
# Add a dbt source from a local path
ktx setup --source dbt --source-path ./my-dbt-project
# Skip optional steps for a minimal setup
ktx setup --skip-sources --skip-agents
# Check setup readiness
ktx setup status
# Build context after setup
ktx setup context build
# Watch a running context build
ktx setup context watch
# Run the packaged demo
ktx setup demo
```

View file

@ -1,122 +0,0 @@
---
title: "ktx sl"
description: "List, read, validate, query, or write semantic-layer sources."
---
Interact with your project's semantic layer. Semantic sources are YAML definitions that describe your tables, columns, measures, joins, and grain — the vocabulary agents use to generate correct SQL.
## Usage
```bash
ktx sl <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `list` | List semantic-layer sources |
| `read <sourceName>` | Read a semantic-layer source |
| `validate <sourceName>` | Validate a semantic-layer source against the database schema |
| `write <sourceName>` | Write a semantic-layer source |
| `query` | Compile or execute a semantic-layer query |
## Options
### `sl list`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | Filter by KTX connection id | — |
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
### `sl read`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id (required) | — |
### `sl validate`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id (required) | — |
### `sl write`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id (required) | — |
| `--yaml <yaml>` | Semantic-layer source YAML content (required) | — |
### `sl query`
| Flag | Description | Default |
|------|-------------|---------|
| `--connection-id <id>` | KTX connection id | — |
| `--measure <measure>` | Measure to query; repeatable (at least one required) | — |
| `--dimension <dimension>` | Dimension to include; repeatable | — |
| `--filter <filter>` | Filter expression; repeatable | — |
| `--segment <segment>` | Segment to include; repeatable | — |
| `--order-by <field[:direction]>` | Order field, optionally suffixed with `:asc` or `:desc`; repeatable | — |
| `--limit <n>` | Query limit | — |
| `--include-empty` | Include empty rows | `false` |
| `--format <format>` | Output format: `json` or `sql` | `json` |
| `--execute` | Execute the compiled query against the database | `false` |
| `--max-rows <n>` | Maximum rows to return when executing | — |
## Examples
```bash
# List all semantic sources
ktx sl list
# List sources for a specific connection
ktx sl list --connection-id my-warehouse
# List sources as JSON
ktx sl list --json
# Read a source definition
ktx sl read orders --connection-id my-warehouse
# Validate a source against the live schema
ktx sl validate orders --connection-id my-warehouse
# Write a new source from YAML
ktx sl write customers --connection-id my-warehouse --yaml "$(cat sources/customers.yaml)"
# Compile a query and view the generated SQL
ktx sl query \
--connection-id my-warehouse \
--measure orders.total_revenue \
--dimension orders.created_date \
--format sql
# Execute a query with filters
ktx sl query \
--connection-id my-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--filter "orders.created_date >= '2024-01-01'" \
--execute \
--max-rows 100
# Query with ordering and limit
ktx sl query \
--connection-id my-warehouse \
--measure orders.total_revenue \
--dimension customers.country \
--order-by total_revenue:desc \
--limit 10 \
--execute
# Execute and cap the result set
ktx sl query \
--connection-id my-warehouse \
--measure orders.count \
--dimension orders.created_date \
--execute \
--max-rows 1000
```

View file

@ -1,28 +0,0 @@
---
title: "ktx status"
description: "Show current project status."
---
Print the current setup status of your KTX project — which steps are complete, which need attention, and whether the project is ready for agents.
## Usage
```bash
ktx status [options]
```
## Options
| Flag | Description | Default |
|------|-------------|---------|
| `--json` | Print JSON output | `false` |
## Examples
```bash
# Show project status
ktx status
# Get status as JSON (useful for scripting)
ktx status --json
```

View file

@ -1,92 +0,0 @@
---
title: "ktx wiki"
description: "List, read, search, or write knowledge pages."
---
Manage knowledge pages in your KTX project. Knowledge pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data.
## Usage
```bash
ktx wiki <subcommand> [options]
```
## Subcommands
| Subcommand | Description |
|-----------|-------------|
| `list` | List local wiki pages |
| `read <key>` | Read one local wiki page |
| `search <query>` | Search local wiki pages |
| `write <key>` | Write one local wiki page |
## Options
### `wiki list`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
### `wiki read`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
### `wiki search`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
### `wiki write`
| Flag | Description | Default |
|------|-------------|---------|
| `--user-id <id>` | Local user id | `local` |
| `--scope <scope>` | Scope: `global` or `user` | `global` |
| `--summary <summary>` | Wiki page summary (required) | — |
| `--content <content>` | Wiki page content (required) | — |
| `--tag <tag>` | Wiki tag; repeatable | — |
| `--ref <ref>` | Wiki ref; repeatable | — |
| `--sl-ref <ref>` | Semantic-layer ref; repeatable | — |
## Examples
```bash
# List all wiki pages
ktx wiki list
# Read a specific wiki page
ktx wiki read revenue-definitions
# Search wiki pages
ktx wiki search "monthly recurring revenue"
# Write a global knowledge page
ktx wiki write revenue-definitions \
--summary "Canonical revenue metric definitions" \
--content $'## MRR\nMonthly Recurring Revenue is calculated as...'
# Write a user-scoped knowledge page
ktx wiki write my-notes \
--scope user \
--summary "Personal analysis notes" \
--content "Things to check when revenue numbers look off..."
# Write a page with tags and references
ktx wiki write churn-rules \
--summary "Churn calculation business rules" \
--content "A customer is considered churned when..." \
--tag finance \
--tag retention \
--sl-ref customers \
--sl-ref subscriptions
# Write a page with external references
ktx wiki write data-freshness \
--summary "Data pipeline SLAs and freshness guarantees" \
--content "The orders table refreshes every 15 minutes..." \
--ref "https://wiki.example.com/data-pipelines"
```

View file

@ -1,16 +0,0 @@
{
"title": "CLI Reference",
"defaultOpen": true,
"pages": [
"ktx-setup",
"ktx-connection",
"ktx-scan",
"ktx-ingest",
"ktx-sl",
"ktx-wiki",
"ktx-serve",
"ktx-status",
"ktx-agent",
"ktx-dev"
]
}

View file

@ -1,222 +0,0 @@
---
title: Contributing
description: How to contribute to KTX.
---
KTX is an open-source project and welcomes contributions — bug fixes, new connectors, documentation improvements, and feature proposals. This page covers how to set up a development environment, navigate the repository, run tests, and submit changes.
## Development setup
### Prerequisites
- **Node.js 22+** and **pnpm** — for the TypeScript workspace
- **Python 3.11+** and **uv** — for the Python semantic layer and daemon
- **Git** — for version control
### Clone and install
```bash
git clone https://github.com/kaelio/ktx.git
cd ktx
pnpm install
uv sync --all-groups
```
`pnpm install` sets up all TypeScript packages in the workspace. `uv sync --all-groups` installs Python dependencies for the semantic layer and daemon, including dev and test groups.
### Build
```bash
pnpm run build
```
This builds all TypeScript packages. You can also build individual packages:
```bash
pnpm --filter @ktx/cli run build
pnpm --filter @ktx/context run build
```
### Link the CLI for local testing
```bash
pnpm run setup:dev
pnpm run link:dev
```
This makes the `ktx` command available globally, pointing at your local build.
## Repository structure
KTX is a pnpm + uv workspace. TypeScript packages live in `packages/`, Python projects in `python/`.
```text
packages/
cli/ # CLI entry point and commands
context/ # Core context engine (scan, ingest, MCP, semantic layer)
llm/ # LLM client abstraction
connector-postgres/ # PostgreSQL connector
connector-snowflake/ # Snowflake connector
connector-bigquery/ # BigQuery connector
connector-clickhouse/ # ClickHouse connector
connector-mysql/ # MySQL connector
connector-sqlserver/ # SQL Server connector
connector-sqlite/ # SQLite connector
connector-posthog/ # PostHog connector
python/
ktx-sl/ # Semantic layer — grain-aware query planning and SQL generation
ktx-daemon/ # Daemon — portable API server around the semantic layer
examples/ # Example projects and fixtures
scripts/ # Workspace scripts (benchmarks, verification, release)
docs/ # Documentation site (Fumadocs)
```
All TypeScript packages are ESM (`"type": "module"`) and use `NodeNext` module resolution. The Python projects use `pyproject.toml` for dependency management.
## Running tests
### TypeScript
```bash
# Run all tests
pnpm run test
# Run tests for a specific package
pnpm --filter @ktx/cli run test
pnpm --filter @ktx/context run test
# Type-check all packages
pnpm run type-check
# Type-check a specific package
pnpm --filter @ktx/context run type-check
# CLI smoke test
pnpm --filter @ktx/cli run smoke
```
### Python
```bash
# Run all Python tests
uv run pytest -q
# Semantic layer tests
uv run pytest python/ktx-sl/tests -q
# Daemon tests
uv run pytest python/ktx-daemon/tests -q
```
### Pre-commit checks
After modifying Python files, run pre-commit on the changed files:
```bash
uv run pre-commit run --files python/ktx-sl/src/changed_file.py
```
### Full verification
For cross-cutting changes that affect package exports or shared contracts:
```bash
pnpm run build
pnpm run type-check
pnpm run test
uv run pytest -q
```
## Adding a connector
Database connectors live in `packages/connector-<name>/`. Each connector implements the `KtxScanConnector` interface from `@ktx/context`.
### Step 1: Scaffold the package
Create a new directory at `packages/connector-<name>/` with:
```text
packages/connector-<name>/
package.json
tsconfig.json
src/
index.ts # Public exports
connector.ts # KtxScanConnector implementation
dialect.ts # SQL dialect handling
```
The `package.json` should follow the pattern of existing connectors:
```json
{
"name": "@ktx/connector-<name>",
"private": true,
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js"
}
},
"dependencies": {
"@ktx/context": "workspace:*"
}
}
```
### Step 2: Implement the connector
Your connector class must implement `KtxScanConnector`, which requires:
- **`id`** — a string identifier, typically `"<driver>:<connectionId>"`
- **`driver`** — the `KtxConnectionDriver` value for your database
- **`capabilities`** — a `KtxConnectorCapabilities` object declaring what your connector supports: `tableSampling`, `columnSampling`, `columnStats`, `readOnlySql`, `nestedAnalysis`, `eventStreamDiscovery`, `formalForeignKeys`, `estimatedRowCounts`
- **`introspect()`** — discovers tables, columns, types, and constraints, returning a `KtxSchemaSnapshot`
Optional methods for richer scanning:
- **`sampleColumn()`** — sample values from a specific column
- **`sampleTable()`** — sample rows from a table
- **`columnStats()`** — compute column statistics
- **`executeReadOnly()`** — execute arbitrary read-only SQL
### Step 3: Add a dialect
The dialect class handles database-specific concerns: identifier quoting, type mapping (native types to normalized types), and query generation for sampling and statistics.
### Step 4: Wire it up
Register the new connector driver in `packages/context` so the CLI and scan engine can instantiate it. See how the existing connectors are registered and follow the same pattern.
### Step 5: Test
```bash
pnpm --filter @ktx/connector-<name> run build
pnpm --filter @ktx/connector-<name> run type-check
pnpm --filter @ktx/connector-<name> run test
```
Use `packages/connector-sqlite/` as a minimal reference and `packages/connector-postgres/` as a full-featured one.
## Code conventions
- **TypeScript**: strict types, no `any`, no `as unknown as`. Use `zod` schemas for runtime validation at CLI and config boundaries. Follow the `camelCaseSchema` / `PascalCaseType` naming convention for Zod schemas and inferred types.
- **Python**: type hints on all new code, `pathlib` over `os.path`, explicit exception types over broad `except Exception`, `logger.exception()` for caught exceptions. Use `sqlglot` for SQL parsing — never regex.
- **Dependencies**: `pnpm` for Node packages (never `npm` or `bun`), `uv` for Python (never `pip`).
- **Dead code**: remove it. Don't leave commented-out code, unused wrappers, or empty directories.
## PR guidelines
Before submitting a pull request:
1. **Run the relevant checks** — at minimum, `pnpm run type-check` and `pnpm run test` for TypeScript changes, `uv run pytest -q` and `uv run pre-commit run --files [FILES]` for Python changes.
2. **Build if you changed exports** — run `pnpm run build` to verify package exports and `dist/` expectations still align.
3. **Keep changes focused** — one logical change per PR. Don't bundle unrelated refactors.
4. **Follow existing patterns** — match the style and conventions of surrounding code. The codebase favors explicit over clever.
5. **Don't commit artifacts** — `node_modules/`, `.venv/`, `dist/`, coverage output, and local databases should not be committed.
For larger features or architectural changes, open an issue first to discuss the approach.

View file

@ -1,5 +0,0 @@
{
"title": "Community",
"defaultOpen": true,
"pages": ["contributing"]
}

View file

@ -1,82 +0,0 @@
---
title: Context as Code
description: Treat analytics context like code — version it, review it, merge it.
---
## The idea
dbt proved that analytics transformations belong in version control. Before dbt, SQL lived in BI tools, scheduling systems, and spreadsheets — scattered, unreviewed, impossible to audit. "Analytics as code" changed that: put your models in git, review them in PRs, deploy them by merging.
KTX applies the same principle to analytics context. Metric definitions, business rules, join relationships, knowledge pages — these are artifacts that determine whether an agent produces correct results. They change over time. They need review. They need history. They need to be treated like code.
A KTX project is a git repository. Semantic sources are YAML files. Knowledge pages are Markdown files. Changes are commits. Updates are pull requests. Deployment is a merge. The entire lifecycle of your analytics context follows the same workflow your team already uses for dbt models, application code, and infrastructure.
## Auto-ingestion
Most analytics context already exists — it's in your dbt manifests, LookML models, Metabase questions, and team Notion pages. KTX pulls from these sources automatically through adapters.
An ingestion run works like this:
1. **Adapters extract metadata.** Each configured source — dbt, LookML, Metabase, MetricFlow, Notion, or your live database — provides structured metadata about models, metrics, dimensions, questions, and documentation.
2. **The LLM agent reconciles.** KTX doesn't blindly overwrite existing context. An LLM agent compares incoming metadata against your current semantic sources and knowledge pages. It decides what to create, what to update, and what to leave alone. If your dbt project added a new model, the agent writes a new semantic source. If a Metabase question references a metric you've already defined, the agent skips the duplicate.
3. **Files are written.** New and updated YAML sources and Markdown knowledge pages are written to the project directory. Every decision is recorded in the session transcript.
This reconciliation step is what separates auto-ingestion from a simple sync. A naive import would overwrite your hand-tuned metric definitions every time dbt's manifest changes. KTX's agent-driven approach merges intelligently: it respects your edits, fills gaps, and flags conflicts for human review.
## The git workflow
Auto-ingestion is designed to plug into a PR-based workflow. Run ingestion on a branch, review the changed YAML and Markdown files, and merge them the same way you merge dbt models or application code.
```text
dbt / Looker / Metabase          KTX project repo
┌──────────────┐               ┌──────────────────────┐
│ Metadata     │───ingestion──▶│ Branch: ingest/...   │
│ changes      │               │                      │
└──────────────┘               │ + 3 new sources      │
                               │ ~ 2 updated joins    │
                               │ + 1 knowledge page   │
                               │                      │
                               │ ──── Open PR ────    │
                               │                      │
                               │ Review semantic diff │
                               │ Approve & merge      │
                               └──────────────────────┘
                                 Agents see updated
                                 context immediately
```
A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge.
Once merged, agents querying through KTX's MCP server or CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request.
This workflow gives you the same review guarantees you have for dbt models. No semantic source reaches production without a human approving it. But unlike maintaining context manually, the heavy lifting — discovering new tables, drafting source definitions, extracting business rules from documentation — is done by the ingestion agent. You review and approve. You don't write from scratch.
## Feedback loops
Context improves over time through three feedback channels.
**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a knowledge page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest.
**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, source definitions can be tightened with better filters or grain declarations, and relationship thresholds can be adjusted.
**Relationship calibration.** KTX infers foreign key relationships between tables automatically, even when the database has no declared constraints. It does this by analyzing column names, types, and value distributions, and by asking the LLM for proposals. Each inferred relationship gets a confidence score. You control two thresholds: `acceptThreshold` (relationships scoring above it are accepted automatically; default 0.85) and `reviewThreshold` (relationships scoring between `reviewThreshold` and `acceptThreshold` are flagged for human review; default 0.55). As you accept or reject proposals, the system learns which patterns match your schema conventions.
Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Relationship calibration tunes the discovery process to your warehouse's conventions. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration.
## Deterministic replay
Every ingestion session in KTX produces a full transcript: every tool call the LLM agent made, every response it received, every source it created or modified, and the reasoning behind each decision.
This matters for three reasons.
**Debugging.** When a semantic source looks wrong — the grain is off, a join points to the wrong table, a measure formula doesn't match the business definition — you can trace it back to the ingestion session that created it. The transcript shows exactly which adapter provided the input, how the LLM interpreted it, and why it made the decision it did. You don't have to guess.
**Trust.** Analytics teams need to trust the context that agents consume. Deterministic replay means you can verify any part of the context layer by re-examining the session that produced it. If a stakeholder asks "where did this revenue definition come from?", you have a complete audit trail — from the dbt manifest entry, through the LLM's reconciliation logic, to the YAML file that was written.
**Reproducibility.** Because ingestion sessions are recorded as structured transcripts (tool calls and responses, not just logs), they can be replayed for testing and validation. If you change your ingestion configuration or upgrade the LLM, you can replay previous sessions to see how the output would differ. This gives you a safety net for changes that affect how context is generated.
The transcript is stored with local ingest run state and can be reviewed or replayed when you need to audit a decision. Commit the resulting YAML and Markdown changes; commit reports or transcripts only when they are part of your team's review workflow.

View file

@ -1,5 +0,0 @@
{
"title": "Concepts",
"defaultOpen": true,
"pages": ["the-context-layer", "context-as-code"]
}

View file

@ -1,147 +0,0 @@
---
title: The Context Layer
description: What a context layer is, why agents need one, and how KTX compares to other semantic layers.
---
## The problem
Give an agent access to your database and it will generate SQL. It might even produce a decent chart. But ask it a real analytics question — "what's our net revenue trend by segment?" — and things fall apart.
The agent doesn't know that `orders.amount` includes refunds and needs a status filter. It doesn't know that `customers` should join to `orders` on `customer_id`, not `id`. It doesn't know that your team stopped using `legacy_segments` six months ago, or that "enterprise" means contracts over $100k, not just big logos. It sees column names and types. It doesn't see your business.
This isn't a model capability problem. GPT-4, Claude, and Gemini can all write correct SQL — when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Right enough to pass a glance, wrong enough to drive a bad decision.
Analytics engineers already know this pain. It's the same reason you write dbt tests, maintain a data dictionary, and spend half of standup explaining why someone's dashboard number doesn't match the board deck. The difference is that agents make decisions at machine speed, so the wrong context propagates faster than a human can catch it.
## Three waves of AI analytics
The industry has moved through three distinct approaches to getting AI and data to work together.
**Wave one: database access.** Connect an LLM to a database, let it generate SQL. This works for simple lookups — "how many orders last week?" — but breaks on anything that requires business knowledge. The agent guesses at joins, invents metrics, and hallucinates table relationships. Every query is a coin flip.
**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but it's still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter.
**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, data quality gotchas, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer.
## What a context layer is
A context layer is the infrastructure that gives agents the business knowledge they need to produce correct analytics artifacts. It includes a semantic layer — that's a critical component — but it's not the whole thing.
KTX organizes context into four pillars:
**Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives.
```yaml
name: orders
table: public.orders
grain: [id]
columns:
  - name: id
    type: number
  - name: customer_id
    type: number
  - name: amount
    type: number
  - name: status
    type: string
  - name: created_at
    type: time
    role: time
joins:
  - to: customers
    "on": customer_id = customers.id
    relationship: many_to_one
measures:
  - name: revenue
    expr: sum(amount)
    filter: "status != 'refunded'"
    description: Net revenue excluding refunds
  - name: order_count
    expr: count(id)
```
**Knowledge pages** are Markdown documents that capture business definitions, rules, and gotchas — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it.
```markdown
---
summary: Gross-to-net revenue reconciles paid invoices, credits, and refunds.
tags:
- finance
- revenue
refs:
- arr-contract-first
sl_refs:
- warehouse.invoices
usage_mode: auto
---
Gross revenue starts from paid invoice activity. Net revenue subtracts
credits and successful refunds in the month they are recorded.
Exclude unpaid, void, draft, and test-account invoice activity from
canonical revenue reporting.
```
**Scan artifacts** are the raw output of KTX's database scanner: table and column metadata, inferred foreign key relationships (even without declared constraints), column statistics, and enrichment reports. They form the foundation that semantic sources are built on.
**Provenance** is the record of how context was created and changed. Every ingestion session records a full transcript — which adapter ran, what the LLM decided, which sources were created or updated, and why. This is what makes the system auditable: you can trace any semantic source back to the ingestion decision that created it.
Together, these four pillars give agents enough context to produce analytics artifacts that match what your team would produce — not just syntactically valid SQL, but the right query for the question.
## How KTX compares
KTX is a context layer, and its structured core is an agent-native semantic layer. That matters. MetricFlow, Cube, and Malloy all give teams ways to model metrics, dimensions, joins, and generated SQL. KTX covers that same semantic-layer job, then adds the surrounding context agents need to use it well: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools.
The primary user is different. MetricFlow is centered on dbt-style metric definitions. Cube is centered on a governed semantic runtime for BI, applications, and agents. Malloy is centered on an expressive modeling and query language. KTX is centered on agents that need to read a semantic model, change it, validate it, inspect the generated SQL, and leave a reviewable git diff.
| | KTX semantic layer | MetricFlow | Cube | Malloy |
|---|---|---|---|---|
| **Design center** | Agent-native semantic modeling inside a broader context layer | Metric definitions and dbt semantic models | Governed serving layer for BI, embedded analytics, APIs, and agents | Semantic modeling and analytical query language |
| **Model surface** | Plain YAML sources plus Markdown knowledge pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations |
| **What it models** | Sources, columns, measures, segments, joins, grain, filters, default time dimensions, and context references | Semantic models, entities, dimensions, measures, metrics, time grains, and metric types | Cubes, views, measures, dimensions, segments, joins, hierarchies, policies, and rollups | Sources, joins, dimensions, measures, calculations, nested results, and query pipelines |
| **Agent edit loop** | First-class. Agents can patch small files, save imperfect drafts, run validation, query through MCP, inspect SQL, and refine in the same workflow | Possible, but the interface is a dbt/metric workflow rather than an agent context workflow | Possible through code-first models and platform APIs, but changes are tied to runtime deployment and governance concerns | Possible, but agents must operate in Malloy's language and compiler model |
| **Fan-out safety** | Explicit `grain` plus relationship metadata. KTX detects `one_to_many` fan-out, identifies chasm traps, pre-aggregates independent fact measures into CTEs, and rejects unsafe filters | Dataflow query planning for metric requests, multi-hop joins, metric time, and metric types | Runtime planner, modeled joins, primary keys, views, multi-fact views, and pre-aggregations | Symmetric aggregates and path-based aggregation in the language |
| **SQL generation** | Structured semantic query to canonical SQL, then dialect transpilation with sqlglot | Metric request to optimized query plan, then engine-specific SQL | REST, GraphQL, Postgres-compatible SQL, Semantic SQL, and cached/pre-aggregated execution | Malloy source/query to dialect-specific SQL and result metadata |
| **Context around semantics** | Built in: knowledge pages, scan artifacts, relationship inference, ingest transcripts, replay, and agent-facing MCP tools | Primarily metric and dbt project context | Descriptions and `meta.ai_context` inside the semantic model, plus platform agent features | Annotations/tags can carry metadata; surrounding context depends on the application |
| **Best fit** | Agents maintaining analytics code, metrics, joins, SQL, docs, and semantic definitions | Teams standardizing metrics inside dbt workflows | Production semantic APIs, BI integrations, access control, caching, and concurrency | Expressive modeling and exploratory analysis above SQL |
**Agent-native by design.** KTX's advantage is not just that the files are YAML. The whole loop is shaped for agents: sources are small, overlays can add measures or computed columns without copying entire generated schemas, writes are permissive so an agent can save a draft, and validation/query tools give immediate feedback. An agent can move from "this metric is wrong" to "here is the semantic diff, generated SQL, and supporting context" without leaving the project.
**A semantic layer plus the context to use it.** Traditional semantic layers define what to calculate. KTX also stores why the definition exists, where it came from, what schema evidence supports it, and what an agent did when it changed. A measure can live next to a knowledge page about exclusions, a scan artifact that proves the join path, and an ingest transcript that explains the source of the definition. That is the difference between giving an agent a metric catalog and giving it operational memory.
**Fan-out handling is explicit and reviewable.** KTX asks model authors and agents to declare grain and relationship direction. The planner uses that metadata to avoid silent row multiplication: it detects `one_to_many` fan-out paths, separates independent fact measures into aggregate-locality CTEs, and refuses filters that would be unsafe to apply after pre-aggregation. Cube, MetricFlow, and Malloy all have strong approaches to this class of problem, but KTX's approach is deliberately inspectable in the files and in the generated plan.
**Where other systems are stronger.** KTX draws a clear product boundary around agent-native context and semantic modeling. Cube is stronger when you need a production semantic API with access policies, pre-aggregations, refresh workers, and high-concurrency serving. MetricFlow is stronger when your primary workflow is dbt-native metric standardization. Malloy is stronger when you want a full analytical language with nested query shapes. KTX is strongest when the semantic layer is the substrate agents will read, edit, validate, and extend as part of day-to-day analytics engineering.
**When KTX replaces your semantic layer vs. works beside it.** If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow, LookML, Looker, Metabase, dbt, or Notion, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against.
## The plain-files philosophy
A KTX project is a directory of plain files. No server to run, no database to manage, no proprietary store to back up. The context itself is YAML and Markdown — formats you can read, diff, and version-control with tools you already use — alongside a local SQLite file that holds rebuildable state.
```
my-project/
├── ktx.yaml                      # Project configuration
├── semantic-layer/
│   └── warehouse/
│       ├── orders.yaml           # Semantic source definitions
│       ├── customers.yaml
│       └── order_items.yaml
├── knowledge/
│   ├── global/
│   │   ├── revenue.md            # Business definitions and rules
│   │   └── segment-classification.md
│   └── user/
│       └── local/
│           └── data-quality-notes.md
├── raw-sources/
│   └── warehouse/
│       └── live-database/        # Scan artifacts and reports
└── .ktx/
    ├── db.sqlite                 # Local state (git-ignored)
    └── cache/                    # Runtime cache (git-ignored)
```
Semantic sources and knowledge pages are committed to git. The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run.
This means your analytics context travels with your code. You can fork it, branch it, review it in a PR, and merge it with the same tools you use for dbt models. There's no sync problem between a remote server and your local state. There's no migration to run. The files are the source of truth.

View file

@ -1,59 +0,0 @@
---
title: Introduction
description: What KTX is and who it's for.
---
Data agents can write SQL. The hard part is making sure they write the SQL your analytics team would have written.
KTX is the agent-native context layer for analytics engineering. At its core is a semantic layer: YAML sources that define tables, columns, measures, joins, grain, filters, segments, and computed fields. Around that core, KTX adds the context analytics agents need to work safely: warehouse scans, knowledge pages, ingestion from existing tools, provenance, validation, and MCP access.
KTX projects are plain files: YAML and Markdown that you commit to git and review in PRs, just like dbt models, plus a git-ignored SQLite database for local state. Agents can read the files, edit them, validate them, query through them, and leave behind a diff your team can review.
## Who KTX is for
KTX is built for analytics engineers and data teams who want data agents to work on real analytics systems, not just generate one-off SQL.
Use KTX when you want agents to:
- Generate SQL from approved measures, dimensions, and joins
- Repair or extend semantic definitions through reviewable git diffs
- Explain where a metric definition came from and what business rules shape it
- Use warehouse scans and relationship evidence instead of guessing join paths
- Work alongside **dbt**, **LookML**, **MetricFlow**, **Looker**, **Metabase**, **Notion**, and BI platforms
- Work with warehouses like **PostgreSQL**, **Snowflake**, **BigQuery**, **ClickHouse**, **MySQL**, or **SQL Server**
If you've ever watched an agent confidently generate a query that joins on the wrong key or invents a metric that doesn't exist, KTX is the fix.
## What KTX gives agents
- **A semantic layer they can edit** — plain YAML sources with measures, dimensions, joins, grain, segments, filters, and computed columns
- **Safe query planning** — grain-aware SQL generation, fan-out detection, chasm-trap handling, and dialect transpilation
- **Business context** — Markdown knowledge pages for definitions, rules, exceptions, and data quality notes
- **Schema evidence** — warehouse scans with table metadata, column stats, constraints, and inferred relationships
- **Provenance** — ingest transcripts and replay metadata that explain where context came from and why it changed
- **An agent-facing API** — MCP and CLI tools for reading, writing, validating, searching, and querying context
## How these docs are organized
<Cards>
<Card title="Quickstart" href="/docs/getting-started/quickstart">
Set up KTX and build your first context in under 10 minutes.
</Card>
<Card title="Concepts" href="/docs/concepts/the-context-layer">
Understand what a context layer is, why agents need one, and how KTX compares to other semantic layers.
</Card>
<Card title="Guides" href="/docs/guides/building-context">
Hands-on workflows for scanning, ingesting, writing semantic sources, and serving agents.
</Card>
<Card title="Integrations" href="/docs/integrations/primary-sources">
Setup details for every supported database, context source, and agent client.
</Card>
<Card title="CLI Reference" href="/docs/cli-reference/ktx-setup">
Exhaustive flag and subcommand reference for every KTX command.
</Card>
</Cards>
## Next steps
- **Get hands-on** — follow the [Quickstart](/docs/getting-started/quickstart) to set up KTX with your own database in under 10 minutes.
- **Understand the theory** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why schema access alone breaks on real analytics and how KTX addresses it.

View file

@ -1,5 +0,0 @@
{
"title": "Getting Started",
"defaultOpen": true,
"pages": ["introduction", "quickstart"]
}

View file

@ -1,255 +0,0 @@
---
title: Quickstart
description: Set up KTX and build your first context in under 10 minutes.
---
This guide walks you through `ktx setup` — an interactive wizard that configures your LLM provider and embeddings, connects your database, optionally ingests from your existing tools, builds context, and installs agent integration.
## Prerequisites
- **Node.js 22+** and **pnpm**
- An **Anthropic API key** for LLM-powered enrichment and ingestion
- A **database connection** — PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite
- Optionally, a **dbt project**, **LookML repo**, **Metabase instance**, or other context source
## Install and run setup
KTX is currently used from a local checkout or linked workspace CLI. Build and link the CLI first:
```bash
git clone https://github.com/kaelio/ktx.git
cd ktx
pnpm install
pnpm run setup:dev
pnpm run link:dev
```
Then run the setup wizard in the directory where you want your KTX project:
```bash
ktx setup
```
The wizard walks through six steps. You can go back at any point, and if you exit early, running `ktx setup` again resumes where you left off.
## Step 1: Configure LLM
KTX uses an Anthropic model to enrich schema descriptions, generate semantic sources during ingestion, and reconcile metadata from your tools.
The wizard asks how to find your API key:
```
◆ How should KTX find your Anthropic API key?
│ ○ Use ANTHROPIC_API_KEY from the environment
│ ○ Paste a key and save it as a local secret file
```
If you choose to paste a key, KTX saves it in `.ktx/secrets/anthropic-api-key` with local file permissions. Your `ktx.yaml` stores a `file:` reference, never the raw key.
Next, choose a model:
```
◆ Which Anthropic model should KTX use?
│ ○ Claude Sonnet 4.6 (recommended)
│ ○ Claude Opus 4.6
│ ○ Claude Haiku 4.5
│ ○ Enter a model ID manually
```
KTX runs a health check to verify your key and model work before saving.
## Step 2: Configure embeddings
KTX uses embeddings for semantic search over sources, knowledge pages, schema metadata, and relationship evidence.
```
◆ Which embedding option should KTX use?
│ ○ Local sentence-transformers embeddings
│ ○ OpenAI embeddings (recommended)
```
**OpenAI embeddings** use `text-embedding-3-small` (1536 dimensions) and require an `OPENAI_API_KEY`.
**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX Python daemon. No API key is needed. If you run the daemon as a long-lived HTTP service, start it with:
```bash
ktx-daemon serve-http --host 127.0.0.1 --port 8765
```
## Step 3: Connect a database
Select one or more databases for KTX to scan. The wizard supports SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake.
For PostgreSQL, you can enter connection details field by field or paste a connection URL:
```
◆ How do you want to connect to PostgreSQL?
│ ○ Enter connection details (host, port, database, user)
│ ○ Paste a connection URL
```
If your URL contains credentials, KTX saves it to `.ktx/secrets/` and writes a `file:` reference in `ktx.yaml`. You can also use `env:DATABASE_URL` to reference an environment variable.
After connecting, KTX automatically runs a connection test and a structural scan:
```
◇ Testing postgres-warehouse
│ ✓ Connection test passed
│ Driver: PostgreSQL · Tables: 42
◇ Scanning postgres-warehouse
│ ✓ Structural scan completed
│ Changes: 42 new tables
◇ Primary source ready
│ postgres-warehouse · PostgreSQL · structural scan complete
```
For Snowflake and BigQuery, the wizard offers **Historic SQL** configuration for query history views. For PostgreSQL, enable Historic SQL with `--enable-historic-sql` when `pg_stat_statements` is configured.
## Step 4: Add context sources
Context sources let KTX ingest metadata from your existing analytics tools. This step is optional — you can skip it and add sources later.
```
◆ Which context sources should KTX ingest?
│ ◻ dbt
│ ◻ MetricFlow
│ ◻ Metabase
│ ◻ Looker
│ ◻ LookML
│ ◻ Notion
```
For **dbt**, point KTX at a local path or git URL. KTX reads your `dbt_project.yml` and schema files to extract model metadata:
```
◆ dbt source location
│ ○ Local path
│ ○ Git URL
```
For **Metabase** and **Looker**, you provide an API URL and credentials. KTX maps BI databases to your KTX primary source connections so it knows which warehouse tables the BI metadata refers to.
Context sources are saved to `ktx.yaml` and built during the next step.
## Step 5: Build context
This is where KTX does the heavy lifting. It runs an enriched scan of your database (producing LLM-generated column and table descriptions) and ingests metadata from any configured context sources.
```
◆ Build KTX context for agents?
│ ○ Build context now (recommended)
│ ○ Leave context unbuilt and exit setup
```
The build scans each primary source with LLM enrichment, detects table relationships, and runs ingestion agents that reconcile metadata from your context sources into semantic-layer YAML files and knowledge pages.
For a small database (under 50 tables), this takes a few minutes. Larger warehouses can take longer. You can press <kbd>d</kbd> to detach and let it run in the background:
```
KTX context build
Run: setup-context-local-abc123
Project: /home/user/analytics
Detach: press d to leave this running.
Resume: ktx setup context watch setup-context-local-abc123
Status: ktx setup context status setup-context-local-abc123
```
When the build completes, KTX verifies that agent-ready context was produced:
```
KTX context is ready for agents.
Primary sources:
postgres-warehouse: enriched scan complete
Context sources:
dbt-main: memory update complete
Verification:
Agent context: ready
Semantic search: ready
```
## Step 6: Install agent integration
The final step connects KTX to your coding agent. Choose how agents should access the project:
```
◆ How should agents use this KTX project?
│ ○ CLI tools and skills
│ ○ MCP server config
│ ○ Both
```
Then select which agents to install for:
```
◆ Which agent targets should KTX install?
│ ◻ Claude Code
│ ◻ Codex
│ ◻ Cursor
│ ◻ OpenCode
```
**CLI mode** writes a skill file (e.g., `.claude/skills/ktx/SKILL.md`) that teaches the agent to call KTX commands directly.
**MCP mode** writes an MCP server configuration (e.g., `.mcp.json`) that lets the agent call KTX tools like `sl_query`, `knowledge_search`, and `sl_write_source` over the Model Context Protocol.
## Verify it worked
Check your project status:
```bash
ktx status
```
```
KTX project: /home/user/analytics
Project ready: yes
LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Primary sources configured: yes (postgres-warehouse)
Context sources configured: yes (dbt-main)
KTX context built: yes
Agent integration ready: yes (claude-code:project)
```
List your semantic sources:
```bash
ktx sl list
```
Query through the semantic layer:
```bash
ktx sl query \
--connection-id postgres-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--order-by orders.total_revenue:desc \
--limit 5 \
--format sql
```
This outputs the generated SQL. Add `--execute` to run it against your warehouse:
```bash
ktx sl query \
--connection-id postgres-warehouse \
--measure orders.total_revenue \
--dimension orders.status \
--order-by orders.total_revenue:desc \
--limit 5 \
--execute --max-rows 10
```
## Next steps
- **Build more context** — learn about [scanning](/docs/guides/building-context), relationship detection, and ingestion workflows in the Building Context guide.
- **Refine your semantic layer** — the [Writing Context](/docs/guides/writing-context) guide covers source YAML, measures, joins, and knowledge pages.
- **Understand the architecture** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why a context layer is more than a semantic layer.
- **Connect more agents** — see the [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool setup details.

View file

@ -1,241 +0,0 @@
---
title: Building Context
description: Scan your database schema and ingest context from dbt, Looker, Metabase, and more.
---
Building context is a two-step process. First, you **scan** your database to discover its structure — tables, columns, types, constraints, and relationships. Then you **ingest** from your existing tools to enrich that structure with semantic meaning — metric definitions, business descriptions, join logic, and knowledge that agents need to generate correct analytics.
## Scanning
Scanning connects to your database and extracts structural metadata. KTX stores the results locally so agents can understand your schema without querying the database directly.
### Running a scan
```bash
ktx dev scan <connection-id>
```
This runs a structural scan by default. You can control what the scan does with the `--mode` flag:
| Mode | What it does |
|------|-------------|
| `structural` | Tables, columns, types, constraints, row counts (default) |
| `enriched` | Structural scan plus LLM-generated column descriptions |
| `relationships` | Structural scan plus foreign key relationship detection |
```bash
# Scan with relationship detection
ktx dev scan my-postgres --mode relationships
# Preview without writing results
ktx dev scan my-postgres --dry-run
```
### Checking scan status
Every scan produces a run ID. Use it to check progress or review results:
```bash
# Check status of a scan run
ktx dev scan status <run-id>
# Print the full scan report
ktx dev scan report <run-id>
# Get the report as JSON for scripting
ktx dev scan report <run-id> --json
```
### Relationship detection
Many databases lack declared foreign keys. KTX infers relationships by scoring column pairs across seven signals — name similarity, type compatibility, value overlap, embedding similarity, profile uniqueness, null rate, and structural priors. The weighted score determines each candidate's status:
| Score range | Status | Meaning |
|-------------|--------|---------|
| &ge; 0.85 | `accepted` | High confidence — applied automatically |
| &ge; 0.55 and &lt; 0.85 | `review` | Plausible — needs human review |
| &lt; 0.55 | `rejected` | Low confidence — not applied |
After a relationship scan, review the candidates:
```bash
# Show candidates pending review (default)
ktx dev scan relationships <run-id>
# Show all candidates regardless of status
ktx dev scan relationships <run-id> --status all
# Accept a specific candidate
ktx dev scan relationships <run-id> --accept <candidate-id>
# Reject a candidate with a note
ktx dev scan relationships <run-id> --reject <candidate-id> --note "These columns share a name but are unrelated"
```
Once you've reviewed candidates, apply the accepted ones as joins in your semantic layer:
```bash
# Apply all accepted relationships
ktx dev scan relationship-apply <run-id> --all-accepted
# Preview what would be applied
ktx dev scan relationship-apply <run-id> --all-accepted --dry-run
# Apply a specific candidate
ktx dev scan relationship-apply <run-id> --candidate <candidate-id>
```
### Calibrating thresholds
As you review more relationships, KTX can evaluate whether the default thresholds (0.85 accept, 0.55 review) are optimal for your schema:
```bash
# See how your feedback aligns with current thresholds
ktx dev scan relationship-calibration --connection my-postgres
# Get threshold recommendations (needs 20+ labels, 5+ accepted, 5+ rejected)
ktx dev scan relationship-thresholds --connection my-postgres
# Export your review decisions as calibration labels
ktx dev scan relationship-feedback --connection my-postgres
```
## Ingestion
Ingestion pulls semantic context from your existing analytics tools — dbt projects, Looker models, Metabase questions, and more — and writes it into your KTX project as semantic sources and knowledge pages.
### How it works
Each ingest run follows this flow:
1. An **adapter** extracts metadata from your tool (dbt manifest, LookML files, Metabase API, etc.)
2. An **LLM agent** reconciles the extracted metadata with your existing context — it merges intelligently rather than overwriting
3. **Semantic sources** (YAML) and **knowledge pages** (Markdown) are written to your project directory
### Running an ingest
```bash
# Ingest one configured context source
ktx ingest my-dbt-source
# Ingest every configured context source
ktx ingest --all
```
The public `ktx ingest` command uses the source configuration in `ktx.yaml`, including the source `driver` and any adapter-specific paths or credentials.
For adapter-level debugging, use the low-level `ktx dev ingest run` command:
```bash
ktx dev ingest run --connection-id my-dbt-source --adapter dbt
```
Useful low-level flags:
| Flag | Description |
|------|-------------|
| `--source-dir <path>` | Directory containing source files (e.g., your dbt project) |
| `--viz` | Render the memory-flow TUI for real-time progress |
| `--json` | Output as JSON |
| `--plain` | Plain text output |
### Watching progress
```bash
# Check status of the latest ingest
ktx ingest status
# Check a specific run
ktx ingest status <run-id>
# Open the visual ingest report (TUI)
ktx ingest watch
# Replay a past ingest run
ktx dev ingest replay <run-id>
```
The `watch` command opens an interactive TUI that shows the memory-flow output — every tool call, LLM decision, and artifact written during the ingest.
### Available adapters
| Adapter | Source | What gets ingested |
|---------|--------|--------------------|
| `dbt` | dbt project | Model definitions, column descriptions, tests, tags |
| `metricflow` | MetricFlow semantic models | Metrics, dimensions, entities, semantic joins |
| `lookml` | LookML files | Views, explores, dimensions, measures, joins |
| `looker` | Looker API | Explores, looks, dashboard metadata |
| `metabase` | Metabase API | Questions, dashboards, table metadata |
| `notion` | Notion API | Database pages, knowledge articles |
| `historic-sql` | Query history | Frequent queries, usage patterns, runtime stats |
| `live-database` | Direct DB connection | Live schema introspection |
See [Context Sources](/docs/integrations/context-sources) for adapter-specific setup and auth configuration.
### What gets generated
A typical dbt ingest produces semantic sources and knowledge pages in your project:
**Semantic source** (`semantic-layer/my-postgres/orders.yaml`):
```yaml title="semantic-layer/my-postgres/orders.yaml"
name: orders
table: public.orders
grain:
  - order_id
columns:
  - name: order_id
    type: string
    description: Unique order identifier
  - name: customer_id
    type: string
    description: Foreign key to customers table
  - name: order_date
    type: time
    role: time
    description: Date the order was placed
  - name: total_amount
    type: number
    description: Total order value in USD
measures:
  - name: total_revenue
    expr: SUM(total_amount)
    description: Sum of all order values
  - name: order_count
    expr: COUNT(DISTINCT order_id)
    description: Number of distinct orders
joins:
  - to: customers
    "on": orders.customer_id = customers.customer_id
    relationship: many_to_one
```
**Knowledge page** (`knowledge/global/order-status-definitions.md`):
```markdown
---
summary: Business definitions for order status values
tags: [orders, definitions]
sl_refs: [orders]
---
## Order Statuses
- **pending**: Order placed but not yet processed
- **confirmed**: Payment received, awaiting fulfillment
- **shipped**: Order dispatched to carrier
- **delivered**: Order received by customer
- **cancelled**: Order cancelled before shipment
Orders in "pending" status for more than 48 hours are flagged for review.
```
### Deterministic replay
Every ingest session records a full transcript — tool calls, LLM responses, and write decisions. You can replay any session to debug why a source was written a certain way:
```bash
ktx dev ingest replay <run-id> --viz
```
This opens the same TUI view as the original run, letting you step through the agent's reasoning.

View file

@ -1,5 +0,0 @@
{
"title": "Guides",
"defaultOpen": true,
"pages": ["building-context", "writing-context", "serving-agents"]
}

View file

@ -1,207 +0,0 @@
---
title: Serving Agents
description: Expose your context to Claude Code, Cursor, Codex, and other coding agents.
---
Once you've built and refined your context, the final step is exposing it to coding agents. KTX provides two channels: an **MCP server** for persistent integration with tools like Claude Code and Cursor, and **CLI commands** for direct terminal access.
## MCP Server
The MCP (Model Context Protocol) server gives agents structured access to your entire context layer — semantic sources, knowledge pages, scans, and ingestion — through a standard tool-calling interface.
### Starting the server
```bash
ktx serve --mcp stdio
```
This starts an MCP server on stdio, which is how Claude Code, Cursor, and other MCP-compatible tools communicate with KTX. You typically don't run this manually — your agent's configuration handles it.
### Configuration options
| Flag | Description | Default |
|------|-------------|---------|
| `--mcp <mode>` | MCP transport mode (currently `stdio`) | Required |
| `--user-id <id>` | User identifier for knowledge scoping | `local` |
| `--semantic-compute` | Enable semantic layer planning and query execution | `false` |
| `--semantic-compute-url <url>` | URL for the semantic compute daemon | &mdash; |
| `--database-introspection-url <url>` | Daemon URL for live database access | &mdash; |
| `--execute-queries` | Allow agents to execute SQL queries | `false` |
| `--memory-capture` | Enable memory capture from conversations | `false` |
| `--memory-model <model>` | LLM model for memory capture | &mdash; |
### Available tools
When an agent connects via MCP, it can call these tools:
**Connections**
| Tool | Description |
|------|-------------|
| `connection_list` | List configured data connections |
| `connection_test` | Test a connection through the scan connector |
**Semantic Layer**
| Tool | Description |
|------|-------------|
| `sl_list_sources` | List sources, optionally filtered by connection or search query |
| `sl_read_source` | Read a source YAML by connection and name |
| `sl_write_source` | Create, replace, or delete a source |
| `sl_validate` | Validate sources against the database schema |
| `sl_query` | Execute a semantic query — returns rows, SQL, and query plan |
**Knowledge**
| Tool | Description |
|------|-------------|
| `knowledge_search` | Search knowledge pages by query, returns ranked summaries |
| `knowledge_read` | Read a knowledge page by key |
| `knowledge_write` | Create or replace a knowledge page |
**Scanning**
| Tool | Description |
|------|-------------|
| `scan_trigger` | Run a structural, enriched, or relationship scan |
| `scan_status` | Check the status of a running scan |
| `scan_report` | Read a completed scan report |
| `scan_list_artifacts` | List files produced by a scan run |
| `scan_read_artifact` | Read a scan artifact by path |
**Ingestion**
| Tool | Description |
|------|-------------|
| `ingest_trigger` | Trigger an ingest run for an adapter and connection |
| `ingest_status` | Check ingest progress, including diff and work-unit summaries |
| `ingest_report` | Read a stored ingest report |
| `ingest_replay` | Read the memory-flow replay for a past ingest |
**Memory**
| Tool | Description |
|------|-------------|
| `memory_capture` | Capture knowledge and semantic updates from a conversation |
| `memory_capture_status` | Check the status of a memory capture run |
### How agents use these tools
A typical agent interaction flows like this:
1. Agent calls `connection_list` to see available databases
2. Agent calls `sl_list_sources` to discover what semantic sources exist
3. Agent calls `knowledge_search` to find business context relevant to the user's question
4. Agent calls `sl_query` with measures, dimensions, and filters to get data
5. Agent presents results with the business context it found
Agents should use the semantic layer for analytics questions because it enforces correct joins, grain-aware aggregation, and consistent metric definitions. If SQL execution is enabled, KTX only allows read-only SQL with row limits.
## CLI Commands
For agents that work through the terminal rather than MCP, KTX provides a set of machine-readable commands under `ktx agent`. These return JSON output designed for programmatic consumption.
### Available commands
```bash
# List available tools and their descriptions
ktx agent tools --json
# Get project context for planning
ktx agent context --json
```
**Semantic layer:**
```bash
# List sources
ktx agent sl list --json
ktx agent sl list --json --connection-id my-postgres
# Read a source
ktx agent sl read orders --json --connection-id my-postgres
# Run a query from a JSON file
ktx agent sl query --json \
--connection-id my-postgres \
--query-file query.json \
--execute \
--max-rows 100
```
**Knowledge:**
```bash
# Search knowledge pages
ktx agent wiki search "revenue recognition" --json --limit 10
# Read a specific page
ktx agent wiki read order-status-definitions --json
```
**SQL execution:**
```bash
# Execute read-only SQL with a row limit
ktx agent sql execute --json \
--connection-id my-postgres \
--sql-file query.sql \
--max-rows 500
```
### When to use CLI vs MCP
| | MCP | CLI |
|---|-----|-----|
| **Best for** | Persistent agent integrations | Terminal-based workflows, scripting |
| **Protocol** | Structured tool calls over stdio | Shell commands with JSON output |
| **Used by** | Claude Code, Cursor, Codex | Shell scripts, custom agents, debugging |
| **State** | Server runs continuously | Stateless per invocation |
Most users should set up MCP — it gives agents richer context and a more natural interaction model. The CLI commands are useful for scripting, debugging, and agents that operate through terminal tools.
## Setting Up Your Agent
The fastest way to connect an agent is through the setup wizard:
```bash
ktx setup
```
The agents step auto-detects installed tools and generates the right configuration. For manual setup or per-tool details, see the [Agent Clients](/docs/integrations/agent-clients) integration page.
### Quick manual setup
**Claude Code** — add to `.mcp.json` at your project root:
```json
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": ["serve", "--mcp", "stdio", "--semantic-compute", "--execute-queries"],
"env": {
"KTX_PROJECT_DIR": "/path/to/your/ktx/project"
}
}
}
}
```
**Cursor** — add to `.cursor/mcp.json`:
```json
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": ["serve", "--mcp", "stdio", "--semantic-compute", "--execute-queries"],
"env": {
"KTX_PROJECT_DIR": "/path/to/your/ktx/project"
}
}
}
}
```
After configuration, the agent can immediately start calling KTX tools — listing sources, searching knowledge, and querying your semantic layer.

View file

@ -1,273 +0,0 @@
---
title: Writing Context
description: Write and refine semantic sources and knowledge pages.
---
After building context through scanning and ingestion, you'll want to refine it — edit semantic sources to match your business logic, add knowledge pages that capture tribal knowledge, and query your data through the semantic layer to verify everything works.
## Semantic Sources
Semantic sources are YAML files that describe your tables, columns, measures, and joins. They're the core of the context layer — the structured definitions that agents use to generate correct SQL.
### Listing sources
```bash
# List all sources across connections
ktx sl list
# List sources for a specific connection
ktx sl list --connection-id my-postgres
# Output as JSON
ktx sl list --json
```
### Reading a source
```bash
ktx sl read orders --connection-id my-postgres
```
This prints the full YAML definition for the source.
### The source schema
A semantic source defines a single queryable entity — usually a table or a SQL expression. Here's a fully annotated example:
```yaml
name: orders
description: Customer orders with line-item totals
table: public.orders # or use `sql:` for a custom SQL expression
grain:
  - order_id # columns that uniquely identify a row
columns:
  - name: order_id
    type: string # string | number | time | boolean
    description: Unique order identifier
  - name: order_date
    type: time
    role: time # marks this as the default time dimension
    description: Date the order was placed
  - name: status
    type: string
    visibility: public # public (default) | internal | hidden
    description: Current order status
  - name: _etl_loaded_at
    type: time
    visibility: hidden # hidden columns are excluded from agent queries
    description: Internal ETL timestamp
  - name: total_amount
    type: number
    description: Order total in USD
measures:
  - name: total_revenue
    expr: SUM(total_amount)
    description: Sum of all order values
  - name: order_count
    expr: COUNT(DISTINCT order_id)
    description: Number of distinct orders
  - name: avg_order_value
    expr: AVG(total_amount)
    description: Average order value
  - name: high_value_revenue
    expr: SUM(total_amount)
    filter: total_amount > 100
    description: Revenue from orders over $100
segments:
  - name: us_orders
    expr: country = 'US'
    description: Orders from US customers
joins:
  - to: customers
    on: orders.customer_id = customers.customer_id
    relationship: many_to_one # many_to_one | one_to_many | one_to_one
  - to: order_items
    on: orders.order_id = order_items.order_id
    relationship: one_to_many
    alias: items # optional alias for the joined source
```
Key fields:
| Field | Required | Description |
|-------|----------|-------------|
| `name` | Yes | Source identifier (lowercase, underscores) |
| `table` or `sql` | Yes | Database table or custom SQL expression (exactly one) |
| `grain` | Yes | Columns that define row uniqueness |
| `columns` | No | Column definitions with type, role, visibility |
| `measures` | No | Aggregation expressions (SUM, COUNT, AVG, etc.) |
| `joins` | No | Relationships to other sources |
| `segments` | No | Named filter conditions |
| `inherits_columns_from` | No | Inherit column metadata from a manifest entry |
Column visibility controls what agents see:
| Visibility | Behavior |
|------------|----------|
| `public` | Included in agent queries and listings (default) |
| `internal` | Available for joins and measures but not shown to agents |
| `hidden` | Excluded entirely — useful for ETL columns |
### Writing a source
```bash
ktx sl write orders --connection-id my-postgres --yaml '
name: orders
table: public.orders
grain: [order_id]
columns:
- name: order_id
type: string
- name: total_amount
type: number
measures:
- name: total_revenue
expr: SUM(total_amount)
'
```
You can also edit source files directly — they live at `semantic-layer/<connection-id>/<source-name>.yaml` in your project directory.
### Validating sources
Validation checks a source definition against the actual database schema:
```bash
ktx sl validate orders --connection-id my-postgres
```
This catches mismatches — columns that don't exist in the table, type mismatches, invalid join targets — before an agent tries to use the source.
### Querying
The semantic layer compiles your measures and dimensions into SQL, optionally executing it against the database:
```bash
# Compile a query to SQL
ktx sl query \
--connection-id my-postgres \
--measure total_revenue \
--measure order_count \
--dimension "order_date" \
--filter "status = 'completed'" \
--order-by order_date:desc \
--limit 10 \
--format sql
```
This outputs the compiled SQL without executing it. To run the query:
```bash
# Execute and return results
ktx sl query \
--connection-id my-postgres \
--measure total_revenue \
--dimension "order_date" \
--execute \
--max-rows 100
```
Query flags:
| Flag | Description |
|------|-------------|
| `--measure <name>` | Measure to query (repeatable, at least one required) |
| `--dimension <name>` | Dimension to group by (repeatable) |
| `--filter <expr>` | Filter expression (repeatable) |
| `--segment <name>` | Named segment to apply (repeatable) |
| `--order-by <field[:dir]>` | Sort field, optionally with `:asc` or `:desc` (repeatable) |
| `--limit <n>` | Maximum rows in the compiled query |
| `--format <mode>` | Output format: `json` (default) or `sql` |
| `--execute` | Execute the query against the database |
| `--max-rows <n>` | Maximum rows to return when executing |
| `--include-empty` | Include empty/null rows in results |
The query planner is grain-aware — it understands the cardinality of joins and avoids chasm traps (double-counting caused by many-to-many fan-outs). When you query measures that span multiple sources, KTX generates sub-queries at the correct grain before joining.
## Knowledge Pages
Knowledge pages are Markdown files that capture business context — definitions, rules, gotchas, and anything an agent needs to understand beyond what the schema tells it.
### What they are
When an agent asks "what counts as an active user?" or "why do revenue numbers differ between the dashboard and the SQL query?", the answer isn't in the schema. It's tribal knowledge that lives in Slack threads, Notion pages, or someone's head. Knowledge pages make that context searchable and available to agents.
### Organization
Knowledge pages are organized by scope:
```
knowledge/
├── global/                 # Cross-cutting definitions
│   ├── order-status-definitions.md
│   ├── revenue-recognition-rules.md
│   └── data-freshness-sla.md
└── user/
    └── local/              # User-scoped context
        ├── schema-conventions.md
        └── known-data-issues.md
```
- **Global pages** apply across all connections — business definitions, metric standards, company terminology.
- **User-scoped pages** are private to a user ID — personal notes, local gotchas, or context you do not want shared globally.
### Writing pages
```bash
ktx wiki write order-status-definitions \
--scope global \
--summary "Business definitions for order status values" \
--content "## Order Statuses
- **pending**: Order placed but not yet processed
- **confirmed**: Payment received, awaiting fulfillment
- **shipped**: Order dispatched to carrier
- **delivered**: Order received by customer
- **cancelled**: Order cancelled before shipment
Orders in pending status for more than 48 hours are flagged for review." \
--tag orders \
--tag definitions \
--sl-ref orders
```
Write flags:
| Flag | Description |
|------|-------------|
| `--scope <scope>` | `global` (default) or `user` |
| `--summary <text>` | Short description for search results (required) |
| `--content <text>` | Full Markdown content (required) |
| `--tag <tag>` | Categorization tag (repeatable) |
| `--ref <ref>` | Reference to external resources (repeatable) |
| `--sl-ref <ref>` | Link to a semantic source (repeatable) |
You can also create and edit knowledge pages directly as Markdown files in the `knowledge/` directory.
### Listing pages
```bash
ktx wiki list
```
### Reading a page
```bash
ktx wiki read order-status-definitions
```
### Searching
```bash
ktx wiki search "revenue recognition"
```
Search uses both full-text matching and semantic similarity — it finds relevant pages even when the exact terms don't match. Agents call this automatically when they need business context to answer a question.

View file

@ -1,279 +0,0 @@
---
title: Agent Clients
description: Set up KTX with Claude Code, Cursor, Codex, and OpenCode.
---
KTX integrates with coding agents through two channels that can be used independently or together:
- **MCP server** — A persistent Model Context Protocol server that exposes KTX tools (semantic queries, knowledge search, SQL execution) directly to the agent
- **CLI skills** — Command definitions that teach the agent how to invoke KTX via the terminal
Run `ktx setup` and select your agent targets, or configure manually using the snippets below.
## Claude Code
### Install via `ktx setup`
During setup, select **Claude Code** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI skills | `.claude/skills/ktx/SKILL.md` |
| MCP server | `.mcp.json` (under `mcpServers.ktx`) |
Both project-scoped and global installations are supported. Global installs write to `~/.claude/skills/ktx/SKILL.md`.
### Manual MCP configuration
Add KTX to `.mcp.json` at your project root:
```json title=".mcp.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
Replace `/path/to/ktx-project` with your KTX project directory. For a pinned local checkout, set `command` to the absolute path of the built CLI and use the arguments generated by `ktx setup`.
### Manual CLI skills configuration
Create `.claude/skills/ktx/SKILL.md`:
```markdown title=".claude/skills/ktx/SKILL.md"
---
name: ktx
description: Use local KTX semantic context, wiki knowledge, and safe SQL execution for this project.
---
Available commands:
- `ktx agent context --json --project-dir /path/to/project`
- `ktx agent sl list --json --project-dir /path/to/project`
- `ktx agent sl read '<sourceName>' --json --project-dir /path/to/project`
- `ktx agent sl query --json --project-dir /path/to/project --connection-id '<id>' --query-file '<path>' --execute --max-rows 100`
- `ktx agent wiki search '<query>' --json --project-dir /path/to/project`
- `ktx agent wiki read '<pageId>' --json --project-dir /path/to/project`
- `ktx agent sql execute --json --project-dir /path/to/project --connection-id '<id>' --sql-file '<path>' --max-rows 100`
```
### Workflow tips
- Claude Code discovers skills automatically from `.claude/skills/` — no restart needed after setup
- The MCP server starts on-demand when Claude Code first calls a KTX tool
- Use `--semantic-compute` to enable query planning and execution
- Global installation (`~/.claude/skills/ktx/SKILL.md`) makes KTX available in all projects without per-project setup
---
## Cursor
### Install via `ktx setup`
During setup, select **Cursor** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI rules | `.cursor/rules/ktx.mdc` |
| MCP server | `.cursor/mcp.json` (under `mcpServers.ktx`) |
Cursor supports project-scoped installation only.
### Manual MCP configuration
Create or edit `.cursor/mcp.json`:
```json title=".cursor/mcp.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
### Manual CLI rules configuration
Create `.cursor/rules/ktx.mdc` with the same content structure as the Claude Code SKILL.md file — Cursor rules use the `.mdc` extension but support the same markdown format with command definitions.
### Workflow tips
- After adding MCP config, restart Cursor or reload the window for the server to connect
- Cursor rules in `.cursor/rules/` are automatically loaded into agent context
- MCP tools appear in Cursor's tool list once the server is running
- Project-scoped only — no global installation option
---
## Codex
### Install via `ktx setup`
During setup, select **Codex** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI skills | `.agents/skills/ktx/SKILL.md` |
| MCP server | `.agents/mcp/ktx.json` (under `mcpServers.ktx`) |
Both project-scoped and global installations are supported. Global installs write to `$CODEX_HOME/skills/ktx/SKILL.md` (defaults to `~/.codex/skills/ktx/SKILL.md`).
### Manual MCP configuration
Create or edit `.agents/mcp/ktx.json`:
```json title=".agents/mcp/ktx.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
### Manual CLI skills configuration
Create `.agents/skills/ktx/SKILL.md` with the same content structure as Claude Code's SKILL.md.
### Workflow tips
- Set `CODEX_HOME` environment variable to customize the global installation directory
- Codex shares the `.agents/` directory structure with the universal format — one install covers both
- Global installation makes KTX available across all Codex sessions
---
## OpenCode
### Install via `ktx setup`
During setup, select **OpenCode** from the agent targets. KTX writes:
| Mode | File |
|------|------|
| CLI commands | `.opencode/commands/ktx.md` |
| MCP server | `.opencode/mcp.json` (under `mcpServers.ktx`) |
OpenCode supports project-scoped installation only.
### Manual MCP configuration
Create or edit `.opencode/mcp.json`:
```json title=".opencode/mcp.json"
{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": [
"--project-dir", "/path/to/ktx-project",
"serve",
"--mcp", "stdio",
"--semantic-compute",
"--execute-queries"
],
"env": {}
}
}
}
```
### Manual CLI commands configuration
Create `.opencode/commands/ktx.md` with the same command definitions as Claude Code's SKILL.md.
### Workflow tips
- OpenCode reads commands from `.opencode/commands/` on startup
- Project-scoped only — no global installation option
- Commands file uses standard markdown format (`.md` extension)
---
## MCP server reference
All agent clients connect to the same KTX MCP server. The server exposes these tools:
| Tool | Description |
|------|-------------|
| `connection_list` | List configured database connections |
| `connection_test` | Test a database connection |
| `knowledge_search` | Semantic + full-text search across knowledge pages |
| `knowledge_read` | Read a specific knowledge page |
| `knowledge_write` | Write or update a knowledge page |
| `sl_list_sources` | List semantic layer sources |
| `sl_read_source` | Read a semantic source definition |
| `sl_write_source` | Write or update a semantic source |
| `sl_validate` | Validate a source against the database schema |
| `sl_query` | Execute a semantic layer query |
| `ingest_trigger` | Trigger an ingestion run |
| `ingest_status` | Check ingestion status |
| `ingest_report` | View an ingestion report |
| `ingest_replay` | Replay a past ingestion session |
| `scan_trigger` | Trigger a structural, enriched, or relationship scan |
| `scan_status` | Check scan status |
| `scan_report` | View a completed scan report |
| `scan_list_artifacts` | List artifacts produced by a scan |
| `scan_read_artifact` | Read a scan artifact |
| `memory_capture` | Capture reusable context from an agent conversation when memory capture is enabled |
| `memory_capture_status` | Check a memory capture run |
### Server flags
| Flag | Description | Default |
|------|-------------|---------|
| `--project-dir` | KTX project directory; otherwise KTX uses `KTX_PROJECT_DIR`, the nearest `ktx.yaml`, or the current directory | Auto-detected |
| `--mcp stdio` | Transport mode (stdio only) | Required |
| `--semantic-compute` | Enable semantic layer queries | `false` |
| `--execute-queries` | Allow read-only SQL execution | `false` |
| `--semantic-compute-url` | Remote compute endpoint URL | — |
| `--database-introspection-url` | Live schema introspection endpoint | — |
| `--memory-capture` | Enable memory capture from agent conversations | `false` |
| `--memory-model` | LLM model for memory processing | — |
### Security constraints
- SQL execution is always read-only
- Agent CLI SQL execution requires an explicit `--max-rows` limit from 1 to 1000; MCP semantic queries default to a 1000-row cap
- Secrets and credentials are never exposed in tool responses
- The server runs as a child process of the agent client (no network exposure)
---
## Comparison
| | Claude Code | Cursor | Codex | OpenCode |
|---|---|---|---|---|
| MCP support | Yes | Yes | Yes | Yes |
| CLI skills | Yes | Yes (.mdc) | Yes | Yes |
| Global install | Yes | No | Yes | No |
| Config location | `.mcp.json` | `.cursor/mcp.json` | `.agents/mcp/ktx.json` | `.opencode/mcp.json` |
| Skills location | `.claude/skills/` | `.cursor/rules/` | `.agents/skills/` | `.opencode/commands/` |

View file

@ -1,353 +0,0 @@
---
title: Context Sources
description: Ingest semantic context from dbt, MetricFlow, LookML, Metabase, Looker, and Notion.
---
Context sources feed your existing analytics tooling into KTX. During ingestion, KTX extracts metadata from each source and uses an LLM agent to reconcile it with your existing semantic layer and knowledge base — merging intelligently rather than overwriting.
All context sources are configured in `ktx.yaml` under `connections` with their respective `driver` value.
## dbt
Ingests schema definitions, model descriptions, column metadata, and test coverage from a dbt project.
### What it provides
- Model and source definitions from `schema.yml` files
- Column descriptions and types
- Test coverage signals
- Semantic model references (if using dbt semantic layer)
- Data lineage between models
### Connection config
```yaml title="ktx.yaml"
connections:
my-dbt:
driver: dbt
source_dir: /path/to/dbt/project
readonly: true
```
For a Git-hosted project:
```yaml title="ktx.yaml"
connections:
my-dbt:
driver: dbt
repo_url: https://github.com/org/dbt-repo
branch: main
path: analytics/dbt # For monorepos
auth_token_ref: env:GITHUB_TOKEN
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Local path | `source_dir: /absolute/path/to/dbt/project` |
| Public repo | `repo_url: https://github.com/org/repo` |
| Private repo | `repo_url` + `auth_token_ref: env:GITHUB_TOKEN` |
**Optional fields:**
| Field | Description |
|-------|-------------|
| `profiles_path` | Path to `profiles.yml` (if non-standard location) |
| `target` | dbt target name (e.g., `dev`, `prod`) |
| `project_name` | Override auto-detected project name |
### What gets ingested
- YAML semantic sources generated from dbt schema files
- One work unit per model file for projects with more than 25 YAML files; smaller projects are processed all at once
- Column descriptions, tests, and relationships are preserved
---
## MetricFlow
Ingests MetricFlow semantic models and metric definitions. Useful when your team defines metrics in MetricFlow's YAML format.
### What it provides
- Semantic model definitions (entities, dimensions, measures)
- Cross-model metric definitions
- Dimension and entity relationships between models
### Connection config
```yaml title="ktx.yaml"
connections:
my-metricflow:
driver: metricflow
metricflow:
repoUrl: https://github.com/org/metricflow-repo
branch: main
path: dbt_metrics # Subdirectory for monorepos
auth_token_ref: env:GITHUB_TOKEN
readonly: true
```
For a local path:
```yaml
metricflow:
repoUrl: file:///absolute/path/to/project
```
### Authentication
| Method | Config |
|--------|--------|
| Public repo | `repoUrl: https://github.com/org/repo` |
| Private repo | `repoUrl` + `auth_token_ref: env:GITHUB_TOKEN` |
| Local path | `repoUrl: file:///path/to/project` |
### What gets ingested
- Semantic models with their entities, dimensions, and measures
- Metric definitions with their expressions and filters
- Work units organized by connected component (metrics + related semantic models grouped together)
---
## LookML
Ingests LookML view and model definitions from a Git repository. Extracts field definitions, SQL table references, and join relationships.
### What it provides
- View definitions (dimensions, measures, derived tables)
- Model explore definitions and joins
- SQL table name references
- Field-level descriptions and labels
### Connection config
```yaml title="ktx.yaml"
connections:
my-lookml:
driver: lookml
repoUrl: https://github.com/org/lookml-repo
branch: main
path: analytics # Subdirectory for monorepos
auth_token_ref: env:GITHUB_TOKEN
readonly: true
```
For a local path:
```yaml
repoUrl: file:///absolute/path/to/lookml
```
### Authentication
| Method | Config |
|--------|--------|
| Public repo | `repoUrl: https://github.com/org/repo` |
| Private repo | `repoUrl` + `auth_token_ref: env:GITHUB_TOKEN` |
| Local path | `repoUrl: file:///path/to/project` |
### What gets ingested
- View and model definitions organized by connected component
- LookML field types mapped to semantic layer column types
- Join definitions and relationship cardinalities
- SQL table references for warehouse mapping validation
### Warehouse mapping
Optionally validate that LookML references match your expected Looker connection:
```yaml
mappings:
expectedLookerConnectionName: postgres_connection
```
This validates that LookML model `connection:` declarations match expectations, flagging mismatches during ingestion.
---
## Metabase
Ingests dashboards, questions, and their underlying SQL queries from a Metabase instance. Maps Metabase databases to your KTX warehouse connections.
### What it provides
- Dashboard metadata and organization
- Question/query definitions (native SQL and structured queries)
- Table and column usage patterns from queries
- Database-to-warehouse relationship mapping
### Connection config
```yaml title="ktx.yaml"
connections:
  my-metabase:
    driver: metabase
    api_url: https://metabase.company.com
    api_key_ref: env:METABASE_API_KEY
    mappings:
      databaseMappings:
        "3": postgres-main # Metabase DB ID → KTX connection
      syncEnabled:
        "3": true
      syncMode: ONLY # Only ingest mapped databases
    readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| API key | `api_key_ref: env:METABASE_API_KEY` |
Generate an API key in Metabase: **Admin > Settings > Authentication > API Keys**.
### What gets ingested
- Semantic sources generated from SQL queries in questions
- Knowledge pages for dashboards (purpose, key metrics, relationships)
- Work units per dashboard and per question
### Warehouse mapping
Metabase databases must be mapped to KTX connections so ingested context links to the correct warehouse:
```yaml
mappings:
databaseMappings:
"<metabase_db_id>": "<ktx_connection_id>"
syncEnabled:
"<metabase_db_id>": true
syncMode: ONLY # ONLY = restrict to mapped DBs
```
Find Metabase database IDs in **Admin > Databases** — the ID is in the URL when editing a database.
---
## Looker
Ingests explores, looks, and dashboards from a Looker instance via the Looker API. Maps Looker connections to your KTX warehouse connections.
### What it provides
- Explore definitions and field metadata
- Dashboard and look configurations
- Query patterns and usage signals
- Looker folder structure for organization context
### Connection config
```yaml title="ktx.yaml"
connections:
my-looker:
driver: looker
base_url: https://looker.company.com
client_id: your-looker-client-id
client_secret_ref: env:LOOKER_CLIENT_SECRET
mappings:
connectionMappings:
postgres_connection: postgres-main # Looker conn → KTX conn
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| OAuth client credentials | `client_id` + `client_secret_ref: env:LOOKER_CLIENT_SECRET` |
Generate API credentials in Looker: **Admin > Users > Edit > API Keys**.
### What gets ingested
- Semantic sources from explore field definitions
- Knowledge pages for dashboards (purpose, audience, key metrics)
- Triage signals for automated content classification
- Work units per explore and per dashboard
### Warehouse mapping
Map Looker connection names to KTX connections so explores link to the correct warehouse:
```yaml
mappings:
connectionMappings:
"<looker_connection_name>": "<ktx_connection_id>"
```
Find Looker connection names in **Admin > Database > Connections**.
---
## Notion
Ingests pages and databases from a Notion workspace as knowledge pages. Useful for capturing business definitions, data dictionaries, and team documentation that agents need for context.
### What it provides
- Knowledge pages synthesized from Notion content
- Page hierarchy and relationships
- Database schemas (when Notion databases describe data sources)
- Semantic clustering for organized ingestion
### Connection config
```yaml title="ktx.yaml"
connections:
my-notion:
driver: notion
auth_token_ref: env:NOTION_TOKEN
crawl_mode: selected_roots
root_page_ids:
- "abc123def456..."
readonly: true
```
For crawling all accessible pages:
```yaml title="ktx.yaml"
connections:
my-notion:
driver: notion
auth_token_ref: env:NOTION_TOKEN
crawl_mode: all_accessible
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Internal integration token | `auth_token_ref: env:NOTION_TOKEN` |
Create an integration at [notion.so/my-integrations](https://www.notion.so/my-integrations), then share target pages with the integration.
### Configuration options
| Field | Description | Default |
|-------|-------------|---------|
| `crawl_mode` | `all_accessible` or `selected_roots` | — |
| `root_page_ids` | Page IDs to crawl from (for `selected_roots`) | `[]` |
| `root_database_ids` | Database IDs to include | `[]` |
| `max_pages_per_run` | Maximum number of Notion pages processed per sync | `1000` |
| `max_knowledge_creates_per_run` | Maximum number of new knowledge pages created per sync | `5` |
| `max_knowledge_updates_per_run` | Maximum number of existing knowledge pages updated per sync | `20` |
### What gets ingested
- Knowledge pages synthesized from Notion content (not raw copies)
- Domain context extracted and organized by topic
- Triage signals for classifying page relevance
- Work units clustered by semantic similarity for efficient processing
### Notes
- Notion is knowledge-only — it does not produce semantic layer sources
- Rate limits apply; large workspaces may require multiple ingestion runs
- `last_successful_cursor` is auto-managed for incremental sync

View file

@ -1,5 +0,0 @@
{
"title": "Integrations",
"defaultOpen": true,
"pages": ["primary-sources", "context-sources", "agent-clients"]
}

View file

@ -1,469 +0,0 @@
---
title: Primary Sources
description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite.
---
KTX connects to your data warehouse or database to scan schemas, discover relationships, and execute semantic layer queries. Each connection is defined in `ktx.yaml` under the `connections` key.
All connectors share these conventions:
- Sensitive values support `env:VAR_NAME` (read from environment) and `file:/path/to/secret` (read from file) references
- Connections are read-only — KTX never writes to your database
- Schema scanning discovers tables, columns, types, and constraints automatically
## PostgreSQL
The most full-featured connector. Supports schema introspection, foreign key detection, column statistics, and Historic SQL via `pg_stat_statements`.
### Connection config
```yaml title="ktx.yaml"
connections:
my-postgres:
driver: postgres
url: postgresql://user:password@host:5432/database
schema: public
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-postgres:
driver: postgres
host: localhost
port: 5432
database: analytics
username: ktx_reader
password: env:PG_PASSWORD
schemas:
- public
- analytics
ssl: true
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Password | `password: env:PG_PASSWORD` or `password: file:/path/to/secret` |
| Connection URL | `url: env:DATABASE_URL` |
| SSL | `ssl: true`, optionally `rejectUnauthorized: false` for self-signed certs |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `pg_catalog` |
| Primary keys | Yes | Via `information_schema.table_constraints` |
| Foreign keys | Yes | Full constraint detection |
| Row count estimates | Yes | Via `pg_class.reltuples` |
| Column statistics | Yes | Requires `pg_read_all_stats` role |
| Historic SQL | Yes | Via `pg_stat_statements` extension |
| Table sampling | Yes | `TABLESAMPLE SYSTEM` |
### Historic SQL
PostgreSQL Historic SQL mines real query patterns from `pg_stat_statements`. This is the most mature local Historic SQL path and helps KTX understand how your team actually queries the data.
**Requirements:**
- `pg_stat_statements` extension enabled
- `pg_read_all_stats` role granted to the KTX user
**Config options:**
```yaml
historicSql:
minCalls: 5 # Minimum call count to include a query template
maxTemplatesPerRun: 5000
```
### Dialect notes
- SQL generation uses `LIMIT/OFFSET` pagination
- Named parameters are converted to positional placeholders (`$1`, `$2`, ...)
- Supports `COUNT(*) FILTER (WHERE ...)` for null analysis
- Full support for PostgreSQL types: `uuid`, `jsonb`, `timestamptz`, `numeric`, `text[]`, etc.
---
## Snowflake
Connects via the Snowflake SDK. Supports multi-schema scanning, RSA key authentication, and Historic SQL configuration for Snowflake query history.
### Connection config
```yaml title="ktx.yaml"
connections:
my-snowflake:
driver: snowflake
account: xy12345
warehouse: ANALYTICS_WH
database: PROD
schema_name: PUBLIC
username: KTX_SERVICE
password: env:SNOWFLAKE_PASSWORD
role: ANALYST
readonly: true
```
For multiple schemas:
```yaml
schema_names:
- PUBLIC
- ANALYTICS
- STAGING
```
### Authentication
| Method | Config |
|--------|--------|
| Password | `password: env:SNOWFLAKE_PASSWORD` |
| RSA key pair | `authMethod: rsa`, `privateKey: file:~/.ssh/snowflake_key.pem`, optional `passphrase` |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `INFORMATION_SCHEMA.TABLES` |
| Primary keys | Yes | Via table constraints |
| Foreign keys | No | Not available in Snowflake |
| Row count estimates | Yes | From `INFORMATION_SCHEMA.TABLES.ROW_COUNT` |
| Column statistics | No | — |
| Historic SQL | Configurable | Query-history settings can be stored; local CLI Historic SQL ingest currently uses the Postgres path |
| Table sampling | Yes | — |
### Historic SQL
Snowflake Historic SQL settings describe how query history should be sampled when that runtime path is available.
```yaml
historicSql:
windowDays: 90
redactionPatterns: []
serviceAccountUserPatterns: []
```
### Dialect notes
- All identifiers are uppercase by default (case-insensitive matching)
- Connection context set per query (`USE ROLE`, `USE WAREHOUSE`, `USE DATABASE`, `USE SCHEMA`)
- Parameter binding uses positional `?` placeholders
- Date values normalized to ISO 8601 strings
---
## BigQuery
Authenticates via GCP service account credentials. Supports multi-dataset scanning and Historic SQL configuration for `INFORMATION_SCHEMA.JOBS_BY_PROJECT`.
### Connection config
```yaml title="ktx.yaml"
connections:
my-bigquery:
driver: bigquery
credentials_json: file:~/.config/gcloud/bq-service-account.json
dataset_id: analytics
location: US
readonly: true
```
For multiple datasets:
```yaml
dataset_ids:
- analytics
- marketing
- finance
```
### Authentication
| Method | Config |
|--------|--------|
| Service account JSON | `credentials_json: file:/path/to/key.json` |
| Environment variable | `credentials_json: env:GCP_CREDENTIALS_JSON` |
The project ID is extracted automatically from the service account JSON file.
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Including materialized views and external tables |
| Primary keys | No | — |
| Foreign keys | No | Not available in BigQuery |
| Row count estimates | Yes | From table metadata |
| Column statistics | No | — |
| Historic SQL | Configurable | Query-history settings can be stored; local CLI Historic SQL ingest currently uses the Postgres path |
| Table sampling | Yes | — |
### Historic SQL
BigQuery Historic SQL settings describe how `INFORMATION_SCHEMA.JOBS_BY_PROJECT` should be sampled when that runtime path is available.
```yaml
historicSql:
windowDays: 90
redactionPatterns: []
serviceAccountUserPatterns: []
```
### Dialect notes
- Parameter binding uses named `@param` syntax
- Arrays flattened to comma-separated strings in results
- Location specified at query execution time
- Supports `maxBytesBilled` and `jobTimeoutMs` limits
---
## ClickHouse
Connects over HTTP (port 8123) or HTTPS (port 8443). Supports the ClickHouse native type system including `Nullable`, `LowCardinality`, and `Array` wrappers.
### Connection config
```yaml title="ktx.yaml"
connections:
my-clickhouse:
driver: clickhouse
url: http://localhost:8123/analytics
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-clickhouse:
driver: clickhouse
host: clickhouse.internal
port: 8123
database: analytics
username: default
password: env:CH_PASSWORD
ssl: false
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Basic auth | `username` + `password` (HTTP basic auth) |
| No auth | Default user `default` with no password |
| HTTPS | Set `ssl: true` (uses port 8443 by default) |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `system.tables`, engine-based detection |
| Primary keys | Yes | Via `system.columns` |
| Foreign keys | No | Not a ClickHouse concept |
| Row count estimates | Yes | Via `system.parts` aggregation |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | — |
### Dialect notes
- Parameter binding uses `{param:Type}` syntax (e.g., `{database:String}`)
- Detects views vs. tables by engine name (`View`, `MaterializedView`)
- Handles `Nullable(T)` and `LowCardinality(Nullable(T))` type wrappers
- Dictionary tables are excluded from scanning
- Results returned in JSONCompact or JSONEachRow format
---
## MySQL
Standard MySQL/MariaDB connector with full foreign key support and schema introspection.
### Connection config
```yaml title="ktx.yaml"
connections:
my-mysql:
driver: mysql
url: mysql://user:password@host:3306/database
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-mysql:
driver: mysql
host: mysql.internal
port: 3306
database: analytics
username: ktx_reader
password: env:MYSQL_PASSWORD
ssl: true
readonly: true
```
### Authentication
| Method | Config |
|--------|--------|
| Password | `password: env:MYSQL_PASSWORD` or `password: file:/path/to/secret` |
| SSL | `ssl: true` or `ssl: { rejectUnauthorized: false }` |
| URL parameters | `?ssl=true` or `?sslmode=required` in connection URL |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `INFORMATION_SCHEMA.TABLES` |
| Primary keys | Yes | Via `KEY_COLUMN_USAGE` |
| Foreign keys | Yes | Via `REFERENTIAL_CONSTRAINTS` |
| Row count estimates | Yes | From `TABLE_ROWS` (InnoDB estimate) |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | Uses `RAND()` filter |
### Dialect notes
- Parameter binding uses positional `?` placeholders
- Uses `LIMIT X OFFSET Y` for pagination
- Single database per connection (no multi-schema)
- Supports 20+ MySQL types including `enum`, `json`, `datetime`, `decimal`
- Table comments extracted with InnoDB metadata prefix stripping
---
## SQL Server
Connects to Microsoft SQL Server and Azure SQL. Supports multi-schema scanning with `dbo` as the default schema.
### Connection config
```yaml title="ktx.yaml"
connections:
my-sqlserver:
driver: sqlserver
url: mssql://user:password@host:1433/database?trustServerCertificate=true
readonly: true
```
Or with individual fields:
```yaml title="ktx.yaml"
connections:
my-sqlserver:
driver: sqlserver
host: sql.internal
port: 1433
database: Analytics
username: ktx_reader
password: env:MSSQL_PASSWORD
schema: dbo
trustServerCertificate: true
readonly: true
```
For multiple schemas:
```yaml
schemas:
- dbo
- analytics
- staging
```
### Authentication
| Method | Config |
|--------|--------|
| SQL Server auth | `username` + `password` |
| Encrypted connection | Always enabled; set `trustServerCertificate: true` to accept self-signed certificates |
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `INFORMATION_SCHEMA.TABLES` |
| Primary keys | Yes | Via `TABLE_CONSTRAINTS` and `KEY_COLUMN_USAGE` |
| Foreign keys | Yes | Via `REFERENTIAL_CONSTRAINTS` |
| Row count estimates | Yes | Via `sys.dm_db_partition_stats` |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | — |
| Nested analysis | No | — |
### Dialect notes
- Parameter binding uses `@paramName` syntax
- Row limiting uses `SELECT TOP N * FROM (query) AS ktx_query_result`
- Encryption is always required; certificate validation is optional
- Multi-schema support with per-schema isolation
---
## SQLite
File-based connector using `better-sqlite3`. Ideal for local development, embedded analytics, or testing.
### Connection config
```yaml title="ktx.yaml"
connections:
my-sqlite:
driver: sqlite
path: ./data/warehouse.sqlite
readonly: true
```
The database location can be given in several formats:
```yaml
# Relative path (resolved against project directory)
path: ./warehouse.sqlite
# Absolute path
path: /var/data/analytics.db
# Home directory expansion
path: ~/data/warehouse.sqlite
# Environment variable
path: env:SQLITE_DB_PATH
# URL format
url: sqlite:///path/to/db.sqlite
```
### Authentication
No authentication required — SQLite is file-based. The file must be readable by the process running KTX.
### Features
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Via `sqlite_master` |
| Primary keys | Yes | Via `PRAGMA table_info()` |
| Foreign keys | Yes | Via `PRAGMA foreign_key_list()` (lists declared constraints; enforcement is controlled separately by `PRAGMA foreign_keys`) |
| Row count estimates | Yes | Exact count via `SELECT COUNT(*)` |
| Column statistics | No | — |
| Historic SQL | No | — |
| Table sampling | Yes | — |
| Nested analysis | No | — |
### Dialect notes
- Synchronous query execution (no connection pooling)
- Parameter binding uses `:paramName` syntax
- Uses `LIMIT X OFFSET Y` for pagination
- SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB`
- Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON`
- In-memory databases supported with `path: ":memory:"` (for testing)

View file

@ -1,13 +0,0 @@
{
"root": true,
"title": "KTX",
"pages": [
"getting-started",
"concepts",
"guides",
"integrations",
"benchmarks",
"cli-reference",
"community"
]
}

View file

@ -1,7 +0,0 @@
import { docs } from "@/.source";
import { loader } from "fumadocs-core/source";
// Convert the docs collection via `toFumadocsSource()` before handing it to the loader.
const docsCollection = docs.toFumadocsSource();

/** Fumadocs loader for the docs collection, with `/docs` as its base URL. */
export const source = loader({
  baseUrl: "/docs",
  source: docsCollection,
});

6
docs/next-env.d.ts vendored
View file

@ -1,6 +0,0 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
/// <reference path="./.next/types/routes.d.ts" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.

View file

@ -1,8 +0,0 @@
import { createMDX } from "fumadocs-mdx/next";
/** @type {import('next').NextConfig} */
const config = {};

// Wrap the (empty) Next.js config with the wrapper returned by `createMDX()`
// from `fumadocs-mdx/next`, and export the wrapped result.
export default createMDX()(config);

View file

@ -1,26 +0,0 @@
{
"name": "ktx-docs",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start"
},
"dependencies": {
"fumadocs-core": "15.7.13",
"fumadocs-mdx": "11.10.1",
"fumadocs-ui": "15.7.13",
"next": "^15",
"react": "19.2.6",
"react-dom": "19.2.6"
},
"devDependencies": {
"@types/react": "^19",
"@types/react-dom": "^19",
"typescript": "^5.9",
"@tailwindcss/postcss": "^4",
"tailwindcss": "^4"
}
}

View file

@ -1,7 +0,0 @@
// PostCSS plugin map: registers `@tailwindcss/postcss` with default (empty) options.
const plugins = { "@tailwindcss/postcss": {} };

/** PostCSS configuration exported as this module's default. */
const config = { plugins };

export default config;

View file

@ -1,9 +0,0 @@
import { defineDocs, defineConfig } from "fumadocs-mdx/config";
// Directory passed to `defineDocs` as the location of the docs content.
const docsDir = "content/docs";

/** Docs collection definition rooted at `content/docs`. */
export const docs = defineDocs({ dir: docsDir });
// No custom MDX options are set; an empty options object is passed through.
const mdxOptions = {};

export default defineConfig({ mdxOptions });

View file

@ -1,41 +0,0 @@
{
"compilerOptions": {
"target": "ESNext",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "ESNext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": [
"./*"
]
}
},
"include": [
"**/*.ts",
"**/*.tsx",
".source/**/*.ts",
"next-env.d.ts",
".next/types/**/*.ts"
],
"exclude": [
"node_modules"
]
}