mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
docs: rewrite Semantic Querying concept with imperative-vs-declarative diagram (#156)
* docs: rewrite Semantic Querying concept with imperative-vs-declarative diagram
Reframe semantic-layer-internals.mdx around the contract the semantic
layer offers an agent: declare what you want (a Semantic Query), KTX
figures out how to compute it. Replaces the old "Context-Aware SQL"
framing with a clear imperative-vs-declarative narrative.
Adds a React Flow component (semantic-layer-flow.tsx) that contrasts a
buggy 4-table agent-authored SQL (chasm trap, LEFT-JOIN-in-WHERE,
hardcoded DATE_TRUNC) against the chasm-safe per-fact CTE SQL the
planner actually emits, including the outer GROUP BY over the requested
dimensions. Both lanes converge into a shared warehouse node and each
SQL card now has parallel bullet notes (failures on the left, KTX
behavior on the right).
Side fixes bundled in:
- include the /ktx basePath in the favicon metadata so the icon resolves
under the production prefix
- migrate docs-site/middleware.ts to docs-site/proxy.ts (Next 16 rename)
- redirect / to /ktx/docs/getting-started/introduction so the apex docs
URL works
- add tests covering the apex redirect, the favicon basePath, and the
middleware-to-proxy rename
- propagate the Semantic Query terminology across the ktx-sl CLI
reference, the context-layer concept page, and the agent-clients /
primary-sources integration pages
* Fix CI dead-code failures
* docs-site: polish semantic-layer-internals code blocks and flow diagram
- Make CodeBlock a server component so children traverse synchronously
under React 19 RSC streaming; previously extractText returned "" in
dev SSR, leaving code blocks empty.
- Add custom JSON/YAML/SQL/code-like tokenizers with theme-aware token
classes; drop the colored file-glyph dot and gradient tab-head.
- Tighten tab-head: subtle grey background, smaller monospace filename
in muted grey, smaller rectangular language pill placed to the left
of the filename.
- Polish the React Flow semantic-layer diagram (controls, fit-view
padding, edge types).
* docs-site: annotate imperative SQL, add section anchor, drop ClickHouse
- Wire numbered red badges to each problematic span in the "Without KTX"
SQL with hover sync between SQL gutter, lines, and the notes list.
- Add #imperative-vs-declarative anchor on the flow section header so
the eyebrow link is shareable; reveals a # glyph on hover/focus.
- Align the compiled-SQL note dots to the first-line midpoint
(mt-[6px] instead of mt-1) so 4px dots sit at y=8 in a 16px line.
- Remove all ClickHouse references from docs-site (primary-sources,
quickstart, ktx-setup, contributing, agents-setup, mechanics test,
warehouse drivers in the flow diagram).
* test: drop ClickHouse contributing-docs assertion
Align the workspace-package mirror test with the ClickHouse removal
from docs-site (75907eb). The connector-clickhouse package still
exists in packages/, but contributing.mdx no longer lists it, so the
test that mirrored docs against the workspace was failing.
This commit is contained in:
parent
590dd5dddb
commit
620d6adbe6
19 changed files with 1872 additions and 347 deletions
|
|
@ -221,6 +221,72 @@ pre code,
|
|||
padding-inline: 0 !important;
|
||||
}
|
||||
|
||||
/*
 * Syntax-token palette for the custom code-block tokenizers.
 * Each token class is listed once for the light theme and once under `.dark`.
 * The dark rules only override colour, so weights and the italic comment
 * style set by the light rules still apply in dark mode.
 */

/* Object / mapping keys */
.ktx-code .ktx-token-key { color: #0f766e; }
.dark .ktx-code .ktx-token-key { color: #5eead4; }

/* Language keywords */
.ktx-code .ktx-token-keyword { color: #0e7490; font-weight: 650; }
.dark .ktx-code .ktx-token-keyword { color: #67e8f9; }

/* Functions and CLI commands */
.ktx-code .ktx-token-function { color: #7c3aed; font-weight: 650; }
.dark .ktx-code .ktx-token-function { color: #c4b5fd; }

/* Command-line flags */
.ktx-code .ktx-token-flag { color: #0369a1; }
.dark .ktx-code .ktx-token-flag { color: #7dd3fc; }

/* String literals */
.ktx-code .ktx-token-string { color: #b45309; }
.dark .ktx-code .ktx-token-string { color: #fbbf24; }

/* Numbers and literal constants share one colour */
.ktx-code .ktx-token-number,
.ktx-code .ktx-token-constant { color: #be123c; }
.dark .ktx-code .ktx-token-number,
.dark .ktx-code .ktx-token-constant { color: #fb7185; }

/* Comments and punctuation share a muted colour; only comments are italic */
.ktx-code .ktx-token-comment { color: #64748b; font-style: italic; }
.ktx-code .ktx-token-punctuation { color: #64748b; }
.dark .ktx-code .ktx-token-comment,
.dark .ktx-code .ktx-token-punctuation { color: #94a3b8; }
|
||||
|
||||
/* Neutralize the outer figure styling that our wrapper now owns */
|
||||
figure:has(> .ktx-code),
|
||||
figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
|
||||
|
|
@ -327,55 +393,32 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) {
|
|||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
padding: 8px 10px 8px 14px;
|
||||
padding: 5px 8px 5px 12px;
|
||||
border-bottom: 1px solid var(--color-fd-border);
|
||||
background: linear-gradient(180deg, var(--color-fd-muted), transparent);
|
||||
background: rgba(0, 0, 0, 0.025);
|
||||
}
|
||||
|
||||
.dark .ktx-code-tab-head {
|
||||
border-bottom-color: rgba(255, 255, 255, 0.05);
|
||||
background: linear-gradient(180deg, rgba(255, 255, 255, 0.02), transparent);
|
||||
background: rgba(255, 255, 255, 0.02);
|
||||
}
|
||||
|
||||
.ktx-file-glyph {
|
||||
display: inline-block;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 999px;
|
||||
background: var(--color-fd-muted-foreground);
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.ktx-file-glyph[data-lang="yaml"],
|
||||
.ktx-file-glyph[data-lang="yml"] { background: #fbbf24; }
|
||||
.ktx-file-glyph[data-lang="ts"],
|
||||
.ktx-file-glyph[data-lang="tsx"],
|
||||
.ktx-file-glyph[data-lang="typescript"] { background: #3b82f6; }
|
||||
.ktx-file-glyph[data-lang="js"],
|
||||
.ktx-file-glyph[data-lang="jsx"],
|
||||
.ktx-file-glyph[data-lang="javascript"] { background: #facc15; }
|
||||
.ktx-file-glyph[data-lang="json"] { background: #84cc16; }
|
||||
.ktx-file-glyph[data-lang="md"],
|
||||
.ktx-file-glyph[data-lang="mdx"] { background: #a3a3a3; }
|
||||
.ktx-file-glyph[data-lang="sql"] { background: #f97316; }
|
||||
.ktx-file-glyph[data-lang="py"],
|
||||
.ktx-file-glyph[data-lang="python"] { background: #22d3ee; }
|
||||
|
||||
.ktx-code-tab-filename {
|
||||
font-family: var(--font-mono), ui-monospace, monospace;
|
||||
font-size: 12.5px;
|
||||
color: var(--color-fd-foreground);
|
||||
font-size: 11.5px;
|
||||
color: #6b7280;
|
||||
}
|
||||
|
||||
.ktx-lang-pill {
|
||||
margin-left: 4px;
|
||||
padding: 1px 6px;
|
||||
font-size: 10px;
|
||||
font-weight: 600;
|
||||
margin-right: 4px;
|
||||
padding: 0 7px;
|
||||
font-size: 9px;
|
||||
font-weight: 500;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
color: var(--color-fd-muted-foreground);
|
||||
letter-spacing: 0.06em;
|
||||
color: #9ca3af;
|
||||
border: 1px solid var(--color-fd-border);
|
||||
border-radius: 4px;
|
||||
border-radius: 3px;
|
||||
background: var(--color-fd-card);
|
||||
font-family: var(--font-display), var(--font-sans), sans-serif;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ export const metadata: Metadata = {
|
|||
description:
|
||||
"Open-source context infrastructure that makes agentic analytics reliable.",
|
||||
icons: {
|
||||
icon: "/brand/ktx-mascot.svg",
|
||||
shortcut: "/brand/ktx-mascot.svg",
|
||||
icon: "/ktx/brand/ktx-mascot.svg",
|
||||
shortcut: "/ktx/brand/ktx-mascot.svg",
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
"use client";
|
||||
|
||||
import {
|
||||
type ComponentPropsWithoutRef,
|
||||
type ReactNode,
|
||||
|
|
@ -15,6 +13,55 @@ type Props = ComponentPropsWithoutRef<"pre"> & {
|
|||
|
||||
// Fence languages that are rendered as an "output" preview instead of code.
const OUTPUT_LANGS = new Set(["text", "plain", "plaintext", "console", "output"]);

// A block whose first non-blank character is one of these glyphs is treated
// as interactive wizard / terminal output.
const WIZARD_GLYPHS = /^\s*[◆◇◯◐○●]/;

// JSON tokens: strings, numbers, true/false/null, structural punctuation.
const JSON_TOKEN_PATTERN =
  /"(?:\\.|[^"\\])*"|-?\b\d+(?:\.\d+)?\b|\b(?:true|false|null)\b|[{}[\],:]/g;

// SQL tokens: `--` comments, single-quoted strings, numbers, a fixed list of
// keywords/functions (case-insensitive), punctuation.
const SQL_TOKEN_PATTERN =
  /--[^\n]*|'(?:''|[^'])*'|\b\d+(?:\.\d+)?\b|\b(?:select|from|join|left|right|inner|outer|on|where|group|by|order|limit|as|sum|avg|min|max|count|coalesce|date_trunc|case|when|then|else|end|and|or|is|not|null|false|true|with|having|over|partition|insert|update|delete|create|alter|drop|table|view)\b|[(),.;=*<>+-]/gi;

// Shared tokenizer for shell / JS / TS / Python blocks. Alternatives, in order:
// line comment, block comment, hash comment, template/double/single-quoted
// string, number, keyword or command word, CLI flag, punctuation.
//
// Three lookbehind guards keep ordinary text from being mis-tokenized:
//   (?<![:\/])  `//` right after `:` or `/` is part of a URL (`https://…`,
//               `file:///…`), not the start of a line comment.
//   (?<!\S)     `#` starts a comment only at the start of the text or after
//               whitespace, so URL fragments and `$#` are left alone.
//   (?<![\w-])  a flag must not begin inside a word, so `semantic-layer` no
//               longer yields a `-layer` "flag".
const CODE_LIKE_TOKEN_PATTERN =
  /(?<![:\/])\/\/[^\n]*|\/\*[\s\S]*?\*\/|(?<!\S)#(?![{\w-]+:)[^\n]*|`(?:\\.|[^`\\])*`|"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|-?\b\d+(?:\.\d+)?\b|\b(?:const|let|var|function|return|import|export|from|type|interface|extends|async|await|if|else|for|while|switch|case|break|continue|try|catch|throw|new|class|public|private|protected|readonly|true|false|null|undefined|pnpm|uv|ktx|node|npx|curl|git)\b|(?<![\w-])--?[\w-]+|[{}[\](),.;:=*<>|&+-]/g;

// SQL words highlighted as functions rather than keywords.
const SQL_FUNCTIONS = new Set([
  "sum",
  "avg",
  "min",
  "max",
  "count",
  "coalesce",
  "date_trunc",
]);

// Words highlighted as language keywords in code-like blocks.
const CODE_KEYWORDS = new Set([
  "const",
  "let",
  "var",
  "function",
  "return",
  "import",
  "export",
  "from",
  "type",
  "interface",
  "extends",
  "async",
  "await",
  "if",
  "else",
  "for",
  "while",
  "switch",
  "case",
  "break",
  "continue",
  "try",
  "catch",
  "throw",
  "new",
  "class",
  "public",
  "private",
  "protected",
  "readonly",
]);

// CLI executables highlighted with the function colour.
const COMMAND_KEYWORDS = new Set(["pnpm", "uv", "ktx", "node", "npx", "curl", "git"]);

// Literal constants in code-like blocks.
const CODE_CONSTANTS = new Set(["true", "false", "null", "undefined"]);
|
||||
|
||||
function extractText(node: ReactNode): string {
|
||||
if (typeof node === "string") return node;
|
||||
|
|
@ -65,15 +112,277 @@ function detectLanguage(props: Props, children: ReactNode): string | null {
|
|||
return findLanguageInNode(children);
|
||||
}
|
||||
|
||||
/**
 * Removes at most one leading line break from `text`.
 *
 * Both LF and CRLF are recognised, so a blank line is dropped regardless of
 * the source file's line endings. Any further leading blank lines are kept.
 */
function stripOneLeadingBlankLine(text: string) {
  if (text.startsWith("\r\n")) return text.slice(2);
  return text.startsWith("\n") ? text.slice(1) : text;
}
|
||||
|
||||
// Languages whose single-line comments start with `//`.
const SLASH_COMMENT_LANGUAGES = new Set([
  "javascript",
  "js",
  "jsx",
  "typescript",
  "ts",
  "tsx",
]);

/**
 * Looks for a "filename header" in the first line of a code block.
 *
 * If the first line is a single-line comment whose text looks like a file
 * path (contains `/` and ends in a `.ext`, optionally followed by closing
 * quotes or parentheses), that text is returned as `header` and the line is
 * removed from the returned `code`, together with one following blank line.
 * Otherwise `header` is `null` and `code` is returned untouched.
 *
 * The comment prefix is `--` for SQL, `//` for JS/TS variants, and `#` for
 * every other language.
 *
 * @param language Fence language, or `null` when unknown.
 * @param code Raw text of the code block.
 * @returns `{ header, code }` as described above.
 */
function extractCodeHeader(language: string | null, code: string) {
  const normalized = normalizeLanguage(language);
  const commentPrefix =
    normalized === "sql"
      ? "--"
      : SLASH_COMMENT_LANGUAGES.has(normalized)
        ? "//"
        : "#";

  const newlineAt = code.indexOf("\n");
  const firstLine = newlineAt === -1 ? code : code.slice(0, newlineAt);
  const trimmedFirstLine = firstLine.trim();

  if (!trimmedFirstLine.startsWith(commentPrefix)) {
    return { header: null, code };
  }

  // A shebang such as `#!/usr/local/bin/python3.12` contains `/` and can end
  // in what looks like an extension. It is code, not a filename header, so it
  // must stay in the block.
  if (trimmedFirstLine.startsWith("#!")) {
    return { header: null, code };
  }

  const candidate = trimmedFirstLine.slice(commentPrefix.length).trim();
  const looksLikePath =
    candidate.includes("/") &&
    /\.[A-Za-z0-9]+(?:["'`)]*)?$/.test(candidate);

  if (!looksLikePath) return { header: null, code };

  const rest = newlineAt === -1 ? "" : code.slice(newlineAt + 1);
  return {
    header: candidate,
    code: stripOneLeadingBlankLine(rest),
  };
}
|
||||
|
||||
/**
 * Returns the lower-cased language name, or an empty string when no language
 * was supplied.
 */
function normalizeLanguage(language: string | null) {
  if (language == null) {
    return "";
  }
  return language.toLowerCase();
}
|
||||
|
||||
/**
 * Wraps `token` in a `<span>` carrying the given class and React key, and
 * appends that element to `parts`.
 */
function pushMatchedToken(
  parts: ReactNode[],
  token: string,
  className: string,
  key: string,
) {
  const element = (
    <span key={key} className={className}>
      {token}
    </span>
  );
  parts.push(element);
}
|
||||
|
||||
/**
 * Tokenizes JSON text into plain strings and highlighted `<span>` nodes.
 *
 * A quoted string immediately followed by optional whitespace and `:` is
 * classed as a key; any other quoted string is a string value. Text between
 * matches is emitted unchanged.
 */
function highlightJson(code: string) {
  const output: ReactNode[] = [];
  let cursor = 0;
  let emitted = 0;

  for (const found of code.matchAll(JSON_TOKEN_PATTERN)) {
    const lexeme = found[0];
    const start = found.index ?? 0;
    const end = start + lexeme.length;

    // Emit any untokenized text between the previous match and this one.
    if (start > cursor) output.push(code.slice(cursor, start));

    let tokenClass = "ktx-token-punctuation";
    if (lexeme.startsWith('"')) {
      const followedByColon = /^\s*:/.test(code.slice(end));
      tokenClass = followedByColon ? "ktx-token-key" : "ktx-token-string";
    } else if (/^-?\d/.test(lexeme)) {
      tokenClass = "ktx-token-number";
    } else if (/^(true|false|null)$/.test(lexeme)) {
      tokenClass = "ktx-token-constant";
    }

    pushMatchedToken(output, lexeme, tokenClass, `json-${emitted}`);
    cursor = end;
    emitted += 1;
  }

  if (cursor < code.length) output.push(code.slice(cursor));
  return output;
}
|
||||
|
||||
/**
 * Returns the index of the `#` that starts a trailing comment in one YAML
 * line, or -1 when there is none.
 *
 * A `#` counts only when it is preceded by whitespace and is not inside a
 * quoted scalar. A quote opens a scalar only at the start of the line or
 * after whitespace, `[`, `{` or `,`, so an apostrophe inside a plain word
 * (`don't`) is not treated as a quote.
 */
function findYamlCommentStart(line: string) {
  let openQuote: string | null = null;

  for (let i = 0; i < line.length; i += 1) {
    const char = line[i];
    const previous = i > 0 ? line[i - 1] : "";

    if (openQuote !== null) {
      if (openQuote === '"' && char === "\\") {
        i += 1; // skip the escaped character inside a double-quoted scalar
      } else if (openQuote === "'" && char === "'" && line[i + 1] === "'") {
        i += 1; // '' is an escaped quote inside a single-quoted scalar
      } else if (char === openQuote) {
        openQuote = null;
      }
      continue;
    }

    const atScalarStart = previous === "" || /[\s\[{,]/.test(previous);
    if ((char === '"' || char === "'") && atScalarStart) {
      openQuote = char;
    } else if (char === "#" && /\s/.test(previous)) {
      return i;
    }
  }

  return -1;
}

/**
 * Tokenizes YAML text line by line into plain strings and highlighted
 * `<span>` nodes.
 *
 * For each line:
 * - a line whose first non-blank character is `#` is emitted whole as a comment;
 * - otherwise a trailing ` # …` comment is split off, ignoring any `#` that
 *   sits inside a quoted scalar;
 * - a leading `key:` (optionally after a `- ` list marker) is highlighted as
 *   key plus punctuation, and the remainder goes through
 *   `highlightInlineValue`.
 *
 * Newlines are preserved as separate string parts.
 */
function highlightYaml(code: string) {
  const parts: ReactNode[] = [];
  // Splitting on a captured "\n" keeps the newlines as their own segments.
  const lines = code.split(/(\n)/);
  let tokenIndex = 0;

  for (const line of lines) {
    if (line === "\n") {
      parts.push(line);
      continue;
    }

    const fullLineComment = line.trimStart().startsWith("#");
    const commentStart = fullLineComment ? 0 : findYamlCommentStart(line);
    const hasComment = fullLineComment || commentStart !== -1;
    const content = hasComment ? line.slice(0, commentStart) : line;
    const comment = hasComment ? line.slice(commentStart) : "";
    const keyMatch = content.match(/^(\s*(?:-\s*)?)([A-Za-z_][\w.-]*)(\s*:)/);

    if (keyMatch) {
      parts.push(keyMatch[1]);
      pushMatchedToken(parts, keyMatch[2], "ktx-token-key", `yaml-key-${tokenIndex}`);
      pushMatchedToken(
        parts,
        keyMatch[3],
        "ktx-token-punctuation",
        `yaml-colon-${tokenIndex}`,
      );
      const rest = content.slice(keyMatch[0].length);
      if (rest) parts.push(...highlightInlineValue(rest, `yaml-${tokenIndex}`));
    } else if (content) {
      parts.push(...highlightInlineValue(content, `yaml-${tokenIndex}`));
    }

    if (comment) {
      pushMatchedToken(parts, comment, "ktx-token-comment", `yaml-comment-${tokenIndex}`);
    }
    tokenIndex += 1;
  }

  return parts;
}
|
||||
|
||||
/**
 * Tokenizes the value portion of a YAML line (quoted strings, numbers,
 * true/false/null, punctuation) into plain strings and highlighted `<span>`
 * nodes. React keys are derived from `keyPrefix`, so callers should pass a
 * distinct prefix per call.
 */
function highlightInlineValue(value: string, keyPrefix: string) {
  const output: ReactNode[] = [];
  const tokenPattern = /'(?:''|[^'])*'|"(?:\\.|[^"\\])*"|-?\b\d+(?:\.\d+)?\b|\b(?:true|false|null)\b|[()[\]{},:=!<>+-]/g;
  let cursor = 0;
  let emitted = 0;

  for (const found of value.matchAll(tokenPattern)) {
    const lexeme = found[0];
    const start = found.index ?? 0;

    // Emit any untokenized text between the previous match and this one.
    if (start > cursor) output.push(value.slice(cursor, start));

    let tokenClass = "ktx-token-punctuation";
    if (lexeme.startsWith("'") || lexeme.startsWith('"')) {
      tokenClass = "ktx-token-string";
    } else if (/^-?\d/.test(lexeme)) {
      tokenClass = "ktx-token-number";
    } else if (/^(true|false|null)$/.test(lexeme)) {
      tokenClass = "ktx-token-constant";
    }

    pushMatchedToken(output, lexeme, tokenClass, `${keyPrefix}-value-${emitted}`);
    cursor = start + lexeme.length;
    emitted += 1;
  }

  if (cursor < value.length) output.push(value.slice(cursor));
  return output;
}
|
||||
|
||||
/**
 * Tokenizes SQL text into plain strings and highlighted `<span>` nodes.
 *
 * The first matching rule decides the class: `--` comment, single-quoted
 * string, number, known function name, keyword (any other alphabetic match),
 * then punctuation.
 */
function highlightSql(code: string) {
  const output: ReactNode[] = [];
  let cursor = 0;
  let emitted = 0;

  const classify = (lexeme: string) => {
    if (lexeme.startsWith("--")) return "ktx-token-comment";
    if (lexeme.startsWith("'")) return "ktx-token-string";
    if (/^\d/.test(lexeme)) return "ktx-token-number";
    if (SQL_FUNCTIONS.has(lexeme.toLowerCase())) return "ktx-token-function";
    if (/^[a-z_]+$/i.test(lexeme)) return "ktx-token-keyword";
    return "ktx-token-punctuation";
  };

  for (const found of code.matchAll(SQL_TOKEN_PATTERN)) {
    const lexeme = found[0];
    const start = found.index ?? 0;

    // Emit any untokenized text between the previous match and this one.
    if (start > cursor) output.push(code.slice(cursor, start));

    pushMatchedToken(output, lexeme, classify(lexeme), `sql-${emitted}`);
    cursor = start + lexeme.length;
    emitted += 1;
  }

  if (cursor < code.length) output.push(code.slice(cursor));
  return output;
}
|
||||
|
||||
/**
 * Tokenizes shell / JS / TS / Python-style text into plain strings and
 * highlighted `<span>` nodes.
 *
 * The first matching rule decides the class: comment, string, number,
 * literal constant, language keyword, CLI command (function colour), anything
 * starting with `-` (flag colour), then punctuation.
 */
function highlightCodeLike(code: string) {
  const output: ReactNode[] = [];
  let cursor = 0;
  let emitted = 0;

  const classify = (lexeme: string) => {
    if (lexeme.startsWith("//") || lexeme.startsWith("/*") || lexeme.startsWith("#")) {
      return "ktx-token-comment";
    }
    if (lexeme.startsWith("'") || lexeme.startsWith('"') || lexeme.startsWith("`")) {
      return "ktx-token-string";
    }
    if (/^-?\d/.test(lexeme)) return "ktx-token-number";

    const lowered = lexeme.toLowerCase();
    if (CODE_CONSTANTS.has(lowered)) return "ktx-token-constant";
    if (CODE_KEYWORDS.has(lowered)) return "ktx-token-keyword";
    if (COMMAND_KEYWORDS.has(lowered)) return "ktx-token-function";
    if (lexeme.startsWith("-")) return "ktx-token-flag";
    return "ktx-token-punctuation";
  };

  for (const found of code.matchAll(CODE_LIKE_TOKEN_PATTERN)) {
    const lexeme = found[0];
    const start = found.index ?? 0;

    // Emit any untokenized text between the previous match and this one.
    if (start > cursor) output.push(code.slice(cursor, start));

    pushMatchedToken(output, lexeme, classify(lexeme), `code-${emitted}`);
    cursor = start + lexeme.length;
    emitted += 1;
  }

  if (cursor < code.length) output.push(code.slice(cursor));
  return output;
}
|
||||
|
||||
/**
 * Chooses a tokenizer from the (case-insensitive) fence language.
 *
 * Returns highlighted nodes for JSON/JSONC, YAML, SQL, and the shell / JS /
 * TS / Python family. For any other language, or none, `code` is returned
 * unchanged as a plain string.
 */
function highlightCode(language: string | null, code: string) {
  switch (normalizeLanguage(language)) {
    case "json":
    case "jsonc":
      return highlightJson(code);

    case "yaml":
    case "yml":
      return highlightYaml(code);

    case "sql":
      return highlightSql(code);

    case "bash":
    case "sh":
    case "shell":
    case "zsh":
    case "javascript":
    case "js":
    case "jsx":
    case "typescript":
    case "ts":
    case "tsx":
    case "python":
    case "py":
      return highlightCodeLike(code);

    default:
      return code;
  }
}
|
||||
|
||||
export function CodeBlock(props: Props) {
|
||||
const { children, title, className: _ignored, ...rest } = props;
|
||||
const language = detectLanguage(props, children);
|
||||
const codeText = extractText(children);
|
||||
const rawCodeText = extractText(children);
|
||||
const extractedHeader = extractCodeHeader(language, rawCodeText);
|
||||
const codeText = extractedHeader.code;
|
||||
const headerTitle =
|
||||
typeof title === "string" && title.length > 0
|
||||
? title
|
||||
: extractedHeader.header;
|
||||
const highlightedCode = highlightCode(language, codeText);
|
||||
|
||||
const hasTitle = typeof title === "string" && title.length > 0;
|
||||
const hasHeader = typeof headerTitle === "string" && headerTitle.length > 0;
|
||||
const isOutput =
|
||||
!hasTitle &&
|
||||
(WIZARD_GLYPHS.test(codeText) ||
|
||||
!hasHeader &&
|
||||
(WIZARD_GLYPHS.test(rawCodeText) ||
|
||||
(language !== null && OUTPUT_LANGS.has(language)));
|
||||
|
||||
// Mode D - Output preview (wizard prompts, terminal output)
|
||||
|
|
@ -81,7 +390,7 @@ export function CodeBlock(props: Props) {
|
|||
return (
|
||||
<div className="not-prose ktx-code ktx-code-output group relative">
|
||||
<span className="ktx-code-output-label">output</span>
|
||||
<CopyButton text={codeText} className="ktx-code-output-copy" />
|
||||
<CopyButton text={rawCodeText} className="ktx-code-output-copy" />
|
||||
<pre {...rest} className="ktx-code-body ktx-code-body-output">
|
||||
{children}
|
||||
</pre>
|
||||
|
|
@ -89,18 +398,17 @@ export function CodeBlock(props: Props) {
|
|||
);
|
||||
}
|
||||
|
||||
// Mode B - VS Code tab (filename present)
|
||||
if (hasTitle) {
|
||||
// Mode B - Header (filename present)
|
||||
if (hasHeader) {
|
||||
return (
|
||||
<div className="not-prose ktx-code ktx-code-tab group">
|
||||
<div className="ktx-code-tab-head">
|
||||
<span className="ktx-file-glyph" data-lang={language ?? ""} />
|
||||
<span className="ktx-code-tab-filename">{title}</span>
|
||||
{language && <span className="ktx-lang-pill">{language}</span>}
|
||||
<span className="ktx-code-tab-filename">{headerTitle}</span>
|
||||
<CopyButton text={codeText} className="ml-auto" />
|
||||
</div>
|
||||
<pre {...rest} className="ktx-code-body ktx-code-body-tab">
|
||||
{children}
|
||||
{highlightedCode}
|
||||
</pre>
|
||||
</div>
|
||||
);
|
||||
|
|
@ -111,7 +419,7 @@ export function CodeBlock(props: Props) {
|
|||
<div className="not-prose ktx-code ktx-code-minimal group relative">
|
||||
<CopyButton text={codeText} className="ktx-code-minimal-copy" />
|
||||
<pre {...rest} className="ktx-code-body ktx-code-body-minimal">
|
||||
{children}
|
||||
{highlightedCode}
|
||||
</pre>
|
||||
</div>
|
||||
);
|
||||
|
|
|
|||
1258
docs-site/components/semantic-layer-flow.tsx
Normal file
1258
docs-site/components/semantic-layer-flow.tsx
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -53,7 +53,7 @@ Ask the user (grouped if your harness supports it; otherwise sequentially):
|
|||
3. **Embeddings backend.** Default: `sentence-transformers` (local, no API key, managed Python runtime). Offer `openai` only if the user has a key.
|
||||
4. **Database connections.** Ask how many to add, then loop. For each, collect:
|
||||
- Connection name (e.g. `warehouse`, `analytics`).
|
||||
- Driver: one of `sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake`.
|
||||
- Driver: one of `sqlite`, `postgres`, `mysql`, `sqlserver`, `bigquery`, `snowflake`.
|
||||
- Connection URL/DSN (or service-account file for BigQuery). Accept `env:VAR_NAME` or `file:/abs/path` to avoid pasting raw secrets.
|
||||
- **Heads-up for the user**: even if they paste a literal URL, KTX will silently relocate it into `<project>/.ktx/secrets/<connection>-url` and rewrite `ktx.yaml` to `url: file:…` — this is correct, secure behavior and not a bug.
|
||||
- Schemas / datasets to include (postgres / sqlserver / snowflake / bigquery only).
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ runtime features are missing.
|
|||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `--database <driver>` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake` |
|
||||
| `--database <driver>` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `sqlserver`, `bigquery`, `snowflake` |
|
||||
| `--database-connection-id <id>` | Existing selected connection id; repeatable. With `--database` or `--database-url`, connection id for the new connection. |
|
||||
| `--database-url <url>` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection; also used as the SQLite path |
|
||||
| `--database-schema <schema>` | Database schema or dataset to include; repeatable |
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ ktx sl <subcommand> [options]
|
|||
| `list` | List semantic-layer sources |
|
||||
| `search <query>` | Search semantic-layer sources |
|
||||
| `validate <sourceName>` | Validate a semantic-layer source against the database schema |
|
||||
| `query` | Compile or execute a semantic-layer query |
|
||||
| `query` | Compile or execute a Semantic Query |
|
||||
|
||||
## Options
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ ktx sl <subcommand> [options]
|
|||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--connection-id <id>` | KTX connection id | - |
|
||||
| `--query-file <path>` | JSON semantic-layer query file | - |
|
||||
| `--query-file <path>` | JSON Semantic Query file | - |
|
||||
| `--measure <measure>` | Measure to query; repeatable (at least one required) | - |
|
||||
| `--dimension <dimension>` | Dimension to include; repeatable | - |
|
||||
| `--filter <filter>` | Filter expression; repeatable | - |
|
||||
|
|
@ -67,7 +67,7 @@ ktx sl <subcommand> [options]
|
|||
| `--max-rows <n>` | Maximum rows to return when executing | - |
|
||||
|
||||
`sl query` requires at least one `--measure` unless `--query-file` is set.
|
||||
`--query-file` should point to a JSON semantic-layer query object.
|
||||
`--query-file` should point to a JSON Semantic Query object.
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
|
|||
|
|
@ -91,7 +91,6 @@ packages/
|
|||
connector-postgres/ # PostgreSQL connector
|
||||
connector-snowflake/ # Snowflake connector
|
||||
connector-bigquery/ # BigQuery connector
|
||||
connector-clickhouse/ # ClickHouse connector
|
||||
connector-mysql/ # MySQL connector
|
||||
connector-sqlserver/ # SQL Server connector
|
||||
connector-sqlite/ # SQLite connector
|
||||
|
|
|
|||
|
|
@ -1,141 +1,115 @@
|
|||
---
|
||||
title: Context-Aware SQL
|
||||
description: How KTX turns reviewed context, grain, and relationship evidence into safe SQL for agents.
|
||||
title: Semantic Querying
|
||||
description: How KTX compiles a short Semantic Query into safe, dialect-correct SQL using a reviewed join graph.
|
||||
---
|
||||
|
||||
## Why query planning needs context
|
||||
import { SemanticLayerFlow } from "@/components/semantic-layer-flow";
|
||||
|
||||
Agents can generate SQL from schema alone, but safe analytics SQL needs more
|
||||
than table names. KTX uses reviewed context to understand grain, joins, measures,
|
||||
filters, and where aggregation must happen.
|
||||
KTX's semantic layer is a compiler that turns intent into SQL. The agent
|
||||
declares _what_ it wants — measures, dimensions, filters — in a small
|
||||
Semantic Query. KTX figures out the _how_: which tables to join, what
|
||||
grain to aggregate at, how to keep fan-out from inflating measures, and
|
||||
what dialect the warehouse speaks.
|
||||
|
||||
Read this page as four mechanics:
|
||||
This page covers four mechanics:
|
||||
|
||||
- context files feed the semantic engine;
|
||||
- evidence becomes a join graph with grain and relationship metadata;
|
||||
- review keeps the graph current;
|
||||
- query planning avoids fan-out and ambiguous joins.
|
||||
- The Semantic Query contract agents send to the compiler.
|
||||
- The planner steps that turn a Semantic Query into SQL.
|
||||
- The join graph that backs those steps, and how it's built.
|
||||
- The fan-out failure mode the compiler is designed to prevent.
|
||||
|
||||
## Where the semantic layer fits
|
||||
## Imperative SQL vs declarative Semantic Querying
|
||||
|
||||
This planner is one subsystem inside KTX's broader context layer. It uses source
|
||||
YAML, wiki context, scan evidence, and provenance to make context actionable for
|
||||
SQL generation.
|
||||
Writing analytics SQL is imperative work. Every question forces the
|
||||
agent to hold two things in mind at once: _what_ it wants — a measure, a
|
||||
slice, a filter — and _how_ to compute it: which tables to join, which
|
||||
key links them, what grain to aggregate at, how to keep one fact from
|
||||
inflating another, and what dialect the warehouse speaks. Plumbing on
|
||||
top of intent, every query.
|
||||
|
||||
<div
|
||||
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
|
||||
aria-label="How context inputs flow through the semantic layer into agent workflows"
|
||||
>
|
||||
<div className="grid gap-0 lg:grid-cols-[1fr_2rem_1.12fr_2rem_1fr]">
|
||||
<section className="bg-fd-background p-4">
|
||||
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
|
||||
{"Context inputs"}
|
||||
</p>
|
||||
<div className="grid gap-2 text-sm">
|
||||
<div className="border-l-2 border-fd-primary bg-fd-card px-3 py-2">
|
||||
<p className="font-mono text-xs text-fd-foreground">semantic-layer/</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"source YAML, measures, joins, grain"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="border-l-2 border-amber-500 bg-fd-card px-3 py-2">
|
||||
<p className="font-mono text-xs text-fd-foreground">wiki/</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"business rules, definitions, caveats"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="border-l-2 border-orange-500 bg-fd-card px-3 py-2">
|
||||
<p className="font-mono text-xs text-fd-foreground">raw-sources/</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"schema scans, keys, imported metadata"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="border-l-2 border-slate-500 bg-fd-card px-3 py-2 dark:border-cyan-200">
|
||||
<p className="font-mono text-xs text-fd-foreground">provenance</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"ingest decisions and review history"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
KTX's semantic layer separates those concerns:
|
||||
|
||||
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
|
||||
<span className="h-px w-full bg-fd-border" />
|
||||
</div>
|
||||
- **You and KTX maintain the how.** Sources, joins, grain, measures, and
|
||||
segments live in reviewable YAML — the analytical contract the team
|
||||
agrees on, version-controlled.
|
||||
- **The agent declares the what.** It sends a Semantic Query and trusts
|
||||
the compiler to produce safe SQL.
|
||||
|
||||
<section className="relative bg-[#102226] p-5 text-white dark:bg-[#0b181b]">
|
||||
<div className="absolute inset-y-0 left-0 w-1 bg-fd-primary" />
|
||||
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-cyan-200">
|
||||
{"Semantic layer engine"}
|
||||
</p>
|
||||
<div className="grid gap-2 sm:grid-cols-2">
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="text-sm font-semibold">Join graph</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"sources as nodes, joins as typed edges"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="text-sm font-semibold">Grain</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"row identity before aggregation"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="text-sm font-semibold">Measures</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"verified formulas and filters"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="whitespace-nowrap break-normal text-sm font-semibold">Relationships</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"many_to_one, one_to_many, one_to_one"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="mt-3 rounded-md border border-cyan-100/20 bg-cyan-50/10 px-3 py-2 text-sm">
|
||||
{"Safe query planning before SQL is generated."}
|
||||
</div>
|
||||
</section>
|
||||
The agent stops reasoning about plumbing. It states intent. KTX turns
|
||||
that into SQL the warehouse can run.
|
||||
|
||||
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
|
||||
<span className="h-px w-full bg-fd-border" />
|
||||
</div>
|
||||
<SemanticLayerFlow />
|
||||
|
||||
<section className="bg-fd-muted/35 p-4">
|
||||
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
|
||||
{"Agent workflows"}
|
||||
</p>
|
||||
<div className="space-y-2 text-sm">
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Search sources and wiki pages"}
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Compile trusted SQL"}
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Explain metrics and provenance"}
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Patch files and validate review"}
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
## The Semantic Query contract
|
||||
|
||||
## Join graph
|
||||
A Semantic Query is the JSON payload the agent sends. Every field is optional
|
||||
except `measures`, and column references are fully qualified
|
||||
(`source.column`) so the compiler never has to guess where a name came
|
||||
from.
|
||||
|
||||
A semantic source is a node. A join is a typed edge. KTX uses the graph to
|
||||
choose valid paths and detect row-multiplying joins before SQL is generated.
|
||||
Notice what's _not_ in the payload: no `FROM`, no `JOIN`, no `GROUP BY`,
|
||||
no `WITH`. The agent states what it wants. KTX picks the join path, the
|
||||
grain, the SQL shape, and the dialect.
|
||||
|
||||
| Field | Purpose |
|
||||
|-------|---------|
|
||||
| `measures` | Names of pre-defined measures, or inline expressions like `sum(orders.amount)` |
|
||||
| `dimensions` | Columns to group by, optionally with a `granularity` for time fields |
|
||||
| `filters` | Predicates, classified as row-level (`WHERE`) or aggregate-level (`HAVING`) at planning time |
|
||||
| `segments` | Named filter sets defined on a source, applied as additional predicates |
|
||||
| `order_by` | Sort fields with optional direction |
|
||||
| `limit` | Row cap on the result |
|
||||
|
||||
A typical agent call looks like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"measures": ["orders.revenue", "tickets.ticket_count"],
|
||||
"dimensions": ["customers.segment"],
|
||||
"filters": ["orders.created_at >= '2025-01-01'"],
|
||||
"limit": 1000
|
||||
}
|
||||
```
|
||||
|
||||
That payload is enough for KTX to plan and compile. The agent never
|
||||
authors a join, a CTE, or a dialect-specific cast.
|
||||
|
||||
## What the planner does
|
||||
|
||||
The planner is a deterministic pipeline. Each Semantic Query runs through the
|
||||
same ordered steps before any SQL is emitted.
|
||||
|
||||
1. **Resolve refs.** Qualify bare column names, look up pre-defined
|
||||
measure expressions, and classify each measure as raw or derived.
|
||||
2. **Pick an anchor and build the join tree.** Choose the largest measure
|
||||
source as the root, then run a shortest-path search across the typed
|
||||
join graph to reach every required source.
|
||||
3. **Detect fan-out.** Group measures by their owning source. If more
|
||||
than one group exists, the planner marks the query as a chasm trap
|
||||
and switches to aggregate-locality compilation.
|
||||
4. **Classify filters.** Split predicates into row-level (`WHERE`) and
|
||||
aggregate-level (`HAVING`) based on whether they reference a measure.
|
||||
5. **Generate SQL.** Emit Postgres-dialect SQL in the right shape:
|
||||
single-source aggregation when the query is safe, per-source CTEs
|
||||
when fan-out is present.
|
||||
6. **Transpile to the target dialect.** Run the result through `sqlglot`
|
||||
so the warehouse receives syntax it understands.
|
||||
|
||||
The output is the SQL string, the resolved plan, and any warnings
|
||||
surfaced during planning.
|
||||
|
||||
## The join graph
|
||||
|
||||
A semantic source is a node. A declared join is a typed edge. The graph
|
||||
is bidirectional: every forward edge has a reverse with the relationship
|
||||
inverted, so the planner can traverse from any anchor.
|
||||
|
||||
| Relationship | Planning impact |
|
||||
|--------------|-----------------|
|
||||
| `many_to_one` | Usually safe for adding dimensions |
|
||||
| `one_to_many` | Can multiply measures and trigger fan-out handling |
|
||||
| `one_to_one` | Usually safe when keys are correct |
|
||||
| Equal-cost paths | Ambiguous unless aliases or explicit joins disambiguate |
|
||||
| `many_to_one` | Safe direction for adding dimensions |
|
||||
| `one_to_many` | Multiplies measures and triggers fan-out handling |
|
||||
| `one_to_one` | Safe in either direction when keys match |
|
||||
| Equal-cost paths | Treated as ambiguous; aliases or explicit joins resolve them |
|
||||
|
||||
<figure
|
||||
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card p-4 shadow-sm"
|
||||
|
|
@ -143,43 +117,60 @@ choose valid paths and detect row-multiplying joins before SQL is generated.
|
|||
>
|
||||
<div className="grid gap-3 md:grid-cols-[1fr_1fr_1fr]">
|
||||
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
|
||||
<p className="text-sm font-semibold text-fd-foreground">customers</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">grain: customer_id</p>
|
||||
<p className="text-sm font-semibold text-fd-foreground">{"customers"}</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: customer_id"}</p>
|
||||
</div>
|
||||
<div className="rounded-md border-2 border-fd-primary bg-fd-background px-4 py-3">
|
||||
<p className="text-sm font-semibold text-fd-foreground">orders</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id</p>
|
||||
<p className="text-sm font-semibold text-fd-foreground">{"orders"}</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: order_id"}</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
|
||||
<p className="text-sm font-semibold text-fd-foreground">order_items</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id, line_id</p>
|
||||
<p className="text-sm font-semibold text-fd-foreground">{"order_items"}</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: order_id, line_id"}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="my-3 grid gap-2 text-center text-xs font-medium text-fd-muted-foreground md:grid-cols-[1fr_1fr]">
|
||||
<div>orders -> customers: many_to_one</div>
|
||||
<div>orders -> order_items: one_to_many</div>
|
||||
<div>{"orders -> customers: many_to_one"}</div>
|
||||
<div>{"orders -> order_items: one_to_many"}</div>
|
||||
</div>
|
||||
<figcaption className="mt-4 border-t border-fd-border pt-3 text-left text-xs leading-5 text-fd-muted-foreground">
|
||||
<span className="font-medium text-fd-foreground">{"Example: "}</span>
|
||||
{"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it can duplicate order-level measures."}
|
||||
{"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it duplicates order-level measures."}
|
||||
</figcaption>
|
||||
</figure>
|
||||
|
||||
The graph is bidirectional for planning. If `orders -> customers` is
|
||||
`many_to_one`, the reverse path is `one_to_many`.
|
||||
Edges and grain come from your YAML. The compiler treats them as fact,
|
||||
not a guess.
|
||||
|
||||
```yaml
|
||||
# semantic-layer/warehouse/orders.yaml
|
||||
name: orders
|
||||
table: public.orders
|
||||
grain: [order_id]
|
||||
joins:
|
||||
- to: customers
|
||||
on: customer_id = customers.id
|
||||
relationship: many_to_one
|
||||
- to: order_items
|
||||
on: id = order_items.order_id
|
||||
relationship: one_to_many
|
||||
measures:
|
||||
- name: revenue
|
||||
expr: sum(case when status != 'refunded' then amount end)
|
||||
```
|
||||
|
||||
## Building and maintaining the graph
|
||||
|
||||
KTX starts from evidence, writes reviewable source YAML, and treats the merged
|
||||
diff as the accepted graph.
|
||||
KTX builds the graph from evidence and accepted edits, not from runtime
|
||||
inference. Each input contributes a different kind of authority.
|
||||
|
||||
| Evidence | What it contributes |
|
||||
|----------|---------------------|
|
||||
| Declared primary keys | Initial row grain |
|
||||
| Declared foreign keys | Formal join candidates |
|
||||
| Inferred relationships | Edges when warehouses lack constraints |
|
||||
| Inferred relationships | Edges when the warehouse lacks constraints |
|
||||
| dbt, MetricFlow, and LookML imports | Existing metrics, dimensions, explores, and joins |
|
||||
| Query history | Real join and filter patterns |
|
||||
| Query history | Real join and filter patterns from analyst SQL |
|
||||
| Analyst review | Final authority before context is merged |
|
||||
|
||||
<div
|
||||
|
|
@ -295,105 +286,55 @@ diff as the accepted graph.
|
|||
</div>
|
||||
</div>
|
||||
|
||||
## Modeling problems
|
||||
## Fan-out and aggregate locality
|
||||
|
||||
Fan-out is the classic failure mode: an order-level measure joins to line-item
|
||||
rows before aggregation, so one order becomes many rows.
|
||||
Fan-out is the classic analytics failure mode. Two fact tables join to a
|
||||
shared dimension. A naive query joins them all together first, so each
|
||||
row from one fact is multiplied by the matching rows from the other.
|
||||
Measures duplicate, numbers go wrong, and the agent doesn't notice.
|
||||
|
||||
| Problem | What happens | How KTX handles it |
|
||||
|---------|--------------|--------------------|
|
||||
| Order measure joins to `order_items` | `orders.revenue` repeats once per item | Detect `one_to_many` and pre-aggregate |
|
||||
| Two fact sources share `customers` | Measures multiply across the shared dimension | Treat as a chasm trap and plan each fact locally |
|
||||
| Filter crosses `one_to_many` | Filtering changes measure grain | Reject or localize the filter |
|
||||
| Equal-cost paths connect sources | Join choice is ambiguous | Prefer safer paths or require aliases |
|
||||
|
||||
## Execution planning
|
||||
|
||||
The planner resolves sources, chooses a join tree, checks relationship paths,
|
||||
and picks a simple or aggregate-locality SQL shape.
|
||||
KTX's planner detects the shape by grouping measures by their owning
|
||||
source. If more than one source contributes raw measures, the generator
|
||||
switches to aggregate locality: each fact is pre-aggregated at its own
|
||||
grain inside a CTE, and the CTEs are joined back to the dimension at the
|
||||
end.
|
||||
|
||||
| Naive SQL shape | Semantic-layer SQL shape |
|
||||
|-----------------|--------------------------|
|
||||
| Join facts and dimensions first, then aggregate | Aggregate each fact source at its own grain, then join results |
|
||||
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source when locality is needed |
|
||||
| Trust the shortest textual join path | Prefer safe relationship paths and reject disconnected sources |
|
||||
| Let dimension grain differ across facts | Raise when asymmetric dimensions would fan out another measure |
|
||||
| Join facts and dimensions first, then aggregate | Aggregate each fact at its own grain, then join |
|
||||
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source |
|
||||
| Trust the shortest textual join path | Prefer typed safe paths, reject disconnected sources |
|
||||
| Let dimension grain differ across facts | Raise when an asymmetric dimension would fan out another measure |
|
||||
|
||||
<div
|
||||
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
|
||||
aria-label="Fan-out safe execution shape"
|
||||
>
|
||||
<div className="border-b border-fd-border bg-fd-muted/35 px-4 py-3">
|
||||
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
|
||||
{"Fan-out handling"}
|
||||
</p>
|
||||
<p className="mt-1 text-sm leading-6 text-fd-muted-foreground">
|
||||
{"The same question planned before and after KTX preserves the measure grain."}
|
||||
</p>
|
||||
</div>
|
||||
<div className="grid gap-3 bg-fd-background p-4 md:grid-cols-[0.92fr_1.08fr]">
|
||||
<section className="flex min-h-full flex-col rounded-md border border-fd-border bg-fd-card">
|
||||
<div className="border-b border-fd-border px-4 py-3">
|
||||
<p className="text-[11px] font-semibold uppercase tracking-wide text-red-600 dark:text-red-300">
|
||||
{"Unsafe shape"}
|
||||
</p>
|
||||
<p className="mt-1 text-sm font-semibold text-fd-foreground">
|
||||
{"Join first, aggregate later"}
|
||||
</p>
|
||||
</div>
|
||||
<pre className="m-0 min-h-[13rem] flex-1 overflow-x-auto bg-transparent px-4 py-3 text-xs leading-5 text-fd-foreground">
|
||||
{`orders
|
||||
-> join order_items
|
||||
-> join customers
|
||||
The result is the same analyst answer, computed with the join shape an
|
||||
analyst would have written by hand.
|
||||
|
||||
group by
|
||||
customer_segment
|
||||
## Where the context comes from
|
||||
|
||||
measure
|
||||
sum(orders.amount)`}
|
||||
</pre>
|
||||
<div className="border-t border-fd-border bg-red-50/60 px-4 py-3 text-sm leading-6 text-red-950 dark:bg-red-950/20 dark:text-red-100">
|
||||
{"Order-level revenue is exposed to line-item fan-out before aggregation."}
|
||||
</div>
|
||||
</section>
|
||||
<section className="flex min-h-full flex-col rounded-md border border-fd-primary/40 bg-fd-card shadow-[inset_4px_0_0_var(--color-fd-primary)]">
|
||||
<div className="border-b border-fd-border px-4 py-3">
|
||||
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-primary">
|
||||
{"KTX shape"}
|
||||
</p>
|
||||
<p className="mt-1 text-sm font-semibold text-fd-foreground">
|
||||
{"Aggregate locally, then join"}
|
||||
</p>
|
||||
</div>
|
||||
<pre className="m-0 min-h-[13rem] flex-1 overflow-x-auto bg-transparent px-4 py-3 text-xs leading-5 text-fd-foreground">
|
||||
{`orders_agg as (
|
||||
select customer_id, sum(amount) revenue
|
||||
from orders
|
||||
group by customer_id
|
||||
)
|
||||
select customers.segment, sum(revenue)
|
||||
from orders_agg
|
||||
join customers`}
|
||||
</pre>
|
||||
<div className="border-t border-fd-border bg-fd-primary/10 px-4 py-3 text-sm leading-6 text-fd-foreground">
|
||||
{"The measure is pre-aggregated at order grain before dimensions are joined."}
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
The planner is only as good as the YAML it reads. KTX builds and
|
||||
maintains that YAML for you.
|
||||
|
||||
The result is structured planning: validated sources, typed relationships,
|
||||
graph search, fan-out detection, aggregate locality, and dialect transpilation.
|
||||
- `raw-sources/<connection>/` holds scan evidence from your warehouse:
|
||||
schemas, columns, keys, samples, and observed usage patterns.
|
||||
- `wiki/` holds business language, definitions, and caveats. The
|
||||
planner doesn't read wiki at compile time, but the agent does, so
|
||||
measure names and dimensions stay anchored to terms the team uses.
|
||||
- `semantic-layer/<connection>/` holds the structured sources, joins,
|
||||
grain, measures, and segments the planner actually compiles against.
|
||||
|
||||
Every accepted edit flows back into the next ingest, so the graph stays
|
||||
current as the warehouse changes.
|
||||
|
||||
## Agent usage notes
|
||||
|
||||
Use this page when an agent needs to explain how KTX turns reviewed semantic
|
||||
context into SQL, why relationship metadata matters, or why a query was rejected
|
||||
as unsafe.
|
||||
Point an agent at this page when it needs to explain why KTX asks for
|
||||
grain, why a query was rejected as unsafe, or why the compiled SQL looks
|
||||
different from what the agent first proposed.
|
||||
|
||||
| Agent task | Relevant section | Next page |
|
||||
|------------|------------------|-----------|
|
||||
| Explain why KTX asks for `grain` and relationship types | Join graph | [Writing Context](/docs/guides/writing-context) |
|
||||
| Diagnose duplicated measures after a join | Modeling problems | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Explain safe SQL generation | Execution planning | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Describe how semantic context stays current | Building and maintaining the graph | [Context as Code](/docs/concepts/context-as-code) |
|
||||
| Explain the Semantic Query shape | The Semantic Query contract | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Describe what the planner does between query and SQL | What the planner does | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Explain why KTX asks for grain and relationship types | The join graph | [Writing context](/docs/guides/writing-context) |
|
||||
| Diagnose duplicated measures after a join | Fan-out and aggregate locality | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Describe how semantic context stays current | Building and maintaining the graph | [Context as code](/docs/concepts/context-as-code) |
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ measures:
|
|||
```
|
||||
|
||||
For join graphs, fan-out handling, and execution mechanics, read
|
||||
[Context-Aware SQL](/docs/concepts/semantic-layer-internals).
|
||||
[Semantic Querying](/docs/concepts/semantic-layer-internals).
|
||||
|
||||
## Wiki pages
|
||||
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ The wizard walks you through everything KTX needs in one pass:
|
|||
3. **Embeddings** - picks an embeddings backend. Choose OpenAI for hosted
|
||||
embeddings or `sentence-transformers` to run locally without an API key.
|
||||
4. **Database** - adds at least one primary connection. Supported drivers:
|
||||
SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake.
|
||||
SQLite, PostgreSQL, MySQL, SQL Server, BigQuery, and Snowflake.
|
||||
5. **Context sources** - optionally adds dbt, MetricFlow, LookML, Looker,
|
||||
Metabase, or Notion. You can skip and add them later.
|
||||
6. **Build** - runs the first ingest so semantic-layer sources and wiki pages
|
||||
|
|
|
|||
|
|
@ -285,7 +285,7 @@ Admin CLI skills call the same KTX CLI commands:
|
|||
| `ktx sl list --json` | List semantic-layer sources |
|
||||
| `ktx sl search <query> --json` | Search semantic-layer sources |
|
||||
| `ktx sl validate <source> --connection-id <id>` | Validate semantic source definitions |
|
||||
| `ktx sl query --format json` | Execute a semantic-layer query when semantic compute is configured |
|
||||
| `ktx sl query --format json` | Execute a Semantic Query when semantic compute is configured |
|
||||
|
||||
### Security constraints
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
title: Primary Sources
|
||||
description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite.
|
||||
description: Connect KTX to PostgreSQL, Snowflake, BigQuery, MySQL, SQL Server, or SQLite.
|
||||
---
|
||||
|
||||
KTX connects to your data warehouse or database to build schema context,
|
||||
|
|
@ -26,9 +26,9 @@ Agents should prefer environment or file references over literal secrets.
|
|||
|
||||
| Field | Required | Applies to | Description |
|
||||
|-------|----------|------------|-------------|
|
||||
| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `clickhouse`, `mysql`, `sqlserver`, or `sqlite` |
|
||||
| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `mysql`, `sqlserver`, or `sqlite` |
|
||||
| `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` |
|
||||
| `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, ClickHouse, SQL Server | Field-by-field connection values |
|
||||
| `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, SQL Server | Field-by-field connection values |
|
||||
| `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan |
|
||||
| `context.queryHistory` | No | PostgreSQL, Snowflake, BigQuery | Enables query-history ingestion when the warehouse supports it |
|
||||
| `path` | Yes for path-style SQLite | SQLite | Local SQLite database path or `env:NAME` reference |
|
||||
|
|
@ -269,63 +269,6 @@ staged artifact shape as Postgres and Snowflake.
|
|||
|
||||
---
|
||||
|
||||
## ClickHouse
|
||||
|
||||
Connects over HTTP (port 8123) or HTTPS (port 8443). Supports the ClickHouse native type system including `Nullable`, `LowCardinality`, and `Array` wrappers.
|
||||
|
||||
### Connection config
|
||||
|
||||
```yaml title="ktx.yaml"
|
||||
connections:
|
||||
my-clickhouse:
|
||||
driver: clickhouse
|
||||
url: http://localhost:8123/analytics
|
||||
```
|
||||
|
||||
Or with individual fields:
|
||||
|
||||
```yaml title="ktx.yaml"
|
||||
connections:
|
||||
my-clickhouse:
|
||||
driver: clickhouse
|
||||
host: clickhouse.internal
|
||||
port: 8123
|
||||
database: analytics
|
||||
username: default
|
||||
password: env:CH_PASSWORD
|
||||
ssl: false
|
||||
```
|
||||
|
||||
### Authentication
|
||||
|
||||
| Method | Config |
|
||||
|--------|--------|
|
||||
| Basic auth | `username` + `password` (HTTP basic auth) |
|
||||
| No auth | Default user `default` with no password |
|
||||
| HTTPS | Set `ssl: true` (uses port 8443 by default) |
|
||||
|
||||
### Features
|
||||
|
||||
| Feature | Supported | Notes |
|
||||
|---------|-----------|-------|
|
||||
| Tables & views | Yes | Via `system.tables`, engine-based detection |
|
||||
| Primary keys | Yes | Via `system.columns` |
|
||||
| Foreign keys | No | Not a ClickHouse concept |
|
||||
| Row count estimates | Yes | Via `system.parts` aggregation |
|
||||
| Column statistics | No | - |
|
||||
| Query history | No | - |
|
||||
| Table sampling | Yes | - |
|
||||
|
||||
### Dialect notes
|
||||
|
||||
- Parameter binding uses `{param:Type}` syntax (e.g., `{database:String}`)
|
||||
- Detects views vs. tables by engine name (`View`, `MaterializedView`)
|
||||
- Handles `Nullable(T)` and `LowCardinality(Nullable(T))` type wrappers
|
||||
- Dictionary tables are excluded from scanning
|
||||
- Results returned in JSONCompact or JSONEachRow format
|
||||
|
||||
---
|
||||
|
||||
## MySQL
|
||||
|
||||
Standard MySQL/MariaDB connector with full foreign key support and schema introspection.
|
||||
|
|
@ -515,4 +458,4 @@ No authentication required - SQLite is file-based. The file must be readable by
|
|||
| Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
|
||||
| Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest <connectionId> --query-history` or `ktx setup` |
|
||||
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context |
|
||||
| Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |
|
||||
| Semantic Query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |
|
||||
|
|
|
|||
|
|
@ -15,6 +15,12 @@ const config = {
|
|||
},
|
||||
async redirects() {
|
||||
return [
|
||||
{
|
||||
source: "/",
|
||||
destination: "/ktx/docs/getting-started/introduction",
|
||||
permanent: false,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/docs",
|
||||
destination: "/docs/getting-started/introduction",
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ const markdownMimeTypes = new Set([
|
|||
"application/markdown",
|
||||
]);
|
||||
|
||||
export function middleware(request: NextRequest) {
|
||||
export function proxy(request: NextRequest) {
|
||||
if (!isMarkdownPreferred(request.headers.get("accept"))) {
|
||||
return NextResponse.next();
|
||||
}
|
||||
|
|
@ -112,6 +112,18 @@ test("/ktx/docs redirects to the docs introduction", async () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("/ redirects into the /ktx docs site", async () => {
|
||||
const response = await fetch(`${docsSiteUrl}/`, {
|
||||
redirect: "manual",
|
||||
});
|
||||
|
||||
assert.equal(response.status, 307);
|
||||
assert.equal(
|
||||
response.headers.get("location"),
|
||||
`${docsBasePath}/docs/getting-started/introduction`,
|
||||
);
|
||||
});
|
||||
|
||||
test("/ktx/api/search returns docs search results", async () => {
|
||||
const response = await fetch(
|
||||
`${docsSiteUrl}${docsBasePath}/api/search?query=setup`,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { access, readFile } from "node:fs/promises";
|
||||
import { dirname, join } from "node:path";
|
||||
import { test } from "node:test";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
|
@ -17,6 +17,23 @@ test("root provider uses the base-path-aware search API", async () => {
|
|||
assert.match(layout, /api:\s*"\/ktx\/api\/search"/);
|
||||
});
|
||||
|
||||
test("metadata icons include the docs base path", async () => {
|
||||
const layout = await readDocsFile("app/layout.tsx");
|
||||
|
||||
assert.match(layout, /icon:\s*"\/ktx\/brand\/ktx-mascot\.svg"/);
|
||||
assert.match(layout, /shortcut:\s*"\/ktx\/brand\/ktx-mascot\.svg"/);
|
||||
assert.doesNotMatch(layout, /:\s*"\/brand\/ktx-mascot\.svg"/);
|
||||
});
|
||||
|
||||
test("markdown negotiation uses the Next proxy convention", async () => {
|
||||
await assert.doesNotReject(access(join(docsSiteDir, "proxy.ts")));
|
||||
await assert.rejects(access(join(docsSiteDir, "middleware.ts")));
|
||||
|
||||
const proxy = await readDocsFile("proxy.ts");
|
||||
assert.match(proxy, /export function proxy/);
|
||||
assert.doesNotMatch(proxy, /export function middleware/);
|
||||
});
|
||||
|
||||
test("site background stacking does not target every body child", async () => {
|
||||
const css = await readDocsFile("app/global.css");
|
||||
|
||||
|
|
|
|||
|
|
@ -127,12 +127,11 @@ test("product mechanics component explains ingestion outputs", async () => {
|
|||
assert.doesNotMatch(component, /KTX works in two moments/);
|
||||
assert.doesNotMatch(component, /name: "Metabase and query history"/);
|
||||
assert.doesNotMatch(component, /name: "dbt, MetricFlow, LookML"/);
|
||||
assert.doesNotMatch(component, /ClickHouse/);
|
||||
assert.doesNotMatch(component, /MySQL/);
|
||||
assert.doesNotMatch(component, /SQL Server/);
|
||||
assert.doesNotMatch(
|
||||
component,
|
||||
/\/ktx\/brand\/(?:postgresql|snowflake|bigquery|clickhouse|mysql|sqlserver|sqlite|metabase|dbt|looker|notion)\.svg/,
|
||||
/\/ktx\/brand\/(?:postgresql|snowflake|bigquery|mysql|sqlserver|sqlite|metabase|dbt|looker|notion)\.svg/,
|
||||
);
|
||||
assert.doesNotMatch(component, /<img/);
|
||||
assert.doesNotMatch(component, /w-\[calc\(100vw/);
|
||||
|
|
|
|||
|
|
@ -151,7 +151,6 @@ describe('standalone example docs', () => {
|
|||
assert.match(contributing, /context\/\s+# Core context engine/);
|
||||
assert.match(contributing, /llm\/\s+# LLM client abstraction/);
|
||||
assert.match(contributing, /connector-bigquery\/\s+# BigQuery connector/);
|
||||
assert.match(contributing, /connector-clickhouse\/\s+# ClickHouse connector/);
|
||||
assert.match(contributing, /connector-mysql\/\s+# MySQL connector/);
|
||||
assert.match(contributing, /connector-postgres\/\s+# PostgreSQL connector/);
|
||||
assert.match(contributing, /connector-snowflake\/\s+# Snowflake connector/);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue