Align docs with current KTX behavior (#106)

* docs: align docs with current KTX behavior

* fix: generate valid agent sl query command

* docs: clarify KTX product mechanics

* fix: use <ol> for runtime pipeline steps in product mechanics

The PipelineStep component renders <li> elements, but the RuntimeDiagram
wrapper was a plain <div> instead of a list element. This produced invalid
HTML and accessibility warnings. IngestionDiagram already used <ol>.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add docs favicon

* docs: add semantic layer internals concept

* docs: refine documentation source label

* docs: clarify company documentation examples

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Luca Martial 2026-05-15 15:31:51 -04:00 committed by GitHub
parent 465724a991
commit 42b688e934
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1495 additions and 79 deletions

View file

@ -39,20 +39,29 @@ export default async function Page(props: {
const hero = isHeroPage(params.slug);
return (
<DocsPage toc={page.data.toc}>
<DocsPage
toc={page.data.toc}
className="!mx-0 min-w-0 justify-self-start md:!mx-auto"
style={{
width: "calc(100vw - 2rem)",
maxWidth: "900px",
}}
>
{!hero && (
<>
<div className="flex items-start justify-between gap-4">
<div className="flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between sm:gap-4">
<DocsTitle>{page.data.title}</DocsTitle>
<DocsPageActions
markdownUrl={`${page.url}.md`}
mdxSource={mdxSource}
/>
</div>
<DocsDescription>{page.data.description}</DocsDescription>
<DocsDescription className="wrap-anywhere">
{page.data.description}
</DocsDescription>
</>
)}
<DocsBody>
<DocsBody className="min-w-0 max-w-full wrap-anywhere">
<MDX components={{ ...defaultMdxComponents, pre: CodeBlock }} />
</DocsBody>
</DocsPage>

View file

@ -27,6 +27,10 @@ export const metadata: Metadata = {
},
description:
"Open-source context infrastructure that makes agentic analytics reliable.",
icons: {
icon: "/brand/ktx-mascot.svg",
shortcut: "/brand/ktx-mascot.svg",
},
};
export default function RootLayout({ children }: { children: ReactNode }) {

View file

@ -0,0 +1,402 @@
import type { ReactNode } from "react";
/** One evidence source card rendered in the "Inputs KTX reads" column. */
type SourceInput = {
  /** Card heading; also used as the React key when the list is mapped. */
  name: string;
  /** Muted line listing what the source contains. */
  detail: string;
  /** Highlighted line describing what KTX gets out of the source. */
  signal: string;
  /** Border-colour utility classes appended to the card's className. */
  accent: string;
};

/** Evidence sources shown by IngestionDiagram, in display order. */
const sourceInputs: SourceInput[] = [
  // Live database structure.
  {
    name: "Warehouse schema",
    detail: "tables, columns, types, constraints, row counts",
    signal: "grounds definitions in live database structure",
    accent: "border-fd-primary",
  },
  // BI usage and historic SQL.
  {
    name: "Metabase and query history",
    detail: "historic SQL, questions, dashboards, usage patterns",
    signal: "extracts joins, filters, grain, and trusted examples",
    accent: "border-orange-500",
  },
  // Existing modeling layers.
  {
    name: "dbt, MetricFlow, LookML",
    detail: "models, metrics, dimensions, explores, joins",
    signal: "maps existing modeling logic into semantic entities",
    accent: "border-amber-500",
  },
  // Written business knowledge.
  {
    name: "Company documentation",
    detail: "Notion pages, policies, caveats, analyst notes",
    signal: "links business language back to semantic references",
    accent: "border-slate-500 dark:border-cyan-200",
  },
];
/** Copy for one step of the ingest pipeline list. */
type IngestStep = {
  /** Short step name; also used as the React key when the list is mapped. */
  title: string;
  /** One-sentence description shown under the title. */
  body: string;
};

/**
 * Ingest pipeline steps in display order. IngestionDiagram numbers them from
 * their array position, so reordering this list renumbers the steps.
 */
const ingestSteps: IngestStep[] = [
  {
    title: "extract evidence",
    body: "Pull structured facts from schemas, SQL, BI metadata, and docs.",
  },
  {
    title: "reconcile entities",
    body: "Merge names, measures, joins, and caveats into one project model.",
  },
  {
    title: "validate references",
    body: "Check semantic fields and joins against database context before agents use them.",
  },
];
/**
 * One project artifact card. The fields match the props of the Artifact
 * component, because IngestionDiagram spreads each entry straight into it.
 */
type ArtifactEntry = {
  /** Path or glob shown in monospace; also used as the React key. */
  path: string;
  /** Card heading. */
  title: string;
  /** Muted description of what lives at that path. */
  body: string;
};

/** Artifact cards shown next to the ingest pipeline, in display order. */
const artifacts: ArtifactEntry[] = [
  {
    path: "semantic-layer/*.yaml",
    title: "Typed query model",
    body: "sources, grain, joins, dimensions, measures, filters, segments",
  },
  {
    path: "wiki/*.md",
    title: "Business context",
    body: "rules and caveats with sl_refs back to semantic-layer entities",
  },
  {
    path: "raw-sources/",
    title: "Evidence trail",
    body: "scan artifacts, extracted metadata, relationship evidence",
  },
  {
    path: ".ktx/",
    title: "Local indexes",
    body: "embeddings and search indexes, not the source of truth",
  },
];
/** Copy for one step of the runtime planning and execution list. */
type RuntimeStep = {
  /** Short step name; also used as the React key when the list is mapped. */
  title: string;
  /** One-sentence description shown under the title. */
  body: string;
};

/**
 * Runtime steps in display order. RuntimeDiagram numbers them from their
 * array position, so reordering this list renumbers the steps.
 */
const runtimeSteps: RuntimeStep[] = [
  {
    title: "Search wiki",
    body: "Find business rules, caveats, synonyms, and sl_refs.",
  },
  {
    title: "Resolve semantic refs",
    body: "Map measure and dimension names to approved entities.",
  },
  {
    title: "Validate fields",
    body: "Check source, columns, joins, grain, filters, and segments.",
  },
  {
    title: "Build query plan",
    body: "Create a semantic query plan before SQL is generated.",
  },
  {
    title: "Compile dialect SQL",
    body: "Generate warehouse-shaped SQL instead of copying examples.",
  },
  {
    title: "Execute with bounds",
    body: "Optionally run with bounded rows and return provenance.",
  },
];
/**
 * "Product mechanics" section: an intro block (eyebrow, heading, summary)
 * followed by the ingestion and runtime diagrams.
 *
 * The section is labelled by its own heading through `aria-labelledby`, so the
 * heading id and the attribute value must stay in sync.
 */
export function ProductMechanics() {
  const headingId = "mechanics-title";
  const displayFont = { fontFamily: "var(--font-display)" };

  return (
    <section
      className="not-prose my-12 w-full max-w-full min-w-0 space-y-5"
      aria-labelledby={headingId}
    >
      <div className="max-w-3xl">
        <p className="mb-2 text-xs font-semibold uppercase tracking-wide text-fd-primary">
          Product mechanics
        </p>
        <h2
          id={headingId}
          className="text-xl font-semibold tracking-normal text-fd-foreground sm:text-2xl"
          style={displayFont}
        >
          A semantic compiler for analytics agents
        </h2>
        <p className="mt-3 text-sm leading-6 text-fd-muted-foreground">
          KTX builds typed semantic files, links wiki context back to those
          entities, validates the model against database evidence, then compiles
          agent requests into executable SQL.
        </p>
      </div>

      {/* Diagrams are stacked: ingestion first, then runtime. */}
      <div className="space-y-4">
        <IngestionDiagram />
        <RuntimeDiagram />
      </div>
    </section>
  );
}
/**
 * Ingestion diagram card.
 *
 * Left column: one card per entry in `sourceInputs`.
 * Right column: the numbered ingest pipeline (`ingestSteps`, rendered on a
 * dark panel) next to one card per entry in `artifacts`.
 */
function IngestionDiagram() {
  const titleId = "ingestion-diagram-title";

  return (
    <article
      className="max-w-full min-w-0 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
      aria-labelledby={titleId}
    >
      <DiagramHeader
        eyebrow="Ingestion"
        id={titleId}
        title="Messy source evidence becomes structured state"
        body="The important step is reconciliation: KTX turns loose evidence into files agents can validate, edit, and compile against."
      />

      <div className="grid gap-0 lg:grid-cols-[minmax(0,0.9fr)_minmax(0,1.1fr)]">
        {/* Inputs */}
        <section className="border-b border-fd-border p-4 lg:border-r lg:border-b-0">
          <ColumnLabel>Inputs KTX reads</ColumnLabel>
          <div className="grid gap-2 sm:grid-cols-2">
            {sourceInputs.map(({ name, detail, signal, accent }) => (
              <div
                key={name}
                // Shared card classes plus the per-source border colour.
                className={["border-l-2 bg-fd-background px-3 py-2", accent].join(" ")}
              >
                <p className="text-sm font-semibold text-fd-foreground">
                  {name}
                </p>
                <p className="mt-0.5 text-xs leading-5 text-fd-muted-foreground">
                  {detail}
                </p>
                <p className="mt-1 text-xs leading-5 text-fd-primary">
                  {signal}
                </p>
              </div>
            ))}
          </div>
        </section>

        {/* Pipeline and resulting artifacts */}
        <section className="bg-fd-muted/35 p-4">
          <ColumnLabel>KTX builds the model</ColumnLabel>
          <div className="grid gap-3 xl:grid-cols-[minmax(0,0.85fr)_minmax(0,1fr)]">
            <div className="rounded-md border border-fd-border bg-[#102226] p-4 text-white dark:bg-[#0b181b]">
              <p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-cyan-200">
                Ingest pipeline
              </p>
              {/* PipelineStep renders <li>, so the wrapper must be a list. */}
              <ol className="space-y-3">
                {ingestSteps.map(({ title, body }, position) => (
                  <PipelineStep
                    key={title}
                    index={position + 1}
                    title={title}
                    body={body}
                    dark
                  />
                ))}
              </ol>
            </div>
            <div className="grid gap-2 sm:grid-cols-2 xl:grid-cols-1">
              {artifacts.map(({ path, title, body }) => (
                <Artifact key={path} path={path} title={title} body={body} />
              ))}
            </div>
          </div>
        </section>
      </div>
    </article>
  );
}
/**
 * Runtime diagram card.
 *
 * Top row: the example semantic request an agent sends, next to the numbered
 * planning and execution steps from `runtimeSteps`.
 * Bottom row: the example semantic query plan, next to the example SQL that is
 * returned.
 */
function RuntimeDiagram() {
  const titleId = "runtime-diagram-title";

  // Class strings used more than once in this card.
  const dividedColumn =
    "border-b border-fd-border p-4 lg:border-r lg:border-b-0";
  const footnote = "mt-3 text-xs leading-5 text-fd-muted-foreground";
  const planKey = "text-fd-foreground";

  return (
    <article
      className="max-w-full min-w-0 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
      aria-labelledby={titleId}
    >
      <DiagramHeader
        eyebrow="Runtime"
        id={titleId}
        title="A tiny semantic request becomes a planned, executable query"
        body="The agent names the business intent. KTX resolves the semantic model, checks the shape, compiles SQL, and can execute with row limits."
      />

      {/* Request and pipeline */}
      <div className="grid gap-0 lg:grid-cols-[minmax(0,0.82fr)_minmax(0,1.18fr)]">
        <section className={dividedColumn}>
          <ColumnLabel>Agent sends</ColumnLabel>
          <CodeBox>
            <div>connection: warehouse</div>
            <div>measure: orders.total_revenue</div>
            <div>dimension: customers.segment</div>
            <div>filter: orders.created_date &gt;= '2024-01-01'</div>
          </CodeBox>
          <p className={footnote}>
            This is the API surface agents should use: compact semantic intent,
            not hand-written warehouse SQL.
          </p>
        </section>
        <section className="bg-fd-muted/35 p-4">
          <ColumnLabel>KTX planning and execution</ColumnLabel>
          {/* PipelineStep renders <li>, so the wrapper must be a list. */}
          <ol className="grid gap-2 sm:grid-cols-2">
            {runtimeSteps.map(({ title, body }, position) => (
              <PipelineStep
                key={title}
                index={position + 1}
                title={title}
                body={body}
              />
            ))}
          </ol>
        </section>
      </div>

      {/* Plan and output */}
      <div className="grid gap-0 border-t border-fd-border lg:grid-cols-[minmax(0,1fr)_minmax(0,1fr)]">
        <section className={dividedColumn}>
          <ColumnLabel>Semantic query plan</ColumnLabel>
          <div className="rounded-md border border-fd-border bg-fd-card p-3 text-xs leading-5 text-fd-muted-foreground">
            <p>
              <strong className={planKey}>source:</strong>{" "}
              orders joined to customers as many_to_one
            </p>
            <p>
              <strong className={planKey}>measure:</strong>{" "}
              total_revenue = sum(amount) with refund filter
            </p>
            <p>
              <strong className={planKey}>grain:</strong> segment
              group-by with date predicate
            </p>
            <p>
              <strong className={planKey}>result:</strong> dialect
              SQL, bounded rows, and provenance
            </p>
          </div>
        </section>
        <section className="p-4">
          <ColumnLabel>KTX returns</ColumnLabel>
          <CodeBox>
            <div>select</div>
            <div className="pl-3">customers.segment,</div>
            <div className="pl-3">sum(orders.amount) as total_revenue</div>
            <div>from analytics.orders</div>
            <div>join analytics.customers</div>
            <div className="pl-3">on orders.customer_id = customers.id</div>
            <div>where orders.status != 'refunded'</div>
            <div className="pl-3">and orders.created_date &gt;= '2024-01-01'</div>
            <div>group by 1</div>
          </CodeBox>
          <p className={footnote}>
            The output can be SQL-only or executed results with provenance, so
            the agent can show where the answer came from.
          </p>
        </section>
      </div>
    </article>
  );
}
/** Props for {@link DiagramHeader}. */
type DiagramHeaderProps = {
  /** Supporting sentence shown under the title. */
  body: string;
  /** Small uppercase label shown above the title. */
  eyebrow: string;
  /** DOM id placed on the heading so a container can reference it. */
  id: string;
  /** Heading text. */
  title: string;
};

/**
 * Header strip for a diagram card: eyebrow label, `<h3>` title, and a short
 * description. The `id` goes on the heading; both diagram cards pass the same
 * value to their `aria-labelledby`.
 */
function DiagramHeader(props: DiagramHeaderProps) {
  const { body, eyebrow, id, title } = props;
  const displayFont = { fontFamily: "var(--font-display)" };

  return (
    <div className="border-b border-fd-border bg-fd-muted/35 px-5 py-4">
      <p className="text-xs font-semibold uppercase tracking-wide text-fd-primary">
        {eyebrow}
      </p>
      <h3
        id={id}
        className="mt-1 text-base font-semibold tracking-normal text-fd-foreground sm:text-lg"
        style={displayFont}
      >
        {title}
      </h3>
      <p className="mt-2 max-w-3xl text-xs leading-5 text-fd-muted-foreground">
        {body}
      </p>
    </div>
  );
}
/** Props for {@link Artifact}. */
type ArtifactProps = {
  /** Muted description line. */
  body: string;
  /** Path or glob, rendered in monospace. */
  path: string;
  /** Card heading. */
  title: string;
};

/** Bordered card showing a project path, a title, and a short description. */
function Artifact(props: ArtifactProps) {
  const { body, path, title } = props;

  return (
    <div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
      <p className="font-mono text-xs font-semibold text-fd-foreground">
        {path}
      </p>
      <p className="mt-1 text-sm font-semibold text-fd-foreground">{title}</p>
      <p className="mt-0.5 text-xs leading-5 text-fd-muted-foreground">
        {body}
      </p>
    </div>
  );
}
/**
 * Class names for the two PipelineStep variants, keyed by element. Every
 * string is written out in full so the utility classes stay visible as
 * literals in the source.
 */
const pipelineStepClasses = {
  dark: {
    item: "flex gap-3 text-sm",
    badge:
      "flex h-5 w-5 flex-none items-center justify-center rounded-full bg-cyan-200 text-[11px] font-semibold text-[#102226]",
    title: "block text-sm font-semibold text-white",
    body: "mt-0.5 block break-words text-xs leading-5 text-cyan-50/75",
  },
  light: {
    item: "flex gap-3 rounded-md border border-fd-border bg-fd-card px-3 py-2",
    badge:
      "flex h-5 w-5 flex-none items-center justify-center rounded-full bg-fd-primary text-[11px] font-semibold text-fd-primary-foreground",
    title: "block text-xs font-semibold text-fd-foreground",
    body: "mt-0.5 block break-words text-xs leading-5 text-fd-muted-foreground",
  },
};

/**
 * One numbered step: a round badge with the step number, then a title and a
 * description.
 *
 * Renders an `<li>`, so the caller must wrap it in a list element.
 *
 * @param body - Description shown under the title.
 * @param dark - Use the borderless variant styled for a dark panel. Defaults
 *   to `false`, which renders a bordered card.
 * @param index - Number displayed in the badge.
 * @param title - Step name.
 */
function PipelineStep({
  body,
  dark = false,
  index,
  title,
}: {
  body: string;
  dark?: boolean;
  index: number;
  title: string;
}) {
  const classes = dark ? pipelineStepClasses.dark : pipelineStepClasses.light;

  return (
    <li className={classes.item}>
      <span className={classes.badge}>{index}</span>
      <span className="min-w-0">
        <span className={classes.title}>{title}</span>
        <span className={classes.body}>{body}</span>
      </span>
    </li>
  );
}
/** Small uppercase, muted label placed above a diagram column. */
function ColumnLabel(props: { children: ReactNode }) {
  const { children } = props;

  return (
    <p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
      {children}
    </p>
  );
}
/**
 * Dark monospace panel for short code-like snippets.
 *
 * The outer element scrolls horizontally if content overflows. The inner
 * wrapper sets `overflow-wrap: anywhere` so long tokens can break.
 */
function CodeBox(props: { children: ReactNode }) {
  const { children } = props;

  return (
    <div className="max-w-full min-w-0 overflow-x-auto rounded-md border border-fd-border bg-[#0c1417] p-3 font-mono text-[11px] leading-5 text-cyan-50 shadow-sm">
      <div className="[overflow-wrap:anywhere]">{children}</div>
    </div>
  );
}

View file

@ -29,14 +29,16 @@ connections when you use `--all`.
| `--deep` | Use AI-enriched database ingest | Stored connection default, or `fast` |
| `--query-history` | Include database query-history usage patterns | Stored connection default |
| `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default |
| `--query-history-window-days <days>` | Query-history lookback window for this run | Stored connection default |
| `--query-history-window-days <days>` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default |
| `--plain` | Print plain text output | `true` |
| `--json` | Print JSON output | `false` |
| `--no-input` | Disable interactive terminal input | — |
`--fast` and `--deep` are mutually exclusive. Depth flags apply only to
database connections. Query-history flags apply only to database connections
that support query history. Query-history ingest runs after schema ingest and
that support query history. The window flag applies to BigQuery and Snowflake;
Postgres reads the current `pg_stat_statements` aggregate data instead of a
time-windowed history table. Query-history ingest runs after schema ingest and
requires deep ingest readiness.
When `--all` selects both databases and context sources, database ingest runs
@ -70,6 +72,7 @@ ktx ingest warehouse --deep
# Include query-history usage patterns
ktx ingest warehouse --deep --query-history
# Set the lookback window for BigQuery or Snowflake query history
ktx ingest warehouse --query-history-window-days 30
# Build a source connection

View file

@ -96,13 +96,16 @@ incomplete.
|------|-------------|
| `--enable-query-history` | Enable query-history ingest when the selected database supports it |
| `--disable-query-history` | Disable query-history ingest for the selected database |
| `--query-history-window-days <number>` | Query-history lookback window |
| `--query-history-window-days <number>` | BigQuery/Snowflake query-history lookback window |
| `--query-history-min-executions <number>` | Minimum executions for a query-history template |
| `--query-history-service-account-pattern <pattern>` | Query-history service-account regex; repeatable |
| `--query-history-redaction-pattern <pattern>` | Query-history SQL-literal redaction regex; repeatable |
Query history setup is supported for Postgres, BigQuery, and Snowflake. Enabling
query history makes deep ingest readiness matter for later `ktx ingest` runs.
Query history setup is supported for Postgres, BigQuery, and Snowflake. The
window flag applies to BigQuery and Snowflake; Postgres reads the current
`pg_stat_statements` aggregate data instead of a time-windowed history table.
Enabling query history makes deep ingest readiness matter for later
`ktx ingest` runs.
### Context Sources

View file

@ -1,5 +1,5 @@
{
"title": "Concepts",
"defaultOpen": true,
"pages": ["the-context-layer", "context-as-code"]
"pages": ["the-context-layer", "semantic-layer-internals", "context-as-code"]
}

View file

@ -0,0 +1,398 @@
---
title: Semantic Layer Internals
description: How KTX uses join graphs, grain, and relationship metadata to turn context into safe SQL.
---
KTX is a context layer for agents. This page focuses on one internal subsystem:
the semantic execution layer that turns reviewed context into safe SQL.
The semantic layer is important, but it is not the whole product. KTX also
handles schema evidence, wiki context, provenance, validation, and agent
workflows around those files.
Read the page as a pipeline:
- context inputs feed the semantic engine;
- evidence becomes a join graph with grain and relationship metadata;
- review and corrections keep that graph current;
- the execution engine uses the graph to avoid fan-out and ambiguous joins.
## Where the semantic layer fits
The semantic layer is not a separate product category inside KTX. It is the
engine that makes the rest of the context actionable for SQL generation.
<div
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
role="group"
aria-label="How context inputs flow through the semantic layer into agent workflows"
>
<div className="grid gap-0 lg:grid-cols-[1fr_2rem_1.12fr_2rem_1fr]">
<section className="bg-fd-background p-4">
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Context inputs"}
</p>
<div className="grid gap-2 text-sm">
<div className="border-l-2 border-fd-primary bg-fd-card px-3 py-2">
<p className="font-mono text-xs text-fd-foreground">semantic-layer/</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"source YAML, measures, joins, grain"}
</p>
</div>
<div className="border-l-2 border-amber-500 bg-fd-card px-3 py-2">
<p className="font-mono text-xs text-fd-foreground">wiki/</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"business rules, definitions, caveats"}
</p>
</div>
<div className="border-l-2 border-orange-500 bg-fd-card px-3 py-2">
<p className="font-mono text-xs text-fd-foreground">raw-sources/</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"schema scans, keys, imported metadata"}
</p>
</div>
<div className="border-l-2 border-slate-500 bg-fd-card px-3 py-2 dark:border-cyan-200">
<p className="font-mono text-xs text-fd-foreground">provenance</p>
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
{"ingest decisions and review history"}
</p>
</div>
</div>
</section>
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
<span className="h-px w-full bg-fd-border" />
</div>
<section className="relative bg-[#102226] p-5 text-white dark:bg-[#0b181b]">
<div className="absolute inset-y-0 left-0 w-1 bg-fd-primary" />
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-cyan-200">
{"Semantic layer engine"}
</p>
<div className="grid gap-2 sm:grid-cols-2">
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="text-sm font-semibold">Join graph</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"sources as nodes, joins as typed edges"}
</p>
</div>
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="text-sm font-semibold">Grain</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"row identity before aggregation"}
</p>
</div>
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="text-sm font-semibold">Measures</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"verified formulas and filters"}
</p>
</div>
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
<p className="whitespace-nowrap break-normal text-sm font-semibold">Relationships</p>
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
{"many_to_one, one_to_many, one_to_one"}
</p>
</div>
</div>
<div className="mt-3 rounded-md border border-cyan-100/20 bg-cyan-50/10 px-3 py-2 text-sm">
{"Safe query planning before SQL is generated."}
</div>
</section>
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
<span className="h-px w-full bg-fd-border" />
</div>
<section className="bg-fd-muted/35 p-4">
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Agent workflows"}
</p>
<div className="space-y-2 text-sm">
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Search sources and wiki pages"}
</div>
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Compile trusted SQL"}
</div>
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Explain metrics and provenance"}
</div>
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
{"Patch files and validate review"}
</div>
</div>
</section>
</div>
</div>
## The join graph KTX builds
A semantic source is a node. A join is an edge with a join condition and a
relationship type. The graph lets KTX choose valid paths, reject unsafe paths,
and reason about whether a join preserves or multiplies rows before SQL is
generated.
- `many_to_one` paths are usually safe for adding dimensions.
- `one_to_many` paths can multiply fact rows and trigger fan-out handling.
- Equal-cost paths can be ambiguous, so aliases and explicit joins matter.
<figure
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card p-4 shadow-sm"
aria-label="Example semantic join graph"
>
<div className="grid gap-3 md:grid-cols-[1fr_1fr_1fr]">
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
<p className="text-sm font-semibold text-fd-foreground">customers</p>
<p className="mt-1 text-xs text-fd-muted-foreground">grain: customer_id</p>
</div>
<div className="rounded-md border-2 border-fd-primary bg-fd-background px-4 py-3">
<p className="text-sm font-semibold text-fd-foreground">orders</p>
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id</p>
</div>
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
<p className="text-sm font-semibold text-fd-foreground">order_items</p>
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id, line_id</p>
</div>
</div>
<div className="my-3 grid gap-2 text-center text-xs font-medium text-fd-muted-foreground md:grid-cols-[1fr_1fr]">
<div>orders -> customers: many_to_one</div>
<div>orders -> order_items: one_to_many</div>
</div>
<figcaption className="mt-4 border-t border-fd-border pt-3 text-left text-xs leading-5 text-fd-muted-foreground">
<span className="font-medium text-fd-foreground">{"Example: "}</span>
{"a refunds source (not shown) joins to orders the same way order_items does. Used carefully, it explains net revenue. Joined naively, it can duplicate order-level measures."}
</figcaption>
</figure>
The graph is bidirectional for planning. If `orders -> customers` is
`many_to_one`, the reverse path is `one_to_many`; KTX keeps that distinction
instead of treating every join as a neutral edge.
## How KTX builds the graph
KTX starts from evidence, not a blank modeling canvas. Database scans and
analytics-tool imports create source definitions that an analyst can review.
| Evidence | What it contributes |
|---|---|
| Declared primary keys | Initial row grain for each source |
| Declared foreign keys | Formal join candidates and relationship direction |
| Inferred relationships | Useful edges when warehouses lack constraints |
| dbt, MetricFlow, and LookML imports | Existing metrics, dimensions, entities, explores, and joins |
| Query history | Real join and filter patterns agents should respect |
| Analyst review | The final authority before context is merged |
Generated YAML is intentionally reviewable. KTX can draft joins and measures,
but the accepted semantic layer is still the plain-file diff your team approves.
## How KTX keeps the graph current
The semantic layer changes as schemas, metrics, and business rules change. KTX
keeps that loop explicit instead of hiding it behind a remote runtime.
<div
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
role="group"
aria-label="Semantic layer maintenance loop"
>
<div className="border-b border-fd-border bg-fd-muted/35 px-4 py-3">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Semantic maintenance loop"}
</p>
<p className="mt-1 text-sm leading-6 text-fd-muted-foreground">
{"Every accepted correction becomes input to the next graph build."}
</p>
</div>
<div className="p-4">
<div className="-mx-4 overflow-x-auto px-4">
<div className="relative mx-auto h-[460px] w-[720px] max-w-none md:w-full md:max-w-[760px]">
<svg
aria-hidden="true"
className="absolute inset-0 h-full w-full text-fd-primary"
fill="none"
viewBox="0 0 760 460"
>
<g
stroke="currentColor"
strokeLinecap="round"
strokeLinejoin="round"
strokeOpacity="0.68"
strokeWidth="2.5"
>
<path d="M 352 80 H 384" />
<path d="M 600 80 H 668 V 150" />
<path d="M 632 284 V 378 H 626" />
<path d="M 408 378 H 376" />
<path d="M 160 378 H 96 V 308" />
<path d="M 128 172 V 80 H 140" />
</g>
<g fill="currentColor" fillOpacity="0.96" stroke="none">
<polygon points="0,0 -14,-7 -14,7" transform="translate(398 80)" />
<polygon points="0,0 -14,-7 -14,7" transform="translate(668 164) rotate(90)" />
<polygon points="0,0 -14,-7 -14,7" transform="translate(612 378) rotate(180)" />
<polygon points="0,0 -14,-7 -14,7" transform="translate(362 378) rotate(180)" />
<polygon points="0,0 -14,-7 -14,7" transform="translate(96 294) rotate(270)" />
<polygon points="0,0 -14,-7 -14,7" transform="translate(154 80)" />
</g>
</svg>
<div className="absolute left-1/2 top-1/2 flex h-32 w-56 -translate-x-1/2 -translate-y-1/2 flex-col items-center justify-center rounded-md border border-fd-primary/50 bg-fd-background px-4 py-4 text-center shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-primary">
{"reviewed context"}
</p>
<p className="mt-2 text-sm font-semibold leading-6 text-fd-foreground">
{"The accepted graph becomes the starting point for the next build."}
</p>
</div>
<div className="absolute left-[160px] top-6 h-28 w-48 rounded-md border-2 border-fd-primary bg-fd-background px-4 py-3 text-sm shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Step 1"}
</p>
<p className="mt-1 font-semibold text-fd-foreground">{"ingest evidence"}</p>
<p className="mt-2 text-xs leading-5 text-fd-muted-foreground">
{"scan schemas, imports, and accepted files"}
</p>
</div>
<div className="absolute left-[408px] top-6 h-28 w-48 rounded-md border border-fd-border bg-fd-background px-4 py-3 text-sm shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Step 2"}
</p>
<p className="mt-1 font-semibold text-fd-foreground">{"YAML diff"}</p>
<p className="mt-2 text-xs leading-5 text-fd-muted-foreground">
{"draft source, join, grain, and measure changes"}
</p>
</div>
<div className="absolute left-[536px] top-[172px] h-28 w-48 rounded-md border border-fd-border bg-fd-background px-4 py-3 text-sm shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Step 3"}
</p>
<p className="mt-1 font-semibold text-fd-foreground">{"validation"}</p>
<p className="mt-2 text-xs leading-5 text-fd-muted-foreground">
{"check relationships, syntax, and unsafe query shapes"}
</p>
</div>
<div className="absolute left-[408px] top-[322px] h-28 w-48 rounded-md border border-fd-border bg-fd-background px-4 py-3 text-sm shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Step 4"}
</p>
<p className="mt-1 font-semibold text-fd-foreground">{"analyst review"}</p>
<p className="mt-2 text-xs leading-5 text-fd-muted-foreground">
{"accept, edit, or reject generated context"}
</p>
</div>
<div className="absolute left-[160px] top-[322px] h-28 w-48 rounded-md border border-fd-border bg-fd-background px-4 py-3 text-sm shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Step 5"}
</p>
<p className="mt-1 font-semibold text-fd-foreground">{"agent use"}</p>
<p className="mt-2 text-xs leading-5 text-fd-muted-foreground">
{"serve context to search, explain, and query"}
</p>
</div>
<div className="absolute left-8 top-[172px] h-28 w-48 rounded-md border border-fd-primary/70 bg-fd-background px-4 py-3 text-sm shadow-sm">
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Step 6"}
</p>
<p className="mt-1 font-semibold text-fd-foreground">{"corrections"}</p>
<p className="mt-2 text-xs leading-5 text-fd-muted-foreground">
{"agent and analyst fixes become new evidence"}
</p>
</div>
</div>
</div>
</div>
</div>
This matters because semantic correctness is not static. If a source gains a
new key, a metric changes definition, or an analyst corrects a relationship,
the next agent gets that reviewed context.
## The modeling problem the graph solves
Fan-out is the classic failure mode. If an order-level measure is joined to
line-item rows before aggregation, one order can become many rows and revenue
can be counted more than once.
| Problem | What happens | How KTX avoids it |
|---|---|---|
| Order measure joins to `order_items` | `orders.revenue` repeats once per item | Detect the `one_to_many` path and pre-aggregate the order measure |
| Two independent fact sources share `customers` | Measures from each fact table multiply across the shared dimension | Treat it as a chasm trap and use aggregate-locality planning |
| Filter lives only across a `one_to_many` path | Filtering after the join changes the measure grain | Reject or localize the filter instead of silently producing unsafe SQL |
| Multiple equal-cost paths connect the same sources | The join path is ambiguous | Prefer safer paths and use aliases to disambiguate repeated joins |
Many-to-many questions usually show up as multiple one-to-many paths or
independent fact sources. KTX treats those shapes as fan-out or chasm risks
unless the query can be planned at a safe grain.
## How the execution engine uses the graph
The planner resolves the sources in a semantic query, chooses a join tree, and
checks whether any requested dimension or filter crosses a row-multiplying
edge. The SQL generator then chooses the simple path or the aggregate-locality
path.
| Naive SQL shape | Semantic-layer SQL shape |
|---|---|
| Join facts and dimensions first, then aggregate | Aggregate each fact source at its own grain, then join the results |
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source when locality is needed |
| Trust the shortest textual join path | Prefer safe relationship paths and reject disconnected sources |
| Let dimension grain differ across facts | Raise when asymmetric dimensions would fan out another measure |
<div
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
role="group"
aria-label="Fan-out safe execution shape"
>
<div className="grid gap-0 md:grid-cols-2">
<section className="border-b border-fd-border bg-fd-background p-4 md:border-b-0 md:border-r">
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"Unsafe shape"}
</p>
<pre className="overflow-x-auto rounded-md bg-fd-muted p-3 text-xs leading-5 text-fd-foreground">
{`orders
join order_items
join customers
group by customer_segment
sum(orders.amount)`}
</pre>
<p className="mt-3 text-sm text-fd-muted-foreground">
{"The order measure is exposed to line-item fan-out before aggregation."}
</p>
</section>
<section className="bg-fd-background p-4">
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
{"KTX shape"}
</p>
<pre className="overflow-x-auto rounded-md border border-fd-border bg-fd-muted p-3 text-xs leading-5 text-fd-foreground">
{`orders_agg as (
select customer_id, sum(amount) revenue
from orders
group by customer_id
)
select customers.segment, sum(revenue)
from orders_agg
join customers`}
</pre>
<p className="mt-3 text-sm text-fd-muted-foreground">
{"KTX pre-aggregates fact measures at their own grain before joining dimensions."}
</p>
</section>
</div>
</div>
The result is not magic. It is structured planning: validated sources, typed
relationships, graph search, fan-out detection, aggregate locality, and final
dialect transpilation.
## What this means for agents
KTX gives agents a semantic surface they can inspect and improve, not just a
folder of notes.
- Search semantic sources and related wiki pages before writing SQL.
- Compile SQL through `ktx sl query` instead of guessing joins.
- Validate semantic-layer changes before review.
- Patch YAML and Markdown files in git.
- Explain metric meaning and provenance from the same accepted context.
Next, read [Writing Context](/docs/guides/writing-context) for the YAML editing
workflow or [ktx sl](/docs/cli-reference/ktx-sl) for the command reference.

View file

@ -191,7 +191,18 @@ KTX organizes context into four pillars:
Each pillar covers a different kind of context agents need before they can safely write SQL, update semantic definitions, or explain an analytics result.
**Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives.
**Semantic sources** are YAML definitions that describe your data in terms
agents can reason about:
- source tables or SQL queries;
- row grain;
- typed columns;
- valid joins;
- named measures, filters, and segments.
This is where "revenue means `sum(amount)` excluding refunds" lives. For the
join graph, fan-out protections, and execution mechanics, read
[Semantic Layer Internals](/docs/concepts/semantic-layer-internals).
```yaml
name: orders
@ -289,7 +300,7 @@ my-project/
│ └── data-quality-notes.md
├── raw-sources/
│ └── warehouse/
│ └── database-ingest/ # Schema ingest artifacts and reports
│ └── live-database/ # Schema ingest artifacts and reports
└── .ktx/
├── db.sqlite # Local state (git-ignored)
└── cache/ # Runtime cache (git-ignored)

View file

@ -3,10 +3,12 @@ title: Introduction
description: How KTX gives analytics agents trusted context for warehouse work.
---
<div className="not-prose mb-14">
<div className="mb-8">
import { ProductMechanics } from "@/components/product-mechanics";
<div className="not-prose mb-10">
<div>
<h1
className="text-4xl font-extrabold tracking-tight lg:text-5xl"
className="max-w-full text-3xl font-extrabold tracking-tight break-words sm:text-4xl lg:text-5xl"
style={{
fontFamily: 'var(--font-display)',
background: 'linear-gradient(180deg, var(--color-fd-foreground) 0%, color-mix(in oklch, var(--color-fd-foreground) 75%, var(--color-fd-primary)) 100%)',
@ -18,62 +20,43 @@ description: How KTX gives analytics agents trusted context for warehouse work.
letterSpacing: '0',
}}
>
Make analytics context{'\n'}usable by agents
Make analytics context usable by agents
</h1>
<p className="mt-4 text-lg text-fd-muted-foreground max-w-2xl" style={{ lineHeight: '1.7' }}>
KTX turns warehouse metadata, semantic definitions, and business knowledge
into reviewable project files that agents can use while planning, querying,
and updating analytics work.
<p className="mt-4 max-w-2xl text-lg text-fd-muted-foreground" style={{ lineHeight: '1.7' }}>
{'KTX turns warehouse metadata, semantic definitions, and business knowledge into reviewable project files that agents can use while planning, querying, and updating analytics work.'}
</p>
</div>
<div className="flex flex-wrap gap-3">
<a
href="/docs/getting-started/quickstart"
className="inline-flex h-10 items-center rounded-lg bg-fd-primary px-5 text-sm font-medium text-fd-primary-foreground transition-colors hover:opacity-90"
>
Get Started
</a>
<a
href="/docs/concepts/the-context-layer"
className="inline-flex h-10 items-center rounded-lg border border-fd-border bg-fd-background px-5 text-sm font-medium text-fd-foreground transition-colors hover:bg-fd-muted"
>
The Context Layer
</a>
<a
href="/docs/guides/building-context"
className="inline-flex h-10 items-center rounded-lg border border-fd-border bg-fd-background px-5 text-sm font-medium text-fd-foreground transition-colors hover:bg-fd-muted"
>
Building Context
</a>
</div>
</div>
## Who KTX is for
<ProductMechanics />
## What agents can do with KTX
KTX is built for analytics engineers and data teams who want data agents to
work on real analytics systems - not just generate one-off SQL.
work on real analytics systems, not just generate one-off SQL.
Use KTX when you want agents to:
Use it when agents need to:
- **Generate SQL** from approved measures and joins
- **Repair semantic definitions** through reviewable diffs
- **Explain metric provenance** with warehouse evidence
- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI platforms
- **Generate SQL** from approved measures, dimensions, joins, and filters
- **Explain provenance** with wiki context and warehouse evidence
- **Repair context** through reviewable YAML and Markdown diffs
- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and warehouses
Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server.
KTX works with SQLite, PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and
SQL Server.
## Explore the docs
## Read next
<Cards>
<Card title="Quickstart" href="/docs/getting-started/quickstart">
Set up KTX and build your first context in under 10 minutes.
</Card>
<Card title="Concepts" href="/docs/concepts/the-context-layer">
Understand what a context layer is and why agents need one.
</Card>
<Card title="Guides" href="/docs/guides/building-context">
Hands-on workflows for scanning, ingesting, writing, and serving.
</Card>
<Card title="Writing Context" href="/docs/guides/writing-context">
Edit semantic-layer YAML and wiki Markdown safely.
</Card>
<Card title="CLI Reference" href="/docs/cli-reference/ktx-setup">
Complete flag and subcommand reference for every KTX command.
</Card>

View file

@ -51,8 +51,8 @@ For scripted setup, pass the project directory explicitly:
ktx setup --project-dir ./analytics
```
If setup exits early, rerun `ktx setup` in the same directory. KTX tracks
completed setup steps and resumes from the remaining work.
If setup exits early, rerun `ktx setup` in the same directory. KTX keeps local
setup progress under `.ktx/setup/` and resumes from the remaining work.
## Step 2: Configure the LLM
@ -122,7 +122,8 @@ Database ready
PostgreSQL, BigQuery, and Snowflake can also enable query-history ingest. Query
history helps KTX learn common query patterns, joins, service-account filters,
and warehouse-specific usage.
and warehouse-specific usage. BigQuery and Snowflake support a lookback window;
PostgreSQL reads the current `pg_stat_statements` aggregate data instead.
## Step 5: Add context sources
@ -200,7 +201,7 @@ KTX writes plain files so people and agents can inspect changes in git.
| Path | Purpose |
|------|---------|
| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and setup state |
| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and query-history settings |
| `.ktx/secrets/*` | Local secret files referenced from `ktx.yaml`; do not commit these |
| `.ktx/setup/*` | Local setup and context-build state |
| `.ktx/agents/install-manifest.json` | Manifest used to manage installed agent files |

View file

@ -62,13 +62,15 @@ configured, run `ktx setup` or use `--fast`.
PostgreSQL, BigQuery, and Snowflake can add query-history context. This helps
KTX learn common joins, filters, service-account patterns, redaction rules, and
usage-heavy query templates.
usage-heavy query templates. BigQuery and Snowflake support a lookback window;
PostgreSQL reads the current `pg_stat_statements` aggregate data instead.
Enable it during setup, store it under `connections.<id>.context.queryHistory`,
or request it for one run:
```bash
ktx ingest warehouse --deep --query-history
# Set the lookback window for BigQuery or Snowflake query history
ktx ingest warehouse --query-history-window-days 30
```

View file

@ -60,21 +60,25 @@ semantic-layer/<connection-id>/<source-name>.yaml
```yaml
name: orders
description: Customer orders with booked revenue.
descriptions:
user: Customer orders with booked revenue.
table: public.orders
grain:
- order_id
columns:
- name: order_id
type: string
description: Unique order identifier.
descriptions:
user: Unique order identifier.
- name: order_date
type: time
role: time
description: Date the order was placed.
descriptions:
user: Date the order was placed.
- name: total_amount
type: number
description: Booked order value in USD.
descriptions:
user: Booked order value in USD.
measures:
- name: total_revenue
expr: SUM(total_amount)
@ -85,7 +89,8 @@ measures:
```yaml
name: orders
description: Customer orders with line-item totals.
descriptions:
user: Customer orders with line-item totals.
table: public.orders
grain:
- order_id
@ -93,26 +98,31 @@ grain:
columns:
- name: order_id
type: string
description: Unique order identifier.
descriptions:
user: Unique order identifier.
- name: order_date
type: time
role: time
description: Date the order was placed.
descriptions:
user: Date the order was placed.
- name: status
type: string
visibility: public
description: Current order status.
descriptions:
user: Current order status.
- name: _etl_loaded_at
type: time
visibility: hidden
description: Internal load timestamp.
descriptions:
user: Internal load timestamp.
- name: total_amount
type: number
description: Order total in USD.
descriptions:
user: Order total in USD.
measures:
- name: total_revenue
@ -149,9 +159,10 @@ joins:
| Field | Required | Description |
|-------|----------|-------------|
| `name` | Yes | Source identifier. Use lowercase words and underscores. |
| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
| `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. |
| `grain` | Yes | Columns that uniquely identify a row at the source grain. |
| `columns` | No | Column definitions with type, role, visibility, and descriptions. |
| `columns` | Yes | Non-empty column definitions with type, role, visibility, and descriptions. |
| `measures` | No | Aggregation expressions such as `SUM`, `COUNT`, and `AVG`. |
| `segments` | No | Named predicates agents can reuse. |
| `joins` | No | Relationships to other semantic sources. |
@ -165,7 +176,7 @@ joins:
| Column | `type` | Yes | Agent-facing type: `string`, `number`, `time`, or `boolean`. |
| Column | `role` | No | Special role such as `time` for default time dimensions. |
| Column | `visibility` | No | `public`, `internal`, or `hidden`. |
| Column | `description` | Strongly recommended | Business meaning and usage notes. |
| Column | `descriptions` | Strongly recommended | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
| Measure | `name` | Yes | Queryable metric name. |
| Measure | `expr` | Yes | SQL aggregation expression at the source grain. |
| Measure | `filter` | No | SQL predicate applied only to this measure. |

View file

@ -75,7 +75,7 @@ Available commands:
- `ktx status --json --project-dir /path/to/project`
- `ktx sl list --json --project-dir /path/to/project`
- `ktx sl search '<text>' --json --project-dir /path/to/project --connection-id '<id>'`
- `ktx sl query --json --project-dir /path/to/project --connection-id '<id>' --query-file '<path>' --execute --max-rows 100`
- `ktx sl query --project-dir /path/to/project --connection-id '<id>' --query-file '<path>' --format json --execute --max-rows 100`
- `ktx wiki search '<query>' --json --project-dir /path/to/project --limit 10`
```
@ -172,7 +172,7 @@ All supported agent clients call the same KTX CLI commands:
| `ktx sl list --json` | List semantic-layer sources |
| `ktx sl search <query> --json` | Search semantic-layer sources |
| `ktx sl validate <source> --connection-id <id>` | Validate semantic source definitions |
| `ktx sl query --json` | Execute a semantic-layer query when semantic compute is configured |
| `ktx sl query --format json` | Execute a semantic-layer query when semantic compute is configured |
### Security constraints

View file

@ -34,8 +34,9 @@ automation flags documented in [`ktx setup`](/docs/cli-reference/ktx-setup).
| Path | Purpose |
|------|---------|
| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, query history, and setup state |
| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, and query history |
| `.ktx/secrets/*` | Local file-backed secrets when you choose file references during setup |
| `.ktx/setup/*` | Local setup progress and context-build state |
| `semantic-layer/<connection-id>/` | YAML semantic sources generated by database and source ingestion |
| `wiki/` | Markdown business context, definitions, and ingested knowledge |
| `.ktx/agents/install-manifest.json` | Manifest of agent integration files installed by `ktx setup --agents` |

View file

@ -228,7 +228,7 @@ mapping metadata. The BigQuery connector still authenticates with the
| Feature | Supported | Notes |
|---------|-----------|-------|
| Tables & views | Yes | Including materialized views and external tables |
| Primary keys | No | - |
| Primary keys | Yes | Via `INFORMATION_SCHEMA` table constraints when declared |
| Foreign keys | No | Not available in BigQuery |
| Row count estimates | Yes | From table metadata |
| Column statistics | No | - |
@ -500,7 +500,7 @@ No authentication required - SQLite is file-based. The file must be readable by
- Uses `LIMIT X OFFSET Y` for pagination
- SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB`
- Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON`
- In-memory databases supported with `path: ":memory:"` (for testing)
- Database file must exist before `ktx connection test` or ingest runs
## Common errors

View file

@ -0,0 +1,86 @@
import assert from "node:assert/strict";
import { readFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import { test } from "node:test";
import { fileURLToPath } from "node:url";
const docsSiteDir = join(dirname(fileURLToPath(import.meta.url)), "..");
/**
 * Reads a file from the docs site as UTF-8 text.
 *
 * @param path - File path relative to `docsSiteDir`.
 * @returns A promise that resolves to the file contents as a string.
 */
async function readDocsFile(path) {
  const absolutePath = join(docsSiteDir, path);
  const contents = await readFile(absolutePath, "utf8");
  return contents;
}
// Guards the layout of the introduction page: the ProductMechanics component
// must be imported, rendered after the hero heading, and rendered before the
// use-case section. The text preceding it must not contain the listed link
// labels or the flex wrapper class.
test("docs introduction shows the ingestion and runtime mechanics early", async () => {
  const source = await readDocsFile(
    "content/docs/getting-started/introduction.mdx",
  );

  // The page has to both import and render the component.
  const requiredPatterns = [
    /import\s+\{\s*ProductMechanics\s*\}\s+from\s+"@\/components\/product-mechanics";/,
    /<ProductMechanics\s*\/>/,
  ];
  for (const pattern of requiredPatterns) {
    assert.match(source, pattern);
  }

  // Offsets of the three landmarks whose relative order is checked below.
  const heroAt = source.indexOf("Make analytics context");
  const mechanicsAt = source.indexOf("<ProductMechanics />");
  const useCasesAt = source.indexOf("## What agents can do with KTX");

  // Everything that precedes the component tag is treated as the hero source.
  const beforeMechanics = source.slice(0, mechanicsAt);

  assert.ok(heroAt >= 0, "introduction should include the custom hero");
  assert.ok(
    mechanicsAt > heroAt,
    "mechanics component should appear after the hero",
  );
  assert.ok(
    mechanicsAt < useCasesAt,
    "mechanics component should appear before use-case sections",
  );

  // None of these strings may appear ahead of the mechanics component.
  const forbiddenInHero = [
    /Get Started/,
    /The Context Layer/,
    /Building Context/,
    /flex flex-wrap gap-3/,
  ];
  for (const pattern of forbiddenInHero) {
    assert.doesNotMatch(beforeMechanics, pattern);
  }
});
// Checks the source text of the product-mechanics component: every phrase in
// `requiredPhrases` must be present, and none of the patterns in
// `forbiddenPatterns` may match.
test("product mechanics component covers source-specific context and SQL expansion", async () => {
  const componentSource = await readDocsFile("components/product-mechanics.tsx");

  // Literal substrings the component file must contain.
  const requiredPhrases = [
    "A semantic compiler for analytics agents",
    "Ingestion",
    "Runtime",
    "wiki/",
    "semantic-layer/",
    "raw-sources/",
    ".ktx/",
    "sl_refs",
    "Company documentation",
    "Notion pages",
    "Metabase",
    "query history",
    "extract evidence",
    "reconcile entities",
    "validate references",
    "semantic query plan",
    "dialect SQL",
    "bounded rows",
    "provenance",
    "measure: orders.total_revenue",
    "dimension: customers.segment",
    "select",
  ];

  // Copy, identifiers, and class names that must not appear in the component.
  const forbiddenPatterns = [
    /KTX does more than retrieve Markdown/,
    /Plain Markdown \+ RAG/,
    /comparisonRows/,
    /ComparisonTable/,
    /Not just retrieval/,
    /KTX works in two moments/,
    /w-\[calc\(100vw/,
    /xl:grid-cols-2/,
    /lg:grid-cols-\[[^\]]*_2rem_/,
  ];

  requiredPhrases.forEach((expectedText) => {
    assert.ok(
      componentSource.includes(expectedText),
      `component should include: ${expectedText}`,
    );
  });

  forbiddenPatterns.forEach((pattern) => {
    assert.doesNotMatch(componentSource, pattern);
  });
});

View file

@ -0,0 +1,328 @@
# Semantic Layer Docs Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [x]`) syntax for tracking.
**Goal:** Add a standalone, scannable Concepts page that explains the semantic-layer internals while positioning KTX as a broader context layer.
**Architecture:** Implement this as docs-only MDX content inside the existing Fumadocs tree. The new page uses inline MDX diagrams and Fumadocs color tokens, matching the custom diagram pattern already used in `the-context-layer.mdx`.
**Tech Stack:** MDX, Fumadocs content, Next.js docs site, pnpm workspace commands.
---
### Task 1: Add Concepts Navigation Entry
**Files:**
- Modify: `docs-site/content/docs/concepts/meta.json`
- [x] **Step 1: Update the Concepts page order**
Replace the `pages` array with:
```json
{
"title": "Concepts",
"defaultOpen": true,
"pages": ["the-context-layer", "semantic-layer-internals", "context-as-code"]
}
```
- [x] **Step 2: Verify JSON parses**
Run:
```bash
node -e "JSON.parse(require('node:fs').readFileSync('docs-site/content/docs/concepts/meta.json', 'utf8')); console.log('concepts meta ok')"
```
Expected output:
```text
concepts meta ok
```
### Task 2: Create the Semantic Layer Internals Page
**Files:**
- Create: `docs-site/content/docs/concepts/semantic-layer-internals.mdx`
- [x] **Step 1: Add frontmatter and opening positioning**
Create the page with this frontmatter and opening section:
```mdx
---
title: Semantic Layer Internals
description: How KTX uses join graphs, grain, and relationship metadata to turn context into safe SQL.
---
KTX is a context layer for agents. Its semantic layer is the query-planning core
that turns reviewed context into safe SQL.
Use this page to understand the mechanics behind KTX's semantic execution:
the join graph, how KTX builds and maintains it, and how that graph prevents
classic analytics errors like fan-out and ambiguous join paths.
| KTX is | KTX is not just |
|---|---|
| A context layer for agents | A metric definition store |
| A system for ingesting, reviewing, and serving analytics context | A markdown saver |
| A semantic execution layer plus wiki pages, scans, provenance, and agent workflows | A replacement for every BI semantic layer |
```
- [x] **Step 2: Add the system-fit diagram**
Add a `Where the semantic layer fits` section with a custom `not-prose` diagram.
The diagram must show:
```text
Context inputs -> Semantic layer engine -> Agent workflows
```
The semantic-layer box must be visually prominent and list:
```text
join graph
grain
measures
relationships
safe query planning
```
- [x] **Step 3: Add the join graph section**
Add `## The join graph` with:
- one short paragraph defining nodes and edges;
- bullets for why the graph matters;
- an inline diagram using `orders`, `customers`, `order_items`, and `refunds`.
The section must include this claim in plain language:
```text
The graph lets KTX choose valid paths, reject unsafe paths, and reason about
whether a join preserves or multiplies rows before SQL is generated.
```
- [x] **Step 4: Add build and maintenance sections**
Add `## How KTX builds it` and `## How KTX maintains it`.
`How KTX builds it` must cover these inputs:
```text
declared primary keys
declared foreign keys
inferred relationships
dbt, MetricFlow, and LookML imports
query history
analyst review
```
`How KTX maintains it` must show this loop:
```text
ingest evidence -> YAML diff -> validation -> analyst review -> agent use -> corrections
```
- [x] **Step 5: Add the fan-out and safe execution sections**
Add `## Why grain and relationships matter` with a fan-out example comparing
orders joined to order items. Include a compact table with columns:
```text
Problem
What happens
How KTX avoids it
```
Add `## How the execution engine uses the graph` with a before/after table:
```text
Naive SQL shape
Semantic-layer SQL shape
```
The safe path must mention:
```text
pre-aggregates fact measures at their own grain before joining dimensions
```
- [x] **Step 6: Add agent outcome links**
Add a closing `## What this means for agents` section with bullets explaining
that agents can:
```text
search semantic sources
compile SQL through ktx sl query
validate changes before review
patch YAML and Markdown files in git
explain provenance and metric meaning
```
End with links to:
```mdx
[Writing Context](/docs/guides/writing-context)
[ktx sl](/docs/cli-reference/ktx-sl)
```
### Task 3: Add the Cross-Link from The Context Layer
**Files:**
- Modify: `docs-site/content/docs/concepts/the-context-layer.mdx`
- [x] **Step 1: Replace the semantic sources paragraph with a scannable block**
Find the `**Semantic sources**` paragraph under `KTX organizes context into four pillars`.
Replace the long paragraph with:
```mdx
**Semantic sources** are YAML definitions that describe your data in terms
agents can reason about:
- source tables or SQL queries;
- row grain;
- typed columns;
- valid joins;
- named measures, filters, and segments.
This is where "revenue means `sum(amount)` excluding refunds" lives. For the
join graph, fan-out protections, and execution mechanics, read
[Semantic Layer Internals](/docs/concepts/semantic-layer-internals).
```
- [x] **Step 2: Confirm the page still owns the product positioning**
Search the edited file:
```bash
rg -n "context layer|Semantic Layer Internals|semantic layer - that's a critical component" docs-site/content/docs/concepts/the-context-layer.mdx
```
Expected: output includes the existing context-layer framing and the new internals link.
### Task 4: Fix Mobile Docs Header Overflow
**Files:**
- Modify: `docs-site/app/docs/[[...slug]]/page.tsx`
- [x] **Step 1: Stack title actions on narrow screens**
Replace the non-hero page header wrapper:
```tsx
<div className="flex items-start justify-between gap-4">
```
with:
```tsx
<div className="flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between sm:gap-4">
```
This keeps desktop layout unchanged while preventing the action buttons from
forcing horizontal overflow on mobile.
- [x] **Step 2: Allow the docs article to shrink in the layout grid**
Update the `DocsPage` and `DocsBody` wrappers:
```tsx
<DocsPage
toc={page.data.toc}
className="!mx-0 min-w-0 !max-w-[calc(100vw-2rem)] md:!mx-auto md:!max-w-[900px]"
>
```
```tsx
<DocsBody className="min-w-0 max-w-full">
```
This prevents tables, code blocks, and custom diagrams from forcing the
Fumadocs main article column wider than the mobile viewport, overrides the
library's built-in max-width rule on mobile, aligns the article to the left on
mobile, and preserves the normal centered desktop max width.
If long words still clip under mobile viewport capture, add the same wrapping
behavior used by the Fumadocs sidebar:
```tsx
<DocsDescription className="wrap-anywhere">
{page.data.description}
</DocsDescription>
```
```tsx
<DocsBody className="min-w-0 max-w-full wrap-anywhere">
```
- [x] **Step 3: Recheck mobile render**
Capture or inspect a 390px-wide render of:
```text
http://127.0.0.1:3000/docs/concepts/semantic-layer-internals
```
Expected: the title, description, action buttons, and positioning block stay
within the viewport.
### Task 5: Verify Docs Content and Build
**Files:**
- Check: `docs-site/content/docs/concepts/semantic-layer-internals.mdx`
- Check: `docs-site/content/docs/concepts/the-context-layer.mdx`
- Check: `docs-site/content/docs/concepts/meta.json`
- Check: `docs-site/app/docs/[[...slug]]/page.tsx`
- [x] **Step 1: Run content checks**
Run:
```bash
rg -n "KTX is a context layer|markdown saver|fan-out|join graph|pre-aggregates|Semantic Layer Internals" docs-site/content/docs/concepts
```
Expected: matches appear in the new page and the cross-link appears in
`the-context-layer.mdx`.
- [x] **Step 2: Build the docs site**
Run:
```bash
pnpm --filter ktx-docs build
```
Expected: build exits 0.
- [x] **Step 3: Preview locally**
Run:
```bash
pnpm --filter ktx-docs dev
```
Open:
```text
http://localhost:3000/docs/concepts/semantic-layer-internals
```
Inspect desktop and mobile widths. The opening should clearly position KTX as a
context layer, the Concepts navigation should list the new page, and diagrams
should not overlap or produce unreadable text.
- [x] **Step 4: Commit implementation**
Run:
```bash
git status --short
git add docs-site/content/docs/concepts/meta.json docs-site/content/docs/concepts/semantic-layer-internals.mdx docs-site/content/docs/concepts/the-context-layer.mdx docs-site/app/docs/[[...slug]]/page.tsx docs/superpowers/plans/2026-05-15-semantic-layer-docs.md
git commit -m "docs: add semantic layer internals concept"
```

View file

@ -0,0 +1,166 @@
# Semantic Layer Docs Design
**Date:** 2026-05-15
**Status:** Design - pending implementation plan
## Goal
Add a concise Concepts page that explains the semantic layer as the query
planning engine inside KTX's broader context layer.
The page should make the technical depth visible to skeptical data users
without positioning KTX as only a semantic-layer product. Success means a reader
understands:
- KTX is a context layer for agents.
- The semantic layer is one core subsystem inside that context layer.
- The join graph, grain declarations, and relationship metadata are what make
generated SQL safer than schema-only or markdown-only approaches.
- KTX maintains this semantic layer through ingest, validation, analyst edits,
and reviewable files.
## Current State
The docs currently explain semantic sources in two places:
- `docs-site/content/docs/concepts/the-context-layer.mdx` describes semantic
sources as one pillar of KTX context.
- `docs-site/content/docs/guides/writing-context.mdx` documents the YAML fields
for sources, measures, joins, grain, validation, and common errors.
That content is useful, but the differentiator is not visually obvious. The
semantic layer is embedded in longer narrative pages, so readers can miss the
hard parts: join graph construction, fan-out prevention, chasm traps, and query
planning.
## Positioning
Create a standalone Concepts page with a guarded title such as
`Semantic Layer Internals` or `The Semantic Engine Inside KTX`.
The first screen must frame the product clearly:
> KTX is a context layer. Its semantic layer is the query-planning core that
> turns reviewed context into safe SQL.
The page should avoid a title like `Semantic Layer` by itself because that can
make KTX look like a narrow semantic-layer tool. The page should repeatedly show
the semantic layer between the broader context inputs and the agent workflows it
supports.
Add a short cross-link from `the-context-layer.mdx` so the existing overview
keeps owning the product category. That section should say the semantic layer is
one critical pillar, then link to the internals page for readers who want the
mechanics.
## Page Structure
Add `docs-site/content/docs/concepts/semantic-layer-internals.mdx` and include
it in `docs-site/content/docs/concepts/meta.json` after `the-context-layer`.
Recommended sections:
1. `What this page explains`
- One short paragraph.
- A two-column `KTX is / KTX is not just` table.
2. `Where the semantic layer fits`
- A visual block showing:
`context inputs -> semantic layer engine -> agent workflows`.
- Inputs include semantic YAML, wiki pages, scans, and provenance.
- Outputs include search, SQL generation, explanations, edits, and review.
3. `The join graph`
- Explain nodes as semantic sources and edges as validated joins.
- Show a small graph with `orders`, `customers`, `order_items`, and
`refunds`.
- Keep text to one or two short paragraphs plus bullets.
4. `How KTX builds it`
- Show a pipeline from database evidence and imported modeling tools to
reviewable YAML.
- Mention declared keys, inferred relationships, dbt/MetricFlow/LookML
imports, query history, validation, and analyst review.
5. `How KTX maintains it`
- Show a feedback loop:
ingest evidence -> YAML diff -> validation -> analyst review -> agent use
-> corrections.
- Emphasize that files remain the source of truth.
6. `Why grain and relationships matter`
- Use the fan-out problem as the central example.
- Compare a naive join against a safe semantic-layer plan.
- Explain many-to-one, one-to-many, many-to-many, chasm traps, and ambiguous
paths in compact bullets.
7. `How the execution engine uses the graph`
- Explain path selection, unsafe path rejection, pre-aggregation into CTEs,
filter placement, and dialect transpilation.
- Include a small before/after SQL-shape diagram or table.
8. `What this means for agents`
- Summarize why this is more than saving markdown:
agents can inspect, query, validate, edit, and review the same semantic
files.
- Link to `Writing Context` and `ktx sl`.
## Scannability Rules
The implementation should shorten long prose blocks across the touched pages.
- Keep most text blocks to one or two paragraphs.
- Prefer bullets, tables, diagrams, and compact callout blocks between prose.
- Avoid four-paragraph narrative runs.
- Use diagrams before dense explanations when the concept is spatial.
- Keep examples concrete and copy-pasteable.
## Visual Direction
Use the existing docs-site MDX style rather than a new design system. The current
`the-context-layer.mdx` page already uses custom `not-prose` MDX diagrams with
Fumadocs color tokens; the new page should follow that pattern.
The diagrams should feel like technical product documentation:
- restrained, dense, and readable;
- high contrast for the semantic-layer engine box;
- visible arrows or adjacency that make flow obvious;
- tables for classification and comparison;
- no marketing hero, decorative gradients, or generic card-heavy layout.
## Non-goals
- Do not redesign the whole docs site.
- Do not rename KTX concepts, packages, commands, or directories.
- Do not claim KTX replaces every BI or semantic-layer system.
- Do not add implementation details that are not true in the current codebase.
- Do not expand the page into a long reference for every YAML field; keep that
in `Writing Context`.
## Verification
Because this is docs-only work, verification should focus on the docs site:
- Run the docs build or the narrowest available docs-site type/build check.
- Run formatting or lint checks if the docs package exposes them.
- Preview the page locally and inspect desktop and mobile widths.
- Confirm the page is listed in Concepts navigation.
- Confirm the opening section clearly says KTX is a context layer, not just a
semantic-layer tool.
If implementation changes only MDX and metadata, TypeScript workspace tests are
not required unless the page introduces shared components.
## Acceptance Criteria
- A standalone Concepts page explains the semantic-layer internals.
- The Context Layer page links to the new internals page without making the
overview longer.
- The new page includes diagrams for the system fit, join graph, maintenance
loop, and fan-out-safe execution path.
- Long prose is broken into scannable sections with bullets, tables, and visual
interruptions.
- The positioning consistently says KTX is a context layer with a semantic
execution core.
- Docs-site verification passes or any skipped check is reported with a reason.

View file

@ -91,6 +91,9 @@ describe('setup agents', () => {
expect(skill).toContain('must not print secrets');
expect(skill).toContain('status --json');
expect(skill).toContain('sl list --json');
expect(skill).toContain('sl query');
expect(skill).toContain('--format json');
expect(skill).not.toContain('sl query --json');
expect(skill).not.toContain('agent ');
expect(skill).not.toContain('sql execute');
expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({
@ -150,6 +153,8 @@ describe('setup agents', () => {
expect(skill).not.toContain('`ktx agent');
expect(skill).toContain('status --json');
expect(skill).toContain('sl query');
expect(skill).toContain('--format json');
expect(skill).not.toContain('sl query --json');
expect(skill).not.toContain('sql execute');
});

View file

@ -310,7 +310,8 @@ function ktxCommandLine(launcher: KtxCliLauncher, args: string[]): string {
}
function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLauncher }): string {
const projectDirArgs = ['--json', '--project-dir', input.projectDir];
const projectDirArgs = ['--project-dir', input.projectDir];
const jsonProjectDirArgs = ['--json', ...projectDirArgs];
return [
'---',
'name: ktx',
@ -327,9 +328,9 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun
'',
'Available commands:',
'',
`- \`${ktxCommandLine(input.launcher, ['status', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['sl', 'search', '<text>', ...projectDirArgs, '--connection-id', '<id>'])}\``,
`- \`${ktxCommandLine(input.launcher, ['status', ...jsonProjectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...jsonProjectDirArgs])}\``,
`- \`${ktxCommandLine(input.launcher, ['sl', 'search', '<text>', ...jsonProjectDirArgs, '--connection-id', '<id>'])}\``,
`- \`${ktxCommandLine(input.launcher, [
'sl',
'query',
@ -338,11 +339,13 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun
'<id>',
'--query-file',
'<path>',
'--format',
'json',
'--execute',
'--max-rows',
'100',
])}\``,
`- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '<query>', ...projectDirArgs, '--limit', '10'])}\``,
`- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '<query>', ...jsonProjectDirArgs, '--limit', '10'])}\``,
'',
'Use semantic-layer queries before direct database access. Do not print secrets or credential references.',
'',