mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
docs: rewrite Semantic Querying concept with imperative-vs-declarative diagram
Reframe semantic-layer-internals.mdx around the contract the semantic layer offers an agent: declare what you want (a Semantic Query), and KTX figures out how to compute it. This replaces the old "Context-Aware SQL" framing with a clear imperative-vs-declarative narrative.

Adds a React Flow component (semantic-layer-flow.tsx) that contrasts a buggy 4-table agent-authored SQL (chasm trap, LEFT-JOIN-in-WHERE, hardcoded DATE_TRUNC) against the chasm-safe per-fact CTE SQL the planner actually emits, including the outer GROUP BY over the requested dimensions. Both lanes converge into a shared warehouse node, and each SQL card now has parallel bullet notes (failures on the left, KTX behavior on the right).

Side fixes bundled in:
- include the /ktx basePath in the favicon metadata so the icon resolves under the production prefix
- migrate docs-site/middleware.ts to docs-site/proxy.ts (Next 16 rename)
- redirect / to /ktx/docs/getting-started/introduction so the apex docs URL works
- add tests covering the apex redirect, the favicon basePath, and the middleware-to-proxy rename
- propagate the Semantic Query terminology across the ktx-sl CLI reference, the context-layer concept page, and the agent-clients / primary-sources integration pages
This commit is contained in:
parent
14c2567c14
commit
911cfdc741
11 changed files with 1081 additions and 232 deletions
|
|
@ -28,8 +28,8 @@ export const metadata: Metadata = {
|
|||
description:
|
||||
"Open-source context infrastructure that makes agentic analytics reliable.",
|
||||
icons: {
|
||||
icon: "/brand/ktx-mascot.svg",
|
||||
shortcut: "/brand/ktx-mascot.svg",
|
||||
icon: "/ktx/brand/ktx-mascot.svg",
|
||||
shortcut: "/ktx/brand/ktx-mascot.svg",
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
|||
873
docs-site/components/semantic-layer-flow.tsx
Normal file
873
docs-site/components/semantic-layer-flow.tsx
Normal file
|
|
@ -0,0 +1,873 @@
|
|||
"use client";
|
||||
|
||||
import {
|
||||
Background,
|
||||
BackgroundVariant,
|
||||
Handle,
|
||||
MarkerType,
|
||||
type Node,
|
||||
type NodeProps,
|
||||
Position,
|
||||
ReactFlow,
|
||||
} from "@xyflow/react";
|
||||
import "@xyflow/react/dist/style.css";
|
||||
|
||||
/** Lane a card belongs to; selects the badge palette in `LaneBadge`. */
type LaneVariant = "manual" | "ktx";

/** Payload of the agent card at the top of the diagram. */
type AgentNodeData = {
  variant: "single";
  title: string;
  subtitle: string;
};

/** Payload of the hand-written SQL card (left lane). */
type ManualSqlNodeData = {
  variant: "manual";
  badge: string;
  title: string;
  caption: string;
  /** SQL text rendered verbatim inside a `<pre>`. */
  code: string;
  /** Bullet notes listed under the code. */
  notes: string[];
};

/** Payload of the Semantic Query card (right lane, first step). */
type SlQueryNodeData = {
  variant: "slQuery";
  badge: string;
  title: string;
  caption: string;
  /** JSON text rendered verbatim inside a `<pre>`. */
  code: string;
};

/** Payload of the engine card: an ordered list of planner stages. */
type EngineNodeData = {
  variant: "engine";
  badge: string;
  title: string;
  stages: Array<{ index: number; title: string; detail: string }>;
};

/** Payload of the compiled SQL card (right lane, last step). */
type CompiledSqlNodeData = {
  variant: "compiled";
  badge: string;
  title: string;
  caption: string;
  /** SQL text rendered verbatim inside a `<pre>`. */
  code: string;
  /** Bullet notes listed under the code. */
  notes: string[];
};

/** Payload of the shared warehouse card at the bottom of the diagram. */
type WarehouseNodeData = {
  variant: "warehouse";
  title: string;
  /** Driver names, joined with a bullet separator when rendered. */
  drivers: string[];
};

// One React Flow node type per card; the second type argument is the key
// used to register the matching renderer in `nodeTypes`.
type AgentNode = Node<AgentNodeData, "agent">;
type ManualSqlNode = Node<ManualSqlNodeData, "manualSql">;
type SlQueryNode = Node<SlQueryNodeData, "slQuery">;
type EngineNode = Node<EngineNodeData, "engine">;
type CompiledSqlNode = Node<CompiledSqlNodeData, "compiledSql">;
type WarehouseNode = Node<WarehouseNodeData, "warehouse">;

/** Union of every node that may appear in the diagram. */
type FlowNode =
  | AgentNode
  | ManualSqlNode
  | SlQueryNode
  | EngineNode
  | CompiledSqlNode
  | WarehouseNode;
|
||||
|
||||
// ---------------------------------------------------------------------------
// Diagram geometry, in flow coordinates (unscaled pixels).
// ---------------------------------------------------------------------------

/** Logical canvas width; the agent and warehouse cards are centred within it. */
const CANVAS_W = 1120;

// Agent card, horizontally centred at the top.
const AGENT_W = 380;
const AGENT_H = 104;
const AGENT_X = (CANVAS_W - AGENT_W) / 2;
const AGENT_Y = 16;

// Two equal-width lanes, each inset by LEFT_LANE_X from its canvas edge.
const LANE_W = 488;
const LEFT_LANE_X = 32;
const RIGHT_LANE_X = CANVAS_W - LEFT_LANE_X - LANE_W;

/** Y coordinate at which both lanes start. */
const LANE_TOP_Y = 248;

// Right lane: three cards stacked vertically with RIGHT_GAP between them.
const SL_QUERY_H = 510;
const ENGINE_H = 380;
const COMPILED_H = 1380;
const RIGHT_GAP = 24;

const RIGHT_LANE_TOTAL = SL_QUERY_H + RIGHT_GAP + ENGINE_H + RIGHT_GAP + COMPILED_H;

// Left lane: a single card.
const MANUAL_SQL_H = 840;

/** Bottom of the taller lane; the warehouse card sits below this line. */
const LANES_BOTTOM_Y =
  LANE_TOP_Y + Math.max(MANUAL_SQL_H, RIGHT_LANE_TOTAL);

const SL_QUERY_Y = LANE_TOP_Y;
const ENGINE_Y = SL_QUERY_Y + SL_QUERY_H + RIGHT_GAP;
const COMPILED_Y = ENGINE_Y + ENGINE_H + RIGHT_GAP;

// Warehouse card, horizontally centred beneath both lanes.
const WAREHOUSE_W = 304;
const WAREHOUSE_H = 92;
const WAREHOUSE_X = (CANVAS_W - WAREHOUSE_W) / 2;
const WAREHOUSE_Y = LANES_BOTTOM_Y + 56;

// NOTE(review): CANVAS_H is not referenced elsewhere in the visible source;
// confirm whether the container height is meant to be derived from it.
const CANVAS_H = WAREHOUSE_Y + WAREHOUSE_H + 32;

// Stroke colours shared by edges, badges and bullet dots.
const MANUAL_STROKE = "#94a3b8";
const KTX_STROKE = "#0891b2";
|
||||
|
||||
/** Agent card: the single origin both lanes branch from. Static (not draggable or selectable). */
const agent: AgentNode = {
  id: "agent",
  type: "agent",
  position: { x: AGENT_X, y: AGENT_Y },
  data: {
    variant: "single",
    title: "Analytics agent",
    subtitle:
      "Asks: monthly net revenue and open tickets per segment, high-value orders only, no test customers",
  },
  draggable: false,
  selectable: false,
};
|
||||
|
||||
/**
 * Left-lane card: the SQL an agent would write by hand.
 * `code` is rendered verbatim in a `<pre>`, so its whitespace is significant;
 * the trailing SQL comments spell out what is wrong with the query.
 */
const manualSql: ManualSqlNode = {
  id: "manual-sql",
  type: "manualSql",
  position: { x: LEFT_LANE_X, y: LANE_TOP_Y },
  data: {
    variant: "manual",
    badge: "Without KTX",
    title: "Agent writes the SQL",
    caption:
      "Stitches four tables, mixes grains, and ships numbers that won't match the dashboard.",
    code: `-- agent stitches four tables, mixes facts,
-- and ships numbers that won't match the dashboard

SELECT
  c.segment,
  DATE_TRUNC('month', o.created_at) AS month,
  SUM(o.amount) - SUM(r.amount) AS net_revenue,
  COUNT(t.id) AS open_tickets
FROM customers c
LEFT JOIN orders o
  ON o.customer_id = c.id
LEFT JOIN refunds r
  ON r.order_id = o.id
LEFT JOIN tickets t
  ON t.customer_id = c.id
WHERE
  c.is_test = false
  AND o.amount >= 100
  AND t.status = 'open' -- turns LEFT JOIN into INNER
GROUP BY
  c.segment,
  DATE_TRUNC('month', o.created_at)
ORDER BY
  month,
  c.segment
LIMIT 1000;

-- chasm trap: orders rows multiply by tickets and refunds
-- net_revenue and open_tickets are both inflated
-- DATE_TRUNC syntax breaks on BigQuery`,
    notes: [
      "Re-stitches a 4-way join on every question",
      "Reinvents net_revenue and the high-value rule",
      "Hides a chasm trap across three facts",
      "Filters a LEFT JOIN target in WHERE",
      "Hardcodes one warehouse's date functions",
    ],
  },
  draggable: false,
  selectable: false,
};
|
||||
|
||||
/**
 * Right-lane card, step 1: the Semantic Query the agent sends.
 * `code` is JSON text rendered verbatim in a `<pre>`.
 */
const slQuery: SlQueryNode = {
  id: "sl-query",
  type: "slQuery",
  position: { x: RIGHT_LANE_X, y: SL_QUERY_Y },
  data: {
    variant: "slQuery",
    badge: "With KTX",
    title: "Agent sends a Semantic Query",
    caption:
      "Names the measures, dimensions, segments, and filters it wants. No SQL, no joins.",
    code: `{
  "measures": [
    "orders.revenue",
    "refunds.amount",
    "tickets.open_count",
    {
      "name": "net_revenue",
      "expr": "orders.revenue - refunds.amount"
    }
  ],
  "dimensions": [
    "customers.segment",
    { "field": "orders.created_at", "granularity": "month" }
  ],
  "segments": ["orders.high_value"],
  "filters": ["customers.is_test = false"],
  "limit": 1000
}`,
  },
  draggable: false,
  selectable: false,
};
|
||||
|
||||
/**
 * Right-lane card, step 2: the planner stages shown between the Semantic
 * Query and the generated SQL. `index` is both the displayed step number
 * and the React key of each stage row.
 */
const engine: EngineNode = {
  id: "engine",
  type: "engine",
  position: { x: RIGHT_LANE_X, y: ENGINE_Y },
  data: {
    variant: "engine",
    badge: "Semantic-layer engine",
    title: "Plans the query against the reviewed graph",
    stages: [
      {
        index: 1,
        title: "Resolve refs",
        detail: "qualify columns, look up measure formulas",
      },
      {
        index: 2,
        title: "Build join tree",
        detail: "Dijkstra over typed edges from an anchor source",
      },
      {
        index: 3,
        title: "Detect fan-out",
        detail: "group measures by source, flag chasm traps",
      },
      {
        index: 4,
        title: "Localize aggregation",
        detail: "pre-aggregate each fact as its own CTE",
      },
      {
        index: 5,
        title: "Transpile dialect",
        detail: "emit Postgres-shaped SQL, then target dialect",
      },
    ],
  },
  draggable: false,
  selectable: false,
};
|
||||
|
||||
/**
 * Right-lane card, step 3: the SQL shown as the planner's output.
 * Each fact (orders, refunds, tickets) is aggregated in its own CTE before
 * being joined to customers. `code` is rendered verbatim in a `<pre>`.
 */
const compiledSql: CompiledSqlNode = {
  id: "compiled-sql",
  type: "compiledSql",
  position: { x: RIGHT_LANE_X, y: COMPILED_Y },
  data: {
    variant: "compiled",
    badge: "Generated SQL",
    title: "KTX returns dialect-correct SQL",
    caption:
      "Pre-aggregates each fact at its own grain, then joins back on the shared dimension.",
    code: `WITH orders_agg AS (
  SELECT
    customer_id,
    DATE_TRUNC('month', created_at) AS month,
    SUM(amount) AS revenue
  FROM public.orders
  WHERE amount >= 100
  GROUP BY
    customer_id,
    DATE_TRUNC('month', created_at)
),
refunds_agg AS (
  SELECT
    o.customer_id,
    DATE_TRUNC('month', o.created_at) AS month,
    SUM(r.amount) AS refund_amount
  FROM public.refunds r
  JOIN public.orders o
    ON o.id = r.order_id
  WHERE o.amount >= 100
  GROUP BY
    o.customer_id,
    DATE_TRUNC('month', o.created_at)
),
tickets_agg AS (
  SELECT
    customer_id,
    DATE_TRUNC('month', opened_at) AS month,
    COUNT(*) AS open_count
  FROM public.tickets
  WHERE status = 'open'
  GROUP BY
    customer_id,
    DATE_TRUNC('month', opened_at)
)
SELECT
  c.segment,
  o.month,
  SUM(o.revenue - COALESCE(r.refund_amount, 0)) AS net_revenue,
  SUM(o.revenue) AS revenue,
  SUM(r.refund_amount) AS refund_amount,
  SUM(COALESCE(t.open_count, 0)) AS open_tickets
FROM public.customers c
JOIN orders_agg o
  ON o.customer_id = c.id
LEFT JOIN refunds_agg r
  ON r.customer_id = c.id
  AND r.month = o.month
LEFT JOIN tickets_agg t
  ON t.customer_id = c.id
  AND t.month = o.month
WHERE c.is_test = false
GROUP BY
  c.segment,
  o.month
ORDER BY
  o.month,
  c.segment
LIMIT 1000;`,
    notes: [
      "Walks the reviewed join graph automatically",
      "Uses the canonical net_revenue formula",
      "Pre-aggregates each fact to avoid the chasm trap",
      "Keeps LEFT JOIN filters on the dimension source",
      "Transpiles DATE_TRUNC to the target dialect",
    ],
  },
  draggable: false,
  selectable: false,
};
|
||||
|
||||
/** Warehouse card at the bottom; both lanes' final edges target it. */
const warehouse: WarehouseNode = {
  id: "warehouse",
  type: "warehouse",
  position: { x: WAREHOUSE_X, y: WAREHOUSE_Y },
  data: {
    variant: "warehouse",
    title: "Warehouse",
    drivers: ["PostgreSQL", "Snowflake", "BigQuery", "ClickHouse"],
  },
  draggable: false,
  selectable: false,
};
|
||||
|
||||
/** Every node handed to `<ReactFlow>`, listed top to bottom. */
const nodes: FlowNode[] = [
  agent,
  manualSql,
  slQuery,
  engine,
  compiledSql,
  warehouse,
];
|
||||
|
||||
/**
 * Builds the closed-arrow end marker used by every edge.
 *
 * @param color - Fill colour of the arrow head; callers pass the edge's stroke colour.
 * @returns A 16x16 `MarkerType.ArrowClosed` marker definition.
 */
const arrowMarker = (color: string) => ({
  type: MarkerType.ArrowClosed,
  color,
  width: 16,
  height: 16,
});
|
||||
|
||||
/**
 * Edges of the diagram.
 *
 * The manual lane (agent -> manual SQL -> warehouse) uses dashed grey strokes.
 * The KTX lane (agent -> Semantic Query -> engine -> compiled SQL -> warehouse)
 * uses solid cyan strokes. Only the two edges leaving the agent carry a label.
 */
const edges = [
  {
    id: "agent-manual",
    source: "agent",
    target: "manual-sql",
    type: "smoothstep" as const,
    label: "writes raw SQL",
    labelBgPadding: [6, 3] as [number, number],
    labelBgBorderRadius: 4,
    labelStyle: {
      fontSize: 12,
      fontWeight: 500,
      fill: "var(--color-fd-muted-foreground)",
    },
    labelBgStyle: {
      fill: "var(--color-fd-background)",
      stroke: "var(--color-fd-border)",
      strokeWidth: 1,
    },
    style: {
      stroke: MANUAL_STROKE,
      strokeWidth: 1.5,
      strokeDasharray: "5 4",
    },
    markerEnd: arrowMarker(MANUAL_STROKE),
  },
  {
    id: "manual-warehouse",
    source: "manual-sql",
    target: "warehouse",
    type: "smoothstep" as const,
    style: {
      stroke: MANUAL_STROKE,
      strokeWidth: 1.5,
      strokeDasharray: "5 4",
    },
    markerEnd: arrowMarker(MANUAL_STROKE),
  },
  {
    id: "agent-slquery",
    source: "agent",
    target: "sl-query",
    type: "smoothstep" as const,
    label: "sends Semantic Query",
    labelBgPadding: [6, 3] as [number, number],
    labelBgBorderRadius: 4,
    labelStyle: {
      fontSize: 12,
      fontWeight: 600,
      fill: KTX_STROKE,
    },
    labelBgStyle: {
      fill: "var(--color-fd-background)",
      stroke: "var(--color-fd-border)",
      strokeWidth: 1,
    },
    style: { stroke: KTX_STROKE, strokeWidth: 1.75 },
    markerEnd: arrowMarker(KTX_STROKE),
  },
  {
    id: "slquery-engine",
    source: "sl-query",
    target: "engine",
    // Same-lane cards are vertically aligned, so a straight edge suffices.
    type: "straight" as const,
    style: { stroke: KTX_STROKE, strokeWidth: 1.75 },
    markerEnd: arrowMarker(KTX_STROKE),
  },
  {
    id: "engine-compiled",
    source: "engine",
    target: "compiled-sql",
    type: "straight" as const,
    style: { stroke: KTX_STROKE, strokeWidth: 1.75 },
    markerEnd: arrowMarker(KTX_STROKE),
  },
  {
    id: "compiled-warehouse",
    source: "compiled-sql",
    target: "warehouse",
    type: "smoothstep" as const,
    style: { stroke: KTX_STROKE, strokeWidth: 1.75 },
    markerEnd: arrowMarker(KTX_STROKE),
  },
];
|
||||
|
||||
/**
 * Renderer for the agent card: a round icon next to a title and subtitle.
 * The card has a fixed size (AGENT_W x AGENT_H) and one invisible source
 * handle on its bottom edge.
 */
function AgentNodeView({ data }: NodeProps<AgentNode>) {
  return (
    <div
      style={{ width: AGENT_W, height: AGENT_H }}
      className="flex items-center gap-3 rounded-md border border-fd-border bg-fd-card px-4 py-3 shadow-sm"
    >
      <Handle type="source" position={Position.Bottom} className="!opacity-0" />
      <div className="flex h-10 w-10 flex-none items-center justify-center rounded-full bg-fd-primary/15 text-fd-primary">
        {/* Decorative icon; hidden from assistive technology. */}
        <svg
          xmlns="http://www.w3.org/2000/svg"
          width="20"
          height="20"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="1.75"
          strokeLinecap="round"
          strokeLinejoin="round"
          aria-hidden="true"
        >
          <rect x="3" y="6" width="18" height="12" rx="3" />
          <circle cx="9" cy="12" r="1.25" fill="currentColor" stroke="none" />
          <circle cx="15" cy="12" r="1.25" fill="currentColor" stroke="none" />
          <path d="M12 3v3" />
        </svg>
      </div>
      <div className="min-w-0">
        <p className="text-[15px] font-semibold leading-5 text-fd-foreground">
          {data.title}
        </p>
        <p className="mt-0.5 text-[12px] leading-4 text-fd-muted-foreground">
          {data.subtitle}
        </p>
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Small uppercase pill with a coloured dot, used as a card header.
 *
 * @param variant - `"manual"` selects the slate palette and grey dot; `"ktx"` the cyan ones.
 * @param children - Badge label.
 */
function LaneBadge({
  variant,
  children,
}: {
  variant: LaneVariant;
  children: React.ReactNode;
}) {
  const isManual = variant === "manual";
  const palette = isManual
    ? "border-slate-300 bg-slate-100 text-slate-700 dark:border-slate-600/60 dark:bg-slate-700/40 dark:text-slate-200"
    : "border-cyan-300/70 bg-cyan-50 text-cyan-800 dark:border-cyan-400/40 dark:bg-cyan-400/15 dark:text-cyan-100";
  const dotColor = isManual ? MANUAL_STROKE : KTX_STROKE;

  return (
    <span
      className={`inline-flex items-center gap-1.5 rounded-sm border px-2 py-0.5 text-[10.5px] font-semibold uppercase tracking-[0.08em] ${palette}`}
    >
      <span
        className="h-1.5 w-1.5 rounded-full"
        style={{
          background: dotColor,
        }}
      />
      {children}
    </span>
  );
}
|
||||
|
||||
/**
 * Framed code listing: a header strip (language on the left, authorship tag
 * on the right) above a scrollable `<pre>` holding the raw text.
 *
 * @param language - Label shown in the header, e.g. "sql" or "json".
 * @param code - Text rendered verbatim; no syntax highlighting is applied here.
 * @param tone - Selects the language-label colour. `"compiled"` also switches
 *   the authorship tag to "ktx-compiled"; other tones show "agent-authored".
 */
function CodeBlock({
  language,
  code,
  tone,
}: {
  language: string;
  code: string;
  tone: "manual" | "slQuery" | "compiled";
}) {
  const toneClasses = {
    manual: "text-slate-600 dark:text-slate-300",
    slQuery: "text-fd-primary",
    compiled: "text-fd-primary/90",
  } as const;
  const toneClass = toneClasses[tone];
  const authorship = tone === "compiled" ? "ktx-compiled" : "agent-authored";

  return (
    <div className="flex h-full flex-col overflow-hidden rounded-md border border-fd-border bg-[#fbfaf6] dark:bg-[#0c1417]">
      <div className="flex flex-none items-center justify-between border-b border-fd-border bg-fd-muted/40 px-3 py-1.5">
        <span
          className={`font-mono font-medium tracking-wide ${toneClass}`}
          style={{ fontSize: "11px", lineHeight: "16px" }}
        >
          {language}
        </span>
        <span
          className="font-mono uppercase tracking-[0.08em] text-fd-muted-foreground"
          style={{ fontSize: "10.5px", lineHeight: "16px" }}
        >
          {authorship}
        </span>
      </div>
      <pre
        className="m-0 flex-1 overflow-auto px-3 py-2 font-mono text-fd-foreground"
        style={{ fontSize: "11.5px", lineHeight: "17.5px" }}
      >
        {code}
      </pre>
    </div>
  );
}
|
||||
|
||||
/**
 * Renderer for the left-lane card: badge, title, caption, the hand-written
 * SQL, and a list of notes with grey bullets. Fixed size (LANE_W x
 * MANUAL_SQL_H) with invisible handles on the top (target) and bottom (source).
 */
function ManualSqlNodeView({ data }: NodeProps<ManualSqlNode>) {
  return (
    <div
      style={{ width: LANE_W, height: MANUAL_SQL_H }}
      className="flex flex-col rounded-lg border border-fd-border bg-fd-card p-3.5 shadow-sm"
    >
      <Handle type="target" position={Position.Top} className="!opacity-0" />
      <div className="flex items-start justify-between gap-3">
        <div className="min-w-0">
          <LaneBadge variant="manual">{data.badge}</LaneBadge>
          <p className="mt-2 text-[15px] font-semibold leading-5 text-fd-foreground">
            {data.title}
          </p>
          <p className="mt-1 text-[12px] leading-5 text-fd-muted-foreground">
            {data.caption}
          </p>
        </div>
      </div>
      {/* The code listing takes all height left over by the header and notes. */}
      <div className="mt-3 min-h-0 flex-1">
        <CodeBlock language="sql" code={data.code} tone="manual" />
      </div>
      <ul className="mt-3 grid gap-1.5 sm:grid-cols-2">
        {data.notes.map((note) => (
          // The note text doubles as the React key, so notes must be unique.
          <li
            key={note}
            className="flex items-start gap-1.5 text-[11.5px] leading-4 text-fd-muted-foreground"
          >
            <span
              className="mt-1 h-1 w-1 flex-none rounded-full"
              style={{ background: MANUAL_STROKE }}
              aria-hidden="true"
            />
            <span>{note}</span>
          </li>
        ))}
      </ul>
      <Handle type="source" position={Position.Bottom} className="!opacity-0" />
    </div>
  );
}
|
||||
|
||||
/**
 * Renderer for the Semantic Query card: badge, title, caption and the JSON
 * request. Fixed size (LANE_W x SL_QUERY_H) with invisible handles on the
 * top (target) and bottom (source).
 */
function SlQueryNodeView({ data }: NodeProps<SlQueryNode>) {
  return (
    <div
      style={{ width: LANE_W, height: SL_QUERY_H }}
      className="flex flex-col rounded-lg border border-fd-primary/40 bg-fd-card p-3.5 shadow-sm"
    >
      <Handle type="target" position={Position.Top} className="!opacity-0" />
      <div className="flex items-start justify-between gap-3">
        <div className="min-w-0">
          <LaneBadge variant="ktx">{data.badge}</LaneBadge>
          <p className="mt-2 text-[15px] font-semibold leading-5 text-fd-foreground">
            {data.title}
          </p>
          <p className="mt-0.5 text-[12px] leading-4 text-fd-muted-foreground">
            {data.caption}
          </p>
        </div>
      </div>
      <div className="mt-2 min-h-0 flex-1 overflow-hidden">
        <CodeBlock language="json" code={data.code} tone="slQuery" />
      </div>
      <Handle type="source" position={Position.Bottom} className="!opacity-0" />
    </div>
  );
}
|
||||
|
||||
/**
 * Renderer for the engine card: a dark panel with a cyan accent bar on the
 * left edge and a numbered list of planner stages. Fixed size (LANE_W x
 * ENGINE_H) with invisible handles on the top (target) and bottom (source).
 */
function EngineNodeView({ data }: NodeProps<EngineNode>) {
  return (
    <div
      style={{ width: LANE_W, height: ENGINE_H }}
      className="relative flex flex-col rounded-lg border border-cyan-200/30 bg-[#0f1f23] p-3.5 text-white shadow-sm dark:bg-[#0b181b]"
    >
      {/* Decorative accent bar. */}
      <span
        className="absolute inset-y-0 left-0 w-[3px] rounded-l-lg"
        style={{ background: KTX_STROKE }}
        aria-hidden="true"
      />
      <Handle type="target" position={Position.Top} className="!opacity-0" />
      <div className="flex items-center justify-between">
        <p className="text-[10.5px] font-semibold uppercase tracking-[0.08em] text-cyan-300">
          {data.badge}
        </p>
      </div>
      <p className="mt-1.5 text-[15px] font-semibold leading-5 text-white">
        {data.title}
      </p>
      <ol className="mt-3 flex flex-1 flex-col gap-1.5">
        {data.stages.map((stage) => (
          // `stage.index` is both the React key and the number shown in the circle.
          <li
            key={stage.index}
            className="flex items-start gap-3 rounded-md border border-cyan-100/15 bg-white/[0.04] px-3 py-2"
          >
            <span className="mt-0.5 flex h-6 w-6 flex-none items-center justify-center rounded-full bg-cyan-300/95 font-mono text-[11px] font-semibold text-[#0b1c20]">
              {stage.index}
            </span>
            <div className="min-w-0">
              <p className="text-[13px] font-semibold leading-[18px] text-white">
                {stage.title}
              </p>
              <p className="mt-0.5 text-[11.5px] leading-[16px] text-cyan-50/80">
                {stage.detail}
              </p>
            </div>
          </li>
        ))}
      </ol>
      <Handle type="source" position={Position.Bottom} className="!opacity-0" />
    </div>
  );
}
|
||||
|
||||
/**
 * Renderer for the generated-SQL card: badge, title, caption, the compiled
 * SQL, and a list of notes with cyan bullets. Fixed size (LANE_W x
 * COMPILED_H) with invisible handles on the top (target) and bottom (source).
 */
function CompiledSqlNodeView({ data }: NodeProps<CompiledSqlNode>) {
  return (
    <div
      style={{ width: LANE_W, height: COMPILED_H }}
      className="flex flex-col rounded-lg border border-fd-primary/40 bg-fd-card p-3.5 shadow-sm"
    >
      <Handle type="target" position={Position.Top} className="!opacity-0" />
      <div className="flex items-start justify-between gap-3">
        <div className="min-w-0">
          <LaneBadge variant="ktx">{data.badge}</LaneBadge>
          <p className="mt-2 text-[15px] font-semibold leading-5 text-fd-foreground">
            {data.title}
          </p>
          <p className="mt-1 text-[12px] leading-5 text-fd-muted-foreground">
            {data.caption}
          </p>
        </div>
      </div>
      {/* The code listing takes all height left over by the header and notes. */}
      <div className="mt-3 min-h-0 flex-1">
        <CodeBlock language="sql" code={data.code} tone="compiled" />
      </div>
      <ul className="mt-3 grid gap-1.5 sm:grid-cols-2">
        {data.notes.map((note) => (
          // The note text doubles as the React key, so notes must be unique.
          <li
            key={note}
            className="flex items-start gap-1.5 text-[11.5px] leading-4 text-fd-muted-foreground"
          >
            <span
              className="mt-1 h-1 w-1 flex-none rounded-full"
              style={{ background: KTX_STROKE }}
              aria-hidden="true"
            />
            <span>{note}</span>
          </li>
        ))}
      </ul>
      <Handle type="source" position={Position.Bottom} className="!opacity-0" />
    </div>
  );
}
|
||||
|
||||
/**
 * Renderer for the warehouse card: a database icon next to the title and
 * the driver names joined by " • ". Fixed size (WAREHOUSE_W x WAREHOUSE_H)
 * with a single invisible target handle on the top edge.
 */
function WarehouseNodeView({ data }: NodeProps<WarehouseNode>) {
  return (
    <div
      style={{ width: WAREHOUSE_W, height: WAREHOUSE_H }}
      className="flex items-center gap-3 rounded-md border border-fd-border bg-fd-card px-4 py-3 shadow-sm"
    >
      <Handle type="target" position={Position.Top} className="!opacity-0" />
      <div className="flex h-10 w-10 flex-none items-center justify-center rounded-md bg-fd-primary/12 text-fd-primary">
        {/* Decorative icon; hidden from assistive technology. */}
        <svg
          xmlns="http://www.w3.org/2000/svg"
          width="20"
          height="20"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="1.75"
          strokeLinecap="round"
          strokeLinejoin="round"
          aria-hidden="true"
        >
          <ellipse cx="12" cy="5.5" rx="8" ry="2.6" />
          <path d="M4 5.5v6.2c0 1.43 3.58 2.6 8 2.6s8-1.17 8-2.6V5.5" />
          <path d="M4 11.7v6.2c0 1.43 3.58 2.6 8 2.6s8-1.17 8-2.6v-6.2" />
        </svg>
      </div>
      <div className="min-w-0">
        <p className="text-[15px] font-semibold leading-5 text-fd-foreground">
          {data.title}
        </p>
        <p className="mt-0.5 text-[11.5px] leading-4 text-fd-muted-foreground">
          {data.drivers.join(" • ")}
        </p>
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Maps each node `type` string to its renderer. Declared at module scope so
 * the same object identity is passed to `<ReactFlow>` on every render.
 */
const nodeTypes = {
  agent: AgentNodeView,
  manualSql: ManualSqlNodeView,
  slQuery: SlQueryNodeView,
  engine: EngineNodeView,
  compiledSql: CompiledSqlNodeView,
  warehouse: WarehouseNodeView,
};
|
||||
|
||||
/**
 * Static "imperative vs declarative" diagram for the docs site.
 *
 * Renders a heading block followed by a React Flow canvas showing the two
 * lanes converging on the warehouse. Every interaction (drag, pan, zoom,
 * selection, focus, connecting) is switched off, and `preventScrolling` is
 * false so the page keeps scrolling while the pointer is over the canvas.
 * The scoped `<style>` block resets React Flow's default node chrome and
 * re-enables text selection inside the cards.
 */
export function SemanticLayerFlow() {
  return (
    <section
      className="not-prose my-10 w-full max-w-full min-w-0 space-y-4"
      aria-labelledby="sl-flow-title"
    >
      <article
        className="max-w-full min-w-0 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
        aria-label="From Semantic Query to executed SQL: contrast between agent-authored SQL and KTX-compiled SQL"
      >
        <div className="border-b border-fd-border bg-fd-muted/35 px-5 py-4">
          <p className="text-[11px] font-semibold uppercase tracking-[0.08em] text-fd-primary">
            Imperative vs declarative
          </p>
          <h3
            id="sl-flow-title"
            className="mt-1 text-base font-semibold tracking-normal text-fd-foreground sm:text-lg"
            style={{ fontFamily: "var(--font-display)" }}
          >
            Same answer, two contracts
          </h3>
          <p className="mt-2 max-w-3xl text-xs leading-5 text-fd-muted-foreground">
            On the left, the agent works imperatively: chooses tables, writes
            joins, picks the grain, and remembers each warehouse's dialect. On
            the right, the agent only declares what it wants. KTX handles
            every how.
          </p>
        </div>

        {/* Height scales with viewport width, capped at 2340px, never below 1780px. */}
        <div
          className="sl-flow-canvas bg-fd-background"
          style={{
            height: "min(2340px, 290vw)",
            minHeight: 1780,
          }}
        >
          <ReactFlow
            nodes={nodes}
            edges={edges}
            nodeTypes={nodeTypes}
            fitView
            fitViewOptions={{ padding: 0.05 }}
            nodesDraggable={false}
            nodesConnectable={false}
            nodesFocusable={false}
            edgesFocusable={false}
            elementsSelectable={false}
            panOnDrag={false}
            panOnScroll={false}
            zoomOnScroll={false}
            zoomOnPinch={false}
            zoomOnDoubleClick={false}
            preventScrolling={false}
            minZoom={0.2}
            maxZoom={1.5}
            proOptions={{ hideAttribution: true }}
          >
            <Background
              variant={BackgroundVariant.Dots}
              gap={18}
              size={1}
              color="var(--color-fd-border)"
            />
          </ReactFlow>
        </div>
      </article>
      <style>{`
        .sl-flow-canvas .react-flow__node {
          background: transparent;
          border: 0;
          box-shadow: none;
          padding: 0;
          border-radius: 0;
          width: auto;
          text-align: left;
          user-select: text;
          -webkit-user-select: text;
          cursor: auto;
          pointer-events: all !important;
        }
        .sl-flow-canvas .react-flow__node > * {
          pointer-events: auto;
          user-select: text;
          -webkit-user-select: text;
        }
        .sl-flow-canvas .react-flow__node.selected,
        .sl-flow-canvas .react-flow__node:focus,
        .sl-flow-canvas .react-flow__node:focus-visible {
          outline: none;
          box-shadow: none;
        }
        .sl-flow-canvas .react-flow__pane {
          cursor: default;
        }
        .sl-flow-canvas .react-flow__handle {
          width: 1px;
          height: 1px;
          min-width: 0;
          min-height: 0;
          background: transparent;
          border: 0;
          pointer-events: none;
        }
        .sl-flow-canvas pre {
          font-size: 11.5px !important;
          line-height: 17.5px !important;
          background: transparent !important;
          padding: 8px 12px !important;
          border: 0 !important;
          margin: 0 !important;
          box-shadow: none !important;
        }
        .sl-flow-canvas .react-flow__node pre code,
        .sl-flow-canvas .react-flow__node pre span {
          font-size: inherit !important;
          line-height: inherit !important;
        }
      `}</style>
    </section>
  );
}

export default SemanticLayerFlow;
|
||||
|
|
@ -20,7 +20,7 @@ ktx sl <subcommand> [options]
|
|||
| `list` | List semantic-layer sources |
|
||||
| `search <query>` | Search semantic-layer sources |
|
||||
| `validate <sourceName>` | Validate a semantic-layer source against the database schema |
|
||||
| `query` | Compile or execute a semantic-layer query |
|
||||
| `query` | Compile or execute a Semantic Query |
|
||||
|
||||
## Options
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ ktx sl <subcommand> [options]
|
|||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--connection-id <id>` | KTX connection id | - |
|
||||
| `--query-file <path>` | JSON semantic-layer query file | - |
|
||||
| `--query-file <path>` | JSON Semantic Query file | - |
|
||||
| `--measure <measure>` | Measure to query; repeatable (at least one required) | - |
|
||||
| `--dimension <dimension>` | Dimension to include; repeatable | - |
|
||||
| `--filter <filter>` | Filter expression; repeatable | - |
|
||||
|
|
@ -67,7 +67,7 @@ ktx sl <subcommand> [options]
|
|||
| `--max-rows <n>` | Maximum rows to return when executing | - |
|
||||
|
||||
`sl query` requires at least one `--measure` unless `--query-file` is set.
|
||||
`--query-file` should point to a JSON semantic-layer query object.
|
||||
`--query-file` should point to a JSON Semantic Query object.
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
|
|||
|
|
@ -1,141 +1,115 @@
|
|||
---
|
||||
title: Context-Aware SQL
|
||||
description: How KTX turns reviewed context, grain, and relationship evidence into safe SQL for agents.
|
||||
title: Semantic Querying
|
||||
description: How KTX compiles a short Semantic Query into safe, dialect-correct SQL using a reviewed join graph.
|
||||
---
|
||||
|
||||
## Why query planning needs context
|
||||
import { SemanticLayerFlow } from "@/components/semantic-layer-flow";
|
||||
|
||||
Agents can generate SQL from schema alone, but safe analytics SQL needs more
|
||||
than table names. KTX uses reviewed context to understand grain, joins, measures,
|
||||
filters, and where aggregation must happen.
|
||||
KTX's semantic layer is a compiler that turns intent into SQL. The agent
|
||||
declares _what_ it wants — measures, dimensions, filters — in a small
|
||||
Semantic Query. KTX figures out the _how_: which tables to join, what
|
||||
grain to aggregate at, how to keep fan-out from inflating measures, and
|
||||
what dialect the warehouse speaks.
|
||||
|
||||
Read this page as four mechanics:
|
||||
This page covers four mechanics:
|
||||
|
||||
- context files feed the semantic engine;
|
||||
- evidence becomes a join graph with grain and relationship metadata;
|
||||
- review keeps the graph current;
|
||||
- query planning avoids fan-out and ambiguous joins.
|
||||
- The Semantic Query contract agents send to the compiler.
|
||||
- The planner steps that turn a Semantic Query into SQL.
|
||||
- The join graph that backs those steps, and how it's built.
|
||||
- The fan-out failure mode the compiler is designed to prevent.
|
||||
|
||||
## Where the semantic layer fits
|
||||
## Imperative SQL vs declarative Semantic Querying
|
||||
|
||||
This planner is one subsystem inside KTX's broader context layer. It uses source
|
||||
YAML, wiki context, scan evidence, and provenance to make context actionable for
|
||||
SQL generation.
|
||||
Writing analytics SQL is imperative work. Every question forces the
|
||||
agent to hold two things in mind at once: _what_ it wants — a measure, a
|
||||
slice, a filter — and _how_ to compute it: which tables to join, which
|
||||
key links them, what grain to aggregate at, how to keep one fact from
|
||||
inflating another, and what dialect the warehouse speaks. Plumbing on
|
||||
top of intent, every query.
|
||||
|
||||
<div
|
||||
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
|
||||
aria-label="How context inputs flow through the semantic layer into agent workflows"
|
||||
>
|
||||
<div className="grid gap-0 lg:grid-cols-[1fr_2rem_1.12fr_2rem_1fr]">
|
||||
<section className="bg-fd-background p-4">
|
||||
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
|
||||
{"Context inputs"}
|
||||
</p>
|
||||
<div className="grid gap-2 text-sm">
|
||||
<div className="border-l-2 border-fd-primary bg-fd-card px-3 py-2">
|
||||
<p className="font-mono text-xs text-fd-foreground">semantic-layer/</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"source YAML, measures, joins, grain"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="border-l-2 border-amber-500 bg-fd-card px-3 py-2">
|
||||
<p className="font-mono text-xs text-fd-foreground">wiki/</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"business rules, definitions, caveats"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="border-l-2 border-orange-500 bg-fd-card px-3 py-2">
|
||||
<p className="font-mono text-xs text-fd-foreground">raw-sources/</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"schema scans, keys, imported metadata"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="border-l-2 border-slate-500 bg-fd-card px-3 py-2 dark:border-cyan-200">
|
||||
<p className="font-mono text-xs text-fd-foreground">provenance</p>
|
||||
<p className="mt-1 text-xs leading-5 text-fd-muted-foreground">
|
||||
{"ingest decisions and review history"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
KTX's semantic layer separates those concerns:
|
||||
|
||||
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
|
||||
<span className="h-px w-full bg-fd-border" />
|
||||
</div>
|
||||
- **You and KTX maintain the how.** Sources, joins, grain, measures, and
|
||||
segments live in reviewable YAML — the analytical contract the team
|
||||
agrees on, version-controlled.
|
||||
- **The agent declares the what.** It sends a Semantic Query and trusts
|
||||
the compiler to produce safe SQL.
|
||||
|
||||
<section className="relative bg-[#102226] p-5 text-white dark:bg-[#0b181b]">
|
||||
<div className="absolute inset-y-0 left-0 w-1 bg-fd-primary" />
|
||||
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-cyan-200">
|
||||
{"Semantic layer engine"}
|
||||
</p>
|
||||
<div className="grid gap-2 sm:grid-cols-2">
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="text-sm font-semibold">Join graph</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"sources as nodes, joins as typed edges"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="text-sm font-semibold">Grain</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"row identity before aggregation"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="text-sm font-semibold">Measures</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"verified formulas and filters"}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-cyan-100/20 bg-white/8 px-3 py-2">
|
||||
<p className="whitespace-nowrap break-normal text-sm font-semibold">Relationships</p>
|
||||
<p className="mt-1 text-xs leading-5 text-cyan-50/75">
|
||||
{"many_to_one, one_to_many, one_to_one"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="mt-3 rounded-md border border-cyan-100/20 bg-cyan-50/10 px-3 py-2 text-sm">
|
||||
{"Safe query planning before SQL is generated."}
|
||||
</div>
|
||||
</section>
|
||||
The agent stops reasoning about plumbing. It states intent. KTX turns
|
||||
that into SQL the warehouse can run.
|
||||
|
||||
<div className="hidden items-center justify-center bg-fd-background lg:flex" aria-hidden="true">
|
||||
<span className="h-px w-full bg-fd-border" />
|
||||
</div>
|
||||
<SemanticLayerFlow />
|
||||
|
||||
<section className="bg-fd-muted/35 p-4">
|
||||
<p className="mb-3 text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
|
||||
{"Agent workflows"}
|
||||
</p>
|
||||
<div className="space-y-2 text-sm">
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Search sources and wiki pages"}
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Compile trusted SQL"}
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Explain metrics and provenance"}
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-card px-3 py-2">
|
||||
{"Patch files and validate review"}
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
## The Semantic Query contract
|
||||
|
||||
## Join graph
|
||||
A Semantic Query is the JSON payload the agent sends. Every field is optional
|
||||
except `measures`, and column references are fully qualified
|
||||
(`source.column`) so the compiler never has to guess where a name came
|
||||
from.
|
||||
|
||||
A semantic source is a node. A join is a typed edge. KTX uses the graph to
|
||||
choose valid paths and detect row-multiplying joins before SQL is generated.
|
||||
Notice what's _not_ in the payload: no `FROM`, no `JOIN`, no `GROUP BY`,
|
||||
no `WITH`. The agent states what it wants. KTX picks the join path, the
|
||||
grain, the SQL shape, and the dialect.
|
||||
|
||||
| Field | Purpose |
|
||||
|-------|---------|
|
||||
| `measures` | Names of pre-defined measures, or inline expressions like `sum(orders.amount)` |
|
||||
| `dimensions` | Columns to group by, optionally with a `granularity` for time fields |
|
||||
| `filters` | Predicates, classified as row-level (`WHERE`) or aggregate-level (`HAVING`) at planning time |
|
||||
| `segments` | Named filter sets defined on a source, applied as additional predicates |
|
||||
| `order_by` | Sort fields with optional direction |
|
||||
| `limit` | Row cap on the result |
|
||||
|
||||
A typical agent call looks like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"measures": ["orders.revenue", "tickets.ticket_count"],
|
||||
"dimensions": ["customers.segment"],
|
||||
"filters": ["orders.created_at >= '2025-01-01'"],
|
||||
"limit": 1000
|
||||
}
|
||||
```
|
||||
|
||||
That payload is enough for KTX to plan and compile. The agent never
|
||||
authors a join, a CTE, or a dialect-specific cast.
|
||||
|
||||
## What the planner does
|
||||
|
||||
The planner is a deterministic pipeline. Each Semantic Query runs through the
|
||||
same ordered steps before any SQL is emitted.
|
||||
|
||||
1. **Resolve refs.** Qualify bare column names, look up pre-defined
|
||||
measure expressions, and classify each measure as raw or derived.
|
||||
2. **Pick an anchor and build the join tree.** Choose the largest measure
|
||||
source as the root, then run a shortest-path search across the typed
|
||||
join graph to reach every required source.
|
||||
3. **Detect fan-out.** Group measures by their owning source. If more
|
||||
than one group exists, the planner marks the query as a chasm trap
|
||||
and switches to aggregate-locality compilation.
|
||||
4. **Classify filters.** Split predicates into row-level (`WHERE`) and
|
||||
aggregate-level (`HAVING`) based on whether they reference a measure.
|
||||
5. **Generate SQL.** Emit Postgres-dialect SQL in the shape the plan calls for:
|
||||
single-source aggregation when the query is safe, per-source CTEs
|
||||
when fan-out is present.
|
||||
6. **Transpile to the target dialect.** Run the result through `sqlglot`
|
||||
so the warehouse receives syntax it understands.
|
||||
|
||||
The output is the SQL string, the resolved plan, and any warnings
|
||||
surfaced during planning.
|
||||
|
||||
## The join graph
|
||||
|
||||
A semantic source is a node. A declared join is a typed edge. The graph
|
||||
is bidirectional: every forward edge has a reverse with the relationship
|
||||
inverted, so the planner can traverse from any anchor.
|
||||
|
||||
| Relationship | Planning impact |
|
||||
|--------------|-----------------|
|
||||
| `many_to_one` | Usually safe for adding dimensions |
|
||||
| `one_to_many` | Can multiply measures and trigger fan-out handling |
|
||||
| `one_to_one` | Usually safe when keys are correct |
|
||||
| Equal-cost paths | Ambiguous unless aliases or explicit joins disambiguate |
|
||||
| `many_to_one` | Safe direction for adding dimensions |
|
||||
| `one_to_many` | Multiplies measures and triggers fan-out handling |
|
||||
| `one_to_one` | Safe in either direction when keys match |
|
||||
| Equal-cost paths | Treated as ambiguous; aliases or explicit joins resolve them |
|
||||
|
||||
<figure
|
||||
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card p-4 shadow-sm"
|
||||
|
|
@ -143,43 +117,60 @@ choose valid paths and detect row-multiplying joins before SQL is generated.
|
|||
>
|
||||
<div className="grid gap-3 md:grid-cols-[1fr_1fr_1fr]">
|
||||
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
|
||||
<p className="text-sm font-semibold text-fd-foreground">customers</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">grain: customer_id</p>
|
||||
<p className="text-sm font-semibold text-fd-foreground">{"customers"}</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: customer_id"}</p>
|
||||
</div>
|
||||
<div className="rounded-md border-2 border-fd-primary bg-fd-background px-4 py-3">
|
||||
<p className="text-sm font-semibold text-fd-foreground">orders</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id</p>
|
||||
<p className="text-sm font-semibold text-fd-foreground">{"orders"}</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: order_id"}</p>
|
||||
</div>
|
||||
<div className="rounded-md border border-fd-border bg-fd-background px-4 py-3">
|
||||
<p className="text-sm font-semibold text-fd-foreground">order_items</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">grain: order_id, line_id</p>
|
||||
<p className="text-sm font-semibold text-fd-foreground">{"order_items"}</p>
|
||||
<p className="mt-1 text-xs text-fd-muted-foreground">{"grain: order_id, line_id"}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="my-3 grid gap-2 text-center text-xs font-medium text-fd-muted-foreground md:grid-cols-[1fr_1fr]">
|
||||
<div>orders -> customers: many_to_one</div>
|
||||
<div>orders -> order_items: one_to_many</div>
|
||||
<div>{"orders -> customers: many_to_one"}</div>
|
||||
<div>{"orders -> order_items: one_to_many"}</div>
|
||||
</div>
|
||||
<figcaption className="mt-4 border-t border-fd-border pt-3 text-left text-xs leading-5 text-fd-muted-foreground">
|
||||
<span className="font-medium text-fd-foreground">{"Example: "}</span>
|
||||
{"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it can duplicate order-level measures."}
|
||||
{"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it duplicates order-level measures."}
|
||||
</figcaption>
|
||||
</figure>
|
||||
|
||||
The graph is bidirectional for planning. If `orders -> customers` is
|
||||
`many_to_one`, the reverse path is `one_to_many`.
|
||||
Edges and grain come from your YAML. The compiler treats them as fact,
|
||||
not a guess.
|
||||
|
||||
```yaml
|
||||
# semantic-layer/warehouse/orders.yaml
|
||||
name: orders
|
||||
table: public.orders
|
||||
grain: [order_id]
|
||||
joins:
|
||||
- to: customers
|
||||
on: customer_id = customers.id
|
||||
relationship: many_to_one
|
||||
- to: order_items
|
||||
on: id = order_items.order_id
|
||||
relationship: one_to_many
|
||||
measures:
|
||||
- name: revenue
|
||||
expr: sum(case when status != 'refunded' then amount end)
|
||||
```
|
||||
|
||||
## Building and maintaining the graph
|
||||
|
||||
KTX starts from evidence, writes reviewable source YAML, and treats the merged
|
||||
diff as the accepted graph.
|
||||
KTX builds the graph from evidence and accepted edits, not from runtime
|
||||
inference. Each input contributes a different kind of authority.
|
||||
|
||||
| Evidence | What it contributes |
|
||||
|----------|---------------------|
|
||||
| Declared primary keys | Initial row grain |
|
||||
| Declared foreign keys | Formal join candidates |
|
||||
| Inferred relationships | Edges when warehouses lack constraints |
|
||||
| Inferred relationships | Edges when the warehouse lacks constraints |
|
||||
| dbt, MetricFlow, and LookML imports | Existing metrics, dimensions, explores, and joins |
|
||||
| Query history | Real join and filter patterns |
|
||||
| Query history | Real join and filter patterns from analyst SQL |
|
||||
| Analyst review | Final authority before context is merged |
|
||||
|
||||
<div
|
||||
|
|
@ -295,105 +286,55 @@ diff as the accepted graph.
|
|||
</div>
|
||||
</div>
|
||||
|
||||
## Modeling problems
|
||||
## Fan-out and aggregate locality
|
||||
|
||||
Fan-out is the classic failure mode: an order-level measure joins to line-item
|
||||
rows before aggregation, so one order becomes many rows.
|
||||
Fan-out is the classic analytics failure mode. Two fact tables join to a
|
||||
shared dimension. A naive query joins them all together first, so each
|
||||
row from one fact is multiplied by the matching rows from the other.
|
||||
Measures duplicate, numbers go wrong, and the agent doesn't notice.
|
||||
|
||||
| Problem | What happens | How KTX handles it |
|
||||
|---------|--------------|--------------------|
|
||||
| Order measure joins to `order_items` | `orders.revenue` repeats once per item | Detect `one_to_many` and pre-aggregate |
|
||||
| Two fact sources share `customers` | Measures multiply across the shared dimension | Treat as a chasm trap and plan each fact locally |
|
||||
| Filter crosses `one_to_many` | Filtering changes measure grain | Reject or localize the filter |
|
||||
| Equal-cost paths connect sources | Join choice is ambiguous | Prefer safer paths or require aliases |
|
||||
|
||||
## Execution planning
|
||||
|
||||
The planner resolves sources, chooses a join tree, checks relationship paths,
|
||||
and picks a simple or aggregate-locality SQL shape.
|
||||
KTX's planner detects this pattern by grouping measures by their owning
|
||||
source. If more than one source contributes raw measures, the generator
|
||||
switches to aggregate locality: each fact is pre-aggregated at its own
|
||||
grain inside a CTE, and the CTEs are joined back to the dimension at the
|
||||
end.
|
||||
|
||||
| Naive SQL shape | Semantic-layer SQL shape |
|
||||
|-----------------|--------------------------|
|
||||
| Join facts and dimensions first, then aggregate | Aggregate each fact source at its own grain, then join results |
|
||||
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source when locality is needed |
|
||||
| Trust the shortest textual join path | Prefer safe relationship paths and reject disconnected sources |
|
||||
| Let dimension grain differ across facts | Raise when asymmetric dimensions would fan out another measure |
|
||||
| Join facts and dimensions first, then aggregate | Aggregate each fact at its own grain, then join |
|
||||
| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source |
|
||||
| Trust the shortest textual join path | Prefer typed safe paths, reject disconnected sources |
|
||||
| Let dimension grain differ across facts | Raise when an asymmetric dimension would fan out another measure |
|
||||
|
||||
<div
|
||||
className="not-prose my-8 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
|
||||
aria-label="Fan-out safe execution shape"
|
||||
>
|
||||
<div className="border-b border-fd-border bg-fd-muted/35 px-4 py-3">
|
||||
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-muted-foreground">
|
||||
{"Fan-out handling"}
|
||||
</p>
|
||||
<p className="mt-1 text-sm leading-6 text-fd-muted-foreground">
|
||||
{"The same question planned before and after KTX preserves the measure grain."}
|
||||
</p>
|
||||
</div>
|
||||
<div className="grid gap-3 bg-fd-background p-4 md:grid-cols-[0.92fr_1.08fr]">
|
||||
<section className="flex min-h-full flex-col rounded-md border border-fd-border bg-fd-card">
|
||||
<div className="border-b border-fd-border px-4 py-3">
|
||||
<p className="text-[11px] font-semibold uppercase tracking-wide text-red-600 dark:text-red-300">
|
||||
{"Unsafe shape"}
|
||||
</p>
|
||||
<p className="mt-1 text-sm font-semibold text-fd-foreground">
|
||||
{"Join first, aggregate later"}
|
||||
</p>
|
||||
</div>
|
||||
<pre className="m-0 min-h-[13rem] flex-1 overflow-x-auto bg-transparent px-4 py-3 text-xs leading-5 text-fd-foreground">
|
||||
{`orders
|
||||
-> join order_items
|
||||
-> join customers
|
||||
The result is the correct answer, computed with the join shape an
|
||||
analyst would have written by hand.
|
||||
|
||||
group by
|
||||
customer_segment
|
||||
## Where the context comes from
|
||||
|
||||
measure
|
||||
sum(orders.amount)`}
|
||||
</pre>
|
||||
<div className="border-t border-fd-border bg-red-50/60 px-4 py-3 text-sm leading-6 text-red-950 dark:bg-red-950/20 dark:text-red-100">
|
||||
{"Order-level revenue is exposed to line-item fan-out before aggregation."}
|
||||
</div>
|
||||
</section>
|
||||
<section className="flex min-h-full flex-col rounded-md border border-fd-primary/40 bg-fd-card shadow-[inset_4px_0_0_var(--color-fd-primary)]">
|
||||
<div className="border-b border-fd-border px-4 py-3">
|
||||
<p className="text-[11px] font-semibold uppercase tracking-wide text-fd-primary">
|
||||
{"KTX shape"}
|
||||
</p>
|
||||
<p className="mt-1 text-sm font-semibold text-fd-foreground">
|
||||
{"Aggregate locally, then join"}
|
||||
</p>
|
||||
</div>
|
||||
<pre className="m-0 min-h-[13rem] flex-1 overflow-x-auto bg-transparent px-4 py-3 text-xs leading-5 text-fd-foreground">
|
||||
{`orders_agg as (
|
||||
select customer_id, sum(amount) revenue
|
||||
from orders
|
||||
group by customer_id
|
||||
)
|
||||
select customers.segment, sum(revenue)
|
||||
from orders_agg
|
||||
join customers`}
|
||||
</pre>
|
||||
<div className="border-t border-fd-border bg-fd-primary/10 px-4 py-3 text-sm leading-6 text-fd-foreground">
|
||||
{"The measure is pre-aggregated at order grain before dimensions are joined."}
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
The planner is only as good as the YAML it reads. KTX builds and
|
||||
maintains that YAML for you.
|
||||
|
||||
The result is structured planning: validated sources, typed relationships,
|
||||
graph search, fan-out detection, aggregate locality, and dialect transpilation.
|
||||
- `raw-sources/<connection>/` holds scan evidence from your warehouse:
|
||||
schemas, columns, keys, samples, and observed usage patterns.
|
||||
- `wiki/` holds business language, definitions, and caveats. The
|
||||
planner doesn't read the wiki at compile time, but the agent does, so
|
||||
measure names and dimensions stay anchored to terms the team uses.
|
||||
- `semantic-layer/<connection>/` holds the structured sources, joins,
|
||||
grain, measures, and segments the planner actually compiles against.
|
||||
|
||||
Every accepted edit flows back into the next ingest, so the graph stays
|
||||
current as the warehouse changes.
|
||||
|
||||
## Agent usage notes
|
||||
|
||||
Use this page when an agent needs to explain how KTX turns reviewed semantic
|
||||
context into SQL, why relationship metadata matters, or why a query was rejected
|
||||
as unsafe.
|
||||
Point an agent at this page when it needs to explain why KTX asks for
|
||||
grain, why a query was rejected as unsafe, or why the compiled SQL looks
|
||||
different from what the agent first proposed.
|
||||
|
||||
| Agent task | Relevant section | Next page |
|
||||
|------------|------------------|-----------|
|
||||
| Explain why KTX asks for `grain` and relationship types | Join graph | [Writing Context](/docs/guides/writing-context) |
|
||||
| Diagnose duplicated measures after a join | Modeling problems | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Explain safe SQL generation | Execution planning | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Describe how semantic context stays current | Building and maintaining the graph | [Context as Code](/docs/concepts/context-as-code) |
|
||||
| Explain the Semantic Query shape | The Semantic Query contract | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Describe what the planner does between query and SQL | What the planner does | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Explain why KTX asks for grain and relationship types | The join graph | [Writing context](/docs/guides/writing-context) |
|
||||
| Diagnose duplicated measures after a join | Fan-out and aggregate locality | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| Describe how semantic context stays current | Building and maintaining the graph | [Context as code](/docs/concepts/context-as-code) |
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ measures:
|
|||
```
|
||||
|
||||
For join graphs, fan-out handling, and execution mechanics, read
|
||||
[Context-Aware SQL](/docs/concepts/semantic-layer-internals).
|
||||
[Semantic Querying](/docs/concepts/semantic-layer-internals).
|
||||
|
||||
## Wiki pages
|
||||
|
||||
|
|
|
|||
|
|
@ -285,7 +285,7 @@ Admin CLI skills call the same KTX CLI commands:
|
|||
| `ktx sl list --json` | List semantic-layer sources |
|
||||
| `ktx sl search <query> --json` | Search semantic-layer sources |
|
||||
| `ktx sl validate <source> --connection-id <id>` | Validate semantic source definitions |
|
||||
| `ktx sl query --format json` | Execute a semantic-layer query when semantic compute is configured |
|
||||
| `ktx sl query --format json` | Execute a Semantic Query when semantic compute is configured |
|
||||
|
||||
### Security constraints
|
||||
|
||||
|
|
|
|||
|
|
@ -515,4 +515,4 @@ No authentication required - SQLite is file-based. The file must be readable by
|
|||
| Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions |
|
||||
| Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest <connectionId> --query-history` or `ktx setup` |
|
||||
| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context |
|
||||
| Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |
|
||||
| Semantic Query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test <id>` and check the `ktx sl query` flags |
|
||||
|
|
|
|||
|
|
@ -15,6 +15,12 @@ const config = {
|
|||
},
|
||||
async redirects() {
|
||||
return [
|
||||
{
|
||||
source: "/",
|
||||
destination: "/ktx/docs/getting-started/introduction",
|
||||
permanent: false,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/docs",
|
||||
destination: "/docs/getting-started/introduction",
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ const markdownMimeTypes = new Set([
|
|||
"application/markdown",
|
||||
]);
|
||||
|
||||
export function middleware(request: NextRequest) {
|
||||
export function proxy(request: NextRequest) {
|
||||
if (!isMarkdownPreferred(request.headers.get("accept"))) {
|
||||
return NextResponse.next();
|
||||
}
|
||||
|
|
@ -112,6 +112,18 @@ test("/ktx/docs redirects to the docs introduction", async () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("/ redirects into the /ktx docs site", async () => {
|
||||
const response = await fetch(`${docsSiteUrl}/`, {
|
||||
redirect: "manual",
|
||||
});
|
||||
|
||||
assert.equal(response.status, 307);
|
||||
assert.equal(
|
||||
response.headers.get("location"),
|
||||
`${docsBasePath}/docs/getting-started/introduction`,
|
||||
);
|
||||
});
|
||||
|
||||
test("/ktx/api/search returns docs search results", async () => {
|
||||
const response = await fetch(
|
||||
`${docsSiteUrl}${docsBasePath}/api/search?query=setup`,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { access, readFile } from "node:fs/promises";
|
||||
import { dirname, join } from "node:path";
|
||||
import { test } from "node:test";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
|
@ -17,6 +17,23 @@ test("root provider uses the base-path-aware search API", async () => {
|
|||
assert.match(layout, /api:\s*"\/ktx\/api\/search"/);
|
||||
});
|
||||
|
||||
test("metadata icons include the docs base path", async () => {
|
||||
const layout = await readDocsFile("app/layout.tsx");
|
||||
|
||||
assert.match(layout, /icon:\s*"\/ktx\/brand\/ktx-mascot\.svg"/);
|
||||
assert.match(layout, /shortcut:\s*"\/ktx\/brand\/ktx-mascot\.svg"/);
|
||||
assert.doesNotMatch(layout, /:\s*"\/brand\/ktx-mascot\.svg"/);
|
||||
});
|
||||
|
||||
test("markdown negotiation uses the Next proxy convention", async () => {
|
||||
await assert.doesNotReject(access(join(docsSiteDir, "proxy.ts")));
|
||||
await assert.rejects(access(join(docsSiteDir, "middleware.ts")));
|
||||
|
||||
const proxy = await readDocsFile("proxy.ts");
|
||||
assert.match(proxy, /export function proxy/);
|
||||
assert.doesNotMatch(proxy, /export function middleware/);
|
||||
});
|
||||
|
||||
test("site background stacking does not target every body child", async () => {
|
||||
const css = await readDocsFile("app/global.css");
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue