From 50ffebd98b39ffb0369bf75263c7794189469cd0 Mon Sep 17 00:00:00 2001 From: Luca Martial <48870843+luca-martial@users.noreply.github.com> Date: Fri, 15 May 2026 08:54:36 -0400 Subject: [PATCH 1/4] refactor(cli): unify output formatting across commands (#111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor(cli): unify output formatting across search and status commands Replace clack-style box borders (◇/│/└) and bullets (●/◆) in printList pretty mode with a clean status-style layout: bold headers, indented aligned rows, no decorative framing. Migrate status-project.ts from hand-rolled ANSI escape codes to shared symbols.ts color helpers. Remove dead clack symbols from SYMBOLS, add yellow() for warnings. Co-Authored-By: Claude Opus 4.6 (1M context) * fix(cli): update stale badge role docstring after dim removal Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- packages/cli/src/connection.ts | 12 ++++----- packages/cli/src/io/print-list.test.ts | 35 +++++++++++++------------- packages/cli/src/io/print-list.ts | 25 +++++++++--------- packages/cli/src/io/symbols.ts | 9 +++---- packages/cli/src/status-project.ts | 21 ++++++++++------ 5 files changed, 52 insertions(+), 50 deletions(-) diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index c65fc3c3..06d02922 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -315,14 +315,14 @@ function padVisual(text: string, width: number): string { } function renderTestAll(io: KtxCliIo, rows: ReadonlyArray): void { - io.stdout.write(`${SYMBOLS.barStart} connection test --all\n`); - io.stdout.write(`${SYMBOLS.bar}\n`); + io.stdout.write(`${bold('connection test --all')}\n`); if (rows.length === 0) { - io.stdout.write(`${SYMBOLS.barEnd} No connections configured. Run \`ktx setup\` to add one.\n`); + io.stdout.write(`\n No connections configured. 
Run \`ktx setup\` to add one.\n\n`); return; } + io.stdout.write('\n'); const okLabel = green('✓ ok'); const failLabel = red('✗ failed'); const idWidth = Math.max(...rows.map((r) => r.connectionId.length)); @@ -334,17 +334,17 @@ function renderTestAll(io: KtxCliIo, rows: ReadonlyArray): vo const driver = dim(padVisual(row.driver, driverWidth)); const status = padVisual(row.ok ? okLabel : failLabel, statusWidth); const detail = dim(row.detail); - io.stdout.write(`${SYMBOLS.bar} ${SYMBOLS.item} ${id} ${driver} ${status} ${detail}\n`); + io.stdout.write(` ${id} ${driver} ${status} ${detail}\n`); } const failed = rows.filter((r) => !r.ok).length; const passed = rows.length - failed; - io.stdout.write(`${SYMBOLS.bar}\n`); + io.stdout.write('\n'); const summary = failed === 0 ? `${rows.length} tested ${dim(SYMBOLS.middot)} ${green(`${passed} passed`)}` : `${rows.length} tested ${dim(SYMBOLS.middot)} ${green(`${passed} passed`)} ${dim(SYMBOLS.middot)} ${red(`${failed} failed`)}`; - io.stdout.write(`${SYMBOLS.barEnd} ${summary}\n`); + io.stdout.write(`${summary}\n`); } async function runTestAll( diff --git a/packages/cli/src/io/print-list.test.ts b/packages/cli/src/io/print-list.test.ts index 543cc71e..cb6e7947 100644 --- a/packages/cli/src/io/print-list.test.ts +++ b/packages/cli/src/io/print-list.test.ts @@ -139,7 +139,7 @@ function stripAnsi(s: string): string { } describe('printList — pretty mode', () => { - it('renders a Clack-style header, grouped rows, and footer', () => { + it('renders a bold header, grouped rows, and footer', () => { const r = recorder(); printList({ rows: [ORDERS, USERS], @@ -152,13 +152,14 @@ describe('printList — pretty mode', () => { io: r.io, }); const out = stripAnsi(r.out()); - expect(out).toContain(`${SYMBOLS.barStart} sl list`); - expect(out).toContain(`${SYMBOLS.group} warehouse`); + expect(out).toContain('sl list'); + expect(out).toContain('warehouse'); expect(out).toContain('(2 sources)'); - expect(out).toMatch(new 
RegExp(`${escapeRegExp(SYMBOLS.item)} orders\\s+5 cols ${escapeRegExp(SYMBOLS.middot)} 3 measures ${escapeRegExp(SYMBOLS.middot)} 1 join\\b`)); - expect(out).toMatch(new RegExp(`${escapeRegExp(SYMBOLS.item)} users\\s+8 cols ${escapeRegExp(SYMBOLS.middot)} 2 measures ${escapeRegExp(SYMBOLS.middot)} 2 joins\\b`)); + expect(out).toMatch(/orders\s+5 cols/); + expect(out).toMatch(new RegExp(`3 measures ${escapeRegExp(SYMBOLS.middot)} 1 join\\b`)); + expect(out).toMatch(new RegExp(`2 measures ${escapeRegExp(SYMBOLS.middot)} 2 joins\\b`)); expect(out).toContain(`${SYMBOLS.emDash} User profile + auth`); - expect(out).toContain(`${SYMBOLS.barEnd} 2 sources`); + expect(out).toContain('2 sources'); }); it('renders an empty-state message when no rows', () => { @@ -174,11 +175,11 @@ describe('printList — pretty mode', () => { io: r.io, }); const out = stripAnsi(r.out()); - expect(out).toContain(`${SYMBOLS.barStart} sl list`); - expect(out).toContain(`${SYMBOLS.barEnd} No semantic-layer sources found in /tmp/proj`); + expect(out).toContain('sl list'); + expect(out).toContain('No semantic-layer sources found in /tmp/proj'); }); - it('renders empty-state with hint and zero-count footer when emptyHint is provided', () => { + it('renders empty-state with hint when emptyHint is provided', () => { const r = recorder(); printList({ rows: [], @@ -192,9 +193,8 @@ describe('printList — pretty mode', () => { io: r.io, }); const out = stripAnsi(r.out()); - expect(out).toContain(`${SYMBOLS.bar} No sources matched "foo"`); - expect(out).toContain(`${SYMBOLS.bar} Run \`ktx sl list\` to see available sources.`); - expect(out).toContain(`${SYMBOLS.barEnd} 0 sources`); + expect(out).toContain('No sources matched "foo"'); + expect(out).toContain('Run `ktx sl list` to see available sources.'); }); it('singularizes the footer when there is one row', () => { @@ -210,7 +210,7 @@ describe('printList — pretty mode', () => { io: r.io, }); const out = stripAnsi(r.out()); - 
expect(out).toContain(`${SYMBOLS.barEnd} 1 source`); + expect(out).toContain('1 source'); }); it('uses the provided unit in pluralization and group counts', () => { @@ -236,10 +236,10 @@ describe('printList — pretty mode', () => { }); const out = stripAnsi(r.out()); expect(out).toContain('(2 pages)'); - expect(out).toContain(`${SYMBOLS.barEnd} 2 pages`); + expect(out).toContain('2 pages'); }); - it('renders a leading dim badge column with prettyFormat in pretty mode', () => { + it('renders a leading badge column with prettyFormat in pretty mode', () => { const r = recorder(); interface SearchRow { score: number; scope: string; key: string; summary: string } const SEARCH_COLUMNS: ReadonlyArray> = [ @@ -270,9 +270,8 @@ describe('printList — pretty mode', () => { io: r.io, }); const out = stripAnsi(r.out()); - // Badge displays as right-padded percentage before the name column. - expect(out).toMatch(new RegExp(`${escapeRegExp(SYMBOLS.item)} 87%\\s+alpha\\s+`)); - expect(out).toMatch(new RegExp(`${escapeRegExp(SYMBOLS.item)} 4%\\s+beta\\s+`)); + expect(out).toMatch(/87%\s+alpha\s+/); + expect(out).toMatch(/4%\s+beta\s+/); }); it('emits the badge column in plain mode using its plain prefix', () => { diff --git a/packages/cli/src/io/print-list.ts b/packages/cli/src/io/print-list.ts index b05e12f2..3d8d1fba 100644 --- a/packages/cli/src/io/print-list.ts +++ b/packages/cli/src/io/print-list.ts @@ -18,7 +18,7 @@ export interface PrintListColumn { dim?: boolean; /** * Pretty-mode role override. When omitted, role is auto-detected: - * - `'badge'` — leading dim cell before the name column (right-padded across rows). + * - `'badge'` — leading cell before the name column (right-padded across rows). * - `'name'` — name column. Default: first non-grouped, non-metric, non-optional column. * - `'metric'` — `"N word"` cell. Default: any column with a non-empty `plain` prefix. * - `'suffix'` — trailing em-dash optional value. Default: any column with `optional: true`. 
@@ -202,20 +202,19 @@ function printListPretty(args: PrintListArgs): void { const { io, command, rows, columns, groupBy, emptyMessage, emptyHint } = args; const unit = args.unit ?? 'result'; - io.stdout.write(`${SYMBOLS.barStart} ${command}\n`); - io.stdout.write(`${SYMBOLS.bar}\n`); + io.stdout.write(`${bold(command)}\n`); if (rows.length === 0) { + io.stdout.write(`\n ${emptyMessage}\n`); if (emptyHint !== undefined && emptyHint !== '') { - io.stdout.write(`${SYMBOLS.bar} ${emptyMessage}\n`); - io.stdout.write(`${SYMBOLS.bar} ${dim(emptyHint)}\n`); - io.stdout.write(`${SYMBOLS.barEnd} ${dim(`0 ${unit}s`)}\n`); - } else { - io.stdout.write(`${SYMBOLS.barEnd} ${emptyMessage}\n`); + io.stdout.write(` ${dim(emptyHint)}\n`); } + io.stdout.write('\n'); return; } + io.stdout.write('\n'); + const resolved = resolveColumns(columns, groupBy); const buckets = groupBy ? groupRows(rows, groupBy) : new Map([['', [...rows]]]); @@ -231,14 +230,14 @@ function printListPretty(args: PrintListArgs): void { for (const [groupValue, groupRowList] of buckets) { if (groupBy) { io.stdout.write( - `${SYMBOLS.bar} ${SYMBOLS.group} ${bold(groupValue)} ${dim(`(${pluralize(groupRowList.length, unit)})`)}\n`, + ` ${bold(groupValue)} ${dim(`(${pluralize(groupRowList.length, unit)})`)}\n`, ); } for (const row of groupRowList) { const segments: string[] = []; resolved.badge.forEach((col, idx) => { - segments.push(dim(formatCellValue(col, row).padStart(badgeWidths[idx] ?? 0))); + segments.push(formatCellValue(col, row).padStart(badgeWidths[idx] ?? 0)); }); if (resolved.name) { @@ -265,10 +264,10 @@ function printListPretty(args: PrintListArgs): void { if (optionalSuffix.length > 0) segments.push(optionalSuffix); const indent = groupBy ? 
' ' : ' '; - io.stdout.write(`${SYMBOLS.bar}${indent}${SYMBOLS.item} ${segments.join(' ')}\n`); + io.stdout.write(`${indent}${segments.join(' ')}\n`); } + io.stdout.write('\n'); } - io.stdout.write(`${SYMBOLS.bar}\n`); - io.stdout.write(`${SYMBOLS.barEnd} ${pluralize(rows.length, unit)}\n`); + io.stdout.write(`${pluralize(rows.length, unit)}\n`); } diff --git a/packages/cli/src/io/symbols.ts b/packages/cli/src/io/symbols.ts index f80c2b79..ba93a436 100644 --- a/packages/cli/src/io/symbols.ts +++ b/packages/cli/src/io/symbols.ts @@ -15,11 +15,6 @@ function detectUnicodeSupport(env: NodeJS.ProcessEnv = process.env): boolean { const unicode = detectUnicodeSupport(); export const SYMBOLS = { - bar: unicode ? '│' : '|', - barStart: unicode ? '◇' : 'o', - barEnd: unicode ? '└' : '—', - group: unicode ? '●' : '*', - item: unicode ? '◆' : '*', middot: unicode ? '·' : '-', emDash: unicode ? '—' : '--', } as const; @@ -43,3 +38,7 @@ export function green(text: string): string { export function red(text: string): string { return styleText('red', text); } + +export function yellow(text: string): string { + return styleText('yellow', text); +} diff --git a/packages/cli/src/status-project.ts b/packages/cli/src/status-project.ts index 2aab1e5c..8c2f2445 100644 --- a/packages/cli/src/status-project.ts +++ b/packages/cli/src/status-project.ts @@ -9,6 +9,13 @@ import type { } from '@ktx/context/project'; import type { PostgresPgssProbeResult } from '@ktx/context/ingest'; import type { DoctorCheck } from './doctor.js'; +import { + bold as _bold, + dim as _dim, + green, + red, + yellow, +} from './io/symbols.js'; import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js'; type ProjectStatusLevel = 'ok' | 'warn' | 'fail'; @@ -694,13 +701,11 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil const SYMBOL: Record = { ok: '✓', warn: '⚠', fail: '✗' }; -function ansi(useColor: boolean, code: string, text: string, closer = '39'): string { - return 
useColor ? `\u001b[${code}m${text}\u001b[${closer}m` : text; +function colorForLevel(useColor: boolean, level: ProjectStatusLevel, text: string): string { + if (!useColor) return text; + return level === 'ok' ? green(text) : level === 'warn' ? yellow(text) : red(text); } -function colorFor(level: ProjectStatusLevel): string { - return level === 'ok' ? '32' : level === 'warn' ? '33' : '31'; -} function abbreviateHome(filePath: string, env: NodeJS.ProcessEnv): string { const home = env.HOME; @@ -722,9 +727,9 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec const verbose = options.verbose ?? false; const useColor = options.useColor ?? false; const env = options.env ?? process.env; - const dim = (s: string) => ansi(useColor, '2', s, '22'); - const bold = (s: string) => ansi(useColor, '1', s, '22'); - const color = (level: ProjectStatusLevel, s: string) => ansi(useColor, colorFor(level), s); + const dim = useColor ? _dim : (s: string) => s; + const bold = useColor ? _bold : (s: string) => s; + const color = (level: ProjectStatusLevel, s: string) => colorForLevel(useColor, level, s); const sym = (level: ProjectStatusLevel) => color(level, SYMBOL[level]); const lines: string[] = []; From f9532f549b4589afc558a7516c36d4077dafb232 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 15 May 2026 15:49:39 +0200 Subject: [PATCH 2/4] perf(cli): cache pnpm run ktx builds against a stamp file (#113) The staleness check compared source mtimes against packages/cli/dist/bin.js, but tsc only rewrites outputs whose source actually changed. Editing any non-bin source (e.g. setup.ts) left bin.js untouched, so its mtime stayed older than the sources forever and every `pnpm run ktx` invocation rebuilt the whole workspace. Write a dedicated .ktx-build-stamp after a successful build and check sources against that instead. 
--- scripts/run-ktx.mjs | 24 ++++-- scripts/run-ktx.test.mjs | 157 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 166 insertions(+), 15 deletions(-) diff --git a/scripts/run-ktx.mjs b/scripts/run-ktx.mjs index a283dcae..1a6ba735 100644 --- a/scripts/run-ktx.mjs +++ b/scripts/run-ktx.mjs @@ -2,7 +2,12 @@ import { spawn } from 'node:child_process'; import { constants } from 'node:fs'; -import { access as fsAccess, readdir as fsReaddir, stat as fsStat } from 'node:fs/promises'; +import { + access as fsAccess, + readdir as fsReaddir, + stat as fsStat, + writeFile as fsWriteFile, +} from 'node:fs/promises'; import { dirname, resolve } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; @@ -14,6 +19,10 @@ function cliBinPath(rootDir) { return resolve(rootDir, 'packages', 'cli', 'dist', 'bin.js'); } +function buildStampPath(rootDir) { + return resolve(rootDir, 'packages', 'cli', 'dist', '.ktx-build-stamp'); +} + async function fileExists(path, access) { try { await access(path, constants.R_OK); @@ -66,17 +75,17 @@ async function newestMtimeMs(path, fs) { return newest; } -async function isBuildStale(rootDir, binPath, fs) { - let binStats; +async function isBuildStale(rootDir, stampPath, fs) { + let stampStats; try { - binStats = await fs.stat(binPath); + stampStats = await fs.stat(stampPath); } catch { return true; } const inputPaths = await packageBuildInputPaths(rootDir, fs.readdir); for (const inputPath of inputPaths) { - if ((await newestMtimeMs(inputPath, fs)) > binStats.mtimeMs) { + if ((await newestMtimeMs(inputPath, fs)) > stampStats.mtimeMs) { return true; } } @@ -137,7 +146,9 @@ export async function runWorkspaceKtx(argv, options = {}) { stat: options.stat ?? fsStat, readdir: options.readdir ?? fsReaddir, }; + const writeFile = options.writeFile ?? fsWriteFile; const binPath = cliBinPath(rootDir); + const stampPath = buildStampPath(rootDir); const runCommand = options.runCommand ?? 
(options.execFile @@ -146,7 +157,7 @@ export async function runWorkspaceKtx(argv, options = {}) { const commandEnv = options.env; const binExists = await fileExists(binPath, access); - const needsBuild = !binExists || (await isBuildStale(rootDir, binPath, fs)); + const needsBuild = !binExists || (await isBuildStale(rootDir, stampPath, fs)); if (needsBuild) { stderr.write( binExists @@ -160,6 +171,7 @@ export async function runWorkspaceKtx(argv, options = {}) { ); return buildExitCode; } + await writeFile(stampPath, ''); } return await runCommand(process.execPath, [binPath, ...cliArgv], { cwd: rootDir, env: commandEnv }); diff --git a/scripts/run-ktx.test.mjs b/scripts/run-ktx.test.mjs index 1533b67c..98035aef 100644 --- a/scripts/run-ktx.test.mjs +++ b/scripts/run-ktx.test.mjs @@ -4,10 +4,18 @@ import { runWorkspaceKtx } from './run-ktx.mjs'; function freshBuildFs() { return { - stat: async (path) => ({ - mtimeMs: path.endsWith('/packages/cli/dist/bin.js') ? 2000 : 1000, - isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), - }), + stat: async (path) => { + if (path.endsWith('/.ktx-build-stamp')) { + return { mtimeMs: 2000, isDirectory: () => false }; + } + if (path.endsWith('/packages/cli/dist/bin.js')) { + return { mtimeMs: 2000, isDirectory: () => false }; + } + return { + mtimeMs: 1000, + isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), + }; + }, readdir: async (path) => { if (path.endsWith('/packages')) { return [{ name: 'cli', isDirectory: () => true }]; @@ -108,6 +116,7 @@ test('runWorkspaceKtx drops a leading npm argument separator', async () => { test('runWorkspaceKtx builds the workspace CLI before running it when dist is missing', async () => { const calls = []; const logs = []; + const writes = []; let binExists = false; const exitCode = await runWorkspaceKtx(['setup', 'demo', '--mode', 'replay', '--no-input', '--viz'], { @@ -125,6 +134,9 @@ test('runWorkspaceKtx builds the workspace CLI before running it when 
dist is mi } return { stdout: 'Replay complete\n', stderr: '' }; }, + writeFile: async (path, contents) => { + writes.push({ path, contents }); + }, stdout: { write: (chunk) => logs.push(['stdout', chunk]) }, stderr: { write: (chunk) => logs.push(['stderr', chunk]) }, }); @@ -145,20 +157,32 @@ test('runWorkspaceKtx builds the workspace CLI before running it when dist is mi ['stdout', 'build ok\n'], ['stdout', 'Replay complete\n'], ]); + assert.deepEqual(writes, [ + { path: '/workspace/ktx/packages/cli/dist/.ktx-build-stamp', contents: '' }, + ]); }); -test('runWorkspaceKtx rebuilds before running when workspace sources are newer than dist', async () => { +test('runWorkspaceKtx rebuilds before running when workspace sources are newer than the build stamp', async () => { const calls = []; const logs = []; + const writes = []; let sourceMtimeMs = 3000; const exitCode = await runWorkspaceKtx(['status', '--json', '--no-input'], { rootDir: '/workspace/ktx', access: async () => undefined, - stat: async (path) => ({ - mtimeMs: path.endsWith('/packages/cli/dist/bin.js') ? 
2000 : sourceMtimeMs, - isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), - }), + stat: async (path) => { + if (path.endsWith('/.ktx-build-stamp')) { + return { mtimeMs: 2000, isDirectory: () => false }; + } + if (path.endsWith('/packages/cli/dist/bin.js')) { + return { mtimeMs: 2000, isDirectory: () => false }; + } + return { + mtimeMs: sourceMtimeMs, + isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), + }; + }, readdir: async (path) => { if (path.endsWith('/packages')) { return [{ name: 'context', isDirectory: () => true }]; @@ -176,6 +200,9 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t } return { stdout: '{"status":"ready"}\n', stderr: '' }; }, + writeFile: async (path, contents) => { + writes.push({ path, contents }); + }, stdout: { write: (chunk) => logs.push(['stdout', chunk]) }, stderr: { write: (chunk) => logs.push(['stderr', chunk]) }, }); @@ -193,4 +220,116 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t ['stdout', 'build ok\n'], ['stdout', '{"status":"ready"}\n'], ]); + assert.deepEqual(writes, [ + { path: '/workspace/ktx/packages/cli/dist/.ktx-build-stamp', contents: '' }, + ]); +}); + +test('runWorkspaceKtx skips rebuild when only bin.js is older than sources but stamp is fresh', async () => { + const calls = []; + const logs = []; + const writes = []; + + const exitCode = await runWorkspaceKtx(['status'], { + rootDir: '/workspace/ktx', + access: async () => undefined, + stat: async (path) => { + if (path.endsWith('/.ktx-build-stamp')) { + return { mtimeMs: 5000, isDirectory: () => false }; + } + if (path.endsWith('/packages/cli/dist/bin.js')) { + return { mtimeMs: 1000, isDirectory: () => false }; + } + return { + mtimeMs: 3000, + isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), + }; + }, + readdir: async (path) => { + if (path.endsWith('/packages')) { + return [{ name: 'cli', isDirectory: () => true }]; + } + if 
(path.endsWith('/src')) { + return [{ name: 'setup.ts', isDirectory: () => false }]; + } + return []; + }, + execFile: async (command, args, options) => { + calls.push({ command, args, cwd: options.cwd }); + return { stdout: 'KTX status\n', stderr: '' }; + }, + writeFile: async (path, contents) => { + writes.push({ path, contents }); + }, + stdout: { write: (chunk) => logs.push(['stdout', chunk]) }, + stderr: { write: (chunk) => logs.push(['stderr', chunk]) }, + }); + + assert.equal(exitCode, 0); + assert.deepEqual( + calls.map((call) => [call.command, call.args]), + [[process.execPath, ['/workspace/ktx/packages/cli/dist/bin.js', 'status']]], + ); + assert.deepEqual(writes, []); + assert.deepEqual(logs, [['stdout', 'KTX status\n']]); +}); + +test('runWorkspaceKtx rebuilds when stamp is missing even if bin.js exists', async () => { + const calls = []; + const logs = []; + const writes = []; + + const exitCode = await runWorkspaceKtx(['status'], { + rootDir: '/workspace/ktx', + access: async () => undefined, + stat: async (path) => { + if (path.endsWith('/.ktx-build-stamp')) { + throw Object.assign(new Error('missing'), { code: 'ENOENT' }); + } + if (path.endsWith('/packages/cli/dist/bin.js')) { + return { mtimeMs: 2000, isDirectory: () => false }; + } + return { + mtimeMs: 1000, + isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), + }; + }, + readdir: async (path) => { + if (path.endsWith('/packages')) { + return [{ name: 'cli', isDirectory: () => true }]; + } + if (path.endsWith('/src')) { + return [{ name: 'bin.ts', isDirectory: () => false }]; + } + return []; + }, + execFile: async (command, args, options) => { + calls.push({ command, args, cwd: options.cwd }); + if (command === 'pnpm') { + return { stdout: 'build ok\n', stderr: '' }; + } + return { stdout: 'KTX status\n', stderr: '' }; + }, + writeFile: async (path, contents) => { + writes.push({ path, contents }); + }, + stdout: { write: (chunk) => logs.push(['stdout', chunk]) }, + stderr: 
{ write: (chunk) => logs.push(['stderr', chunk]) }, + }); + + assert.equal(exitCode, 0); + assert.deepEqual( + calls.map((call) => [call.command, call.args]), + [ + ['pnpm', ['run', 'build']], + [process.execPath, ['/workspace/ktx/packages/cli/dist/bin.js', 'status']], + ], + ); + assert.deepEqual(logs[0], [ + 'stderr', + 'KTX CLI build output is stale. Rebuilding it now with `pnpm run build`...\n', + ]); + assert.deepEqual(writes, [ + { path: '/workspace/ktx/packages/cli/dist/.ktx-build-stamp', contents: '' }, + ]); }); From 465724a9914f9aaccd3683c07ca181c6db159037 Mon Sep 17 00:00:00 2001 From: Luca Martial <48870843+luca-martial@users.noreply.github.com> Date: Fri, 15 May 2026 13:25:44 -0400 Subject: [PATCH 3/4] Improve README scanability and CLI docs (#112) * docs: simplify readme * docs: add mcp cli reference * test: use docs base path * docs: enlarge readme tagline --- README.md | 233 +++++------------- .../content/docs/cli-reference/index.mdx | 12 +- .../content/docs/cli-reference/ktx-mcp.mdx | 70 ++++++ .../content/docs/cli-reference/meta.json | 1 + docs-site/tests/docs-index-route.test.mjs | 11 +- 5 files changed, 153 insertions(+), 174 deletions(-) create mode 100644 docs-site/content/docs/cli-reference/ktx-mcp.mdx diff --git a/README.md b/README.md index 92a2a8dd..45c20bd9 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ KTX -

- The context layer for analytics agents -

+

+ The context layer for analytics agents +

npm version @@ -16,183 +16,82 @@ --- KTX turns warehouse metadata, semantic definitions, and business knowledge into -reviewable project files that agents can use while planning, querying, and -updating analytics work. - -A KTX project is a directory of plain files - YAML semantic sources, Markdown -wiki pages, and SQLite state - that you commit to git and review in PRs, -just like dbt models. - -## Who KTX is for - -KTX is built for analytics engineers and data teams who want data agents to -work on real analytics systems - not just generate one-off SQL. +reviewable project files that agents can use to plan, query, and update +analytics work. Use KTX when you want agents to: -- **Generate SQL** from approved measures and joins -- **Repair semantic definitions** through reviewable diffs -- **Explain metric provenance** with warehouse evidence -- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI - platforms +- Generate SQL from approved measures and joins +- Repair semantic definitions through reviewable diffs +- Explain metric provenance with warehouse evidence +- Work alongside dbt, MetricFlow, LookML, Looker, Metabase, and Notion -Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and +Supports PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and SQLite. -## Quick start - -Install the CLI and run the setup wizard: +## Quick Start ```bash -npm install @kaelio/ktx -npm install -g @kaelio/ktx +pnpm add --global @kaelio/ktx ktx setup -``` - -The wizard walks through six steps: configuring your LLM provider, setting up -embeddings, connecting your database, adding context sources (dbt, LookML, -Metabase, Looker, Notion), building context, and installing agent integration. - -If it exits before completion, rerun `ktx setup` to resume where you left off. 
- -Check your project status: - -```bash ktx status ``` -``` -KTX project: /home/user/analytics -Project ready: yes -LLM ready: yes (claude-sonnet-4-6) -Embeddings ready: yes (text-embedding-3-small) -Databases configured: yes (postgres-warehouse) -Context sources configured: yes (dbt-main) -KTX context built: yes -Agent integration ready: yes (claude-code:project) -``` +`ktx setup` creates or resumes a local KTX project, configures providers and +connections, builds context, and installs agent integration. -Generate SQL from a semantic-layer source: +## Common Commands -```bash -npx @kaelio/ktx sl query --project-dir "$PROJECT_DIR" \ - --connection-id warehouse \ - --measure accounts.account_count \ - --dimension accounts.segment \ - --format sql -``` +| Command | Purpose | +|---------|---------| +| `ktx setup` | Create, resume, or update a KTX project | +| `ktx status` | Check project readiness | +| `ktx connection list` | List configured connections | +| `ktx connection test ` | Test one connection | +| `ktx ingest ` | Build context for one connection | +| `ktx ingest --all` | Build context for every configured connection | +| `ktx ingest text ` | Capture free-form notes into memory | +| `ktx sl list` | List semantic-layer sources | +| `ktx sl search "revenue"` | Search semantic-layer sources | +| `ktx sl validate --connection-id ` | Validate a semantic source | +| `ktx sl query --measure --format sql` | Compile semantic-layer SQL | +| `ktx wiki search "revenue definition"` | Search local wiki context | +| `ktx mcp start` | Start the local MCP server for agent clients | -List and test a configured warehouse connection: +Project resolution defaults to `KTX_PROJECT_DIR`, then the nearest `ktx.yaml`, +then the current directory. Pass `--project-dir ` when scripting. 
-```bash -ktx connection list --project-dir "$PROJECT_DIR" -ktx connection test warehouse --project-dir "$PROJECT_DIR" -``` - -The connection test prints the configured driver and connector-specific status: +## Project Layout ```text -Connection test passed: warehouse -Driver: sqlite -Status: ok -``` - -## What's in a project - -``` my-project/ -├── ktx.yaml # Project configuration -├── semantic-layer/ -│ └── warehouse/ -│ ├── orders.yaml # Semantic source definitions -│ ├── customers.yaml -│ └── order_items.yaml -├── wiki/ -│ ├── global/ -│ │ ├── revenue.md # Business definitions and rules -│ │ └── segment-classification.md -│ └── user/ -│ └── local/ -├── raw-sources/ -│ └── warehouse/ -│ └── / # Database ingest artifacts and reports -└── .ktx/ - └── db.sqlite # Local state (git-ignored) +├── ktx.yaml # Project configuration +├── semantic-layer// # YAML semantic sources +├── wiki/global/ # Shared business context +├── wiki/user// # User-scoped notes +├── raw-sources// # Ingest artifacts and reports +└── .ktx/ # Local state and secrets, git-ignored ``` -Semantic sources and wiki pages are committed to git. The `.ktx/` directory -holds ephemeral state and is git-ignored - delete it and KTX rebuilds on the -next run. +Commit `ktx.yaml`, `semantic-layer/`, and `wiki/`. Keep `.ktx/` local. -### Build demo warehouse context +## Agent Usage -Database ingest artifacts are written under `raw-sources/warehouse//` -in the project directory. +Setup can install KTX instructions for Claude Code, Codex, Cursor, OpenCode, +and universal `.agents` clients: ```bash -ktx ingest warehouse --project-dir "$PROJECT_DIR" --fast -ktx status --project-dir "$PROJECT_DIR" +ktx setup --agents --target codex ``` -For non-SQLite drivers, prefer credential references such as `--url env:NAME` -or `--url file:PATH` over literal credential URLs. - -## Managed Python runtime - -KTX installs its Python runtime only when a Python-backed command needs it. 
-The runtime lives outside the npm cache, is versioned by the installed CLI -version, and is managed by `ktx dev runtime` commands. - -KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with -your system package manager or the official installer before running Python- -backed KTX commands. KTX doesn't download `uv` automatically; run -`ktx dev runtime status` if runtime installation fails: +Agent-facing workflows typically start with: ```bash -ktx dev runtime install --yes -ktx dev runtime status -ktx dev runtime start -ktx dev runtime stop +ktx sl search "revenue" --json +ktx wiki search "refund policy" --json +ktx sl query --connection-id warehouse --measure orders.revenue --format sql ``` -The release artifact manifest contains the public npm tarball and the bundled `kaelio-ktx` -runtime wheel. The `python/ktx-sl` and `python/ktx-daemon` directories remain -source packages for development, not public release artifacts. - -## Use KTX with agents - -KTX integrates with coding agents through CLI skills. The setup wizard -configures this automatically. - -**CLI skills** - the agent calls `ktx` commands directly through a skill file -installed in your agent's config (e.g., `.claude/skills/ktx/SKILL.md`): - -```bash -ktx sl query --measure orders.revenue --dimension orders.status --format sql -ktx wiki search "revenue definition" -ktx sl validate orders -``` - -Supported agents: Claude Code, Codex, Cursor, OpenCode, and any agent that -reads `.agents/` skills. 
- -## Workspace packages - -| Package | Purpose | -|---------|---------| -| `packages/cli` | CLI entry point | -| `packages/context` | Core context engine | -| `packages/llm` | LLM and embedding providers | -| `packages/connector-bigquery` | BigQuery scan connector | -| `packages/connector-clickhouse` | ClickHouse scan connector | -| `packages/connector-mysql` | MySQL scan connector | -| `packages/connector-postgres` | Postgres scan connector | -| `packages/connector-snowflake` | Snowflake scan connector | -| `packages/connector-sqlite` | SQLite scan connector | -| `packages/connector-sqlserver` | SQL Server scan connector | -| `python/ktx-sl` | Semantic-layer query planning | -| `python/ktx-daemon` | Portable compute service | - ## Development ```bash @@ -204,7 +103,7 @@ pnpm run build pnpm run check ``` -Use the development CLI for local testing: +Use the development CLI locally: ```bash pnpm run setup:dev @@ -212,30 +111,28 @@ pnpm run link:dev ktx-dev --help ``` -### Debug LLM traces +KTX is a pnpm + uv workspace: -KTX can capture local AI SDK DevTools traces for LLM calls that run through the -KTX provider. Enable it with an environment flag when running an LLM-backed -command: +- TypeScript packages live in `packages/*` +- CLI source lives in `packages/cli` +- Python runtime source lives in `python/ktx-sl` and `python/ktx-daemon` +- Public docs live in `docs-site/content/docs` + +Useful checks: ```bash -KTX_AI_DEVTOOLS_ENABLED=true ktx ingest warehouse --project-dir "$PROJECT_DIR" --deep +pnpm run type-check +pnpm run test +pnpm run dead-code +uv run pytest -q ``` -Traces are written to `.devtools/generations.json` under the current working -directory. To inspect them, run: +## Docs -```bash -pnpm dlx @ai-sdk/devtools -``` - -Then open `http://localhost:4983`. These traces are local-development-only and -store prompts, model outputs, tool arguments/results, and raw provider payloads -in plain text. Do not enable this in production or for sensitive runs. 
- -The repository uses `pnpm` for TypeScript packages and `uv` for Python -packages. See [Contributing](docs-site/content/docs/community/contributing.mdx) -for full development setup, testing, and PR guidelines. +- [Quickstart](docs-site/content/docs/getting-started/quickstart.mdx) +- [CLI Reference](docs-site/content/docs/cli-reference/index.mdx) +- [Building Context](docs-site/content/docs/guides/building-context.mdx) +- [Contributing](docs-site/content/docs/community/contributing.mdx) ## License diff --git a/docs-site/content/docs/cli-reference/index.mdx b/docs-site/content/docs/cli-reference/index.mdx index c4ef07db..065339ca 100644 --- a/docs-site/content/docs/cli-reference/index.mdx +++ b/docs-site/content/docs/cli-reference/index.mdx @@ -4,8 +4,8 @@ description: "Command map and shared options for the KTX CLI." --- The `ktx` CLI sets up local projects, builds agent-ready context, checks -connections, queries semantic-layer sources, searches wiki pages, and manages -the bundled Python runtime. +connections, queries semantic-layer sources, searches wiki pages, runs the MCP +server, and manages the bundled Python runtime. ## Command Map @@ -26,6 +26,11 @@ ktx validate query status + mcp + start + stop + status + logs dev init [directory] schema @@ -73,4 +78,7 @@ ktx ingest --all # Search semantic-layer sources and wiki pages ktx sl search "revenue" ktx wiki search "revenue recognition" + +# Start the local MCP server for agent clients +ktx mcp start ``` diff --git a/docs-site/content/docs/cli-reference/ktx-mcp.mdx b/docs-site/content/docs/cli-reference/ktx-mcp.mdx new file mode 100644 index 00000000..9f0dd189 --- /dev/null +++ b/docs-site/content/docs/cli-reference/ktx-mcp.mdx @@ -0,0 +1,70 @@ +--- +title: "ktx mcp" +description: "Run the KTX MCP HTTP server for agent clients." +--- + +`ktx mcp` starts, stops, inspects, and tails the local KTX MCP server for a KTX +project. Use it when an agent client connects through MCP instead of generated +CLI instructions. 
+ +## Command signature + +```bash +ktx mcp [options] +``` + +## Subcommands + +| Subcommand | Description | +|-----------|-------------| +| `start` | Start the KTX MCP HTTP server | +| `stop` | Stop the KTX MCP daemon | +| `status` | Show daemon status, URL, PID, token mode, and project path | +| `logs` | Print the daemon log | + +## `mcp start` Options + +| Flag | Description | Default | +|------|-------------|---------| +| `--host ` | Host to bind | `127.0.0.1` | +| `--port ` | Port to bind | `7878` | +| `--token ` | Bearer token for non-loopback binding | `KTX_MCP_TOKEN` | +| `--foreground` | Run the server in the foreground | `false` | +| `--allowed-host ` | Additional allowed Host header; repeatable | - | +| `--allowed-origin ` | Allowed browser Origin header; repeatable | - | + +## `mcp logs` Options + +| Flag | Description | Default | +|------|-------------|---------| +| `--follow` | Follow log output | `false` | + +## Examples + +```bash +# Start the daemon on localhost +ktx mcp start + +# Check status +ktx mcp status + +# Tail logs +ktx mcp logs --follow + +# Run in the foreground on a custom port +ktx mcp start --port 8787 --foreground +``` + +## Security notes + +The default host is loopback-only. If you bind to a non-loopback host, configure +a bearer token with `--token ` or `KTX_MCP_TOKEN` and restrict allowed +hosts and origins for browser clients. 
+ +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| No KTX project found | Current directory has no `ktx.yaml` and `KTX_PROJECT_DIR` is unset | Run from a KTX project or pass `--project-dir ` | +| Non-loopback host rejected | The server needs token auth before binding beyond localhost | Pass `--token ` or set `KTX_MCP_TOKEN` | +| Client cannot connect | Host, port, token, allowed host, or allowed origin does not match the client | Check `ktx mcp status`, then restart with explicit `--host`, `--port`, `--allowed-host`, and `--allowed-origin` values | diff --git a/docs-site/content/docs/cli-reference/meta.json b/docs-site/content/docs/cli-reference/meta.json index 46aafb4c..4103c025 100644 --- a/docs-site/content/docs/cli-reference/meta.json +++ b/docs-site/content/docs/cli-reference/meta.json @@ -9,6 +9,7 @@ "ktx-sl", "ktx-wiki", "ktx-status", + "ktx-mcp", "ktx-dev" ] } diff --git a/docs-site/tests/docs-index-route.test.mjs b/docs-site/tests/docs-index-route.test.mjs index ddcd3181..7d1c62c0 100644 --- a/docs-site/tests/docs-index-route.test.mjs +++ b/docs-site/tests/docs-index-route.test.mjs @@ -9,6 +9,7 @@ import { setTimeout as delay } from "node:timers/promises"; import { fileURLToPath } from "node:url"; const configuredDocsSiteUrl = process.env.DOCS_SITE_URL; +const docsBasePath = "/ktx"; let docsSiteUrl = configuredDocsSiteUrl; let docsServer; let docsServerOutput = ""; @@ -46,7 +47,7 @@ async function waitForDocsServer() { } try { - await fetch(`${docsSiteUrl}/docs`, { redirect: "manual" }); + await fetch(`${docsSiteUrl}${docsBasePath}/docs`, { redirect: "manual" }); return; } catch { await delay(200); @@ -99,12 +100,14 @@ after(async () => { } }); -test("/docs redirects to the docs introduction", async () => { - const response = await fetch(`${docsSiteUrl}/docs`, { redirect: "manual" }); +test("/ktx/docs redirects to the docs introduction", async () => { + const response = await fetch(`${docsSiteUrl}${docsBasePath}/docs`, { + 
redirect: "manual", + }); assert.equal(response.status, 307); assert.equal( response.headers.get("location"), - "/docs/getting-started/introduction", + `${docsBasePath}/docs/getting-started/introduction`, ); }); From 42b688e9346c88ec9dab069d0aacda4c60f7c94e Mon Sep 17 00:00:00 2001 From: Luca Martial <48870843+luca-martial@users.noreply.github.com> Date: Fri, 15 May 2026 15:31:51 -0400 Subject: [PATCH 4/4] Align docs with current KTX behavior (#106) * docs: align docs with current KTX behavior * fix: generate valid agent sl query command * docs: clarify KTX product mechanics * fix: use

<ol> for runtime pipeline steps in product mechanics The PipelineStep component renders
<li> elements, but the RuntimeDiagram wrapper was a plain
<div> instead of a list element. This produced invalid HTML and accessibility warnings. IngestionDiagram already used <ol>
      . Co-Authored-By: Claude Opus 4.6 (1M context) * Add docs favicon * docs: add semantic layer internals concept * docs: refine documentation source label * docs: clarify company documentation examples --------- Co-authored-by: Claude Opus 4.6 (1M context) --- docs-site/app/docs/[[...slug]]/page.tsx | 17 +- docs-site/app/layout.tsx | 4 + docs-site/components/product-mechanics.tsx | 402 ++++++++++++++++++ .../content/docs/cli-reference/ktx-ingest.mdx | 7 +- .../content/docs/cli-reference/ktx-setup.mdx | 9 +- docs-site/content/docs/concepts/meta.json | 2 +- .../concepts/semantic-layer-internals.mdx | 398 +++++++++++++++++ .../docs/concepts/the-context-layer.mdx | 15 +- .../docs/getting-started/introduction.mdx | 63 +-- .../docs/getting-started/quickstart.mdx | 9 +- .../content/docs/guides/building-context.mdx | 4 +- .../content/docs/guides/writing-context.mdx | 35 +- .../docs/integrations/agent-clients.mdx | 4 +- docs-site/content/docs/integrations/index.mdx | 3 +- .../docs/integrations/primary-sources.mdx | 4 +- .../tests/product-mechanics-content.test.mjs | 86 ++++ .../plans/2026-05-15-semantic-layer-docs.md | 328 ++++++++++++++ .../2026-05-15-semantic-layer-docs-design.md | 166 ++++++++ packages/cli/src/setup-agents.test.ts | 5 + packages/cli/src/setup-agents.ts | 13 +- 20 files changed, 1495 insertions(+), 79 deletions(-) create mode 100644 docs-site/components/product-mechanics.tsx create mode 100644 docs-site/content/docs/concepts/semantic-layer-internals.mdx create mode 100644 docs-site/tests/product-mechanics-content.test.mjs create mode 100644 docs/superpowers/plans/2026-05-15-semantic-layer-docs.md create mode 100644 docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md diff --git a/docs-site/app/docs/[[...slug]]/page.tsx b/docs-site/app/docs/[[...slug]]/page.tsx index d1ae21d4..dd5d944c 100644 --- a/docs-site/app/docs/[[...slug]]/page.tsx +++ b/docs-site/app/docs/[[...slug]]/page.tsx @@ -39,20 +39,29 @@ export default async function 
Page(props: { const hero = isHeroPage(params.slug); return ( - + {!hero && ( <> -
      +
      {page.data.title}
      - {page.data.description} + + {page.data.description} + )} - + diff --git a/docs-site/app/layout.tsx b/docs-site/app/layout.tsx index 35a4b1fa..48e12a3f 100644 --- a/docs-site/app/layout.tsx +++ b/docs-site/app/layout.tsx @@ -27,6 +27,10 @@ export const metadata: Metadata = { }, description: "Open-source context infrastructure that makes agentic analytics reliable.", + icons: { + icon: "/brand/ktx-mascot.svg", + shortcut: "/brand/ktx-mascot.svg", + }, }; export default function RootLayout({ children }: { children: ReactNode }) { diff --git a/docs-site/components/product-mechanics.tsx b/docs-site/components/product-mechanics.tsx new file mode 100644 index 00000000..7f551450 --- /dev/null +++ b/docs-site/components/product-mechanics.tsx @@ -0,0 +1,402 @@ +import type { ReactNode } from "react"; + +const sourceInputs = [ + { + name: "Warehouse schema", + detail: "tables, columns, types, constraints, row counts", + signal: "grounds definitions in live database structure", + accent: "border-fd-primary", + }, + { + name: "Metabase and query history", + detail: "historic SQL, questions, dashboards, usage patterns", + signal: "extracts joins, filters, grain, and trusted examples", + accent: "border-orange-500", + }, + { + name: "dbt, MetricFlow, LookML", + detail: "models, metrics, dimensions, explores, joins", + signal: "maps existing modeling logic into semantic entities", + accent: "border-amber-500", + }, + { + name: "Company documentation", + detail: "Notion pages, policies, caveats, analyst notes", + signal: "links business language back to semantic references", + accent: "border-slate-500 dark:border-cyan-200", + }, +]; + +const ingestSteps = [ + { + title: "extract evidence", + body: "Pull structured facts from schemas, SQL, BI metadata, and docs.", + }, + { + title: "reconcile entities", + body: "Merge names, measures, joins, and caveats into one project model.", + }, + { + title: "validate references", + body: "Check semantic fields and joins against 
database context before agents use them.", + }, +]; + +const artifacts = [ + { + path: "semantic-layer/*.yaml", + title: "Typed query model", + body: "sources, grain, joins, dimensions, measures, filters, segments", + }, + { + path: "wiki/*.md", + title: "Business context", + body: "rules and caveats with sl_refs back to semantic-layer entities", + }, + { + path: "raw-sources/", + title: "Evidence trail", + body: "scan artifacts, extracted metadata, relationship evidence", + }, + { + path: ".ktx/", + title: "Local indexes", + body: "embeddings and search indexes, not the source of truth", + }, +]; + +const runtimeSteps = [ + { + title: "Search wiki", + body: "Find business rules, caveats, synonyms, and sl_refs.", + }, + { + title: "Resolve semantic refs", + body: "Map measure and dimension names to approved entities.", + }, + { + title: "Validate fields", + body: "Check source, columns, joins, grain, filters, and segments.", + }, + { + title: "Build query plan", + body: "Create a semantic query plan before SQL is generated.", + }, + { + title: "Compile dialect SQL", + body: "Generate warehouse-shaped SQL instead of copying examples.", + }, + { + title: "Execute with bounds", + body: "Optionally run with bounded rows and return provenance.", + }, +]; + +export function ProductMechanics() { + return ( +
      +
      +

      + Product mechanics +

      +

      + A semantic compiler for analytics agents +

      +

      + KTX builds typed semantic files, links wiki context back to those + entities, validates the model against database evidence, then compiles + agent requests into executable SQL. +

      +
      + +
      + + +
      +
      + ); +} + +function IngestionDiagram() { + return ( +
      + + +
      +
      + Inputs KTX reads +
      + {sourceInputs.map((source) => ( +
      +

      + {source.name} +

      +

      + {source.detail} +

      +

      + {source.signal} +

      +
      + ))} +
      +
      + +
      + KTX builds the model +
      +
      +

      + Ingest pipeline +

      +
        + {ingestSteps.map((step, index) => ( + + ))} +
      +
      + +
      + {artifacts.map((artifact) => ( + + ))} +
      +
      +
      +
      +
      + ); +} + +function RuntimeDiagram() { + return ( +
      + + +
      +
      + Agent sends + +
      connection: warehouse
      +
      measure: orders.total_revenue
      +
      dimension: customers.segment
      +
      filter: orders.created_date >= '2024-01-01'
      +
      +

      + This is the API surface agents should use: compact semantic intent, + not hand-written warehouse SQL. +

      +
      + +
      + KTX planning and execution +
        + {runtimeSteps.map((step, index) => ( + + ))} +
      +
      +
      + +
      +
      + Semantic query plan +
      +

      + source:{" "} + orders joined to customers as many_to_one +

      +

      + measure:{" "} + total_revenue = sum(amount) with refund filter +

      +

      + grain: segment + group-by with date predicate +

      +

      + result: dialect + SQL, bounded rows, and provenance +

      +
      +
      + +
      + KTX returns + +
      select
      +
      customers.segment,
      +
      sum(orders.amount) as total_revenue
      +
      from analytics.orders
      +
      join analytics.customers
      +
      on orders.customer_id = customers.id
      +
      where orders.status != 'refunded'
      +
      and orders.created_date >= '2024-01-01'
      +
      group by 1
      +
      +

      + The output can be SQL-only or executed results with provenance, so + the agent can show where the answer came from. +

      +
      +
      +
      + ); +} + +function DiagramHeader({ + body, + eyebrow, + id, + title, +}: { + body: string; + eyebrow: string; + id: string; + title: string; +}) { + return ( +
      +

      + {eyebrow} +

      +

      + {title} +

      +

      + {body} +

      +
      + ); +} + +function Artifact({ + body, + path, + title, +}: { + body: string; + path: string; + title: string; +}) { + return ( +
      +

      + {path} +

      +

      {title}

      +

      + {body} +

      +
      + ); +} + +function PipelineStep({ + body, + dark = false, + index, + title, +}: { + body: string; + dark?: boolean; + index: number; + title: string; +}) { + return ( +
    1. + + {index} + + + + {title} + + + {body} + + +
    2. + ); +} + +function ColumnLabel({ children }: { children: ReactNode }) { + return ( +

      + {children} +

      + ); +} + +function CodeBox({ children }: { children: ReactNode }) { + return ( +
      +
      {children}
      +
      + ); +} diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index a0bca58f..ab907992 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -29,14 +29,16 @@ connections when you use `--all`. | `--deep` | Use AI-enriched database ingest | Stored connection default, or `fast` | | `--query-history` | Include database query-history usage patterns | Stored connection default | | `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default | -| `--query-history-window-days ` | Query-history lookback window for this run | Stored connection default | +| `--query-history-window-days ` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default | | `--plain` | Print plain text output | `true` | | `--json` | Print JSON output | `false` | | `--no-input` | Disable interactive terminal input | — | `--fast` and `--deep` are mutually exclusive. Depth flags apply only to database connections. Query-history flags apply only to database connections -that support query history. Query-history ingest runs after schema ingest and +that support query history. The window flag applies to BigQuery and Snowflake; +Postgres reads the current `pg_stat_statements` aggregate data instead of a +time-windowed history table. Query-history ingest runs after schema ingest and requires deep ingest readiness. 
When `--all` selects both databases and context sources, database ingest runs @@ -70,6 +72,7 @@ ktx ingest warehouse --deep # Include query-history usage patterns ktx ingest warehouse --deep --query-history +# Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 # Build a source connection diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 4de40ecb..90d0b175 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -96,13 +96,16 @@ incomplete. |------|-------------| | `--enable-query-history` | Enable query-history ingest when the selected database supports it | | `--disable-query-history` | Disable query-history ingest for the selected database | -| `--query-history-window-days ` | Query-history lookback window | +| `--query-history-window-days ` | BigQuery/Snowflake query-history lookback window | | `--query-history-min-executions ` | Minimum executions for a query-history template | | `--query-history-service-account-pattern ` | Query-history service-account regex; repeatable | | `--query-history-redaction-pattern ` | Query-history SQL-literal redaction regex; repeatable | -Query history setup is supported for Postgres, BigQuery, and Snowflake. Enabling -query history makes deep ingest readiness matter for later `ktx ingest` runs. +Query history setup is supported for Postgres, BigQuery, and Snowflake. The +window flag applies to BigQuery and Snowflake; Postgres reads the current +`pg_stat_statements` aggregate data instead of a time-windowed history table. +Enabling query history makes deep ingest readiness matter for later +`ktx ingest` runs. 
### Context Sources diff --git a/docs-site/content/docs/concepts/meta.json b/docs-site/content/docs/concepts/meta.json index e1749365..72c0a407 100644 --- a/docs-site/content/docs/concepts/meta.json +++ b/docs-site/content/docs/concepts/meta.json @@ -1,5 +1,5 @@ { "title": "Concepts", "defaultOpen": true, - "pages": ["the-context-layer", "context-as-code"] + "pages": ["the-context-layer", "semantic-layer-internals", "context-as-code"] } diff --git a/docs-site/content/docs/concepts/semantic-layer-internals.mdx b/docs-site/content/docs/concepts/semantic-layer-internals.mdx new file mode 100644 index 00000000..c48428e6 --- /dev/null +++ b/docs-site/content/docs/concepts/semantic-layer-internals.mdx @@ -0,0 +1,398 @@ +--- +title: Semantic Layer Internals +description: How KTX uses join graphs, grain, and relationship metadata to turn context into safe SQL. +--- + +KTX is a context layer for agents. This page focuses on one internal subsystem: +the semantic execution layer that turns reviewed context into safe SQL. + +The semantic layer is important, but it is not the whole product. KTX also +handles schema evidence, wiki context, provenance, validation, and agent +workflows around those files. + +Read the page as a pipeline: + +- context inputs feed the semantic engine; +- evidence becomes a join graph with grain and relationship metadata; +- review and corrections keep that graph current; +- the execution engine uses the graph to avoid fan-out and ambiguous joins. + +## Where the semantic layer fits + +The semantic layer is not a separate product category inside KTX. It is the +engine that makes the rest of the context actionable for SQL generation. + +
      +
      +
      +

      + {"Context inputs"} +

      +
      +
      +

      semantic-layer/

      +

      + {"source YAML, measures, joins, grain"} +

      +
      +
      +

      wiki/

      +

      + {"business rules, definitions, caveats"} +

      +
      +
      +

      raw-sources/

      +

      + {"schema scans, keys, imported metadata"} +

      +
      +
      +

      provenance

      +

      + {"ingest decisions and review history"} +

      +
      +
      +
      + + + +
      +
      +

      + {"Semantic layer engine"} +

      +
      +
      +

      Join graph

      +

      + {"sources as nodes, joins as typed edges"} +

      +
      +
      +

      Grain

      +

      + {"row identity before aggregation"} +

      +
      +
      +

      Measures

      +

      + {"verified formulas and filters"} +

      +
      +
      +

      Relationships

      +

      + {"many_to_one, one_to_many, one_to_one"} +

      +
      +
      +
      + {"Safe query planning before SQL is generated."} +
      +
      + + + +
      +

      + {"Agent workflows"} +

      +
      +
      + {"Search sources and wiki pages"} +
      +
      + {"Compile trusted SQL"} +
      +
      + {"Explain metrics and provenance"} +
      +
      + {"Patch files and validate review"} +
      +
      +
      +
      +
      + +## The join graph KTX builds + +A semantic source is a node. A join is an edge with a join condition and a +relationship type. The graph lets KTX choose valid paths, reject unsafe paths, +and reason about whether a join preserves or multiplies rows before SQL is +generated. + +- `many_to_one` paths are usually safe for adding dimensions. +- `one_to_many` paths can multiply fact rows and trigger fan-out handling. +- Equal-cost paths can be ambiguous, so aliases and explicit joins matter. + +
      +
      +
      +

      customers

      +

      grain: customer_id

      +
      +
      +

      orders

      +

      grain: order_id

      +
      +
      +

      order_items

      +

      grain: order_id, line_id

      +
      +
      +
      +
      orders -> customers: many_to_one
      +
      orders -> order_items: one_to_many
      +
      +
      + {"Example: "} + {"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it can duplicate order-level measures."} +
      +
      + +The graph is bidirectional for planning. If `orders -> customers` is +`many_to_one`, the reverse path is `one_to_many`; KTX keeps that distinction +instead of treating every join as a neutral edge. + +## How KTX builds the graph + +KTX starts from evidence, not a blank modeling canvas. Database scans and +analytics-tool imports create source definitions that an analyst can review. + +| Evidence | What it contributes | +|---|---| +| Declared primary keys | Initial row grain for each source | +| Declared foreign keys | Formal join candidates and relationship direction | +| Inferred relationships | Useful edges when warehouses lack constraints | +| dbt, MetricFlow, and LookML imports | Existing metrics, dimensions, entities, explores, and joins | +| Query history | Real join and filter patterns agents should respect | +| Analyst review | The final authority before context is merged | + +Generated YAML is intentionally reviewable. KTX can draft joins and measures, +but the accepted semantic layer is still the plain-file diff your team approves. + +## How KTX keeps the graph current + +The semantic layer changes as schemas, metrics, and business rules change. KTX +keeps that loop explicit instead of hiding it behind a remote runtime. + +
      +
      +

      + {"Semantic maintenance loop"} +

      +

      + {"Every accepted correction becomes input to the next graph build."} +

      +
      +
      +
      +
      + + +
      +

      + {"reviewed context"} +

      +

      + {"The accepted graph becomes the starting point for the next build."} +

      +
      + +
      +

      + {"Step 1"} +

      +

      {"ingest evidence"}

      +

      + {"scan schemas, imports, and accepted files"} +

      +
      +
      +

      + {"Step 2"} +

      +

      {"YAML diff"}

      +

      + {"draft source, join, grain, and measure changes"} +

      +
      +
      +

      + {"Step 3"} +

      +

      {"validation"}

      +

      + {"check relationships, syntax, and unsafe query shapes"} +

      +
      +
      +

      + {"Step 4"} +

      +

      {"analyst review"}

      +

      + {"accept, edit, or reject generated context"} +

      +
      +
      +

      + {"Step 5"} +

      +

      {"agent use"}

      +

      + {"serve context to search, explain, and query"} +

      +
      +
      +

      + {"Step 6"} +

      +

      {"corrections"}

      +

      + {"agent and analyst fixes become new evidence"} +

      +
      +
      +
      +
      +
      + +This matters because semantic correctness is not static. If a source gains a +new key, a metric changes definition, or an analyst corrects a relationship, +the next agent gets that reviewed context. + +## The modeling problem the graph solves + +Fan-out is the classic failure mode. If an order-level measure is joined to +line-item rows before aggregation, one order can become many rows and revenue +can be counted more than once. + +| Problem | What happens | How KTX avoids it | +|---|---|---| +| Order measure joins to `order_items` | `orders.revenue` repeats once per item | Detect the `one_to_many` path and pre-aggregate the order measure | +| Two independent fact sources share `customers` | Measures from each fact table multiply across the shared dimension | Treat it as a chasm trap and use aggregate-locality planning | +| Filter lives only across a `one_to_many` path | Filtering after the join changes the measure grain | Reject or localize the filter instead of silently producing unsafe SQL | +| Multiple equal-cost paths connect the same sources | The join path is ambiguous | Prefer safer paths and use aliases to disambiguate repeated joins | + +Many-to-many questions usually show up as multiple one-to-many paths or +independent fact sources. KTX treats those shapes as fan-out or chasm risks +unless the query can be planned at a safe grain. + +## How the execution engine uses the graph + +The planner resolves the sources in a semantic query, chooses a join tree, and +checks whether any requested dimension or filter crosses a row-multiplying +edge. The SQL generator then chooses the simple path or the aggregate-locality +path. 
+ +| Naive SQL shape | Semantic-layer SQL shape | +|---|---| +| Join facts and dimensions first, then aggregate | Aggregate each fact source at its own grain, then join the results | +| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source when locality is needed | +| Trust the shortest textual join path | Prefer safe relationship paths and reject disconnected sources | +| Let dimension grain differ across facts | Raise when asymmetric dimensions would fan out another measure | + +
      +
      +
      +

      + {"Unsafe shape"} +

      +
      +{`orders
      +  join order_items
      +  join customers
      +group by customer_segment
      +sum(orders.amount)`}
      +      
      +

      + {"The order measure is exposed to line-item fan-out before aggregation."} +

      +
      +
      +

      + {"KTX shape"} +

      +
      +{`orders_agg as (
      +  select customer_id, sum(amount) revenue
      +  from orders
      +  group by customer_id
      +)
      +select customers.segment, sum(revenue)
      +from orders_agg
      +join customers`}
      +      
      +

      + {"KTX pre-aggregates fact measures at their own grain before joining dimensions."} +

      +
      +
      +
      + +The result is not magic. It is structured planning: validated sources, typed +relationships, graph search, fan-out detection, aggregate locality, and final +dialect transpilation. + +## What this means for agents + +KTX gives agents a semantic surface they can inspect and improve, not just a +folder of notes. + +- Search semantic sources and related wiki pages before writing SQL. +- Compile SQL through `ktx sl query` instead of guessing joins. +- Validate semantic-layer changes before review. +- Patch YAML and Markdown files in git. +- Explain metric meaning and provenance from the same accepted context. + +Next, read [Writing Context](/docs/guides/writing-context) for the YAML editing +workflow or [ktx sl](/docs/cli-reference/ktx-sl) for the command reference. diff --git a/docs-site/content/docs/concepts/the-context-layer.mdx b/docs-site/content/docs/concepts/the-context-layer.mdx index cb03b7c0..ba7ee3f3 100644 --- a/docs-site/content/docs/concepts/the-context-layer.mdx +++ b/docs-site/content/docs/concepts/the-context-layer.mdx @@ -191,7 +191,18 @@ KTX organizes context into four pillars: Each pillar covers a different kind of context agents need before they can safely write SQL, update semantic definitions, or explain an analytics result. -**Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives. +**Semantic sources** are YAML definitions that describe your data in terms +agents can reason about: + +- source tables or SQL queries; +- row grain; +- typed columns; +- valid joins; +- named measures, filters, and segments. + +This is where "revenue means `sum(amount)` excluding refunds" lives. 
For the +join graph, fan-out protections, and execution mechanics, read +[Semantic Layer Internals](/docs/concepts/semantic-layer-internals). ```yaml name: orders @@ -289,7 +300,7 @@ my-project/ │ └── data-quality-notes.md ├── raw-sources/ │ └── warehouse/ -│ └── database-ingest/ # Schema ingest artifacts and reports +│ └── live-database/ # Schema ingest artifacts and reports └── .ktx/ ├── db.sqlite # Local state (git-ignored) └── cache/ # Runtime cache (git-ignored) diff --git a/docs-site/content/docs/getting-started/introduction.mdx b/docs-site/content/docs/getting-started/introduction.mdx index cb8ac0dd..7a6c9b3e 100644 --- a/docs-site/content/docs/getting-started/introduction.mdx +++ b/docs-site/content/docs/getting-started/introduction.mdx @@ -3,10 +3,12 @@ title: Introduction description: How KTX gives analytics agents trusted context for warehouse work. --- -
      -
      +import { ProductMechanics } from "@/components/product-mechanics"; + +
      +

      - Make analytics context{'\n'}usable by agents + Make analytics context usable by agents

      -

      - KTX turns warehouse metadata, semantic definitions, and business knowledge - into reviewable project files that agents can use while planning, querying, - and updating analytics work. +

      + {'KTX turns warehouse metadata, semantic definitions, and business knowledge into reviewable project files that agents can use while planning, querying, and updating analytics work.'}

      -
      -## Who KTX is for + + +## What agents can do with KTX KTX is built for analytics engineers and data teams who want data agents to -work on real analytics systems - not just generate one-off SQL. +work on real analytics systems, not just generate one-off SQL. -Use KTX when you want agents to: +Use it when agents need to: -- **Generate SQL** from approved measures and joins -- **Repair semantic definitions** through reviewable diffs -- **Explain metric provenance** with warehouse evidence -- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI platforms +- **Generate SQL** from approved measures, dimensions, joins, and filters +- **Explain provenance** with wiki context and warehouse evidence +- **Repair context** through reviewable YAML and Markdown diffs +- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and warehouses -Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server. +KTX works with SQLite, PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and +SQL Server. -## Explore the docs +## Read next Set up KTX and build your first context in under 10 minutes. - - Understand what a context layer is and why agents need one. - Hands-on workflows for scanning, ingesting, writing, and serving. + + Edit semantic-layer YAML and wiki Markdown safely. + Complete flag and subcommand reference for every KTX command. diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 335aedfa..84bf4611 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -51,8 +51,8 @@ For scripted setup, pass the project directory explicitly: ktx setup --project-dir ./analytics ``` -If setup exits early, rerun `ktx setup` in the same directory. KTX tracks -completed setup steps and resumes from the remaining work. +If setup exits early, rerun `ktx setup` in the same directory. 
KTX keeps local +setup progress under `.ktx/setup/` and resumes from the remaining work. ## Step 2: Configure the LLM @@ -122,7 +122,8 @@ Database ready PostgreSQL, BigQuery, and Snowflake can also enable query-history ingest. Query history helps KTX learn common query patterns, joins, service-account filters, -and warehouse-specific usage. +and warehouse-specific usage. BigQuery and Snowflake support a lookback window; +Postgres reads the current `pg_stat_statements` aggregate data instead. ## Step 5: Add context sources @@ -200,7 +201,7 @@ KTX writes plain files so people and agents can inspect changes in git. | Path | Purpose | |------|---------| -| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and setup state | +| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and query-history settings | | `.ktx/secrets/*` | Local secret files referenced from `ktx.yaml`; do not commit these | | `.ktx/setup/*` | Local setup and context-build state | | `.ktx/agents/install-manifest.json` | Manifest used to manage installed agent files | diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index c21b7921..5fd288a6 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -62,13 +62,15 @@ configured, run `ktx setup` or use `--fast`. PostgreSQL, BigQuery, and Snowflake can add query-history context. This helps KTX learn common joins, filters, service-account patterns, redaction rules, and -usage-heavy query templates. +usage-heavy query templates. BigQuery and Snowflake support a lookback window; +Postgres reads the current `pg_stat_statements` aggregate data instead. 
Enable it during setup, store it under `connections..context.queryHistory`, or request it for one run: ```bash ktx ingest warehouse --deep --query-history +# Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 ``` diff --git a/docs-site/content/docs/guides/writing-context.mdx b/docs-site/content/docs/guides/writing-context.mdx index fe9d3fdb..b68960bf 100644 --- a/docs-site/content/docs/guides/writing-context.mdx +++ b/docs-site/content/docs/guides/writing-context.mdx @@ -60,21 +60,25 @@ semantic-layer//.yaml ```yaml name: orders -description: Customer orders with booked revenue. +descriptions: + user: Customer orders with booked revenue. table: public.orders grain: - order_id columns: - name: order_id type: string - description: Unique order identifier. + descriptions: + user: Unique order identifier. - name: order_date type: time role: time - description: Date the order was placed. + descriptions: + user: Date the order was placed. - name: total_amount type: number - description: Booked order value in USD. + descriptions: + user: Booked order value in USD. measures: - name: total_revenue expr: SUM(total_amount) @@ -85,7 +89,8 @@ measures: ```yaml name: orders -description: Customer orders with line-item totals. +descriptions: + user: Customer orders with line-item totals. table: public.orders grain: - order_id @@ -93,26 +98,31 @@ grain: columns: - name: order_id type: string - description: Unique order identifier. + descriptions: + user: Unique order identifier. - name: order_date type: time role: time - description: Date the order was placed. + descriptions: + user: Date the order was placed. - name: status type: string visibility: public - description: Current order status. + descriptions: + user: Current order status. - name: _etl_loaded_at type: time visibility: hidden - description: Internal load timestamp. + descriptions: + user: Internal load timestamp. 
- name: total_amount type: number - description: Order total in USD. + descriptions: + user: Order total in USD. measures: - name: total_revenue @@ -149,9 +159,10 @@ joins: | Field | Required | Description | |-------|----------|-------------| | `name` | Yes | Source identifier. Use lowercase words and underscores. | +| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. | | `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. | | `grain` | Yes | Columns that uniquely identify a row at the source grain. | -| `columns` | No | Column definitions with type, role, visibility, and descriptions. | +| `columns` | Yes | Non-empty column definitions with type, role, visibility, and descriptions. | | `measures` | No | Aggregation expressions such as `SUM`, `COUNT`, and `AVG`. | | `segments` | No | Named predicates agents can reuse. | | `joins` | No | Relationships to other semantic sources. | @@ -165,7 +176,7 @@ joins: | Column | `type` | Yes | Agent-facing type: `string`, `number`, `time`, or `boolean`. | | Column | `role` | No | Special role such as `time` for default time dimensions. | | Column | `visibility` | No | `public`, `internal`, or `hidden`. | -| Column | `description` | Strongly recommended | Business meaning and usage notes. | +| Column | `descriptions` | Strongly recommended | Description map keyed by source, such as `user`, `dbt`, or `ai`. | | Measure | `name` | Yes | Queryable metric name. | | Measure | `expr` | Yes | SQL aggregation expression at the source grain. | | Measure | `filter` | No | SQL predicate applied only to this measure. 
| diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index de628197..01cbbca5 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -75,7 +75,7 @@ Available commands: - `ktx status --json --project-dir /path/to/project` - `ktx sl list --json --project-dir /path/to/project` - `ktx sl search '' --json --project-dir /path/to/project --connection-id ''` -- `ktx sl query --json --project-dir /path/to/project --connection-id '' --query-file '' --execute --max-rows 100` +- `ktx sl query --project-dir /path/to/project --connection-id '' --query-file '' --format json --execute --max-rows 100` - `ktx wiki search '' --json --project-dir /path/to/project --limit 10` ``` @@ -172,7 +172,7 @@ All supported agent clients call the same KTX CLI commands: | `ktx sl list --json` | List semantic-layer sources | | `ktx sl search --json` | Search semantic-layer sources | | `ktx sl validate --connection-id ` | Validate semantic source definitions | -| `ktx sl query --json` | Execute a semantic-layer query when semantic compute is configured | +| `ktx sl query --format json` | Execute a semantic-layer query when semantic compute is configured | ### Security constraints diff --git a/docs-site/content/docs/integrations/index.mdx b/docs-site/content/docs/integrations/index.mdx index 8f77a624..92a677aa 100644 --- a/docs-site/content/docs/integrations/index.mdx +++ b/docs-site/content/docs/integrations/index.mdx @@ -34,8 +34,9 @@ automation flags documented in [`ktx setup`](/docs/cli-reference/ktx-setup). 
| Path | Purpose | |------|---------| -| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, query history, and setup state | +| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, and query history | | `.ktx/secrets/*` | Local file-backed secrets when you choose file references during setup | +| `.ktx/setup/*` | Local setup progress and context-build state | | `semantic-layer//` | YAML semantic sources generated by database and source ingestion | | `wiki/` | Markdown business context, definitions, and ingested knowledge | | `.ktx/agents/install-manifest.json` | Manifest of agent integration files installed by `ktx setup --agents` | diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index a3d4db29..00cc39aa 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -228,7 +228,7 @@ mapping metadata. The BigQuery connector still authenticates with the | Feature | Supported | Notes | |---------|-----------|-------| | Tables & views | Yes | Including materialized views and external tables | -| Primary keys | No | - | +| Primary keys | Yes | Via `INFORMATION_SCHEMA` table constraints when declared | | Foreign keys | No | Not available in BigQuery | | Row count estimates | Yes | From table metadata | | Column statistics | No | - | @@ -500,7 +500,7 @@ No authentication required - SQLite is file-based. 
The file must be readable by - Uses `LIMIT X OFFSET Y` for pagination - SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB` - Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON` -- In-memory databases supported with `path: ":memory:"` (for testing) +- Database file must exist before `ktx connection test` or ingest runs ## Common errors diff --git a/docs-site/tests/product-mechanics-content.test.mjs b/docs-site/tests/product-mechanics-content.test.mjs new file mode 100644 index 00000000..6992d9a1 --- /dev/null +++ b/docs-site/tests/product-mechanics-content.test.mjs @@ -0,0 +1,86 @@ +import assert from "node:assert/strict"; +import { readFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; +import { test } from "node:test"; +import { fileURLToPath } from "node:url"; + +const docsSiteDir = join(dirname(fileURLToPath(import.meta.url)), ".."); + +async function readDocsFile(path) { + return readFile(join(docsSiteDir, path), "utf8"); +} + +test("docs introduction shows the ingestion and runtime mechanics early", async () => { + const introduction = await readDocsFile( + "content/docs/getting-started/introduction.mdx", + ); + + assert.match( + introduction, + /import\s+\{\s*ProductMechanics\s*\}\s+from\s+"@\/components\/product-mechanics";/, + ); + assert.match(introduction, //); + + const heroIndex = introduction.indexOf("Make analytics context"); + const mechanicsIndex = introduction.indexOf(""); + const useCaseIndex = introduction.indexOf("## What agents can do with KTX"); + const heroSource = introduction.slice(0, mechanicsIndex); + + assert.ok(heroIndex >= 0, "introduction should include the custom hero"); + assert.ok( + mechanicsIndex > heroIndex, + "mechanics component should appear after the hero", + ); + assert.ok( + mechanicsIndex < useCaseIndex, + "mechanics component should appear before use-case sections", + ); + assert.doesNotMatch(heroSource, /Get Started/); + assert.doesNotMatch(heroSource, 
/The Context Layer/); + assert.doesNotMatch(heroSource, /Building Context/); + assert.doesNotMatch(heroSource, /flex flex-wrap gap-3/); +}); + +test("product mechanics component covers source-specific context and SQL expansion", async () => { + const component = await readDocsFile("components/product-mechanics.tsx"); + + for (const expectedText of [ + "A semantic compiler for analytics agents", + "Ingestion", + "Runtime", + "wiki/", + "semantic-layer/", + "raw-sources/", + ".ktx/", + "sl_refs", + "Company documentation", + "Notion pages", + "Metabase", + "query history", + "extract evidence", + "reconcile entities", + "validate references", + "semantic query plan", + "dialect SQL", + "bounded rows", + "provenance", + "measure: orders.total_revenue", + "dimension: customers.segment", + "select", + ]) { + assert.ok( + component.includes(expectedText), + `component should include: ${expectedText}`, + ); + } + + assert.doesNotMatch(component, /KTX does more than retrieve Markdown/); + assert.doesNotMatch(component, /Plain Markdown \+ RAG/); + assert.doesNotMatch(component, /comparisonRows/); + assert.doesNotMatch(component, /ComparisonTable/); + assert.doesNotMatch(component, /Not just retrieval/); + assert.doesNotMatch(component, /KTX works in two moments/); + assert.doesNotMatch(component, /w-\[calc\(100vw/); + assert.doesNotMatch(component, /xl:grid-cols-2/); + assert.doesNotMatch(component, /lg:grid-cols-\[[^\]]*_2rem_/); +}); diff --git a/docs/superpowers/plans/2026-05-15-semantic-layer-docs.md b/docs/superpowers/plans/2026-05-15-semantic-layer-docs.md new file mode 100644 index 00000000..59e5d5bf --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-semantic-layer-docs.md @@ -0,0 +1,328 @@ +# Semantic Layer Docs Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [x]`) syntax for tracking. 
+ +**Goal:** Add a standalone, scannable Concepts page that explains the semantic-layer internals while positioning KTX as a broader context layer. + +**Architecture:** Implement this as docs-only MDX content inside the existing Fumadocs tree. The new page uses inline MDX diagrams and Fumadocs color tokens, matching the custom diagram pattern already used in `the-context-layer.mdx`. + +**Tech Stack:** MDX, Fumadocs content, Next.js docs site, pnpm workspace commands. + +--- + +### Task 1: Add Concepts Navigation Entry + +**Files:** +- Modify: `docs-site/content/docs/concepts/meta.json` + +- [x] **Step 1: Update the Concepts page order** + +Replace the `pages` array with: + +```json +{ + "title": "Concepts", + "defaultOpen": true, + "pages": ["the-context-layer", "semantic-layer-internals", "context-as-code"] +} +``` + +- [x] **Step 2: Verify JSON parses** + +Run: + +```bash +node -e "JSON.parse(require('node:fs').readFileSync('docs-site/content/docs/concepts/meta.json', 'utf8')); console.log('concepts meta ok')" +``` + +Expected output: + +```text +concepts meta ok +``` + +### Task 2: Create the Semantic Layer Internals Page + +**Files:** +- Create: `docs-site/content/docs/concepts/semantic-layer-internals.mdx` + +- [x] **Step 1: Add frontmatter and opening positioning** + +Create the page with this frontmatter and opening section: + +```mdx +--- +title: Semantic Layer Internals +description: How KTX uses join graphs, grain, and relationship metadata to turn context into safe SQL. +--- + +KTX is a context layer for agents. Its semantic layer is the query-planning core +that turns reviewed context into safe SQL. + +Use this page to understand the mechanics behind KTX's semantic execution: +the join graph, how KTX builds and maintains it, and how that graph prevents +classic analytics errors like fan-out and ambiguous join paths. 
+ +| KTX is | KTX is not just | +|---|---| +| A context layer for agents | A metric definition store | +| A system for ingesting, reviewing, and serving analytics context | A markdown saver | +| A semantic execution layer plus wiki pages, scans, provenance, and agent workflows | A replacement for every BI semantic layer | +``` + +- [x] **Step 2: Add the system-fit diagram** + +Add a `Where the semantic layer fits` section with a custom `not-prose` diagram. +The diagram must show: + +```text +Context inputs -> Semantic layer engine -> Agent workflows +``` + +The semantic-layer box must be visually prominent and list: + +```text +join graph +grain +measures +relationships +safe query planning +``` + +- [x] **Step 3: Add the join graph section** + +Add `## The join graph` with: + +- one short paragraph defining nodes and edges; +- bullets for why the graph matters; +- an inline diagram using `orders`, `customers`, `order_items`, and `refunds`. + +The section must include this claim in plain language: + +```text +The graph lets KTX choose valid paths, reject unsafe paths, and reason about +whether a join preserves or multiplies rows before SQL is generated. +``` + +- [x] **Step 4: Add build and maintenance sections** + +Add `## How KTX builds it` and `## How KTX maintains it`. + +`How KTX builds it` must cover these inputs: + +```text +declared primary keys +declared foreign keys +inferred relationships +dbt, MetricFlow, and LookML imports +query history +analyst review +``` + +`How KTX maintains it` must show this loop: + +```text +ingest evidence -> YAML diff -> validation -> analyst review -> agent use -> corrections +``` + +- [x] **Step 5: Add the fan-out and safe execution sections** + +Add `## Why grain and relationships matter` with a fan-out example comparing +orders joined to order items. 
Include a compact table with columns: + +```text +Problem +What happens +How KTX avoids it +``` + +Add `## How the execution engine uses the graph` with a before/after table: + +```text +Naive SQL shape +Semantic-layer SQL shape +``` + +The safe path must mention: + +```text +pre-aggregates fact measures at their own grain before joining dimensions +``` + +- [x] **Step 6: Add agent outcome links** + +Add a closing `## What this means for agents` section with bullets explaining +that agents can: + +```text +search semantic sources +compile SQL through ktx sl query +validate changes before review +patch YAML and Markdown files in git +explain provenance and metric meaning +``` + +End with links to: + +```mdx +[Writing Context](/docs/guides/writing-context) +[ktx sl](/docs/cli-reference/ktx-sl) +``` + +### Task 3: Add the Cross-Link from The Context Layer + +**Files:** +- Modify: `docs-site/content/docs/concepts/the-context-layer.mdx` + +- [x] **Step 1: Replace the semantic sources paragraph with a scannable block** + +Find the `**Semantic sources**` paragraph under `KTX organizes context into four pillars`. +Replace the long paragraph with: + +```mdx +**Semantic sources** are YAML definitions that describe your data in terms +agents can reason about: + +- source tables or SQL queries; +- row grain; +- typed columns; +- valid joins; +- named measures, filters, and segments. + +This is where "revenue means `sum(amount)` excluding refunds" lives. For the +join graph, fan-out protections, and execution mechanics, read +[Semantic Layer Internals](/docs/concepts/semantic-layer-internals). +``` + +- [x] **Step 2: Confirm the page still owns the product positioning** + +Search the edited file: + +```bash +rg -n "context layer|Semantic Layer Internals|semantic layer - that's a critical component" docs-site/content/docs/concepts/the-context-layer.mdx +``` + +Expected: output includes the existing context-layer framing and the new internals link. 
+ +### Task 4: Fix Mobile Docs Header Overflow + +**Files:** +- Modify: `docs-site/app/docs/[[...slug]]/page.tsx` + +- [x] **Step 1: Stack title actions on narrow screens** + +Replace the non-hero page header wrapper: + +```tsx +
      +``` + +with: + +```tsx +
      +``` + +This keeps desktop layout unchanged while preventing the action buttons from +forcing horizontal overflow on mobile. + +- [x] **Step 2: Allow the docs article to shrink in the layout grid** + +Update the `DocsPage` and `DocsBody` wrappers: + +```tsx + +``` + +```tsx + +``` + +This prevents tables, code blocks, and custom diagrams from forcing the +Fumadocs main article column wider than the mobile viewport, overrides the +library's built-in max-width rule on mobile, aligns the article to the left on +mobile, and preserves the normal centered desktop max width. + +If long words still clip under mobile viewport capture, add the same wrapping +behavior used by the Fumadocs sidebar: + +```tsx + + {page.data.description} + +``` + +```tsx + +``` + +- [x] **Step 3: Recheck mobile render** + +Capture or inspect a 390px-wide render of: + +```text +http://127.0.0.1:3000/docs/concepts/semantic-layer-internals +``` + +Expected: the title, description, action buttons, and positioning block stay +within the viewport. + +### Task 5: Verify Docs Content and Build + +**Files:** +- Check: `docs-site/content/docs/concepts/semantic-layer-internals.mdx` +- Check: `docs-site/content/docs/concepts/the-context-layer.mdx` +- Check: `docs-site/content/docs/concepts/meta.json` +- Check: `docs-site/app/docs/[[...slug]]/page.tsx` + +- [x] **Step 1: Run content checks** + +Run: + +```bash +rg -n "KTX is a context layer|markdown saver|fan-out|join graph|pre-aggregates|Semantic Layer Internals" docs-site/content/docs/concepts +``` + +Expected: matches appear in the new page and the cross-link appears in +`the-context-layer.mdx`. + +- [x] **Step 2: Build the docs site** + +Run: + +```bash +pnpm --filter ktx-docs build +``` + +Expected: build exits 0. + +- [x] **Step 3: Preview locally** + +Run: + +```bash +pnpm --filter ktx-docs dev +``` + +Open: + +```text +http://localhost:3000/docs/concepts/semantic-layer-internals +``` + +Inspect desktop and mobile widths. 
The opening should clearly position KTX as a +context layer, the Concepts navigation should list the new page, and diagrams +should not overlap or produce unreadable text. + +- [x] **Step 4: Commit implementation** + +Run: + +```bash +git status --short +git add docs-site/content/docs/concepts/meta.json docs-site/content/docs/concepts/semantic-layer-internals.mdx docs-site/content/docs/concepts/the-context-layer.mdx docs-site/app/docs/[[...slug]]/page.tsx docs/superpowers/plans/2026-05-15-semantic-layer-docs.md +git commit -m "docs: add semantic layer internals concept" +``` diff --git a/docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md b/docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md new file mode 100644 index 00000000..34d7594c --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md @@ -0,0 +1,166 @@ +# Semantic Layer Docs Design + +**Date:** 2026-05-15 +**Status:** Design - pending implementation plan + +## Goal + +Add a concise Concepts page that explains the semantic layer as the query +planning engine inside KTX's broader context layer. + +The page should make the technical depth visible to skeptical data users +without positioning KTX as only a semantic-layer product. Success means a reader +understands: + +- KTX is a context layer for agents. +- The semantic layer is one core subsystem inside that context layer. +- The join graph, grain declarations, and relationship metadata are what make + generated SQL safer than schema-only or markdown-only approaches. +- KTX maintains this semantic layer through ingest, validation, analyst edits, + and reviewable files. + +## Current State + +The docs currently explain semantic sources in two places: + +- `docs-site/content/docs/concepts/the-context-layer.mdx` describes semantic + sources as one pillar of KTX context. 
+- `docs-site/content/docs/guides/writing-context.mdx` documents the YAML fields + for sources, measures, joins, grain, validation, and common errors. + +That content is useful, but the differentiator is not visually obvious. The +semantic layer is embedded in longer narrative pages, so readers can miss the +hard parts: join graph construction, fan-out prevention, chasm traps, and query +planning. + +## Positioning + +Create a standalone Concepts page with a guarded title such as +`Semantic Layer Internals` or `The Semantic Engine Inside KTX`. + +The first screen must frame the product clearly: + +> KTX is a context layer. Its semantic layer is the query-planning core that +> turns reviewed context into safe SQL. + +The page should avoid a title like `Semantic Layer` by itself because that can +make KTX look like a narrow semantic-layer tool. The page should repeatedly show +the semantic layer between the broader context inputs and the agent workflows it +supports. + +Add a short cross-link from `the-context-layer.mdx` so the existing overview +keeps owning the product category. That section should say the semantic layer is +one critical pillar, then link to the internals page for readers who want the +mechanics. + +## Page Structure + +Add `docs-site/content/docs/concepts/semantic-layer-internals.mdx` and include +it in `docs-site/content/docs/concepts/meta.json` after `the-context-layer`. + +Recommended sections: + +1. `What this page explains` + - One short paragraph. + - A two-column `KTX is / KTX is not just` table. + +2. `Where the semantic layer fits` + - A visual block showing: + `context inputs -> semantic layer engine -> agent workflows`. + - Inputs include semantic YAML, wiki pages, scans, and provenance. + - Outputs include search, SQL generation, explanations, edits, and review. + +3. `The join graph` + - Explain nodes as semantic sources and edges as validated joins. + - Show a small graph with `orders`, `customers`, `order_items`, and + `refunds`. 
+ - Keep text to one or two short paragraphs plus bullets. + +4. `How KTX builds it` + - Show a pipeline from database evidence and imported modeling tools to + reviewable YAML. + - Mention declared keys, inferred relationships, dbt/MetricFlow/LookML + imports, query history, validation, and analyst review. + +5. `How KTX maintains it` + - Show a feedback loop: + ingest evidence -> YAML diff -> validation -> analyst review -> agent use + -> corrections. + - Emphasize that files remain the source of truth. + +6. `Why grain and relationships matter` + - Use the fan-out problem as the central example. + - Compare a naive join against a safe semantic-layer plan. + - Explain many-to-one, one-to-many, many-to-many, chasm traps, and ambiguous + paths in compact bullets. + +7. `How the execution engine uses the graph` + - Explain path selection, unsafe path rejection, pre-aggregation into CTEs, + filter placement, and dialect transpilation. + - Include a small before/after SQL-shape diagram or table. + +8. `What this means for agents` + - Summarize why this is more than saving markdown: + agents can inspect, query, validate, edit, and review the same semantic + files. + - Link to `Writing Context` and `ktx sl`. + +## Scannability Rules + +The implementation should shorten long prose blocks across the touched pages. + +- Keep most text blocks to one or two paragraphs. +- Prefer bullets, tables, diagrams, and compact callout blocks between prose. +- Avoid four-paragraph narrative runs. +- Use diagrams before dense explanations when the concept is spatial. +- Keep examples concrete and copy-pasteable. + +## Visual Direction + +Use the existing docs-site MDX style rather than a new design system. The current +`the-context-layer.mdx` page already uses custom `not-prose` MDX diagrams with +Fumadocs color tokens; the new page should follow that pattern. 
+ +The diagrams should feel like technical product documentation: + +- restrained, dense, and readable; +- high contrast for the semantic-layer engine box; +- visible arrows or adjacency that make flow obvious; +- tables for classification and comparison; +- no marketing hero, decorative gradients, or generic card-heavy layout. + +## Non-goals + +- Do not redesign the whole docs site. +- Do not rename KTX concepts, packages, commands, or directories. +- Do not claim KTX replaces every BI or semantic-layer system. +- Do not add implementation details that are not true in the current codebase. +- Do not expand the page into a long reference for every YAML field; keep that + in `Writing Context`. + +## Verification + +Because this is docs-only work, verification should focus on the docs site: + +- Run the docs build or the narrowest available docs-site type/build check. +- Run formatting or lint checks if the docs package exposes them. +- Preview the page locally and inspect desktop and mobile widths. +- Confirm the page is listed in Concepts navigation. +- Confirm the opening section clearly says KTX is a context layer, not just a + semantic-layer tool. + +If implementation changes only MDX and metadata, TypeScript workspace tests are +not required unless the page introduces shared components. + +## Acceptance Criteria + +- A standalone Concepts page explains the semantic-layer internals. +- The Context Layer page links to the new internals page without making the + overview longer. +- The new page includes diagrams for the system fit, join graph, maintenance + loop, and fan-out-safe execution path. +- Long prose is broken into scannable sections with bullets, tables, and visual + interruptions. +- The positioning consistently says KTX is a context layer with a semantic + execution core. +- Docs-site verification passes or any skipped check is reported with a reason. 
diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index 9fb6903a..cccf1474 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -91,6 +91,9 @@ describe('setup agents', () => { expect(skill).toContain('must not print secrets'); expect(skill).toContain('status --json'); expect(skill).toContain('sl list --json'); + expect(skill).toContain('sl query'); + expect(skill).toContain('--format json'); + expect(skill).not.toContain('sl query --json'); expect(skill).not.toContain('agent '); expect(skill).not.toContain('sql execute'); expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({ @@ -150,6 +153,8 @@ describe('setup agents', () => { expect(skill).not.toContain('`ktx agent'); expect(skill).toContain('status --json'); expect(skill).toContain('sl query'); + expect(skill).toContain('--format json'); + expect(skill).not.toContain('sl query --json'); expect(skill).not.toContain('sql execute'); }); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index a065fc41..ae7e91dc 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -310,7 +310,8 @@ function ktxCommandLine(launcher: KtxCliLauncher, args: string[]): string { } function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLauncher }): string { - const projectDirArgs = ['--json', '--project-dir', input.projectDir]; + const projectDirArgs = ['--project-dir', input.projectDir]; + const jsonProjectDirArgs = ['--json', ...projectDirArgs]; return [ '---', 'name: ktx', @@ -327,9 +328,9 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '', 'Available commands:', '', - `- \`${ktxCommandLine(input.launcher, ['status', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['sl', 'search', '', ...projectDirArgs, '--connection-id', 
''])}\``, + `- \`${ktxCommandLine(input.launcher, ['status', ...jsonProjectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...jsonProjectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'search', '', ...jsonProjectDirArgs, '--connection-id', ''])}\``, `- \`${ktxCommandLine(input.launcher, [ 'sl', 'query', @@ -338,11 +339,13 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '', '--query-file', '', + '--format', + 'json', '--execute', '--max-rows', '100', ])}\``, - `- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '', ...projectDirArgs, '--limit', '10'])}\``, + `- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '', ...jsonProjectDirArgs, '--limit', '10'])}\``, '', 'Use semantic-layer queries before direct database access. Do not print secrets or credential references.', '',