diff --git a/README.md b/README.md index 014ac600..c92371a4 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ artifacts. You can inspect them, commit them, and serve them to any MCP client. - Durable warehouse memory with semantic-layer sources and knowledge pages. - Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server, - BigQuery, Snowflake, and PostHog. + BigQuery, and Snowflake. - Agentic ingest with provenance links, tool transcripts, and replay metadata. - Local semantic-layer query planning and optional query execution. - A stdio MCP server with tools for connections, knowledge, semantic-layer @@ -221,7 +221,6 @@ The MCP server exposes `connection_list`, `knowledge_search`, - `packages/connector-clickhouse`: ClickHouse scan connector. - `packages/connector-mysql`: MySQL scan connector. - `packages/connector-postgres`: Postgres scan connector. -- `packages/connector-posthog`: PostHog scan connector. - `packages/connector-snowflake`: Snowflake scan connector. - `packages/connector-sqlite`: SQLite scan connector. - `packages/connector-sqlserver`: SQL Server scan connector. diff --git a/packages/cli/package.json b/packages/cli/package.json index 0cc4d6e9..e85986a4 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -38,7 +38,6 @@ "@ktx/connector-clickhouse": "workspace:*", "@ktx/connector-mysql": "workspace:*", "@ktx/connector-postgres": "workspace:*", - "@ktx/connector-posthog": "workspace:*", "@ktx/connector-snowflake": "workspace:*", "@ktx/connector-sqlite": "workspace:*", "@ktx/connector-sqlserver": "workspace:*", diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 60129922..124d132d 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -22,7 +22,7 @@ export interface KtxCliPackageInfo { } export interface KtxCliIo { - stdout: { isTTY?: boolean; write(chunk: string): void }; + stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void }; stderr: { write(chunk: string): void }; } diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index c14102ec..d7069578 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -3,20 +3,23 @@ import { describe, expect, it, vi } from 'vitest'; import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js'; import { extractProgressMessage, + createRepainter, initViewState, parseIngestSummary, parseScanSummary, renderContextBuildView, runContextBuild, + viewStateFromSourceProgress, } from './context-build-view.js'; -function makeIo(options: { isTTY?: boolean } = {}) { +function makeIo(options: { isTTY?: boolean; columns?: number } = {}) { let stdout = ''; let stderr = ''; return { io: { stdout: { isTTY: options.isTTY, + columns: options.columns, write: (chunk: string) => { stdout += chunk; }, @@ -98,7 +101,7 @@ describe('parseScanSummary', () => { describe('parseIngestSummary', () => { it('extracts work units and saved memory', () => { - expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('5 work units · 3 wiki, 2 SL'); + expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('3 wiki, 2 SL'); }); it('extracts work units alone when no saved memory', () => { @@ -127,10 +130,18 @@ describe('initViewState', () => { expect(state.contextSources[0].target.connectionId).toBe('dbt-main'); expect(state.frame).toBe(0); }); + + it('initializes global timing fields', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + expect(state.startedAt).toBeNull(); + expect(state.totalElapsedMs).toBe(0); + }); }); describe('renderContextBuildView', () => { - it('renders all-queued state', () => { + it('renders all-queued state with ○ icon and progress counter', () => { const state = initViewState([ { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, @@ -138,6 +149,8 @@ describe('renderContextBuildView', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('Building KTX context'); + expect(output).toContain('(0/2)'); + expect(output).toContain('○'); expect(output).toContain('Primary sources:'); expect(output).toContain('warehouse'); expect(output).toContain('queued'); @@ -145,6 +158,29 @@ describe('renderContextBuildView', () => { expect(output).toContain('dbt-main'); }); + it('renders header with total elapsed time when set', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.totalElapsedMs = 65000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('(0/1 · 1m05s)'); + }); + + it('renders dynamic separator matching header width', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.totalElapsedMs = 120000; + + const output = renderContextBuildView(state, { styled: false }); + const lines = output.split('\n'); + const headerLine = lines.find((l) => l.includes('Building KTX context'))!; + const separatorLine = lines.find((l) => /^─+$/.test(l))!; + expect(separatorLine.length).toBeGreaterThanOrEqual(headerLine.length); + }); + it('renders completed state with summary', () => { const state = initViewState([ { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, @@ -156,6 +192,74 @@ describe('renderContextBuildView', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('42 tables'); expect(output).toContain('1m12s'); + expect(output).toContain('(1/1)'); + }); + + it('renders running target with elapsed time', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'running'; + state.primarySources[0].elapsedMs = 30000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('scanning...'); + expect(output).toContain('(30s)'); + }); + + it('renders running target with progress bar when percentage is available', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'running'; + state.primarySources[0].detailLine = '[50%] Scanning tables...'; + state.primarySources[0].elapsedMs = 15000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('██████░░░░░░'); + expect(output).toContain('50%'); + expect(output).toContain('Scanning tables...'); + expect(output).toContain('(15s)'); + }); + + it('renders completion summary when all targets are done', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, + ]); + state.primarySources[0].status = 'done'; + state.primarySources[0].elapsedMs = 72000; + state.contextSources[0].status = 'done'; + state.contextSources[0].elapsedMs = 34000; + state.totalElapsedMs = 106000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Done in 1m46s · 2 sources processed'); + }); + + it('renders singular source label in completion summary', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'done'; + state.primarySources[0].elapsedMs = 5000; + state.totalElapsedMs = 5000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Done in 5s · 1 source processed'); + }); + + it('does not render completion summary while targets are still active', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, + ]); + state.primarySources[0].status = 'done'; + state.contextSources[0].status = 'running'; + state.totalElapsedMs = 30000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).not.toContain('Done in'); }); it('renders failed state', () => { @@ -178,6 +282,54 @@ describe('renderContextBuildView', () => { expect(output).not.toContain('Primary sources:'); expect(output).toContain('Context sources:'); }); + + it('preserves detach hint while targets are active', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'running'; + + const output = renderContextBuildView(state, { styled: false, showHint: true, projectDir: '/tmp/project' }); + expect(output).toContain('d to detach'); + expect(output).toContain('ktx setup --project-dir /tmp/project'); + expect(output).toContain('to resume'); + }); + + it('omits detach hint when all targets are done', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'done'; + state.totalElapsedMs = 5000; + + const output = renderContextBuildView(state, { styled: false, showHint: true }); + expect(output).not.toContain('d to detach'); + }); +}); + +describe('createRepainter', () => { + it('moves up visual rows, not just newline count, when content wraps', () => { + const io = makeIo({ isTTY: true, columns: 5 }); + const repainter = createRepainter(io.io); + + repainter.paint('abcdefghijk\n'); + repainter.paint('updated\n'); + repainter.paint('done\n'); + + const cursorMoves = [...io.stdout().matchAll(/\u001b\[(\d+)A\r/g)].map((match) => Number(match[1])); + expect(cursorMoves).toEqual([3, 2]); + }); + + it('returns to the start of a single-line frame without moving up when content has no newline', () => { + const io = makeIo({ isTTY: true, columns: 80 }); + const repainter = createRepainter(io.io); + + repainter.paint('hello'); + repainter.paint('bye'); + + expect(io.stdout()).toContain('\rbye'); + expect(io.stdout()).not.toContain('\u001b[1A\rbye'); + }); }); describe('runContextBuild', () => { @@ -298,6 +450,135 @@ describe('runContextBuild', () => { expect(mockExit).toHaveBeenCalledWith(0); expect(io.stdout()).toContain('Context build continuing in the background.'); expect(io.stdout()).toContain('Resume: ktx setup --project-dir /tmp/project'); + expect(io.stdout()).toContain('Status: ktx setup context status --project-dir /tmp/project'); mockExit.mockRestore(); }); + + it('calls onSourceProgress when sources start and finish', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + dbt_main: { driver: 'dbt' }, + }); + const progressUpdates: Array> = []; + const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation)); + + await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled' }, + io.io, + { + executeTarget, + now: () => 1000, + onSourceProgress: (sources) => { + progressUpdates.push(sources.map((s) => ({ connectionId: s.connectionId, status: s.status }))); + }, + }, + ); + + expect(progressUpdates).toHaveLength(4); + expect(progressUpdates[0]).toEqual([ + { connectionId: 'warehouse', status: 'running' }, + { connectionId: 'dbt_main', status: 'queued' }, + ]); + expect(progressUpdates[1]).toEqual([ + { connectionId: 'warehouse', status: 'done' }, + { connectionId: 'dbt_main', status: 'queued' }, + ]); + expect(progressUpdates[2]).toEqual([ + { connectionId: 'warehouse', status: 'done' }, + { connectionId: 'dbt_main', status: 'running' }, + ]); + expect(progressUpdates[3]).toEqual([ + { connectionId: 'warehouse', status: 'done' }, + { connectionId: 'dbt_main', status: 'done' }, + ]); + }); + + it('returns report IDs and artifact paths parsed from target output', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + dbt_main: { driver: 'dbt' }, + }); + const executeTarget = vi.fn(async (target, _args, targetIo) => { + if (target.operation === 'scan') { + targetIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n'); + targetIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n'); + } else { + targetIo.stdout.write('Report: report-dbt-1\n'); + targetIo.stdout.write('Saved memory: 2 wiki, 3 SL\n'); + } + return successResult(target.connectionId, target.driver, target.operation); + }); + + const result = await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled' }, + io.io, + { executeTarget, now: () => 1000 }, + ); + + expect(result).toMatchObject({ + exitCode: 0, + detached: false, + reportIds: ['report-dbt-1'], + artifactPaths: [ + 'raw-sources/warehouse/live-database/sync-1/scan-report.json', + 'raw-sources/warehouse/live-database/sync-1', + ], + }); + }); +}); + +describe('viewStateFromSourceProgress', () => { + it('partitions sources into primary and context groups', () => { + const state = viewStateFromSourceProgress( + [ + { connectionId: 'warehouse', operation: 'scan', status: 'running', startedAtMs: 900 }, + { connectionId: 'dbt-main', operation: 'source-ingest', status: 'queued' }, + ], + 1000, + 500, + ); + + expect(state.primarySources).toHaveLength(1); + expect(state.primarySources[0].target.connectionId).toBe('warehouse'); + expect(state.primarySources[0].status).toBe('running'); + expect(state.primarySources[0].elapsedMs).toBe(100); + expect(state.contextSources).toHaveLength(1); + expect(state.contextSources[0].target.connectionId).toBe('dbt-main'); + expect(state.contextSources[0].status).toBe('queued'); + expect(state.totalElapsedMs).toBe(500); + }); + + it('uses stored elapsedMs for completed sources', () => { + const state = viewStateFromSourceProgress( + [{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }], + 99999, + ); + + expect(state.primarySources[0].elapsedMs).toBe(72000); + expect(state.primarySources[0].summaryText).toBe('42 tables'); + }); + + it('renders the same view format as the foreground build', () => { + const state = viewStateFromSourceProgress( + [ + { connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }, + { connectionId: 'dbt-main', operation: 'source-ingest', status: 'running', startedAtMs: 900 }, + ], + 1000, + 500, + ); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Primary sources:'); + expect(output).toContain('warehouse'); + expect(output).toContain('42 tables'); + expect(output).toContain('Context sources:'); + expect(output).toContain('dbt-main'); + expect(output).toContain('ingesting...'); + }); }); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 2e39537c..571c71dd 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -30,6 +30,8 @@ export interface ContextBuildViewState { primarySources: ContextBuildTargetState[]; contextSources: ContextBuildTargetState[]; frame: number; + startedAt: number | null; + totalElapsedMs: number; } export interface ContextBuildArgs { @@ -42,6 +44,17 @@ export interface ContextBuildArgs { export interface ContextBuildResult { exitCode: number; detached: boolean; + reportIds?: string[]; + artifactPaths?: string[]; +} + +export interface ContextBuildSourceProgressUpdate { + connectionId: string; + operation: 'scan' | 'source-ingest'; + status: 'queued' | 'running' | 'done' | 'failed'; + startedAtMs?: number; + elapsedMs?: number; + summaryText?: string; } export interface ContextBuildDeps { @@ -49,6 +62,7 @@ export interface ContextBuildDeps { now?: () => number; setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; onDetach?: () => void; + onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void; } // --- Rendering --- @@ -79,7 +93,7 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st case 'running': return SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋'; default: - return '·'; + return '○'; } } switch (status) { @@ -90,10 +104,27 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st case 'running': return cyan(SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋'); default: - return dim('·'); + return dim('○'); } } +function extractPercent(detailLine: string | null): number | null { + if (!detailLine) return null; + const match = detailLine.match(/^\[(\d+)%\]/); + return match ? Number(match[1]) : null; +} + +const BAR_WIDTH = 12; +const BAR_FILLED = '█'; +const BAR_EMPTY = '░'; + +function renderProgressBar(percent: number, styled: boolean): string { + const filled = Math.round((percent / 100) * BAR_WIDTH); + const empty = BAR_WIDTH - filled; + const bar = `${BAR_FILLED.repeat(filled)}${BAR_EMPTY.repeat(empty)}`; + return styled ? cyan(bar) : bar; +} + function targetDetail(target: ContextBuildTargetState, styled: boolean): string { if (target.status === 'done') { const parts: string[] = []; @@ -105,7 +136,17 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string return styled ? red('failed') : 'failed'; } if (target.status === 'running') { - return target.detailLine ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); + const percent = extractPercent(target.detailLine); + const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '') + ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); + const elapsed = target.elapsedMs > 0 ? `(${formatDuration(target.elapsedMs)})` : null; + const parts: string[] = []; + if (percent !== null) { + parts.push(`${renderProgressBar(percent, styled)} ${percent}%`); + } + parts.push(progressText); + if (elapsed) parts.push(styled ? dim(elapsed) : elapsed); + return parts.join(' '); } return styled ? dim('queued') : 'queued'; } @@ -136,23 +177,46 @@ function resumeCommand(projectDir?: string): string { export function renderContextBuildView( state: ContextBuildViewState, - options: { styled?: boolean; showHint?: boolean; projectDir?: string } = {}, + options: { styled?: boolean; showHint?: boolean; hintText?: string; projectDir?: string } = {}, ): string { const styled = options.styled ?? true; const width = columnWidth(state); + const allTargets = [...state.primarySources, ...state.contextSources]; + const doneCount = allTargets.filter((t) => t.status === 'done' || t.status === 'failed').length; + const totalCount = allTargets.length; + const hasActive = allTargets.some((t) => t.status === 'running' || t.status === 'queued'); + const allDone = totalCount > 0 && !hasActive; + + const headerParts = ['Building KTX context']; + if (totalCount > 0) { + const progressParts: string[] = [`${doneCount}/${totalCount}`]; + if (state.totalElapsedMs > 0) progressParts.push(formatDuration(state.totalElapsedMs)); + const progress = `(${progressParts.join(' · ')})`; + headerParts.push(styled ? dim(progress) : progress); + } + const header = headerParts.join(' '); + const headerPlainLength = header.replace(/\x1b\[[0-9;]*m/g, '').length; + const separator = '─'.repeat(Math.max(21, headerPlainLength)); + const lines: string[] = [ '', - 'Building KTX context', - '─────────────────────', + header, + separator, ...renderTargetGroup('Primary sources', state.primarySources, state.frame, styled, width), ...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width), '', ]; - const hasActive = [...state.primarySources, ...state.contextSources].some( - (t) => t.status === 'running' || t.status === 'queued', - ); + + if (allDone && state.totalElapsedMs > 0) { + const sourcesLabel = totalCount === 1 ? '1 source' : `${totalCount} sources`; + const summary = ` Done in ${formatDuration(state.totalElapsedMs)} · ${sourcesLabel} processed`; + lines.push(styled ? green(summary) : summary); + lines.push(''); + } + if (options.showHint && hasActive) { - const hint = ` d to detach · ${resumeCommand(options.projectDir)} to resume`; + const hintContent = options.hintText ?? `d to detach · ${resumeCommand(options.projectDir)} to resume`; + const hint = ` ${hintContent}`; lines.push(styled ? dim(hint) : hint); lines.push(''); } @@ -162,6 +226,7 @@ export function renderContextBuildView( // --- IO Capture --- const ESC_K_RE = new RegExp(`${ESC.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\[K`, 'g'); +const ANSI_RE = /\x1b\[[0-9;]*m/g; export function extractProgressMessage(chunk: string): string | null { const cleaned = chunk.replace(/^\r/, '').replace(ESC_K_RE, '').replace(/\n$/, '').trim(); @@ -175,12 +240,41 @@ export function parseScanSummary(output: string): string | null { } export function parseIngestSummary(output: string): string | null { - const parts: string[] = []; - const workUnits = output.match(/Work units: (\d+)/); - if (workUnits) parts.push(`${workUnits[1]} work units`); const savedMemory = output.match(/Saved memory: (.+)/); - if (savedMemory) parts.push(savedMemory[1]); - return parts.length > 0 ? parts.join(' · ') : null; + if (savedMemory) return savedMemory[1]; + const workUnits = output.match(/Work units: (\d+)/); + if (workUnits) return `${workUnits[1]} work units`; + return null; +} + +function collectOutputMetadata( + output: string, + operation: KtxPublicIngestPlanTarget['operation'], +): { reportIds: string[]; artifactPaths: string[] } { + const reportIds = new Set(); + const artifactPaths = new Set(); + for (const line of output.split(/\r?\n/)) { + const trimmed = line.trim(); + const reportLine = trimmed.match(/^Report:\s*(.+)$/); + if (reportLine) { + const value = reportLine[1].trim(); + if (value && value !== 'none') { + if (operation === 'scan') artifactPaths.add(value); + else reportIds.add(value); + } + } + const rawSourcesLine = trimmed.match(/^Raw sources:\s*(.+)$/); + if (rawSourcesLine) { + const value = rawSourcesLine[1].trim(); + if (value && value !== 'none') artifactPaths.add(value); + } + if (operation === 'source-ingest') { + for (const match of trimmed.matchAll(/\breport=([^\s]+)/g)) { + reportIds.add(match[1]); + } + } + } + return { reportIds: [...reportIds], artifactPaths: [...artifactPaths] }; } interface CapturedIo { @@ -210,19 +304,84 @@ function createCaptureIo(onProgress: (message: string) => void, isTTY: boolean): }; } +// --- Source progress helpers --- + +function collectSourceProgress(targets: ContextBuildTargetState[]): ContextBuildSourceProgressUpdate[] { + return targets.map((t) => ({ + connectionId: t.target.connectionId, + operation: t.target.operation, + status: t.status, + ...(t.startedAt !== null ? { startedAtMs: t.startedAt } : {}), + ...(t.elapsedMs > 0 ? { elapsedMs: t.elapsedMs } : {}), + ...(t.summaryText ? { summaryText: t.summaryText } : {}), + })); +} + +export function viewStateFromSourceProgress( + sources: ContextBuildSourceProgressUpdate[], + now: number, + startedAtMs?: number, +): ContextBuildViewState { + const makeTarget = (s: ContextBuildSourceProgressUpdate): ContextBuildTargetState => ({ + target: { connectionId: s.connectionId, driver: '', operation: s.operation, debugCommand: '', steps: [] }, + status: s.status, + detailLine: null, + summaryText: s.summaryText ?? null, + startedAt: s.startedAtMs ?? null, + elapsedMs: s.status === 'running' && s.startedAtMs ? now - s.startedAtMs : (s.elapsedMs ?? 0), + }); + + return { + primarySources: sources.filter((s) => s.operation === 'scan').map(makeTarget), + contextSources: sources.filter((s) => s.operation === 'source-ingest').map(makeTarget), + frame: 0, + startedAt: startedAtMs ?? null, + totalElapsedMs: startedAtMs ? now - startedAtMs : 0, + }; +} + // --- Repaint --- -function createRepainter(io: KtxCliIo) { - let lastLineCount = 0; +export function createRepainter(io: KtxCliIo) { + let hasPainted = false; + let lastCursorUpRows = 0; + + const terminalColumns = () => { + for (const columns of [io.stdout.columns, process.stdout.columns]) { + if (typeof columns === 'number' && Number.isFinite(columns) && columns > 0) return columns; + } + return 80; + }; + + const visualRows = (line: string, columns: number) => { + const plainLength = line.replace(ANSI_RE, '').length; + return Math.max(1, Math.ceil(plainLength / columns)); + }; + + const cursorUpRowsAfterWrite = (content: string) => { + const columns = terminalColumns(); + const endsWithNewline = content.endsWith('\n'); + const lines = content.split('\n'); + return lines.reduce((sum, line, index) => { + if (index === lines.length - 1) { + return endsWithNewline ? sum : sum + Math.max(0, visualRows(line, columns) - 1); + } + return sum + visualRows(line, columns); + }, 0); + }; return { paint(content: string) { - if (lastLineCount > 0) { - io.stdout.write(`${ESC}[${lastLineCount}A\r`); + if (hasPainted) { + if (lastCursorUpRows > 0) { + io.stdout.write(`${ESC}[${lastCursorUpRows}A`); + } + io.stdout.write('\r'); } - io.stdout.write(content); + io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`)); io.stdout.write(`${ESC}[J`); - lastLineCount = (content.match(/\n/g) ?? []).length; + hasPainted = true; + lastCursorUpRows = cursorUpRowsAfterWrite(content); }, }; } @@ -258,7 +417,7 @@ function spawnBackgroundBuild(projectDir: string): { logPath: string } | null { // --- Keystroke handling --- -function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { +export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { const stdin = process.stdin; if (!stdin.isTTY || typeof stdin.setRawMode !== 'function') { return null; @@ -289,6 +448,8 @@ export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuil primarySources: targets.filter((t) => t.operation === 'scan').map(makeTargetState), contextSources: targets.filter((t) => t.operation === 'source-ingest').map(makeTargetState), frame: 0, + startedAt: null, + totalElapsedMs: 0, }; } @@ -303,6 +464,8 @@ export async function runContextBuild( const isTTY = io.stdout.isTTY === true; const nowFn = deps.now ?? (() => Date.now()); + state.startedAt = nowFn(); + const repainter = isTTY ? createRepainter(io) : null; const viewOpts = { styled: true, projectDir: args.projectDir }; const paint = (hint: boolean) => repainter?.paint(renderContextBuildView(state, { ...viewOpts, showHint: hint })); @@ -312,6 +475,9 @@ export async function runContextBuild( if (repainter) { spinnerInterval = setInterval(() => { state.frame++; + if (state.startedAt !== null) { + state.totalElapsedMs = nowFn() - state.startedAt; + } for (const t of [...state.primarySources, ...state.contextSources]) { if (t.status === 'running' && t.startedAt !== null) { t.elapsedMs = nowFn() - t.startedAt; @@ -323,6 +489,8 @@ export async function runContextBuild( const orderedTargets = [...state.primarySources, ...state.contextSources]; const execTarget = deps.executeTarget ?? executePublicIngestTarget; + const reportIds = new Set(); + const artifactPaths = new Set(); let detached = false; let cleanupKeystroke: (() => void) | null = null; @@ -339,8 +507,8 @@ export async function runContextBuild( const bg = spawnBackgroundBuild(args.projectDir); io.stdout.write('\n\nContext build continuing in the background.\n'); if (bg) io.stdout.write(`Log: ${bg.logPath}\n`); - io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`); io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`); + io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`); process.exit(0); }, () => { @@ -370,6 +538,7 @@ export async function runContextBuild( targetState.status = 'running'; targetState.startedAt = nowFn(); paint(true); + deps.onSourceProgress?.(collectSourceProgress(orderedTargets)); const capture = createCaptureIo( (message) => { @@ -386,20 +555,29 @@ export async function runContextBuild( targetState.status = failed ? 'failed' : 'done'; targetState.detailLine = null; if (!failed) { + const capturedOutput = capture.captured(); + const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation); + for (const reportId of metadata.reportIds) reportIds.add(reportId); + for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath); targetState.summaryText = targetState.target.operation === 'scan' - ? parseScanSummary(capture.captured()) - : parseIngestSummary(capture.captured()); + ? parseScanSummary(capturedOutput) + : parseIngestSummary(capturedOutput); } if (failed) hasFailure = true; paint(true); + deps.onSourceProgress?.(collectSourceProgress(orderedTargets)); } } finally { if (spinnerInterval) clearInterval(spinnerInterval); cleanupKeystroke?.(); } + if (state.startedAt !== null) { + state.totalElapsedMs = nowFn() - state.startedAt; + } + if (detached) { return { exitCode: 0, detached: true }; } @@ -410,5 +588,10 @@ export async function runContextBuild( paint(false); } - return { exitCode: hasFailure ? 1 : 0, detached: false }; + return { + exitCode: hasFailure ? 1 : 0, + detached: false, + ...(reportIds.size > 0 ? { reportIds: [...reportIds] } : {}), + ...(artifactPaths.size > 0 ? { artifactPaths: [...artifactPaths] } : {}), + }; } diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 5c536f0f..5a18938b 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -222,6 +222,39 @@ function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): L }; } +function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const failedWorkUnit = { + ...bundleReportSnapshot().body.workUnits[0], + status: 'failed' as const, + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }; + const nextReport = localFakeBundleReport(jobId, { + id: 'report-failed-1', + runId: 'run-failed-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + body: { + workUnits: [failedWorkUnit], + failedWorkUnits: [failedWorkUnit.unitKey], + }, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + class CliLookerSlWritingAgentRunner extends AgentRunnerService { override runLoop = vi.fn(async (params: RunLoopParams) => { if ( @@ -621,7 +654,10 @@ function makeCliLookerParser() { }; } -function localFakeBundleReport(jobId: string, overrides: Partial = {}): IngestReportSnapshot { +function localFakeBundleReport( + jobId: string, + overrides: Partial> & { body?: Partial } = {}, +): IngestReportSnapshot { const report = bundleReportSnapshot(); return { ...report, @@ -826,6 +862,77 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); }); + it('returns a non-zero code when Metabase fan-out has failed children', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + body: { + failedWorkUnits: ['metabase-db-1'], + workUnits: [ + { + unitKey: 'metabase-db-1', + rawFiles: ['cards/1.json'], + status: 'failed', + reason: 'tool write failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'partial_failure', + totals: { workUnits: 1, failedWorkUnits: 1 }, + children: [ + { + jobId: 'metabase-child-1', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 1, + failedWorkUnits: ['metabase-db-1'], + artifactsWritten: 0, + commitSha: null, + }, + report, + }, + ], + }), + }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain('Metabase fan-out: partial_failure'); + expect(io.stdout()).toContain('Failed work units: 1'); + expect(io.stdout()).toContain('status=error'); + expect(io.stderr()).toBe(''); + }); + it('prints Metabase fan-out progress before the final summary', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); @@ -1143,6 +1250,38 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Diff: +2/~0/-0/=0\n'); }); + it('returns a non-zero code when local ingest reports failed work units', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => failedLocalBundleRun(input, 'local-job-failed')); + + const io = makeIo(); + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'local-job-failed', + }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Status: error\n'); + }); + it('passes the debug LLM request file to local ingest runs', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index d6748991..2e33372c 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -111,6 +111,16 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void } function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIngestIo): void { + const counts = result.children.reduce( + (acc, child) => { + const childCounts = reportActionCounts(child.report); + return { + wikiCount: acc.wikiCount + childCounts.wikiCount, + slCount: acc.slCount + childCounts.slCount, + }; + }, + { wikiCount: 0, slCount: 0 }, + ); io.stdout.write(`Metabase fan-out: ${result.status}\n`); io.stdout.write(`Source: ${result.metabaseConnectionId}\n`); io.stdout.write(`Children: ${result.children.length}\n`); @@ -118,10 +128,11 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng io.stdout.write(`Work units: ${result.totals.workUnits}\n`); io.stdout.write(`Failed work units: ${result.totals.failedWorkUnits}\n`); } + io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); for (const child of result.children) { const status = reportStatus(child.report); io.stdout.write( - `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId}\n`, + `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`, ); } } @@ -326,7 +337,7 @@ export async function runKtxIngest( } else { writeMetabaseFanoutStatus(result, io); } - return 0; + return result.status === 'all_succeeded' ? 0 : 1; } const jobId = deps.jobIdFactory?.(); @@ -377,14 +388,14 @@ export async function runKtxIngest( liveTui?.close(); liveTui = null; io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); - return 0; + return reportStatus(result.report) === 'done' ? 0 : 1; } await writeReportRecord(result.report, runOutputMode, io, { interactive: (args.inputMode ?? 'auto') === 'auto', renderStoredMemoryFlow: deps.renderStoredMemoryFlow, env, }); - return 0; + return reportStatus(result.report) === 'done' ? 0 : 1; } finally { liveTui?.close(); } diff --git a/packages/cli/src/local-scan-connectors.test.ts b/packages/cli/src/local-scan-connectors.test.ts index 13d19c18..0fe57518 100644 --- a/packages/cli/src/local-scan-connectors.test.ts +++ b/packages/cli/src/local-scan-connectors.test.ts @@ -95,29 +95,6 @@ describe('createKtxCliScanConnector', () => { ]); }); - it('does not create a standalone PostHog scan connector', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - await writeFile( - join(tempDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' product:', - ' driver: posthog', - ' api_key: phx_test', - ' project_id: "157881"', - ' readonly: true', - '', - ].join('\n'), - 'utf-8', - ); - const project = await loadKtxProject({ projectDir: tempDir }); - - await expect(createKtxCliScanConnector(project, 'product')).rejects.toThrow( - 'Connection "product" uses driver "posthog", which has no native standalone KTX scan connector', - ); - }); - it('throws for structural daemon-only fallback configs', async () => { await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); await writeFile( diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts index e00b11da..13d8f364 100644 --- a/packages/cli/src/public-ingest.test.ts +++ b/packages/cli/src/public-ingest.test.ts @@ -80,13 +80,6 @@ describe('buildPublicIngestPlan', () => { ); }); - it('does not plan PostHog connections as CLI ingest targets', () => { - const project = projectWithConnections({ product: { driver: 'posthog' } }); - - expect(() => - buildPublicIngestPlan(project, { projectDir: '/tmp/project', targetConnectionId: 'product', all: false }), - ).toThrow('Connection "product" uses unsupported public ingest driver "posthog"'); - }); }); describe('runKtxPublicIngest', () => { diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index 75d2ba51..d5ced403 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { + formatInstallSummary, plannedKtxAgentFiles, readKtxAgentInstallManifest, removeKtxAgentInstall, @@ -37,11 +38,13 @@ describe('setup agents', () => { it('plans project-scoped CLI and MCP files for every target', () => { expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'both' })).toEqual([ - { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md') }, + { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' }, { kind: 'json-key', path: join(tempDir, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([ - { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md') }, + { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.codex/instructions/ktx.md'), role: 'rule' }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'mcp' })).toEqual([ { kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, @@ -154,6 +157,7 @@ describe('setup agents', () => { await expect(removeKtxAgentInstall(tempDir, io.io)).resolves.toBe(0); await expect(stat(join(tempDir, '.claude/skills/ktx/SKILL.md'))).rejects.toThrow(); + await expect(stat(join(tempDir, '.claude/rules/ktx.md'))).rejects.toThrow(); await expect(stat(join(tempDir, '.claude/skills/ktx/keep.txt'))).resolves.toBeDefined(); await expect(readKtxAgentInstallManifest(tempDir)).resolves.toEqual(null); }); @@ -214,4 +218,71 @@ describe('setup agents', () => { }), ); }); + + it('prints per-agent install summary after successful installation', async () => { + const io = makeIo(); + + await runKtxSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + yes: true, + agents: true, + target: 'claude-code', + scope: 'project', + mode: 'both', + skipAgents: false, + }, + io.io, + ); + + const output = io.stdout(); + expect(output).toContain('Agent integration complete'); + expect(output).toContain('Claude Code'); + expect(output).toContain('+ Skill installed'); + expect(output).toContain('.claude/skills/ktx/SKILL.md'); + expect(output).toContain('+ Rule installed'); + expect(output).toContain('.claude/rules/ktx.md'); + expect(output).toContain('+ MCP config added'); + expect(output).toContain('.mcp.json'); + }); + + it('formats summary with relative paths for project scope', () => { + const summary = formatInstallSummary( + [{ target: 'cursor', scope: 'project', mode: 'both' }], + [ + { kind: 'file', path: join(tempDir, '.cursor/rules/ktx.mdc') }, + { kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, + ], + tempDir, + ); + + expect(summary).toContain('Cursor'); + expect(summary).toContain('+ Rule installed'); + expect(summary).toContain('.cursor/rules/ktx.mdc'); + expect(summary).toContain('+ MCP config added'); + expect(summary).toContain('.cursor/mcp.json'); + expect(summary).not.toContain(tempDir); + }); + + it('formats summary with multiple agent targets', () => { + const summary = formatInstallSummary( + [ + { target: 'claude-code', scope: 'project', mode: 'cli' }, + { target: 'codex', scope: 'project', mode: 'mcp' }, + ], + [ + { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' }, + { kind: 'json-key', path: join(tempDir, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] }, + ], + tempDir, + ); + + expect(summary).toContain('Claude Code'); + expect(summary).toContain('+ Skill installed'); + expect(summary).toContain('+ Rule installed'); + expect(summary).toContain('Codex'); + expect(summary).toContain('+ MCP config added'); + }); }); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index 32fc326c..67394861 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -1,5 +1,5 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; -import { dirname, join, resolve } from 'node:path'; +import { dirname, join, relative, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { cancel, isCancel, multiselect, select } from '@clack/prompts'; import { loadKtxProject, markKtxSetupStepComplete, serializeKtxProjectConfig } from '@ktx/context/project'; @@ -38,7 +38,10 @@ export interface KtxAgentInstallManifest { projectDir: string; installedAt: string; installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>; - entries: Array<{ kind: 'file'; path: string } | { kind: 'json-key'; path: string; jsonPath: string[] }>; + entries: Array< + | { kind: 'file'; path: string; role?: 'skill' | 'rule' } + | { kind: 'json-key'; path: string; jsonPath: string[] } + >; } type InstallEntry = KtxAgentInstallManifest['entries'][number]; @@ -60,11 +63,17 @@ export function plannedKtxAgentFiles(input: { }): InstallEntry[] { if (input.scope === 'global') { if (input.target === 'claude-code') { - return [{ kind: 'file', path: join(process.env.HOME ?? '', '.claude/skills/ktx/SKILL.md') }]; + const home = process.env.HOME ?? ''; + return [ + { kind: 'file', path: join(home, '.claude/skills/ktx/SKILL.md'), role: 'skill' as const }, + { kind: 'file', path: join(home, '.claude/rules/ktx.md'), role: 'rule' as const }, + ]; } if (input.target === 'codex') { + const codexHome = process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex'); return [ - { kind: 'file', path: join(process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex'), 'skills/ktx/SKILL.md') }, + { kind: 'file', path: join(codexHome, 'skills/ktx/SKILL.md'), role: 'skill' as const }, + { kind: 'file', path: join(codexHome, 'instructions/ktx.md'), role: 'rule' as const }, ]; } throw new Error(`Global ${input.target} installation is not supported; use --project.`); @@ -72,12 +81,16 @@ export function plannedKtxAgentFiles(input: { const root = resolve(input.projectDir); const cliEntries: Partial> = { - 'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md') }, - codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') }, + 'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, cursor: { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') }, opencode: { kind: 'file', path: join(root, '.opencode/commands/ktx.md') }, universal: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') }, }; + const ruleEntries: Partial> = { + 'claude-code': { kind: 'file', path: join(root, '.claude/rules/ktx.md'), role: 'rule' }, + codex: { kind: 'file', path: join(root, '.codex/instructions/ktx.md'), role: 'rule' }, + }; const mcpEntries: Record = { 'claude-code': { kind: 'json-key', path: join(root, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, codex: { kind: 'json-key', path: join(root, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] }, @@ -86,7 +99,7 @@ export function plannedKtxAgentFiles(input: { universal: { kind: 'json-key', path: join(root, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] }, }; return [ - ...(input.mode === 'cli' || input.mode === 'both' ? [cliEntries[input.target]] : []), + ...(input.mode === 'cli' || input.mode === 'both' ? [cliEntries[input.target], ruleEntries[input.target]] : []), ...(input.mode === 'mcp' || input.mode === 'both' ? [mcpEntries[input.target]] : []), ].filter((entry): entry is InstallEntry => entry !== undefined); } @@ -163,6 +176,17 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun ].join('\n'); } +function ruleInstructionContent(input: { projectDir: string }): string { + return [ + `Use the \`ktx\` CLI to query local semantic context, wiki knowledge, and execute safe SQL for this project (\`--project-dir ${input.projectDir}\`).`, + '', + 'Use when the user asks about data schemas, metrics, dimensions, database structure, or wants to run SQL queries.', + '', + 'Do not use for general programming, code review, or tasks unrelated to data and analytics.', + '', + ].join('\n'); +} + function mcpConfig(projectDir: string, launcher: KtxCliLauncher): Record { return { command: launcher.command, @@ -295,6 +319,55 @@ function createPromptAdapter(): KtxSetupAgentsPromptAdapter { }; } +const targetDisplayNames: Record = { + 'claude-code': 'Claude Code', + codex: 'Codex', + cursor: 'Cursor', + opencode: 'OpenCode', + universal: 'Universal .agents', +}; + +const fileEntryLabels: Record = { + 'claude-code': 'Skill installed', + codex: 'Skill installed', + cursor: 'Rule installed', + opencode: 'Command installed', + universal: 'Skill installed', +}; + +export function formatInstallSummary( + installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>, + entries: InstallEntry[], + projectDir: string, +): string { + const entriesByTarget = new Map(); + let idx = 0; + for (const install of installs) { + const planned = plannedKtxAgentFiles({ projectDir, ...install }); + entriesByTarget.set(install.target, entries.slice(idx, idx + planned.length)); + idx += planned.length; + } + + const lines: string[] = []; + for (const install of installs) { + const targetEntries = entriesByTarget.get(install.target) ?? []; + lines.push(` ${targetDisplayNames[install.target]}`); + for (const entry of targetEntries) { + const displayPath = + install.scope === 'global' ? entry.path : relative(projectDir, entry.path); + if (entry.kind === 'file') { + const label = entry.role === 'rule' ? 'Rule installed' : fileEntryLabels[install.target]; + lines.push(` + ${label}`); + lines.push(` ${displayPath}`); + } else { + lines.push(` + MCP config added`); + lines.push(` ${displayPath}`); + } + } + } + return lines.join('\n'); +} + async function installTarget(input: { projectDir: string; target: KtxAgentTarget; @@ -305,8 +378,12 @@ async function installTarget(input: { const launcher = ktxCliLauncher(); for (const entry of entries) { if (entry.kind === 'file') { + const content = + entry.role === 'rule' + ? ruleInstructionContent({ projectDir: input.projectDir }) + : cliInstructionContent({ projectDir: input.projectDir, launcher }); await mkdir(dirname(entry.path), { recursive: true }); - await writeFile(entry.path, cliInstructionContent({ projectDir: input.projectDir, launcher }), 'utf-8'); + await writeFile(entry.path, content, 'utf-8'); } else { await writeJsonKey(entry.path, entry.jsonPath, mcpConfig(input.projectDir, launcher)); } @@ -362,7 +439,6 @@ export async function runKtxSetupAgentsStep( { value: 'cursor', label: 'Cursor' }, { value: 'opencode', label: 'OpenCode' }, { value: 'universal', label: 'Universal .agents' }, - { value: 'back', label: 'Back' }, ], required: true, })) as KtxAgentTarget[]); @@ -378,7 +454,7 @@ export async function runKtxSetupAgentsStep( for (const install of installs) entries.push(...(await installTarget({ projectDir: args.projectDir, ...install }))); await writeManifest(args.projectDir, mergeManifest(args.projectDir, await readKtxAgentInstallManifest(args.projectDir), installs, entries)); await markAgentsComplete(args.projectDir); - io.stdout.write(`Agent integration installed for ${installs.map((install) => install.target).join(', ')}.\n`); + io.stdout.write(`\nAgent integration complete\n\n${formatInstallSummary(installs, entries, args.projectDir)}\n`); return { status: 'ready', projectDir: args.projectDir, installs }; } catch (error) { io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 2cebff8d..0d803b7b 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -166,7 +166,12 @@ describe('setup context build state', () => { it('runs setup context build, verifies readiness, and marks context complete', async () => { await writeReadyProject(tempDir); const io = makeIo(); - const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); + const runContextBuildMock = vi.fn(async () => ({ + exitCode: 0, + detached: false, + reportIds: ['report-docs-1'], + artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'], + })); const verifyContextReady = vi.fn(async () => ({ ready: true, agentContextReady: true, @@ -204,6 +209,8 @@ describe('setup context build state', () => { runId: 'setup-context-local-abc123', status: 'completed', completedAt: '2026-05-09T10:00:00.000Z', + reportIds: ['report-docs-1'], + artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'], }); expect(io.stdout()).toContain('KTX context is ready for agents.'); }); @@ -340,6 +347,207 @@ describe('setup context build state', () => { expect(io.stderr()).toContain('No primary or context sources are configured for a KTX context build.'); }); + it('watches an already-running setup context build from the resume prompt', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-resume-watch', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'), + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-resume-watch', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'), + }); + }; + const select = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => { + expect(options.options.map((option) => option.label)).toContain('Watch progress'); + return 'watch'; + }); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + sleep: completeRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-resume-watch' }); + expect(io.stdout()).toContain('KTX context built: detached'); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + + it('auto-watches a running build without prompting when autoWatch is true', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-auto-watch', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'), + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-auto-watch', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'), + }); + }; + const select = vi.fn(async () => { + throw new Error('should not prompt when autoWatch is true'); + }); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + sleep: completeRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-auto-watch' }); + expect(select).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + + it('renders the progress view when watching a build with sourceProgress', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-progress', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-progress'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 }, + { connectionId: 'docs', operation: 'source-ingest' as const, status: 'running' as const, startedAtMs: Date.now() - 5000 }, + ], + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-progress', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-progress'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 }, + { connectionId: 'docs', operation: 'source-ingest' as const, status: 'done' as const, elapsedMs: 60000 }, + ], + }); + }; + const select = vi.fn(async () => 'watch'); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + sleep: completeRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-progress' }); + + const output = io.stdout(); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Primary sources:'); + expect(output).toContain('warehouse'); + expect(output).toContain('Context sources:'); + expect(output).toContain('docs'); + expect(output).not.toContain('KTX context built: detached'); + }); + + it('supports d to detach from the progress watch view', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-detach', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-detach'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'running' as const, startedAtMs: Date.now() }, + ], + }); + const io = makeIo(); + let triggerDetach: (() => void) | null = null; + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + io.io, + { + sleep: async () => { triggerDetach?.(); }, + watchIntervalMs: 1, + setupKeystroke: (onDetach) => { + triggerDetach = onDetach; + return () => {}; + }, + }, + ), + ).resolves.toMatchObject({ status: 'detached' }); + + const output = io.stdout(); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Context build continuing in the background.'); + expect(output).toContain('Resume: ktx setup --project-dir'); + }); + it('prints JSON setup context command status with watch and resume commands', async () => { await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true }); await writeKtxSetupContextState(tempDir, { @@ -372,6 +580,48 @@ describe('setup context build state', () => { }); }); + it('watches setup context command status until the run reaches a terminal state', async () => { + await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true }); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-watch', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-watch'), + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-watch', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-watch'), + }); + }; + + await expect( + runKtxSetupContextCommand( + { command: 'watch', projectDir: tempDir, runId: 'setup-context-local-watch', inputMode: 'disabled' }, + io.io, + { sleep: completeRun, watchIntervalMs: 1 }, + ), + ).resolves.toBe(0); + expect(io.stdout()).toContain('KTX context built: running'); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + it('runs direct build commands without asking for setup confirmation first', async () => { await writeReadyProject(tempDir); const io = makeIo(); diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 042c5b1e..f88635f4 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -10,7 +10,14 @@ import { } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { buildPublicIngestPlan } from './public-ingest.js'; -import { runContextBuild } from './context-build-view.js'; +import { + type ContextBuildSourceProgressUpdate, + createRepainter, + defaultSetupKeystroke, + renderContextBuildView, + runContextBuild, + viewStateFromSourceProgress, +} from './context-build-view.js'; import { withMenuOptionsSpacing } from './prompt-navigation.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; @@ -45,6 +52,7 @@ export interface KtxSetupContextState { retryableFailedTargets: string[]; commands: KtxSetupContextCommands; failureReason?: string; + sourceProgress?: ContextBuildSourceProgressUpdate[]; } export interface KtxSetupContextStatusSummary { @@ -80,6 +88,7 @@ export interface KtxSetupContextStepArgs { forcePrompt?: boolean; allowEmpty?: boolean; prompt?: boolean; + autoWatch?: boolean; } export type KtxSetupContextCommandArgs = @@ -99,6 +108,9 @@ export interface KtxSetupContextDeps { now?: () => Date; runContextBuild?: typeof runContextBuild; verifyContextReady?: (projectDir: string) => Promise; + sleep?: (ms: number) => Promise; + watchIntervalMs?: number; + setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; } interface KtxSetupContextTargets { @@ -109,6 +121,7 @@ interface KtxSetupContextTargets { const SETUP_CONTEXT_STATE_PATH = ['.ktx', 'setup', 'context-build.json'] as const; const LIVE_DATABASE_ADAPTER = 'live-database'; const SCAN_REPORT_FILE = 'scan-report.json'; +const DEFAULT_WATCH_INTERVAL_MS = 2_000; function createPromptAdapter(): KtxSetupContextPromptAdapter { return { @@ -193,9 +206,34 @@ function normalizeState(projectDir: string, value: unknown): KtxSetupContextStat : [], commands: contextBuildCommands(projectDir, runId), ...(typeof record.failureReason === 'string' ? { failureReason: record.failureReason } : {}), + ...(normalizeSourceProgress(record.sourceProgress) ? { sourceProgress: normalizeSourceProgress(record.sourceProgress) } : {}), }; } +const VALID_SOURCE_OPERATIONS = new Set(['scan', 'source-ingest']); +const VALID_SOURCE_STATUSES = new Set(['queued', 'running', 'done', 'failed']); + +function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpdate[] | undefined { + if (!Array.isArray(value)) return undefined; + const entries: ContextBuildSourceProgressUpdate[] = []; + for (const item of value) { + if (typeof item !== 'object' || item === null || Array.isArray(item)) continue; + const rec = item as Record; + if (typeof rec.connectionId !== 'string') continue; + if (!VALID_SOURCE_OPERATIONS.has(String(rec.operation))) continue; + if (!VALID_SOURCE_STATUSES.has(String(rec.status))) continue; + entries.push({ + connectionId: rec.connectionId, + operation: rec.operation as 'scan' | 'source-ingest', + status: rec.status as 'queued' | 'running' | 'done' | 'failed', + ...(typeof rec.startedAtMs === 'number' ? { startedAtMs: rec.startedAtMs } : {}), + ...(typeof rec.elapsedMs === 'number' ? { elapsedMs: rec.elapsedMs } : {}), + ...(typeof rec.summaryText === 'string' ? { summaryText: rec.summaryText } : {}), + }); + } + return entries.length > 0 ? entries : undefined; +} + export async function readKtxSetupContextState(projectDir: string): Promise { const filePath = statePath(projectDir); if (!(await pathExists(filePath))) { @@ -514,6 +552,7 @@ async function runBuild( }; await writeKtxSetupContextState(args.projectDir, runningState); + let lastSourceProgress: ContextBuildSourceProgressUpdate[] | undefined; const contextBuild = deps.runContextBuild ?? runContextBuild; const buildResult = await contextBuild( project, @@ -532,14 +571,39 @@ async function runBuild( ...runningState, status: 'detached', updatedAt: new Date().toISOString(), + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); writeFileSync(statePath(resolvedDir), `${JSON.stringify(detachedState, null, 2)}\n`); }, + onSourceProgress: (sources) => { + lastSourceProgress = sources; + try { + const resolvedDir = resolve(args.projectDir); + mkdirSync(join(resolvedDir, '.ktx', 'setup'), { recursive: true }); + const progressState = normalizeState(resolvedDir, { + ...runningState, + sourceProgress: sources, + updatedAt: new Date().toISOString(), + }); + writeFileSync(statePath(resolvedDir), `${JSON.stringify(progressState, null, 2)}\n`); + } catch { + // Progress reporting is supplementary — don't crash the build + } + }, }, ); + const completedReportIds = buildResult.reportIds ?? []; + const completedArtifactPaths = buildResult.artifactPaths ?? []; if (buildResult.detached) { const updatedAt = now().toISOString(); - await writeKtxSetupContextState(args.projectDir, { ...runningState, status: 'detached', updatedAt }); + await writeKtxSetupContextState(args.projectDir, { + ...runningState, + status: 'detached', + updatedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), + }); return { status: 'detached', projectDir: args.projectDir, runId }; } if (buildResult.exitCode !== 0) { @@ -548,8 +612,11 @@ async function runBuild( ...runningState, status: 'failed', updatedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds], failureReason: 'Context build failed.', + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); return { status: 'failed', projectDir: args.projectDir }; } @@ -561,8 +628,11 @@ async function runBuild( ...runningState, status: 'failed', updatedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, retryableFailedTargets: readiness.failedTargets ?? [], failureReason: readiness.details.join(' '), + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); io.stderr.write('KTX context build did not pass agent-readiness verification.\n'); for (const detail of readiness.details) { @@ -578,7 +648,10 @@ async function runBuild( status: 'completed', updatedAt: completedAt, completedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, retryableFailedTargets: [], + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); writeSuccess(readiness, targets, io); return { status: 'ready', projectDir: args.projectDir, runId }; @@ -632,17 +705,46 @@ export async function runKtxSetupContextStep( (existingState.status === 'running' || existingState.status === 'detached') && args.inputMode !== 'disabled' ) { + if (args.autoWatch) { + const watched = await watchContextStatus( + { + command: 'watch', + projectDir: args.projectDir, + ...(existingState.runId ? { runId: existingState.runId } : {}), + inputMode: args.inputMode, + }, + existingState, + io, + deps, + ); + return setupResultFromWatchedState(args.projectDir, watched.state); + } const prompts = deps.prompts ?? createPromptAdapter(); const choice = await prompts.select({ message: 'A context build is running in the background.\n\n' + - 'You can wait for it to finish, check its status, or start a fresh build.', + 'You can watch it until it finishes, check its status once, or start a fresh build.', options: [ + { value: 'watch', label: 'Watch progress' }, { value: 'status', label: 'Check status' }, { value: 'rebuild', label: 'Start a fresh context build' }, { value: 'back', label: 'Back' }, ], }); + if (choice === 'watch') { + const watched = await watchContextStatus( + { + command: 'watch', + projectDir: args.projectDir, + ...(existingState.runId ? { runId: existingState.runId } : {}), + inputMode: args.inputMode, + }, + existingState, + io, + deps, + ); + return setupResultFromWatchedState(args.projectDir, watched.state); + } if (choice === 'status') { const commands = contextBuildCommands(args.projectDir, existingState.runId); io.stdout.write(`\nRun: ${commands.status}\n`); @@ -698,6 +800,18 @@ function stateMatchesRunId(state: KtxSetupContextState, runId: string | undefine return !runId || state.runId === runId; } +function isActiveStatus(status: KtxSetupContextBuildStatus): boolean { + return status === 'running' || status === 'detached'; +} + +function watchExitCode(status: KtxSetupContextBuildStatus): number { + return status === 'failed' || status === 'interrupted' || status === 'stale' ? 1 : 0; +} + +function defaultSleep(ms: number): Promise { + return new Promise((resolveSleep) => setTimeout(resolveSleep, ms)); +} + function statusPayload(state: KtxSetupContextState): KtxSetupContextStatusSummary { return setupContextStatusFromState(state, { completedStep: state.status === 'completed' }); } @@ -714,6 +828,149 @@ function writeContextStatus(state: KtxSetupContextState, io: KtxCliIo): void { } } +async function watchContextStatus( + args: Extract, + initialState: KtxSetupContextState, + io: KtxCliIo, + deps: KtxSetupContextDeps, +): Promise<{ exitCode: number; state: KtxSetupContextState }> { + if (initialState.sourceProgress && initialState.sourceProgress.length > 0) { + return watchContextStatusWithProgressView(args, initialState, io, deps); + } + return watchContextStatusText(args, initialState, io, deps); +} + +async function watchContextStatusText( + args: Extract, + initialState: KtxSetupContextState, + io: KtxCliIo, + deps: KtxSetupContextDeps, +): Promise<{ exitCode: number; state: KtxSetupContextState }> { + const sleep = deps.sleep ?? defaultSleep; + const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; + let state = initialState; + let lastRenderedStatus = ''; + + io.stdout.write('KTX context build\n'); + while (true) { + const renderedStatus = `${state.status}:${state.updatedAt ?? ''}:${state.completedAt ?? ''}:${state.failureReason ?? ''}`; + if (renderedStatus !== lastRenderedStatus) { + writeContextStatus(state, io); + lastRenderedStatus = renderedStatus; + } + + if (!isActiveStatus(state.status)) { + return { exitCode: watchExitCode(state.status), state }; + } + + await sleep(intervalMs); + state = await readKtxSetupContextState(args.projectDir); + if (!stateMatchesRunId(state, args.runId)) { + io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); + return { exitCode: 1, state }; + } + } +} + +async function watchContextStatusWithProgressView( + args: Extract, + initialState: KtxSetupContextState, + io: KtxCliIo, + deps: KtxSetupContextDeps, +): Promise<{ exitCode: number; state: KtxSetupContextState }> { + const sleep = deps.sleep ?? defaultSleep; + const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; + const isTTY = io.stdout.isTTY === true; + const repainter = isTTY ? createRepainter(io) : null; + const projectDir = resolve(args.projectDir); + const viewOpts = { styled: isTTY, showHint: true, projectDir }; + let state = initialState; + let lastProgressKey = ''; + let detached = false; + + let viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], Date.now(), + state.startedAt ? new Date(state.startedAt).getTime() : undefined); + + const cleanupKeystroke = (isTTY || deps.setupKeystroke) + ? (deps.setupKeystroke ?? defaultSetupKeystroke)( + () => { detached = true; }, + () => { detached = true; }, + ) + : null; + + let spinnerInterval: ReturnType | null = null; + if (repainter) { + repainter.paint(renderContextBuildView(viewState, viewOpts)); + spinnerInterval = setInterval(() => { + viewState.frame++; + const now = Date.now(); + viewState.totalElapsedMs = viewState.startedAt !== null ? now - viewState.startedAt : 0; + for (const t of [...viewState.primarySources, ...viewState.contextSources]) { + if (t.status === 'running' && t.startedAt !== null) { + t.elapsedMs = now - t.startedAt; + } + } + repainter.paint(renderContextBuildView(viewState, viewOpts)); + }, 140); + } + + try { + while (true) { + if (!repainter) { + const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); + if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { + io.stdout.write(renderContextBuildView(viewState, viewOpts)); + lastProgressKey = currentKey; + } + } + + if (!isActiveStatus(state.status)) { + return { exitCode: watchExitCode(state.status), state }; + } + if (detached) break; + + await sleep(intervalMs); + if (detached) break; + + try { + state = await readKtxSetupContextState(args.projectDir); + } catch { + continue; + } + + if (!stateMatchesRunId(state, args.runId)) { + io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); + return { exitCode: 1, state }; + } + + const now = Date.now(); + const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; + viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], now, startedAtMs); + } + } finally { + if (spinnerInterval) clearInterval(spinnerInterval); + cleanupKeystroke?.(); + } + + io.stdout.write('\n\nContext build continuing in the background.\n'); + io.stdout.write(`Resume: ktx setup --project-dir ${projectDir}\n`); + io.stdout.write(`Status: ktx setup context status --project-dir ${projectDir}\n`); + return { exitCode: 0, state }; +} + +function setupResultFromWatchedState(projectDir: string, state: KtxSetupContextState): KtxSetupContextResult { + if (state.status === 'completed') { + return { status: 'ready', projectDir, runId: state.runId ?? 'setup-context-completed' }; + } + if (state.status === 'paused') { + return { status: 'paused', projectDir, runId: state.runId ?? '' }; + } + if (state.status === 'running' || state.status === 'detached') { + return { status: 'detached', projectDir, runId: state.runId ?? '' }; + } + return { status: 'failed', projectDir }; +} + export async function runKtxSetupContextCommand( args: KtxSetupContextCommandArgs, io: KtxCliIo, @@ -744,9 +1001,7 @@ export async function runKtxSetupContextCommand( } if (args.command === 'watch') { - io.stdout.write('KTX context build\n'); - writeContextStatus(state, io); - return 0; + return (await watchContextStatus(args, state, io, deps)).exitCode; } const updatedAt = new Date().toISOString(); diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 41b12f95..3f268ce8 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -962,10 +962,95 @@ describe('setup databases step', () => { }); }); + it('prompts for discovered Postgres schemas before the first scan', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + selectValues: ['url'], + textValues: ['', 'env:DATABASE_URL'], + multiselectValues: [['orbit_analytics', 'orbit_raw']], + }); + const testConnection = vi.fn(async () => 0); + const scanConnection = vi.fn(async asyncScanProjectDir => { + const config = parseKtxProjectConfig(await readFile(join(asyncScanProjectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['postgres-warehouse']).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw'], + }); + return 0; + }); + const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseDrivers: ['postgres'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { prompts, testConnection, scanConnection, listSchemas }, + ); + + expect(result.status).toBe('ready'); + expect(listSchemas).toHaveBeenCalledWith(tempDir, 'postgres-warehouse'); + expect(prompts.multiselect).toHaveBeenCalledWith({ + message: expect.stringContaining('PostgreSQL schemas to scan'), + options: [ + { value: 'orbit_analytics', label: 'orbit_analytics' }, + { value: 'orbit_raw', label: 'orbit_raw' }, + { value: 'public', label: 'public' }, + ], + initialValues: ['orbit_analytics', 'orbit_raw'], + required: true, + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['postgres-warehouse']).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw'], + }); + expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw'); + }); + + it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({}); + const testConnection = vi.fn(async () => 0); + const scanConnection = vi.fn(async asyncScanProjectDir => { + const config = parseKtxProjectConfig(await readFile(join(asyncScanProjectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw', 'public'], + }); + return 0; + }); + const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { prompts, testConnection, scanConnection, listSchemas }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.multiselect).not.toHaveBeenCalled(); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw', 'public'], + }); + expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw, public'); + }); + it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => { const io = makeIo(); const testConnection = vi.fn(async () => 0); const scanConnection = vi.fn(async () => 0); + const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); const result = await runKtxSetupDatabasesStep( { @@ -978,10 +1063,11 @@ describe('setup databases step', () => { skipDatabases: false, }, io.io, - { testConnection, scanConnection }, + { testConnection, scanConnection, listSchemas }, ); expect(result.status).toBe('ready'); + expect(listSchemas).not.toHaveBeenCalled(); expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything()); expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything()); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index e22f4741..1838725d 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -52,6 +52,7 @@ export interface KtxSetupDatabasesPromptAdapter { message: string; options: Array<{ value: string; label: string }>; required?: boolean; + initialValues?: string[]; }): Promise; select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; @@ -76,6 +77,7 @@ export interface KtxSetupDatabasesDeps { prompts?: KtxSetupDatabasesPromptAdapter; testConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; scanConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; + listSchemas?: (projectDir: string, connectionId: string) => Promise; historicSqlProbe?: KtxSetupHistoricSqlProbe; } @@ -255,6 +257,21 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr } } +async function defaultListSchemas(projectDir: string, connectionId: string): Promise { + const project = await loadKtxProject({ projectDir }); + const connection = project.config.connections[connectionId]; + const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres'); + if (!isKtxPostgresConnectionConfig(connection)) { + return []; + } + const connector = new KtxPostgresScanConnector({ connectionId, connection }); + try { + return await connector.listSchemas(); + } finally { + await connector.cleanup(); + } +} + function existingConnectionIdsByDriver( connections: Record, driver: KtxSetupDatabaseDriver, @@ -814,6 +831,113 @@ async function writeConnectionConfig(input: { } } +function configuredSchemas(connection: KtxProjectConnectionConfig | undefined): string[] { + if (!connection) return []; + if (Array.isArray(connection.schemas)) { + return connection.schemas + .filter((schema): schema is string => typeof schema === 'string' && schema.trim().length > 0) + .map((schema) => schema.trim()); + } + return typeof connection.schema === 'string' && connection.schema.trim().length > 0 ? [connection.schema.trim()] : []; +} + +function defaultSchemaSelection(schemas: string[]): string[] { + const nonPublic = schemas.filter((schema) => schema !== 'public'); + return nonPublic.length > 0 ? nonPublic : schemas; +} + +async function writeConnectionSchemas(input: { + projectDir: string; + connectionId: string; + schemas: string[]; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + const connection = project.config.connections[input.connectionId]; + if (!connection) return; + const { schema: _schema, ...connectionWithoutLegacySchema } = connection; + await writeConnectionConfig({ + projectDir: input.projectDir, + connectionId: input.connectionId, + connection: { + ...connectionWithoutLegacySchema, + schemas: unique(input.schemas), + }, + }); +} + +async function maybeConfigurePostgresSchemas(input: { + projectDir: string; + connectionId: string; + args: KtxSetupDatabasesArgs; + prompts: KtxSetupDatabasesPromptAdapter; + deps: KtxSetupDatabasesDeps; + io: KtxCliIo; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + const connection = project.config.connections[input.connectionId]; + if (normalizeDriver(connection?.driver) !== 'postgres') { + return true; + } + + if (configuredSchemas(connection).length > 0) { + return true; + } + + if (input.args.databaseSchemas.length > 0) { + await writeConnectionSchemas({ + projectDir: input.projectDir, + connectionId: input.connectionId, + schemas: input.args.databaseSchemas, + }); + return true; + } + + let discoveredSchemas: string[]; + try { + discoveredSchemas = unique( + await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId), + ); + } catch (error) { + input.io.stderr.write( + `Could not discover PostgreSQL schemas for ${input.connectionId}; continuing with existing schema scope. ` + + `Pass --database-schema to set it explicitly. ${error instanceof Error ? error.message : String(error)}\n`, + ); + return true; + } + if (discoveredSchemas.length === 0) { + return true; + } + + let selectedSchemas: string[]; + if (input.args.inputMode === 'disabled' || discoveredSchemas.length === 1) { + selectedSchemas = discoveredSchemas; + } else { + const initialValues = defaultSchemaSelection(discoveredSchemas); + const choices = await input.prompts.multiselect({ + message: withMultiselectNavigation( + 'PostgreSQL schemas to scan\nKTX found multiple non-system schemas. Select every schema agents should use.', + ), + options: discoveredSchemas.map((schema) => ({ value: schema, label: schema })), + initialValues, + required: true, + }); + if (choices.includes('back')) { + return false; + } + selectedSchemas = choices.length > 0 ? choices : initialValues; + } + + await writeConnectionSchemas({ + projectDir: input.projectDir, + connectionId: input.connectionId, + schemas: selectedSchemas, + }); + writeSetupSection(input.io, `Selecting schemas for ${input.connectionId}`, [ + `Schemas: ${selectedSchemas.join(', ')}`, + ]); + return true; +} + async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); if (project.config.ingest.adapters.includes('historic-sql')) { @@ -902,6 +1026,8 @@ async function validateAndScanConnection(input: { connectionId: string; io: KtxCliIo; deps: KtxSetupDatabasesDeps; + args: KtxSetupDatabasesArgs; + prompts: KtxSetupDatabasesPromptAdapter; }): Promise { const testConnection = input.deps.testConnection ?? defaultTestConnection; const scanConnection = input.deps.scanConnection ?? defaultScanConnection; @@ -923,6 +1049,10 @@ async function validateAndScanConnection(input: { testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`); writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines); + if (!(await maybeConfigurePostgresSchemas(input))) { + return false; + } + await maybeRunHistoricSqlSetupProbe({ projectDir: input.projectDir, connectionId: input.connectionId, @@ -1069,7 +1199,7 @@ export async function runKtxSetupDatabasesStep( prompts, }); if (historicSqlResult === 'back') return { status: 'back', projectDir: args.projectDir }; - if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps }))) { + if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps, args, prompts }))) { return { status: 'failed', projectDir: args.projectDir }; } selectedConnectionIds.push(connectionId); @@ -1209,6 +1339,8 @@ export async function runKtxSetupDatabasesStep( connectionId: connectionChoice.connectionId, io, deps, + args, + prompts, })) ) { if (args.inputMode === 'disabled') return { status: 'failed', projectDir: args.projectDir }; diff --git a/packages/cli/src/setup-ready-menu.test.ts b/packages/cli/src/setup-ready-menu.test.ts index 1e64488e..643d8b3d 100644 --- a/packages/cli/src/setup-ready-menu.test.ts +++ b/packages/cli/src/setup-ready-menu.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it, vi } from 'vitest'; -import { isKtxSetupReady, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js'; +import { isKtxPreAgentSetupReady, isKtxSetupReady, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js'; import type { KtxSetupStatus } from './setup.js'; const readyStatus: KtxSetupStatus = { @@ -20,6 +20,13 @@ describe('setup ready menu', () => { expect(isKtxSetupReady({ ...readyStatus, agents: [] })).toBe(false); }); + it('recognizes pre-agent readiness without requiring agents', () => { + expect(isKtxPreAgentSetupReady(readyStatus)).toBe(true); + expect(isKtxPreAgentSetupReady({ ...readyStatus, agents: [] })).toBe(true); + expect(isKtxPreAgentSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false); + expect(isKtxPreAgentSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false); + }); + it('maps ready-project menu choices to setup sections', async () => { const prompts = { select: vi.fn(async () => 'agents'), cancel: vi.fn() }; diff --git a/packages/cli/src/setup-ready-menu.ts b/packages/cli/src/setup-ready-menu.ts index 675655f2..a101e45a 100644 --- a/packages/cli/src/setup-ready-menu.ts +++ b/packages/cli/src/setup-ready-menu.ts @@ -14,18 +14,21 @@ export interface KtxSetupReadyMenuDeps { prompts?: KtxSetupReadyMenuPromptAdapter; } -export function isKtxSetupReady(status: KtxSetupStatus): boolean { +export function isKtxPreAgentSetupReady(status: KtxSetupStatus): boolean { return ( status.project.ready && status.llm.ready && status.embeddings.ready && status.databases.every((database) => database.ready) && status.sources.every((source) => source.ready) && - status.context.ready && - status.agents.some((agent) => agent.ready) + status.context.ready ); } +export function isKtxSetupReady(status: KtxSetupStatus): boolean { + return isKtxPreAgentSetupReady(status) && status.agents.some((agent) => agent.ready); +} + function createPromptAdapter(): KtxSetupReadyMenuPromptAdapter { return { async select(options) { diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index b8ff4eed..1a281261 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -1,4 +1,4 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { @@ -8,6 +8,7 @@ import { serializeKtxProjectConfig, } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { KtxCliIo } from './cli-runtime.js'; import { runKtxSetupSourcesStep, type KtxSetupSourcesDeps, @@ -41,14 +42,17 @@ function prompts(values: { multiselect?: string[][]; select?: string[]; text?: Array; + password?: Array; }): KtxSetupSourcesPromptAdapter { const multiselectValues = [...(values.multiselect ?? [])]; const selectValues = [...(values.select ?? [])]; const textValues = [...(values.text ?? [])]; + const passwordValues = [...(values.password ?? [])]; return { multiselect: vi.fn(async () => multiselectValues.shift() ?? []), select: vi.fn(async () => selectValues.shift() ?? 'skip'), text: vi.fn(async () => (textValues.length > 0 ? textValues.shift() : '')), + password: vi.fn(async () => (passwordValues.length > 0 ? passwordValues.shift() : undefined)), cancel: vi.fn(), log: vi.fn(), }; @@ -201,12 +205,199 @@ describe('setup sources step', () => { mappings: { databaseMappings: { '1': 'warehouse' }, syncEnabled: { '1': true }, - syncMode: 'ONLY', + syncMode: 'ALL', }, }); expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io); }); + it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => { + await addPrimarySource(); + const cases: Array<{ + source: 'metabase' | 'looker'; + text: string[]; + deps: KtxSetupSourcesDeps; + expectedConnection: Record; + }> = [ + { + source: 'metabase', + text: ['metabase-main', 'https://metabase.example.com'], + deps: { + discoverMetabaseDatabases: vi.fn(async () => [ + { id: 1, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' }, + ]), + validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'metabase', + mappings: { databaseMappings: { '1': 'warehouse' } }, + }, + }, + { + source: 'looker', + text: ['looker-main', 'https://looker.example.com', 'client-id', ''], + deps: { + validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'looker', + mappings: { connectionMappings: { warehouse: 'warehouse' } }, + }, + }, + ]; + + for (const testCase of cases) { + const testPrompts = prompts({ + multiselect: [[testCase.source]], + select: ['env', 'done'], + text: testCase.text, + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + ...testCase.deps, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] }); + + expect( + vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Mapped warehouse')), + ).toBe(false); + if (testCase.source === 'metabase') { + expect( + vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Metabase database id')), + ).toBe(false); + } + expect((await readConfig()).connections[`${testCase.source}-main`]).toMatchObject(testCase.expectedConnection); + } + }); + + it('prompts for the mapped warehouse when interactive Metabase and Looker source setup has multiple choices', async () => { + await addPrimarySource(); + await addConnection('analytics_warehouse', { + driver: 'snowflake', + account: 'acme', + database: 'analytics', + readonly: true, + }); + + const cases: Array<{ + source: 'metabase' | 'looker'; + text: string[]; + deps: KtxSetupSourcesDeps; + expectedConnection: Record; + }> = [ + { + source: 'metabase', + text: ['metabase-main', 'https://metabase.example.com'], + deps: { + discoverMetabaseDatabases: vi.fn(async () => [ + { id: 1, name: 'Finance', engine: 'postgres', host: 'db.example.com', dbName: 'finance' }, + { id: 2, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' }, + ]), + validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'metabase', + mappings: { databaseMappings: { '2': 'analytics_warehouse' } }, + }, + }, + { + source: 'looker', + text: ['looker-main', 'https://looker.example.com', 'client-id', 'analytics'], + deps: { + validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'looker', + mappings: { connectionMappings: { analytics: 'analytics_warehouse' } }, + }, + }, + ]; + + for (const testCase of cases) { + const testPrompts = prompts({ + multiselect: [[testCase.source]], + select: testCase.source === 'metabase' ? ['env', 'analytics_warehouse', '2', 'done'] : ['env', 'analytics_warehouse', 'done'], + text: testCase.text, + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + ...testCase.deps, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] }); + + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'Mapped warehouse connection', + options: [ + { value: 'analytics_warehouse', label: 'analytics_warehouse (SNOWFLAKE)' }, + { value: 'warehouse', label: 'warehouse (POSTGRESQL)' }, + { value: 'back', label: 'Back' }, + ], + }); + if (testCase.source === 'metabase') { + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'Metabase database', + options: [ + { value: '1', label: '1: Finance (postgres)' }, + { value: '2', label: '2: Analytics (postgres)' }, + { value: 'back', label: 'Back' }, + ], + }); + expect( + vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Metabase database id')), + ).toBe(false); + } + expect((await readConfig()).connections[`${testCase.source}-main`]).toMatchObject(testCase.expectedConnection); + } + }); + + it('lets visible Metabase mapping surface refresh and validation failures', async () => { + await addPrimarySource(); + const runMapping = vi.fn(async (_projectDir: string, _connectionId: string, io: KtxCliIo) => { + io.stderr.write('1: Metabase database does not match KTX connection database\n'); + return 1; + }); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['metabase']], + select: ['env'], + text: ['metabase-main', 'https://metabase.example.com'], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { + prompts: testPrompts, + discoverMetabaseDatabases: vi.fn(async () => [ + { id: 1, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' }, + ]), + runMapping, + }, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(runMapping).toHaveBeenCalledWith(projectDir, 'metabase-main', io.io); + expect(io.stderr()).toContain('1: Metabase database does not match KTX connection database'); + expect(io.stderr()).not.toContain('Metabase mapping validation failed'); + }); + it('does not mark sources complete when validation fails', async () => { await addPrimarySource(); const io = makeIo(); @@ -253,7 +444,6 @@ describe('setup sources step', () => { ); const options = vi.mocked(testPrompts.multiselect).mock.calls[0]?.[0].options ?? []; expect(options).toContainEqual({ value: 'notion', label: 'Notion' }); - expect(options).not.toContainEqual({ value: 'posthog', label: 'PostHog' }); }); it('uses a source-specific editable connection name for new interactive connections', async () => { @@ -333,8 +523,8 @@ describe('setup sources step', () => { const io = makeIo(); const testPrompts = prompts({ multiselect: [['dbt']], - select: ['git'], - text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', '', 'env:GITHUB_TOKEN'], + select: ['git', 'env'], + text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', ''], }); await expect( @@ -350,19 +540,16 @@ describe('setup sources step', () => { ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); expect(testGitRepo).toHaveBeenCalledWith({ repoUrl: 'https://github.com/acme-org/private-repo' }); - expect(testPrompts.text).toHaveBeenNthCalledWith(5, { - message: textInputPrompt( - [ - 'This repo requires authentication.', - 'Generate a token at: https://github.com/settings/tokens/new', - 'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).', - 'Or use file:/absolute/path if the token is stored in a file.', - 'Press Enter to skip and try without authentication anyway.', - ].join('\n'), - ), - placeholder: 'env:GITHUB_TOKEN', + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'This repo requires authentication.', + options: [ + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, + { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'skip', label: 'Skip — try without authentication' }, + { value: 'back', label: 'Back' }, + ], }); - expect(testPrompts.text).toHaveBeenCalledTimes(5); + expect(testPrompts.text).toHaveBeenCalledTimes(4); }); it('enables the dbt adapter when adding a dbt source connection', async () => { @@ -520,7 +707,7 @@ describe('setup sources step', () => { mappings: { databaseMappings: { '1': 'warehouse' }, syncEnabled: { '1': true }, - syncMode: 'ONLY', + syncMode: 'ALL', }, }, deps: { @@ -692,13 +879,11 @@ describe('setup sources step', () => { }, { source: 'metabase', + select: ['back', 'env'], text: [ 'metabase-main', 'https://old-metabase.example.com', - undefined, 'https://metabase.example.com', - 'env:METABASE_API_KEY', - 'warehouse', '1', ], deps: { @@ -709,14 +894,13 @@ describe('setup sources step', () => { }, { source: 'looker', + select: ['env'], text: [ 'looker-main', 'https://old-looker.example.com', undefined, 'https://looker.example.com', 'client-id', - 'env:LOOKER_CLIENT_SECRET', - 'warehouse', '', ], deps: { @@ -727,10 +911,10 @@ describe('setup sources step', () => { }, { source: 'notion', - select: ['back', 'all_accessible'], - text: ['notion-main', 'env:NOTION_TOKEN', 'env:NOTION_TOKEN'], + select: ['env', 'back', 'env', 'all_accessible'], + text: ['notion-main'], deps: { validateNotion: vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })) }, - repeatedTextMessage: textInputPrompt('Notion token ref'), + repeatedSelectMessage: 'How should KTX find your Notion integration token?', }, ]; @@ -787,4 +971,102 @@ describe('setup sources step', () => { expect(io.stdout()).toContain('Connect a primary source before adding context sources.'); expect((await readConfig()).setup?.completed_steps ?? []).not.toContain('sources'); }); + + it('auto-detects dbt_project.yml at the root of a local path', async () => { + await addPrimarySource(); + const dbtDir = join(tempDir, 'dbt-repo'); + await mkdir(dbtDir, { recursive: true }); + await writeFile(join(dbtDir, 'dbt_project.yml'), 'name: analytics\n'); + + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path'], + text: ['dbt-main', dbtDir], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.text).toHaveBeenCalledTimes(2); + const config = await readConfig(); + expect(config.connections['dbt-main']).toMatchObject({ driver: 'dbt', source_dir: dbtDir }); + expect(config.connections['dbt-main']).not.toHaveProperty('path'); + }); + + it('auto-detects dbt_project.yml in a subdirectory of a local path', async () => { + await addPrimarySource(); + const dbtDir = join(tempDir, 'monorepo'); + await mkdir(join(dbtDir, 'analytics', 'dbt'), { recursive: true }); + await writeFile(join(dbtDir, 'analytics', 'dbt', 'dbt_project.yml'), 'name: analytics\n'); + + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path'], + text: ['dbt-main', dbtDir], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.text).toHaveBeenCalledTimes(2); + expect(testPrompts.log).toHaveBeenCalledWith('Found dbt_project.yml in analytics/dbt/'); + const config = await readConfig(); + expect(config.connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: dbtDir, + path: 'analytics/dbt', + }); + }); + + it('shows a picker when multiple dbt projects are found in a local path', async () => { + await addPrimarySource(); + const dbtDir = join(tempDir, 'multi-dbt'); + await mkdir(join(dbtDir, 'analytics'), { recursive: true }); + await mkdir(join(dbtDir, 'staging'), { recursive: true }); + await writeFile(join(dbtDir, 'analytics', 'dbt_project.yml'), 'name: analytics\n'); + await writeFile(join(dbtDir, 'staging', 'dbt_project.yml'), 'name: staging\n'); + + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path', 'staging'], + text: ['dbt-main', dbtDir], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Multiple dbt projects found — which one should KTX use?', + }), + ); + expect(testPrompts.text).toHaveBeenCalledTimes(2); + const config = await readConfig(); + expect(config.connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: dbtDir, + path: 'staging', + }); + }); }); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index b0e0fe2e..e6e7f41b 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -1,14 +1,18 @@ import { mkdtemp, readdir, readFile, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; -import { join, resolve } from 'node:path'; +import { join, relative, resolve } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; -import { cancel, isCancel, log, multiselect, select, text } from '@clack/prompts'; -import { resolveNotionAuthToken } from '@ktx/context/connections'; +import { cancel, isCancel, log, multiselect, password, select, text } from '@clack/prompts'; +import { localConnectionTypeForConfig, resolveNotionAuthToken } from '@ktx/context/connections'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { cloneOrPull, + DEFAULT_METABASE_CLIENT_CONFIG, + discoverMetabaseDatabases, + type DiscoveredMetabaseDatabase, loadDbtSchemaFiles, loadProjectInfo, + MetabaseClient, type NotionApi, NotionClient, parseLookmlStagedDir, @@ -28,6 +32,7 @@ import { runKtxConnection } from './connection.js'; import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxPublicIngest } from './public-ingest.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { writeProjectLocalSecretReference } from './setup-secrets.js'; export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion'; @@ -71,6 +76,7 @@ export interface KtxSetupSourcesPromptAdapter { }): Promise; select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; + password(options: { message: string }): Promise; cancel(message: string): void; log?(message: string): void; } @@ -86,6 +92,11 @@ export interface KtxSetupSourcesDeps { validateLooker?: (projectDir: string, connectionId: string) => Promise; validateLookml?: (connection: KtxProjectConnectionConfig) => Promise; validateNotion?: (connection: KtxProjectConnectionConfig) => Promise; + discoverMetabaseDatabases?: (args: { + sourceUrl: string; + sourceApiKeyRef: string; + sourceConnectionId: string; + }) => Promise; runMapping?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; runInitialIngest?: ( projectDir: string, @@ -143,6 +154,12 @@ function createPromptAdapter(): KtxSetupSourcesPromptAdapter { ); return isCancel(value) ? undefined : String(value); }, + async password(options) { + const value = await withSetupInterruptConfirmation(() => + password({ ...options, message: withTextInputNavigation(options.message) }), + ); + return isCancel(value) ? undefined : String(value); + }, cancel(message) { cancel(message); }, @@ -172,17 +189,6 @@ function connectionNamePrompt(label: string): string { return `Name this ${label} connection\nKTX will use this short name in commands and config. You can rename it now.`; } -function gitAuthAfterFailurePrompt(source: KtxSetupSourceType): string { - const label = source === 'dbt' ? 'This' : `This ${sourceLabel(source)}`; - return [ - `${label} repo requires authentication.`, - 'Generate a token at: https://github.com/settings/tokens/new', - 'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).', - 'Or use file:/absolute/path if the token is stored in a file.', - 'Press Enter to skip and try without authentication anyway.', - ].join('\n'); -} - function sourceSubpathPrompt(source: KtxSetupSourceType): string { if (source === 'dbt') { return [ @@ -198,6 +204,21 @@ function sourceSubpathPrompt(source: KtxSetupSourceType): string { ].join('\n'); } +const SCAN_SKIP_DIRS = new Set(['.git', 'node_modules', '.venv', 'target', 'dbt_packages', 'dbt_modules', '__pycache__']); + +async function findDbtProjectSubpaths(rootDir: string): Promise { + const entries = await readdir(rootDir, { withFileTypes: true, recursive: true }); + const subpaths: string[] = []; + for (const entry of entries) { + if (!entry.isFile()) continue; + if (entry.name !== 'dbt_project.yml' && entry.name !== 'dbt_project.yaml') continue; + const relDir = relative(rootDir, entry.parentPath); + if (relDir.split('/').some((part) => SCAN_SKIP_DIRS.has(part))) continue; + subpaths.push(relDir); + } + return subpaths; +} + async function promptText( prompts: KtxSetupSourcesPromptAdapter, options: { message: string; placeholder?: string; initialValue?: string }, @@ -222,6 +243,75 @@ function credentialRef(value: string | undefined, label: string): string { return ref; } +async function chooseSourceCredentialRef(input: { + prompts: KtxSetupSourcesPromptAdapter; + projectDir: string; + label: string; + envName: string; + secretFileName: string; +}): Promise { + while (true) { + const choice = await input.prompts.select({ + message: `How should KTX find your ${input.label}?`, + options: [ + { value: 'env', label: `Use ${input.envName} from the environment` }, + { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') return 'back'; + if (choice === 'paste') { + const value = await input.prompts.password({ message: input.label }); + if (value === undefined) continue; + if (!value.trim()) continue; + const ref = await writeProjectLocalSecretReference({ + projectDir: input.projectDir, + fileName: input.secretFileName, + value, + }); + input.prompts.log?.(`Saved to .ktx/secrets/${input.secretFileName}`); + return ref; + } + return `env:${input.envName}`; + } +} + +async function chooseGitAuthCredentialRef(input: { + prompts: KtxSetupSourcesPromptAdapter; + projectDir: string; + source: KtxSetupSourceType; + connectionId: string; +}): Promise { + const label = input.source === 'dbt' ? 'This' : `This ${sourceLabel(input.source)}`; + while (true) { + const choice = await input.prompts.select({ + message: `${label} repo requires authentication.`, + options: [ + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, + { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'skip', label: 'Skip — try without authentication' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') return 'back'; + if (choice === 'skip') return undefined; + if (choice === 'paste') { + const value = await input.prompts.password({ message: 'Git access token' }); + if (value === undefined) continue; + if (!value.trim()) continue; + const fileName = `${input.connectionId}-auth-token`; + const ref = await writeProjectLocalSecretReference({ + projectDir: input.projectDir, + fileName, + value, + }); + input.prompts.log?.(`Saved to .ktx/secrets/${fileName}`); + return ref; + } + return 'env:GITHUB_TOKEN'; + } +} + function repoOrLocalSource(args: KtxSetupSourcesArgs): { sourceDir?: string; repoUrl?: string } { if (args.sourcePath && args.sourceGitUrl) { throw new Error('Choose only one source location: --source-path or --source-git-url.'); @@ -373,7 +463,7 @@ function buildMetabaseConnection(args: KtxSetupSourcesArgs): KtxProjectConnectio mappings: { databaseMappings: { [String(args.metabaseDatabaseId)]: args.sourceWarehouseConnectionId }, syncEnabled: { [String(args.metabaseDatabaseId)]: true }, - syncMode: 'ONLY', + syncMode: 'ALL', }, }; } @@ -512,16 +602,6 @@ async function defaultValidateMetricflow(connection: KtxProjectConnectionConfig) }; } -async function defaultValidateMetabase(projectDir: string, connectionId: string): Promise { - const code = await runKtxConnection( - { command: 'map', projectDir, sourceConnectionId: connectionId, json: true }, - { stdout: { write() {} }, stderr: { write() {} } }, - ); - return code === 0 - ? { ok: true, detail: 'mapping validated' } - : { ok: false, message: 'Metabase mapping validation failed' }; -} - async function defaultValidateLooker(projectDir: string, connectionId: string): Promise { const code = await runKtxConnectionMapping( { command: 'refresh', projectDir, connectionId, autoAccept: true }, @@ -559,8 +639,37 @@ async function defaultValidateNotion(connection: KtxProjectConnectionConfig): Pr return { ok: true, detail: `roots=${roots.length}` }; } +interface MappingJsonOutput { + connectionId: string; + refresh: { ok: boolean; output: string[] }; + validation: { ok: boolean; output: string[] }; + mappings: unknown[]; +} + +function summarizeMappingResult(parsed: MappingJsonOutput): string { + const mappingCount = parsed.mappings.length; + const mappingNoun = mappingCount === 1 ? 'mapping' : 'mappings'; + return `Mapping validated — ${mappingCount} ${mappingNoun} configured`; +} + async function defaultRunMapping(projectDir: string, connectionId: string, io: KtxCliIo): Promise { - return await runKtxConnection({ command: 'map', projectDir, sourceConnectionId: connectionId, json: false }, io); + let captured = ''; + const captureIo: KtxCliIo = { + stdout: { write(chunk: string) { captured += chunk; } }, + stderr: io.stderr, + }; + const code = await runKtxConnection( + { command: 'map', projectDir, sourceConnectionId: connectionId, json: true }, + captureIo, + ); + if (code !== 0) return code; + try { + const parsed = JSON.parse(captured.trim()) as MappingJsonOutput; + io.stdout.write(`${summarizeMappingResult(parsed)}\n`); + } catch { + io.stdout.write(captured); + } + return 0; } async function defaultRunInitialIngest( @@ -634,6 +743,11 @@ type SourcePromptState = KtxSetupSourcesArgs & { type SourcePromptStep = (state: SourcePromptState) => Promise<'next' | 'back'>; +interface WarehouseConnectionChoice { + id: string; + connectionType: string; +} + type InteractiveSourceConnectionChoice = | { kind: 'existing'; connectionId: string; connection: KtxProjectConnectionConfig } | { kind: 'new'; args: KtxSetupSourcesArgs } @@ -672,6 +786,107 @@ function resetRepoLocationFields(state: SourcePromptState): void { delete state.sourceProjectName; } +function warehouseConnectionChoices(config: KtxProjectConfig): WarehouseConnectionChoice[] { + return Object.entries(config.connections) + .filter(([, connection]) => PRIMARY_SOURCE_DRIVERS.has(String(connection.driver ?? '').toLowerCase())) + .map(([id, connection]) => ({ id, connectionType: localConnectionTypeForConfig(id, connection) })) + .sort((left, right) => left.id.localeCompare(right.id)); +} + +async function chooseMappedWarehouseConnectionId(input: { + projectDir: string; + prompts: KtxSetupSourcesPromptAdapter; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + const choices = warehouseConnectionChoices(project.config); + if (choices.length === 1) { + return choices[0].id; + } + if (choices.length === 0) { + const entered = await promptText(input.prompts, { message: 'Mapped warehouse connection id' }); + return entered === undefined ? 'back' : entered; + } + + const selected = await input.prompts.select({ + message: 'Mapped warehouse connection', + options: [ + ...choices.map((choice) => ({ + value: choice.id, + label: `${choice.id} (${choice.connectionType})`, + })), + { value: 'back', label: 'Back' }, + ], + }); + return selected === 'back' ? 'back' : selected; +} + +async function defaultDiscoverMetabaseDatabases(input: { + sourceUrl: string; + sourceApiKeyRef: string; +}): Promise { + const apiKey = resolveKtxConfigReference(input.sourceApiKeyRef, process.env); + if (!apiKey) { + throw new Error('Metabase API key ref could not be resolved'); + } + const client = new MetabaseClient( + { apiUrl: input.sourceUrl, apiKey }, + DEFAULT_METABASE_CLIENT_CONFIG, + ); + try { + return await discoverMetabaseDatabases(client); + } finally { + await client.cleanup(); + } +} + +function metabaseDatabaseLabel(database: DiscoveredMetabaseDatabase): string { + const detail = [database.engine].filter(Boolean).join(', '); + return detail ? `${database.id}: ${database.name} (${detail})` : `${database.id}: ${database.name}`; +} + +async function chooseMetabaseDatabaseId(input: { + state: SourcePromptState; + prompts: KtxSetupSourcesPromptAdapter; + deps: KtxSetupSourcesDeps; +}): Promise { + const sourceUrl = input.state.sourceUrl; + const sourceApiKeyRef = input.state.sourceApiKeyRef; + if (sourceUrl && sourceApiKeyRef) { + try { + const discovered = await (input.deps.discoverMetabaseDatabases ?? defaultDiscoverMetabaseDatabases)({ + sourceUrl, + sourceApiKeyRef, + sourceConnectionId: input.state.sourceConnectionId ?? 'metabase-main', + }); + if (discovered.length === 1) { + return discovered[0].id; + } + if (discovered.length > 1) { + const selected = await input.prompts.select({ + message: 'Metabase database', + options: [ + ...discovered + .slice() + .sort((left, right) => left.id - right.id) + .map((database) => ({ + value: String(database.id), + label: metabaseDatabaseLabel(database), + })), + { value: 'back', label: 'Back' }, + ], + }); + return selected === 'back' ? 'back' : Number.parseInt(selected, 10); + } + } catch { + // Discovery is a convenience. Fall back to the raw id prompt when credentials + // are unavailable locally or the Metabase API cannot be reached yet. + } + } + + const databaseId = await promptText(input.prompts, { message: 'Metabase database id' }); + return databaseId === undefined ? 'back' : Number.parseInt(databaseId, 10); +} + function connectionIdPromptSteps( args: KtxSetupSourcesArgs, source: KtxSetupSourceType, @@ -703,6 +918,7 @@ async function promptForInteractiveSource( prompts: KtxSetupSourcesPromptAdapter, defaultConnectionId = `${source}-main`, testGitRepo: KtxSetupSourcesDeps['testGitRepo'] = testRepoConnection, + discoverMetabaseDatabaseList?: KtxSetupSourcesDeps['discoverMetabaseDatabases'], ): Promise { const initialState: SourcePromptState = { ...args, source }; if (args.sourceConnectionId) { @@ -757,23 +973,6 @@ async function promptForInteractiveSource( }, ] : []), - ...(state.sourceLocation - ? [ - async (currentState: SourcePromptState) => { - const subpath = await promptText(prompts, { - message: sourceSubpathPrompt(source), - placeholder: 'optional', - }); - if (subpath === undefined) return 'back'; - if (subpath) { - currentState.sourceSubpath = subpath; - } else { - delete currentState.sourceSubpath; - } - return 'next'; - }, - ] - : []), ...(state.sourceLocation === 'git' ? [ async (currentState: SourcePromptState) => { @@ -783,11 +982,13 @@ async function promptForInteractiveSource( prompts.log?.('Repository connected.'); return 'next'; } - const authRef = await promptText(prompts, { - message: gitAuthAfterFailurePrompt(source), - placeholder: 'env:GITHUB_TOKEN', + const authRef = await chooseGitAuthCredentialRef({ + prompts, + projectDir: args.projectDir, + source, + connectionId: currentState.sourceConnectionId ?? `${source}-main`, }); - if (authRef === undefined) return 'back'; + if (authRef === 'back') return 'back'; if (authRef) { currentState.sourceAuthTokenRef = authRef; } else { @@ -797,6 +998,79 @@ async function promptForInteractiveSource( }, ] : []), + ...(state.sourceLocation + ? [ + async (currentState: SourcePromptState) => { + if (source === 'dbt') { + let scanDir: string | undefined; + if (currentState.sourceLocation === 'path' && currentState.sourcePath) { + scanDir = currentState.sourcePath; + } else if (currentState.sourceLocation === 'git' && currentState.sourceGitUrl) { + try { + const cacheDir = await mkdtemp(join(tmpdir(), 'ktx-setup-dbt-scan-')); + const authToken = currentState.sourceAuthTokenRef + ? resolveKtxConfigReference(currentState.sourceAuthTokenRef, process.env) + : null; + await cloneOrPull({ + repoUrl: currentState.sourceGitUrl, + authToken, + cacheDir, + branch: currentState.sourceBranch ?? 'main', + }); + scanDir = cacheDir; + } catch { + // Clone failed — fall through to manual prompt + } + } + if (scanDir) { + try { + const subpaths = await findDbtProjectSubpaths(scanDir); + if (subpaths.length === 1) { + const found = subpaths[0]!; + if (found) { + currentState.sourceSubpath = found; + prompts.log?.(`Found dbt_project.yml in ${found}/`); + } else { + delete currentState.sourceSubpath; + } + return 'next'; + } + if (subpaths.length > 1) { + const selected = await prompts.select({ + message: 'Multiple dbt projects found — which one should KTX use?', + options: [ + ...subpaths.map((p) => ({ value: p || '.', label: p || '(project root)' })), + { value: 'back', label: 'Back' }, + ], + }); + if (selected === 'back') return 'back'; + const subpath = selected === '.' ? '' : selected; + if (subpath) { + currentState.sourceSubpath = subpath; + } else { + delete currentState.sourceSubpath; + } + return 'next'; + } + } catch { + // Directory unreadable — fall through to manual prompt + } + } + } + const subpath = await promptText(prompts, { + message: sourceSubpathPrompt(source), + placeholder: 'optional', + }); + if (subpath === undefined) return 'back'; + if (subpath) { + currentState.sourceSubpath = subpath; + } else { + delete currentState.sourceSubpath; + } + return 'next'; + }, + ] + : []), ]); } @@ -810,24 +1084,34 @@ async function promptForInteractiveSource( return 'next'; }, async (state) => { - const sourceApiKeyRef = await promptText(prompts, { - message: 'Metabase API key ref', - placeholder: 'env:METABASE_API_KEY', + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Metabase API key', + envName: 'METABASE_API_KEY', + secretFileName: `${state.sourceConnectionId ?? 'metabase-main'}-api-key`, }); - if (sourceApiKeyRef === undefined) return 'back'; - state.sourceApiKeyRef = sourceApiKeyRef; + if (ref === 'back') return 'back'; + state.sourceApiKeyRef = ref; return 'next'; }, async (state) => { - const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' }); - if (sourceWarehouseConnectionId === undefined) return 'back'; + const sourceWarehouseConnectionId = await chooseMappedWarehouseConnectionId({ + projectDir: args.projectDir, + prompts, + }); + if (sourceWarehouseConnectionId === 'back') return 'back'; state.sourceWarehouseConnectionId = sourceWarehouseConnectionId; return 'next'; }, async (state) => { - const databaseId = await promptText(prompts, { message: 'Metabase database id' }); - if (databaseId === undefined) return 'back'; - state.metabaseDatabaseId = Number.parseInt(databaseId, 10); + const databaseId = await chooseMetabaseDatabaseId({ + state, + prompts, + deps: { discoverMetabaseDatabases: discoverMetabaseDatabaseList }, + }); + if (databaseId === 'back') return 'back'; + state.metabaseDatabaseId = databaseId; return 'next'; }, ]); @@ -849,17 +1133,23 @@ async function promptForInteractiveSource( return 'next'; }, async (state) => { - const sourceClientSecretRef = await promptText(prompts, { - message: 'Looker client secret ref', - placeholder: 'env:LOOKER_CLIENT_SECRET', + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Looker client secret', + envName: 'LOOKER_CLIENT_SECRET', + secretFileName: `${state.sourceConnectionId ?? 'looker-main'}-client-secret`, }); - if (sourceClientSecretRef === undefined) return 'back'; - state.sourceClientSecretRef = sourceClientSecretRef; + if (ref === 'back') return 'back'; + state.sourceClientSecretRef = ref; return 'next'; }, async (state) => { - const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' }); - if (sourceWarehouseConnectionId === undefined) return 'back'; + const sourceWarehouseConnectionId = await chooseMappedWarehouseConnectionId({ + projectDir: args.projectDir, + prompts, + }); + if (sourceWarehouseConnectionId === 'back') return 'back'; state.sourceWarehouseConnectionId = sourceWarehouseConnectionId; return 'next'; }, @@ -882,12 +1172,15 @@ async function promptForInteractiveSource( return await runSourcePromptSteps(initialState, (state) => [ ...connectionSteps, async (currentState) => { - const sourceApiKeyRef = await promptText(prompts, { - message: 'Notion token ref', - placeholder: 'env:NOTION_TOKEN', + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Notion integration token', + envName: 'NOTION_TOKEN', + secretFileName: `${currentState.sourceConnectionId ?? 'notion-main'}-token`, }); - if (sourceApiKeyRef === undefined) return 'back'; - currentState.sourceApiKeyRef = sourceApiKeyRef; + if (ref === 'back') return 'back'; + currentState.sourceApiKeyRef = ref; return 'next'; }, async (currentState) => { @@ -956,13 +1249,21 @@ async function chooseInteractiveSourceConnection(input: { connections: Record; prompts: KtxSetupSourcesPromptAdapter; testGitRepo?: KtxSetupSourcesDeps['testGitRepo']; + discoverMetabaseDatabases?: KtxSetupSourcesDeps['discoverMetabaseDatabases']; }): Promise { const existingIds = existingConnectionIdsBySource(input.connections, input.source); const defaultConnectionId = defaultConnectionIdForSource(input.connections, input.source); const label = sourceLabel(input.source); if (existingIds.length === 0) { - const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo); + const sourceArgs = await promptForInteractiveSource( + input.args, + input.source, + input.prompts, + defaultConnectionId, + input.testGitRepo, + input.discoverMetabaseDatabases, + ); return sourceArgs === 'back' ? 'back' : { kind: 'new', args: sourceArgs }; } @@ -987,7 +1288,14 @@ async function chooseInteractiveSourceConnection(input: { } continue; } - const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo); + const sourceArgs = await promptForInteractiveSource( + input.args, + input.source, + input.prompts, + defaultConnectionId, + input.testGitRepo, + input.discoverMetabaseDatabases, + ); if (sourceArgs === 'back') { continue; } @@ -1026,7 +1334,9 @@ async function validateSource( return await (deps.validateMetricflow ?? defaultValidateMetricflow)(args.connection); } if (source === 'metabase') { - return await (deps.validateMetabase ?? defaultValidateMetabase)(args.projectDir, args.connectionId); + return deps.validateMetabase + ? await deps.validateMetabase(args.projectDir, args.connectionId) + : { ok: true, detail: 'mapping validation runs after the connection is saved' }; } if (source === 'looker') { return await (deps.validateLooker ?? defaultValidateLooker)(args.projectDir, args.connectionId); @@ -1097,6 +1407,7 @@ export async function runKtxSetupSourcesStep( connections: (await loadKtxProject({ projectDir: args.projectDir })).config.connections, prompts, testGitRepo: deps.testGitRepo, + discoverMetabaseDatabases: deps.discoverMetabaseDatabases, }); if (sourceChoice === 'back') { if (args.source) { @@ -1126,6 +1437,7 @@ export async function runKtxSetupSourcesStep( return { status: 'failed', projectDir: args.projectDir }; } if (source === 'metabase' || source === 'looker') { + prompts.log?.(`Validating ${sourceLabel(source)} mapping…`); const mappingCode = await (deps.runMapping ?? defaultRunMapping)(args.projectDir, connectionId, io); if (mappingCode !== 0) { await rollback?.(); diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 3e772d92..20f12e6e 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -1305,6 +1305,140 @@ describe('setup status', () => { expect(calls).toEqual(['context']); }); + it('resumes an active context build before prompting for earlier setup steps', async () => { + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-active', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-active'), + }); + const context = vi.fn(async () => ({ + status: 'detached' as const, + projectDir: tempDir, + runId: 'setup-context-local-active', + })); + const databases = vi.fn(async () => { + throw new Error('database setup should not run while context build is active'); + }); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { context, databases }, + ), + ).resolves.toBe(0); + + expect(context).toHaveBeenCalledWith( + { projectDir: tempDir, inputMode: 'auto', allowEmpty: true }, + io.io, + ); + expect(databases).not.toHaveBeenCalled(); + }); + + it('skips entry menu and auto-watches when context build is active and showEntryMenu is true', async () => { + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-active', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-active'), + }); + const context = vi.fn(async () => ({ + status: 'detached' as const, + projectDir: tempDir, + runId: 'setup-context-local-active', + })); + const entryMenuSelect = vi.fn(async () => 'exit'); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + showEntryMenu: true, + }, + io.io, + { + context, + entryMenuDeps: { prompts: { select: entryMenuSelect, cancel: vi.fn() } }, + }, + ), + ).resolves.toBe(0); + + expect(entryMenuSelect).not.toHaveBeenCalled(); + expect(context).toHaveBeenCalledWith( + { projectDir: tempDir, inputMode: 'auto', allowEmpty: true, autoWatch: true }, + io.io, + ); + }); + it('routes a ready project menu selection to agent setup', async () => { const calls: string[] = []; const io = makeIo(); @@ -1416,6 +1550,102 @@ describe('setup status', () => { expect(calls).toEqual(['agents']); }); + it('skips to agent setup when context is ready but agents are not configured', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' completed_steps:', + ' - project', + ' - llm', + ' - embeddings', + ' - sources', + ' - context', + ' database_connection_ids: []', + 'connections: {}', + 'llm:', + ' provider:', + ' backend: anthropic', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-ready', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: [], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-ready'), + }); + + const readyMenuSelect = vi.fn(); + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + readyMenuDeps: { prompts: { select: readyMenuSelect, cancel: vi.fn() } }, + model: async (args) => { + expect(args.skipLlm).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + embeddings: async (args) => { + expect(args.skipEmbeddings).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + databases: async (args) => { + expect(args.skipDatabases).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + sources: async (args) => { + expect(args.skipSources).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + agents: async () => { + calls.push('agents'); + return { + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(readyMenuSelect).not.toHaveBeenCalled(); + expect(calls).toEqual(['agents']); + }); + it('runs only project resolution, context gate, and agent setup in --agents mode', async () => { const io = makeIo(); const context = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-local-test' })); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 09deff37..b9b0b412 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -24,7 +24,12 @@ import { import { type KtxSetupEmbeddingsDeps, runKtxSetupEmbeddingsStep } from './setup-embeddings.js'; import { type KtxSetupModelDeps, runKtxSetupAnthropicModelStep } from './setup-models.js'; import { type KtxSetupProjectDeps, runKtxSetupProjectStep } from './setup-project.js'; -import { isKtxSetupReady, type KtxSetupReadyMenuDeps, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js'; +import { + isKtxPreAgentSetupReady, + isKtxSetupReady, + type KtxSetupReadyMenuDeps, + runKtxSetupReadyChangeMenu, +} from './setup-ready-menu.js'; import { type KtxSetupSourcesDeps, type KtxSetupSourceType, runKtxSetupSourcesStep } from './setup-sources.js'; import { withMenuOptionsSpacing } from './prompt-navigation.js'; import { @@ -391,6 +396,10 @@ function setupContextReady(status: KtxSetupStatus): boolean { return status.context.ready; } +function setupContextActive(status: KtxSetupStatus): boolean { + return status.context.status === 'running' || status.context.status === 'detached'; +} + function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void { io.stderr.write('KTX context is not ready for agents.\n\n'); io.stderr.write(`Build context first:\n ktx setup context build --project-dir ${resolve(projectDir)}\n\n`); @@ -454,22 +463,27 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup args.inputMode !== 'disabled' && !args.agents && (io.stdout.isTTY === true || deps.entryMenuDeps?.prompts !== undefined); + let autoWatchActiveBuild = false; setupLoop: while (true) { entryAction = undefined; if (canShowEntryMenu) { const status = await readKtxSetupStatus(args.projectDir); - entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action; - if (entryAction === 'exit') { - (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.'); - return 0; - } - if (entryAction === 'status') { - io.stdout.write(formatKtxSetupStatus(status)); - return 0; - } - if (entryAction === 'demo') { - return await runKtxSetupDemoFromEntryMenu(args, io, deps); + if (setupContextActive(status)) { + autoWatchActiveBuild = true; + } else { + entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action; + if (entryAction === 'exit') { + (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.'); + return 0; + } + if (entryAction === 'status') { + io.stdout.write(formatKtxSetupStatus(status)); + return 0; + } + if (entryAction === 'demo') { + return await runKtxSetupDemoFromEntryMenu(args, io, deps); + } } } @@ -497,9 +511,38 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup const agentsRequested = args.agents || entryAction === 'agents'; const currentStatus = await readKtxSetupStatus(projectResult.projectDir); let readyAction: string | undefined; - if (args.inputMode !== 'disabled' && !agentsRequested && isKtxSetupReady(currentStatus)) { - readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action; - if (readyAction === 'exit') return 0; + + if (args.inputMode !== 'disabled' && !agentsRequested && setupContextActive(currentStatus)) { + const contextRunner = + deps.context ?? ((contextArgs, contextIo) => runKtxSetupContextStep(contextArgs, contextIo, deps.contextDeps)); + const contextResult = await contextRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + allowEmpty: true, + ...(autoWatchActiveBuild ? { autoWatch: true } : {}), + }, + io, + ); + autoWatchActiveBuild = false; + if (contextResult.status === 'back') { + continue; + } + if (contextResult.status === 'failed' || contextResult.status === 'missing-input') { + return 1; + } + if (contextResult.status !== 'ready') { + return 0; + } + } + + if (args.inputMode !== 'disabled' && !agentsRequested) { + if (isKtxSetupReady(currentStatus)) { + readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action; + if (readyAction === 'exit') return 0; + } else if (isKtxPreAgentSetupReady(currentStatus)) { + readyAction = 'agents'; + } } const runOnly = readyAction; diff --git a/packages/connector-posthog/package.json b/packages/connector-posthog/package.json deleted file mode 100644 index da2de540..00000000 --- a/packages/connector-posthog/package.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "name": "@ktx/connector-posthog", - "version": "0.0.0-private", - "description": "PostHog connector package for KTX scan interfaces", - "private": true, - "type": "module", - "engines": { - "node": ">=22.0.0" - }, - "main": "dist/index.js", - "types": "dist/index.d.ts", - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js", - "default": "./dist/index.js" - }, - "./package.json": "./package.json" - }, - "files": [ - "dist" - ], - "scripts": { - "build": "tsc -p tsconfig.json", - "test": "vitest run", - "type-check": "tsc -p tsconfig.json --noEmit" - }, - "dependencies": { - "@ktx/context": "workspace:*" - }, - "devDependencies": { - "@types/node": "^24.3.0", - "typescript": "^5.9.3", - "vitest": "^4.0.18" - }, - "license": "Apache-2.0", - "repository": { - "type": "git", - "url": "git+https://github.com/kaelio/ktx.git", - "directory": "packages/connector-posthog" - }, - "bugs": { - "url": "https://github.com/kaelio/ktx/issues" - }, - "homepage": "https://github.com/kaelio/ktx#readme" -} diff --git a/packages/connector-posthog/src/connector.test.ts b/packages/connector-posthog/src/connector.test.ts deleted file mode 100644 index 69dc7223..00000000 --- a/packages/connector-posthog/src/connector.test.ts +++ /dev/null @@ -1,400 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { - createPostHogLiveDatabaseIntrospection, - isKtxPostHogConnectionConfig, - KtxPostHogScanConnector, - postHogConnectionConfigFromConfig, - type KtxPostHogConnectionConfig, - type KtxPostHogFetch, -} from './index.js'; - -function jsonResponse(body: unknown, status = 200): Response { - return { - ok: status >= 200 && status < 300, - status, - json: async () => body, - text: async () => JSON.stringify(body), - } as Response; -} - -function fakeFetch(queries: string[] = []): KtxPostHogFetch { - return vi.fn(async (_url: string, init?: RequestInit) => { - const body = JSON.parse(String(init?.body ?? '{}')) as { query?: { kind?: string; query?: string } }; - const sql = body.query?.query ?? ''; - if (sql) { - queries.push(sql); - } - if (body.query?.kind === 'DatabaseSchemaQuery') { - return jsonResponse({ - tables: { - events: { - id: 'events', - name: 'events', - type: 'posthog', - row_count: 42, - fields: { - uuid: { - name: 'uuid', - type: 'uuid', - hogql_value: 'uuid', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'uuid', - }, - event: { - name: 'event', - type: 'string', - hogql_value: 'event', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'event', - }, - timestamp: { - name: 'timestamp', - type: 'datetime', - hogql_value: 'timestamp', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'timestamp', - }, - properties: { - name: 'properties', - type: 'json', - hogql_value: 'properties', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'properties', - }, - virtual: { - name: 'virtual', - type: 'virtual_table', - hogql_value: 'virtual', - schema_valid: true, - table: null, - fields: null, - chain: null, - id: 'virtual', - }, - }, - }, - query_log: { - id: 'query_log', - name: 'query_log', - type: 'posthog', - row_count: 1, - fields: {}, - }, - }, - joins: [], - }); - } - if (sql.includes('SELECT * FROM person_distinct_ids LIMIT 0')) { - return jsonResponse({ - results: [], - columns: ['distinct_id', 'person_id'], - types: [ - ['distinct_id', 'String'], - ['person_id', 'UUID'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('LIMIT 0')) { - return jsonResponse({ results: null, columns: null, types: null, error: 'Table not found', hogql: sql }); - } - if (sql.includes('SELECT 1 AS test')) { - return jsonResponse({ results: [[1]], columns: ['test'], types: [['test', 'Int64']], error: null, hogql: sql }); - } - if (sql.includes('count() AS cnt')) { - return jsonResponse({ results: [[42]], columns: ['cnt'], types: [['cnt', 'Int64']], error: null, hogql: sql }); - } - if (sql.includes('GROUP BY event')) { - return jsonResponse({ - results: [['$pageview', 9]], - columns: ['event', 'cnt'], - types: [ - ['event', 'String'], - ['cnt', 'Int64'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('arrayJoin(JSONExtractKeys')) { - return jsonResponse({ - results: [['$browser', 7]], - columns: ['key', 'cnt'], - types: [ - ['key', 'String'], - ['cnt', 'Int64'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('uniq(JSONExtractString') || sql.includes('uniq(val) AS cardinality')) { - return jsonResponse({ - results: [[2]], - columns: ['cardinality'], - types: [['cardinality', 'Int64']], - error: null, - hogql: sql, - }); - } - if (sql.includes('DISTINCT JSONExtractString') || sql.includes('SELECT DISTINCT toString(')) { - return jsonResponse({ - results: [['Chrome'], ['Safari']], - columns: ['value'], - types: [['value', 'String']], - error: null, - hogql: sql, - }); - } - return jsonResponse({ results: [['$pageview']], columns: ['event'], types: [['event', 'String']], error: null, hogql: sql }); - }) as KtxPostHogFetch; -} - -const posthogApiKeyEnv = ['POSTHOG', 'API', 'KEY'].join('_'); -const fixtureToken = ['phx', 'fixture'].join('_'); -const env = { [posthogApiKeyEnv]: fixtureToken }; -const connection: KtxPostHogConnectionConfig & { driver: string } = { - driver: 'posthog', - ['api_' + 'key']: `env:${posthogApiKeyEnv}`, - project_id: '157881', - region: 'us', - readonly: true, -}; - -describe('KtxPostHogScanConnector', () => { - it('resolves configuration safely', () => { - expect(isKtxPostHogConnectionConfig(connection)).toBe(true); - expect(isKtxPostHogConnectionConfig({ driver: 'mysql' })).toBe(false); - const resolved = postHogConnectionConfigFromConfig({ - connectionId: 'product', - connection, - env, - }); - expect(resolved).toMatchObject({ projectId: '157881', baseUrl: 'https://us.posthog.com' }); - const tokenField = ['api', 'Key'].join('') as keyof typeof resolved; - expect(resolved[tokenField]).toBe(fixtureToken); - expect(() => - postHogConnectionConfigFromConfig({ - connectionId: 'product', - connection: { ...connection, readonly: false }, - }), - ).toThrow('Native PostHog connector requires connections.product.readonly: true'); - }); - - it('introspects schema metadata, hidden tables, descriptions, primary keys, and normalized types', async () => { - const connector = new KtxPostHogScanConnector({ - connectionId: 'product', - connection, - env, - fetch: fakeFetch(), - sleep: async () => {}, - now: () => new Date('2026-04-29T19:00:00.000Z'), - }); - - const snapshot = await connector.introspect({ connectionId: 'product', driver: 'posthog' }, { runId: 'scan-run-1' }); - - expect(snapshot).toMatchObject({ - connectionId: 'product', - driver: 'posthog', - extractedAt: '2026-04-29T19:00:00.000Z', - scope: { catalogs: ['157881'] }, - metadata: { - project_id: '157881', - table_count: 2, - total_columns: 6, - }, - }); - expect(snapshot.tables.map((table) => table.name)).toEqual(['events', 'person_distinct_ids']); - expect(snapshot.tables[0]).toMatchObject({ - catalog: '157881', - db: null, - name: 'events', - kind: 'event_stream', - estimatedRows: 42, - comment: expect.stringContaining('PostHog event stream'), - foreignKeys: [], - }); - expect(snapshot.tables[0]?.columns).toEqual([ - { - name: 'uuid', - nativeType: 'UUID', - normalizedType: 'UUID', - dimensionType: 'string', - nullable: false, - primaryKey: true, - comment: 'Unique identifier for this specific event.', - }, - { - name: 'event', - nativeType: 'String', - normalizedType: 'VARCHAR', - dimensionType: 'string', - nullable: false, - primaryKey: false, - comment: expect.stringContaining('Event name'), - }, - { - name: 'timestamp', - nativeType: 'DateTime64', - normalizedType: 'TIMESTAMP', - dimensionType: 'time', - nullable: false, - primaryKey: false, - comment: expect.stringContaining('UTC timestamp'), - }, - { - name: 'properties', - nativeType: 'JSON', - normalizedType: 'JSON', - dimensionType: 'string', - nullable: true, - primaryKey: false, - comment: expect.stringContaining('JSON object'), - }, - ]); - }); - - it('runs samples, read-only SQL, event-stream discovery, row counts, and cleanup', async () => { - const queries: string[] = []; - const connector = new KtxPostHogScanConnector({ - connectionId: 'product', - connection, - env, - fetch: fakeFetch(queries), - sleep: async () => {}, - }); - - await expect(connector.testConnection()).resolves.toEqual({ success: true }); - await expect( - connector.sampleTable( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - columns: ['event'], - limit: 1, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1 }); - await expect( - connector.sampleColumn( - { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event', limit: 5 }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual({ values: ['$pageview'], nullCount: null, distinctCount: null }); - await expect( - connector.executeReadOnly({ connectionId: 'product', sql: 'select event from events', maxRows: 1 }, { runId: 'scan-run-1' }), - ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1, rowCount: 1 }); - await expect( - connector.executeReadOnly({ connectionId: 'product', sql: 'delete from events' }, { runId: 'scan-run-1' }), - ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); - await expect(connector.getTableRowCount('events')).resolves.toBe(42); - await expect( - connector.getColumnDistinctValues({ catalog: '157881', db: null, name: 'events' }, 'properties.$browser', { - maxCardinality: 5, - limit: 10, - sampleSize: 100, - }), - ).resolves.toEqual({ values: ['Chrome', 'Safari'], cardinality: 2 }); - await expect( - connector.eventStreamDiscovery.listEventTypes( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - eventColumn: 'event', - limit: 10, - minCount: 30, - lookbackDays: 14, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual([{ value: '$pageview', count: 9 }]); - expect(queries.some((query) => query.includes('HAVING cnt >= 30'))).toBe(true); - expect(queries.some((query) => query.includes('INTERVAL 14 DAY'))).toBe(true); - - await expect( - connector.eventStreamDiscovery.listPropertyKeys( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - jsonColumn: 'properties', - sampleSize: 1000, - limit: 10, - lookbackDays: 7, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual([{ key: '$browser', count: 7 }]); - - await expect( - connector.eventStreamDiscovery.listPropertyValues( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - jsonColumn: 'properties', - propertyKey: '$browser', - limit: 10, - maxCardinality: 1000, - lookbackDays: 30, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual({ - values: ['Chrome', 'Safari'], - cardinality: 2, - }); - await expect( - connector.columnStats( - { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event' }, - { runId: 'scan-run-1' }, - ), - ).resolves.toBeNull(); - await connector.cleanup(); - }); - - it('adapts native snapshots to live-database introspection snapshots', async () => { - const introspection = createPostHogLiveDatabaseIntrospection({ - connections: { product: connection }, - env, - fetch: fakeFetch(), - sleep: async () => {}, - now: () => new Date('2026-04-29T19:00:00.000Z'), - }); - - await expect(introspection.extractSchema('product')).resolves.toMatchObject({ - connectionId: 'product', - metadata: { project_id: '157881' }, - tables: expect.arrayContaining([ - expect.objectContaining({ - catalog: '157881', - db: null, - name: 'events', - columns: expect.arrayContaining([ - { - name: 'uuid', - nativeType: 'UUID', - normalizedType: 'UUID', - dimensionType: 'string', - nullable: false, - primaryKey: true, - comment: 'Unique identifier for this specific event.', - }, - ]), - }), - ]), - }); - }); -}); diff --git a/packages/connector-posthog/src/connector.ts b/packages/connector-posthog/src/connector.ts deleted file mode 100644 index 0ac2b37c..00000000 --- a/packages/connector-posthog/src/connector.ts +++ /dev/null @@ -1,609 +0,0 @@ -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; -import { assertReadOnlySql, limitSqlForExecution } from '@ktx/context/connections'; -import { - createKtxConnectorCapabilities, - type KtxColumnSampleInput, - type KtxColumnSampleResult, - type KtxColumnStatsInput, - type KtxColumnStatsResult, - type KtxEventPropertyDiscovery, - type KtxEventPropertyDiscoveryInput, - type KtxEventPropertyValuesInput, - type KtxEventPropertyValuesResult, - type KtxEventStreamDiscoveryPort, - type KtxEventTypeDiscovery, - type KtxEventTypeDiscoveryInput, - type KtxQueryResult, - type KtxReadOnlyQueryInput, - type KtxScanConnector, - type KtxScanContext, - type KtxScanInput, - type KtxSchemaColumn, - type KtxSchemaSnapshot, - type KtxSchemaTable, - type KtxTableRef, - type KtxTableSampleInput, - type KtxTableSampleResult, -} from '@ktx/context/scan'; -import { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js'; -import { getKtxPostHogColumnDescription, getKtxPostHogTableDescription } from './schema-descriptions.js'; - -export interface KtxPostHogConnectionConfig { - driver?: string; - api_key?: string; - apiKey?: string; - project_id?: string; - projectId?: string; - region?: 'us' | 'eu'; - host?: string; - readonly?: boolean; - [key: string]: unknown; -} - -export interface KtxPostHogResolvedConnectionConfig { - apiKey: string; - projectId: string; - baseUrl: string; -} - -export type KtxPostHogFetch = (url: string, init?: RequestInit) => Promise; - -export interface KtxPostHogScanConnectorOptions { - connectionId: string; - connection: KtxPostHogConnectionConfig | undefined; - env?: NodeJS.ProcessEnv; - fetch?: KtxPostHogFetch; - sleep?: (ms: number) => Promise; - now?: () => Date; -} - -export interface KtxPostHogReadOnlyQueryInput extends KtxReadOnlyQueryInput { - params?: Record; -} - -export interface KtxPostHogColumnDistinctValuesOptions { - maxCardinality: number; - limit: number; - sampleSize?: number; -} - -export interface KtxPostHogColumnDistinctValuesResult { - values: string[] | null; - cardinality: number; -} - -interface PostHogSchemaField { - name: string; - type: string; - hogql_value: string; - schema_valid: boolean; - table: string | null; - fields: string[] | null; - chain: string[] | null; - id: string | null; -} - -interface PostHogSchemaTable { - id: string; - name: string; - type: string; - row_count: number | null; - fields: Record; -} - -interface PostHogSchemaResponse { - tables: Record; - joins: unknown[]; -} - -interface PostHogQueryResponse { - results: unknown[][] | null; - columns: string[] | null; - types: [string, string][] | null; - error: string | null; - hogql: string | null; -} - -const allowedTableTypes = new Set(['posthog', 'system']); -const excludedTables = new Set([ - 'query_log', - 'system.teams', - 'system.exports', - 'system.ingestion_warnings', - 'system.insight_variables', - 'system.data_warehouse_sources', - 'system.groups', - 'system.group_type_mappings', -]); -const hiddenTablesToProbe = ['person_distinct_ids', 'cohort_people', 'static_cohort_people']; - -export function isKtxPostHogConnectionConfig(connection: KtxPostHogConnectionConfig | undefined): boolean { - return String(connection?.driver ?? '').toLowerCase() === 'posthog'; -} - -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - -function stringConfigValue( - connection: KtxPostHogConnectionConfig | undefined, - key: keyof KtxPostHogConnectionConfig, - env: NodeJS.ProcessEnv, -): string | undefined { - const value = connection?.[key]; - return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; -} - -export function postHogConnectionConfigFromConfig(input: { - connectionId: string; - connection: KtxPostHogConnectionConfig | undefined; - env?: NodeJS.ProcessEnv; -}): KtxPostHogResolvedConnectionConfig { - if (!isKtxPostHogConnectionConfig(input.connection)) { - throw new Error(`Native PostHog connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`); - } - if (input.connection?.readonly !== true) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.readonly: true`); - } - const env = input.env ?? process.env; - const apiKey = stringConfigValue(input.connection, 'api_key', env) ?? stringConfigValue(input.connection, 'apiKey', env); - const projectId = - stringConfigValue(input.connection, 'project_id', env) ?? stringConfigValue(input.connection, 'projectId', env); - if (!apiKey) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.api_key`); - } - if (!projectId) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.project_id`); - } - const host = stringConfigValue(input.connection, 'host', env); - const region = input.connection?.region ?? 'us'; - return { - apiKey, - projectId, - baseUrl: host ? host.replace(/\/$/, '') : region === 'eu' ? 'https://eu.posthog.com' : 'https://us.posthog.com', - }; -} - -export class KtxPostHogScanConnector implements KtxScanConnector { - readonly id: string; - readonly driver = 'posthog' as const; - readonly capabilities = createKtxConnectorCapabilities({ - tableSampling: true, - columnSampling: true, - columnStats: false, - readOnlySql: true, - nestedAnalysis: true, - eventStreamDiscovery: true, - formalForeignKeys: false, - estimatedRowCounts: true, - }); - - readonly eventStreamDiscovery: KtxEventStreamDiscoveryPort = { - listEventTypes: (input, ctx) => this.listEventTypes(input, ctx), - listPropertyKeys: (input, ctx) => this.listPropertyKeys(input, ctx), - listPropertyValues: (input, ctx) => this.listPropertyValues(input, ctx), - }; - - private readonly connectionId: string; - private readonly resolved: KtxPostHogResolvedConnectionConfig; - private readonly fetchImpl: KtxPostHogFetch; - private readonly sleep: (ms: number) => Promise; - private readonly now: () => Date; - private readonly dialect = new KtxPostHogDialect(); - - constructor(options: KtxPostHogScanConnectorOptions) { - this.connectionId = options.connectionId; - this.resolved = postHogConnectionConfigFromConfig({ - connectionId: options.connectionId, - connection: options.connection, - env: options.env, - }); - this.fetchImpl = options.fetch ?? fetch; - this.sleep = options.sleep ?? ((ms) => new Promise((resolveSleep) => setTimeout(resolveSleep, ms))); - this.now = options.now ?? (() => new Date()); - this.id = `posthog:${options.connectionId}`; - } - - async testConnection(): Promise<{ success: boolean; error?: string }> { - const response = await this.query('SELECT 1 AS test'); - return response.error ? { success: false, error: response.error } : { success: true }; - } - - async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const response = await this.makeRequest('/query', { query: { kind: 'DatabaseSchemaQuery' } }); - const tables: KtxSchemaTable[] = []; - for (const [tableName, tableInfo] of Object.entries(response.tables ?? {})) { - if (!allowedTableTypes.has(tableInfo.type) || excludedTables.has(tableName)) { - continue; - } - tables.push(this.toSchemaTable(tableName, tableInfo)); - } - tables.push(...(await this.discoverHiddenTables())); - tables.sort((left, right) => left.name.localeCompare(right.name)); - return { - connectionId: this.connectionId, - driver: 'posthog', - extractedAt: this.now().toISOString(), - scope: { catalogs: [this.resolved.projectId] }, - metadata: { - project_id: this.resolved.projectId, - table_count: tables.length, - total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), - }, - tables, - }; - } - - async sampleTable( - input: KtxTableSampleInput & { columnMetadata?: KtxPostHogSampleColumnInfo[] }, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const sql = input.columnMetadata - ? this.dialect.generateSampleQueryWithMetadata(this.qTableName(input.table), input.limit, input.columnMetadata) - : this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns); - const result = await this.query(sql); - return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; - } - - async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const result = await this.query( - this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), - ); - const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); - return { values, nullCount: null, distinctCount: null }; - } - - async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise { - return null; - } - - async executeReadOnly(input: KtxPostHogReadOnlyQueryInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); - const prepared = this.dialect.prepareQuery(limitedSql, input.params); - const result = await this.query(prepared.sql, prepared.params); - return { ...result, rowCount: result.rows.length }; - } - - async getTableRowCount(tableName: string): Promise { - const result = await this.query(`SELECT count() AS cnt FROM ${this.dialect.quoteIdentifier(tableName)}`); - return Number(result.rows[0]?.[0] ?? 0); - } - - async getColumnDistinctValues( - table: KtxTableRef, - columnName: string, - options: KtxPostHogColumnDistinctValuesOptions, - ): Promise { - const sampleSize = options.sampleSize ?? 10000; - const tableName = this.qTableName(table); - const cardinalityResult = await this.query( - this.dialect.generateCardinalitySampleQuery(tableName, columnName, sampleSize), - ); - if (cardinalityResult.error || cardinalityResult.rows.length === 0) { - return null; - } - const cardinality = Number(cardinalityResult.rows[0]?.[0]); - if (!Number.isFinite(cardinality)) { - return null; - } - if (cardinality === 0) { - return { values: [], cardinality: 0 }; - } - if (cardinality > options.maxCardinality) { - return { values: null, cardinality }; - } - const valuesResult = await this.query(this.dialect.generateDistinctValuesQuery(tableName, columnName, options.limit)); - if (valuesResult.error) { - return null; - } - return { - values: valuesResult.rows.filter((row) => row[0] !== null).map((row) => String(row[0])), - cardinality, - }; - } - - private async listEventTypes( - input: KtxEventTypeDiscoveryInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const limit = this.positiveInteger(input.limit, 'limit'); - const lookbackDays = this.positiveInteger(input.lookbackDays ?? 30, 'lookbackDays'); - const minCount = this.positiveInteger(input.minCount ?? 0, 'minCount'); - const eventColumn = this.dialect.quoteIdentifier(input.eventColumn); - const tableName = this.qTableName(input.table); - const havingClause = minCount > 0 ? `HAVING cnt >= ${minCount}` : ''; - const result = await this.query(` - SELECT ${eventColumn} AS event, count() as cnt - FROM ${tableName} - WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY - GROUP BY event - ${havingClause} - ORDER BY cnt DESC - LIMIT ${limit} - `); - if (result.error) { - return []; - } - return result.rows - .filter((row) => row[0] != null && String(row[0]).trim() !== '') - .map((row) => ({ value: String(row[0]), count: Number(row[1]) })); - } - - private async listPropertyKeys( - input: KtxEventPropertyDiscoveryInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const sampleSize = this.positiveInteger(input.sampleSize, 'sampleSize'); - const limit = this.positiveInteger(input.limit, 'limit'); - const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); - const tableName = this.qTableName(input.table); - const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); - const whereClause = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; - const result = await this.query(` - SELECT key, count() as cnt - FROM ( - SELECT arrayJoin(JSONExtractKeys(${jsonColumn})) AS key - FROM ${tableName} - ${whereClause} - LIMIT ${sampleSize} - ) - GROUP BY key - ORDER BY cnt DESC - LIMIT ${limit} - `); - if (result.error) { - return []; - } - return result.rows.map((row) => ({ key: String(row[0]), count: Number(row[1]) })); - } - - private async listPropertyValues( - input: KtxEventPropertyValuesInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const limit = this.positiveInteger(input.limit, 'limit'); - const maxCardinality = this.positiveInteger(input.maxCardinality ?? 1000, 'maxCardinality'); - const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); - const tableName = this.qTableName(input.table); - const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); - const escapedKey = this.escapeHogQLString(input.propertyKey); - const timeFilter = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; - const cardinalityResult = await this.query(` - SELECT uniq(JSONExtractString(${jsonColumn}, '${escapedKey}')) as cardinality - FROM ${tableName} - ${timeFilter} - LIMIT 1000000 - `); - if (cardinalityResult.error || cardinalityResult.rows.length === 0) { - return null; - } - const cardinality = Number(cardinalityResult.rows[0]?.[0]); - if (!Number.isFinite(cardinality) || cardinality > maxCardinality) { - return null; - } - const valuesResult = await this.query(` - SELECT DISTINCT JSONExtractString(${jsonColumn}, '${escapedKey}') as value - FROM ${tableName} - WHERE JSONExtractString(${jsonColumn}, '${escapedKey}') IS NOT NULL - AND JSONExtractString(${jsonColumn}, '${escapedKey}') != '' - ${lookbackDays === null ? '' : `AND timestamp > now() - INTERVAL ${lookbackDays} DAY`} - ORDER BY value - LIMIT ${limit} - `); - if (valuesResult.error) { - return null; - } - const values = valuesResult.rows - .map((row) => (row[0] != null ? String(row[0]) : '')) - .filter((value) => { - const trimmed = value.trim(); - return trimmed !== '' && trimmed !== '[]' && trimmed !== '{}' && trimmed !== 'null'; - }); - return { values, cardinality }; - } - - async cleanup(): Promise {} - - qTableName(table: Pick): string { - return this.dialect.formatTableName(table); - } - - quoteIdentifier(identifier: string): string { - return this.dialect.quoteIdentifier(identifier); - } - - private toSchemaTable(tableName: string, tableInfo: PostHogSchemaTable): KtxSchemaTable { - return { - catalog: this.resolved.projectId, - db: null, - name: tableName, - kind: tableName === 'events' ? 'event_stream' : 'table', - comment: getKtxPostHogTableDescription(tableName) ?? null, - estimatedRows: tableInfo.row_count ?? null, - columns: this.extractColumns(tableName, tableInfo.fields), - foreignKeys: [], - }; - } - - private async discoverHiddenTables(): Promise { - const tables: KtxSchemaTable[] = []; - for (const tableName of hiddenTablesToProbe) { - const result = await this.query(`SELECT * FROM ${tableName} LIMIT 0`); - if (result.error) { - continue; - } - tables.push({ - catalog: this.resolved.projectId, - db: null, - name: tableName, - kind: 'table', - comment: getKtxPostHogTableDescription(tableName) ?? null, - estimatedRows: null, - columns: result.headers.map((header) => ({ - name: header, - nativeType: 'String', - normalizedType: 'VARCHAR', - dimensionType: 'string', - nullable: true, - primaryKey: false, - comment: getKtxPostHogColumnDescription(tableName, header) ?? null, - })), - foreignKeys: [], - }); - } - return tables; - } - - private extractColumns(tableName: string, fields: Record): KtxSchemaColumn[] { - const columns: KtxSchemaColumn[] = []; - for (const [fieldName, fieldInfo] of Object.entries(fields)) { - if ( - fieldInfo.type === 'lazy_table' || - fieldInfo.type === 'virtual_table' || - fieldInfo.type === 'field_traverser' || - fieldInfo.type === 'expression' - ) { - continue; - } - const nativeType = this.normalizeFieldType(fieldInfo.type); - columns.push({ - name: fieldName, - nativeType, - normalizedType: this.dialect.mapDataType(nativeType), - dimensionType: this.dialect.mapToDimensionType(nativeType), - nullable: this.isNullableField(tableName, fieldName, fieldInfo.type), - primaryKey: this.isPrimaryKeyField(tableName, fieldName), - comment: getKtxPostHogColumnDescription(tableName, fieldName) ?? null, - }); - } - return columns; - } - - private normalizeFieldType(posthogType: string): string { - const typeMap: Record = { - string: 'String', - integer: 'Int64', - datetime: 'DateTime64', - boolean: 'UInt8', - bool: 'Boolean', - json: 'JSON', - array: 'Array(String)', - uuid: 'UUID', - event: 'String', - }; - return typeMap[posthogType.toLowerCase()] ?? posthogType; - } - - private isNullableField(tableName: string, fieldName: string, fieldType: string): boolean { - if (tableName === 'events' && ['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldName)) { - return false; - } - return !['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldType.toLowerCase()); - } - - private isPrimaryKeyField(tableName: string, fieldName: string): boolean { - return ( - (tableName === 'events' && fieldName === 'uuid') || - (tableName === 'persons' && fieldName === 'id') || - (tableName === 'sessions' && fieldName === 'session_id') || - (tableName === 'groups' && fieldName === 'key') - ); - } - - private async query(sql: string, params?: Record): Promise { - const response = await this.makeRequest('/query', { - query: { - kind: 'HogQLQuery', - query: sql, - ...(params && Object.keys(params).length > 0 ? { values: params } : {}), - }, - }); - if (response.error) { - return { headers: [], rows: [], totalRows: 0, rowCount: null, error: response.error }; - } - const headers = response.columns ?? []; - const rows = response.results ?? []; - const headerTypes = response.types?.map((type) => type[1]); - return { - headers, - rows, - totalRows: rows.length, - rowCount: rows.length, - ...(headerTypes && headerTypes.length > 0 ? { headerTypes } : {}), - }; - } - - private async makeRequest(endpoint: string, body: Record, maxRetries = 3): Promise { - const url = `${this.resolved.baseUrl}/api/projects/${this.resolved.projectId}${endpoint}`; - let lastError: Error | null = null; - for (let attempt = 0; attempt <= maxRetries; attempt += 1) { - const response = await this.fetchImpl(url, { - method: 'POST', - headers: { - Authorization: `Bearer ${this.resolved.apiKey}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }); - if (response.ok) { - return response.json() as Promise; - } - const errorText = await response.text(); - const errorMessage = this.parseErrorMessage(errorText); - if (response.status === 429 && attempt < maxRetries) { - await this.sleep(this.parseRateLimitWaitTime(errorMessage) * 1000); - continue; - } - lastError = new Error(`PostHog API error (${response.status}): ${errorMessage}`); - } - throw lastError ?? new Error('PostHog API request failed after retries'); - } - - private parseErrorMessage(errorText: string): string { - try { - const errorJson = JSON.parse(errorText) as { detail?: unknown; error?: unknown }; - return String(errorJson.detail ?? errorJson.error ?? errorText); - } catch { - return errorText; - } - } - - private parseRateLimitWaitTime(errorMessage: string): number { - const match = errorMessage.match(/(?:Expected available in|retry after) (\d+) seconds?/i); - return match ? Number.parseInt(match[1] ?? '30', 10) + 2 : 30; - } - - private escapeHogQLString(value: string): string { - return value.replace(/\\/g, '\\\\').replace(/'/g, "''"); - } - - private positiveInteger(value: number, name: string): number { - if (!Number.isInteger(value) || value < 0) { - throw new Error(`PostHog event-stream discovery requires ${name} to be a non-negative integer`); - } - return value; - } - - private assertConnection(connectionId: string): void { - if (connectionId !== this.connectionId) { - throw new Error(`PostHog connector ${this.connectionId} cannot scan connection ${connectionId}`); - } - } -} diff --git a/packages/connector-posthog/src/dialect.test.ts b/packages/connector-posthog/src/dialect.test.ts deleted file mode 100644 index 5c5b2c43..00000000 --- a/packages/connector-posthog/src/dialect.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { KtxPostHogDialect } from './dialect.js'; - -describe('KtxPostHogDialect', () => { - const dialect = new KtxPostHogDialect(); - - it('quotes identifiers, formats table names, maps types, and prepares HogQL params', () => { - expect(dialect.quoteIdentifier('weird`name')).toBe('`weird\\`name`'); - expect(dialect.formatTableName({ name: 'events', catalog: '157881', db: null })).toBe('`events`'); - expect(dialect.mapDataType('Nullable(DateTime64(6, UTC))')).toBe('TIMESTAMP'); - expect(dialect.mapDataType('Array(String)')).toBe('JSON'); - expect(dialect.mapToDimensionType('UInt8')).toBe('number'); - expect(dialect.mapToDimensionType('Boolean')).toBe('boolean'); - expect(dialect.prepareQuery('SELECT * FROM events WHERE event = :event', { event: '$pageview' })).toEqual({ - sql: 'SELECT * FROM events WHERE event = {event}', - params: { event: '$pageview' }, - }); - }); - - it('builds sample and virtual-property queries without app dependencies', () => { - expect(dialect.generateSampleQuery('`events`', 5, ['event', 'timestamp'])).toBe( - 'SELECT `event`, `timestamp` FROM `events` ORDER BY rand() LIMIT 5', - ); - expect( - dialect.generateSampleQueryWithMetadata('`events`', 3, [ - { name: 'event', parentColumnId: null }, - { name: 'properties.$browser', parentColumnId: 'properties' }, - ]), - ).toBe( - "SELECT `event`, JSONExtractString(properties, '$browser') AS `properties.$browser` FROM `events` ORDER BY rand() LIMIT 3", - ); - expect(dialect.generateColumnSampleQuery('`events`', 'properties.$browser', 10)).toBe( - "SELECT JSONExtractString(properties, '$browser') FROM `events` WHERE JSONExtractString(properties, '$browser') IS NOT NULL ORDER BY rand() LIMIT 10", - ); - }); - - it('builds data-dictionary and time helper SQL', () => { - expect(dialect.generateCardinalitySampleQuery('events', 'properties.$browser', 100)).toContain( - "JSONExtractString(properties, '$browser') AS val", - ); - expect(dialect.generateDistinctValuesQuery('events', 'event', 20)).toContain('SELECT DISTINCT toString(`event`) AS val'); - expect(dialect.getNullCountExpression('event')).toBe('countIf(event IS NULL)'); - expect(dialect.getDistinctCountExpression('event')).toBe('uniq(event)'); - expect(dialect.getTimeTruncExpression('timestamp', 'week', 'UTC')).toBe("DATE_TRUNC('week', toTimeZone(timestamp, 'UTC'))"); - expect(dialect.parseIntervalToSql('7 day')).toBe('INTERVAL 7 DAY'); - expect(dialect.generateColumnStatisticsQuery('', 'events')).toBeNull(); - }); -}); diff --git a/packages/connector-posthog/src/dialect.ts b/packages/connector-posthog/src/dialect.ts deleted file mode 100644 index 36f6edee..00000000 --- a/packages/connector-posthog/src/dialect.ts +++ /dev/null @@ -1,258 +0,0 @@ -import type { KtxSchemaDimensionType, KtxTableRef } from '@ktx/context/scan'; - -type PostHogTableNameRef = Pick & Partial>; - -export interface KtxPostHogSampleColumnInfo { - name: string; - parentColumnId: string | null; -} - -export class KtxPostHogDialect { - readonly type = 'posthog'; - - private readonly typeMappings: Record = { - datetime64: 'time', - datetime: 'time', - date: 'time', - int64: 'number', - int32: 'number', - int16: 'number', - int8: 'number', - uint64: 'number', - uint32: 'number', - uint16: 'number', - uint8: 'number', - float64: 'number', - float32: 'number', - decimal: 'number', - integer: 'number', - string: 'string', - uuid: 'string', - json: 'string', - boolean: 'boolean', - bool: 'boolean', - }; - - quoteIdentifier(identifier: string): string { - return `\`${identifier.replace(/`/g, '\\`')}\``; - } - - formatTableName(table: PostHogTableNameRef): string { - return this.quoteIdentifier(table.name); - } - - mapDataType(nativeType: string): string { - const cleanType = this.cleanType(nativeType); - const typeMapping: Record = { - STRING: 'VARCHAR', - UUID: 'UUID', - INT64: 'BIGINT', - INT32: 'INTEGER', - INT16: 'SMALLINT', - INT8: 'TINYINT', - UINT64: 'BIGINT', - UINT32: 'INTEGER', - UINT16: 'SMALLINT', - UINT8: 'TINYINT', - FLOAT64: 'DOUBLE', - FLOAT32: 'FLOAT', - DATETIME64: 'TIMESTAMP', - DATETIME: 'TIMESTAMP', - DATE: 'DATE', - JSON: 'JSON', - ARRAY: 'JSON', - BOOLEAN: 'BOOLEAN', - BOOL: 'BOOLEAN', - }; - return typeMapping[cleanType] ?? cleanType; - } - - mapToDimensionType(nativeType: string): KtxSchemaDimensionType { - if (!nativeType) { - return 'string'; - } - const cleanType = this.cleanType(nativeType).toLowerCase(); - if (this.typeMappings[cleanType]) { - return this.typeMappings[cleanType]; - } - if (cleanType.includes('date') || cleanType.includes('time')) { - return 'time'; - } - if (cleanType.includes('int') || cleanType.includes('float') || cleanType.includes('decimal') || cleanType.includes('num')) { - return 'number'; - } - if (cleanType === 'bool' || cleanType === 'boolean') { - return 'boolean'; - } - return 'string'; - } - - generateSampleQuery(tableName: string, limit: number, columns?: string[]): string { - const columnList = - columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*'; - return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`; - } - - generateSampleQueryWithMetadata(tableName: string, limit: number, columnMetadata?: KtxPostHogSampleColumnInfo[]): string { - if (!columnMetadata || columnMetadata.length === 0) { - return this.generateSampleQuery(tableName, limit); - } - const columnList = columnMetadata - .map((column) => { - if (!column.parentColumnId) { - return this.quoteIdentifier(column.name); - } - const expression = this.formatColumnExpression(column.name); - return `${expression} AS ${this.quoteIdentifier(column.name)}`; - }) - .join(', '); - return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`; - } - - generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string { - const colExpr = this.formatColumnExpression(columnName); - return `SELECT ${colExpr} FROM ${tableName} WHERE ${colExpr} IS NOT NULL ORDER BY rand() LIMIT ${limit}`; - } - - prepareQuery(sql: string, params?: Record): { sql: string; params?: Record } { - if (!params) { - return { sql, params: undefined }; - } - let processedSql = sql; - const processedParams: Record = {}; - for (const [key, value] of Object.entries(params)) { - processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `{${key}}`); - processedParams[key] = value; - } - return { - sql: processedSql, - params: Object.keys(processedParams).length > 0 ? processedParams : undefined, - }; - } - - getRandomSampleFilter(samplePct: number): string { - if (samplePct <= 0 || samplePct >= 1) { - return ''; - } - return `rand() < ${samplePct}`; - } - - getTableSampleClause(_samplePct: number): string { - return ''; - } - - getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; - } - - getNullCountExpression(column: string): string { - return `countIf(${column} IS NULL)`; - } - - getDistinctCountExpression(column: string): string { - return `uniq(${column})`; - } - - generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT uniq(val) AS cardinality - FROM ( - SELECT ${colExpr} AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - LIMIT ${sampleSize} - ) - `; - } - - generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT DISTINCT toString(${colExpr}) AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - ORDER BY val - LIMIT ${limit} - `; - } - - generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null { - return null; - } - - generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT uniq(val) AS cardinality - FROM ( - SELECT ${colExpr} AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - ORDER BY rand() - LIMIT ${sampleSize} - ) - `; - } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column; - return `DATE_TRUNC('${granularity}', ${col})`; - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column; - const [amount, unit] = interval.split(' '); - const seconds = Number(amount) * this.getUnitSeconds(unit ?? 'day'); - const originExpr = origin ? `toDateTime('${origin}')` : `toDateTime('1970-01-01')`; - return `${originExpr} + toIntervalSecond(intDiv(toUnixTimestamp(${col}) - toUnixTimestamp(${originExpr}), ${seconds}) * ${seconds})`; - } - - parseIntervalToSql(interval: string): string { - const [amount, unit] = interval.split(' '); - return `INTERVAL ${amount} ${unit?.toUpperCase() ?? 'DAY'}`; - } - - private formatColumnExpression(columnName: string): string { - const rawName = columnName.replace(/^`|`$/g, ''); - const propertyMatch = rawName.match(/^(properties|person\.properties)\.(.+)$/); - if (propertyMatch) { - const [, parentCol, propertyKey] = propertyMatch; - return `JSONExtractString(${parentCol}, '${propertyKey.replace(/'/g, "''")}')`; - } - return this.quoteIdentifier(rawName); - } - - private cleanType(nativeType: string): string { - let cleanType = nativeType.toUpperCase().trim(); - const nullableMatch = cleanType.match(/^NULLABLE\((.+)\)$/); - if (nullableMatch) { - cleanType = nullableMatch[1] ?? cleanType; - } - if (cleanType.startsWith('ARRAY(')) { - return 'ARRAY'; - } - if (cleanType.startsWith('DATETIME64')) { - return 'DATETIME64'; - } - return cleanType; - } - - private getUnitSeconds(unit: string): number { - const secondsByUnit: Record = { - second: 1, - minute: 60, - hour: 3600, - day: 86400, - week: 604800, - month: 2592000, - quarter: 7776000, - year: 31536000, - }; - return secondsByUnit[unit.toLowerCase()] ?? 86400; - } -} diff --git a/packages/connector-posthog/src/index.ts b/packages/connector-posthog/src/index.ts deleted file mode 100644 index 7fa61ebb..00000000 --- a/packages/connector-posthog/src/index.ts +++ /dev/null @@ -1,19 +0,0 @@ -export { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js'; -export { - getKtxPostHogColumnDescription, - getKtxPostHogPropertyDescription, - getKtxPostHogTableDescription, -} from './schema-descriptions.js'; -export { - isKtxPostHogConnectionConfig, - KtxPostHogScanConnector, - postHogConnectionConfigFromConfig, - type KtxPostHogColumnDistinctValuesOptions, - type KtxPostHogColumnDistinctValuesResult, - type KtxPostHogConnectionConfig, - type KtxPostHogFetch, - type KtxPostHogReadOnlyQueryInput, - type KtxPostHogResolvedConnectionConfig, - type KtxPostHogScanConnectorOptions, -} from './connector.js'; -export { createPostHogLiveDatabaseIntrospection } from './live-database-introspection.js'; diff --git a/packages/connector-posthog/src/live-database-introspection.ts b/packages/connector-posthog/src/live-database-introspection.ts deleted file mode 100644 index 04828a19..00000000 --- a/packages/connector-posthog/src/live-database-introspection.ts +++ /dev/null @@ -1,34 +0,0 @@ -import type { LiveDatabaseIntrospectionPort } from '@ktx/context/ingest'; -import type { KtxProjectConnectionConfig } from '@ktx/context/project'; -import { KtxPostHogScanConnector, type KtxPostHogConnectionConfig, type KtxPostHogFetch } from './connector.js'; - -interface CreatePostHogLiveDatabaseIntrospectionOptions { - connections: Record; - env?: NodeJS.ProcessEnv; - fetch?: KtxPostHogFetch; - sleep?: (ms: number) => Promise; - now?: () => Date; -} - -export function createPostHogLiveDatabaseIntrospection( - options: CreatePostHogLiveDatabaseIntrospectionOptions, -): LiveDatabaseIntrospectionPort { - return { - async extractSchema(connectionId: string) { - const connection = options.connections[connectionId] as KtxPostHogConnectionConfig | undefined; - const connector = new KtxPostHogScanConnector({ - connectionId, - connection, - env: options.env, - fetch: options.fetch, - sleep: options.sleep, - now: options.now, - }); - try { - return await connector.introspect({ connectionId, driver: 'posthog' }, { runId: `posthog-${connectionId}` }); - } finally { - await connector.cleanup(); - } - }, - }; -} diff --git a/packages/connector-posthog/src/package-exports.test.ts b/packages/connector-posthog/src/package-exports.test.ts deleted file mode 100644 index f9d822ae..00000000 --- a/packages/connector-posthog/src/package-exports.test.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import * as posthog from './index.js'; - -describe('@ktx/connector-posthog package exports', () => { - it('exports the connector, dialect, descriptions, and live-database adapter', () => { - expect(posthog.KtxPostHogDialect).toBeTypeOf('function'); - expect(posthog.KtxPostHogScanConnector).toBeTypeOf('function'); - expect(posthog.createPostHogLiveDatabaseIntrospection).toBeTypeOf('function'); - expect(posthog.getKtxPostHogPropertyDescription('$browser')).toBe('User browser name.'); - }); -}); diff --git a/packages/connector-posthog/src/schema-descriptions.ts b/packages/connector-posthog/src/schema-descriptions.ts deleted file mode 100644 index d333fcb4..00000000 --- a/packages/connector-posthog/src/schema-descriptions.ts +++ /dev/null @@ -1,99 +0,0 @@ -const TABLE_DESCRIPTIONS: Record = { - events: - 'PostHog event stream containing all tracked user interactions. Each row represents a single event with properties, timestamp, and user identifier.', - persons: - 'PostHog persons table containing unique users, identifiers, and user properties for segmentation and cohort analysis.', - sessions: - 'PostHog sessions table grouping events into user sessions with duration, entry and exit URLs, and device details.', - groups: - 'PostHog groups table for B2B and team-based analytics. Contains group identifiers and group properties.', - person_distinct_ids: 'PostHog identity resolution table mapping distinct_ids to person_ids.', - cohort_people: 'PostHog dynamic cohort membership table.', - static_cohort_people: 'PostHog static cohort membership table.', - 'system.cohorts': 'PostHog cohort definitions table.', - 'system.feature_flags': 'PostHog feature flag definitions table.', - 'system.experiments': 'PostHog A/B test and experiment definitions table.', - 'system.surveys': 'PostHog survey definitions table.', - 'system.dashboards': 'PostHog dashboard metadata table.', - 'system.insights': 'PostHog saved insight and chart definitions table.', -}; - -const COLUMN_DESCRIPTIONS: Record = { - 'events.uuid': 'Unique identifier for this specific event.', - 'events.event': 'Event name such as $pageview, $autocapture, $identify, or a custom event.', - 'events.distinct_id': 'User identifier that links events to persons.', - 'events.timestamp': 'UTC timestamp when the event occurred.', - 'events.created_at': 'Timestamp when the event was ingested into PostHog.', - 'events.properties': 'JSON object containing event-specific properties.', - 'events.person_id': 'Internal PostHog person UUID.', - 'events.$session_id': 'Session identifier linking this event to sessions.', - 'persons.id': 'Internal PostHog person UUID.', - 'persons.distinct_id': 'Primary user identifier for joins with events.', - 'persons.properties': 'JSON object containing user properties.', - 'persons.created_at': 'Timestamp when this person was first seen in PostHog.', - 'persons.is_identified': 'Whether the person has been explicitly identified.', - 'sessions.session_id': 'Unique session identifier.', - 'sessions.distinct_id': 'User identifier for this session.', - 'sessions.$start_timestamp': 'Timestamp when the session started.', - 'sessions.$end_timestamp': 'Timestamp when the session ended.', - 'sessions.$session_duration': 'Total session duration in seconds.', - 'groups.index': 'Index identifying the configured PostHog group type.', - 'groups.key': 'Unique identifier for this group.', - 'groups.properties': 'JSON object containing group properties.', - 'groups.created_at': 'Timestamp when this group was first seen.', - 'person_distinct_ids.distinct_id': 'Device or browser identifier for a person.', - 'person_distinct_ids.person_id': 'Internal PostHog person UUID mapped to the distinct_id.', - 'cohort_people.person_id': 'Person UUID belonging to the cohort.', - 'cohort_people.cohort_id': 'Cohort identifier.', - 'static_cohort_people.person_id': 'Person UUID belonging to the static cohort.', - 'static_cohort_people.cohort_id': 'Static cohort identifier.', - 'system.cohorts.id': 'Unique cohort identifier.', - 'system.cohorts.name': 'Human-readable cohort name.', - 'system.feature_flags.id': 'Unique feature flag identifier.', - 'system.feature_flags.key': 'Feature flag key used in code.', - 'system.experiments.id': 'Unique experiment identifier.', - 'system.experiments.name': 'Experiment name.', - 'system.surveys.id': 'Unique survey identifier.', - 'system.surveys.name': 'Survey name.', - 'system.dashboards.id': 'Unique dashboard identifier.', - 'system.dashboards.name': 'Dashboard name.', - 'system.insights.id': 'Unique insight identifier.', - 'system.insights.name': 'Insight or chart name.', -}; - -const PROPERTY_DESCRIPTIONS: Record = { - $browser: 'User browser name.', - $browser_version: 'User browser version.', - $os: 'Operating system.', - $os_version: 'Operating system version.', - $device: 'Device name.', - $device_type: 'Device type.', - $current_url: 'Full URL of the current page.', - $pathname: 'Path portion of the current URL.', - $host: 'Hostname of the current page.', - $referrer: 'Referrer URL.', - $referring_domain: 'Referrer domain.', - $utm_source: 'UTM source parameter.', - $utm_medium: 'UTM medium parameter.', - $utm_campaign: 'UTM campaign parameter.', - $utm_content: 'UTM content parameter.', - $utm_term: 'UTM term parameter.', - $lib: 'PostHog library name used to capture the event.', - $lib_version: 'PostHog library version.', - $insert_id: 'Unique identifier for event deduplication.', - $active_feature_flags: 'List of active feature flags for this user or event.', - $feature_flag: 'Feature flag name for flag-related events.', - $feature_flag_response: 'Feature flag value or variant.', -}; - -export function getKtxPostHogTableDescription(tableName: string): string | undefined { - return TABLE_DESCRIPTIONS[tableName]; -} - -export function getKtxPostHogColumnDescription(tableName: string, columnName: string): string | undefined { - return COLUMN_DESCRIPTIONS[`${tableName}.${columnName}`]; -} - -export function getKtxPostHogPropertyDescription(propertyKey: string): string | null { - return PROPERTY_DESCRIPTIONS[propertyKey] ?? null; -} diff --git a/packages/connector-posthog/tsconfig.json b/packages/connector-posthog/tsconfig.json deleted file mode 100644 index 965e6978..00000000 --- a/packages/connector-posthog/tsconfig.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "extends": "../../tsconfig.base.json", - "compilerOptions": { - "outDir": "./dist", - "rootDir": "./src" - }, - "include": ["src/**/*.ts"], - "exclude": ["dist", "node_modules"] -} diff --git a/packages/context/src/connections/connection-type.ts b/packages/context/src/connections/connection-type.ts index 81c17bb4..6cd48042 100644 --- a/packages/context/src/connections/connection-type.ts +++ b/packages/context/src/connections/connection-type.ts @@ -18,7 +18,6 @@ export const connectionTypeSchema = z.enum([ 'METABASE', 'LOOKER', 'NOTION', - 'POSTHOG', 'MYSQL', 'CLICKHOUSE', 'PLAIN', diff --git a/packages/context/src/core/git.service.test.ts b/packages/context/src/core/git.service.test.ts index 308bbd4d..14e93495 100644 --- a/packages/context/src/core/git.service.test.ts +++ b/packages/context/src/core/git.service.test.ts @@ -256,6 +256,31 @@ describe('GitService', () => { await service.removeWorktree(wtDir).catch(() => undefined); await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); }); + + it('serializes concurrent commits from scoped services targeting the same worktree', async () => { + const { commitHash } = await writeAndCommit('seed.md', 'seed'); + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, `wt-${Date.now()}-fw-concurrent`); + await service.addWorktree(wtDir, 'session/concurrent', commitHash); + + const first = service.forWorktree(wtDir); + const second = service.forWorktree(wtDir); + await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8'); + await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8'); + + const [a, b] = await Promise.all([ + first.commitFile('a.md', 'add a', 'System User', 'system@example.com'), + second.commitFile('b.md', 'add b', 'System User', 'system@example.com'), + ]); + + expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/); + expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n'); + await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n'); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); }); describe('squashMergeIntoMain', () => { diff --git a/packages/context/src/core/git.service.ts b/packages/context/src/core/git.service.ts index 5da67e59..6539f9fd 100644 --- a/packages/context/src/core/git.service.ts +++ b/packages/context/src/core/git.service.ts @@ -32,6 +32,8 @@ export type SquashMergeResult = | { ok: false; conflict: true; conflictPaths: string[] }; export class GitService { + private static readonly mutationQueues = new Map>(); + private readonly logger: KtxLogger; private git!: SimpleGit; private configDir: string; @@ -92,6 +94,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.commitFileUnlocked(filePath, commitMessage, author, authorEmail)); + } + + private async commitFileUnlocked( + filePath: string, + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { // Stage the file @@ -166,6 +177,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.commitFilesUnlocked(filePaths, commitMessage, author, authorEmail)); + } + + private async commitFilesUnlocked( + filePaths: string[], + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { for (const filePath of filePaths) { @@ -231,6 +251,10 @@ export class GitService { if (filePaths.length === 0) { return; } + return this.withMutationQueue(() => this.checkoutFilesUnlocked(filePaths)); + } + + private async checkoutFilesUnlocked(filePaths: string[]): Promise { try { await this.git.checkout(['--', ...filePaths]); } catch (error) { @@ -292,6 +316,10 @@ export class GitService { if (!trimmed) { return; } + return this.withMutationQueue(() => this.addNoteUnlocked(commitHash, trimmed)); + } + + private async addNoteUnlocked(commitHash: string, trimmed: string): Promise { try { await this.git.raw(['notes', 'add', '-f', '-m', trimmed, commitHash]); } catch (error) { @@ -343,6 +371,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.deleteFileUnlocked(filePath, commitMessage, author, authorEmail)); + } + + private async deleteFileUnlocked( + filePath: string, + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { // Remove the file from git @@ -485,6 +522,13 @@ export class GitService { async squashTo( preHead: string, options: { message: string; author: string; authorEmail: string; expectedAuthor?: string }, + ): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> { + return this.withMutationQueue(() => this.squashToUnlocked(preHead, options)); + } + + private async squashToUnlocked( + preHead: string, + options: { message: string; author: string; authorEmail: string; expectedAuthor?: string }, ): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> { const { message, author, authorEmail } = options; const expectedAuthor = options.expectedAuthor ?? author; @@ -560,6 +604,15 @@ export class GitService { author: string, authorEmail: string, commitMessage: string, + ): Promise { + return this.withMutationQueue(() => this.squashMergeIntoMainUnlocked(branch, author, authorEmail, commitMessage)); + } + + private async squashMergeIntoMainUnlocked( + branch: string, + author: string, + authorEmail: string, + commitMessage: string, ): Promise { // Diff of HEAD..branch (two dots) lists commits/files reachable from `branch` that // aren't on HEAD — i.e. exactly what the squash would apply. Three dots (HEAD...branch) @@ -615,7 +668,7 @@ export class GitService { * range, which can pause the sequencer on conflicts. */ async resetHardTo(targetSha: string): Promise { - await this.git.raw(['reset', '--hard', targetSha]); + await this.withMutationQueue(() => this.git.raw(['reset', '--hard', targetSha])); } /** @@ -667,6 +720,10 @@ export class GitService { * Used by the memory agent to isolate per-session writes from interactive saves on main. */ async addWorktree(path: string, branch: string, startSha: string): Promise { + await this.withMutationQueue(() => this.addWorktreeUnlocked(path, branch, startSha)); + } + + private async addWorktreeUnlocked(path: string, branch: string, startSha: string): Promise { try { await this.git.raw(['worktree', 'add', '-b', branch, path, startSha]); } catch (error) { @@ -679,6 +736,10 @@ export class GitService { * worktrees are ktx-internal — a clean working tree is not required. */ async removeWorktree(path: string): Promise { + await this.withMutationQueue(() => this.removeWorktreeUnlocked(path)); + } + + private async removeWorktreeUnlocked(path: string): Promise { try { await this.git.raw(['worktree', 'remove', '--force', path]); } catch (error) { @@ -724,7 +785,7 @@ export class GitService { } async deleteBranch(branch: string, force = false): Promise { - await this.git.raw(['branch', force ? '-D' : '-d', branch]); + await this.withMutationQueue(() => this.git.raw(['branch', force ? '-D' : '-d', branch])); } /** @@ -745,6 +806,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.deleteDirectoryUnlocked(directoryPath, commitMessage, author, authorEmail)); + } + + private async deleteDirectoryUnlocked( + directoryPath: string, + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { // Remove the directory recursively from git @@ -795,6 +865,17 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => + this.deleteDirectoriesUnlocked(directoryPaths, commitMessage, author, authorEmail), + ); + } + + private async deleteDirectoriesUnlocked( + directoryPaths: string[], + commitMessage: string, + author: string, + authorEmail: string, ): Promise { if (directoryPaths.length === 0) { return { @@ -852,4 +933,27 @@ export class GitService { created: true, }; } + + private async withMutationQueue(operation: () => Promise): Promise { + const key = this.configDir; + const previous = GitService.mutationQueues.get(key) ?? Promise.resolve(); + let release: () => void = () => {}; + const current = previous.catch(() => undefined).then( + () => + new Promise((resolve) => { + release = resolve; + }), + ); + GitService.mutationQueues.set(key, current); + + await previous.catch(() => undefined); + try { + return await operation(); + } finally { + release(); + if (GitService.mutationQueues.get(key) === current) { + GitService.mutationQueues.delete(key); + } + } + } } diff --git a/packages/context/src/ingest/adapters/metabase/chunk.test.ts b/packages/context/src/ingest/adapters/metabase/chunk.test.ts index 46a3ce97..1991e147 100644 --- a/packages/context/src/ingest/adapters/metabase/chunk.test.ts +++ b/packages/context/src/ingest/adapters/metabase/chunk.test.ts @@ -284,6 +284,18 @@ describe('chunkMetabaseStagedDir — syncMode enum coverage', () => { expect(allRawFiles).not.toContain('cards/200.json'); }); + it('ONLY with no selections includes every matching card for old generated configs', async () => { + await writeInline(dir, 'sync-config.json', { + ...BASE_SYNC, + syncMode: 'ONLY', + selections: [], + }); + const result = await chunkMetabaseStagedDir(dir); + const allRawFiles = result.workUnits.flatMap((wu) => wu.rawFiles); + expect(allRawFiles).toContain('cards/100.json'); + expect(allRawFiles).toContain('cards/200.json'); + }); + it('EXCEPT excludes cards in selected collections; includes the rest', async () => { await writeInline(dir, 'sync-config.json', { ...BASE_SYNC, diff --git a/packages/context/src/ingest/adapters/metabase/chunk.ts b/packages/context/src/ingest/adapters/metabase/chunk.ts index 2fe719c5..ab2b1d78 100644 --- a/packages/context/src/ingest/adapters/metabase/chunk.ts +++ b/packages/context/src/ingest/adapters/metabase/chunk.ts @@ -66,7 +66,7 @@ function cardMatchesSyncConfig(card: StagedCardFile, config: StagedSyncConfig): if (card.archived) { return false; } - if (config.syncMode === 'ALL') { + if (config.syncMode === 'ALL' || (config.syncMode === 'ONLY' && config.selections.length === 0)) { return true; } const selectedCollections = new Set( diff --git a/packages/context/src/ingest/adapters/metabase/client.test.ts b/packages/context/src/ingest/adapters/metabase/client.test.ts index d6d7a4d9..f81939c6 100644 --- a/packages/context/src/ingest/adapters/metabase/client.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client.test.ts @@ -327,6 +327,40 @@ describe('MetabaseClient.getResolvedSql', () => { expect(result?.resolvedSql).toBe('SELECT * FROM (SELECT a, b FROM base) t '); }); + it('inlines native-query snippets before checking for remaining variables', async () => { + const requestSpy = vi.fn().mockResolvedValue([ + { + id: 1, + name: 'account_join', + content: 'LEFT JOIN accounts a ON a.account_id = mart.account_id', + }, + ]); + const requestWithCustomRetrySpy = vi.fn(); + const client = makeClient((client) => { + Reflect.set(client, 'request', requestSpy); + Reflect.set(client, 'requestWithCustomRetry', requestWithCustomRetrySpy); + }); + const card = nativeCard('SELECT a.account_name FROM mart {{snippet: account_join}}', { + 'snippet: account_join': { + id: 'snippet-tag', + name: 'snippet: account_join', + type: 'snippet', + 'snippet-name': 'account_join', + 'snippet-id': 1, + }, + }); + + const result = await client.getResolvedSql(card); + + expect(requestSpy).toHaveBeenCalledWith('GET', '/api/native-query-snippet'); + expect(requestWithCustomRetrySpy).not.toHaveBeenCalled(); + expect(result?.resolutionStatus).toBe('resolved'); + expect(result?.resolvedSql).toBe( + 'SELECT a.account_name FROM mart LEFT JOIN accounts a ON a.account_id = mart.account_id', + ); + expect(result?.resolvedSql).not.toContain('{{snippet:'); + }); + it('uses /api/dataset/native for naked variables and prepends a warning comment', async () => { const requestSpy = vi.fn().mockResolvedValue({ query: "SELECT * WHERE id = 'placeholder' AND n = 1" }); const client = makeClient((client) => { diff --git a/packages/context/src/ingest/adapters/metabase/client.ts b/packages/context/src/ingest/adapters/metabase/client.ts index 2ddd970a..70e70964 100644 --- a/packages/context/src/ingest/adapters/metabase/client.ts +++ b/packages/context/src/ingest/adapters/metabase/client.ts @@ -39,6 +39,13 @@ interface TemplateTagInfo { dummyValue: string | null; } +interface NativeQuerySnippet { + id: number; + name: string; + content: string; + archived?: boolean | null; +} + interface CreateCardParams { name: string; databaseId: number; @@ -100,6 +107,43 @@ function collectRemainingPlaceholderNames(sql: string): Set { return names; } +function collectRemainingSnippetNames(sql: string): Set { + const names = new Set(); + for (const match of sql.matchAll(/\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi)) { + names.add(match[1].trim()); + } + return names; +} + +function normalizeSnippetName(name: string | null | undefined): string { + return (name ?? '').replace(/^snippet:\s*/i, '').trim().toLowerCase(); +} + +function parseNativeQuerySnippets(value: unknown): NativeQuerySnippet[] { + const rawItems = Array.isArray(value) + ? value + : typeof value === 'object' && value !== null && Array.isArray((value as { data?: unknown }).data) + ? (value as { data: unknown[] }).data + : []; + const snippets: NativeQuerySnippet[] = []; + for (const item of rawItems) { + if (typeof item !== 'object' || item === null || Array.isArray(item)) { + continue; + } + const rec = item as Record; + if (typeof rec.id !== 'number' || typeof rec.name !== 'string' || typeof rec.content !== 'string') { + continue; + } + snippets.push({ + id: rec.id, + name: rec.name, + content: rec.content, + ...(typeof rec.archived === 'boolean' ? { archived: rec.archived } : {}), + }); + } + return snippets; +} + function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): MetabaseDatasetQuery { if (datasetQuery?.stages?.[0]?.native !== undefined) { const stages = [...(datasetQuery.stages ?? [])]; @@ -148,6 +192,7 @@ export class MetabaseClient implements MetabaseRuntimeClient { private readonly logger: MetabaseClientLogger; private readonly baseUrl: string; private readonly config: MetabaseClientConfig; + private snippetCache: Promise | null = null; constructor( runtime: MetabaseClientRuntimeConfig, @@ -261,6 +306,63 @@ export class MetabaseClient implements MetabaseRuntimeClient { return this.request('GET', '/api/card/?f=all'); } + private getNativeQuerySnippets(): Promise { + this.snippetCache ??= this.request('GET', '/api/native-query-snippet').then(parseNativeQuerySnippets); + return this.snippetCache; + } + + private async inlineNativeQuerySnippets( + sql: string, + templateTags: MetabaseTemplateTag[], + cardId: number, + ): Promise<{ sql: string; unresolved: string[] }> { + const names = collectRemainingSnippetNames(sql); + if (names.size === 0) { + return { sql, unresolved: [] }; + } + + let snippets: NativeQuerySnippet[]; + try { + snippets = await this.getNativeQuerySnippets(); + } catch (error) { + this.logger.warn( + `[metabase] failed to load native query snippets for card ${cardId}; leaving snippet placeholders unresolved: ${error instanceof Error ? error.message : String(error)}`, + ); + return { sql, unresolved: [...names] }; + } + + const snippetsById = new Map(); + const snippetsByName = new Map(); + for (const snippet of snippets) { + if (snippet.archived === true) { + continue; + } + snippetsById.set(snippet.id, snippet); + snippetsByName.set(normalizeSnippetName(snippet.name), snippet); + } + + const snippetTags = templateTags.filter((tag) => tag.type === 'snippet'); + const unresolved = new Set(); + const inlinedSql = sql.replace(/\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi, (match, rawName: string) => { + const normalizedName = normalizeSnippetName(rawName); + const tag = snippetTags.find( + (candidate) => + normalizeSnippetName(candidate['snippet-name']) === normalizedName || + normalizeSnippetName(candidate.name) === normalizedName, + ); + const snippet = + (typeof tag?.['snippet-id'] === 'number' ? snippetsById.get(tag['snippet-id']) : undefined) ?? + snippetsByName.get(normalizedName); + if (!snippet) { + unresolved.add(rawName.trim()); + return match; + } + return snippet.content; + }); + + return { sql: inlinedSql, unresolved: [...unresolved] }; + } + async convertMbqlToNative(datasetQuery: MetabaseDatasetQuery): Promise { return this.request('POST', '/api/dataset/native', { ...datasetQuery, @@ -351,7 +453,18 @@ export class MetabaseClient implements MetabaseRuntimeClient { // silently filter rows out — see incident with auction_seller_bidder_pair_suspicion). let processedSql = stripOptionalClauses(nativeQuery); - // Step 2: inline {{#CARD_ID}} card references locally. Recursively strip optional + // Step 2: inline native-query snippets. Metabase's substitution endpoint does not + // always expand {{snippet: name}} for fetched card SQL, but the snippets API does. + const snippetResult = await this.inlineNativeQuerySnippets(processedSql, templateTagEntries, card.id); + processedSql = snippetResult.sql; + if (snippetResult.unresolved.length > 0) { + this.logger.warn( + `[metabase] card ${card.id} has unresolved SQL snippets: ${snippetResult.unresolved.join(', ')}`, + ); + return { resolvedSql: processedSql, templateTags, resolutionStatus: 'fallback' }; + } + + // Step 3: inline {{#CARD_ID}} card references locally. Recursively strip optional // clauses in referenced cards too — the same reasoning applies all the way down. try { processedSql = await expandCardReferences(processedSql, { @@ -361,7 +474,17 @@ export class MetabaseClient implements MetabaseRuntimeClient { if (!referencedNative) { throw new Error(`referenced card ${id} has no native query`); } - return { native_query: stripOptionalClauses(referencedNative) }; + const referencedSnippetResult = await this.inlineNativeQuerySnippets( + stripOptionalClauses(referencedNative), + Object.values(this.getTemplateTags(referenced)), + referenced.id, + ); + if (referencedSnippetResult.unresolved.length > 0) { + throw new Error( + `referenced card ${id} has unresolved SQL snippets: ${referencedSnippetResult.unresolved.join(', ')}`, + ); + } + return { native_query: referencedSnippetResult.sql }; }, }); } catch (err) { @@ -372,7 +495,7 @@ export class MetabaseClient implements MetabaseRuntimeClient { throw err; } - // Step 3: collect template tags that still appear in the SQL after strip + inline. + // Step 4: collect template tags that still appear in the SQL after strip + inline. // Anything bracketed-only is gone now; anything card-referenced is inlined. const remainingNames = collectRemainingPlaceholderNames(processedSql); const remainingTags = templateTagEntries.filter((tag) => tag.type !== 'snippet' && remainingNames.has(tag.name)); @@ -381,7 +504,7 @@ export class MetabaseClient implements MetabaseRuntimeClient { return { resolvedSql: processedSql, templateTags, resolutionStatus: 'resolved' }; } - // Step 4: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's + // Step 5: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's // substitution endpoint. Only required because we can't translate dimension-tag // bindings to warehouse columns ourselves. Prepend a SQL comment listing every // dummy substitution so downstream consumers (the metabase_ingest LLM) know which diff --git a/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts b/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts index 1d8d2478..9768c0c9 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts @@ -57,13 +57,9 @@ describe('computeFetchScope', () => { }); }); - it('returns empty explicit scope for ONLY with no selections', () => { + it('treats generated ONLY with no selections as all', () => { const scope = computeFetchScope({ ...BASE_CONFIG, syncMode: 'ONLY', selections: [] }); - expect(scope).toEqual({ - kind: 'explicit', - includeCardIds: new Set(), - includeCollectionIds: new Set(), - }); + expect(scope).toEqual({ kind: 'all' }); }); }); diff --git a/packages/context/src/ingest/adapters/metabase/fetch-scope.ts b/packages/context/src/ingest/adapters/metabase/fetch-scope.ts index bee97ec8..e09ef7c3 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch-scope.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch-scope.ts @@ -11,7 +11,7 @@ export type FetchScope = * union the fetcher switches on. Pure function; no I/O, no side effects. */ export function computeFetchScope(syncConfig: StagedSyncConfig): FetchScope { - if (syncConfig.syncMode === 'ALL') { + if (syncConfig.syncMode === 'ALL' || (syncConfig.syncMode === 'ONLY' && syncConfig.selections.length === 0)) { return { kind: 'all' }; } const cardIds = new Set(); diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts index 2e492f07..0c854f6d 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts @@ -1,8 +1,21 @@ -import { describe, expect, it } from 'vitest'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import type { KtxProjectConnectionConfig } from '../../../project/index.js'; import { metabaseRuntimeConfigFromLocalConnection } from './local-metabase.adapter.js'; describe('metabaseRuntimeConfigFromLocalConnection', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-runtime-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + it('resolves api_url and env-backed api_key_ref from a flat ktx.yaml connection', () => { const connection: KtxProjectConnectionConfig = { driver: 'metabase', @@ -20,6 +33,21 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => { }); }); + it('resolves file-backed api_key_ref from pasted setup secrets', async () => { + const keyPath = join(tempDir, 'metabase-main-api-key'); + await writeFile(keyPath, 'mb_file_key\n', 'utf-8'); // pragma: allowlist secret + const connection: KtxProjectConnectionConfig = { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key_ref: `file:${keyPath}`, + }; + + expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({ + apiUrl: 'https://metabase.example.com', + apiKey: 'mb_file_key', // pragma: allowlist secret + }); + }); + it('accepts url as the local api URL alias', () => { const connection: KtxProjectConnectionConfig = { driver: 'metabase', diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts index a13b3923..bd81413f 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -1,5 +1,6 @@ import type { KtxLocalProject, KtxProjectConnectionConfig } from '../../../project/index.js'; import { ktxLocalStateDbPath } from '../../../project/index.js'; +import { resolveKtxConfigReference } from '../../../core/config-reference.js'; import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './client.js'; import { IngestMetabaseClientFactory, @@ -13,14 +14,6 @@ function stringField(value: unknown): string | null { return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; } -function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null { - if (!ref.startsWith('env:')) { - return null; - } - const name = ref.slice('env:'.length); - return stringField(env[name]); -} - function hasNetworkProxy(connection: KtxProjectConnectionConfig): boolean { return connection.networkProxy != null || connection.network_proxy != null; } @@ -42,7 +35,7 @@ export function metabaseRuntimeConfigFromLocalConnection( const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url); const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey); const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef); - const apiKey = literalApiKey ?? (apiKeyRef ? resolveEnvReference(apiKeyRef, env) : null); + const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null); if (!apiUrl) { throw new Error(`Connection "${connectionId}" is missing metabase api_url`); diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts index 6ba778e5..0515842a 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -79,6 +79,21 @@ function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['t return actions.filter((action) => action.target === target).length; } +function isStructuredToolFailure(output: unknown): boolean { + if (!output || typeof output !== 'object') { + return false; + } + const structured = (output as { structured?: unknown }).structured; + return !!structured && typeof structured === 'object' && (structured as { success?: unknown }).success === false; +} + +function isFailedToolCall(entry: ToolCallLogEntry): boolean { + if (entry.error) { + return true; + } + return (entry.toolName === 'sl_write_source' || entry.toolName === 'wiki_write') && isStructuredToolFailure(entry.output); +} + function reportIdFromCreateResult(result: unknown): string | undefined { if (!result || typeof result !== 'object' || !('id' in result)) { return undefined; @@ -344,7 +359,7 @@ export class IngestBundleRunner { toolNames: new Set(), } satisfies MutableToolTranscriptSummary); current.toolCallCount += 1; - current.errorCount += entry.error ? 1 : 0; + current.errorCount += isFailedToolCall(entry) ? 1 : 0; current.toolNames.add(entry.toolName); transcriptSummaries.set(entry.wuKey, current); }; @@ -712,6 +727,7 @@ export class IngestBundleRunner { sourceKey: job.sourceKey, connectionId: job.connectionId, jobId: job.jobId, + toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.errorCount ?? 0, onStepFinish: ({ stepIndex, stepBudget }) => { memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); }, diff --git a/packages/context/src/ingest/local-bundle-ingest.test.ts b/packages/context/src/ingest/local-bundle-ingest.test.ts index aa423d9e..6e9aa4aa 100644 --- a/packages/context/src/ingest/local-bundle-ingest.test.ts +++ b/packages/context/src/ingest/local-bundle-ingest.test.ts @@ -1,6 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; +import Database from 'better-sqlite3'; import { AgentRunnerService } from '../agent/index.js'; import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js'; import { makeLocalGitRepo } from '../test/make-local-git-repo.js'; @@ -57,6 +58,34 @@ class LookerSlWritingAgentRunner extends AgentRunnerService { } } +class WikiWritingAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags?.operationName === 'ingest-bundle-wu') { + const wikiWrite = params.toolSet.wiki_write; + if (!wikiWrite?.execute) { + throw new Error('wiki_write tool was not available to the WorkUnit'); + } + const result = await wikiWrite.execute( + { + key: 'orders_context', + summary: 'Orders source context', + content: 'Orders are purchase records used for revenue analysis.', + tags: ['orders'], + }, + { toolCallId: 'wiki-write' }, + ); + if (!result.structured.success) { + throw new Error(result.markdown); + } + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + function makeLookerRuntimeClient() { const lookerModels = { models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], @@ -252,6 +281,33 @@ describe('canonical local ingest', () => { }); }); + it('indexes wiki pages written by local ingest into the SQLite knowledge tables', async () => { + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const agentRunner = new WikiWritingAgentRunner(); + + const result = await runLocalIngest({ + project, + adapters: [new FakeSourceAdapter()], + adapter: 'fake', + connectionId: 'warehouse', + sourceDir, + jobId: 'wiki-local-1', + agentRunner, + }); + + expect(result.result.failedWorkUnits).toEqual([]); + const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true }); + try { + expect(db.prepare('SELECT key, summary FROM knowledge_pages ORDER BY key').all()).toEqual([ + { key: 'orders_context', summary: 'Orders source context' }, + ]); + } finally { + db.close(); + } + }); + it('rejects direct Metabase scheduled pulls before requiring a local ingest LLM provider', async () => { const projectDir = join(tempDir, 'metabase-project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 6665682b..f7c8be80 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -56,6 +56,8 @@ import { type KnowledgeIndexPort, KnowledgeWikiService, searchLocalKnowledgePages, + SqliteKnowledgeIndex, + type SqliteKnowledgeIndexPage, WikiListTagsTool, WikiReadTool, WikiRemoveTool, @@ -257,6 +259,17 @@ function parseWiki(raw: string): { summary: string; content: string } { }; } +function parseWikiTags(raw: string): string[] { + const match = raw.match(/^---\n([\s\S]*?)\n---\n?/); + if (!match) { + return []; + } + const frontmatter = (YAML.parse(match[1]) ?? {}) as Record; + return Array.isArray(frontmatter.tags) + ? frontmatter.tags.filter((tag): tag is string => typeof tag === 'string') + : []; +} + function scoreText(text: string, query: string): number { const normalized = query.toLowerCase().trim(); if (!normalized) { @@ -271,21 +284,49 @@ function scoreText(text: string, query: string): number { } class LocalKnowledgeIndex implements KnowledgeIndexPort { - constructor(private readonly project: KtxLocalProject) {} + private readonly sqlite: SqliteKnowledgeIndex; - async upsertPage(): Promise {} - - async applyDiffTransactional(): Promise {} - - async getExistingSearchTexts(): Promise> { - return new Map(); + constructor(private readonly project: KtxLocalProject) { + this.sqlite = new SqliteKnowledgeIndex({ dbPath: ktxLocalStateDbPath(project) }); } - async deleteStale(): Promise {} + async upsertPage(): Promise { + await this.syncAllPagesFromDisk(); + } - async deleteByScope(): Promise {} + async applyDiffTransactional(): Promise { + await this.syncAllPagesFromDisk(); + } - async deleteByKey(): Promise {} + async getExistingSearchTexts( + scope: string, + scopeId: string | null, + ): Promise> { + const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${scopeId}/`; + const result = new Map(); + for (const [path, page] of this.sqlite.getExistingPages()) { + if (!path.startsWith(prefix)) { + continue; + } + result.set(path.slice(prefix.length).replace(/\.md$/, ''), { + searchText: page.searchText, + hasEmbedding: page.embedding !== null, + }); + } + return result; + } + + async deleteStale(): Promise { + await this.syncAllPagesFromDisk(); + } + + async deleteByScope(): Promise { + await this.syncAllPagesFromDisk(); + } + + async deleteByKey(): Promise { + await this.syncAllPagesFromDisk(); + } async findPageByKey(scope: string, scopeId: string | null, pageKey: string) { const path = scope === 'GLOBAL' ? `knowledge/global/${pageKey}.md` : `knowledge/user/${scopeId}/${pageKey}.md`; @@ -344,6 +385,41 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { .sort((left, right) => right.rrfScore - left.rrfScore || left.pageKey.localeCompare(right.pageKey)) .slice(0, limit); } + + private async syncAllPagesFromDisk(): Promise { + const listed = await this.project.fileStore.listFiles('knowledge', true); + const pages: SqliteKnowledgeIndexPage[] = []; + for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { + const parsedPath = parseKnowledgeIndexPath(file); + if (!parsedPath) { + continue; + } + const path = `knowledge/${file}`; + const raw = await this.project.fileStore.readFile(path); + const parsed = parseWiki(raw.content); + pages.push({ + path, + key: parsedPath.pageKey, + scope: parsedPath.scope, + summary: parsed.summary, + content: parsed.content, + tags: parseWikiTags(raw.content), + embedding: null, + }); + } + this.sqlite.sync(pages); + } +} + +function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; pageKey: string } | null { + const segments = file.split('/'); + if (segments.length === 2 && segments[0] === 'global') { + return { scope: 'GLOBAL', pageKey: segments[1].replace(/\.md$/, '') }; + } + if (segments.length === 3 && segments[0] === 'user') { + return { scope: 'USER', pageKey: segments[2].replace(/\.md$/, '') }; + } + return null; } class NoopKnowledgeEventPort implements KnowledgeEventPort { diff --git a/packages/context/src/ingest/stages/stage-3-work-units.test.ts b/packages/context/src/ingest/stages/stage-3-work-units.test.ts index ba01d60d..23ec3fa8 100644 --- a/packages/context/src/ingest/stages/stage-3-work-units.test.ts +++ b/packages/context/src/ingest/stages/stage-3-work-units.test.ts @@ -106,6 +106,21 @@ describe('Stage 3 — executeWorkUnit', () => { expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); }); + it('tool failures reset to the pre-WU SHA and mark WU failed even when the loop ends naturally', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' }); + deps.toolFailureCount = vi.fn().mockReturnValue(2); + + const outcome = await executeWorkUnit(deps, makeWu()); + + expect(outcome.status).toBe('failed'); + expect(outcome.reason).toContain('2 tool call(s) failed'); + expect(outcome.actions).toEqual([]); + expect(outcome.touchedSlSources).toEqual([]); + expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); + }); + it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => { const deps = makeDeps(); deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); diff --git a/packages/context/src/ingest/stages/stage-3-work-units.ts b/packages/context/src/ingest/stages/stage-3-work-units.ts index bbf23079..b6e64f86 100644 --- a/packages/context/src/ingest/stages/stage-3-work-units.ts +++ b/packages/context/src/ingest/stages/stage-3-work-units.ts @@ -28,6 +28,7 @@ export interface WorkUnitExecutionDeps { connectionId: string; jobId: string; onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + toolFailureCount?: (unitKey: string) => number; } export interface WorkUnitOutcome { @@ -128,6 +129,11 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit) return failWithReset(runResult.error?.message ?? 'agent loop errored'); } + const toolFailureCount = deps.toolFailureCount?.(wu.unitKey) ?? 0; + if (toolFailureCount > 0) { + return failWithReset(`${toolFailureCount} tool call(s) failed during WorkUnit ${wu.unitKey}`); + } + const touched = listTouchedSlSources(deps.captureSession.touchedSlSources); if (touched.length > 0) { const validation = await deps.validateTouchedSources(touched); diff --git a/packages/context/src/mcp/local-project-ports.ts b/packages/context/src/mcp/local-project-ports.ts index 60808426..d2ad139f 100644 --- a/packages/context/src/mcp/local-project-ports.ts +++ b/packages/context/src/mcp/local-project-ports.ts @@ -116,8 +116,7 @@ function normalizeScanDriver(driver: string | undefined): KtxConnectionDriver { normalized === 'clickhouse' || normalized === 'sqlserver' || normalized === 'bigquery' || - normalized === 'snowflake' || - normalized === 'posthog' + normalized === 'snowflake' ) { return normalized === 'sqlite3' ? 'sqlite' : normalized; } diff --git a/packages/context/src/memory/memory-agent.service.ingest.test.ts b/packages/context/src/memory/memory-agent.service.ingest.test.ts index bf30a883..710ba956 100644 --- a/packages/context/src/memory/memory-agent.service.ingest.test.ts +++ b/packages/context/src/memory/memory-agent.service.ingest.test.ts @@ -17,7 +17,7 @@ interface BuiltMocks { appSettings: any; llmProvider: any; prompt: any; - posthog: any; + eventTracker: any; telemetry: any; skillsRegistry: any; wikiService: any; @@ -64,7 +64,7 @@ const buildMocks = (overrides: Partial = {}): BuiltMocks => { }, llmProvider: { getModel: vi.fn().mockReturnValue({}) }, prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') }, - posthog: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, + eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, telemetry: { isEnabled: () => false, appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } }, @@ -177,7 +177,7 @@ const buildService = (mocks: BuiltMocks): MemoryAgentService => slValidator: mocks.slValidator, toolsetFactory: mocks.toolsetFactory, telemetry: { - trackMemoryIngestion: mocks.posthog.trackEvent, + trackMemoryIngestion: mocks.eventTracker.trackEvent, }, }); diff --git a/packages/context/src/project/project.test.ts b/packages/context/src/project/project.test.ts index ec2120aa..b6e88604 100644 --- a/packages/context/src/project/project.test.ts +++ b/packages/context/src/project/project.test.ts @@ -32,6 +32,8 @@ describe('KTX local project runtime', () => { const gitignore = await readFile(join(projectDir, '.ktx/.gitignore'), 'utf-8'); expect(gitignore).toContain('cache/'); expect(gitignore).toContain('db.sqlite'); + expect(gitignore).toContain('db.sqlite-*'); + expect(gitignore).toContain('ingest-transcripts/'); expect(gitignore).toContain('secrets/'); expect(gitignore).toContain('setup/'); expect(gitignore).toContain('agents/'); diff --git a/packages/context/src/project/project.ts b/packages/context/src/project/project.ts index cbe522a5..59e594a2 100644 --- a/packages/context/src/project/project.ts +++ b/packages/context/src/project/project.ts @@ -35,7 +35,10 @@ export interface InitKtxProjectResult extends KtxLocalProject { } const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [ - { path: '.ktx/.gitignore', content: 'cache/\ndb.sqlite\nsecrets/\nsetup/\nagents/\n' }, + { + path: '.ktx/.gitignore', + content: 'cache/\ndb.sqlite\ndb.sqlite-*\ningest-transcripts/\nsecrets/\nsetup/\nagents/\n', + }, { path: '.ktx/prompts/.gitkeep', content: '' }, { path: '.ktx/skills/.gitkeep', content: '' }, { path: 'knowledge/global/.gitkeep', content: '' }, diff --git a/packages/context/src/project/setup-config.test.ts b/packages/context/src/project/setup-config.test.ts index 3fc8726b..212f16e1 100644 --- a/packages/context/src/project/setup-config.test.ts +++ b/packages/context/src/project/setup-config.test.ts @@ -67,10 +67,10 @@ describe('KTX setup config helpers', () => { it('merges setup-local gitignore entries without removing existing lines', () => { expect(mergeKtxSetupGitignoreEntries('cache/\ndb.sqlite\n')).toBe( - ['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/', ''].join('\n'), + ['cache/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'secrets/', 'setup/', 'agents/', ''].join('\n'), ); expect(mergeKtxSetupGitignoreEntries('cache/\nsecrets/\n')).toBe( - ['cache/', 'secrets/', 'setup/', 'agents/', ''].join('\n'), + ['cache/', 'secrets/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'setup/', 'agents/', ''].join('\n'), ); }); }); diff --git a/packages/context/src/project/setup-config.ts b/packages/context/src/project/setup-config.ts index d0f46cf0..76951ef6 100644 --- a/packages/context/src/project/setup-config.ts +++ b/packages/context/src/project/setup-config.ts @@ -4,7 +4,15 @@ export const KTX_SETUP_STEPS = ['project', 'llm', 'embeddings', 'databases', 'so export type KtxSetupStep = (typeof KTX_SETUP_STEPS)[number]; -const SETUP_GITIGNORE_ENTRIES = ['secrets/', 'setup/', 'agents/'] as const; +const SETUP_GITIGNORE_ENTRIES = [ + 'cache/', + 'db.sqlite', + 'db.sqlite-*', + 'ingest-transcripts/', + 'secrets/', + 'setup/', + 'agents/', +] as const; export function markKtxSetupStepComplete(config: KtxProjectConfig, step: KtxSetupStep): KtxProjectConfig { const databaseConnectionIds = config.setup?.database_connection_ids ?? []; diff --git a/packages/context/src/scan/local-scan.ts b/packages/context/src/scan/local-scan.ts index 0919843f..15fdf6f3 100644 --- a/packages/context/src/scan/local-scan.ts +++ b/packages/context/src/scan/local-scan.ts @@ -103,13 +103,12 @@ function normalizeDriver(driver: string | undefined): KtxConnectionDriver { normalized === 'clickhouse' || normalized === 'sqlserver' || normalized === 'bigquery' || - normalized === 'snowflake' || - normalized === 'posthog' + normalized === 'snowflake' ) { return normalized === 'sqlite3' ? 'sqlite' : normalized; } throw new Error( - `Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake/posthog in this phase, received "${driver ?? 'unknown'}"`, + `Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake in this phase, received "${driver ?? 'unknown'}"`, ); } diff --git a/packages/context/src/scan/relationship-profiling.ts b/packages/context/src/scan/relationship-profiling.ts index 1fbeccd4..fa6acfac 100644 --- a/packages/context/src/scan/relationship-profiling.ts +++ b/packages/context/src/scan/relationship-profiling.ts @@ -71,7 +71,7 @@ const SAMPLE_VALUE_DELIMITER = '\u001f'; type QuoteStyle = 'double' | 'backtick' | 'bracket'; function quoteStyle(driver: KtxConnectionDriver): QuoteStyle { - if (driver === 'mysql' || driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'mysql' || driver === 'clickhouse') { return 'backtick'; } if (driver === 'sqlserver') { @@ -93,7 +93,7 @@ export function quoteKtxRelationshipIdentifier(driver: KtxConnectionDriver, iden export function formatKtxRelationshipTableRef(driver: KtxConnectionDriver, table: KtxTableRef): string { const parts = - driver === 'sqlite' || driver === 'posthog' + driver === 'sqlite' ? [table.name] : [table.catalog, table.db, table.name].filter((value): value is string => Boolean(value)); return parts.map((part) => quoteKtxRelationshipIdentifier(driver, part)).join('.'); @@ -109,7 +109,7 @@ function textLengthExpression(driver: KtxConnectionDriver, columnSql: string): s if (driver === 'bigquery') { return `LENGTH(CAST(${columnSql} AS STRING))`; } - if (driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'clickhouse') { return `length(toString(${columnSql}))`; } return `LENGTH(CAST(${columnSql} AS TEXT))`; @@ -223,7 +223,7 @@ function sampleAggregateSql(driver: KtxConnectionDriver, innerSql: string): stri if (driver === 'sqlserver') { return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`; } - if (driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'clickhouse') { return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`; } return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`; diff --git a/packages/context/src/scan/types.test.ts b/packages/context/src/scan/types.test.ts index 3b68411a..309db88e 100644 --- a/packages/context/src/scan/types.test.ts +++ b/packages/context/src/scan/types.test.ts @@ -150,14 +150,14 @@ describe('KTX scan contract types', () => { }; const connector: KtxScanConnector = { - id: 'posthog:product', - driver: 'posthog', + id: 'clickhouse:product', + driver: 'clickhouse', capabilities: createKtxConnectorCapabilities({ eventStreamDiscovery: true }), eventStreamDiscovery: discovery, async introspect() { return { connectionId: 'product', - driver: 'posthog', + driver: 'clickhouse', extractedAt: '2026-04-29T00:00:00.000Z', scope: { catalogs: ['157881'] }, metadata: {}, diff --git a/packages/context/src/scan/types.ts b/packages/context/src/scan/types.ts index 66f70ba2..71bb3fb3 100644 --- a/packages/context/src/scan/types.ts +++ b/packages/context/src/scan/types.ts @@ -5,7 +5,6 @@ export type KtxConnectionDriver = | 'sqlserver' | 'bigquery' | 'snowflake' - | 'posthog' | 'mysql' | 'clickhouse'; diff --git a/packages/context/src/sl/description-normalization.ts b/packages/context/src/sl/description-normalization.ts new file mode 100644 index 00000000..5a1b5ab6 --- /dev/null +++ b/packages/context/src/sl/description-normalization.ts @@ -0,0 +1,136 @@ +type DescriptionMap = Record; + +interface NormalizeDescriptionOptions { + fillMissing?: boolean; +} + +function cleanText(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function cleanDescriptionMap(value: unknown): DescriptionMap { + const result: DescriptionMap = {}; + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return result; + } + for (const [key, text] of Object.entries(value)) { + const cleaned = cleanText(text); + if (cleaned) { + result[key] = cleaned; + } + } + return result; +} + +function hasDescriptions(descriptions: DescriptionMap): boolean { + return Object.keys(descriptions).length > 0; +} + +function withDescriptionMap(record: Record, fallback: string | null): Record { + const descriptions = cleanDescriptionMap(record.descriptions); + const flatDescription = cleanText(record.description); + if (flatDescription && !descriptions.user) { + descriptions.user = flatDescription; + } + if (!hasDescriptions(descriptions) && fallback) { + descriptions.ktx = fallback; + } + + const next = { ...record }; + delete next.description; + if (hasDescriptions(descriptions)) { + next.descriptions = descriptions; + } else { + delete next.descriptions; + } + return next; +} + +function humanizeIdentifier(value: string): string { + return value + .replace(/([a-z0-9])([A-Z])/g, '$1 $2') + .replace(/[_-]+/g, ' ') + .replace(/\s+/g, ' ') + .trim() + .toLowerCase(); +} + +function formatCount(count: number, singular: string, plural = `${singular}s`): string | null { + if (count <= 0) { + return null; + } + return `${count} ${count === 1 ? singular : plural}`; +} + +function sourceFallback(source: Record, sourceName: string): string { + const table = cleanText(source.table); + const sql = cleanText(source.sql); + if (table) { + return `Semantic-layer source for ${sourceName} backed by ${table}.`; + } + if (sql) { + return `Semantic-layer source for ${sourceName} backed by curated SQL.`; + } + + const counts = [ + formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'), + formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'), + formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'), + ].filter((item): item is string => Boolean(item)); + return counts.length > 0 + ? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.` + : `Semantic-layer overlay for ${sourceName}.`; +} + +function columnFallback(column: Record, sourceName: string): string { + const columnName = cleanText(column.name) ?? 'column'; + const label = humanizeIdentifier(columnName) || columnName; + const expr = cleanText(column.expr); + + if (expr) { + return `Computed ${label} value for ${sourceName}.`; + } + + if (columnName.toLowerCase() === 'id') { + return `Identifier column for ${sourceName}.`; + } + + const idMatch = columnName.match(/^(.+)_id$/i); + if (idMatch) { + const entity = humanizeIdentifier(idMatch[1] ?? ''); + return entity ? `Identifier for the related ${entity} on ${sourceName}.` : `Identifier column for ${sourceName}.`; + } + + if (/(^|_)(date|time|timestamp|created_at|updated_at|week_start|month_start)($|_)/i.test(columnName)) { + return `Date or time value for ${label} on ${sourceName}.`; + } + + return `Column ${label} from ${sourceName}.`; +} + +export function normalizeSemanticLayerDescriptions( + source: T, + options: NormalizeDescriptionOptions = {}, +): T { + const sourceRecord = source as Record; + const sourceName = cleanText(sourceRecord.name) ?? 'source'; + const normalized = withDescriptionMap( + sourceRecord, + options.fillMissing ? sourceFallback(sourceRecord, sourceName) : null, + ); + + if (Array.isArray(sourceRecord.columns)) { + normalized.columns = sourceRecord.columns.map((column) => { + if (!column || typeof column !== 'object' || Array.isArray(column)) { + return column; + } + const columnRecord = column as Record; + return withDescriptionMap( + columnRecord, + options.fillMissing ? columnFallback(columnRecord, sourceName) : null, + ); + }); + } + + return normalized as T; +} diff --git a/packages/context/src/sl/local-sl.ts b/packages/context/src/sl/local-sl.ts index b8d29e87..676b2522 100644 --- a/packages/context/src/sl/local-sl.ts +++ b/packages/context/src/sl/local-sl.ts @@ -5,6 +5,7 @@ import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js'; import type { KtxLocalProject } from '../project/index.js'; import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js'; import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js'; +import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js'; import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js'; import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js'; @@ -180,14 +181,14 @@ function manifestTables(value: Record): Record, name: string): SemanticLayerSource { const source = parsed as Partial; - return { + return normalizeSemanticLayerDescriptions({ ...source, name, grain: Array.isArray(parsed.grain) ? (parsed.grain.filter((item) => typeof item === 'string') as string[]) : [], columns: Array.isArray(parsed.columns) ? (parsed.columns as SemanticLayerSource['columns']) : [], joins: Array.isArray(parsed.joins) ? (parsed.joins as SemanticLayerSource['joins']) : [], measures: Array.isArray(parsed.measures) ? (parsed.measures as SemanticLayerSource['measures']) : [], - }; + }); } export async function loadLocalSlSourceRecords( diff --git a/packages/context/src/sl/schemas.ts b/packages/context/src/sl/schemas.ts index 55e07f22..56f415a8 100644 --- a/packages/context/src/sl/schemas.ts +++ b/packages/context/src/sl/schemas.ts @@ -23,6 +23,8 @@ const segmentDefinitionSchema = z.object({ description: z.string().optional(), }); +const descriptionsSchema = z.record(z.string(), z.string().min(1)); + const defaultTimeDimensionDbtSchema = z.object({ dbt: z.string().optional(), }); @@ -77,6 +79,7 @@ const sourceColumnSchema = z.object({ role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), description: z.string().optional(), + descriptions: descriptionsSchema.optional(), expr: z.string().optional(), constraints: sourceKeyedColumnConstraintsSchema.optional(), enum_values: sourceKeyedStringArraySchema.optional(), @@ -91,6 +94,7 @@ const overlayColumnSchema = z role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), description: z.string().optional(), + descriptions: descriptionsSchema.optional(), expr: z.string().optional(), }) .refine((col) => !col.type || col.expr, { @@ -102,6 +106,7 @@ export const sourceDefinitionSchema = z .object({ name: z.string().min(1), description: z.string().optional(), + descriptions: descriptionsSchema.optional(), // Accepted for documentation parity with the Python spec; behavior is driven // by the `table` / `sql` fields, not by this discriminator. source_type: z.enum(['table', 'sql']).optional(), diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 5d7413ac..0b9656de 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -257,12 +257,14 @@ describe('sourceDefinitionSchema', () => { it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => { const result = sourceDefinitionSchema.safeParse({ name: 'orders', + descriptions: { dbt: 'Order facts from dbt.' }, table: 'public.orders', grain: ['id'], columns: [ { name: 'status', type: 'string', + descriptions: { dbt: 'Order lifecycle status.' }, constraints: { dbt: { not_null: true, unique: true } }, enum_values: { dbt: ['placed', 'shipped'] }, tests: { @@ -282,7 +284,9 @@ describe('sourceDefinitionSchema', () => { if (!result.success) { return; } + expect(result.data.descriptions).toEqual({ dbt: 'Order facts from dbt.' }); expect(result.data.columns[0]).toMatchObject({ + descriptions: { dbt: 'Order lifecycle status.' }, constraints: { dbt: { not_null: true, unique: true } }, enum_values: { dbt: ['placed', 'shipped'] }, tests: { @@ -528,6 +532,31 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', ( const aav = sources.find((s) => s.name === 'aav_consignments'); expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]); }); + + it('normalizes legacy flat source and column descriptions when loading standalone files', async () => { + const standalonePath = 'semantic-layer/conn-1/orders.yaml'; + configService.listFiles.mockResolvedValue({ files: [standalonePath] }); + configService.readFile.mockResolvedValue({ + content: [ + 'name: orders', + 'description: Finance orders used for invoice reconciliation.', + 'table: public.orders', + 'grain: [id]', + 'columns:', + ' - name: id', + ' type: string', + ' description: Stable order identifier.', + ].join('\n'), + }); + + const sources = await service.loadAllSources('conn-1'); + + expect(sources[0]).toMatchObject({ + name: 'orders', + descriptions: { user: 'Finance orders used for invoice reconciliation.' }, + columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }], + }); + }); }); describe('validateWithProposedSource', () => { diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index 5d559a31..0ccce66a 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -2,6 +2,7 @@ import YAML from 'yaml'; import type { KtxFileStorePort, KtxLogger } from '../core/index.js'; import { noopLogger } from '../core/index.js'; import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js'; +import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import { isOverlaySource, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js'; import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput, SemanticLayerSource } from './types.js'; @@ -101,6 +102,7 @@ export class SemanticLayerService { const warnings: string[] = []; if (!options?.skipValidation) { + source = normalizeSemanticLayerDescriptions(source); const sourceData: Record = { ...source }; if ((sourceData.table || sourceData.sql) && (await this.isManifestBacked(connectionId, source.name))) { @@ -129,7 +131,8 @@ export class SemanticLayerService { } const path = this.sourcePath(connectionId, source.name); - const content = YAML.stringify(source, { indent: 2, lineWidth: 0 }); + const normalizedSource = normalizeSemanticLayerDescriptions(source); + const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0 }); const message = commitMessage ?? `Update semantic layer source: ${source.name}`; const result = await this.configService.writeFile(path, content, author, authorEmail, message, { skipLock: options?.skipLock, @@ -199,14 +202,14 @@ export class SemanticLayerService { if (sources.has(name)) { this.logger.warn(`Standalone source '${name}' in ${filePath} overrides manifest entry of the same name`); } - let standalone: SemanticLayerSource = { + let standalone: SemanticLayerSource = normalizeSemanticLayerDescriptions({ ...(data as Partial), name, grain: Array.isArray(data.grain) ? (data.grain as string[]) : [], columns: Array.isArray(data.columns) ? (data.columns as SemanticLayerSource['columns']) : [], joins: Array.isArray(data.joins) ? (data.joins as SemanticLayerSource['joins']) : [], measures: Array.isArray(data.measures) ? (data.measures as SemanticLayerSource['measures']) : [], - }; + }); // If the source declares `inherits_columns_from`, fill any blank // type/descriptions/role from the matching manifest entry. Lets the // agent write `columns: [{name: FOO}]` without redeclaring known fields. @@ -1005,7 +1008,8 @@ const COMPOSE_KNOWN_KEYS = new Set([ ]); export function composeOverlay(base: SemanticLayerSource, overlay: Record): SemanticLayerSource { - const unknownKeys = Object.keys(overlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k)); + const normalizedOverlay = normalizeSemanticLayerDescriptions(overlay); + const unknownKeys = Object.keys(normalizedOverlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k)); if (unknownKeys.length > 0) { throw new Error( `composeOverlay: overlay for '${base.name}' has unhandled keys [${unknownKeys.join(', ')}]. ` + @@ -1015,50 +1019,47 @@ export function composeOverlay(base: SemanticLayerSource, overlay: Record), + ...(normalizedOverlay.descriptions as Record), }; } // Filter out excluded columns - const excluded = new Set((overlay.exclude_columns as string[] | undefined) ?? []); + const excluded = new Set((normalizedOverlay.exclude_columns as string[] | undefined) ?? []); let columns = result.columns.filter((c) => !excluded.has(c.name)); // Append overlay computed columns - const overlayColumns = (overlay.columns as SemanticLayerSource['columns'] | undefined) ?? []; + const overlayColumns = (normalizedOverlay.columns as SemanticLayerSource['columns'] | undefined) ?? []; columns = [...columns, ...overlayColumns]; result.columns = columns; // Measures from overlay only - result.measures = (overlay.measures as SemanticLayerSource['measures'] | undefined) ?? []; + result.measures = (normalizedOverlay.measures as SemanticLayerSource['measures'] | undefined) ?? []; // Segments: overlay-replaces semantics. Manifest tables don't carry segments today; // if that changes, add a union branch here. - if (overlay.segments !== undefined) { - result.segments = overlay.segments as SemanticLayerSource['segments']; + if (normalizedOverlay.segments !== undefined) { + result.segments = normalizedOverlay.segments as SemanticLayerSource['segments']; } // Override grain - if (overlay.grain) { - result.grain = overlay.grain as string[]; + if (normalizedOverlay.grain) { + result.grain = normalizedOverlay.grain as string[]; } - if (overlay.default_time_dimension !== undefined) { - result.default_time_dimension = overlay.default_time_dimension as SemanticLayerSource['default_time_dimension']; + if (normalizedOverlay.default_time_dimension !== undefined) { + result.default_time_dimension = + normalizedOverlay.default_time_dimension as SemanticLayerSource['default_time_dimension']; } // Union + dedupe joins, apply suppressions - const disabled = new Set(((overlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs)); + const disabled = new Set(((normalizedOverlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs)); const manifestJoins = result.joins.filter((j) => !disabled.has(normalizeWs(j.on))); - const overlayJoins = (overlay.joins as SemanticLayerSource['joins'] | undefined) ?? []; + const overlayJoins = (normalizedOverlay.joins as SemanticLayerSource['joins'] | undefined) ?? []; const existingKeys = new Set(manifestJoins.map((j) => `${j.to}::${normalizeWs(j.on)}`)); const newJoins = overlayJoins.filter((j) => !existingKeys.has(`${j.to}::${normalizeWs(j.on)}`)); result.joins = [...manifestJoins, ...newJoins]; diff --git a/packages/context/src/sl/sl-search.service.ts b/packages/context/src/sl/sl-search.service.ts index e351011f..47743ae1 100644 --- a/packages/context/src/sl/sl-search.service.ts +++ b/packages/context/src/sl/sl-search.service.ts @@ -1,6 +1,7 @@ import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js'; import { noopLogger } from '../core/index.js'; import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js'; +import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import type { SlSourcesIndexPort } from './ports.js'; import type { SemanticLayerSource } from './types.js'; @@ -8,6 +9,7 @@ export function buildSemanticLayerSourceSearchText( source: SemanticLayerSource, priority: string[] = DEFAULT_PRIORITY, ): string { + source = normalizeSemanticLayerDescriptions(source); const config = { priority }; const parts: string[] = [source.name.replace(/_/g, ' ')]; diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts index 5165112a..d90f0356 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts @@ -127,6 +127,39 @@ describe('SlEditSourceTool — session gating', () => { ); expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); }); + + it('fills missing descriptions when an ingest session edits a source', async () => { + const { tool } = makeTool(); + const session = makeSession({ + ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'dbt' }, + }); + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: session.connectionId, + sourceName: 'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + + expect(result.structured.success).toBe(true); + expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + descriptions: { ktx: expect.stringContaining('orders') }, + columns: [ + expect.objectContaining({ + descriptions: { ktx: expect.stringContaining('Identifier') }, + }), + ], + }), + expect.any(String), + expect.any(String), + expect.any(String), + ); + }); }); describe('SlEditSourceTool — manifest-backed source without overlay', () => { diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.ts b/packages/context/src/sl/tools/sl-edit-source.tool.ts index 29fa275d..17a85990 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.ts @@ -2,6 +2,7 @@ import YAML from 'yaml'; import { z } from 'zod'; import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js'; import { applySqlEdits } from '../../tools/sql-edit-replacer.js'; +import { normalizeSemanticLayerDescriptions } from '../description-normalization.js'; import type { SemanticLayerSource } from '../types.js'; import { BaseSemanticLayerTool, @@ -147,6 +148,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t } catch (e) { return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName); } + source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest }); // Re-serialize and write const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 }); diff --git a/packages/context/src/sl/tools/sl-write-source.tool.test.ts b/packages/context/src/sl/tools/sl-write-source.tool.test.ts index 4ad6bf53..1502c177 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.test.ts @@ -175,6 +175,89 @@ describe('SlWriteSourceTool — session gating', () => { ); expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); }); + + it('normalizes flat source and column descriptions before writing', async () => { + const { tool, semanticLayerService } = makeTool(); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'orders', + source: { + name: 'orders', + description: 'Finance orders used for invoice reconciliation.', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }], + measures: [], + joins: [], + } as any, + } as any, + baseContext, + ); + + expect(result.structured.success).toBe(true); + expect(semanticLayerService.writeSource).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + descriptions: { user: 'Finance orders used for invoice reconciliation.' }, + columns: [expect.objectContaining({ descriptions: { user: 'Stable order identifier.' } })], + }), + expect.any(String), + expect.any(String), + expect.any(String), + ); + }); + + it('fills missing descriptions for ingest-written overlays and columns', async () => { + const session = makeSession({ + ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'metabase' }, + semanticLayerService: { + loadSource: vi.fn().mockResolvedValue(null), + loadAllSources: vi.fn().mockResolvedValue([]), + validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), + deleteSource: vi.fn().mockResolvedValue(undefined), + listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']), + isManifestBacked: vi.fn().mockResolvedValue(false), + readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), + findManifestEntryByTableRef: vi.fn().mockResolvedValue(null), + } as any, + }); + const { tool } = makeTool(); + + const result = await tool.call( + { + connectionId: session.connectionId, + sourceName: 'mart_account_segments', + source: { + name: 'mart_account_segments', + columns: [{ name: 'is_large_contract', type: 'boolean', expr: 'contract_arr_cents >= 20000000' }], + measures: [{ name: 'account_count', expr: 'count(account_id)' }], + } as any, + } as any, + { ...baseContext, session }, + ); + + expect(result.structured.success).toBe(true); + expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + descriptions: { + ktx: expect.stringContaining('mart_account_segments'), + }, + columns: [ + expect.objectContaining({ + descriptions: { + ktx: expect.stringContaining('is large contract'), + }, + }), + ], + }), + expect.any(String), + expect.any(String), + expect.any(String), + ); + }); }); describe('SlWriteSourceTool — disconnected-components warning in markdown', () => { diff --git a/packages/context/src/sl/tools/sl-write-source.tool.ts b/packages/context/src/sl/tools/sl-write-source.tool.ts index 39a5ad5e..638b130e 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.ts @@ -10,6 +10,7 @@ import { type SemanticLayerStructured, sourceDefinitionSchema, } from './base-semantic-layer.tool.js'; +import { normalizeSemanticLayerDescriptions } from '../description-normalization.js'; import { slToolConnectionIdSchema } from './connection-id-schema.js'; const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]); @@ -154,14 +155,16 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co semanticLayerService: SemanticLayerService, skipIndex: boolean, ): Promise> { - const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql); + const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest }); + const isOverlay = + !('table' in normalizedSource && normalizedSource.table) && !('sql' in normalizedSource && normalizedSource.sql); const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName); const commitMessage = existing ? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}` : `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`; - const yamlContent = YAML.stringify(source); + const yamlContent = YAML.stringify(normalizedSource); const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent); if (orphanError) { @@ -172,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent }); } - const validatedSource = source as SemanticLayerSource; + const validatedSource = normalizedSource as SemanticLayerSource; const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource); const validationErrors = validationResult.errors; const validationWarnings = [...validationResult.warnings]; diff --git a/packages/context/src/wiki/tools/wiki-write.tool.test.ts b/packages/context/src/wiki/tools/wiki-write.tool.test.ts index 3b51c6e3..9e947d84 100644 --- a/packages/context/src/wiki/tools/wiki-write.tool.test.ts +++ b/packages/context/src/wiki/tools/wiki-write.tool.test.ts @@ -37,6 +37,42 @@ describe('WikiWriteTool', () => { expect(result.markdown).toMatch(/created/i); }); + it('normalizes accidentally escaped markdown newlines before writing', async () => { + const { tool, wikiService } = makeTool(); + + await tool.call( + { + key: 'large-contract-requesters', + summary: 'Cross-schema Metabase query', + content: + '# Large Contract Requesters\\n\\n**Source card:** Metabase #110\\n\\n## SQL\\n\\n```sql\\nselect * from orbit_analytics.mart_account_segments\\n```\\n', + } as any, + baseContext, + ); + + expect(wikiService.writePage.mock.calls[0][4]).toBe( + '# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n', + ); + expect(wikiService.syncSinglePage.mock.calls[0][4]).toBe( + '# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n', + ); + }); + + it('preserves intentional escaped newline examples in inline code', async () => { + const { tool, wikiService } = makeTool(); + + await tool.call( + { + key: 'newline-token', + summary: 'Escaped newline token', + content: 'Use `\\n\\n` when documenting the literal separator.', + } as any, + baseContext, + ); + + expect(wikiService.writePage.mock.calls[0][4]).toBe('Use `\\n\\n` when documenting the literal separator.'); + }); + it('skips syncSinglePage when session is worktree-scoped', async () => { const { tool, wikiService } = makeTool(); const session: ToolSession = { diff --git a/packages/context/src/wiki/tools/wiki-write.tool.ts b/packages/context/src/wiki/tools/wiki-write.tool.ts index f0ba954d..a2930fd8 100644 --- a/packages/context/src/wiki/tools/wiki-write.tool.ts +++ b/packages/context/src/wiki/tools/wiki-write.tool.ts @@ -47,6 +47,22 @@ interface WikiWriteStructured { action?: 'created' | 'updated'; } +function looksLikeEscapedMarkdown(content: string): boolean { + const withoutInlineCode = content.replace(/`[^`]*`/g, ''); + return /\\n\\n|(?:^|\\n)#{1,6}\s|\\n[-*]\s|\\n\d+\.\s|\\n```|\\n\|/.test(withoutInlineCode); +} + +function normalizeAccidentalEscapedMarkdownNewlines(content: string): string { + const escapedBreaks = content.match(/\\[rn]/g)?.length ?? 0; + if (escapedBreaks < 2) return content; + + const actualBreaks = content.match(/\r?\n/g)?.length ?? 0; + if (actualBreaks > 0 && escapedBreaks <= actualBreaks * 4) return content; + if (!looksLikeEscapedMarkdown(content)) return content; + + return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n'); +} + export class WikiWriteTool extends BaseTool { readonly name = 'wiki_write'; @@ -125,7 +141,7 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to }; if (input.content) { - finalContent = input.content; + finalContent = normalizeAccidentalEscapedMarkdownNewlines(input.content); } else { const editResult = applySqlEdits(existing?.content ?? '', input.replacements ?? []); if (!editResult.success) { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b900b9ed..12d1235a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -42,9 +42,6 @@ importers: '@ktx/connector-postgres': specifier: workspace:* version: file:packages/connector-postgres(ws@8.20.0) - '@ktx/connector-posthog': - specifier: workspace:* - version: file:packages/connector-posthog(ws@8.20.0) '@ktx/connector-snowflake': specifier: workspace:* version: file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0) @@ -53,7 +50,7 @@ importers: version: file:packages/connector-sqlite(ws@8.20.0) '@ktx/connector-sqlserver': specifier: workspace:* - version: file:packages/connector-sqlserver(@azure/core-client@1.10.1)(ws@8.20.0) + version: file:packages/connector-sqlserver(ws@8.20.0) '@ktx/context': specifier: workspace:* version: file:packages/context(ws@8.20.0) @@ -177,22 +174,6 @@ importers: specifier: ^4.0.18 version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) - packages/connector-posthog: - dependencies: - '@ktx/context': - specifier: workspace:* - version: file:packages/context - devDependencies: - '@types/node': - specifier: ^24.3.0 - version: 24.12.2 - typescript: - specifier: ^5.9.3 - version: 5.9.3 - vitest: - specifier: ^4.0.18 - version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) - packages/connector-snowflake: dependencies: '@ktx/context': @@ -919,10 +900,6 @@ packages: resolution: {directory: packages/connector-postgres, type: directory} engines: {node: '>=22.0.0'} - '@ktx/connector-posthog@file:packages/connector-posthog': - resolution: {directory: packages/connector-posthog, type: directory} - engines: {node: '>=22.0.0'} - '@ktx/connector-snowflake@file:packages/connector-snowflake': resolution: {directory: packages/connector-snowflake, type: directory} engines: {node: '>=22.0.0'} @@ -3632,6 +3609,11 @@ snapshots: '@azure/core-client': 1.10.1 '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-http-compat@2.4.0(@azure/core-rest-pipeline@1.23.0)': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-lro@2.7.2': dependencies: '@azure/abort-controller': 2.1.2 @@ -3703,6 +3685,24 @@ snapshots: transitivePeerDependencies: - supports-color + '@azure/keyvault-keys@4.10.0': + dependencies: + '@azure-rest/core-client': 2.6.0 + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-http-compat': 2.4.0(@azure/core-rest-pipeline@1.23.0) + '@azure/core-lro': 2.7.2 + '@azure/core-paging': 1.6.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/keyvault-common': 2.1.0 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + '@azure/keyvault-keys@4.10.0(@azure/core-client@1.10.1)': dependencies: '@azure-rest/core-client': 2.6.0 @@ -3981,16 +3981,6 @@ snapshots: - supports-color - ws - '@ktx/connector-posthog@file:packages/connector-posthog(ws@8.20.0)': - dependencies: - '@ktx/context': file:packages/context(ws@8.20.0) - transitivePeerDependencies: - - '@cfworker/json-schema' - - js-yaml - - pg-native - - supports-color - - ws - '@ktx/connector-snowflake@file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0)': dependencies: '@ktx/context': file:packages/context(ws@8.20.0) @@ -4016,10 +4006,10 @@ snapshots: - supports-color - ws - '@ktx/connector-sqlserver@file:packages/connector-sqlserver(@azure/core-client@1.10.1)(ws@8.20.0)': + '@ktx/connector-sqlserver@file:packages/connector-sqlserver(ws@8.20.0)': dependencies: '@ktx/context': file:packages/context(ws@8.20.0) - mssql: 12.5.0(@azure/core-client@1.10.1) + mssql: 12.5.0 transitivePeerDependencies: - '@azure/core-client' - '@cfworker/json-schema' @@ -5571,6 +5561,17 @@ snapshots: ms@2.1.3: {} + mssql@12.5.0: + dependencies: + '@tediousjs/connection-string': 1.1.0 + commander: 11.1.0 + debug: 4.4.3 + tarn: 3.0.2 + tedious: 19.2.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + mssql@12.5.0(@azure/core-client@1.10.1): dependencies: '@tediousjs/connection-string': 1.1.0 @@ -6073,6 +6074,22 @@ snapshots: tarn@3.0.2: {} + tedious@19.2.1: + dependencies: + '@azure/core-auth': 1.10.1 + '@azure/identity': 4.13.1 + '@azure/keyvault-keys': 4.10.0 + '@js-joda/core': 5.7.0 + '@types/node': 24.12.2 + bl: 6.1.6 + iconv-lite: 0.7.2 + js-md4: 0.3.2 + native-duplexpair: 1.0.0 + sprintf-js: 1.1.3 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + tedious@19.2.1(@azure/core-client@1.10.1): dependencies: '@azure/core-auth': 1.10.1 diff --git a/python/ktx-sl/semantic_layer/models.py b/python/ktx-sl/semantic_layer/models.py index 9a6a514f..7e922933 100644 --- a/python/ktx-sl/semantic_layer/models.py +++ b/python/ktx-sl/semantic_layer/models.py @@ -36,6 +36,22 @@ class SourceColumnTests(BaseModel): dbt_by_package: dict[str, list[str]] | None = None +_DEFAULT_DESCRIPTION_PRIORITY = ["user", "ai", "dbt", "db"] + + +def _resolve_description_map(descriptions: dict[str, str] | None) -> str | None: + if not descriptions: + return None + for source in _DEFAULT_DESCRIPTION_PRIORITY: + text = descriptions.get(source) + if text: + return text + for text in descriptions.values(): + if text: + return text + return None + + class FreshnessDbt(BaseModel): raw: Any | None = None loaded_at_field: str | None = None @@ -47,12 +63,19 @@ class SourceColumn(BaseModel): visibility: ColumnVisibility = ColumnVisibility.PUBLIC role: ColumnRole = ColumnRole.DEFAULT description: str | None = None + descriptions: dict[str, str] | None = None expr: str | None = None natural_granularity: str | None = None constraints: dict[str, ColumnDbtConstraints] | None = None enum_values: dict[str, list[str]] | None = None tests: SourceColumnTests | None = None + @model_validator(mode="after") + def resolve_description(self) -> SourceColumn: + if self.description is None: + self.description = _resolve_description_map(self.descriptions) + return self + class JoinDeclaration(BaseModel): to: str @@ -84,6 +107,7 @@ class DefaultTimeDimensionDbt(BaseModel): class SourceDefinition(BaseModel): name: str description: str | None = None + descriptions: dict[str, str] | None = None table: str | None = None sql: str | None = None grain: list[str] @@ -97,6 +121,8 @@ class SourceDefinition(BaseModel): @model_validator(mode="after") def validate_source(self) -> SourceDefinition: + if self.description is None: + self.description = _resolve_description_map(self.descriptions) if self.table and self.sql: raise ValueError("'table' and 'sql' are mutually exclusive") if not self.grain: diff --git a/python/ktx-sl/tests/test_models.py b/python/ktx-sl/tests/test_models.py index b6468462..e227bef9 100644 --- a/python/ktx-sl/tests/test_models.py +++ b/python/ktx-sl/tests/test_models.py @@ -33,6 +33,14 @@ class TestSourceColumn: assert col.visibility == ColumnVisibility.HIDDEN assert col.role == ColumnRole.TIME + def test_descriptions_map_resolves_visible_description(self): + col = SourceColumn( + name="account_id", + type="string", + descriptions={"ktx": "Identifier for the related account."}, + ) + assert col.description == "Identifier for the related account." + def test_invalid_type(self): with pytest.raises(ValidationError): SourceColumn(name="id", type="integer") @@ -63,6 +71,16 @@ class TestSourceDefinition: assert src.is_sql_source assert not src.is_table_source + def test_descriptions_map_resolves_visible_description(self): + src = SourceDefinition( + name="orders", + descriptions={"ktx": "Semantic-layer source for orders."}, + table="public.orders", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + assert src.description == "Semantic-layer source for orders." + def test_table_and_sql_mutually_exclusive(self): with pytest.raises(ValidationError, match="mutually exclusive"): SourceDefinition( diff --git a/release-policy.json b/release-policy.json index 0ba6297f..ce814787 100644 --- a/release-policy.json +++ b/release-policy.json @@ -10,7 +10,6 @@ "@ktx/connector-clickhouse", "@ktx/connector-mysql", "@ktx/connector-postgres", - "@ktx/connector-posthog", "@ktx/connector-snowflake", "@ktx/connector-sqlite", "@ktx/connector-sqlserver", diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 3d3aa168..b2da21c8 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -77,7 +77,6 @@ describe('standalone example docs', () => { assert.match(rootReadme, /`packages\/connector-clickhouse`/); assert.match(rootReadme, /`packages\/connector-mysql`/); assert.match(rootReadme, /`packages\/connector-postgres`/); - assert.match(rootReadme, /`packages\/connector-posthog`/); assert.match(rootReadme, /`packages\/connector-snowflake`/); assert.match(rootReadme, /`packages\/connector-sqlite`/); assert.match(rootReadme, /`packages\/connector-sqlserver`/); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 52d49470..d05b30bf 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -17,7 +17,6 @@ export const NPM_ARTIFACT_PACKAGES = [ { name: '@ktx/connector-clickhouse', packageRoot: 'packages/connector-clickhouse' }, { name: '@ktx/connector-mysql', packageRoot: 'packages/connector-mysql' }, { name: '@ktx/connector-postgres', packageRoot: 'packages/connector-postgres' }, - { name: '@ktx/connector-posthog', packageRoot: 'packages/connector-posthog' }, { name: '@ktx/connector-snowflake', packageRoot: 'packages/connector-snowflake' }, { name: '@ktx/connector-sqlite', packageRoot: 'packages/connector-sqlite' }, { name: '@ktx/connector-sqlserver', packageRoot: 'packages/connector-sqlserver' }, @@ -516,7 +515,6 @@ const bigqueryConnector = await import('@ktx/connector-bigquery'); const clickhouseConnector = await import('@ktx/connector-clickhouse'); const mysqlConnector = await import('@ktx/connector-mysql'); const postgresConnector = await import('@ktx/connector-postgres'); -const posthogConnector = await import('@ktx/connector-posthog'); const snowflakeConnector = await import('@ktx/connector-snowflake'); const sqliteConnector = await import('@ktx/connector-sqlite'); const sqlserverConnector = await import('@ktx/connector-sqlserver'); @@ -587,7 +585,6 @@ const connectorExports = [ ['@ktx/connector-clickhouse', clickhouseConnector.KtxClickHouseScanConnector, clickhouseConnector.KtxClickHouseDialect], ['@ktx/connector-mysql', mysqlConnector.KtxMysqlScanConnector, mysqlConnector.KtxMysqlDialect], ['@ktx/connector-postgres', postgresConnector.KtxPostgresScanConnector, postgresConnector.KtxPostgresDialect], - ['@ktx/connector-posthog', posthogConnector.KtxPostHogScanConnector, posthogConnector.KtxPostHogDialect], ['@ktx/connector-snowflake', snowflakeConnector.KtxSnowflakeScanConnector, snowflakeConnector.KtxSnowflakeDialect], ['@ktx/connector-sqlite', sqliteConnector.KtxSqliteScanConnector, sqliteConnector.KtxSqliteDialect], ['@ktx/connector-sqlserver', sqlserverConnector.KtxSqlServerScanConnector, sqlserverConnector.KtxSqlServerDialect], diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 4aec3c6e..5b18a9ed 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -34,7 +34,6 @@ const CONNECTOR_PACKAGE_NAMES = [ '@ktx/connector-clickhouse', '@ktx/connector-mysql', '@ktx/connector-postgres', - '@ktx/connector-posthog', '@ktx/connector-snowflake', '@ktx/connector-sqlite', '@ktx/connector-sqlserver', @@ -517,7 +516,6 @@ describe('verification snippets', () => { assert.match(source, /KtxPostgresScanConnector/); assert.match(source, /KtxBigQueryScanConnector/); assert.match(source, /KtxSnowflakeScanConnector/); - assert.match(source, /KtxPostHogScanConnector/); }); it('asserts installed hybrid search exports and CLI smoke coverage', () => { diff --git a/scripts/precommit-check.mjs b/scripts/precommit-check.mjs index d112752d..fdd405bf 100644 --- a/scripts/precommit-check.mjs +++ b/scripts/precommit-check.mjs @@ -15,7 +15,6 @@ const packageNameByDir = new Map( 'connector-clickhouse', 'connector-mysql', 'connector-postgres', - 'connector-posthog', 'connector-snowflake', 'connector-sqlite', 'connector-sqlserver',