From 1b5a9fe120af8d3af830dacdb0579a6f1d33031f Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Sun, 10 May 2026 16:12:51 -0700 Subject: [PATCH 1/8] Improve connector credential setup UX --- packages/cli/src/context-build-view.test.ts | 130 +++++- packages/cli/src/context-build-view.ts | 80 +++- packages/cli/src/setup-sources.test.ts | 329 +++++++++++++- packages/cli/src/setup-sources.ts | 417 +++++++++++++++--- .../metabase/local-metabase.adapter.test.ts | 30 +- .../metabase/local-metabase.adapter.ts | 11 +- 6 files changed, 882 insertions(+), 115 deletions(-) diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index c14102ec..1c6965d8 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -98,11 +98,11 @@ describe('parseScanSummary', () => { describe('parseIngestSummary', () => { it('extracts work units and saved memory', () => { - expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('5 work units · 3 wiki, 2 SL'); + expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('5 items indexed · 3 wiki, 2 SL'); }); it('extracts work units alone when no saved memory', () => { - expect(parseIngestSummary('Work units: 5\nStatus: done')).toBe('5 work units'); + expect(parseIngestSummary('Work units: 5\nStatus: done')).toBe('5 items indexed'); }); it('extracts saved memory alone when no work units', () => { @@ -127,10 +127,18 @@ describe('initViewState', () => { expect(state.contextSources[0].target.connectionId).toBe('dbt-main'); expect(state.frame).toBe(0); }); + + it('initializes global timing fields', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + expect(state.startedAt).toBeNull(); + expect(state.totalElapsedMs).toBe(0); + }); }); describe('renderContextBuildView', () => { - it('renders all-queued state', () => { + it('renders all-queued state with ○ icon and progress counter', () => { const state = initViewState([ { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, @@ -138,6 +146,8 @@ describe('renderContextBuildView', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('Building KTX context'); + expect(output).toContain('(0/2)'); + expect(output).toContain('○'); expect(output).toContain('Primary sources:'); expect(output).toContain('warehouse'); expect(output).toContain('queued'); @@ -145,6 +155,29 @@ describe('renderContextBuildView', () => { expect(output).toContain('dbt-main'); }); + it('renders header with total elapsed time when set', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.totalElapsedMs = 65000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('(0/1 · 1m05s)'); + }); + + it('renders dynamic separator matching header width', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.totalElapsedMs = 120000; + + const output = renderContextBuildView(state, { styled: false }); + const lines = output.split('\n'); + const headerLine = lines.find((l) => l.includes('Building KTX context'))!; + const separatorLine = lines.find((l) => /^─+$/.test(l))!; + expect(separatorLine.length).toBeGreaterThanOrEqual(headerLine.length); + }); + it('renders completed state with summary', () => { const state = initViewState([ { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, @@ -156,6 +189,74 @@ describe('renderContextBuildView', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('42 tables'); expect(output).toContain('1m12s'); + expect(output).toContain('(1/1)'); + }); + + it('renders running target with elapsed time', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'running'; + state.primarySources[0].elapsedMs = 30000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('scanning...'); + expect(output).toContain('30s'); + }); + + it('renders running target with progress bar when percentage is available', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'running'; + state.primarySources[0].detailLine = '[50%] Scanning tables...'; + state.primarySources[0].elapsedMs = 15000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('██████░░░░░░'); + expect(output).toContain('50%'); + expect(output).toContain('Scanning tables...'); + expect(output).toContain('15s'); + }); + + it('renders completion summary when all targets are done', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, + ]); + state.primarySources[0].status = 'done'; + state.primarySources[0].elapsedMs = 72000; + state.contextSources[0].status = 'done'; + state.contextSources[0].elapsedMs = 34000; + state.totalElapsedMs = 106000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Done in 1m46s · 2 sources processed'); + }); + + it('renders singular source label in completion summary', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'done'; + state.primarySources[0].elapsedMs = 5000; + state.totalElapsedMs = 5000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Done in 5s · 1 source processed'); + }); + + it('does not render completion summary while targets are still active', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, + ]); + state.primarySources[0].status = 'done'; + state.contextSources[0].status = 'running'; + state.totalElapsedMs = 30000; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).not.toContain('Done in'); }); it('renders failed state', () => { @@ -178,6 +279,29 @@ describe('renderContextBuildView', () => { expect(output).not.toContain('Primary sources:'); expect(output).toContain('Context sources:'); }); + + it('preserves detach hint while targets are active', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'running'; + + const output = renderContextBuildView(state, { styled: false, showHint: true, projectDir: '/tmp/project' }); + expect(output).toContain('d to detach'); + expect(output).toContain('ktx setup --project-dir /tmp/project'); + expect(output).toContain('to resume'); + }); + + it('omits detach hint when all targets are done', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + state.primarySources[0].status = 'done'; + state.totalElapsedMs = 5000; + + const output = renderContextBuildView(state, { styled: false, showHint: true }); + expect(output).not.toContain('d to detach'); + }); }); describe('runContextBuild', () => { diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 2e39537c..96a8aa57 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -30,6 +30,8 @@ export interface ContextBuildViewState { primarySources: ContextBuildTargetState[]; contextSources: ContextBuildTargetState[]; frame: number; + startedAt: number | null; + totalElapsedMs: number; } export interface ContextBuildArgs { @@ -79,7 +81,7 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st case 'running': return SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋'; default: - return '·'; + return '○'; } } switch (status) { @@ -90,10 +92,27 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st case 'running': return cyan(SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋'); default: - return dim('·'); + return dim('○'); } } +function extractPercent(detailLine: string | null): number | null { + if (!detailLine) return null; + const match = detailLine.match(/^\[(\d+)%\]/); + return match ? Number(match[1]) : null; +} + +const BAR_WIDTH = 12; +const BAR_FILLED = '█'; +const BAR_EMPTY = '░'; + +function renderProgressBar(percent: number, styled: boolean): string { + const filled = Math.round((percent / 100) * BAR_WIDTH); + const empty = BAR_WIDTH - filled; + const bar = `${BAR_FILLED.repeat(filled)}${BAR_EMPTY.repeat(empty)}`; + return styled ? cyan(bar) : bar; +} + function targetDetail(target: ContextBuildTargetState, styled: boolean): string { if (target.status === 'done') { const parts: string[] = []; @@ -105,7 +124,17 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string return styled ? red('failed') : 'failed'; } if (target.status === 'running') { - return target.detailLine ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); + const percent = extractPercent(target.detailLine); + const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '') + ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); + const elapsed = target.elapsedMs > 0 ? formatDuration(target.elapsedMs) : null; + const parts: string[] = []; + if (percent !== null) { + parts.push(`${renderProgressBar(percent, styled)} ${percent}%`); + } + parts.push(progressText); + if (elapsed) parts.push(styled ? dim(elapsed) : elapsed); + return parts.join(' '); } return styled ? dim('queued') : 'queued'; } @@ -140,17 +169,39 @@ export function renderContextBuildView( ): string { const styled = options.styled ?? true; const width = columnWidth(state); + const allTargets = [...state.primarySources, ...state.contextSources]; + const doneCount = allTargets.filter((t) => t.status === 'done' || t.status === 'failed').length; + const totalCount = allTargets.length; + const hasActive = allTargets.some((t) => t.status === 'running' || t.status === 'queued'); + const allDone = totalCount > 0 && !hasActive; + + const headerParts = ['Building KTX context']; + if (totalCount > 0) { + const progressParts: string[] = [`${doneCount}/${totalCount}`]; + if (state.totalElapsedMs > 0) progressParts.push(formatDuration(state.totalElapsedMs)); + const progress = `(${progressParts.join(' · ')})`; + headerParts.push(styled ? dim(progress) : progress); + } + const header = headerParts.join(' '); + const headerPlainLength = header.replace(/\x1b\[[0-9;]*m/g, '').length; + const separator = '─'.repeat(Math.max(21, headerPlainLength)); + const lines: string[] = [ '', - 'Building KTX context', - '─────────────────────', + header, + separator, ...renderTargetGroup('Primary sources', state.primarySources, state.frame, styled, width), ...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width), '', ]; - const hasActive = [...state.primarySources, ...state.contextSources].some( - (t) => t.status === 'running' || t.status === 'queued', - ); + + if (allDone && state.totalElapsedMs > 0) { + const sourcesLabel = totalCount === 1 ? '1 source' : `${totalCount} sources`; + const summary = ` Done in ${formatDuration(state.totalElapsedMs)} · ${sourcesLabel} processed`; + lines.push(styled ? green(summary) : summary); + lines.push(''); + } + if (options.showHint && hasActive) { const hint = ` d to detach · ${resumeCommand(options.projectDir)} to resume`; lines.push(styled ? dim(hint) : hint); @@ -177,7 +228,7 @@ export function parseScanSummary(output: string): string | null { export function parseIngestSummary(output: string): string | null { const parts: string[] = []; const workUnits = output.match(/Work units: (\d+)/); - if (workUnits) parts.push(`${workUnits[1]} work units`); + if (workUnits) parts.push(`${workUnits[1]} items indexed`); const savedMemory = output.match(/Saved memory: (.+)/); if (savedMemory) parts.push(savedMemory[1]); return parts.length > 0 ? parts.join(' · ') : null; @@ -289,6 +340,8 @@ export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuil primarySources: targets.filter((t) => t.operation === 'scan').map(makeTargetState), contextSources: targets.filter((t) => t.operation === 'source-ingest').map(makeTargetState), frame: 0, + startedAt: null, + totalElapsedMs: 0, }; } @@ -303,6 +356,8 @@ export async function runContextBuild( const isTTY = io.stdout.isTTY === true; const nowFn = deps.now ?? (() => Date.now()); + state.startedAt = nowFn(); + const repainter = isTTY ? createRepainter(io) : null; const viewOpts = { styled: true, projectDir: args.projectDir }; const paint = (hint: boolean) => repainter?.paint(renderContextBuildView(state, { ...viewOpts, showHint: hint })); @@ -312,6 +367,9 @@ export async function runContextBuild( if (repainter) { spinnerInterval = setInterval(() => { state.frame++; + if (state.startedAt !== null) { + state.totalElapsedMs = nowFn() - state.startedAt; + } for (const t of [...state.primarySources, ...state.contextSources]) { if (t.status === 'running' && t.startedAt !== null) { t.elapsedMs = nowFn() - t.startedAt; @@ -400,6 +458,10 @@ export async function runContextBuild( cleanupKeystroke?.(); } + if (state.startedAt !== null) { + state.totalElapsedMs = nowFn() - state.startedAt; + } + if (detached) { return { exitCode: 0, detached: true }; } diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index b8ff4eed..1ef973c9 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -1,4 +1,4 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { @@ -8,6 +8,7 @@ import { serializeKtxProjectConfig, } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { KtxCliIo } from './cli-runtime.js'; import { runKtxSetupSourcesStep, type KtxSetupSourcesDeps, @@ -41,14 +42,17 @@ function prompts(values: { multiselect?: string[][]; select?: string[]; text?: Array; + password?: Array; }): KtxSetupSourcesPromptAdapter { const multiselectValues = [...(values.multiselect ?? [])]; const selectValues = [...(values.select ?? [])]; const textValues = [...(values.text ?? [])]; + const passwordValues = [...(values.password ?? [])]; return { multiselect: vi.fn(async () => multiselectValues.shift() ?? []), select: vi.fn(async () => selectValues.shift() ?? 'skip'), text: vi.fn(async () => (textValues.length > 0 ? textValues.shift() : '')), + password: vi.fn(async () => (passwordValues.length > 0 ? passwordValues.shift() : undefined)), cancel: vi.fn(), log: vi.fn(), }; @@ -207,6 +211,193 @@ describe('setup sources step', () => { expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io); }); + it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => { + await addPrimarySource(); + const cases: Array<{ + source: 'metabase' | 'looker'; + text: string[]; + deps: KtxSetupSourcesDeps; + expectedConnection: Record; + }> = [ + { + source: 'metabase', + text: ['metabase-main', 'https://metabase.example.com'], + deps: { + discoverMetabaseDatabases: vi.fn(async () => [ + { id: 1, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' }, + ]), + validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'metabase', + mappings: { databaseMappings: { '1': 'warehouse' } }, + }, + }, + { + source: 'looker', + text: ['looker-main', 'https://looker.example.com', 'client-id', ''], + deps: { + validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'looker', + mappings: { connectionMappings: { warehouse: 'warehouse' } }, + }, + }, + ]; + + for (const testCase of cases) { + const testPrompts = prompts({ + multiselect: [[testCase.source]], + select: ['env', 'done'], + text: testCase.text, + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + ...testCase.deps, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] }); + + expect( + vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Mapped warehouse')), + ).toBe(false); + if (testCase.source === 'metabase') { + expect( + vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Metabase database id')), + ).toBe(false); + } + expect((await readConfig()).connections[`${testCase.source}-main`]).toMatchObject(testCase.expectedConnection); + } + }); + + it('prompts for the mapped warehouse when interactive Metabase and Looker source setup has multiple choices', async () => { + await addPrimarySource(); + await addConnection('analytics_warehouse', { + driver: 'snowflake', + account: 'acme', + database: 'analytics', + readonly: true, + }); + + const cases: Array<{ + source: 'metabase' | 'looker'; + text: string[]; + deps: KtxSetupSourcesDeps; + expectedConnection: Record; + }> = [ + { + source: 'metabase', + text: ['metabase-main', 'https://metabase.example.com'], + deps: { + discoverMetabaseDatabases: vi.fn(async () => [ + { id: 1, name: 'Finance', engine: 'postgres', host: 'db.example.com', dbName: 'finance' }, + { id: 2, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' }, + ]), + validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'metabase', + mappings: { databaseMappings: { '2': 'analytics_warehouse' } }, + }, + }, + { + source: 'looker', + text: ['looker-main', 'https://looker.example.com', 'client-id', 'analytics'], + deps: { + validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })), + runMapping: vi.fn(async () => 0), + }, + expectedConnection: { + driver: 'looker', + mappings: { connectionMappings: { analytics: 'analytics_warehouse' } }, + }, + }, + ]; + + for (const testCase of cases) { + const testPrompts = prompts({ + multiselect: [[testCase.source]], + select: testCase.source === 'metabase' ? ['env', 'analytics_warehouse', '2', 'done'] : ['env', 'analytics_warehouse', 'done'], + text: testCase.text, + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + ...testCase.deps, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] }); + + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'Mapped warehouse connection', + options: [ + { value: 'analytics_warehouse', label: 'analytics_warehouse (SNOWFLAKE)' }, + { value: 'warehouse', label: 'warehouse (POSTGRESQL)' }, + { value: 'back', label: 'Back' }, + ], + }); + if (testCase.source === 'metabase') { + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'Metabase database', + options: [ + { value: '1', label: '1: Finance (postgres)' }, + { value: '2', label: '2: Analytics (postgres)' }, + { value: 'back', label: 'Back' }, + ], + }); + expect( + vi.mocked(testPrompts.text).mock.calls.some(([options]) => options.message.includes('Metabase database id')), + ).toBe(false); + } + expect((await readConfig()).connections[`${testCase.source}-main`]).toMatchObject(testCase.expectedConnection); + } + }); + + it('lets visible Metabase mapping surface refresh and validation failures', async () => { + await addPrimarySource(); + const runMapping = vi.fn(async (_projectDir: string, _connectionId: string, io: KtxCliIo) => { + io.stderr.write('1: Metabase database does not match KTX connection database\n'); + return 1; + }); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['metabase']], + select: ['env'], + text: ['metabase-main', 'https://metabase.example.com'], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { + prompts: testPrompts, + discoverMetabaseDatabases: vi.fn(async () => [ + { id: 1, name: 'Analytics', engine: 'postgres', host: 'db.example.com', dbName: 'analytics' }, + ]), + runMapping, + }, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(runMapping).toHaveBeenCalledWith(projectDir, 'metabase-main', io.io); + expect(io.stderr()).toContain('1: Metabase database does not match KTX connection database'); + expect(io.stderr()).not.toContain('Metabase mapping validation failed'); + }); + it('does not mark sources complete when validation fails', async () => { await addPrimarySource(); const io = makeIo(); @@ -333,8 +524,8 @@ describe('setup sources step', () => { const io = makeIo(); const testPrompts = prompts({ multiselect: [['dbt']], - select: ['git'], - text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', '', 'env:GITHUB_TOKEN'], + select: ['git', 'env'], + text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', ''], }); await expect( @@ -350,19 +541,16 @@ describe('setup sources step', () => { ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); expect(testGitRepo).toHaveBeenCalledWith({ repoUrl: 'https://github.com/acme-org/private-repo' }); - expect(testPrompts.text).toHaveBeenNthCalledWith(5, { - message: textInputPrompt( - [ - 'This repo requires authentication.', - 'Generate a token at: https://github.com/settings/tokens/new', - 'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).', - 'Or use file:/absolute/path if the token is stored in a file.', - 'Press Enter to skip and try without authentication anyway.', - ].join('\n'), - ), - placeholder: 'env:GITHUB_TOKEN', + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'This repo requires authentication.', + options: [ + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, + { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'skip', label: 'Skip — try without authentication' }, + { value: 'back', label: 'Back' }, + ], }); - expect(testPrompts.text).toHaveBeenCalledTimes(5); + expect(testPrompts.text).toHaveBeenCalledTimes(4); }); it('enables the dbt adapter when adding a dbt source connection', async () => { @@ -692,13 +880,11 @@ describe('setup sources step', () => { }, { source: 'metabase', + select: ['back', 'env'], text: [ 'metabase-main', 'https://old-metabase.example.com', - undefined, 'https://metabase.example.com', - 'env:METABASE_API_KEY', - 'warehouse', '1', ], deps: { @@ -709,14 +895,13 @@ describe('setup sources step', () => { }, { source: 'looker', + select: ['env'], text: [ 'looker-main', 'https://old-looker.example.com', undefined, 'https://looker.example.com', 'client-id', - 'env:LOOKER_CLIENT_SECRET', - 'warehouse', '', ], deps: { @@ -727,10 +912,10 @@ describe('setup sources step', () => { }, { source: 'notion', - select: ['back', 'all_accessible'], - text: ['notion-main', 'env:NOTION_TOKEN', 'env:NOTION_TOKEN'], + select: ['env', 'back', 'env', 'all_accessible'], + text: ['notion-main'], deps: { validateNotion: vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })) }, - repeatedTextMessage: textInputPrompt('Notion token ref'), + repeatedSelectMessage: 'How should KTX find your Notion integration token?', }, ]; @@ -787,4 +972,102 @@ describe('setup sources step', () => { expect(io.stdout()).toContain('Connect a primary source before adding context sources.'); expect((await readConfig()).setup?.completed_steps ?? []).not.toContain('sources'); }); + + it('auto-detects dbt_project.yml at the root of a local path', async () => { + await addPrimarySource(); + const dbtDir = join(tempDir, 'dbt-repo'); + await mkdir(dbtDir, { recursive: true }); + await writeFile(join(dbtDir, 'dbt_project.yml'), 'name: analytics\n'); + + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path'], + text: ['dbt-main', dbtDir], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.text).toHaveBeenCalledTimes(2); + const config = await readConfig(); + expect(config.connections['dbt-main']).toMatchObject({ driver: 'dbt', source_dir: dbtDir }); + expect(config.connections['dbt-main']).not.toHaveProperty('path'); + }); + + it('auto-detects dbt_project.yml in a subdirectory of a local path', async () => { + await addPrimarySource(); + const dbtDir = join(tempDir, 'monorepo'); + await mkdir(join(dbtDir, 'analytics', 'dbt'), { recursive: true }); + await writeFile(join(dbtDir, 'analytics', 'dbt', 'dbt_project.yml'), 'name: analytics\n'); + + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path'], + text: ['dbt-main', dbtDir], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.text).toHaveBeenCalledTimes(2); + expect(testPrompts.log).toHaveBeenCalledWith('Found dbt_project.yml in analytics/dbt/'); + const config = await readConfig(); + expect(config.connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: dbtDir, + path: 'analytics/dbt', + }); + }); + + it('shows a picker when multiple dbt projects are found in a local path', async () => { + await addPrimarySource(); + const dbtDir = join(tempDir, 'multi-dbt'); + await mkdir(join(dbtDir, 'analytics'), { recursive: true }); + await mkdir(join(dbtDir, 'staging'), { recursive: true }); + await writeFile(join(dbtDir, 'analytics', 'dbt_project.yml'), 'name: analytics\n'); + await writeFile(join(dbtDir, 'staging', 'dbt_project.yml'), 'name: staging\n'); + + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path', 'staging'], + text: ['dbt-main', dbtDir], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Multiple dbt projects found — which one should KTX use?', + }), + ); + expect(testPrompts.text).toHaveBeenCalledTimes(2); + const config = await readConfig(); + expect(config.connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: dbtDir, + path: 'staging', + }); + }); }); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index b0e0fe2e..4690f29c 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -1,14 +1,18 @@ import { mkdtemp, readdir, readFile, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; -import { join, resolve } from 'node:path'; +import { join, relative, resolve } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; -import { cancel, isCancel, log, multiselect, select, text } from '@clack/prompts'; -import { resolveNotionAuthToken } from '@ktx/context/connections'; +import { cancel, isCancel, log, multiselect, password, select, text } from '@clack/prompts'; +import { localConnectionTypeForConfig, resolveNotionAuthToken } from '@ktx/context/connections'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { cloneOrPull, + DEFAULT_METABASE_CLIENT_CONFIG, + discoverMetabaseDatabases, + type DiscoveredMetabaseDatabase, loadDbtSchemaFiles, loadProjectInfo, + MetabaseClient, type NotionApi, NotionClient, parseLookmlStagedDir, @@ -28,6 +32,7 @@ import { runKtxConnection } from './connection.js'; import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxPublicIngest } from './public-ingest.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { writeProjectLocalSecretReference } from './setup-secrets.js'; export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion'; @@ -71,6 +76,7 @@ export interface KtxSetupSourcesPromptAdapter { }): Promise; select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; + password(options: { message: string }): Promise; cancel(message: string): void; log?(message: string): void; } @@ -86,6 +92,11 @@ export interface KtxSetupSourcesDeps { validateLooker?: (projectDir: string, connectionId: string) => Promise; validateLookml?: (connection: KtxProjectConnectionConfig) => Promise; validateNotion?: (connection: KtxProjectConnectionConfig) => Promise; + discoverMetabaseDatabases?: (args: { + sourceUrl: string; + sourceApiKeyRef: string; + sourceConnectionId: string; + }) => Promise; runMapping?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; runInitialIngest?: ( projectDir: string, @@ -143,6 +154,12 @@ function createPromptAdapter(): KtxSetupSourcesPromptAdapter { ); return isCancel(value) ? undefined : String(value); }, + async password(options) { + const value = await withSetupInterruptConfirmation(() => + password({ ...options, message: withTextInputNavigation(options.message) }), + ); + return isCancel(value) ? undefined : String(value); + }, cancel(message) { cancel(message); }, @@ -172,17 +189,6 @@ function connectionNamePrompt(label: string): string { return `Name this ${label} connection\nKTX will use this short name in commands and config. You can rename it now.`; } -function gitAuthAfterFailurePrompt(source: KtxSetupSourceType): string { - const label = source === 'dbt' ? 'This' : `This ${sourceLabel(source)}`; - return [ - `${label} repo requires authentication.`, - 'Generate a token at: https://github.com/settings/tokens/new', - 'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).', - 'Or use file:/absolute/path if the token is stored in a file.', - 'Press Enter to skip and try without authentication anyway.', - ].join('\n'); -} - function sourceSubpathPrompt(source: KtxSetupSourceType): string { if (source === 'dbt') { return [ @@ -198,6 +204,21 @@ function sourceSubpathPrompt(source: KtxSetupSourceType): string { ].join('\n'); } +const SCAN_SKIP_DIRS = new Set(['.git', 'node_modules', '.venv', 'target', 'dbt_packages', 'dbt_modules', '__pycache__']); + +async function findDbtProjectSubpaths(rootDir: string): Promise { + const entries = await readdir(rootDir, { withFileTypes: true, recursive: true }); + const subpaths: string[] = []; + for (const entry of entries) { + if (!entry.isFile()) continue; + if (entry.name !== 'dbt_project.yml' && entry.name !== 'dbt_project.yaml') continue; + const relDir = relative(rootDir, entry.parentPath); + if (relDir.split('/').some((part) => SCAN_SKIP_DIRS.has(part))) continue; + subpaths.push(relDir); + } + return subpaths; +} + async function promptText( prompts: KtxSetupSourcesPromptAdapter, options: { message: string; placeholder?: string; initialValue?: string }, @@ -222,6 +243,70 @@ function credentialRef(value: string | undefined, label: string): string { return ref; } +async function chooseSourceCredentialRef(input: { + prompts: KtxSetupSourcesPromptAdapter; + projectDir: string; + label: string; + envName: string; + secretFileName: string; +}): Promise { + while (true) { + const choice = await input.prompts.select({ + message: `How should KTX find your ${input.label}?`, + options: [ + { value: 'env', label: `Use ${input.envName} from the environment` }, + { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') return 'back'; + if (choice === 'paste') { + const value = await input.prompts.password({ message: input.label }); + if (value === undefined) continue; + if (!value.trim()) continue; + return await writeProjectLocalSecretReference({ + projectDir: input.projectDir, + fileName: input.secretFileName, + value, + }); + } + return `env:${input.envName}`; + } +} + +async function chooseGitAuthCredentialRef(input: { + prompts: KtxSetupSourcesPromptAdapter; + projectDir: string; + source: KtxSetupSourceType; + connectionId: string; +}): Promise { + const label = input.source === 'dbt' ? 'This' : `This ${sourceLabel(input.source)}`; + while (true) { + const choice = await input.prompts.select({ + message: `${label} repo requires authentication.`, + options: [ + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, + { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'skip', label: 'Skip — try without authentication' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') return 'back'; + if (choice === 'skip') return undefined; + if (choice === 'paste') { + const value = await input.prompts.password({ message: 'Git access token' }); + if (value === undefined) continue; + if (!value.trim()) continue; + return await writeProjectLocalSecretReference({ + projectDir: input.projectDir, + fileName: `${input.connectionId}-auth-token`, + value, + }); + } + return 'env:GITHUB_TOKEN'; + } +} + function repoOrLocalSource(args: KtxSetupSourcesArgs): { sourceDir?: string; repoUrl?: string } { if (args.sourcePath && args.sourceGitUrl) { throw new Error('Choose only one source location: --source-path or --source-git-url.'); @@ -512,16 +597,6 @@ async function defaultValidateMetricflow(connection: KtxProjectConnectionConfig) }; } -async function defaultValidateMetabase(projectDir: string, connectionId: string): Promise { - const code = await runKtxConnection( - { command: 'map', projectDir, sourceConnectionId: connectionId, json: true }, - { stdout: { write() {} }, stderr: { write() {} } }, - ); - return code === 0 - ? { ok: true, detail: 'mapping validated' } - : { ok: false, message: 'Metabase mapping validation failed' }; -} - async function defaultValidateLooker(projectDir: string, connectionId: string): Promise { const code = await runKtxConnectionMapping( { command: 'refresh', projectDir, connectionId, autoAccept: true }, @@ -634,6 +709,11 @@ type SourcePromptState = KtxSetupSourcesArgs & { type SourcePromptStep = (state: SourcePromptState) => Promise<'next' | 'back'>; +interface WarehouseConnectionChoice { + id: string; + connectionType: string; +} + type InteractiveSourceConnectionChoice = | { kind: 'existing'; connectionId: string; connection: KtxProjectConnectionConfig } | { kind: 'new'; args: KtxSetupSourcesArgs } @@ -672,6 +752,107 @@ function resetRepoLocationFields(state: SourcePromptState): void { delete state.sourceProjectName; } +function warehouseConnectionChoices(config: KtxProjectConfig): WarehouseConnectionChoice[] { + return Object.entries(config.connections) + .filter(([, connection]) => PRIMARY_SOURCE_DRIVERS.has(String(connection.driver ?? '').toLowerCase())) + .map(([id, connection]) => ({ id, connectionType: localConnectionTypeForConfig(id, connection) })) + .sort((left, right) => left.id.localeCompare(right.id)); +} + +async function chooseMappedWarehouseConnectionId(input: { + projectDir: string; + prompts: KtxSetupSourcesPromptAdapter; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + const choices = warehouseConnectionChoices(project.config); + if (choices.length === 1) { + return choices[0].id; + } + if (choices.length === 0) { + const entered = await promptText(input.prompts, { message: 'Mapped warehouse connection id' }); + return entered === undefined ? 'back' : entered; + } + + const selected = await input.prompts.select({ + message: 'Mapped warehouse connection', + options: [ + ...choices.map((choice) => ({ + value: choice.id, + label: `${choice.id} (${choice.connectionType})`, + })), + { value: 'back', label: 'Back' }, + ], + }); + return selected === 'back' ? 'back' : selected; +} + +async function defaultDiscoverMetabaseDatabases(input: { + sourceUrl: string; + sourceApiKeyRef: string; +}): Promise { + const apiKey = resolveKtxConfigReference(input.sourceApiKeyRef, process.env); + if (!apiKey) { + throw new Error('Metabase API key ref could not be resolved'); + } + const client = new MetabaseClient( + { apiUrl: input.sourceUrl, apiKey }, + DEFAULT_METABASE_CLIENT_CONFIG, + ); + try { + return await discoverMetabaseDatabases(client); + } finally { + await client.cleanup(); + } +} + +function metabaseDatabaseLabel(database: DiscoveredMetabaseDatabase): string { + const detail = [database.engine].filter(Boolean).join(', '); + return detail ? `${database.id}: ${database.name} (${detail})` : `${database.id}: ${database.name}`; +} + +async function chooseMetabaseDatabaseId(input: { + state: SourcePromptState; + prompts: KtxSetupSourcesPromptAdapter; + deps: KtxSetupSourcesDeps; +}): Promise { + const sourceUrl = input.state.sourceUrl; + const sourceApiKeyRef = input.state.sourceApiKeyRef; + if (sourceUrl && sourceApiKeyRef) { + try { + const discovered = await (input.deps.discoverMetabaseDatabases ?? defaultDiscoverMetabaseDatabases)({ + sourceUrl, + sourceApiKeyRef, + sourceConnectionId: input.state.sourceConnectionId ?? 'metabase-main', + }); + if (discovered.length === 1) { + return discovered[0].id; + } + if (discovered.length > 1) { + const selected = await input.prompts.select({ + message: 'Metabase database', + options: [ + ...discovered + .slice() + .sort((left, right) => left.id - right.id) + .map((database) => ({ + value: String(database.id), + label: metabaseDatabaseLabel(database), + })), + { value: 'back', label: 'Back' }, + ], + }); + return selected === 'back' ? 'back' : Number.parseInt(selected, 10); + } + } catch { + // Discovery is a convenience. Fall back to the raw id prompt when credentials + // are unavailable locally or the Metabase API cannot be reached yet. + } + } + + const databaseId = await promptText(input.prompts, { message: 'Metabase database id' }); + return databaseId === undefined ? 'back' : Number.parseInt(databaseId, 10); +} + function connectionIdPromptSteps( args: KtxSetupSourcesArgs, source: KtxSetupSourceType, @@ -703,6 +884,7 @@ async function promptForInteractiveSource( prompts: KtxSetupSourcesPromptAdapter, defaultConnectionId = `${source}-main`, testGitRepo: KtxSetupSourcesDeps['testGitRepo'] = testRepoConnection, + discoverMetabaseDatabaseList?: KtxSetupSourcesDeps['discoverMetabaseDatabases'], ): Promise { const initialState: SourcePromptState = { ...args, source }; if (args.sourceConnectionId) { @@ -757,23 +939,6 @@ async function promptForInteractiveSource( }, ] : []), - ...(state.sourceLocation - ? [ - async (currentState: SourcePromptState) => { - const subpath = await promptText(prompts, { - message: sourceSubpathPrompt(source), - placeholder: 'optional', - }); - if (subpath === undefined) return 'back'; - if (subpath) { - currentState.sourceSubpath = subpath; - } else { - delete currentState.sourceSubpath; - } - return 'next'; - }, - ] - : []), ...(state.sourceLocation === 'git' ? [ async (currentState: SourcePromptState) => { @@ -783,11 +948,13 @@ async function promptForInteractiveSource( prompts.log?.('Repository connected.'); return 'next'; } - const authRef = await promptText(prompts, { - message: gitAuthAfterFailurePrompt(source), - placeholder: 'env:GITHUB_TOKEN', + const authRef = await chooseGitAuthCredentialRef({ + prompts, + projectDir: args.projectDir, + source, + connectionId: currentState.sourceConnectionId ?? `${source}-main`, }); - if (authRef === undefined) return 'back'; + if (authRef === 'back') return 'back'; if (authRef) { currentState.sourceAuthTokenRef = authRef; } else { @@ -797,6 +964,79 @@ async function promptForInteractiveSource( }, ] : []), + ...(state.sourceLocation + ? [ + async (currentState: SourcePromptState) => { + if (source === 'dbt') { + let scanDir: string | undefined; + if (currentState.sourceLocation === 'path' && currentState.sourcePath) { + scanDir = currentState.sourcePath; + } else if (currentState.sourceLocation === 'git' && currentState.sourceGitUrl) { + try { + const cacheDir = await mkdtemp(join(tmpdir(), 'ktx-setup-dbt-scan-')); + const authToken = currentState.sourceAuthTokenRef + ? resolveKtxConfigReference(currentState.sourceAuthTokenRef, process.env) + : null; + await cloneOrPull({ + repoUrl: currentState.sourceGitUrl, + authToken, + cacheDir, + branch: currentState.sourceBranch ?? 'main', + }); + scanDir = cacheDir; + } catch { + // Clone failed — fall through to manual prompt + } + } + if (scanDir) { + try { + const subpaths = await findDbtProjectSubpaths(scanDir); + if (subpaths.length === 1) { + const found = subpaths[0]!; + if (found) { + currentState.sourceSubpath = found; + prompts.log?.(`Found dbt_project.yml in ${found}/`); + } else { + delete currentState.sourceSubpath; + } + return 'next'; + } + if (subpaths.length > 1) { + const selected = await prompts.select({ + message: 'Multiple dbt projects found — which one should KTX use?', + options: [ + ...subpaths.map((p) => ({ value: p || '.', label: p || '(project root)' })), + { value: 'back', label: 'Back' }, + ], + }); + if (selected === 'back') return 'back'; + const subpath = selected === '.' ? '' : selected; + if (subpath) { + currentState.sourceSubpath = subpath; + } else { + delete currentState.sourceSubpath; + } + return 'next'; + } + } catch { + // Directory unreadable — fall through to manual prompt + } + } + } + const subpath = await promptText(prompts, { + message: sourceSubpathPrompt(source), + placeholder: 'optional', + }); + if (subpath === undefined) return 'back'; + if (subpath) { + currentState.sourceSubpath = subpath; + } else { + delete currentState.sourceSubpath; + } + return 'next'; + }, + ] + : []), ]); } @@ -810,24 +1050,34 @@ async function promptForInteractiveSource( return 'next'; }, async (state) => { - const sourceApiKeyRef = await promptText(prompts, { - message: 'Metabase API key ref', - placeholder: 'env:METABASE_API_KEY', + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Metabase API key', + envName: 'METABASE_API_KEY', + secretFileName: `${state.sourceConnectionId ?? 'metabase-main'}-api-key`, }); - if (sourceApiKeyRef === undefined) return 'back'; - state.sourceApiKeyRef = sourceApiKeyRef; + if (ref === 'back') return 'back'; + state.sourceApiKeyRef = ref; return 'next'; }, async (state) => { - const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' }); - if (sourceWarehouseConnectionId === undefined) return 'back'; + const sourceWarehouseConnectionId = await chooseMappedWarehouseConnectionId({ + projectDir: args.projectDir, + prompts, + }); + if (sourceWarehouseConnectionId === 'back') return 'back'; state.sourceWarehouseConnectionId = sourceWarehouseConnectionId; return 'next'; }, async (state) => { - const databaseId = await promptText(prompts, { message: 'Metabase database id' }); - if (databaseId === undefined) return 'back'; - state.metabaseDatabaseId = Number.parseInt(databaseId, 10); + const databaseId = await chooseMetabaseDatabaseId({ + state, + prompts, + deps: { discoverMetabaseDatabases: discoverMetabaseDatabaseList }, + }); + if (databaseId === 'back') return 'back'; + state.metabaseDatabaseId = databaseId; return 'next'; }, ]); @@ -849,17 +1099,23 @@ async function promptForInteractiveSource( return 'next'; }, async (state) => { - const sourceClientSecretRef = await promptText(prompts, { - message: 'Looker client secret ref', - placeholder: 'env:LOOKER_CLIENT_SECRET', + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Looker client secret', + envName: 'LOOKER_CLIENT_SECRET', + secretFileName: `${state.sourceConnectionId ?? 'looker-main'}-client-secret`, }); - if (sourceClientSecretRef === undefined) return 'back'; - state.sourceClientSecretRef = sourceClientSecretRef; + if (ref === 'back') return 'back'; + state.sourceClientSecretRef = ref; return 'next'; }, async (state) => { - const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' }); - if (sourceWarehouseConnectionId === undefined) return 'back'; + const sourceWarehouseConnectionId = await chooseMappedWarehouseConnectionId({ + projectDir: args.projectDir, + prompts, + }); + if (sourceWarehouseConnectionId === 'back') return 'back'; state.sourceWarehouseConnectionId = sourceWarehouseConnectionId; return 'next'; }, @@ -882,12 +1138,15 @@ async function promptForInteractiveSource( return await runSourcePromptSteps(initialState, (state) => [ ...connectionSteps, async (currentState) => { - const sourceApiKeyRef = await promptText(prompts, { - message: 'Notion token ref', - placeholder: 'env:NOTION_TOKEN', + const ref = await chooseSourceCredentialRef({ + prompts, + projectDir: args.projectDir, + label: 'Notion integration token', + envName: 'NOTION_TOKEN', + secretFileName: `${currentState.sourceConnectionId ?? 'notion-main'}-token`, }); - if (sourceApiKeyRef === undefined) return 'back'; - currentState.sourceApiKeyRef = sourceApiKeyRef; + if (ref === 'back') return 'back'; + currentState.sourceApiKeyRef = ref; return 'next'; }, async (currentState) => { @@ -956,13 +1215,21 @@ async function chooseInteractiveSourceConnection(input: { connections: Record; prompts: KtxSetupSourcesPromptAdapter; testGitRepo?: KtxSetupSourcesDeps['testGitRepo']; + discoverMetabaseDatabases?: KtxSetupSourcesDeps['discoverMetabaseDatabases']; }): Promise { const existingIds = existingConnectionIdsBySource(input.connections, input.source); const defaultConnectionId = defaultConnectionIdForSource(input.connections, input.source); const label = sourceLabel(input.source); if (existingIds.length === 0) { - const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo); + const sourceArgs = await promptForInteractiveSource( + input.args, + input.source, + input.prompts, + defaultConnectionId, + input.testGitRepo, + input.discoverMetabaseDatabases, + ); return sourceArgs === 'back' ? 'back' : { kind: 'new', args: sourceArgs }; } @@ -987,7 +1254,14 @@ async function chooseInteractiveSourceConnection(input: { } continue; } - const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo); + const sourceArgs = await promptForInteractiveSource( + input.args, + input.source, + input.prompts, + defaultConnectionId, + input.testGitRepo, + input.discoverMetabaseDatabases, + ); if (sourceArgs === 'back') { continue; } @@ -1026,7 +1300,9 @@ async function validateSource( return await (deps.validateMetricflow ?? defaultValidateMetricflow)(args.connection); } if (source === 'metabase') { - return await (deps.validateMetabase ?? defaultValidateMetabase)(args.projectDir, args.connectionId); + return deps.validateMetabase + ? await deps.validateMetabase(args.projectDir, args.connectionId) + : { ok: true, detail: 'mapping validation runs after the connection is saved' }; } if (source === 'looker') { return await (deps.validateLooker ?? defaultValidateLooker)(args.projectDir, args.connectionId); @@ -1097,6 +1373,7 @@ export async function runKtxSetupSourcesStep( connections: (await loadKtxProject({ projectDir: args.projectDir })).config.connections, prompts, testGitRepo: deps.testGitRepo, + discoverMetabaseDatabases: deps.discoverMetabaseDatabases, }); if (sourceChoice === 'back') { if (args.source) { diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts index 2e492f07..0c854f6d 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts @@ -1,8 +1,21 @@ -import { describe, expect, it } from 'vitest'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import type { KtxProjectConnectionConfig } from '../../../project/index.js'; import { metabaseRuntimeConfigFromLocalConnection } from './local-metabase.adapter.js'; describe('metabaseRuntimeConfigFromLocalConnection', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-runtime-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + it('resolves api_url and env-backed api_key_ref from a flat ktx.yaml connection', () => { const connection: KtxProjectConnectionConfig = { driver: 'metabase', @@ -20,6 +33,21 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => { }); }); + it('resolves file-backed api_key_ref from pasted setup secrets', async () => { + const keyPath = join(tempDir, 'metabase-main-api-key'); + await writeFile(keyPath, 'mb_file_key\n', 'utf-8'); // pragma: allowlist secret + const connection: KtxProjectConnectionConfig = { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key_ref: `file:${keyPath}`, + }; + + expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({ + apiUrl: 'https://metabase.example.com', + apiKey: 'mb_file_key', // pragma: allowlist secret + }); + }); + it('accepts url as the local api URL alias', () => { const connection: KtxProjectConnectionConfig = { driver: 'metabase', diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts index a13b3923..bd81413f 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -1,5 +1,6 @@ import type { KtxLocalProject, KtxProjectConnectionConfig } from '../../../project/index.js'; import { ktxLocalStateDbPath } from '../../../project/index.js'; +import { resolveKtxConfigReference } from '../../../core/config-reference.js'; import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './client.js'; import { IngestMetabaseClientFactory, @@ -13,14 +14,6 @@ function stringField(value: unknown): string | null { return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; } -function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null { - if (!ref.startsWith('env:')) { - return null; - } - const name = ref.slice('env:'.length); - return stringField(env[name]); -} - function hasNetworkProxy(connection: KtxProjectConnectionConfig): boolean { return connection.networkProxy != null || connection.network_proxy != null; } @@ -42,7 +35,7 @@ export function metabaseRuntimeConfigFromLocalConnection( const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url); const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey); const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef); - const apiKey = literalApiKey ?? (apiKeyRef ? resolveEnvReference(apiKeyRef, env) : null); + const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null); if (!apiUrl) { throw new Error(`Connection "${connectionId}" is missing metabase api_url`); From 440a07d0d2e4dac5dae7e20f9affa3d867278567 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Sun, 10 May 2026 16:19:19 -0700 Subject: [PATCH 2/8] Summarize connector mapping validation --- packages/cli/src/setup-sources.ts | 43 ++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 4690f29c..73d191dc 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -264,11 +264,13 @@ async function chooseSourceCredentialRef(input: { const value = await input.prompts.password({ message: input.label }); if (value === undefined) continue; if (!value.trim()) continue; - return await writeProjectLocalSecretReference({ + const ref = await writeProjectLocalSecretReference({ projectDir: input.projectDir, fileName: input.secretFileName, value, }); + input.prompts.log?.(`Saved to .ktx/secrets/${input.secretFileName}`); + return ref; } return `env:${input.envName}`; } @@ -297,11 +299,14 @@ async function chooseGitAuthCredentialRef(input: { const value = await input.prompts.password({ message: 'Git access token' }); if (value === undefined) continue; if (!value.trim()) continue; - return await writeProjectLocalSecretReference({ + const fileName = `${input.connectionId}-auth-token`; + const ref = await writeProjectLocalSecretReference({ projectDir: input.projectDir, - fileName: `${input.connectionId}-auth-token`, + fileName, value, }); + input.prompts.log?.(`Saved to .ktx/secrets/${fileName}`); + return ref; } return 'env:GITHUB_TOKEN'; } @@ -634,8 +639,37 @@ async function defaultValidateNotion(connection: KtxProjectConnectionConfig): Pr return { ok: true, detail: `roots=${roots.length}` }; } +interface MappingJsonOutput { + connectionId: string; + refresh: { ok: boolean; output: string[] }; + validation: { ok: boolean; output: string[] }; + mappings: unknown[]; +} + +function summarizeMappingResult(parsed: MappingJsonOutput): string { + const mappingCount = parsed.mappings.length; + const mappingNoun = mappingCount === 1 ? 'mapping' : 'mappings'; + return `Mapping validated — ${mappingCount} ${mappingNoun} configured`; +} + async function defaultRunMapping(projectDir: string, connectionId: string, io: KtxCliIo): Promise { - return await runKtxConnection({ command: 'map', projectDir, sourceConnectionId: connectionId, json: false }, io); + let captured = ''; + const captureIo: KtxCliIo = { + stdout: { write(chunk: string) { captured += chunk; } }, + stderr: io.stderr, + }; + const code = await runKtxConnection( + { command: 'map', projectDir, sourceConnectionId: connectionId, json: true }, + captureIo, + ); + if (code !== 0) return code; + try { + const parsed = JSON.parse(captured.trim()) as MappingJsonOutput; + io.stdout.write(`${summarizeMappingResult(parsed)}\n`); + } catch { + io.stdout.write(captured); + } + return 0; } async function defaultRunInitialIngest( @@ -1403,6 +1437,7 @@ export async function runKtxSetupSourcesStep( return { status: 'failed', projectDir: args.projectDir }; } if (source === 'metabase' || source === 'looker') { + prompts.log?.(`Validating ${sourceLabel(source)} mapping…`); const mappingCode = await (deps.runMapping ?? defaultRunMapping)(args.projectDir, connectionId, io); if (mappingCode !== 0) { await rollback?.(); From 82848e5de9261a0b3c151979a26d0333044d54ee Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Sun, 10 May 2026 16:25:02 -0700 Subject: [PATCH 3/8] Watch setup context until terminal status --- packages/cli/src/setup-context.test.ts | 42 +++++++++++++++++++++ packages/cli/src/setup-context.ts | 51 ++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 2cebff8d..0f20ee81 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -372,6 +372,48 @@ describe('setup context build state', () => { }); }); + it('watches setup context command status until the run reaches a terminal state', async () => { + await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true }); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-watch', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-watch'), + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-watch', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-watch'), + }); + }; + + await expect( + runKtxSetupContextCommand( + { command: 'watch', projectDir: tempDir, runId: 'setup-context-local-watch', inputMode: 'disabled' }, + io.io, + { sleep: completeRun, watchIntervalMs: 1 }, + ), + ).resolves.toBe(0); + expect(io.stdout()).toContain('KTX context built: running'); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + it('runs direct build commands without asking for setup confirmation first', async () => { await writeReadyProject(tempDir); const io = makeIo(); diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 042c5b1e..1555f264 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -99,6 +99,8 @@ export interface KtxSetupContextDeps { now?: () => Date; runContextBuild?: typeof runContextBuild; verifyContextReady?: (projectDir: string) => Promise; + sleep?: (ms: number) => Promise; + watchIntervalMs?: number; } interface KtxSetupContextTargets { @@ -109,6 +111,7 @@ interface KtxSetupContextTargets { const SETUP_CONTEXT_STATE_PATH = ['.ktx', 'setup', 'context-build.json'] as const; const LIVE_DATABASE_ADAPTER = 'live-database'; const SCAN_REPORT_FILE = 'scan-report.json'; +const DEFAULT_WATCH_INTERVAL_MS = 2_000; function createPromptAdapter(): KtxSetupContextPromptAdapter { return { @@ -698,6 +701,18 @@ function stateMatchesRunId(state: KtxSetupContextState, runId: string | undefine return !runId || state.runId === runId; } +function isActiveStatus(status: KtxSetupContextBuildStatus): boolean { + return status === 'running' || status === 'detached'; +} + +function watchExitCode(status: KtxSetupContextBuildStatus): number { + return status === 'failed' || status === 'interrupted' || status === 'stale' ? 1 : 0; +} + +function defaultSleep(ms: number): Promise { + return new Promise((resolveSleep) => setTimeout(resolveSleep, ms)); +} + function statusPayload(state: KtxSetupContextState): KtxSetupContextStatusSummary { return setupContextStatusFromState(state, { completedStep: state.status === 'completed' }); } @@ -714,6 +729,38 @@ function writeContextStatus(state: KtxSetupContextState, io: KtxCliIo): void { } } +async function watchContextStatus( + args: Extract, + initialState: KtxSetupContextState, + io: KtxCliIo, + deps: KtxSetupContextDeps, +): Promise { + const sleep = deps.sleep ?? defaultSleep; + const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; + let state = initialState; + let lastRenderedStatus = ''; + + io.stdout.write('KTX context build\n'); + while (true) { + const renderedStatus = `${state.status}:${state.updatedAt ?? ''}:${state.completedAt ?? ''}:${state.failureReason ?? ''}`; + if (renderedStatus !== lastRenderedStatus) { + writeContextStatus(state, io); + lastRenderedStatus = renderedStatus; + } + + if (!isActiveStatus(state.status)) { + return watchExitCode(state.status); + } + + await sleep(intervalMs); + state = await readKtxSetupContextState(args.projectDir); + if (!stateMatchesRunId(state, args.runId)) { + io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); + return 1; + } + } +} + export async function runKtxSetupContextCommand( args: KtxSetupContextCommandArgs, io: KtxCliIo, @@ -744,9 +791,7 @@ export async function runKtxSetupContextCommand( } if (args.command === 'watch') { - io.stdout.write('KTX context build\n'); - writeContextStatus(state, io); - return 0; + return await watchContextStatus(args, state, io, deps); } const updatedAt = new Date().toISOString(); From 549fb35e7512cf2b565bd913b0bcf9034bab8232 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Sun, 10 May 2026 17:08:55 -0700 Subject: [PATCH 4/8] Show progress when watching context builds --- packages/cli/src/context-build-view.test.ts | 94 ++++++++++ packages/cli/src/context-build-view.ts | 56 +++++- packages/cli/src/setup-context.test.ts | 160 +++++++++++++++++ packages/cli/src/setup-context.ts | 182 +++++++++++++++++++- packages/cli/src/setup.test.ts | 134 ++++++++++++++ packages/cli/src/setup.ts | 56 ++++-- 6 files changed, 660 insertions(+), 22 deletions(-) diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index 1c6965d8..a88b42cc 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -8,6 +8,7 @@ import { parseScanSummary, renderContextBuildView, runContextBuild, + viewStateFromSourceProgress, } from './context-build-view.js'; function makeIo(options: { isTTY?: boolean } = {}) { @@ -424,4 +425,97 @@ describe('runContextBuild', () => { expect(io.stdout()).toContain('Resume: ktx setup --project-dir /tmp/project'); mockExit.mockRestore(); }); + + it('calls onSourceProgress when sources start and finish', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + dbt_main: { driver: 'dbt' }, + }); + const progressUpdates: Array> = []; + const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation)); + + await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled' }, + io.io, + { + executeTarget, + now: () => 1000, + onSourceProgress: (sources) => { + progressUpdates.push(sources.map((s) => ({ connectionId: s.connectionId, status: s.status }))); + }, + }, + ); + + expect(progressUpdates).toHaveLength(4); + expect(progressUpdates[0]).toEqual([ + { connectionId: 'warehouse', status: 'running' }, + { connectionId: 'dbt_main', status: 'queued' }, + ]); + expect(progressUpdates[1]).toEqual([ + { connectionId: 'warehouse', status: 'done' }, + { connectionId: 'dbt_main', status: 'queued' }, + ]); + expect(progressUpdates[2]).toEqual([ + { connectionId: 'warehouse', status: 'done' }, + { connectionId: 'dbt_main', status: 'running' }, + ]); + expect(progressUpdates[3]).toEqual([ + { connectionId: 'warehouse', status: 'done' }, + { connectionId: 'dbt_main', status: 'done' }, + ]); + }); +}); + +describe('viewStateFromSourceProgress', () => { + it('partitions sources into primary and context groups', () => { + const state = viewStateFromSourceProgress( + [ + { connectionId: 'warehouse', operation: 'scan', status: 'running', startedAtMs: 900 }, + { connectionId: 'dbt-main', operation: 'source-ingest', status: 'queued' }, + ], + 1000, + 500, + ); + + expect(state.primarySources).toHaveLength(1); + expect(state.primarySources[0].target.connectionId).toBe('warehouse'); + expect(state.primarySources[0].status).toBe('running'); + expect(state.primarySources[0].elapsedMs).toBe(100); + expect(state.contextSources).toHaveLength(1); + expect(state.contextSources[0].target.connectionId).toBe('dbt-main'); + expect(state.contextSources[0].status).toBe('queued'); + expect(state.totalElapsedMs).toBe(500); + }); + + it('uses stored elapsedMs for completed sources', () => { + const state = viewStateFromSourceProgress( + [{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }], + 99999, + ); + + expect(state.primarySources[0].elapsedMs).toBe(72000); + expect(state.primarySources[0].summaryText).toBe('42 tables'); + }); + + it('renders the same view format as the foreground build', () => { + const state = viewStateFromSourceProgress( + [ + { connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }, + { connectionId: 'dbt-main', operation: 'source-ingest', status: 'running', startedAtMs: 900 }, + ], + 1000, + 500, + ); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Primary sources:'); + expect(output).toContain('warehouse'); + expect(output).toContain('42 tables'); + expect(output).toContain('Context sources:'); + expect(output).toContain('dbt-main'); + expect(output).toContain('ingesting...'); + }); }); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 96a8aa57..7edc7d13 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -46,11 +46,21 @@ export interface ContextBuildResult { detached: boolean; } +export interface ContextBuildSourceProgressUpdate { + connectionId: string; + operation: 'scan' | 'source-ingest'; + status: 'queued' | 'running' | 'done' | 'failed'; + startedAtMs?: number; + elapsedMs?: number; + summaryText?: string; +} + export interface ContextBuildDeps { executeTarget?: typeof executePublicIngestTarget; now?: () => number; setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; onDetach?: () => void; + onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void; } // --- Rendering --- @@ -165,7 +175,7 @@ function resumeCommand(projectDir?: string): string { export function renderContextBuildView( state: ContextBuildViewState, - options: { styled?: boolean; showHint?: boolean; projectDir?: string } = {}, + options: { styled?: boolean; showHint?: boolean; hintText?: string; projectDir?: string } = {}, ): string { const styled = options.styled ?? true; const width = columnWidth(state); @@ -203,7 +213,8 @@ export function renderContextBuildView( } if (options.showHint && hasActive) { - const hint = ` d to detach · ${resumeCommand(options.projectDir)} to resume`; + const hintContent = options.hintText ?? `d to detach · ${resumeCommand(options.projectDir)} to resume`; + const hint = ` ${hintContent}`; lines.push(styled ? dim(hint) : hint); lines.push(''); } @@ -261,9 +272,45 @@ function createCaptureIo(onProgress: (message: string) => void, isTTY: boolean): }; } +// --- Source progress helpers --- + +function collectSourceProgress(targets: ContextBuildTargetState[]): ContextBuildSourceProgressUpdate[] { + return targets.map((t) => ({ + connectionId: t.target.connectionId, + operation: t.target.operation, + status: t.status, + ...(t.startedAt !== null ? { startedAtMs: t.startedAt } : {}), + ...(t.elapsedMs > 0 ? { elapsedMs: t.elapsedMs } : {}), + ...(t.summaryText ? { summaryText: t.summaryText } : {}), + })); +} + +export function viewStateFromSourceProgress( + sources: ContextBuildSourceProgressUpdate[], + now: number, + startedAtMs?: number, +): ContextBuildViewState { + const makeTarget = (s: ContextBuildSourceProgressUpdate): ContextBuildTargetState => ({ + target: { connectionId: s.connectionId, driver: '', operation: s.operation, debugCommand: '', steps: [] }, + status: s.status, + detailLine: null, + summaryText: s.summaryText ?? null, + startedAt: s.startedAtMs ?? null, + elapsedMs: s.status === 'running' && s.startedAtMs ? now - s.startedAtMs : (s.elapsedMs ?? 0), + }); + + return { + primarySources: sources.filter((s) => s.operation === 'scan').map(makeTarget), + contextSources: sources.filter((s) => s.operation === 'source-ingest').map(makeTarget), + frame: 0, + startedAt: startedAtMs ?? null, + totalElapsedMs: startedAtMs ? now - startedAtMs : 0, + }; +} + // --- Repaint --- -function createRepainter(io: KtxCliIo) { +export function createRepainter(io: KtxCliIo) { let lastLineCount = 0; return { @@ -397,7 +444,6 @@ export async function runContextBuild( const bg = spawnBackgroundBuild(args.projectDir); io.stdout.write('\n\nContext build continuing in the background.\n'); if (bg) io.stdout.write(`Log: ${bg.logPath}\n`); - io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`); io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`); process.exit(0); }, @@ -428,6 +474,7 @@ export async function runContextBuild( targetState.status = 'running'; targetState.startedAt = nowFn(); paint(true); + deps.onSourceProgress?.(collectSourceProgress(orderedTargets)); const capture = createCaptureIo( (message) => { @@ -452,6 +499,7 @@ export async function runContextBuild( if (failed) hasFailure = true; paint(true); + deps.onSourceProgress?.(collectSourceProgress(orderedTargets)); } } finally { if (spinnerInterval) clearInterval(spinnerInterval); diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 0f20ee81..90694ada 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -340,6 +340,166 @@ describe('setup context build state', () => { expect(io.stderr()).toContain('No primary or context sources are configured for a KTX context build.'); }); + it('watches an already-running setup context build from the resume prompt', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-resume-watch', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'), + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-resume-watch', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'), + }); + }; + const select = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => { + expect(options.options.map((option) => option.label)).toContain('Watch progress'); + return 'watch'; + }); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + sleep: completeRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-resume-watch' }); + expect(io.stdout()).toContain('KTX context built: detached'); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + + it('auto-watches a running build without prompting when autoWatch is true', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-auto-watch', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'), + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-auto-watch', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'), + }); + }; + const select = vi.fn(async () => { + throw new Error('should not prompt when autoWatch is true'); + }); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + sleep: completeRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-auto-watch' }); + expect(select).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + + it('renders the progress view when watching a build with sourceProgress', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-progress', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-progress'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 }, + { connectionId: 'docs', operation: 'source-ingest' as const, status: 'running' as const, startedAtMs: Date.now() - 5000 }, + ], + }); + const io = makeIo(); + const completeRun = async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-progress', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-progress'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 }, + { connectionId: 'docs', operation: 'source-ingest' as const, status: 'done' as const, elapsedMs: 60000 }, + ], + }); + }; + const select = vi.fn(async () => 'watch'); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + sleep: completeRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-progress' }); + + const output = io.stdout(); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Primary sources:'); + expect(output).toContain('warehouse'); + expect(output).toContain('Context sources:'); + expect(output).toContain('docs'); + expect(output).not.toContain('KTX context built: detached'); + }); + it('prints JSON setup context command status with watch and resume commands', async () => { await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true }); await writeKtxSetupContextState(tempDir, { diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 1555f264..cd79d9bd 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -10,7 +10,13 @@ import { } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { buildPublicIngestPlan } from './public-ingest.js'; -import { runContextBuild } from './context-build-view.js'; +import { + type ContextBuildSourceProgressUpdate, + createRepainter, + renderContextBuildView, + runContextBuild, + viewStateFromSourceProgress, +} from './context-build-view.js'; import { withMenuOptionsSpacing } from './prompt-navigation.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; @@ -45,6 +51,7 @@ export interface KtxSetupContextState { retryableFailedTargets: string[]; commands: KtxSetupContextCommands; failureReason?: string; + sourceProgress?: ContextBuildSourceProgressUpdate[]; } export interface KtxSetupContextStatusSummary { @@ -80,6 +87,7 @@ export interface KtxSetupContextStepArgs { forcePrompt?: boolean; allowEmpty?: boolean; prompt?: boolean; + autoWatch?: boolean; } export type KtxSetupContextCommandArgs = @@ -196,9 +204,34 @@ function normalizeState(projectDir: string, value: unknown): KtxSetupContextStat : [], commands: contextBuildCommands(projectDir, runId), ...(typeof record.failureReason === 'string' ? { failureReason: record.failureReason } : {}), + ...(normalizeSourceProgress(record.sourceProgress) ? { sourceProgress: normalizeSourceProgress(record.sourceProgress) } : {}), }; } +const VALID_SOURCE_OPERATIONS = new Set(['scan', 'source-ingest']); +const VALID_SOURCE_STATUSES = new Set(['queued', 'running', 'done', 'failed']); + +function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpdate[] | undefined { + if (!Array.isArray(value)) return undefined; + const entries: ContextBuildSourceProgressUpdate[] = []; + for (const item of value) { + if (typeof item !== 'object' || item === null || Array.isArray(item)) continue; + const rec = item as Record; + if (typeof rec.connectionId !== 'string') continue; + if (!VALID_SOURCE_OPERATIONS.has(String(rec.operation))) continue; + if (!VALID_SOURCE_STATUSES.has(String(rec.status))) continue; + entries.push({ + connectionId: rec.connectionId, + operation: rec.operation as 'scan' | 'source-ingest', + status: rec.status as 'queued' | 'running' | 'done' | 'failed', + ...(typeof rec.startedAtMs === 'number' ? { startedAtMs: rec.startedAtMs } : {}), + ...(typeof rec.elapsedMs === 'number' ? { elapsedMs: rec.elapsedMs } : {}), + ...(typeof rec.summaryText === 'string' ? { summaryText: rec.summaryText } : {}), + }); + } + return entries.length > 0 ? entries : undefined; +} + export async function readKtxSetupContextState(projectDir: string): Promise { const filePath = statePath(projectDir); if (!(await pathExists(filePath))) { @@ -517,6 +550,7 @@ async function runBuild( }; await writeKtxSetupContextState(args.projectDir, runningState); + let lastSourceProgress: ContextBuildSourceProgressUpdate[] | undefined; const contextBuild = deps.runContextBuild ?? runContextBuild; const buildResult = await contextBuild( project, @@ -535,14 +569,35 @@ async function runBuild( ...runningState, status: 'detached', updatedAt: new Date().toISOString(), + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); writeFileSync(statePath(resolvedDir), `${JSON.stringify(detachedState, null, 2)}\n`); }, + onSourceProgress: (sources) => { + lastSourceProgress = sources; + try { + const resolvedDir = resolve(args.projectDir); + mkdirSync(join(resolvedDir, '.ktx', 'setup'), { recursive: true }); + const progressState = normalizeState(resolvedDir, { + ...runningState, + sourceProgress: sources, + updatedAt: new Date().toISOString(), + }); + writeFileSync(statePath(resolvedDir), `${JSON.stringify(progressState, null, 2)}\n`); + } catch { + // Progress reporting is supplementary — don't crash the build + } + }, }, ); if (buildResult.detached) { const updatedAt = now().toISOString(); - await writeKtxSetupContextState(args.projectDir, { ...runningState, status: 'detached', updatedAt }); + await writeKtxSetupContextState(args.projectDir, { + ...runningState, + status: 'detached', + updatedAt, + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), + }); return { status: 'detached', projectDir: args.projectDir, runId }; } if (buildResult.exitCode !== 0) { @@ -553,6 +608,7 @@ async function runBuild( updatedAt, retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds], failureReason: 'Context build failed.', + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); return { status: 'failed', projectDir: args.projectDir }; } @@ -566,6 +622,7 @@ async function runBuild( updatedAt, retryableFailedTargets: readiness.failedTargets ?? [], failureReason: readiness.details.join(' '), + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); io.stderr.write('KTX context build did not pass agent-readiness verification.\n'); for (const detail of readiness.details) { @@ -582,6 +639,7 @@ async function runBuild( updatedAt: completedAt, completedAt, retryableFailedTargets: [], + ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); writeSuccess(readiness, targets, io); return { status: 'ready', projectDir: args.projectDir, runId }; @@ -635,17 +693,46 @@ export async function runKtxSetupContextStep( (existingState.status === 'running' || existingState.status === 'detached') && args.inputMode !== 'disabled' ) { + if (args.autoWatch) { + const watched = await watchContextStatus( + { + command: 'watch', + projectDir: args.projectDir, + ...(existingState.runId ? { runId: existingState.runId } : {}), + inputMode: args.inputMode, + }, + existingState, + io, + deps, + ); + return setupResultFromWatchedState(args.projectDir, watched.state); + } const prompts = deps.prompts ?? createPromptAdapter(); const choice = await prompts.select({ message: 'A context build is running in the background.\n\n' + - 'You can wait for it to finish, check its status, or start a fresh build.', + 'You can watch it until it finishes, check its status once, or start a fresh build.', options: [ + { value: 'watch', label: 'Watch progress' }, { value: 'status', label: 'Check status' }, { value: 'rebuild', label: 'Start a fresh context build' }, { value: 'back', label: 'Back' }, ], }); + if (choice === 'watch') { + const watched = await watchContextStatus( + { + command: 'watch', + projectDir: args.projectDir, + ...(existingState.runId ? { runId: existingState.runId } : {}), + inputMode: args.inputMode, + }, + existingState, + io, + deps, + ); + return setupResultFromWatchedState(args.projectDir, watched.state); + } if (choice === 'status') { const commands = contextBuildCommands(args.projectDir, existingState.runId); io.stdout.write(`\nRun: ${commands.status}\n`); @@ -734,7 +821,19 @@ async function watchContextStatus( initialState: KtxSetupContextState, io: KtxCliIo, deps: KtxSetupContextDeps, -): Promise { +): Promise<{ exitCode: number; state: KtxSetupContextState }> { + if (initialState.sourceProgress && initialState.sourceProgress.length > 0) { + return watchContextStatusWithProgressView(args, initialState, io, deps); + } + return watchContextStatusText(args, initialState, io, deps); +} + +async function watchContextStatusText( + args: Extract, + initialState: KtxSetupContextState, + io: KtxCliIo, + deps: KtxSetupContextDeps, +): Promise<{ exitCode: number; state: KtxSetupContextState }> { const sleep = deps.sleep ?? defaultSleep; const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; let state = initialState; @@ -749,18 +848,87 @@ async function watchContextStatus( } if (!isActiveStatus(state.status)) { - return watchExitCode(state.status); + return { exitCode: watchExitCode(state.status), state }; } await sleep(intervalMs); state = await readKtxSetupContextState(args.projectDir); if (!stateMatchesRunId(state, args.runId)) { io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); - return 1; + return { exitCode: 1, state }; } } } +async function watchContextStatusWithProgressView( + args: Extract, + initialState: KtxSetupContextState, + io: KtxCliIo, + deps: KtxSetupContextDeps, +): Promise<{ exitCode: number; state: KtxSetupContextState }> { + const sleep = deps.sleep ?? defaultSleep; + const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; + const isTTY = io.stdout.isTTY === true; + const repainter = isTTY ? createRepainter(io) : null; + let state = initialState; + let frame = 0; + let lastProgressKey = ''; + + while (true) { + const now = Date.now(); + const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; + const viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], now, startedAtMs); + viewState.frame = frame; + + const viewOpts = { + styled: isTTY, + showHint: true, + hintText: 'ctrl+c to stop watching · build continues in background', + }; + + if (repainter) { + repainter.paint(renderContextBuildView(viewState, viewOpts)); + } else { + const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); + if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { + io.stdout.write(renderContextBuildView(viewState, viewOpts)); + lastProgressKey = currentKey; + } + } + + if (!isActiveStatus(state.status)) { + return { exitCode: watchExitCode(state.status), state }; + } + + frame++; + await sleep(intervalMs); + + try { + state = await readKtxSetupContextState(args.projectDir); + } catch { + continue; + } + + if (!stateMatchesRunId(state, args.runId)) { + io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); + return { exitCode: 1, state }; + } + } +} + +function setupResultFromWatchedState(projectDir: string, state: KtxSetupContextState): KtxSetupContextResult { + if (state.status === 'completed') { + return { status: 'ready', projectDir, runId: state.runId ?? 'setup-context-completed' }; + } + if (state.status === 'paused') { + return { status: 'paused', projectDir, runId: state.runId ?? '' }; + } + if (state.status === 'running' || state.status === 'detached') { + return { status: 'detached', projectDir, runId: state.runId ?? '' }; + } + return { status: 'failed', projectDir }; +} + export async function runKtxSetupContextCommand( args: KtxSetupContextCommandArgs, io: KtxCliIo, @@ -791,7 +959,7 @@ export async function runKtxSetupContextCommand( } if (args.command === 'watch') { - return await watchContextStatus(args, state, io, deps); + return (await watchContextStatus(args, state, io, deps)).exitCode; } const updatedAt = new Date().toISOString(); diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 3e772d92..cf9d22a8 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -1305,6 +1305,140 @@ describe('setup status', () => { expect(calls).toEqual(['context']); }); + it('resumes an active context build before prompting for earlier setup steps', async () => { + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-active', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-active'), + }); + const context = vi.fn(async () => ({ + status: 'detached' as const, + projectDir: tempDir, + runId: 'setup-context-local-active', + })); + const databases = vi.fn(async () => { + throw new Error('database setup should not run while context build is active'); + }); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { context, databases }, + ), + ).resolves.toBe(0); + + expect(context).toHaveBeenCalledWith( + { projectDir: tempDir, inputMode: 'auto', allowEmpty: true }, + io.io, + ); + expect(databases).not.toHaveBeenCalled(); + }); + + it('skips entry menu and auto-watches when context build is active and showEntryMenu is true', async () => { + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-active', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-active'), + }); + const context = vi.fn(async () => ({ + status: 'detached' as const, + projectDir: tempDir, + runId: 'setup-context-local-active', + })); + const entryMenuSelect = vi.fn(async () => 'exit'); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + showEntryMenu: true, + }, + io.io, + { + context, + entryMenuDeps: { prompts: { select: entryMenuSelect, cancel: vi.fn() } }, + }, + ), + ).resolves.toBe(0); + + expect(entryMenuSelect).not.toHaveBeenCalled(); + expect(context).toHaveBeenCalledWith( + { projectDir: tempDir, inputMode: 'auto', allowEmpty: true, autoWatch: true }, + io.io, + ); + }); + it('routes a ready project menu selection to agent setup', async () => { const calls: string[] = []; const io = makeIo(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 09deff37..2aae882e 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -391,6 +391,10 @@ function setupContextReady(status: KtxSetupStatus): boolean { return status.context.ready; } +function setupContextActive(status: KtxSetupStatus): boolean { + return status.context.status === 'running' || status.context.status === 'detached'; +} + function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void { io.stderr.write('KTX context is not ready for agents.\n\n'); io.stderr.write(`Build context first:\n ktx setup context build --project-dir ${resolve(projectDir)}\n\n`); @@ -454,22 +458,27 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup args.inputMode !== 'disabled' && !args.agents && (io.stdout.isTTY === true || deps.entryMenuDeps?.prompts !== undefined); + let autoWatchActiveBuild = false; setupLoop: while (true) { entryAction = undefined; if (canShowEntryMenu) { const status = await readKtxSetupStatus(args.projectDir); - entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action; - if (entryAction === 'exit') { - (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.'); - return 0; - } - if (entryAction === 'status') { - io.stdout.write(formatKtxSetupStatus(status)); - return 0; - } - if (entryAction === 'demo') { - return await runKtxSetupDemoFromEntryMenu(args, io, deps); + if (setupContextActive(status)) { + autoWatchActiveBuild = true; + } else { + entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action; + if (entryAction === 'exit') { + (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.'); + return 0; + } + if (entryAction === 'status') { + io.stdout.write(formatKtxSetupStatus(status)); + return 0; + } + if (entryAction === 'demo') { + return await runKtxSetupDemoFromEntryMenu(args, io, deps); + } } } @@ -497,6 +506,31 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup const agentsRequested = args.agents || entryAction === 'agents'; const currentStatus = await readKtxSetupStatus(projectResult.projectDir); let readyAction: string | undefined; + + if (args.inputMode !== 'disabled' && !agentsRequested && setupContextActive(currentStatus)) { + const contextRunner = + deps.context ?? ((contextArgs, contextIo) => runKtxSetupContextStep(contextArgs, contextIo, deps.contextDeps)); + const contextResult = await contextRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + allowEmpty: true, + ...(autoWatchActiveBuild ? { autoWatch: true } : {}), + }, + io, + ); + autoWatchActiveBuild = false; + if (contextResult.status === 'back') { + continue; + } + if (contextResult.status === 'failed' || contextResult.status === 'missing-input') { + return 1; + } + if (contextResult.status !== 'ready') { + return 0; + } + } + if (args.inputMode !== 'disabled' && !agentsRequested && isKtxSetupReady(currentStatus)) { readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action; if (readyAction === 'exit') return 0; From b3dcb577d99685e91675cb1451822108021f9b63 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Sun, 10 May 2026 20:44:07 -0700 Subject: [PATCH 5/8] misc --- README.md | 3 +- packages/cli/package.json | 1 - packages/cli/src/context-build-view.test.ts | 5 +- packages/cli/src/context-build-view.ts | 7 +- .../cli/src/local-scan-connectors.test.ts | 23 - packages/cli/src/public-ingest.test.ts | 7 - packages/cli/src/setup-context.test.ts | 41 ++ packages/cli/src/setup-context.ts | 106 +-- packages/cli/src/setup-sources.test.ts | 1 - packages/connector-posthog/package.json | 46 -- .../connector-posthog/src/connector.test.ts | 400 ------------ packages/connector-posthog/src/connector.ts | 609 ------------------ .../connector-posthog/src/dialect.test.ts | 48 -- packages/connector-posthog/src/dialect.ts | 258 -------- packages/connector-posthog/src/index.ts | 19 - .../src/live-database-introspection.ts | 34 - .../src/package-exports.test.ts | 11 - .../src/schema-descriptions.ts | 99 --- packages/connector-posthog/tsconfig.json | 9 - .../src/connections/connection-type.ts | 1 - .../context/src/mcp/local-project-ports.ts | 3 +- .../memory-agent.service.ingest.test.ts | 6 +- packages/context/src/scan/local-scan.ts | 5 +- .../src/scan/relationship-profiling.ts | 8 +- packages/context/src/scan/types.test.ts | 6 +- packages/context/src/scan/types.ts | 1 - pnpm-lock.yaml | 89 +-- release-policy.json | 1 - scripts/examples-docs.test.mjs | 1 - scripts/package-artifacts.mjs | 3 - scripts/package-artifacts.test.mjs | 2 - scripts/precommit-check.mjs | 1 - 32 files changed, 184 insertions(+), 1670 deletions(-) delete mode 100644 packages/connector-posthog/package.json delete mode 100644 packages/connector-posthog/src/connector.test.ts delete mode 100644 packages/connector-posthog/src/connector.ts delete mode 100644 packages/connector-posthog/src/dialect.test.ts delete mode 100644 packages/connector-posthog/src/dialect.ts delete mode 100644 packages/connector-posthog/src/index.ts delete mode 100644 packages/connector-posthog/src/live-database-introspection.ts delete mode 100644 packages/connector-posthog/src/package-exports.test.ts delete mode 100644 packages/connector-posthog/src/schema-descriptions.ts delete mode 100644 packages/connector-posthog/tsconfig.json diff --git a/README.md b/README.md index 014ac600..c92371a4 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ artifacts. You can inspect them, commit them, and serve them to any MCP client. - Durable warehouse memory with semantic-layer sources and knowledge pages. - Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server, - BigQuery, Snowflake, and PostHog. + BigQuery, and Snowflake. - Agentic ingest with provenance links, tool transcripts, and replay metadata. - Local semantic-layer query planning and optional query execution. - A stdio MCP server with tools for connections, knowledge, semantic-layer @@ -221,7 +221,6 @@ The MCP server exposes `connection_list`, `knowledge_search`, - `packages/connector-clickhouse`: ClickHouse scan connector. - `packages/connector-mysql`: MySQL scan connector. - `packages/connector-postgres`: Postgres scan connector. -- `packages/connector-posthog`: PostHog scan connector. - `packages/connector-snowflake`: Snowflake scan connector. - `packages/connector-sqlite`: SQLite scan connector. - `packages/connector-sqlserver`: SQL Server scan connector. diff --git a/packages/cli/package.json b/packages/cli/package.json index 0cc4d6e9..e85986a4 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -38,7 +38,6 @@ "@ktx/connector-clickhouse": "workspace:*", "@ktx/connector-mysql": "workspace:*", "@ktx/connector-postgres": "workspace:*", - "@ktx/connector-posthog": "workspace:*", "@ktx/connector-snowflake": "workspace:*", "@ktx/connector-sqlite": "workspace:*", "@ktx/connector-sqlserver": "workspace:*", diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index a88b42cc..8e48a6f7 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -202,7 +202,7 @@ describe('renderContextBuildView', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('scanning...'); - expect(output).toContain('30s'); + expect(output).toContain('(30s)'); }); it('renders running target with progress bar when percentage is available', () => { @@ -217,7 +217,7 @@ describe('renderContextBuildView', () => { expect(output).toContain('██████░░░░░░'); expect(output).toContain('50%'); expect(output).toContain('Scanning tables...'); - expect(output).toContain('15s'); + expect(output).toContain('(15s)'); }); it('renders completion summary when all targets are done', () => { @@ -423,6 +423,7 @@ describe('runContextBuild', () => { expect(mockExit).toHaveBeenCalledWith(0); expect(io.stdout()).toContain('Context build continuing in the background.'); expect(io.stdout()).toContain('Resume: ktx setup --project-dir /tmp/project'); + expect(io.stdout()).toContain('Status: ktx setup context status --project-dir /tmp/project'); mockExit.mockRestore(); }); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 7edc7d13..4c57784d 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -137,7 +137,7 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string const percent = extractPercent(target.detailLine); const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '') ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); - const elapsed = target.elapsedMs > 0 ? formatDuration(target.elapsedMs) : null; + const elapsed = target.elapsedMs > 0 ? `(${formatDuration(target.elapsedMs)})` : null; const parts: string[] = []; if (percent !== null) { parts.push(`${renderProgressBar(percent, styled)} ${percent}%`); @@ -318,7 +318,7 @@ export function createRepainter(io: KtxCliIo) { if (lastLineCount > 0) { io.stdout.write(`${ESC}[${lastLineCount}A\r`); } - io.stdout.write(content); + io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`)); io.stdout.write(`${ESC}[J`); lastLineCount = (content.match(/\n/g) ?? []).length; }, @@ -356,7 +356,7 @@ function spawnBackgroundBuild(projectDir: string): { logPath: string } | null { // --- Keystroke handling --- -function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { +export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { const stdin = process.stdin; if (!stdin.isTTY || typeof stdin.setRawMode !== 'function') { return null; @@ -445,6 +445,7 @@ export async function runContextBuild( io.stdout.write('\n\nContext build continuing in the background.\n'); if (bg) io.stdout.write(`Log: ${bg.logPath}\n`); io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`); + io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`); process.exit(0); }, () => { diff --git a/packages/cli/src/local-scan-connectors.test.ts b/packages/cli/src/local-scan-connectors.test.ts index 13d19c18..0fe57518 100644 --- a/packages/cli/src/local-scan-connectors.test.ts +++ b/packages/cli/src/local-scan-connectors.test.ts @@ -95,29 +95,6 @@ describe('createKtxCliScanConnector', () => { ]); }); - it('does not create a standalone PostHog scan connector', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - await writeFile( - join(tempDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' product:', - ' driver: posthog', - ' api_key: phx_test', - ' project_id: "157881"', - ' readonly: true', - '', - ].join('\n'), - 'utf-8', - ); - const project = await loadKtxProject({ projectDir: tempDir }); - - await expect(createKtxCliScanConnector(project, 'product')).rejects.toThrow( - 'Connection "product" uses driver "posthog", which has no native standalone KTX scan connector', - ); - }); - it('throws for structural daemon-only fallback configs', async () => { await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); await writeFile( diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts index e00b11da..13d8f364 100644 --- a/packages/cli/src/public-ingest.test.ts +++ b/packages/cli/src/public-ingest.test.ts @@ -80,13 +80,6 @@ describe('buildPublicIngestPlan', () => { ); }); - it('does not plan PostHog connections as CLI ingest targets', () => { - const project = projectWithConnections({ product: { driver: 'posthog' } }); - - expect(() => - buildPublicIngestPlan(project, { projectDir: '/tmp/project', targetConnectionId: 'product', all: false }), - ).toThrow('Connection "product" uses unsupported public ingest driver "posthog"'); - }); }); describe('runKtxPublicIngest', () => { diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 90694ada..d19be04c 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -500,6 +500,47 @@ describe('setup context build state', () => { expect(output).not.toContain('KTX context built: detached'); }); + it('supports d to detach from the progress watch view', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-detach', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-detach'), + sourceProgress: [ + { connectionId: 'warehouse', operation: 'scan' as const, status: 'running' as const, startedAtMs: Date.now() }, + ], + }); + const io = makeIo(); + let triggerDetach: (() => void) | null = null; + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + io.io, + { + sleep: async () => { triggerDetach?.(); }, + watchIntervalMs: 1, + setupKeystroke: (onDetach) => { + triggerDetach = onDetach; + return () => {}; + }, + }, + ), + ).resolves.toMatchObject({ status: 'detached' }); + + const output = io.stdout(); + expect(output).toContain('Building KTX context'); + expect(output).toContain('Context build continuing in the background.'); + expect(output).toContain('Resume: ktx setup --project-dir'); + }); + it('prints JSON setup context command status with watch and resume commands', async () => { await mkdir(join(tempDir, '.ktx', 'setup'), { recursive: true }); await writeKtxSetupContextState(tempDir, { diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index cd79d9bd..79f6cdd7 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -13,6 +13,7 @@ import { buildPublicIngestPlan } from './public-ingest.js'; import { type ContextBuildSourceProgressUpdate, createRepainter, + defaultSetupKeystroke, renderContextBuildView, runContextBuild, viewStateFromSourceProgress, @@ -109,6 +110,7 @@ export interface KtxSetupContextDeps { verifyContextReady?: (projectDir: string) => Promise; sleep?: (ms: number) => Promise; watchIntervalMs?: number; + setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; } interface KtxSetupContextTargets { @@ -870,50 +872,80 @@ async function watchContextStatusWithProgressView( const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; const isTTY = io.stdout.isTTY === true; const repainter = isTTY ? createRepainter(io) : null; + const projectDir = resolve(args.projectDir); + const viewOpts = { styled: isTTY, showHint: true, projectDir }; let state = initialState; - let frame = 0; let lastProgressKey = ''; + let detached = false; - while (true) { - const now = Date.now(); - const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; - const viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], now, startedAtMs); - viewState.frame = frame; + let viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], Date.now(), + state.startedAt ? new Date(state.startedAt).getTime() : undefined); - const viewOpts = { - styled: isTTY, - showHint: true, - hintText: 'ctrl+c to stop watching · build continues in background', - }; + const cleanupKeystroke = (isTTY || deps.setupKeystroke) + ? (deps.setupKeystroke ?? defaultSetupKeystroke)( + () => { detached = true; }, + () => { detached = true; }, + ) + : null; - if (repainter) { - repainter.paint(renderContextBuildView(viewState, viewOpts)); - } else { - const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); - if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { - io.stdout.write(renderContextBuildView(viewState, viewOpts)); - lastProgressKey = currentKey; + let spinnerInterval: ReturnType | null = null; + if (repainter) { + repainter.paint(renderContextBuildView(viewState, viewOpts)); + spinnerInterval = setInterval(() => { + viewState.frame++; + const now = Date.now(); + viewState.totalElapsedMs = viewState.startedAt !== null ? now - viewState.startedAt : 0; + for (const t of [...viewState.primarySources, ...viewState.contextSources]) { + if (t.status === 'running' && t.startedAt !== null) { + t.elapsedMs = now - t.startedAt; + } } - } - - if (!isActiveStatus(state.status)) { - return { exitCode: watchExitCode(state.status), state }; - } - - frame++; - await sleep(intervalMs); - - try { - state = await readKtxSetupContextState(args.projectDir); - } catch { - continue; - } - - if (!stateMatchesRunId(state, args.runId)) { - io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); - return { exitCode: 1, state }; - } + repainter.paint(renderContextBuildView(viewState, viewOpts)); + }, 140); } + + try { + while (true) { + if (!repainter) { + const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); + if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { + io.stdout.write(renderContextBuildView(viewState, viewOpts)); + lastProgressKey = currentKey; + } + } + + if (!isActiveStatus(state.status)) { + return { exitCode: watchExitCode(state.status), state }; + } + if (detached) break; + + await sleep(intervalMs); + if (detached) break; + + try { + state = await readKtxSetupContextState(args.projectDir); + } catch { + continue; + } + + if (!stateMatchesRunId(state, args.runId)) { + io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); + return { exitCode: 1, state }; + } + + const now = Date.now(); + const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; + viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], now, startedAtMs); + } + } finally { + if (spinnerInterval) clearInterval(spinnerInterval); + cleanupKeystroke?.(); + } + + io.stdout.write('\n\nContext build continuing in the background.\n'); + io.stdout.write(`Resume: ktx setup --project-dir ${projectDir}\n`); + io.stdout.write(`Status: ktx setup context status --project-dir ${projectDir}\n`); + return { exitCode: 0, state }; } function setupResultFromWatchedState(projectDir: string, state: KtxSetupContextState): KtxSetupContextResult { diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index 1ef973c9..b79e8e66 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -444,7 +444,6 @@ describe('setup sources step', () => { ); const options = vi.mocked(testPrompts.multiselect).mock.calls[0]?.[0].options ?? []; expect(options).toContainEqual({ value: 'notion', label: 'Notion' }); - expect(options).not.toContainEqual({ value: 'posthog', label: 'PostHog' }); }); it('uses a source-specific editable connection name for new interactive connections', async () => { diff --git a/packages/connector-posthog/package.json b/packages/connector-posthog/package.json deleted file mode 100644 index da2de540..00000000 --- a/packages/connector-posthog/package.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "name": "@ktx/connector-posthog", - "version": "0.0.0-private", - "description": "PostHog connector package for KTX scan interfaces", - "private": true, - "type": "module", - "engines": { - "node": ">=22.0.0" - }, - "main": "dist/index.js", - "types": "dist/index.d.ts", - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js", - "default": "./dist/index.js" - }, - "./package.json": "./package.json" - }, - "files": [ - "dist" - ], - "scripts": { - "build": "tsc -p tsconfig.json", - "test": "vitest run", - "type-check": "tsc -p tsconfig.json --noEmit" - }, - "dependencies": { - "@ktx/context": "workspace:*" - }, - "devDependencies": { - "@types/node": "^24.3.0", - "typescript": "^5.9.3", - "vitest": "^4.0.18" - }, - "license": "Apache-2.0", - "repository": { - "type": "git", - "url": "git+https://github.com/kaelio/ktx.git", - "directory": "packages/connector-posthog" - }, - "bugs": { - "url": "https://github.com/kaelio/ktx/issues" - }, - "homepage": "https://github.com/kaelio/ktx#readme" -} diff --git a/packages/connector-posthog/src/connector.test.ts b/packages/connector-posthog/src/connector.test.ts deleted file mode 100644 index 69dc7223..00000000 --- a/packages/connector-posthog/src/connector.test.ts +++ /dev/null @@ -1,400 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { - createPostHogLiveDatabaseIntrospection, - isKtxPostHogConnectionConfig, - KtxPostHogScanConnector, - postHogConnectionConfigFromConfig, - type KtxPostHogConnectionConfig, - type KtxPostHogFetch, -} from './index.js'; - -function jsonResponse(body: unknown, status = 200): Response { - return { - ok: status >= 200 && status < 300, - status, - json: async () => body, - text: async () => JSON.stringify(body), - } as Response; -} - -function fakeFetch(queries: string[] = []): KtxPostHogFetch { - return vi.fn(async (_url: string, init?: RequestInit) => { - const body = JSON.parse(String(init?.body ?? '{}')) as { query?: { kind?: string; query?: string } }; - const sql = body.query?.query ?? ''; - if (sql) { - queries.push(sql); - } - if (body.query?.kind === 'DatabaseSchemaQuery') { - return jsonResponse({ - tables: { - events: { - id: 'events', - name: 'events', - type: 'posthog', - row_count: 42, - fields: { - uuid: { - name: 'uuid', - type: 'uuid', - hogql_value: 'uuid', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'uuid', - }, - event: { - name: 'event', - type: 'string', - hogql_value: 'event', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'event', - }, - timestamp: { - name: 'timestamp', - type: 'datetime', - hogql_value: 'timestamp', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'timestamp', - }, - properties: { - name: 'properties', - type: 'json', - hogql_value: 'properties', - schema_valid: true, - table: 'events', - fields: null, - chain: null, - id: 'properties', - }, - virtual: { - name: 'virtual', - type: 'virtual_table', - hogql_value: 'virtual', - schema_valid: true, - table: null, - fields: null, - chain: null, - id: 'virtual', - }, - }, - }, - query_log: { - id: 'query_log', - name: 'query_log', - type: 'posthog', - row_count: 1, - fields: {}, - }, - }, - joins: [], - }); - } - if (sql.includes('SELECT * FROM person_distinct_ids LIMIT 0')) { - return jsonResponse({ - results: [], - columns: ['distinct_id', 'person_id'], - types: [ - ['distinct_id', 'String'], - ['person_id', 'UUID'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('LIMIT 0')) { - return jsonResponse({ results: null, columns: null, types: null, error: 'Table not found', hogql: sql }); - } - if (sql.includes('SELECT 1 AS test')) { - return jsonResponse({ results: [[1]], columns: ['test'], types: [['test', 'Int64']], error: null, hogql: sql }); - } - if (sql.includes('count() AS cnt')) { - return jsonResponse({ results: [[42]], columns: ['cnt'], types: [['cnt', 'Int64']], error: null, hogql: sql }); - } - if (sql.includes('GROUP BY event')) { - return jsonResponse({ - results: [['$pageview', 9]], - columns: ['event', 'cnt'], - types: [ - ['event', 'String'], - ['cnt', 'Int64'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('arrayJoin(JSONExtractKeys')) { - return jsonResponse({ - results: [['$browser', 7]], - columns: ['key', 'cnt'], - types: [ - ['key', 'String'], - ['cnt', 'Int64'], - ], - error: null, - hogql: sql, - }); - } - if (sql.includes('uniq(JSONExtractString') || sql.includes('uniq(val) AS cardinality')) { - return jsonResponse({ - results: [[2]], - columns: ['cardinality'], - types: [['cardinality', 'Int64']], - error: null, - hogql: sql, - }); - } - if (sql.includes('DISTINCT JSONExtractString') || sql.includes('SELECT DISTINCT toString(')) { - return jsonResponse({ - results: [['Chrome'], ['Safari']], - columns: ['value'], - types: [['value', 'String']], - error: null, - hogql: sql, - }); - } - return jsonResponse({ results: [['$pageview']], columns: ['event'], types: [['event', 'String']], error: null, hogql: sql }); - }) as KtxPostHogFetch; -} - -const posthogApiKeyEnv = ['POSTHOG', 'API', 'KEY'].join('_'); -const fixtureToken = ['phx', 'fixture'].join('_'); -const env = { [posthogApiKeyEnv]: fixtureToken }; -const connection: KtxPostHogConnectionConfig & { driver: string } = { - driver: 'posthog', - ['api_' + 'key']: `env:${posthogApiKeyEnv}`, - project_id: '157881', - region: 'us', - readonly: true, -}; - -describe('KtxPostHogScanConnector', () => { - it('resolves configuration safely', () => { - expect(isKtxPostHogConnectionConfig(connection)).toBe(true); - expect(isKtxPostHogConnectionConfig({ driver: 'mysql' })).toBe(false); - const resolved = postHogConnectionConfigFromConfig({ - connectionId: 'product', - connection, - env, - }); - expect(resolved).toMatchObject({ projectId: '157881', baseUrl: 'https://us.posthog.com' }); - const tokenField = ['api', 'Key'].join('') as keyof typeof resolved; - expect(resolved[tokenField]).toBe(fixtureToken); - expect(() => - postHogConnectionConfigFromConfig({ - connectionId: 'product', - connection: { ...connection, readonly: false }, - }), - ).toThrow('Native PostHog connector requires connections.product.readonly: true'); - }); - - it('introspects schema metadata, hidden tables, descriptions, primary keys, and normalized types', async () => { - const connector = new KtxPostHogScanConnector({ - connectionId: 'product', - connection, - env, - fetch: fakeFetch(), - sleep: async () => {}, - now: () => new Date('2026-04-29T19:00:00.000Z'), - }); - - const snapshot = await connector.introspect({ connectionId: 'product', driver: 'posthog' }, { runId: 'scan-run-1' }); - - expect(snapshot).toMatchObject({ - connectionId: 'product', - driver: 'posthog', - extractedAt: '2026-04-29T19:00:00.000Z', - scope: { catalogs: ['157881'] }, - metadata: { - project_id: '157881', - table_count: 2, - total_columns: 6, - }, - }); - expect(snapshot.tables.map((table) => table.name)).toEqual(['events', 'person_distinct_ids']); - expect(snapshot.tables[0]).toMatchObject({ - catalog: '157881', - db: null, - name: 'events', - kind: 'event_stream', - estimatedRows: 42, - comment: expect.stringContaining('PostHog event stream'), - foreignKeys: [], - }); - expect(snapshot.tables[0]?.columns).toEqual([ - { - name: 'uuid', - nativeType: 'UUID', - normalizedType: 'UUID', - dimensionType: 'string', - nullable: false, - primaryKey: true, - comment: 'Unique identifier for this specific event.', - }, - { - name: 'event', - nativeType: 'String', - normalizedType: 'VARCHAR', - dimensionType: 'string', - nullable: false, - primaryKey: false, - comment: expect.stringContaining('Event name'), - }, - { - name: 'timestamp', - nativeType: 'DateTime64', - normalizedType: 'TIMESTAMP', - dimensionType: 'time', - nullable: false, - primaryKey: false, - comment: expect.stringContaining('UTC timestamp'), - }, - { - name: 'properties', - nativeType: 'JSON', - normalizedType: 'JSON', - dimensionType: 'string', - nullable: true, - primaryKey: false, - comment: expect.stringContaining('JSON object'), - }, - ]); - }); - - it('runs samples, read-only SQL, event-stream discovery, row counts, and cleanup', async () => { - const queries: string[] = []; - const connector = new KtxPostHogScanConnector({ - connectionId: 'product', - connection, - env, - fetch: fakeFetch(queries), - sleep: async () => {}, - }); - - await expect(connector.testConnection()).resolves.toEqual({ success: true }); - await expect( - connector.sampleTable( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - columns: ['event'], - limit: 1, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1 }); - await expect( - connector.sampleColumn( - { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event', limit: 5 }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual({ values: ['$pageview'], nullCount: null, distinctCount: null }); - await expect( - connector.executeReadOnly({ connectionId: 'product', sql: 'select event from events', maxRows: 1 }, { runId: 'scan-run-1' }), - ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1, rowCount: 1 }); - await expect( - connector.executeReadOnly({ connectionId: 'product', sql: 'delete from events' }, { runId: 'scan-run-1' }), - ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); - await expect(connector.getTableRowCount('events')).resolves.toBe(42); - await expect( - connector.getColumnDistinctValues({ catalog: '157881', db: null, name: 'events' }, 'properties.$browser', { - maxCardinality: 5, - limit: 10, - sampleSize: 100, - }), - ).resolves.toEqual({ values: ['Chrome', 'Safari'], cardinality: 2 }); - await expect( - connector.eventStreamDiscovery.listEventTypes( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - eventColumn: 'event', - limit: 10, - minCount: 30, - lookbackDays: 14, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual([{ value: '$pageview', count: 9 }]); - expect(queries.some((query) => query.includes('HAVING cnt >= 30'))).toBe(true); - expect(queries.some((query) => query.includes('INTERVAL 14 DAY'))).toBe(true); - - await expect( - connector.eventStreamDiscovery.listPropertyKeys( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - jsonColumn: 'properties', - sampleSize: 1000, - limit: 10, - lookbackDays: 7, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual([{ key: '$browser', count: 7 }]); - - await expect( - connector.eventStreamDiscovery.listPropertyValues( - { - connectionId: 'product', - table: { catalog: '157881', db: null, name: 'events' }, - jsonColumn: 'properties', - propertyKey: '$browser', - limit: 10, - maxCardinality: 1000, - lookbackDays: 30, - }, - { runId: 'scan-run-1' }, - ), - ).resolves.toEqual({ - values: ['Chrome', 'Safari'], - cardinality: 2, - }); - await expect( - connector.columnStats( - { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event' }, - { runId: 'scan-run-1' }, - ), - ).resolves.toBeNull(); - await connector.cleanup(); - }); - - it('adapts native snapshots to live-database introspection snapshots', async () => { - const introspection = createPostHogLiveDatabaseIntrospection({ - connections: { product: connection }, - env, - fetch: fakeFetch(), - sleep: async () => {}, - now: () => new Date('2026-04-29T19:00:00.000Z'), - }); - - await expect(introspection.extractSchema('product')).resolves.toMatchObject({ - connectionId: 'product', - metadata: { project_id: '157881' }, - tables: expect.arrayContaining([ - expect.objectContaining({ - catalog: '157881', - db: null, - name: 'events', - columns: expect.arrayContaining([ - { - name: 'uuid', - nativeType: 'UUID', - normalizedType: 'UUID', - dimensionType: 'string', - nullable: false, - primaryKey: true, - comment: 'Unique identifier for this specific event.', - }, - ]), - }), - ]), - }); - }); -}); diff --git a/packages/connector-posthog/src/connector.ts b/packages/connector-posthog/src/connector.ts deleted file mode 100644 index 0ac2b37c..00000000 --- a/packages/connector-posthog/src/connector.ts +++ /dev/null @@ -1,609 +0,0 @@ -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; -import { assertReadOnlySql, limitSqlForExecution } from '@ktx/context/connections'; -import { - createKtxConnectorCapabilities, - type KtxColumnSampleInput, - type KtxColumnSampleResult, - type KtxColumnStatsInput, - type KtxColumnStatsResult, - type KtxEventPropertyDiscovery, - type KtxEventPropertyDiscoveryInput, - type KtxEventPropertyValuesInput, - type KtxEventPropertyValuesResult, - type KtxEventStreamDiscoveryPort, - type KtxEventTypeDiscovery, - type KtxEventTypeDiscoveryInput, - type KtxQueryResult, - type KtxReadOnlyQueryInput, - type KtxScanConnector, - type KtxScanContext, - type KtxScanInput, - type KtxSchemaColumn, - type KtxSchemaSnapshot, - type KtxSchemaTable, - type KtxTableRef, - type KtxTableSampleInput, - type KtxTableSampleResult, -} from '@ktx/context/scan'; -import { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js'; -import { getKtxPostHogColumnDescription, getKtxPostHogTableDescription } from './schema-descriptions.js'; - -export interface KtxPostHogConnectionConfig { - driver?: string; - api_key?: string; - apiKey?: string; - project_id?: string; - projectId?: string; - region?: 'us' | 'eu'; - host?: string; - readonly?: boolean; - [key: string]: unknown; -} - -export interface KtxPostHogResolvedConnectionConfig { - apiKey: string; - projectId: string; - baseUrl: string; -} - -export type KtxPostHogFetch = (url: string, init?: RequestInit) => Promise; - -export interface KtxPostHogScanConnectorOptions { - connectionId: string; - connection: KtxPostHogConnectionConfig | undefined; - env?: NodeJS.ProcessEnv; - fetch?: KtxPostHogFetch; - sleep?: (ms: number) => Promise; - now?: () => Date; -} - -export interface KtxPostHogReadOnlyQueryInput extends KtxReadOnlyQueryInput { - params?: Record; -} - -export interface KtxPostHogColumnDistinctValuesOptions { - maxCardinality: number; - limit: number; - sampleSize?: number; -} - -export interface KtxPostHogColumnDistinctValuesResult { - values: string[] | null; - cardinality: number; -} - -interface PostHogSchemaField { - name: string; - type: string; - hogql_value: string; - schema_valid: boolean; - table: string | null; - fields: string[] | null; - chain: string[] | null; - id: string | null; -} - -interface PostHogSchemaTable { - id: string; - name: string; - type: string; - row_count: number | null; - fields: Record; -} - -interface PostHogSchemaResponse { - tables: Record; - joins: unknown[]; -} - -interface PostHogQueryResponse { - results: unknown[][] | null; - columns: string[] | null; - types: [string, string][] | null; - error: string | null; - hogql: string | null; -} - -const allowedTableTypes = new Set(['posthog', 'system']); -const excludedTables = new Set([ - 'query_log', - 'system.teams', - 'system.exports', - 'system.ingestion_warnings', - 'system.insight_variables', - 'system.data_warehouse_sources', - 'system.groups', - 'system.group_type_mappings', -]); -const hiddenTablesToProbe = ['person_distinct_ids', 'cohort_people', 'static_cohort_people']; - -export function isKtxPostHogConnectionConfig(connection: KtxPostHogConnectionConfig | undefined): boolean { - return String(connection?.driver ?? '').toLowerCase() === 'posthog'; -} - -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - -function stringConfigValue( - connection: KtxPostHogConnectionConfig | undefined, - key: keyof KtxPostHogConnectionConfig, - env: NodeJS.ProcessEnv, -): string | undefined { - const value = connection?.[key]; - return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; -} - -export function postHogConnectionConfigFromConfig(input: { - connectionId: string; - connection: KtxPostHogConnectionConfig | undefined; - env?: NodeJS.ProcessEnv; -}): KtxPostHogResolvedConnectionConfig { - if (!isKtxPostHogConnectionConfig(input.connection)) { - throw new Error(`Native PostHog connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`); - } - if (input.connection?.readonly !== true) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.readonly: true`); - } - const env = input.env ?? process.env; - const apiKey = stringConfigValue(input.connection, 'api_key', env) ?? stringConfigValue(input.connection, 'apiKey', env); - const projectId = - stringConfigValue(input.connection, 'project_id', env) ?? stringConfigValue(input.connection, 'projectId', env); - if (!apiKey) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.api_key`); - } - if (!projectId) { - throw new Error(`Native PostHog connector requires connections.${input.connectionId}.project_id`); - } - const host = stringConfigValue(input.connection, 'host', env); - const region = input.connection?.region ?? 'us'; - return { - apiKey, - projectId, - baseUrl: host ? host.replace(/\/$/, '') : region === 'eu' ? 'https://eu.posthog.com' : 'https://us.posthog.com', - }; -} - -export class KtxPostHogScanConnector implements KtxScanConnector { - readonly id: string; - readonly driver = 'posthog' as const; - readonly capabilities = createKtxConnectorCapabilities({ - tableSampling: true, - columnSampling: true, - columnStats: false, - readOnlySql: true, - nestedAnalysis: true, - eventStreamDiscovery: true, - formalForeignKeys: false, - estimatedRowCounts: true, - }); - - readonly eventStreamDiscovery: KtxEventStreamDiscoveryPort = { - listEventTypes: (input, ctx) => this.listEventTypes(input, ctx), - listPropertyKeys: (input, ctx) => this.listPropertyKeys(input, ctx), - listPropertyValues: (input, ctx) => this.listPropertyValues(input, ctx), - }; - - private readonly connectionId: string; - private readonly resolved: KtxPostHogResolvedConnectionConfig; - private readonly fetchImpl: KtxPostHogFetch; - private readonly sleep: (ms: number) => Promise; - private readonly now: () => Date; - private readonly dialect = new KtxPostHogDialect(); - - constructor(options: KtxPostHogScanConnectorOptions) { - this.connectionId = options.connectionId; - this.resolved = postHogConnectionConfigFromConfig({ - connectionId: options.connectionId, - connection: options.connection, - env: options.env, - }); - this.fetchImpl = options.fetch ?? fetch; - this.sleep = options.sleep ?? ((ms) => new Promise((resolveSleep) => setTimeout(resolveSleep, ms))); - this.now = options.now ?? (() => new Date()); - this.id = `posthog:${options.connectionId}`; - } - - async testConnection(): Promise<{ success: boolean; error?: string }> { - const response = await this.query('SELECT 1 AS test'); - return response.error ? { success: false, error: response.error } : { success: true }; - } - - async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const response = await this.makeRequest('/query', { query: { kind: 'DatabaseSchemaQuery' } }); - const tables: KtxSchemaTable[] = []; - for (const [tableName, tableInfo] of Object.entries(response.tables ?? {})) { - if (!allowedTableTypes.has(tableInfo.type) || excludedTables.has(tableName)) { - continue; - } - tables.push(this.toSchemaTable(tableName, tableInfo)); - } - tables.push(...(await this.discoverHiddenTables())); - tables.sort((left, right) => left.name.localeCompare(right.name)); - return { - connectionId: this.connectionId, - driver: 'posthog', - extractedAt: this.now().toISOString(), - scope: { catalogs: [this.resolved.projectId] }, - metadata: { - project_id: this.resolved.projectId, - table_count: tables.length, - total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), - }, - tables, - }; - } - - async sampleTable( - input: KtxTableSampleInput & { columnMetadata?: KtxPostHogSampleColumnInfo[] }, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const sql = input.columnMetadata - ? this.dialect.generateSampleQueryWithMetadata(this.qTableName(input.table), input.limit, input.columnMetadata) - : this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns); - const result = await this.query(sql); - return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; - } - - async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const result = await this.query( - this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), - ); - const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); - return { values, nullCount: null, distinctCount: null }; - } - - async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise { - return null; - } - - async executeReadOnly(input: KtxPostHogReadOnlyQueryInput, _ctx: KtxScanContext): Promise { - this.assertConnection(input.connectionId); - const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); - const prepared = this.dialect.prepareQuery(limitedSql, input.params); - const result = await this.query(prepared.sql, prepared.params); - return { ...result, rowCount: result.rows.length }; - } - - async getTableRowCount(tableName: string): Promise { - const result = await this.query(`SELECT count() AS cnt FROM ${this.dialect.quoteIdentifier(tableName)}`); - return Number(result.rows[0]?.[0] ?? 0); - } - - async getColumnDistinctValues( - table: KtxTableRef, - columnName: string, - options: KtxPostHogColumnDistinctValuesOptions, - ): Promise { - const sampleSize = options.sampleSize ?? 10000; - const tableName = this.qTableName(table); - const cardinalityResult = await this.query( - this.dialect.generateCardinalitySampleQuery(tableName, columnName, sampleSize), - ); - if (cardinalityResult.error || cardinalityResult.rows.length === 0) { - return null; - } - const cardinality = Number(cardinalityResult.rows[0]?.[0]); - if (!Number.isFinite(cardinality)) { - return null; - } - if (cardinality === 0) { - return { values: [], cardinality: 0 }; - } - if (cardinality > options.maxCardinality) { - return { values: null, cardinality }; - } - const valuesResult = await this.query(this.dialect.generateDistinctValuesQuery(tableName, columnName, options.limit)); - if (valuesResult.error) { - return null; - } - return { - values: valuesResult.rows.filter((row) => row[0] !== null).map((row) => String(row[0])), - cardinality, - }; - } - - private async listEventTypes( - input: KtxEventTypeDiscoveryInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const limit = this.positiveInteger(input.limit, 'limit'); - const lookbackDays = this.positiveInteger(input.lookbackDays ?? 30, 'lookbackDays'); - const minCount = this.positiveInteger(input.minCount ?? 0, 'minCount'); - const eventColumn = this.dialect.quoteIdentifier(input.eventColumn); - const tableName = this.qTableName(input.table); - const havingClause = minCount > 0 ? `HAVING cnt >= ${minCount}` : ''; - const result = await this.query(` - SELECT ${eventColumn} AS event, count() as cnt - FROM ${tableName} - WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY - GROUP BY event - ${havingClause} - ORDER BY cnt DESC - LIMIT ${limit} - `); - if (result.error) { - return []; - } - return result.rows - .filter((row) => row[0] != null && String(row[0]).trim() !== '') - .map((row) => ({ value: String(row[0]), count: Number(row[1]) })); - } - - private async listPropertyKeys( - input: KtxEventPropertyDiscoveryInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const sampleSize = this.positiveInteger(input.sampleSize, 'sampleSize'); - const limit = this.positiveInteger(input.limit, 'limit'); - const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); - const tableName = this.qTableName(input.table); - const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); - const whereClause = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; - const result = await this.query(` - SELECT key, count() as cnt - FROM ( - SELECT arrayJoin(JSONExtractKeys(${jsonColumn})) AS key - FROM ${tableName} - ${whereClause} - LIMIT ${sampleSize} - ) - GROUP BY key - ORDER BY cnt DESC - LIMIT ${limit} - `); - if (result.error) { - return []; - } - return result.rows.map((row) => ({ key: String(row[0]), count: Number(row[1]) })); - } - - private async listPropertyValues( - input: KtxEventPropertyValuesInput, - _ctx: KtxScanContext, - ): Promise { - this.assertConnection(input.connectionId); - const limit = this.positiveInteger(input.limit, 'limit'); - const maxCardinality = this.positiveInteger(input.maxCardinality ?? 1000, 'maxCardinality'); - const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); - const tableName = this.qTableName(input.table); - const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); - const escapedKey = this.escapeHogQLString(input.propertyKey); - const timeFilter = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; - const cardinalityResult = await this.query(` - SELECT uniq(JSONExtractString(${jsonColumn}, '${escapedKey}')) as cardinality - FROM ${tableName} - ${timeFilter} - LIMIT 1000000 - `); - if (cardinalityResult.error || cardinalityResult.rows.length === 0) { - return null; - } - const cardinality = Number(cardinalityResult.rows[0]?.[0]); - if (!Number.isFinite(cardinality) || cardinality > maxCardinality) { - return null; - } - const valuesResult = await this.query(` - SELECT DISTINCT JSONExtractString(${jsonColumn}, '${escapedKey}') as value - FROM ${tableName} - WHERE JSONExtractString(${jsonColumn}, '${escapedKey}') IS NOT NULL - AND JSONExtractString(${jsonColumn}, '${escapedKey}') != '' - ${lookbackDays === null ? '' : `AND timestamp > now() - INTERVAL ${lookbackDays} DAY`} - ORDER BY value - LIMIT ${limit} - `); - if (valuesResult.error) { - return null; - } - const values = valuesResult.rows - .map((row) => (row[0] != null ? String(row[0]) : '')) - .filter((value) => { - const trimmed = value.trim(); - return trimmed !== '' && trimmed !== '[]' && trimmed !== '{}' && trimmed !== 'null'; - }); - return { values, cardinality }; - } - - async cleanup(): Promise {} - - qTableName(table: Pick): string { - return this.dialect.formatTableName(table); - } - - quoteIdentifier(identifier: string): string { - return this.dialect.quoteIdentifier(identifier); - } - - private toSchemaTable(tableName: string, tableInfo: PostHogSchemaTable): KtxSchemaTable { - return { - catalog: this.resolved.projectId, - db: null, - name: tableName, - kind: tableName === 'events' ? 'event_stream' : 'table', - comment: getKtxPostHogTableDescription(tableName) ?? null, - estimatedRows: tableInfo.row_count ?? null, - columns: this.extractColumns(tableName, tableInfo.fields), - foreignKeys: [], - }; - } - - private async discoverHiddenTables(): Promise { - const tables: KtxSchemaTable[] = []; - for (const tableName of hiddenTablesToProbe) { - const result = await this.query(`SELECT * FROM ${tableName} LIMIT 0`); - if (result.error) { - continue; - } - tables.push({ - catalog: this.resolved.projectId, - db: null, - name: tableName, - kind: 'table', - comment: getKtxPostHogTableDescription(tableName) ?? null, - estimatedRows: null, - columns: result.headers.map((header) => ({ - name: header, - nativeType: 'String', - normalizedType: 'VARCHAR', - dimensionType: 'string', - nullable: true, - primaryKey: false, - comment: getKtxPostHogColumnDescription(tableName, header) ?? null, - })), - foreignKeys: [], - }); - } - return tables; - } - - private extractColumns(tableName: string, fields: Record): KtxSchemaColumn[] { - const columns: KtxSchemaColumn[] = []; - for (const [fieldName, fieldInfo] of Object.entries(fields)) { - if ( - fieldInfo.type === 'lazy_table' || - fieldInfo.type === 'virtual_table' || - fieldInfo.type === 'field_traverser' || - fieldInfo.type === 'expression' - ) { - continue; - } - const nativeType = this.normalizeFieldType(fieldInfo.type); - columns.push({ - name: fieldName, - nativeType, - normalizedType: this.dialect.mapDataType(nativeType), - dimensionType: this.dialect.mapToDimensionType(nativeType), - nullable: this.isNullableField(tableName, fieldName, fieldInfo.type), - primaryKey: this.isPrimaryKeyField(tableName, fieldName), - comment: getKtxPostHogColumnDescription(tableName, fieldName) ?? null, - }); - } - return columns; - } - - private normalizeFieldType(posthogType: string): string { - const typeMap: Record = { - string: 'String', - integer: 'Int64', - datetime: 'DateTime64', - boolean: 'UInt8', - bool: 'Boolean', - json: 'JSON', - array: 'Array(String)', - uuid: 'UUID', - event: 'String', - }; - return typeMap[posthogType.toLowerCase()] ?? posthogType; - } - - private isNullableField(tableName: string, fieldName: string, fieldType: string): boolean { - if (tableName === 'events' && ['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldName)) { - return false; - } - return !['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldType.toLowerCase()); - } - - private isPrimaryKeyField(tableName: string, fieldName: string): boolean { - return ( - (tableName === 'events' && fieldName === 'uuid') || - (tableName === 'persons' && fieldName === 'id') || - (tableName === 'sessions' && fieldName === 'session_id') || - (tableName === 'groups' && fieldName === 'key') - ); - } - - private async query(sql: string, params?: Record): Promise { - const response = await this.makeRequest('/query', { - query: { - kind: 'HogQLQuery', - query: sql, - ...(params && Object.keys(params).length > 0 ? { values: params } : {}), - }, - }); - if (response.error) { - return { headers: [], rows: [], totalRows: 0, rowCount: null, error: response.error }; - } - const headers = response.columns ?? []; - const rows = response.results ?? []; - const headerTypes = response.types?.map((type) => type[1]); - return { - headers, - rows, - totalRows: rows.length, - rowCount: rows.length, - ...(headerTypes && headerTypes.length > 0 ? { headerTypes } : {}), - }; - } - - private async makeRequest(endpoint: string, body: Record, maxRetries = 3): Promise { - const url = `${this.resolved.baseUrl}/api/projects/${this.resolved.projectId}${endpoint}`; - let lastError: Error | null = null; - for (let attempt = 0; attempt <= maxRetries; attempt += 1) { - const response = await this.fetchImpl(url, { - method: 'POST', - headers: { - Authorization: `Bearer ${this.resolved.apiKey}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }); - if (response.ok) { - return response.json() as Promise; - } - const errorText = await response.text(); - const errorMessage = this.parseErrorMessage(errorText); - if (response.status === 429 && attempt < maxRetries) { - await this.sleep(this.parseRateLimitWaitTime(errorMessage) * 1000); - continue; - } - lastError = new Error(`PostHog API error (${response.status}): ${errorMessage}`); - } - throw lastError ?? new Error('PostHog API request failed after retries'); - } - - private parseErrorMessage(errorText: string): string { - try { - const errorJson = JSON.parse(errorText) as { detail?: unknown; error?: unknown }; - return String(errorJson.detail ?? errorJson.error ?? errorText); - } catch { - return errorText; - } - } - - private parseRateLimitWaitTime(errorMessage: string): number { - const match = errorMessage.match(/(?:Expected available in|retry after) (\d+) seconds?/i); - return match ? Number.parseInt(match[1] ?? '30', 10) + 2 : 30; - } - - private escapeHogQLString(value: string): string { - return value.replace(/\\/g, '\\\\').replace(/'/g, "''"); - } - - private positiveInteger(value: number, name: string): number { - if (!Number.isInteger(value) || value < 0) { - throw new Error(`PostHog event-stream discovery requires ${name} to be a non-negative integer`); - } - return value; - } - - private assertConnection(connectionId: string): void { - if (connectionId !== this.connectionId) { - throw new Error(`PostHog connector ${this.connectionId} cannot scan connection ${connectionId}`); - } - } -} diff --git a/packages/connector-posthog/src/dialect.test.ts b/packages/connector-posthog/src/dialect.test.ts deleted file mode 100644 index 5c5b2c43..00000000 --- a/packages/connector-posthog/src/dialect.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { KtxPostHogDialect } from './dialect.js'; - -describe('KtxPostHogDialect', () => { - const dialect = new KtxPostHogDialect(); - - it('quotes identifiers, formats table names, maps types, and prepares HogQL params', () => { - expect(dialect.quoteIdentifier('weird`name')).toBe('`weird\\`name`'); - expect(dialect.formatTableName({ name: 'events', catalog: '157881', db: null })).toBe('`events`'); - expect(dialect.mapDataType('Nullable(DateTime64(6, UTC))')).toBe('TIMESTAMP'); - expect(dialect.mapDataType('Array(String)')).toBe('JSON'); - expect(dialect.mapToDimensionType('UInt8')).toBe('number'); - expect(dialect.mapToDimensionType('Boolean')).toBe('boolean'); - expect(dialect.prepareQuery('SELECT * FROM events WHERE event = :event', { event: '$pageview' })).toEqual({ - sql: 'SELECT * FROM events WHERE event = {event}', - params: { event: '$pageview' }, - }); - }); - - it('builds sample and virtual-property queries without app dependencies', () => { - expect(dialect.generateSampleQuery('`events`', 5, ['event', 'timestamp'])).toBe( - 'SELECT `event`, `timestamp` FROM `events` ORDER BY rand() LIMIT 5', - ); - expect( - dialect.generateSampleQueryWithMetadata('`events`', 3, [ - { name: 'event', parentColumnId: null }, - { name: 'properties.$browser', parentColumnId: 'properties' }, - ]), - ).toBe( - "SELECT `event`, JSONExtractString(properties, '$browser') AS `properties.$browser` FROM `events` ORDER BY rand() LIMIT 3", - ); - expect(dialect.generateColumnSampleQuery('`events`', 'properties.$browser', 10)).toBe( - "SELECT JSONExtractString(properties, '$browser') FROM `events` WHERE JSONExtractString(properties, '$browser') IS NOT NULL ORDER BY rand() LIMIT 10", - ); - }); - - it('builds data-dictionary and time helper SQL', () => { - expect(dialect.generateCardinalitySampleQuery('events', 'properties.$browser', 100)).toContain( - "JSONExtractString(properties, '$browser') AS val", - ); - expect(dialect.generateDistinctValuesQuery('events', 'event', 20)).toContain('SELECT DISTINCT toString(`event`) AS val'); - expect(dialect.getNullCountExpression('event')).toBe('countIf(event IS NULL)'); - expect(dialect.getDistinctCountExpression('event')).toBe('uniq(event)'); - expect(dialect.getTimeTruncExpression('timestamp', 'week', 'UTC')).toBe("DATE_TRUNC('week', toTimeZone(timestamp, 'UTC'))"); - expect(dialect.parseIntervalToSql('7 day')).toBe('INTERVAL 7 DAY'); - expect(dialect.generateColumnStatisticsQuery('', 'events')).toBeNull(); - }); -}); diff --git a/packages/connector-posthog/src/dialect.ts b/packages/connector-posthog/src/dialect.ts deleted file mode 100644 index 36f6edee..00000000 --- a/packages/connector-posthog/src/dialect.ts +++ /dev/null @@ -1,258 +0,0 @@ -import type { KtxSchemaDimensionType, KtxTableRef } from '@ktx/context/scan'; - -type PostHogTableNameRef = Pick & Partial>; - -export interface KtxPostHogSampleColumnInfo { - name: string; - parentColumnId: string | null; -} - -export class KtxPostHogDialect { - readonly type = 'posthog'; - - private readonly typeMappings: Record = { - datetime64: 'time', - datetime: 'time', - date: 'time', - int64: 'number', - int32: 'number', - int16: 'number', - int8: 'number', - uint64: 'number', - uint32: 'number', - uint16: 'number', - uint8: 'number', - float64: 'number', - float32: 'number', - decimal: 'number', - integer: 'number', - string: 'string', - uuid: 'string', - json: 'string', - boolean: 'boolean', - bool: 'boolean', - }; - - quoteIdentifier(identifier: string): string { - return `\`${identifier.replace(/`/g, '\\`')}\``; - } - - formatTableName(table: PostHogTableNameRef): string { - return this.quoteIdentifier(table.name); - } - - mapDataType(nativeType: string): string { - const cleanType = this.cleanType(nativeType); - const typeMapping: Record = { - STRING: 'VARCHAR', - UUID: 'UUID', - INT64: 'BIGINT', - INT32: 'INTEGER', - INT16: 'SMALLINT', - INT8: 'TINYINT', - UINT64: 'BIGINT', - UINT32: 'INTEGER', - UINT16: 'SMALLINT', - UINT8: 'TINYINT', - FLOAT64: 'DOUBLE', - FLOAT32: 'FLOAT', - DATETIME64: 'TIMESTAMP', - DATETIME: 'TIMESTAMP', - DATE: 'DATE', - JSON: 'JSON', - ARRAY: 'JSON', - BOOLEAN: 'BOOLEAN', - BOOL: 'BOOLEAN', - }; - return typeMapping[cleanType] ?? cleanType; - } - - mapToDimensionType(nativeType: string): KtxSchemaDimensionType { - if (!nativeType) { - return 'string'; - } - const cleanType = this.cleanType(nativeType).toLowerCase(); - if (this.typeMappings[cleanType]) { - return this.typeMappings[cleanType]; - } - if (cleanType.includes('date') || cleanType.includes('time')) { - return 'time'; - } - if (cleanType.includes('int') || cleanType.includes('float') || cleanType.includes('decimal') || cleanType.includes('num')) { - return 'number'; - } - if (cleanType === 'bool' || cleanType === 'boolean') { - return 'boolean'; - } - return 'string'; - } - - generateSampleQuery(tableName: string, limit: number, columns?: string[]): string { - const columnList = - columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*'; - return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`; - } - - generateSampleQueryWithMetadata(tableName: string, limit: number, columnMetadata?: KtxPostHogSampleColumnInfo[]): string { - if (!columnMetadata || columnMetadata.length === 0) { - return this.generateSampleQuery(tableName, limit); - } - const columnList = columnMetadata - .map((column) => { - if (!column.parentColumnId) { - return this.quoteIdentifier(column.name); - } - const expression = this.formatColumnExpression(column.name); - return `${expression} AS ${this.quoteIdentifier(column.name)}`; - }) - .join(', '); - return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`; - } - - generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string { - const colExpr = this.formatColumnExpression(columnName); - return `SELECT ${colExpr} FROM ${tableName} WHERE ${colExpr} IS NOT NULL ORDER BY rand() LIMIT ${limit}`; - } - - prepareQuery(sql: string, params?: Record): { sql: string; params?: Record } { - if (!params) { - return { sql, params: undefined }; - } - let processedSql = sql; - const processedParams: Record = {}; - for (const [key, value] of Object.entries(params)) { - processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `{${key}}`); - processedParams[key] = value; - } - return { - sql: processedSql, - params: Object.keys(processedParams).length > 0 ? processedParams : undefined, - }; - } - - getRandomSampleFilter(samplePct: number): string { - if (samplePct <= 0 || samplePct >= 1) { - return ''; - } - return `rand() < ${samplePct}`; - } - - getTableSampleClause(_samplePct: number): string { - return ''; - } - - getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; - } - - getNullCountExpression(column: string): string { - return `countIf(${column} IS NULL)`; - } - - getDistinctCountExpression(column: string): string { - return `uniq(${column})`; - } - - generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT uniq(val) AS cardinality - FROM ( - SELECT ${colExpr} AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - LIMIT ${sampleSize} - ) - `; - } - - generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT DISTINCT toString(${colExpr}) AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - ORDER BY val - LIMIT ${limit} - `; - } - - generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null { - return null; - } - - generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { - const colExpr = this.formatColumnExpression(columnName); - return ` - SELECT uniq(val) AS cardinality - FROM ( - SELECT ${colExpr} AS val - FROM ${tableName} - WHERE ${colExpr} IS NOT NULL - ORDER BY rand() - LIMIT ${sampleSize} - ) - `; - } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column; - return `DATE_TRUNC('${granularity}', ${col})`; - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column; - const [amount, unit] = interval.split(' '); - const seconds = Number(amount) * this.getUnitSeconds(unit ?? 'day'); - const originExpr = origin ? `toDateTime('${origin}')` : `toDateTime('1970-01-01')`; - return `${originExpr} + toIntervalSecond(intDiv(toUnixTimestamp(${col}) - toUnixTimestamp(${originExpr}), ${seconds}) * ${seconds})`; - } - - parseIntervalToSql(interval: string): string { - const [amount, unit] = interval.split(' '); - return `INTERVAL ${amount} ${unit?.toUpperCase() ?? 'DAY'}`; - } - - private formatColumnExpression(columnName: string): string { - const rawName = columnName.replace(/^`|`$/g, ''); - const propertyMatch = rawName.match(/^(properties|person\.properties)\.(.+)$/); - if (propertyMatch) { - const [, parentCol, propertyKey] = propertyMatch; - return `JSONExtractString(${parentCol}, '${propertyKey.replace(/'/g, "''")}')`; - } - return this.quoteIdentifier(rawName); - } - - private cleanType(nativeType: string): string { - let cleanType = nativeType.toUpperCase().trim(); - const nullableMatch = cleanType.match(/^NULLABLE\((.+)\)$/); - if (nullableMatch) { - cleanType = nullableMatch[1] ?? cleanType; - } - if (cleanType.startsWith('ARRAY(')) { - return 'ARRAY'; - } - if (cleanType.startsWith('DATETIME64')) { - return 'DATETIME64'; - } - return cleanType; - } - - private getUnitSeconds(unit: string): number { - const secondsByUnit: Record = { - second: 1, - minute: 60, - hour: 3600, - day: 86400, - week: 604800, - month: 2592000, - quarter: 7776000, - year: 31536000, - }; - return secondsByUnit[unit.toLowerCase()] ?? 86400; - } -} diff --git a/packages/connector-posthog/src/index.ts b/packages/connector-posthog/src/index.ts deleted file mode 100644 index 7fa61ebb..00000000 --- a/packages/connector-posthog/src/index.ts +++ /dev/null @@ -1,19 +0,0 @@ -export { KtxPostHogDialect, type KtxPostHogSampleColumnInfo } from './dialect.js'; -export { - getKtxPostHogColumnDescription, - getKtxPostHogPropertyDescription, - getKtxPostHogTableDescription, -} from './schema-descriptions.js'; -export { - isKtxPostHogConnectionConfig, - KtxPostHogScanConnector, - postHogConnectionConfigFromConfig, - type KtxPostHogColumnDistinctValuesOptions, - type KtxPostHogColumnDistinctValuesResult, - type KtxPostHogConnectionConfig, - type KtxPostHogFetch, - type KtxPostHogReadOnlyQueryInput, - type KtxPostHogResolvedConnectionConfig, - type KtxPostHogScanConnectorOptions, -} from './connector.js'; -export { createPostHogLiveDatabaseIntrospection } from './live-database-introspection.js'; diff --git a/packages/connector-posthog/src/live-database-introspection.ts b/packages/connector-posthog/src/live-database-introspection.ts deleted file mode 100644 index 04828a19..00000000 --- a/packages/connector-posthog/src/live-database-introspection.ts +++ /dev/null @@ -1,34 +0,0 @@ -import type { LiveDatabaseIntrospectionPort } from '@ktx/context/ingest'; -import type { KtxProjectConnectionConfig } from '@ktx/context/project'; -import { KtxPostHogScanConnector, type KtxPostHogConnectionConfig, type KtxPostHogFetch } from './connector.js'; - -interface CreatePostHogLiveDatabaseIntrospectionOptions { - connections: Record; - env?: NodeJS.ProcessEnv; - fetch?: KtxPostHogFetch; - sleep?: (ms: number) => Promise; - now?: () => Date; -} - -export function createPostHogLiveDatabaseIntrospection( - options: CreatePostHogLiveDatabaseIntrospectionOptions, -): LiveDatabaseIntrospectionPort { - return { - async extractSchema(connectionId: string) { - const connection = options.connections[connectionId] as KtxPostHogConnectionConfig | undefined; - const connector = new KtxPostHogScanConnector({ - connectionId, - connection, - env: options.env, - fetch: options.fetch, - sleep: options.sleep, - now: options.now, - }); - try { - return await connector.introspect({ connectionId, driver: 'posthog' }, { runId: `posthog-${connectionId}` }); - } finally { - await connector.cleanup(); - } - }, - }; -} diff --git a/packages/connector-posthog/src/package-exports.test.ts b/packages/connector-posthog/src/package-exports.test.ts deleted file mode 100644 index f9d822ae..00000000 --- a/packages/connector-posthog/src/package-exports.test.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import * as posthog from './index.js'; - -describe('@ktx/connector-posthog package exports', () => { - it('exports the connector, dialect, descriptions, and live-database adapter', () => { - expect(posthog.KtxPostHogDialect).toBeTypeOf('function'); - expect(posthog.KtxPostHogScanConnector).toBeTypeOf('function'); - expect(posthog.createPostHogLiveDatabaseIntrospection).toBeTypeOf('function'); - expect(posthog.getKtxPostHogPropertyDescription('$browser')).toBe('User browser name.'); - }); -}); diff --git a/packages/connector-posthog/src/schema-descriptions.ts b/packages/connector-posthog/src/schema-descriptions.ts deleted file mode 100644 index d333fcb4..00000000 --- a/packages/connector-posthog/src/schema-descriptions.ts +++ /dev/null @@ -1,99 +0,0 @@ -const TABLE_DESCRIPTIONS: Record = { - events: - 'PostHog event stream containing all tracked user interactions. Each row represents a single event with properties, timestamp, and user identifier.', - persons: - 'PostHog persons table containing unique users, identifiers, and user properties for segmentation and cohort analysis.', - sessions: - 'PostHog sessions table grouping events into user sessions with duration, entry and exit URLs, and device details.', - groups: - 'PostHog groups table for B2B and team-based analytics. Contains group identifiers and group properties.', - person_distinct_ids: 'PostHog identity resolution table mapping distinct_ids to person_ids.', - cohort_people: 'PostHog dynamic cohort membership table.', - static_cohort_people: 'PostHog static cohort membership table.', - 'system.cohorts': 'PostHog cohort definitions table.', - 'system.feature_flags': 'PostHog feature flag definitions table.', - 'system.experiments': 'PostHog A/B test and experiment definitions table.', - 'system.surveys': 'PostHog survey definitions table.', - 'system.dashboards': 'PostHog dashboard metadata table.', - 'system.insights': 'PostHog saved insight and chart definitions table.', -}; - -const COLUMN_DESCRIPTIONS: Record = { - 'events.uuid': 'Unique identifier for this specific event.', - 'events.event': 'Event name such as $pageview, $autocapture, $identify, or a custom event.', - 'events.distinct_id': 'User identifier that links events to persons.', - 'events.timestamp': 'UTC timestamp when the event occurred.', - 'events.created_at': 'Timestamp when the event was ingested into PostHog.', - 'events.properties': 'JSON object containing event-specific properties.', - 'events.person_id': 'Internal PostHog person UUID.', - 'events.$session_id': 'Session identifier linking this event to sessions.', - 'persons.id': 'Internal PostHog person UUID.', - 'persons.distinct_id': 'Primary user identifier for joins with events.', - 'persons.properties': 'JSON object containing user properties.', - 'persons.created_at': 'Timestamp when this person was first seen in PostHog.', - 'persons.is_identified': 'Whether the person has been explicitly identified.', - 'sessions.session_id': 'Unique session identifier.', - 'sessions.distinct_id': 'User identifier for this session.', - 'sessions.$start_timestamp': 'Timestamp when the session started.', - 'sessions.$end_timestamp': 'Timestamp when the session ended.', - 'sessions.$session_duration': 'Total session duration in seconds.', - 'groups.index': 'Index identifying the configured PostHog group type.', - 'groups.key': 'Unique identifier for this group.', - 'groups.properties': 'JSON object containing group properties.', - 'groups.created_at': 'Timestamp when this group was first seen.', - 'person_distinct_ids.distinct_id': 'Device or browser identifier for a person.', - 'person_distinct_ids.person_id': 'Internal PostHog person UUID mapped to the distinct_id.', - 'cohort_people.person_id': 'Person UUID belonging to the cohort.', - 'cohort_people.cohort_id': 'Cohort identifier.', - 'static_cohort_people.person_id': 'Person UUID belonging to the static cohort.', - 'static_cohort_people.cohort_id': 'Static cohort identifier.', - 'system.cohorts.id': 'Unique cohort identifier.', - 'system.cohorts.name': 'Human-readable cohort name.', - 'system.feature_flags.id': 'Unique feature flag identifier.', - 'system.feature_flags.key': 'Feature flag key used in code.', - 'system.experiments.id': 'Unique experiment identifier.', - 'system.experiments.name': 'Experiment name.', - 'system.surveys.id': 'Unique survey identifier.', - 'system.surveys.name': 'Survey name.', - 'system.dashboards.id': 'Unique dashboard identifier.', - 'system.dashboards.name': 'Dashboard name.', - 'system.insights.id': 'Unique insight identifier.', - 'system.insights.name': 'Insight or chart name.', -}; - -const PROPERTY_DESCRIPTIONS: Record = { - $browser: 'User browser name.', - $browser_version: 'User browser version.', - $os: 'Operating system.', - $os_version: 'Operating system version.', - $device: 'Device name.', - $device_type: 'Device type.', - $current_url: 'Full URL of the current page.', - $pathname: 'Path portion of the current URL.', - $host: 'Hostname of the current page.', - $referrer: 'Referrer URL.', - $referring_domain: 'Referrer domain.', - $utm_source: 'UTM source parameter.', - $utm_medium: 'UTM medium parameter.', - $utm_campaign: 'UTM campaign parameter.', - $utm_content: 'UTM content parameter.', - $utm_term: 'UTM term parameter.', - $lib: 'PostHog library name used to capture the event.', - $lib_version: 'PostHog library version.', - $insert_id: 'Unique identifier for event deduplication.', - $active_feature_flags: 'List of active feature flags for this user or event.', - $feature_flag: 'Feature flag name for flag-related events.', - $feature_flag_response: 'Feature flag value or variant.', -}; - -export function getKtxPostHogTableDescription(tableName: string): string | undefined { - return TABLE_DESCRIPTIONS[tableName]; -} - -export function getKtxPostHogColumnDescription(tableName: string, columnName: string): string | undefined { - return COLUMN_DESCRIPTIONS[`${tableName}.${columnName}`]; -} - -export function getKtxPostHogPropertyDescription(propertyKey: string): string | null { - return PROPERTY_DESCRIPTIONS[propertyKey] ?? null; -} diff --git a/packages/connector-posthog/tsconfig.json b/packages/connector-posthog/tsconfig.json deleted file mode 100644 index 965e6978..00000000 --- a/packages/connector-posthog/tsconfig.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "extends": "../../tsconfig.base.json", - "compilerOptions": { - "outDir": "./dist", - "rootDir": "./src" - }, - "include": ["src/**/*.ts"], - "exclude": ["dist", "node_modules"] -} diff --git a/packages/context/src/connections/connection-type.ts b/packages/context/src/connections/connection-type.ts index 81c17bb4..6cd48042 100644 --- a/packages/context/src/connections/connection-type.ts +++ b/packages/context/src/connections/connection-type.ts @@ -18,7 +18,6 @@ export const connectionTypeSchema = z.enum([ 'METABASE', 'LOOKER', 'NOTION', - 'POSTHOG', 'MYSQL', 'CLICKHOUSE', 'PLAIN', diff --git a/packages/context/src/mcp/local-project-ports.ts b/packages/context/src/mcp/local-project-ports.ts index 60808426..d2ad139f 100644 --- a/packages/context/src/mcp/local-project-ports.ts +++ b/packages/context/src/mcp/local-project-ports.ts @@ -116,8 +116,7 @@ function normalizeScanDriver(driver: string | undefined): KtxConnectionDriver { normalized === 'clickhouse' || normalized === 'sqlserver' || normalized === 'bigquery' || - normalized === 'snowflake' || - normalized === 'posthog' + normalized === 'snowflake' ) { return normalized === 'sqlite3' ? 'sqlite' : normalized; } diff --git a/packages/context/src/memory/memory-agent.service.ingest.test.ts b/packages/context/src/memory/memory-agent.service.ingest.test.ts index bf30a883..710ba956 100644 --- a/packages/context/src/memory/memory-agent.service.ingest.test.ts +++ b/packages/context/src/memory/memory-agent.service.ingest.test.ts @@ -17,7 +17,7 @@ interface BuiltMocks { appSettings: any; llmProvider: any; prompt: any; - posthog: any; + eventTracker: any; telemetry: any; skillsRegistry: any; wikiService: any; @@ -64,7 +64,7 @@ const buildMocks = (overrides: Partial = {}): BuiltMocks => { }, llmProvider: { getModel: vi.fn().mockReturnValue({}) }, prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') }, - posthog: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, + eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, telemetry: { isEnabled: () => false, appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } }, @@ -177,7 +177,7 @@ const buildService = (mocks: BuiltMocks): MemoryAgentService => slValidator: mocks.slValidator, toolsetFactory: mocks.toolsetFactory, telemetry: { - trackMemoryIngestion: mocks.posthog.trackEvent, + trackMemoryIngestion: mocks.eventTracker.trackEvent, }, }); diff --git a/packages/context/src/scan/local-scan.ts b/packages/context/src/scan/local-scan.ts index 0919843f..15fdf6f3 100644 --- a/packages/context/src/scan/local-scan.ts +++ b/packages/context/src/scan/local-scan.ts @@ -103,13 +103,12 @@ function normalizeDriver(driver: string | undefined): KtxConnectionDriver { normalized === 'clickhouse' || normalized === 'sqlserver' || normalized === 'bigquery' || - normalized === 'snowflake' || - normalized === 'posthog' + normalized === 'snowflake' ) { return normalized === 'sqlite3' ? 'sqlite' : normalized; } throw new Error( - `Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake/posthog in this phase, received "${driver ?? 'unknown'}"`, + `Standalone ktx scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake in this phase, received "${driver ?? 'unknown'}"`, ); } diff --git a/packages/context/src/scan/relationship-profiling.ts b/packages/context/src/scan/relationship-profiling.ts index 1fbeccd4..fa6acfac 100644 --- a/packages/context/src/scan/relationship-profiling.ts +++ b/packages/context/src/scan/relationship-profiling.ts @@ -71,7 +71,7 @@ const SAMPLE_VALUE_DELIMITER = '\u001f'; type QuoteStyle = 'double' | 'backtick' | 'bracket'; function quoteStyle(driver: KtxConnectionDriver): QuoteStyle { - if (driver === 'mysql' || driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'mysql' || driver === 'clickhouse') { return 'backtick'; } if (driver === 'sqlserver') { @@ -93,7 +93,7 @@ export function quoteKtxRelationshipIdentifier(driver: KtxConnectionDriver, iden export function formatKtxRelationshipTableRef(driver: KtxConnectionDriver, table: KtxTableRef): string { const parts = - driver === 'sqlite' || driver === 'posthog' + driver === 'sqlite' ? [table.name] : [table.catalog, table.db, table.name].filter((value): value is string => Boolean(value)); return parts.map((part) => quoteKtxRelationshipIdentifier(driver, part)).join('.'); @@ -109,7 +109,7 @@ function textLengthExpression(driver: KtxConnectionDriver, columnSql: string): s if (driver === 'bigquery') { return `LENGTH(CAST(${columnSql} AS STRING))`; } - if (driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'clickhouse') { return `length(toString(${columnSql}))`; } return `LENGTH(CAST(${columnSql} AS TEXT))`; @@ -223,7 +223,7 @@ function sampleAggregateSql(driver: KtxConnectionDriver, innerSql: string): stri if (driver === 'sqlserver') { return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`; } - if (driver === 'clickhouse' || driver === 'posthog') { + if (driver === 'clickhouse') { return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`; } return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`; diff --git a/packages/context/src/scan/types.test.ts b/packages/context/src/scan/types.test.ts index 3b68411a..309db88e 100644 --- a/packages/context/src/scan/types.test.ts +++ b/packages/context/src/scan/types.test.ts @@ -150,14 +150,14 @@ describe('KTX scan contract types', () => { }; const connector: KtxScanConnector = { - id: 'posthog:product', - driver: 'posthog', + id: 'clickhouse:product', + driver: 'clickhouse', capabilities: createKtxConnectorCapabilities({ eventStreamDiscovery: true }), eventStreamDiscovery: discovery, async introspect() { return { connectionId: 'product', - driver: 'posthog', + driver: 'clickhouse', extractedAt: '2026-04-29T00:00:00.000Z', scope: { catalogs: ['157881'] }, metadata: {}, diff --git a/packages/context/src/scan/types.ts b/packages/context/src/scan/types.ts index 66f70ba2..71bb3fb3 100644 --- a/packages/context/src/scan/types.ts +++ b/packages/context/src/scan/types.ts @@ -5,7 +5,6 @@ export type KtxConnectionDriver = | 'sqlserver' | 'bigquery' | 'snowflake' - | 'posthog' | 'mysql' | 'clickhouse'; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b900b9ed..12d1235a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -42,9 +42,6 @@ importers: '@ktx/connector-postgres': specifier: workspace:* version: file:packages/connector-postgres(ws@8.20.0) - '@ktx/connector-posthog': - specifier: workspace:* - version: file:packages/connector-posthog(ws@8.20.0) '@ktx/connector-snowflake': specifier: workspace:* version: file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0) @@ -53,7 +50,7 @@ importers: version: file:packages/connector-sqlite(ws@8.20.0) '@ktx/connector-sqlserver': specifier: workspace:* - version: file:packages/connector-sqlserver(@azure/core-client@1.10.1)(ws@8.20.0) + version: file:packages/connector-sqlserver(ws@8.20.0) '@ktx/context': specifier: workspace:* version: file:packages/context(ws@8.20.0) @@ -177,22 +174,6 @@ importers: specifier: ^4.0.18 version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) - packages/connector-posthog: - dependencies: - '@ktx/context': - specifier: workspace:* - version: file:packages/context - devDependencies: - '@types/node': - specifier: ^24.3.0 - version: 24.12.2 - typescript: - specifier: ^5.9.3 - version: 5.9.3 - vitest: - specifier: ^4.0.18 - version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) - packages/connector-snowflake: dependencies: '@ktx/context': @@ -919,10 +900,6 @@ packages: resolution: {directory: packages/connector-postgres, type: directory} engines: {node: '>=22.0.0'} - '@ktx/connector-posthog@file:packages/connector-posthog': - resolution: {directory: packages/connector-posthog, type: directory} - engines: {node: '>=22.0.0'} - '@ktx/connector-snowflake@file:packages/connector-snowflake': resolution: {directory: packages/connector-snowflake, type: directory} engines: {node: '>=22.0.0'} @@ -3632,6 +3609,11 @@ snapshots: '@azure/core-client': 1.10.1 '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-http-compat@2.4.0(@azure/core-rest-pipeline@1.23.0)': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-lro@2.7.2': dependencies: '@azure/abort-controller': 2.1.2 @@ -3703,6 +3685,24 @@ snapshots: transitivePeerDependencies: - supports-color + '@azure/keyvault-keys@4.10.0': + dependencies: + '@azure-rest/core-client': 2.6.0 + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-http-compat': 2.4.0(@azure/core-rest-pipeline@1.23.0) + '@azure/core-lro': 2.7.2 + '@azure/core-paging': 1.6.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/keyvault-common': 2.1.0 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + '@azure/keyvault-keys@4.10.0(@azure/core-client@1.10.1)': dependencies: '@azure-rest/core-client': 2.6.0 @@ -3981,16 +3981,6 @@ snapshots: - supports-color - ws - '@ktx/connector-posthog@file:packages/connector-posthog(ws@8.20.0)': - dependencies: - '@ktx/context': file:packages/context(ws@8.20.0) - transitivePeerDependencies: - - '@cfworker/json-schema' - - js-yaml - - pg-native - - supports-color - - ws - '@ktx/connector-snowflake@file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0)': dependencies: '@ktx/context': file:packages/context(ws@8.20.0) @@ -4016,10 +4006,10 @@ snapshots: - supports-color - ws - '@ktx/connector-sqlserver@file:packages/connector-sqlserver(@azure/core-client@1.10.1)(ws@8.20.0)': + '@ktx/connector-sqlserver@file:packages/connector-sqlserver(ws@8.20.0)': dependencies: '@ktx/context': file:packages/context(ws@8.20.0) - mssql: 12.5.0(@azure/core-client@1.10.1) + mssql: 12.5.0 transitivePeerDependencies: - '@azure/core-client' - '@cfworker/json-schema' @@ -5571,6 +5561,17 @@ snapshots: ms@2.1.3: {} + mssql@12.5.0: + dependencies: + '@tediousjs/connection-string': 1.1.0 + commander: 11.1.0 + debug: 4.4.3 + tarn: 3.0.2 + tedious: 19.2.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + mssql@12.5.0(@azure/core-client@1.10.1): dependencies: '@tediousjs/connection-string': 1.1.0 @@ -6073,6 +6074,22 @@ snapshots: tarn@3.0.2: {} + tedious@19.2.1: + dependencies: + '@azure/core-auth': 1.10.1 + '@azure/identity': 4.13.1 + '@azure/keyvault-keys': 4.10.0 + '@js-joda/core': 5.7.0 + '@types/node': 24.12.2 + bl: 6.1.6 + iconv-lite: 0.7.2 + js-md4: 0.3.2 + native-duplexpair: 1.0.0 + sprintf-js: 1.1.3 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + tedious@19.2.1(@azure/core-client@1.10.1): dependencies: '@azure/core-auth': 1.10.1 diff --git a/release-policy.json b/release-policy.json index 0ba6297f..ce814787 100644 --- a/release-policy.json +++ b/release-policy.json @@ -10,7 +10,6 @@ "@ktx/connector-clickhouse", "@ktx/connector-mysql", "@ktx/connector-postgres", - "@ktx/connector-posthog", "@ktx/connector-snowflake", "@ktx/connector-sqlite", "@ktx/connector-sqlserver", diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 3d3aa168..b2da21c8 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -77,7 +77,6 @@ describe('standalone example docs', () => { assert.match(rootReadme, /`packages\/connector-clickhouse`/); assert.match(rootReadme, /`packages\/connector-mysql`/); assert.match(rootReadme, /`packages\/connector-postgres`/); - assert.match(rootReadme, /`packages\/connector-posthog`/); assert.match(rootReadme, /`packages\/connector-snowflake`/); assert.match(rootReadme, /`packages\/connector-sqlite`/); assert.match(rootReadme, /`packages\/connector-sqlserver`/); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 52d49470..d05b30bf 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -17,7 +17,6 @@ export const NPM_ARTIFACT_PACKAGES = [ { name: '@ktx/connector-clickhouse', packageRoot: 'packages/connector-clickhouse' }, { name: '@ktx/connector-mysql', packageRoot: 'packages/connector-mysql' }, { name: '@ktx/connector-postgres', packageRoot: 'packages/connector-postgres' }, - { name: '@ktx/connector-posthog', packageRoot: 'packages/connector-posthog' }, { name: '@ktx/connector-snowflake', packageRoot: 'packages/connector-snowflake' }, { name: '@ktx/connector-sqlite', packageRoot: 'packages/connector-sqlite' }, { name: '@ktx/connector-sqlserver', packageRoot: 'packages/connector-sqlserver' }, @@ -516,7 +515,6 @@ const bigqueryConnector = await import('@ktx/connector-bigquery'); const clickhouseConnector = await import('@ktx/connector-clickhouse'); const mysqlConnector = await import('@ktx/connector-mysql'); const postgresConnector = await import('@ktx/connector-postgres'); -const posthogConnector = await import('@ktx/connector-posthog'); const snowflakeConnector = await import('@ktx/connector-snowflake'); const sqliteConnector = await import('@ktx/connector-sqlite'); const sqlserverConnector = await import('@ktx/connector-sqlserver'); @@ -587,7 +585,6 @@ const connectorExports = [ ['@ktx/connector-clickhouse', clickhouseConnector.KtxClickHouseScanConnector, clickhouseConnector.KtxClickHouseDialect], ['@ktx/connector-mysql', mysqlConnector.KtxMysqlScanConnector, mysqlConnector.KtxMysqlDialect], ['@ktx/connector-postgres', postgresConnector.KtxPostgresScanConnector, postgresConnector.KtxPostgresDialect], - ['@ktx/connector-posthog', posthogConnector.KtxPostHogScanConnector, posthogConnector.KtxPostHogDialect], ['@ktx/connector-snowflake', snowflakeConnector.KtxSnowflakeScanConnector, snowflakeConnector.KtxSnowflakeDialect], ['@ktx/connector-sqlite', sqliteConnector.KtxSqliteScanConnector, sqliteConnector.KtxSqliteDialect], ['@ktx/connector-sqlserver', sqlserverConnector.KtxSqlServerScanConnector, sqlserverConnector.KtxSqlServerDialect], diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 4aec3c6e..5b18a9ed 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -34,7 +34,6 @@ const CONNECTOR_PACKAGE_NAMES = [ '@ktx/connector-clickhouse', '@ktx/connector-mysql', '@ktx/connector-postgres', - '@ktx/connector-posthog', '@ktx/connector-snowflake', '@ktx/connector-sqlite', '@ktx/connector-sqlserver', @@ -517,7 +516,6 @@ describe('verification snippets', () => { assert.match(source, /KtxPostgresScanConnector/); assert.match(source, /KtxBigQueryScanConnector/); assert.match(source, /KtxSnowflakeScanConnector/); - assert.match(source, /KtxPostHogScanConnector/); }); it('asserts installed hybrid search exports and CLI smoke coverage', () => { diff --git a/scripts/precommit-check.mjs b/scripts/precommit-check.mjs index d112752d..fdd405bf 100644 --- a/scripts/precommit-check.mjs +++ b/scripts/precommit-check.mjs @@ -15,7 +15,6 @@ const packageNameByDir = new Map( 'connector-clickhouse', 'connector-mysql', 'connector-postgres', - 'connector-posthog', 'connector-snowflake', 'connector-sqlite', 'connector-sqlserver', From c82989119b853918f66ce268fa07ef737f8cb395 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Sun, 10 May 2026 23:13:17 -0700 Subject: [PATCH 6/8] Update setup and ingest flows --- packages/cli/src/context-build-view.test.ts | 39 ++++- packages/cli/src/context-build-view.ts | 58 ++++++- packages/cli/src/ingest.test.ts | 141 +++++++++++++++++- packages/cli/src/ingest.ts | 19 ++- packages/cli/src/setup-agents.test.ts | 75 +++++++++- packages/cli/src/setup-agents.ts | 96 ++++++++++-- packages/cli/src/setup-context.test.ts | 9 +- packages/cli/src/setup-context.ts | 10 ++ packages/cli/src/setup-databases.test.ts | 88 ++++++++++- packages/cli/src/setup-databases.ts | 134 ++++++++++++++++- packages/cli/src/setup-ready-menu.test.ts | 9 +- packages/cli/src/setup-ready-menu.ts | 9 +- packages/cli/src/setup-sources.test.ts | 4 +- packages/cli/src/setup-sources.ts | 2 +- packages/cli/src/setup.test.ts | 96 ++++++++++++ packages/cli/src/setup.ts | 17 ++- packages/context/src/core/git.service.test.ts | 25 ++++ packages/context/src/core/git.service.ts | 108 +++++++++++++- .../ingest/adapters/metabase/chunk.test.ts | 12 ++ .../src/ingest/adapters/metabase/chunk.ts | 2 +- .../ingest/adapters/metabase/client.test.ts | 34 +++++ .../src/ingest/adapters/metabase/client.ts | 131 +++++++++++++++- .../adapters/metabase/fetch-scope.test.ts | 8 +- .../ingest/adapters/metabase/fetch-scope.ts | 2 +- .../src/ingest/ingest-bundle.runner.ts | 18 ++- .../src/ingest/local-bundle-ingest.test.ts | 56 +++++++ .../src/ingest/local-bundle-runtime.ts | 96 ++++++++++-- .../ingest/stages/stage-3-work-units.test.ts | 15 ++ .../src/ingest/stages/stage-3-work-units.ts | 6 + 29 files changed, 1253 insertions(+), 66 deletions(-) diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index 8e48a6f7..c6ef0eb1 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -99,11 +99,11 @@ describe('parseScanSummary', () => { describe('parseIngestSummary', () => { it('extracts work units and saved memory', () => { - expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('5 items indexed · 3 wiki, 2 SL'); + expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('3 wiki, 2 SL'); }); it('extracts work units alone when no saved memory', () => { - expect(parseIngestSummary('Work units: 5\nStatus: done')).toBe('5 items indexed'); + expect(parseIngestSummary('Work units: 5\nStatus: done')).toBe('5 work units'); }); it('extracts saved memory alone when no work units', () => { @@ -467,6 +467,41 @@ describe('runContextBuild', () => { { connectionId: 'dbt_main', status: 'done' }, ]); }); + + it('returns report IDs and artifact paths parsed from target output', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + dbt_main: { driver: 'dbt' }, + }); + const executeTarget = vi.fn(async (target, _args, targetIo) => { + if (target.operation === 'scan') { + targetIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n'); + targetIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n'); + } else { + targetIo.stdout.write('Report: report-dbt-1\n'); + targetIo.stdout.write('Saved memory: 2 wiki, 3 SL\n'); + } + return successResult(target.connectionId, target.driver, target.operation); + }); + + const result = await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled' }, + io.io, + { executeTarget, now: () => 1000 }, + ); + + expect(result).toMatchObject({ + exitCode: 0, + detached: false, + reportIds: ['report-dbt-1'], + artifactPaths: [ + 'raw-sources/warehouse/live-database/sync-1/scan-report.json', + 'raw-sources/warehouse/live-database/sync-1', + ], + }); + }); }); describe('viewStateFromSourceProgress', () => { diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 4c57784d..bb661655 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -44,6 +44,8 @@ export interface ContextBuildArgs { export interface ContextBuildResult { exitCode: number; detached: boolean; + reportIds?: string[]; + artifactPaths?: string[]; } export interface ContextBuildSourceProgressUpdate { @@ -237,12 +239,41 @@ export function parseScanSummary(output: string): string | null { } export function parseIngestSummary(output: string): string | null { - const parts: string[] = []; - const workUnits = output.match(/Work units: (\d+)/); - if (workUnits) parts.push(`${workUnits[1]} items indexed`); const savedMemory = output.match(/Saved memory: (.+)/); - if (savedMemory) parts.push(savedMemory[1]); - return parts.length > 0 ? parts.join(' · ') : null; + if (savedMemory) return savedMemory[1]; + const workUnits = output.match(/Work units: (\d+)/); + if (workUnits) return `${workUnits[1]} work units`; + return null; +} + +function collectOutputMetadata( + output: string, + operation: KtxPublicIngestPlanTarget['operation'], +): { reportIds: string[]; artifactPaths: string[] } { + const reportIds = new Set(); + const artifactPaths = new Set(); + for (const line of output.split(/\r?\n/)) { + const trimmed = line.trim(); + const reportLine = trimmed.match(/^Report:\s*(.+)$/); + if (reportLine) { + const value = reportLine[1].trim(); + if (value && value !== 'none') { + if (operation === 'scan') artifactPaths.add(value); + else reportIds.add(value); + } + } + const rawSourcesLine = trimmed.match(/^Raw sources:\s*(.+)$/); + if (rawSourcesLine) { + const value = rawSourcesLine[1].trim(); + if (value && value !== 'none') artifactPaths.add(value); + } + if (operation === 'source-ingest') { + for (const match of trimmed.matchAll(/\breport=([^\s]+)/g)) { + reportIds.add(match[1]); + } + } + } + return { reportIds: [...reportIds], artifactPaths: [...artifactPaths] }; } interface CapturedIo { @@ -428,6 +459,8 @@ export async function runContextBuild( const orderedTargets = [...state.primarySources, ...state.contextSources]; const execTarget = deps.executeTarget ?? executePublicIngestTarget; + const reportIds = new Set(); + const artifactPaths = new Set(); let detached = false; let cleanupKeystroke: (() => void) | null = null; @@ -492,10 +525,14 @@ export async function runContextBuild( targetState.status = failed ? 'failed' : 'done'; targetState.detailLine = null; if (!failed) { + const capturedOutput = capture.captured(); + const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation); + for (const reportId of metadata.reportIds) reportIds.add(reportId); + for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath); targetState.summaryText = targetState.target.operation === 'scan' - ? parseScanSummary(capture.captured()) - : parseIngestSummary(capture.captured()); + ? parseScanSummary(capturedOutput) + : parseIngestSummary(capturedOutput); } if (failed) hasFailure = true; @@ -521,5 +558,10 @@ export async function runContextBuild( paint(false); } - return { exitCode: hasFailure ? 1 : 0, detached: false }; + return { + exitCode: hasFailure ? 1 : 0, + detached: false, + ...(reportIds.size > 0 ? { reportIds: [...reportIds] } : {}), + ...(artifactPaths.size > 0 ? { artifactPaths: [...artifactPaths] } : {}), + }; } diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 5c536f0f..5a18938b 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -222,6 +222,39 @@ function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): L }; } +function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const failedWorkUnit = { + ...bundleReportSnapshot().body.workUnits[0], + status: 'failed' as const, + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }; + const nextReport = localFakeBundleReport(jobId, { + id: 'report-failed-1', + runId: 'run-failed-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + body: { + workUnits: [failedWorkUnit], + failedWorkUnits: [failedWorkUnit.unitKey], + }, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + class CliLookerSlWritingAgentRunner extends AgentRunnerService { override runLoop = vi.fn(async (params: RunLoopParams) => { if ( @@ -621,7 +654,10 @@ function makeCliLookerParser() { }; } -function localFakeBundleReport(jobId: string, overrides: Partial = {}): IngestReportSnapshot { +function localFakeBundleReport( + jobId: string, + overrides: Partial> & { body?: Partial } = {}, +): IngestReportSnapshot { const report = bundleReportSnapshot(); return { ...report, @@ -826,6 +862,77 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); }); + it('returns a non-zero code when Metabase fan-out has failed children', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + body: { + failedWorkUnits: ['metabase-db-1'], + workUnits: [ + { + unitKey: 'metabase-db-1', + rawFiles: ['cards/1.json'], + status: 'failed', + reason: 'tool write failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'partial_failure', + totals: { workUnits: 1, failedWorkUnits: 1 }, + children: [ + { + jobId: 'metabase-child-1', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 1, + failedWorkUnits: ['metabase-db-1'], + artifactsWritten: 0, + commitSha: null, + }, + report, + }, + ], + }), + }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain('Metabase fan-out: partial_failure'); + expect(io.stdout()).toContain('Failed work units: 1'); + expect(io.stdout()).toContain('status=error'); + expect(io.stderr()).toBe(''); + }); + it('prints Metabase fan-out progress before the final summary', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); @@ -1143,6 +1250,38 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Diff: +2/~0/-0/=0\n'); }); + it('returns a non-zero code when local ingest reports failed work units', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => failedLocalBundleRun(input, 'local-job-failed')); + + const io = makeIo(); + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'local-job-failed', + }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Status: error\n'); + }); + it('passes the debug LLM request file to local ingest runs', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index d6748991..2e33372c 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -111,6 +111,16 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void } function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIngestIo): void { + const counts = result.children.reduce( + (acc, child) => { + const childCounts = reportActionCounts(child.report); + return { + wikiCount: acc.wikiCount + childCounts.wikiCount, + slCount: acc.slCount + childCounts.slCount, + }; + }, + { wikiCount: 0, slCount: 0 }, + ); io.stdout.write(`Metabase fan-out: ${result.status}\n`); io.stdout.write(`Source: ${result.metabaseConnectionId}\n`); io.stdout.write(`Children: ${result.children.length}\n`); @@ -118,10 +128,11 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng io.stdout.write(`Work units: ${result.totals.workUnits}\n`); io.stdout.write(`Failed work units: ${result.totals.failedWorkUnits}\n`); } + io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); for (const child of result.children) { const status = reportStatus(child.report); io.stdout.write( - `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId}\n`, + `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`, ); } } @@ -326,7 +337,7 @@ export async function runKtxIngest( } else { writeMetabaseFanoutStatus(result, io); } - return 0; + return result.status === 'all_succeeded' ? 0 : 1; } const jobId = deps.jobIdFactory?.(); @@ -377,14 +388,14 @@ export async function runKtxIngest( liveTui?.close(); liveTui = null; io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); - return 0; + return reportStatus(result.report) === 'done' ? 0 : 1; } await writeReportRecord(result.report, runOutputMode, io, { interactive: (args.inputMode ?? 'auto') === 'auto', renderStoredMemoryFlow: deps.renderStoredMemoryFlow, env, }); - return 0; + return reportStatus(result.report) === 'done' ? 0 : 1; } finally { liveTui?.close(); } diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index 739c9912..e41fc8a5 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { + formatInstallSummary, plannedKtxAgentFiles, readKtxAgentInstallManifest, removeKtxAgentInstall, @@ -37,11 +38,13 @@ describe('setup agents', () => { it('plans project-scoped CLI and MCP files for every target', () => { expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'both' })).toEqual([ - { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md') }, + { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' }, { kind: 'json-key', path: join(tempDir, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([ - { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md') }, + { kind: 'file', path: join(tempDir, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.codex/instructions/ktx.md'), role: 'rule' }, ]); expect(plannedKtxAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'mcp' })).toEqual([ { kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, @@ -113,6 +116,7 @@ describe('setup agents', () => { await expect(removeKtxAgentInstall(tempDir, io.io)).resolves.toBe(0); await expect(stat(join(tempDir, '.claude/skills/ktx/SKILL.md'))).rejects.toThrow(); + await expect(stat(join(tempDir, '.claude/rules/ktx.md'))).rejects.toThrow(); await expect(stat(join(tempDir, '.claude/skills/ktx/keep.txt'))).resolves.toBeDefined(); await expect(readKtxAgentInstallManifest(tempDir)).resolves.toEqual(null); }); @@ -173,4 +177,71 @@ describe('setup agents', () => { }), ); }); + + it('prints per-agent install summary after successful installation', async () => { + const io = makeIo(); + + await runKtxSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + yes: true, + agents: true, + target: 'claude-code', + scope: 'project', + mode: 'both', + skipAgents: false, + }, + io.io, + ); + + const output = io.stdout(); + expect(output).toContain('Agent integration complete'); + expect(output).toContain('Claude Code'); + expect(output).toContain('+ Skill installed'); + expect(output).toContain('.claude/skills/ktx/SKILL.md'); + expect(output).toContain('+ Rule installed'); + expect(output).toContain('.claude/rules/ktx.md'); + expect(output).toContain('+ MCP config added'); + expect(output).toContain('.mcp.json'); + }); + + it('formats summary with relative paths for project scope', () => { + const summary = formatInstallSummary( + [{ target: 'cursor', scope: 'project', mode: 'both' }], + [ + { kind: 'file', path: join(tempDir, '.cursor/rules/ktx.mdc') }, + { kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, + ], + tempDir, + ); + + expect(summary).toContain('Cursor'); + expect(summary).toContain('+ Rule installed'); + expect(summary).toContain('.cursor/rules/ktx.mdc'); + expect(summary).toContain('+ MCP config added'); + expect(summary).toContain('.cursor/mcp.json'); + expect(summary).not.toContain(tempDir); + }); + + it('formats summary with multiple agent targets', () => { + const summary = formatInstallSummary( + [ + { target: 'claude-code', scope: 'project', mode: 'cli' }, + { target: 'codex', scope: 'project', mode: 'mcp' }, + ], + [ + { kind: 'file', path: join(tempDir, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + { kind: 'file', path: join(tempDir, '.claude/rules/ktx.md'), role: 'rule' }, + { kind: 'json-key', path: join(tempDir, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] }, + ], + tempDir, + ); + + expect(summary).toContain('Claude Code'); + expect(summary).toContain('+ Skill installed'); + expect(summary).toContain('+ Rule installed'); + expect(summary).toContain('Codex'); + expect(summary).toContain('+ MCP config added'); + }); }); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index 303f5844..55bb9a76 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -1,5 +1,5 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; -import { dirname, join, resolve } from 'node:path'; +import { dirname, join, relative, resolve } from 'node:path'; import { cancel, isCancel, multiselect, select } from '@clack/prompts'; import { loadKtxProject, markKtxSetupStepComplete, serializeKtxProjectConfig } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; @@ -37,7 +37,10 @@ export interface KtxAgentInstallManifest { projectDir: string; installedAt: string; installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>; - entries: Array<{ kind: 'file'; path: string } | { kind: 'json-key'; path: string; jsonPath: string[] }>; + entries: Array< + | { kind: 'file'; path: string; role?: 'skill' | 'rule' } + | { kind: 'json-key'; path: string; jsonPath: string[] } + >; } type InstallEntry = KtxAgentInstallManifest['entries'][number]; @@ -54,11 +57,17 @@ export function plannedKtxAgentFiles(input: { }): InstallEntry[] { if (input.scope === 'global') { if (input.target === 'claude-code') { - return [{ kind: 'file', path: join(process.env.HOME ?? '', '.claude/skills/ktx/SKILL.md') }]; + const home = process.env.HOME ?? ''; + return [ + { kind: 'file', path: join(home, '.claude/skills/ktx/SKILL.md'), role: 'skill' as const }, + { kind: 'file', path: join(home, '.claude/rules/ktx.md'), role: 'rule' as const }, + ]; } if (input.target === 'codex') { + const codexHome = process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex'); return [ - { kind: 'file', path: join(process.env.CODEX_HOME ?? join(process.env.HOME ?? '', '.codex'), 'skills/ktx/SKILL.md') }, + { kind: 'file', path: join(codexHome, 'skills/ktx/SKILL.md'), role: 'skill' as const }, + { kind: 'file', path: join(codexHome, 'instructions/ktx.md'), role: 'rule' as const }, ]; } throw new Error(`Global ${input.target} installation is not supported; use --project.`); @@ -66,12 +75,16 @@ export function plannedKtxAgentFiles(input: { const root = resolve(input.projectDir); const cliEntries: Partial> = { - 'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md') }, - codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') }, + 'claude-code': { kind: 'file', path: join(root, '.claude/skills/ktx/SKILL.md'), role: 'skill' }, + codex: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md'), role: 'skill' }, cursor: { kind: 'file', path: join(root, '.cursor/rules/ktx.mdc') }, opencode: { kind: 'file', path: join(root, '.opencode/commands/ktx.md') }, universal: { kind: 'file', path: join(root, '.agents/skills/ktx/SKILL.md') }, }; + const ruleEntries: Partial> = { + 'claude-code': { kind: 'file', path: join(root, '.claude/rules/ktx.md'), role: 'rule' }, + codex: { kind: 'file', path: join(root, '.codex/instructions/ktx.md'), role: 'rule' }, + }; const mcpEntries: Record = { 'claude-code': { kind: 'json-key', path: join(root, '.mcp.json'), jsonPath: ['mcpServers', 'ktx'] }, codex: { kind: 'json-key', path: join(root, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] }, @@ -80,7 +93,7 @@ export function plannedKtxAgentFiles(input: { universal: { kind: 'json-key', path: join(root, '.agents/mcp/ktx.json'), jsonPath: ['mcpServers', 'ktx'] }, }; return [ - ...(input.mode === 'cli' || input.mode === 'both' ? [cliEntries[input.target]] : []), + ...(input.mode === 'cli' || input.mode === 'both' ? [cliEntries[input.target], ruleEntries[input.target]] : []), ...(input.mode === 'mcp' || input.mode === 'both' ? [mcpEntries[input.target]] : []), ].filter((entry): entry is InstallEntry => entry !== undefined); } @@ -113,6 +126,17 @@ function cliInstructionContent(input: { projectDir: string; target: KtxAgentTarg ].join('\n'); } +function ruleInstructionContent(input: { projectDir: string }): string { + return [ + `Use the \`ktx\` CLI to query local semantic context, wiki knowledge, and execute safe SQL for this project (\`--project-dir ${input.projectDir}\`).`, + '', + 'Use when the user asks about data schemas, metrics, dimensions, database structure, or wants to run SQL queries.', + '', + 'Do not use for general programming, code review, or tasks unrelated to data and analytics.', + '', + ].join('\n'); +} + function mcpConfig(projectDir: string): Record { return { command: 'ktx', @@ -245,6 +269,55 @@ function createPromptAdapter(): KtxSetupAgentsPromptAdapter { }; } +const targetDisplayNames: Record = { + 'claude-code': 'Claude Code', + codex: 'Codex', + cursor: 'Cursor', + opencode: 'OpenCode', + universal: 'Universal .agents', +}; + +const fileEntryLabels: Record = { + 'claude-code': 'Skill installed', + codex: 'Skill installed', + cursor: 'Rule installed', + opencode: 'Command installed', + universal: 'Skill installed', +}; + +export function formatInstallSummary( + installs: Array<{ target: KtxAgentTarget; scope: KtxAgentScope; mode: KtxAgentInstallMode }>, + entries: InstallEntry[], + projectDir: string, +): string { + const entriesByTarget = new Map(); + let idx = 0; + for (const install of installs) { + const planned = plannedKtxAgentFiles({ projectDir, ...install }); + entriesByTarget.set(install.target, entries.slice(idx, idx + planned.length)); + idx += planned.length; + } + + const lines: string[] = []; + for (const install of installs) { + const targetEntries = entriesByTarget.get(install.target) ?? []; + lines.push(` ${targetDisplayNames[install.target]}`); + for (const entry of targetEntries) { + const displayPath = + install.scope === 'global' ? entry.path : relative(projectDir, entry.path); + if (entry.kind === 'file') { + const label = entry.role === 'rule' ? 'Rule installed' : fileEntryLabels[install.target]; + lines.push(` + ${label}`); + lines.push(` ${displayPath}`); + } else { + lines.push(` + MCP config added`); + lines.push(` ${displayPath}`); + } + } + } + return lines.join('\n'); +} + async function installTarget(input: { projectDir: string; target: KtxAgentTarget; @@ -254,8 +327,12 @@ async function installTarget(input: { const entries = plannedKtxAgentFiles(input); for (const entry of entries) { if (entry.kind === 'file') { + const content = + entry.role === 'rule' + ? ruleInstructionContent({ projectDir: input.projectDir }) + : cliInstructionContent({ projectDir: input.projectDir, target: input.target }); await mkdir(dirname(entry.path), { recursive: true }); - await writeFile(entry.path, cliInstructionContent({ projectDir: input.projectDir, target: input.target }), 'utf-8'); + await writeFile(entry.path, content, 'utf-8'); } else { await writeJsonKey(entry.path, entry.jsonPath, mcpConfig(input.projectDir)); } @@ -311,7 +388,6 @@ export async function runKtxSetupAgentsStep( { value: 'cursor', label: 'Cursor' }, { value: 'opencode', label: 'OpenCode' }, { value: 'universal', label: 'Universal .agents' }, - { value: 'back', label: 'Back' }, ], required: true, })) as KtxAgentTarget[]); @@ -327,7 +403,7 @@ export async function runKtxSetupAgentsStep( for (const install of installs) entries.push(...(await installTarget({ projectDir: args.projectDir, ...install }))); await writeManifest(args.projectDir, mergeManifest(args.projectDir, await readKtxAgentInstallManifest(args.projectDir), installs, entries)); await markAgentsComplete(args.projectDir); - io.stdout.write(`Agent integration installed for ${installs.map((install) => install.target).join(', ')}.\n`); + io.stdout.write(`\nAgent integration complete\n\n${formatInstallSummary(installs, entries, args.projectDir)}\n`); return { status: 'ready', projectDir: args.projectDir, installs }; } catch (error) { io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index d19be04c..0d803b7b 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -166,7 +166,12 @@ describe('setup context build state', () => { it('runs setup context build, verifies readiness, and marks context complete', async () => { await writeReadyProject(tempDir); const io = makeIo(); - const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); + const runContextBuildMock = vi.fn(async () => ({ + exitCode: 0, + detached: false, + reportIds: ['report-docs-1'], + artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'], + })); const verifyContextReady = vi.fn(async () => ({ ready: true, agentContextReady: true, @@ -204,6 +209,8 @@ describe('setup context build state', () => { runId: 'setup-context-local-abc123', status: 'completed', completedAt: '2026-05-09T10:00:00.000Z', + reportIds: ['report-docs-1'], + artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'], }); expect(io.stdout()).toContain('KTX context is ready for agents.'); }); diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 79f6cdd7..f88635f4 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -592,12 +592,16 @@ async function runBuild( }, }, ); + const completedReportIds = buildResult.reportIds ?? []; + const completedArtifactPaths = buildResult.artifactPaths ?? []; if (buildResult.detached) { const updatedAt = now().toISOString(); await writeKtxSetupContextState(args.projectDir, { ...runningState, status: 'detached', updatedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); return { status: 'detached', projectDir: args.projectDir, runId }; @@ -608,6 +612,8 @@ async function runBuild( ...runningState, status: 'failed', updatedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds], failureReason: 'Context build failed.', ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), @@ -622,6 +628,8 @@ async function runBuild( ...runningState, status: 'failed', updatedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, retryableFailedTargets: readiness.failedTargets ?? [], failureReason: readiness.details.join(' '), ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), @@ -640,6 +648,8 @@ async function runBuild( status: 'completed', updatedAt: completedAt, completedAt, + reportIds: completedReportIds, + artifactPaths: completedArtifactPaths, retryableFailedTargets: [], ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 41b12f95..3f268ce8 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -962,10 +962,95 @@ describe('setup databases step', () => { }); }); + it('prompts for discovered Postgres schemas before the first scan', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + selectValues: ['url'], + textValues: ['', 'env:DATABASE_URL'], + multiselectValues: [['orbit_analytics', 'orbit_raw']], + }); + const testConnection = vi.fn(async () => 0); + const scanConnection = vi.fn(async asyncScanProjectDir => { + const config = parseKtxProjectConfig(await readFile(join(asyncScanProjectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['postgres-warehouse']).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw'], + }); + return 0; + }); + const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseDrivers: ['postgres'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { prompts, testConnection, scanConnection, listSchemas }, + ); + + expect(result.status).toBe('ready'); + expect(listSchemas).toHaveBeenCalledWith(tempDir, 'postgres-warehouse'); + expect(prompts.multiselect).toHaveBeenCalledWith({ + message: expect.stringContaining('PostgreSQL schemas to scan'), + options: [ + { value: 'orbit_analytics', label: 'orbit_analytics' }, + { value: 'orbit_raw', label: 'orbit_raw' }, + { value: 'public', label: 'public' }, + ], + initialValues: ['orbit_analytics', 'orbit_raw'], + required: true, + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['postgres-warehouse']).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw'], + }); + expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw'); + }); + + it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({}); + const testConnection = vi.fn(async () => 0); + const scanConnection = vi.fn(async asyncScanProjectDir => { + const config = parseKtxProjectConfig(await readFile(join(asyncScanProjectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw', 'public'], + }); + return 0; + }); + const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { prompts, testConnection, scanConnection, listSchemas }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.multiselect).not.toHaveBeenCalled(); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + schemas: ['orbit_analytics', 'orbit_raw', 'public'], + }); + expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw, public'); + }); + it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => { const io = makeIo(); const testConnection = vi.fn(async () => 0); const scanConnection = vi.fn(async () => 0); + const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); const result = await runKtxSetupDatabasesStep( { @@ -978,10 +1063,11 @@ describe('setup databases step', () => { skipDatabases: false, }, io.io, - { testConnection, scanConnection }, + { testConnection, scanConnection, listSchemas }, ); expect(result.status).toBe('ready'); + expect(listSchemas).not.toHaveBeenCalled(); expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything()); expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything()); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index e22f4741..1838725d 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -52,6 +52,7 @@ export interface KtxSetupDatabasesPromptAdapter { message: string; options: Array<{ value: string; label: string }>; required?: boolean; + initialValues?: string[]; }): Promise; select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; @@ -76,6 +77,7 @@ export interface KtxSetupDatabasesDeps { prompts?: KtxSetupDatabasesPromptAdapter; testConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; scanConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; + listSchemas?: (projectDir: string, connectionId: string) => Promise; historicSqlProbe?: KtxSetupHistoricSqlProbe; } @@ -255,6 +257,21 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr } } +async function defaultListSchemas(projectDir: string, connectionId: string): Promise { + const project = await loadKtxProject({ projectDir }); + const connection = project.config.connections[connectionId]; + const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres'); + if (!isKtxPostgresConnectionConfig(connection)) { + return []; + } + const connector = new KtxPostgresScanConnector({ connectionId, connection }); + try { + return await connector.listSchemas(); + } finally { + await connector.cleanup(); + } +} + function existingConnectionIdsByDriver( connections: Record, driver: KtxSetupDatabaseDriver, @@ -814,6 +831,113 @@ async function writeConnectionConfig(input: { } } +function configuredSchemas(connection: KtxProjectConnectionConfig | undefined): string[] { + if (!connection) return []; + if (Array.isArray(connection.schemas)) { + return connection.schemas + .filter((schema): schema is string => typeof schema === 'string' && schema.trim().length > 0) + .map((schema) => schema.trim()); + } + return typeof connection.schema === 'string' && connection.schema.trim().length > 0 ? [connection.schema.trim()] : []; +} + +function defaultSchemaSelection(schemas: string[]): string[] { + const nonPublic = schemas.filter((schema) => schema !== 'public'); + return nonPublic.length > 0 ? nonPublic : schemas; +} + +async function writeConnectionSchemas(input: { + projectDir: string; + connectionId: string; + schemas: string[]; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + const connection = project.config.connections[input.connectionId]; + if (!connection) return; + const { schema: _schema, ...connectionWithoutLegacySchema } = connection; + await writeConnectionConfig({ + projectDir: input.projectDir, + connectionId: input.connectionId, + connection: { + ...connectionWithoutLegacySchema, + schemas: unique(input.schemas), + }, + }); +} + +async function maybeConfigurePostgresSchemas(input: { + projectDir: string; + connectionId: string; + args: KtxSetupDatabasesArgs; + prompts: KtxSetupDatabasesPromptAdapter; + deps: KtxSetupDatabasesDeps; + io: KtxCliIo; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + const connection = project.config.connections[input.connectionId]; + if (normalizeDriver(connection?.driver) !== 'postgres') { + return true; + } + + if (configuredSchemas(connection).length > 0) { + return true; + } + + if (input.args.databaseSchemas.length > 0) { + await writeConnectionSchemas({ + projectDir: input.projectDir, + connectionId: input.connectionId, + schemas: input.args.databaseSchemas, + }); + return true; + } + + let discoveredSchemas: string[]; + try { + discoveredSchemas = unique( + await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId), + ); + } catch (error) { + input.io.stderr.write( + `Could not discover PostgreSQL schemas for ${input.connectionId}; continuing with existing schema scope. ` + + `Pass --database-schema to set it explicitly. ${error instanceof Error ? error.message : String(error)}\n`, + ); + return true; + } + if (discoveredSchemas.length === 0) { + return true; + } + + let selectedSchemas: string[]; + if (input.args.inputMode === 'disabled' || discoveredSchemas.length === 1) { + selectedSchemas = discoveredSchemas; + } else { + const initialValues = defaultSchemaSelection(discoveredSchemas); + const choices = await input.prompts.multiselect({ + message: withMultiselectNavigation( + 'PostgreSQL schemas to scan\nKTX found multiple non-system schemas. Select every schema agents should use.', + ), + options: discoveredSchemas.map((schema) => ({ value: schema, label: schema })), + initialValues, + required: true, + }); + if (choices.includes('back')) { + return false; + } + selectedSchemas = choices.length > 0 ? choices : initialValues; + } + + await writeConnectionSchemas({ + projectDir: input.projectDir, + connectionId: input.connectionId, + schemas: selectedSchemas, + }); + writeSetupSection(input.io, `Selecting schemas for ${input.connectionId}`, [ + `Schemas: ${selectedSchemas.join(', ')}`, + ]); + return true; +} + async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); if (project.config.ingest.adapters.includes('historic-sql')) { @@ -902,6 +1026,8 @@ async function validateAndScanConnection(input: { connectionId: string; io: KtxCliIo; deps: KtxSetupDatabasesDeps; + args: KtxSetupDatabasesArgs; + prompts: KtxSetupDatabasesPromptAdapter; }): Promise { const testConnection = input.deps.testConnection ?? defaultTestConnection; const scanConnection = input.deps.scanConnection ?? defaultScanConnection; @@ -923,6 +1049,10 @@ async function validateAndScanConnection(input: { testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`); writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines); + if (!(await maybeConfigurePostgresSchemas(input))) { + return false; + } + await maybeRunHistoricSqlSetupProbe({ projectDir: input.projectDir, connectionId: input.connectionId, @@ -1069,7 +1199,7 @@ export async function runKtxSetupDatabasesStep( prompts, }); if (historicSqlResult === 'back') return { status: 'back', projectDir: args.projectDir }; - if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps }))) { + if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps, args, prompts }))) { return { status: 'failed', projectDir: args.projectDir }; } selectedConnectionIds.push(connectionId); @@ -1209,6 +1339,8 @@ export async function runKtxSetupDatabasesStep( connectionId: connectionChoice.connectionId, io, deps, + args, + prompts, })) ) { if (args.inputMode === 'disabled') return { status: 'failed', projectDir: args.projectDir }; diff --git a/packages/cli/src/setup-ready-menu.test.ts b/packages/cli/src/setup-ready-menu.test.ts index 1e64488e..643d8b3d 100644 --- a/packages/cli/src/setup-ready-menu.test.ts +++ b/packages/cli/src/setup-ready-menu.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it, vi } from 'vitest'; -import { isKtxSetupReady, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js'; +import { isKtxPreAgentSetupReady, isKtxSetupReady, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js'; import type { KtxSetupStatus } from './setup.js'; const readyStatus: KtxSetupStatus = { @@ -20,6 +20,13 @@ describe('setup ready menu', () => { expect(isKtxSetupReady({ ...readyStatus, agents: [] })).toBe(false); }); + it('recognizes pre-agent readiness without requiring agents', () => { + expect(isKtxPreAgentSetupReady(readyStatus)).toBe(true); + expect(isKtxPreAgentSetupReady({ ...readyStatus, agents: [] })).toBe(true); + expect(isKtxPreAgentSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false); + expect(isKtxPreAgentSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false); + }); + it('maps ready-project menu choices to setup sections', async () => { const prompts = { select: vi.fn(async () => 'agents'), cancel: vi.fn() }; diff --git a/packages/cli/src/setup-ready-menu.ts b/packages/cli/src/setup-ready-menu.ts index 675655f2..a101e45a 100644 --- a/packages/cli/src/setup-ready-menu.ts +++ b/packages/cli/src/setup-ready-menu.ts @@ -14,18 +14,21 @@ export interface KtxSetupReadyMenuDeps { prompts?: KtxSetupReadyMenuPromptAdapter; } -export function isKtxSetupReady(status: KtxSetupStatus): boolean { +export function isKtxPreAgentSetupReady(status: KtxSetupStatus): boolean { return ( status.project.ready && status.llm.ready && status.embeddings.ready && status.databases.every((database) => database.ready) && status.sources.every((source) => source.ready) && - status.context.ready && - status.agents.some((agent) => agent.ready) + status.context.ready ); } +export function isKtxSetupReady(status: KtxSetupStatus): boolean { + return isKtxPreAgentSetupReady(status) && status.agents.some((agent) => agent.ready); +} + function createPromptAdapter(): KtxSetupReadyMenuPromptAdapter { return { async select(options) { diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index b79e8e66..1a281261 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -205,7 +205,7 @@ describe('setup sources step', () => { mappings: { databaseMappings: { '1': 'warehouse' }, syncEnabled: { '1': true }, - syncMode: 'ONLY', + syncMode: 'ALL', }, }); expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io); @@ -707,7 +707,7 @@ describe('setup sources step', () => { mappings: { databaseMappings: { '1': 'warehouse' }, syncEnabled: { '1': true }, - syncMode: 'ONLY', + syncMode: 'ALL', }, }, deps: { diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 73d191dc..e6e7f41b 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -463,7 +463,7 @@ function buildMetabaseConnection(args: KtxSetupSourcesArgs): KtxProjectConnectio mappings: { databaseMappings: { [String(args.metabaseDatabaseId)]: args.sourceWarehouseConnectionId }, syncEnabled: { [String(args.metabaseDatabaseId)]: true }, - syncMode: 'ONLY', + syncMode: 'ALL', }, }; } diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index cf9d22a8..20f12e6e 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -1550,6 +1550,102 @@ describe('setup status', () => { expect(calls).toEqual(['agents']); }); + it('skips to agent setup when context is ready but agents are not configured', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' completed_steps:', + ' - project', + ' - llm', + ' - embeddings', + ' - sources', + ' - context', + ' database_connection_ids: []', + 'connections: {}', + 'llm:', + ' provider:', + ' backend: anthropic', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-ready', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: [], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-ready'), + }); + + const readyMenuSelect = vi.fn(); + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + readyMenuDeps: { prompts: { select: readyMenuSelect, cancel: vi.fn() } }, + model: async (args) => { + expect(args.skipLlm).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + embeddings: async (args) => { + expect(args.skipEmbeddings).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + databases: async (args) => { + expect(args.skipDatabases).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + sources: async (args) => { + expect(args.skipSources).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + agents: async () => { + calls.push('agents'); + return { + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(readyMenuSelect).not.toHaveBeenCalled(); + expect(calls).toEqual(['agents']); + }); + it('runs only project resolution, context gate, and agent setup in --agents mode', async () => { const io = makeIo(); const context = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-local-test' })); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 2aae882e..b9b0b412 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -24,7 +24,12 @@ import { import { type KtxSetupEmbeddingsDeps, runKtxSetupEmbeddingsStep } from './setup-embeddings.js'; import { type KtxSetupModelDeps, runKtxSetupAnthropicModelStep } from './setup-models.js'; import { type KtxSetupProjectDeps, runKtxSetupProjectStep } from './setup-project.js'; -import { isKtxSetupReady, type KtxSetupReadyMenuDeps, runKtxSetupReadyChangeMenu } from './setup-ready-menu.js'; +import { + isKtxPreAgentSetupReady, + isKtxSetupReady, + type KtxSetupReadyMenuDeps, + runKtxSetupReadyChangeMenu, +} from './setup-ready-menu.js'; import { type KtxSetupSourcesDeps, type KtxSetupSourceType, runKtxSetupSourcesStep } from './setup-sources.js'; import { withMenuOptionsSpacing } from './prompt-navigation.js'; import { @@ -531,9 +536,13 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup } } - if (args.inputMode !== 'disabled' && !agentsRequested && isKtxSetupReady(currentStatus)) { - readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action; - if (readyAction === 'exit') return 0; + if (args.inputMode !== 'disabled' && !agentsRequested) { + if (isKtxSetupReady(currentStatus)) { + readyAction = (await runKtxSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action; + if (readyAction === 'exit') return 0; + } else if (isKtxPreAgentSetupReady(currentStatus)) { + readyAction = 'agents'; + } } const runOnly = readyAction; diff --git a/packages/context/src/core/git.service.test.ts b/packages/context/src/core/git.service.test.ts index 308bbd4d..14e93495 100644 --- a/packages/context/src/core/git.service.test.ts +++ b/packages/context/src/core/git.service.test.ts @@ -256,6 +256,31 @@ describe('GitService', () => { await service.removeWorktree(wtDir).catch(() => undefined); await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); }); + + it('serializes concurrent commits from scoped services targeting the same worktree', async () => { + const { commitHash } = await writeAndCommit('seed.md', 'seed'); + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, `wt-${Date.now()}-fw-concurrent`); + await service.addWorktree(wtDir, 'session/concurrent', commitHash); + + const first = service.forWorktree(wtDir); + const second = service.forWorktree(wtDir); + await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8'); + await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8'); + + const [a, b] = await Promise.all([ + first.commitFile('a.md', 'add a', 'System User', 'system@example.com'), + second.commitFile('b.md', 'add b', 'System User', 'system@example.com'), + ]); + + expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/); + expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n'); + await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n'); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); }); describe('squashMergeIntoMain', () => { diff --git a/packages/context/src/core/git.service.ts b/packages/context/src/core/git.service.ts index 5da67e59..6539f9fd 100644 --- a/packages/context/src/core/git.service.ts +++ b/packages/context/src/core/git.service.ts @@ -32,6 +32,8 @@ export type SquashMergeResult = | { ok: false; conflict: true; conflictPaths: string[] }; export class GitService { + private static readonly mutationQueues = new Map>(); + private readonly logger: KtxLogger; private git!: SimpleGit; private configDir: string; @@ -92,6 +94,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.commitFileUnlocked(filePath, commitMessage, author, authorEmail)); + } + + private async commitFileUnlocked( + filePath: string, + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { // Stage the file @@ -166,6 +177,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.commitFilesUnlocked(filePaths, commitMessage, author, authorEmail)); + } + + private async commitFilesUnlocked( + filePaths: string[], + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { for (const filePath of filePaths) { @@ -231,6 +251,10 @@ export class GitService { if (filePaths.length === 0) { return; } + return this.withMutationQueue(() => this.checkoutFilesUnlocked(filePaths)); + } + + private async checkoutFilesUnlocked(filePaths: string[]): Promise { try { await this.git.checkout(['--', ...filePaths]); } catch (error) { @@ -292,6 +316,10 @@ export class GitService { if (!trimmed) { return; } + return this.withMutationQueue(() => this.addNoteUnlocked(commitHash, trimmed)); + } + + private async addNoteUnlocked(commitHash: string, trimmed: string): Promise { try { await this.git.raw(['notes', 'add', '-f', '-m', trimmed, commitHash]); } catch (error) { @@ -343,6 +371,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.deleteFileUnlocked(filePath, commitMessage, author, authorEmail)); + } + + private async deleteFileUnlocked( + filePath: string, + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { // Remove the file from git @@ -485,6 +522,13 @@ export class GitService { async squashTo( preHead: string, options: { message: string; author: string; authorEmail: string; expectedAuthor?: string }, + ): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> { + return this.withMutationQueue(() => this.squashToUnlocked(preHead, options)); + } + + private async squashToUnlocked( + preHead: string, + options: { message: string; author: string; authorEmail: string; expectedAuthor?: string }, ): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> { const { message, author, authorEmail } = options; const expectedAuthor = options.expectedAuthor ?? author; @@ -560,6 +604,15 @@ export class GitService { author: string, authorEmail: string, commitMessage: string, + ): Promise { + return this.withMutationQueue(() => this.squashMergeIntoMainUnlocked(branch, author, authorEmail, commitMessage)); + } + + private async squashMergeIntoMainUnlocked( + branch: string, + author: string, + authorEmail: string, + commitMessage: string, ): Promise { // Diff of HEAD..branch (two dots) lists commits/files reachable from `branch` that // aren't on HEAD — i.e. exactly what the squash would apply. Three dots (HEAD...branch) @@ -615,7 +668,7 @@ export class GitService { * range, which can pause the sequencer on conflicts. */ async resetHardTo(targetSha: string): Promise { - await this.git.raw(['reset', '--hard', targetSha]); + await this.withMutationQueue(() => this.git.raw(['reset', '--hard', targetSha])); } /** @@ -667,6 +720,10 @@ export class GitService { * Used by the memory agent to isolate per-session writes from interactive saves on main. */ async addWorktree(path: string, branch: string, startSha: string): Promise { + await this.withMutationQueue(() => this.addWorktreeUnlocked(path, branch, startSha)); + } + + private async addWorktreeUnlocked(path: string, branch: string, startSha: string): Promise { try { await this.git.raw(['worktree', 'add', '-b', branch, path, startSha]); } catch (error) { @@ -679,6 +736,10 @@ export class GitService { * worktrees are ktx-internal — a clean working tree is not required. */ async removeWorktree(path: string): Promise { + await this.withMutationQueue(() => this.removeWorktreeUnlocked(path)); + } + + private async removeWorktreeUnlocked(path: string): Promise { try { await this.git.raw(['worktree', 'remove', '--force', path]); } catch (error) { @@ -724,7 +785,7 @@ export class GitService { } async deleteBranch(branch: string, force = false): Promise { - await this.git.raw(['branch', force ? '-D' : '-d', branch]); + await this.withMutationQueue(() => this.git.raw(['branch', force ? '-D' : '-d', branch])); } /** @@ -745,6 +806,15 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => this.deleteDirectoryUnlocked(directoryPath, commitMessage, author, authorEmail)); + } + + private async deleteDirectoryUnlocked( + directoryPath: string, + commitMessage: string, + author: string, + authorEmail: string, ): Promise { try { // Remove the directory recursively from git @@ -795,6 +865,17 @@ export class GitService { commitMessage: string, author: string, authorEmail: string, + ): Promise { + return this.withMutationQueue(() => + this.deleteDirectoriesUnlocked(directoryPaths, commitMessage, author, authorEmail), + ); + } + + private async deleteDirectoriesUnlocked( + directoryPaths: string[], + commitMessage: string, + author: string, + authorEmail: string, ): Promise { if (directoryPaths.length === 0) { return { @@ -852,4 +933,27 @@ export class GitService { created: true, }; } + + private async withMutationQueue(operation: () => Promise): Promise { + const key = this.configDir; + const previous = GitService.mutationQueues.get(key) ?? Promise.resolve(); + let release: () => void = () => {}; + const current = previous.catch(() => undefined).then( + () => + new Promise((resolve) => { + release = resolve; + }), + ); + GitService.mutationQueues.set(key, current); + + await previous.catch(() => undefined); + try { + return await operation(); + } finally { + release(); + if (GitService.mutationQueues.get(key) === current) { + GitService.mutationQueues.delete(key); + } + } + } } diff --git a/packages/context/src/ingest/adapters/metabase/chunk.test.ts b/packages/context/src/ingest/adapters/metabase/chunk.test.ts index 46a3ce97..1991e147 100644 --- a/packages/context/src/ingest/adapters/metabase/chunk.test.ts +++ b/packages/context/src/ingest/adapters/metabase/chunk.test.ts @@ -284,6 +284,18 @@ describe('chunkMetabaseStagedDir — syncMode enum coverage', () => { expect(allRawFiles).not.toContain('cards/200.json'); }); + it('ONLY with no selections includes every matching card for old generated configs', async () => { + await writeInline(dir, 'sync-config.json', { + ...BASE_SYNC, + syncMode: 'ONLY', + selections: [], + }); + const result = await chunkMetabaseStagedDir(dir); + const allRawFiles = result.workUnits.flatMap((wu) => wu.rawFiles); + expect(allRawFiles).toContain('cards/100.json'); + expect(allRawFiles).toContain('cards/200.json'); + }); + it('EXCEPT excludes cards in selected collections; includes the rest', async () => { await writeInline(dir, 'sync-config.json', { ...BASE_SYNC, diff --git a/packages/context/src/ingest/adapters/metabase/chunk.ts b/packages/context/src/ingest/adapters/metabase/chunk.ts index 2fe719c5..ab2b1d78 100644 --- a/packages/context/src/ingest/adapters/metabase/chunk.ts +++ b/packages/context/src/ingest/adapters/metabase/chunk.ts @@ -66,7 +66,7 @@ function cardMatchesSyncConfig(card: StagedCardFile, config: StagedSyncConfig): if (card.archived) { return false; } - if (config.syncMode === 'ALL') { + if (config.syncMode === 'ALL' || (config.syncMode === 'ONLY' && config.selections.length === 0)) { return true; } const selectedCollections = new Set( diff --git a/packages/context/src/ingest/adapters/metabase/client.test.ts b/packages/context/src/ingest/adapters/metabase/client.test.ts index d6d7a4d9..f81939c6 100644 --- a/packages/context/src/ingest/adapters/metabase/client.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client.test.ts @@ -327,6 +327,40 @@ describe('MetabaseClient.getResolvedSql', () => { expect(result?.resolvedSql).toBe('SELECT * FROM (SELECT a, b FROM base) t '); }); + it('inlines native-query snippets before checking for remaining variables', async () => { + const requestSpy = vi.fn().mockResolvedValue([ + { + id: 1, + name: 'account_join', + content: 'LEFT JOIN accounts a ON a.account_id = mart.account_id', + }, + ]); + const requestWithCustomRetrySpy = vi.fn(); + const client = makeClient((client) => { + Reflect.set(client, 'request', requestSpy); + Reflect.set(client, 'requestWithCustomRetry', requestWithCustomRetrySpy); + }); + const card = nativeCard('SELECT a.account_name FROM mart {{snippet: account_join}}', { + 'snippet: account_join': { + id: 'snippet-tag', + name: 'snippet: account_join', + type: 'snippet', + 'snippet-name': 'account_join', + 'snippet-id': 1, + }, + }); + + const result = await client.getResolvedSql(card); + + expect(requestSpy).toHaveBeenCalledWith('GET', '/api/native-query-snippet'); + expect(requestWithCustomRetrySpy).not.toHaveBeenCalled(); + expect(result?.resolutionStatus).toBe('resolved'); + expect(result?.resolvedSql).toBe( + 'SELECT a.account_name FROM mart LEFT JOIN accounts a ON a.account_id = mart.account_id', + ); + expect(result?.resolvedSql).not.toContain('{{snippet:'); + }); + it('uses /api/dataset/native for naked variables and prepends a warning comment', async () => { const requestSpy = vi.fn().mockResolvedValue({ query: "SELECT * WHERE id = 'placeholder' AND n = 1" }); const client = makeClient((client) => { diff --git a/packages/context/src/ingest/adapters/metabase/client.ts b/packages/context/src/ingest/adapters/metabase/client.ts index 2ddd970a..70e70964 100644 --- a/packages/context/src/ingest/adapters/metabase/client.ts +++ b/packages/context/src/ingest/adapters/metabase/client.ts @@ -39,6 +39,13 @@ interface TemplateTagInfo { dummyValue: string | null; } +interface NativeQuerySnippet { + id: number; + name: string; + content: string; + archived?: boolean | null; +} + interface CreateCardParams { name: string; databaseId: number; @@ -100,6 +107,43 @@ function collectRemainingPlaceholderNames(sql: string): Set { return names; } +function collectRemainingSnippetNames(sql: string): Set { + const names = new Set(); + for (const match of sql.matchAll(/\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi)) { + names.add(match[1].trim()); + } + return names; +} + +function normalizeSnippetName(name: string | null | undefined): string { + return (name ?? '').replace(/^snippet:\s*/i, '').trim().toLowerCase(); +} + +function parseNativeQuerySnippets(value: unknown): NativeQuerySnippet[] { + const rawItems = Array.isArray(value) + ? value + : typeof value === 'object' && value !== null && Array.isArray((value as { data?: unknown }).data) + ? (value as { data: unknown[] }).data + : []; + const snippets: NativeQuerySnippet[] = []; + for (const item of rawItems) { + if (typeof item !== 'object' || item === null || Array.isArray(item)) { + continue; + } + const rec = item as Record; + if (typeof rec.id !== 'number' || typeof rec.name !== 'string' || typeof rec.content !== 'string') { + continue; + } + snippets.push({ + id: rec.id, + name: rec.name, + content: rec.content, + ...(typeof rec.archived === 'boolean' ? { archived: rec.archived } : {}), + }); + } + return snippets; +} + function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): MetabaseDatasetQuery { if (datasetQuery?.stages?.[0]?.native !== undefined) { const stages = [...(datasetQuery.stages ?? [])]; @@ -148,6 +192,7 @@ export class MetabaseClient implements MetabaseRuntimeClient { private readonly logger: MetabaseClientLogger; private readonly baseUrl: string; private readonly config: MetabaseClientConfig; + private snippetCache: Promise | null = null; constructor( runtime: MetabaseClientRuntimeConfig, @@ -261,6 +306,63 @@ export class MetabaseClient implements MetabaseRuntimeClient { return this.request('GET', '/api/card/?f=all'); } + private getNativeQuerySnippets(): Promise { + this.snippetCache ??= this.request('GET', '/api/native-query-snippet').then(parseNativeQuerySnippets); + return this.snippetCache; + } + + private async inlineNativeQuerySnippets( + sql: string, + templateTags: MetabaseTemplateTag[], + cardId: number, + ): Promise<{ sql: string; unresolved: string[] }> { + const names = collectRemainingSnippetNames(sql); + if (names.size === 0) { + return { sql, unresolved: [] }; + } + + let snippets: NativeQuerySnippet[]; + try { + snippets = await this.getNativeQuerySnippets(); + } catch (error) { + this.logger.warn( + `[metabase] failed to load native query snippets for card ${cardId}; leaving snippet placeholders unresolved: ${error instanceof Error ? error.message : String(error)}`, + ); + return { sql, unresolved: [...names] }; + } + + const snippetsById = new Map(); + const snippetsByName = new Map(); + for (const snippet of snippets) { + if (snippet.archived === true) { + continue; + } + snippetsById.set(snippet.id, snippet); + snippetsByName.set(normalizeSnippetName(snippet.name), snippet); + } + + const snippetTags = templateTags.filter((tag) => tag.type === 'snippet'); + const unresolved = new Set(); + const inlinedSql = sql.replace(/\{\{\s*snippet:\s*([^}]+?)\s*\}\}/gi, (match, rawName: string) => { + const normalizedName = normalizeSnippetName(rawName); + const tag = snippetTags.find( + (candidate) => + normalizeSnippetName(candidate['snippet-name']) === normalizedName || + normalizeSnippetName(candidate.name) === normalizedName, + ); + const snippet = + (typeof tag?.['snippet-id'] === 'number' ? snippetsById.get(tag['snippet-id']) : undefined) ?? + snippetsByName.get(normalizedName); + if (!snippet) { + unresolved.add(rawName.trim()); + return match; + } + return snippet.content; + }); + + return { sql: inlinedSql, unresolved: [...unresolved] }; + } + async convertMbqlToNative(datasetQuery: MetabaseDatasetQuery): Promise { return this.request('POST', '/api/dataset/native', { ...datasetQuery, @@ -351,7 +453,18 @@ export class MetabaseClient implements MetabaseRuntimeClient { // silently filter rows out — see incident with auction_seller_bidder_pair_suspicion). let processedSql = stripOptionalClauses(nativeQuery); - // Step 2: inline {{#CARD_ID}} card references locally. Recursively strip optional + // Step 2: inline native-query snippets. Metabase's substitution endpoint does not + // always expand {{snippet: name}} for fetched card SQL, but the snippets API does. + const snippetResult = await this.inlineNativeQuerySnippets(processedSql, templateTagEntries, card.id); + processedSql = snippetResult.sql; + if (snippetResult.unresolved.length > 0) { + this.logger.warn( + `[metabase] card ${card.id} has unresolved SQL snippets: ${snippetResult.unresolved.join(', ')}`, + ); + return { resolvedSql: processedSql, templateTags, resolutionStatus: 'fallback' }; + } + + // Step 3: inline {{#CARD_ID}} card references locally. Recursively strip optional // clauses in referenced cards too — the same reasoning applies all the way down. try { processedSql = await expandCardReferences(processedSql, { @@ -361,7 +474,17 @@ export class MetabaseClient implements MetabaseRuntimeClient { if (!referencedNative) { throw new Error(`referenced card ${id} has no native query`); } - return { native_query: stripOptionalClauses(referencedNative) }; + const referencedSnippetResult = await this.inlineNativeQuerySnippets( + stripOptionalClauses(referencedNative), + Object.values(this.getTemplateTags(referenced)), + referenced.id, + ); + if (referencedSnippetResult.unresolved.length > 0) { + throw new Error( + `referenced card ${id} has unresolved SQL snippets: ${referencedSnippetResult.unresolved.join(', ')}`, + ); + } + return { native_query: referencedSnippetResult.sql }; }, }); } catch (err) { @@ -372,7 +495,7 @@ export class MetabaseClient implements MetabaseRuntimeClient { throw err; } - // Step 3: collect template tags that still appear in the SQL after strip + inline. + // Step 4: collect template tags that still appear in the SQL after strip + inline. // Anything bracketed-only is gone now; anything card-referenced is inlined. const remainingNames = collectRemainingPlaceholderNames(processedSql); const remainingTags = templateTagEntries.filter((tag) => tag.type !== 'snippet' && remainingNames.has(tag.name)); @@ -381,7 +504,7 @@ export class MetabaseClient implements MetabaseRuntimeClient { return { resolvedSql: processedSql, templateTags, resolutionStatus: 'resolved' }; } - // Step 4: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's + // Step 5: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's // substitution endpoint. Only required because we can't translate dimension-tag // bindings to warehouse columns ourselves. Prepend a SQL comment listing every // dummy substitution so downstream consumers (the metabase_ingest LLM) know which diff --git a/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts b/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts index 1d8d2478..9768c0c9 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts @@ -57,13 +57,9 @@ describe('computeFetchScope', () => { }); }); - it('returns empty explicit scope for ONLY with no selections', () => { + it('treats generated ONLY with no selections as all', () => { const scope = computeFetchScope({ ...BASE_CONFIG, syncMode: 'ONLY', selections: [] }); - expect(scope).toEqual({ - kind: 'explicit', - includeCardIds: new Set(), - includeCollectionIds: new Set(), - }); + expect(scope).toEqual({ kind: 'all' }); }); }); diff --git a/packages/context/src/ingest/adapters/metabase/fetch-scope.ts b/packages/context/src/ingest/adapters/metabase/fetch-scope.ts index bee97ec8..e09ef7c3 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch-scope.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch-scope.ts @@ -11,7 +11,7 @@ export type FetchScope = * union the fetcher switches on. Pure function; no I/O, no side effects. */ export function computeFetchScope(syncConfig: StagedSyncConfig): FetchScope { - if (syncConfig.syncMode === 'ALL') { + if (syncConfig.syncMode === 'ALL' || (syncConfig.syncMode === 'ONLY' && syncConfig.selections.length === 0)) { return { kind: 'all' }; } const cardIds = new Set(); diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts index 6ba778e5..0515842a 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -79,6 +79,21 @@ function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['t return actions.filter((action) => action.target === target).length; } +function isStructuredToolFailure(output: unknown): boolean { + if (!output || typeof output !== 'object') { + return false; + } + const structured = (output as { structured?: unknown }).structured; + return !!structured && typeof structured === 'object' && (structured as { success?: unknown }).success === false; +} + +function isFailedToolCall(entry: ToolCallLogEntry): boolean { + if (entry.error) { + return true; + } + return (entry.toolName === 'sl_write_source' || entry.toolName === 'wiki_write') && isStructuredToolFailure(entry.output); +} + function reportIdFromCreateResult(result: unknown): string | undefined { if (!result || typeof result !== 'object' || !('id' in result)) { return undefined; @@ -344,7 +359,7 @@ export class IngestBundleRunner { toolNames: new Set(), } satisfies MutableToolTranscriptSummary); current.toolCallCount += 1; - current.errorCount += entry.error ? 1 : 0; + current.errorCount += isFailedToolCall(entry) ? 1 : 0; current.toolNames.add(entry.toolName); transcriptSummaries.set(entry.wuKey, current); }; @@ -712,6 +727,7 @@ export class IngestBundleRunner { sourceKey: job.sourceKey, connectionId: job.connectionId, jobId: job.jobId, + toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.errorCount ?? 0, onStepFinish: ({ stepIndex, stepBudget }) => { memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); }, diff --git a/packages/context/src/ingest/local-bundle-ingest.test.ts b/packages/context/src/ingest/local-bundle-ingest.test.ts index aa423d9e..6e9aa4aa 100644 --- a/packages/context/src/ingest/local-bundle-ingest.test.ts +++ b/packages/context/src/ingest/local-bundle-ingest.test.ts @@ -1,6 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; +import Database from 'better-sqlite3'; import { AgentRunnerService } from '../agent/index.js'; import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js'; import { makeLocalGitRepo } from '../test/make-local-git-repo.js'; @@ -57,6 +58,34 @@ class LookerSlWritingAgentRunner extends AgentRunnerService { } } +class WikiWritingAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags?.operationName === 'ingest-bundle-wu') { + const wikiWrite = params.toolSet.wiki_write; + if (!wikiWrite?.execute) { + throw new Error('wiki_write tool was not available to the WorkUnit'); + } + const result = await wikiWrite.execute( + { + key: 'orders_context', + summary: 'Orders source context', + content: 'Orders are purchase records used for revenue analysis.', + tags: ['orders'], + }, + { toolCallId: 'wiki-write' }, + ); + if (!result.structured.success) { + throw new Error(result.markdown); + } + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + function makeLookerRuntimeClient() { const lookerModels = { models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], @@ -252,6 +281,33 @@ describe('canonical local ingest', () => { }); }); + it('indexes wiki pages written by local ingest into the SQLite knowledge tables', async () => { + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const agentRunner = new WikiWritingAgentRunner(); + + const result = await runLocalIngest({ + project, + adapters: [new FakeSourceAdapter()], + adapter: 'fake', + connectionId: 'warehouse', + sourceDir, + jobId: 'wiki-local-1', + agentRunner, + }); + + expect(result.result.failedWorkUnits).toEqual([]); + const db = new Database(join(project.projectDir, '.ktx', 'db.sqlite'), { readonly: true }); + try { + expect(db.prepare('SELECT key, summary FROM knowledge_pages ORDER BY key').all()).toEqual([ + { key: 'orders_context', summary: 'Orders source context' }, + ]); + } finally { + db.close(); + } + }); + it('rejects direct Metabase scheduled pulls before requiring a local ingest LLM provider', async () => { const projectDir = join(tempDir, 'metabase-project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 6665682b..f7c8be80 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -56,6 +56,8 @@ import { type KnowledgeIndexPort, KnowledgeWikiService, searchLocalKnowledgePages, + SqliteKnowledgeIndex, + type SqliteKnowledgeIndexPage, WikiListTagsTool, WikiReadTool, WikiRemoveTool, @@ -257,6 +259,17 @@ function parseWiki(raw: string): { summary: string; content: string } { }; } +function parseWikiTags(raw: string): string[] { + const match = raw.match(/^---\n([\s\S]*?)\n---\n?/); + if (!match) { + return []; + } + const frontmatter = (YAML.parse(match[1]) ?? {}) as Record; + return Array.isArray(frontmatter.tags) + ? frontmatter.tags.filter((tag): tag is string => typeof tag === 'string') + : []; +} + function scoreText(text: string, query: string): number { const normalized = query.toLowerCase().trim(); if (!normalized) { @@ -271,21 +284,49 @@ function scoreText(text: string, query: string): number { } class LocalKnowledgeIndex implements KnowledgeIndexPort { - constructor(private readonly project: KtxLocalProject) {} + private readonly sqlite: SqliteKnowledgeIndex; - async upsertPage(): Promise {} - - async applyDiffTransactional(): Promise {} - - async getExistingSearchTexts(): Promise> { - return new Map(); + constructor(private readonly project: KtxLocalProject) { + this.sqlite = new SqliteKnowledgeIndex({ dbPath: ktxLocalStateDbPath(project) }); } - async deleteStale(): Promise {} + async upsertPage(): Promise { + await this.syncAllPagesFromDisk(); + } - async deleteByScope(): Promise {} + async applyDiffTransactional(): Promise { + await this.syncAllPagesFromDisk(); + } - async deleteByKey(): Promise {} + async getExistingSearchTexts( + scope: string, + scopeId: string | null, + ): Promise> { + const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${scopeId}/`; + const result = new Map(); + for (const [path, page] of this.sqlite.getExistingPages()) { + if (!path.startsWith(prefix)) { + continue; + } + result.set(path.slice(prefix.length).replace(/\.md$/, ''), { + searchText: page.searchText, + hasEmbedding: page.embedding !== null, + }); + } + return result; + } + + async deleteStale(): Promise { + await this.syncAllPagesFromDisk(); + } + + async deleteByScope(): Promise { + await this.syncAllPagesFromDisk(); + } + + async deleteByKey(): Promise { + await this.syncAllPagesFromDisk(); + } async findPageByKey(scope: string, scopeId: string | null, pageKey: string) { const path = scope === 'GLOBAL' ? `knowledge/global/${pageKey}.md` : `knowledge/user/${scopeId}/${pageKey}.md`; @@ -344,6 +385,41 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { .sort((left, right) => right.rrfScore - left.rrfScore || left.pageKey.localeCompare(right.pageKey)) .slice(0, limit); } + + private async syncAllPagesFromDisk(): Promise { + const listed = await this.project.fileStore.listFiles('knowledge', true); + const pages: SqliteKnowledgeIndexPage[] = []; + for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { + const parsedPath = parseKnowledgeIndexPath(file); + if (!parsedPath) { + continue; + } + const path = `knowledge/${file}`; + const raw = await this.project.fileStore.readFile(path); + const parsed = parseWiki(raw.content); + pages.push({ + path, + key: parsedPath.pageKey, + scope: parsedPath.scope, + summary: parsed.summary, + content: parsed.content, + tags: parseWikiTags(raw.content), + embedding: null, + }); + } + this.sqlite.sync(pages); + } +} + +function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; pageKey: string } | null { + const segments = file.split('/'); + if (segments.length === 2 && segments[0] === 'global') { + return { scope: 'GLOBAL', pageKey: segments[1].replace(/\.md$/, '') }; + } + if (segments.length === 3 && segments[0] === 'user') { + return { scope: 'USER', pageKey: segments[2].replace(/\.md$/, '') }; + } + return null; } class NoopKnowledgeEventPort implements KnowledgeEventPort { diff --git a/packages/context/src/ingest/stages/stage-3-work-units.test.ts b/packages/context/src/ingest/stages/stage-3-work-units.test.ts index ba01d60d..23ec3fa8 100644 --- a/packages/context/src/ingest/stages/stage-3-work-units.test.ts +++ b/packages/context/src/ingest/stages/stage-3-work-units.test.ts @@ -106,6 +106,21 @@ describe('Stage 3 — executeWorkUnit', () => { expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); }); + it('tool failures reset to the pre-WU SHA and mark WU failed even when the loop ends naturally', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' }); + deps.toolFailureCount = vi.fn().mockReturnValue(2); + + const outcome = await executeWorkUnit(deps, makeWu()); + + expect(outcome.status).toBe('failed'); + expect(outcome.reason).toContain('2 tool call(s) failed'); + expect(outcome.actions).toEqual([]); + expect(outcome.touchedSlSources).toEqual([]); + expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); + }); + it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => { const deps = makeDeps(); deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); diff --git a/packages/context/src/ingest/stages/stage-3-work-units.ts b/packages/context/src/ingest/stages/stage-3-work-units.ts index bbf23079..b6e64f86 100644 --- a/packages/context/src/ingest/stages/stage-3-work-units.ts +++ b/packages/context/src/ingest/stages/stage-3-work-units.ts @@ -28,6 +28,7 @@ export interface WorkUnitExecutionDeps { connectionId: string; jobId: string; onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + toolFailureCount?: (unitKey: string) => number; } export interface WorkUnitOutcome { @@ -128,6 +129,11 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit) return failWithReset(runResult.error?.message ?? 'agent loop errored'); } + const toolFailureCount = deps.toolFailureCount?.(wu.unitKey) ?? 0; + if (toolFailureCount > 0) { + return failWithReset(`${toolFailureCount} tool call(s) failed during WorkUnit ${wu.unitKey}`); + } + const touched = listTouchedSlSources(deps.captureSession.touchedSlSources); if (touched.length > 0) { const validation = await deps.validateTouchedSources(touched); From 86c818a454ffe0a3aa7da20e1ba87e2ad8c3f8b8 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Mon, 11 May 2026 00:31:15 -0700 Subject: [PATCH 7/8] Normalize semantic layer descriptions --- packages/cli/src/cli-runtime.ts | 2 +- packages/cli/src/context-build-view.test.ts | 29 +++- packages/cli/src/context-build-view.ts | 38 ++++- packages/context/src/project/project.test.ts | 2 + packages/context/src/project/project.ts | 5 +- .../context/src/project/setup-config.test.ts | 4 +- packages/context/src/project/setup-config.ts | 10 +- .../src/sl/description-normalization.ts | 136 ++++++++++++++++++ packages/context/src/sl/local-sl.ts | 5 +- packages/context/src/sl/schemas.ts | 5 + .../src/sl/semantic-layer.service.test.ts | 29 ++++ .../context/src/sl/semantic-layer.service.ts | 43 +++--- packages/context/src/sl/sl-search.service.ts | 2 + .../src/sl/tools/sl-edit-source.tool.test.ts | 33 +++++ .../src/sl/tools/sl-edit-source.tool.ts | 2 + .../src/sl/tools/sl-write-source.tool.test.ts | 83 +++++++++++ .../src/sl/tools/sl-write-source.tool.ts | 9 +- .../src/wiki/tools/wiki-write.tool.test.ts | 36 +++++ .../context/src/wiki/tools/wiki-write.tool.ts | 18 ++- python/ktx-sl/semantic_layer/models.py | 26 ++++ python/ktx-sl/tests/test_models.py | 18 +++ 21 files changed, 498 insertions(+), 37 deletions(-) create mode 100644 packages/context/src/sl/description-normalization.ts diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 60129922..124d132d 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -22,7 +22,7 @@ export interface KtxCliPackageInfo { } export interface KtxCliIo { - stdout: { isTTY?: boolean; write(chunk: string): void }; + stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void }; stderr: { write(chunk: string): void }; } diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index c6ef0eb1..d7069578 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it, vi } from 'vitest'; import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js'; import { extractProgressMessage, + createRepainter, initViewState, parseIngestSummary, parseScanSummary, @@ -11,13 +12,14 @@ import { viewStateFromSourceProgress, } from './context-build-view.js'; -function makeIo(options: { isTTY?: boolean } = {}) { +function makeIo(options: { isTTY?: boolean; columns?: number } = {}) { let stdout = ''; let stderr = ''; return { io: { stdout: { isTTY: options.isTTY, + columns: options.columns, write: (chunk: string) => { stdout += chunk; }, @@ -305,6 +307,31 @@ describe('renderContextBuildView', () => { }); }); +describe('createRepainter', () => { + it('moves up visual rows, not just newline count, when content wraps', () => { + const io = makeIo({ isTTY: true, columns: 5 }); + const repainter = createRepainter(io.io); + + repainter.paint('abcdefghijk\n'); + repainter.paint('updated\n'); + repainter.paint('done\n'); + + const cursorMoves = [...io.stdout().matchAll(/\u001b\[(\d+)A\r/g)].map((match) => Number(match[1])); + expect(cursorMoves).toEqual([3, 2]); + }); + + it('returns to the start of a single-line frame without moving up when content has no newline', () => { + const io = makeIo({ isTTY: true, columns: 80 }); + const repainter = createRepainter(io.io); + + repainter.paint('hello'); + repainter.paint('bye'); + + expect(io.stdout()).toContain('\rbye'); + expect(io.stdout()).not.toContain('\u001b[1A\rbye'); + }); +}); + describe('runContextBuild', () => { it('executes scan targets before source-ingest targets', async () => { const io = makeIo(); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index bb661655..571c71dd 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -226,6 +226,7 @@ export function renderContextBuildView( // --- IO Capture --- const ESC_K_RE = new RegExp(`${ESC.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\[K`, 'g'); +const ANSI_RE = /\x1b\[[0-9;]*m/g; export function extractProgressMessage(chunk: string): string | null { const cleaned = chunk.replace(/^\r/, '').replace(ESC_K_RE, '').replace(/\n$/, '').trim(); @@ -342,16 +343,45 @@ export function viewStateFromSourceProgress( // --- Repaint --- export function createRepainter(io: KtxCliIo) { - let lastLineCount = 0; + let hasPainted = false; + let lastCursorUpRows = 0; + + const terminalColumns = () => { + for (const columns of [io.stdout.columns, process.stdout.columns]) { + if (typeof columns === 'number' && Number.isFinite(columns) && columns > 0) return columns; + } + return 80; + }; + + const visualRows = (line: string, columns: number) => { + const plainLength = line.replace(ANSI_RE, '').length; + return Math.max(1, Math.ceil(plainLength / columns)); + }; + + const cursorUpRowsAfterWrite = (content: string) => { + const columns = terminalColumns(); + const endsWithNewline = content.endsWith('\n'); + const lines = content.split('\n'); + return lines.reduce((sum, line, index) => { + if (index === lines.length - 1) { + return endsWithNewline ? sum : sum + Math.max(0, visualRows(line, columns) - 1); + } + return sum + visualRows(line, columns); + }, 0); + }; return { paint(content: string) { - if (lastLineCount > 0) { - io.stdout.write(`${ESC}[${lastLineCount}A\r`); + if (hasPainted) { + if (lastCursorUpRows > 0) { + io.stdout.write(`${ESC}[${lastCursorUpRows}A`); + } + io.stdout.write('\r'); } io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`)); io.stdout.write(`${ESC}[J`); - lastLineCount = (content.match(/\n/g) ?? []).length; + hasPainted = true; + lastCursorUpRows = cursorUpRowsAfterWrite(content); }, }; } diff --git a/packages/context/src/project/project.test.ts b/packages/context/src/project/project.test.ts index ec2120aa..b6e88604 100644 --- a/packages/context/src/project/project.test.ts +++ b/packages/context/src/project/project.test.ts @@ -32,6 +32,8 @@ describe('KTX local project runtime', () => { const gitignore = await readFile(join(projectDir, '.ktx/.gitignore'), 'utf-8'); expect(gitignore).toContain('cache/'); expect(gitignore).toContain('db.sqlite'); + expect(gitignore).toContain('db.sqlite-*'); + expect(gitignore).toContain('ingest-transcripts/'); expect(gitignore).toContain('secrets/'); expect(gitignore).toContain('setup/'); expect(gitignore).toContain('agents/'); diff --git a/packages/context/src/project/project.ts b/packages/context/src/project/project.ts index cbe522a5..59e594a2 100644 --- a/packages/context/src/project/project.ts +++ b/packages/context/src/project/project.ts @@ -35,7 +35,10 @@ export interface InitKtxProjectResult extends KtxLocalProject { } const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [ - { path: '.ktx/.gitignore', content: 'cache/\ndb.sqlite\nsecrets/\nsetup/\nagents/\n' }, + { + path: '.ktx/.gitignore', + content: 'cache/\ndb.sqlite\ndb.sqlite-*\ningest-transcripts/\nsecrets/\nsetup/\nagents/\n', + }, { path: '.ktx/prompts/.gitkeep', content: '' }, { path: '.ktx/skills/.gitkeep', content: '' }, { path: 'knowledge/global/.gitkeep', content: '' }, diff --git a/packages/context/src/project/setup-config.test.ts b/packages/context/src/project/setup-config.test.ts index 3fc8726b..212f16e1 100644 --- a/packages/context/src/project/setup-config.test.ts +++ b/packages/context/src/project/setup-config.test.ts @@ -67,10 +67,10 @@ describe('KTX setup config helpers', () => { it('merges setup-local gitignore entries without removing existing lines', () => { expect(mergeKtxSetupGitignoreEntries('cache/\ndb.sqlite\n')).toBe( - ['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/', ''].join('\n'), + ['cache/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'secrets/', 'setup/', 'agents/', ''].join('\n'), ); expect(mergeKtxSetupGitignoreEntries('cache/\nsecrets/\n')).toBe( - ['cache/', 'secrets/', 'setup/', 'agents/', ''].join('\n'), + ['cache/', 'secrets/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'setup/', 'agents/', ''].join('\n'), ); }); }); diff --git a/packages/context/src/project/setup-config.ts b/packages/context/src/project/setup-config.ts index d0f46cf0..76951ef6 100644 --- a/packages/context/src/project/setup-config.ts +++ b/packages/context/src/project/setup-config.ts @@ -4,7 +4,15 @@ export const KTX_SETUP_STEPS = ['project', 'llm', 'embeddings', 'databases', 'so export type KtxSetupStep = (typeof KTX_SETUP_STEPS)[number]; -const SETUP_GITIGNORE_ENTRIES = ['secrets/', 'setup/', 'agents/'] as const; +const SETUP_GITIGNORE_ENTRIES = [ + 'cache/', + 'db.sqlite', + 'db.sqlite-*', + 'ingest-transcripts/', + 'secrets/', + 'setup/', + 'agents/', +] as const; export function markKtxSetupStepComplete(config: KtxProjectConfig, step: KtxSetupStep): KtxProjectConfig { const databaseConnectionIds = config.setup?.database_connection_ids ?? []; diff --git a/packages/context/src/sl/description-normalization.ts b/packages/context/src/sl/description-normalization.ts new file mode 100644 index 00000000..5a1b5ab6 --- /dev/null +++ b/packages/context/src/sl/description-normalization.ts @@ -0,0 +1,136 @@ +type DescriptionMap = Record; + +interface NormalizeDescriptionOptions { + fillMissing?: boolean; +} + +function cleanText(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function cleanDescriptionMap(value: unknown): DescriptionMap { + const result: DescriptionMap = {}; + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return result; + } + for (const [key, text] of Object.entries(value)) { + const cleaned = cleanText(text); + if (cleaned) { + result[key] = cleaned; + } + } + return result; +} + +function hasDescriptions(descriptions: DescriptionMap): boolean { + return Object.keys(descriptions).length > 0; +} + +function withDescriptionMap(record: Record, fallback: string | null): Record { + const descriptions = cleanDescriptionMap(record.descriptions); + const flatDescription = cleanText(record.description); + if (flatDescription && !descriptions.user) { + descriptions.user = flatDescription; + } + if (!hasDescriptions(descriptions) && fallback) { + descriptions.ktx = fallback; + } + + const next = { ...record }; + delete next.description; + if (hasDescriptions(descriptions)) { + next.descriptions = descriptions; + } else { + delete next.descriptions; + } + return next; +} + +function humanizeIdentifier(value: string): string { + return value + .replace(/([a-z0-9])([A-Z])/g, '$1 $2') + .replace(/[_-]+/g, ' ') + .replace(/\s+/g, ' ') + .trim() + .toLowerCase(); +} + +function formatCount(count: number, singular: string, plural = `${singular}s`): string | null { + if (count <= 0) { + return null; + } + return `${count} ${count === 1 ? singular : plural}`; +} + +function sourceFallback(source: Record, sourceName: string): string { + const table = cleanText(source.table); + const sql = cleanText(source.sql); + if (table) { + return `Semantic-layer source for ${sourceName} backed by ${table}.`; + } + if (sql) { + return `Semantic-layer source for ${sourceName} backed by curated SQL.`; + } + + const counts = [ + formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'), + formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'), + formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'), + ].filter((item): item is string => Boolean(item)); + return counts.length > 0 + ? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.` + : `Semantic-layer overlay for ${sourceName}.`; +} + +function columnFallback(column: Record, sourceName: string): string { + const columnName = cleanText(column.name) ?? 'column'; + const label = humanizeIdentifier(columnName) || columnName; + const expr = cleanText(column.expr); + + if (expr) { + return `Computed ${label} value for ${sourceName}.`; + } + + if (columnName.toLowerCase() === 'id') { + return `Identifier column for ${sourceName}.`; + } + + const idMatch = columnName.match(/^(.+)_id$/i); + if (idMatch) { + const entity = humanizeIdentifier(idMatch[1] ?? ''); + return entity ? `Identifier for the related ${entity} on ${sourceName}.` : `Identifier column for ${sourceName}.`; + } + + if (/(^|_)(date|time|timestamp|created_at|updated_at|week_start|month_start)($|_)/i.test(columnName)) { + return `Date or time value for ${label} on ${sourceName}.`; + } + + return `Column ${label} from ${sourceName}.`; +} + +export function normalizeSemanticLayerDescriptions( + source: T, + options: NormalizeDescriptionOptions = {}, +): T { + const sourceRecord = source as Record; + const sourceName = cleanText(sourceRecord.name) ?? 'source'; + const normalized = withDescriptionMap( + sourceRecord, + options.fillMissing ? sourceFallback(sourceRecord, sourceName) : null, + ); + + if (Array.isArray(sourceRecord.columns)) { + normalized.columns = sourceRecord.columns.map((column) => { + if (!column || typeof column !== 'object' || Array.isArray(column)) { + return column; + } + const columnRecord = column as Record; + return withDescriptionMap( + columnRecord, + options.fillMissing ? columnFallback(columnRecord, sourceName) : null, + ); + }); + } + + return normalized as T; +} diff --git a/packages/context/src/sl/local-sl.ts b/packages/context/src/sl/local-sl.ts index b8d29e87..676b2522 100644 --- a/packages/context/src/sl/local-sl.ts +++ b/packages/context/src/sl/local-sl.ts @@ -5,6 +5,7 @@ import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js'; import type { KtxLocalProject } from '../project/index.js'; import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js'; import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js'; +import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js'; import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js'; import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js'; @@ -180,14 +181,14 @@ function manifestTables(value: Record): Record, name: string): SemanticLayerSource { const source = parsed as Partial; - return { + return normalizeSemanticLayerDescriptions({ ...source, name, grain: Array.isArray(parsed.grain) ? (parsed.grain.filter((item) => typeof item === 'string') as string[]) : [], columns: Array.isArray(parsed.columns) ? (parsed.columns as SemanticLayerSource['columns']) : [], joins: Array.isArray(parsed.joins) ? (parsed.joins as SemanticLayerSource['joins']) : [], measures: Array.isArray(parsed.measures) ? (parsed.measures as SemanticLayerSource['measures']) : [], - }; + }); } export async function loadLocalSlSourceRecords( diff --git a/packages/context/src/sl/schemas.ts b/packages/context/src/sl/schemas.ts index 55e07f22..56f415a8 100644 --- a/packages/context/src/sl/schemas.ts +++ b/packages/context/src/sl/schemas.ts @@ -23,6 +23,8 @@ const segmentDefinitionSchema = z.object({ description: z.string().optional(), }); +const descriptionsSchema = z.record(z.string(), z.string().min(1)); + const defaultTimeDimensionDbtSchema = z.object({ dbt: z.string().optional(), }); @@ -77,6 +79,7 @@ const sourceColumnSchema = z.object({ role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), description: z.string().optional(), + descriptions: descriptionsSchema.optional(), expr: z.string().optional(), constraints: sourceKeyedColumnConstraintsSchema.optional(), enum_values: sourceKeyedStringArraySchema.optional(), @@ -91,6 +94,7 @@ const overlayColumnSchema = z role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), description: z.string().optional(), + descriptions: descriptionsSchema.optional(), expr: z.string().optional(), }) .refine((col) => !col.type || col.expr, { @@ -102,6 +106,7 @@ export const sourceDefinitionSchema = z .object({ name: z.string().min(1), description: z.string().optional(), + descriptions: descriptionsSchema.optional(), // Accepted for documentation parity with the Python spec; behavior is driven // by the `table` / `sql` fields, not by this discriminator. source_type: z.enum(['table', 'sql']).optional(), diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 5d7413ac..0b9656de 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -257,12 +257,14 @@ describe('sourceDefinitionSchema', () => { it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => { const result = sourceDefinitionSchema.safeParse({ name: 'orders', + descriptions: { dbt: 'Order facts from dbt.' }, table: 'public.orders', grain: ['id'], columns: [ { name: 'status', type: 'string', + descriptions: { dbt: 'Order lifecycle status.' }, constraints: { dbt: { not_null: true, unique: true } }, enum_values: { dbt: ['placed', 'shipped'] }, tests: { @@ -282,7 +284,9 @@ describe('sourceDefinitionSchema', () => { if (!result.success) { return; } + expect(result.data.descriptions).toEqual({ dbt: 'Order facts from dbt.' }); expect(result.data.columns[0]).toMatchObject({ + descriptions: { dbt: 'Order lifecycle status.' }, constraints: { dbt: { not_null: true, unique: true } }, enum_values: { dbt: ['placed', 'shipped'] }, tests: { @@ -528,6 +532,31 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', ( const aav = sources.find((s) => s.name === 'aav_consignments'); expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]); }); + + it('normalizes legacy flat source and column descriptions when loading standalone files', async () => { + const standalonePath = 'semantic-layer/conn-1/orders.yaml'; + configService.listFiles.mockResolvedValue({ files: [standalonePath] }); + configService.readFile.mockResolvedValue({ + content: [ + 'name: orders', + 'description: Finance orders used for invoice reconciliation.', + 'table: public.orders', + 'grain: [id]', + 'columns:', + ' - name: id', + ' type: string', + ' description: Stable order identifier.', + ].join('\n'), + }); + + const sources = await service.loadAllSources('conn-1'); + + expect(sources[0]).toMatchObject({ + name: 'orders', + descriptions: { user: 'Finance orders used for invoice reconciliation.' }, + columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }], + }); + }); }); describe('validateWithProposedSource', () => { diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index 5d559a31..0ccce66a 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -2,6 +2,7 @@ import YAML from 'yaml'; import type { KtxFileStorePort, KtxLogger } from '../core/index.js'; import { noopLogger } from '../core/index.js'; import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js'; +import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import { isOverlaySource, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js'; import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput, SemanticLayerSource } from './types.js'; @@ -101,6 +102,7 @@ export class SemanticLayerService { const warnings: string[] = []; if (!options?.skipValidation) { + source = normalizeSemanticLayerDescriptions(source); const sourceData: Record = { ...source }; if ((sourceData.table || sourceData.sql) && (await this.isManifestBacked(connectionId, source.name))) { @@ -129,7 +131,8 @@ export class SemanticLayerService { } const path = this.sourcePath(connectionId, source.name); - const content = YAML.stringify(source, { indent: 2, lineWidth: 0 }); + const normalizedSource = normalizeSemanticLayerDescriptions(source); + const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0 }); const message = commitMessage ?? `Update semantic layer source: ${source.name}`; const result = await this.configService.writeFile(path, content, author, authorEmail, message, { skipLock: options?.skipLock, @@ -199,14 +202,14 @@ export class SemanticLayerService { if (sources.has(name)) { this.logger.warn(`Standalone source '${name}' in ${filePath} overrides manifest entry of the same name`); } - let standalone: SemanticLayerSource = { + let standalone: SemanticLayerSource = normalizeSemanticLayerDescriptions({ ...(data as Partial), name, grain: Array.isArray(data.grain) ? (data.grain as string[]) : [], columns: Array.isArray(data.columns) ? (data.columns as SemanticLayerSource['columns']) : [], joins: Array.isArray(data.joins) ? (data.joins as SemanticLayerSource['joins']) : [], measures: Array.isArray(data.measures) ? (data.measures as SemanticLayerSource['measures']) : [], - }; + }); // If the source declares `inherits_columns_from`, fill any blank // type/descriptions/role from the matching manifest entry. Lets the // agent write `columns: [{name: FOO}]` without redeclaring known fields. @@ -1005,7 +1008,8 @@ const COMPOSE_KNOWN_KEYS = new Set([ ]); export function composeOverlay(base: SemanticLayerSource, overlay: Record): SemanticLayerSource { - const unknownKeys = Object.keys(overlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k)); + const normalizedOverlay = normalizeSemanticLayerDescriptions(overlay); + const unknownKeys = Object.keys(normalizedOverlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k)); if (unknownKeys.length > 0) { throw new Error( `composeOverlay: overlay for '${base.name}' has unhandled keys [${unknownKeys.join(', ')}]. ` + @@ -1015,50 +1019,47 @@ export function composeOverlay(base: SemanticLayerSource, overlay: Record), + ...(normalizedOverlay.descriptions as Record), }; } // Filter out excluded columns - const excluded = new Set((overlay.exclude_columns as string[] | undefined) ?? []); + const excluded = new Set((normalizedOverlay.exclude_columns as string[] | undefined) ?? []); let columns = result.columns.filter((c) => !excluded.has(c.name)); // Append overlay computed columns - const overlayColumns = (overlay.columns as SemanticLayerSource['columns'] | undefined) ?? []; + const overlayColumns = (normalizedOverlay.columns as SemanticLayerSource['columns'] | undefined) ?? []; columns = [...columns, ...overlayColumns]; result.columns = columns; // Measures from overlay only - result.measures = (overlay.measures as SemanticLayerSource['measures'] | undefined) ?? []; + result.measures = (normalizedOverlay.measures as SemanticLayerSource['measures'] | undefined) ?? []; // Segments: overlay-replaces semantics. Manifest tables don't carry segments today; // if that changes, add a union branch here. - if (overlay.segments !== undefined) { - result.segments = overlay.segments as SemanticLayerSource['segments']; + if (normalizedOverlay.segments !== undefined) { + result.segments = normalizedOverlay.segments as SemanticLayerSource['segments']; } // Override grain - if (overlay.grain) { - result.grain = overlay.grain as string[]; + if (normalizedOverlay.grain) { + result.grain = normalizedOverlay.grain as string[]; } - if (overlay.default_time_dimension !== undefined) { - result.default_time_dimension = overlay.default_time_dimension as SemanticLayerSource['default_time_dimension']; + if (normalizedOverlay.default_time_dimension !== undefined) { + result.default_time_dimension = + normalizedOverlay.default_time_dimension as SemanticLayerSource['default_time_dimension']; } // Union + dedupe joins, apply suppressions - const disabled = new Set(((overlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs)); + const disabled = new Set(((normalizedOverlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs)); const manifestJoins = result.joins.filter((j) => !disabled.has(normalizeWs(j.on))); - const overlayJoins = (overlay.joins as SemanticLayerSource['joins'] | undefined) ?? []; + const overlayJoins = (normalizedOverlay.joins as SemanticLayerSource['joins'] | undefined) ?? []; const existingKeys = new Set(manifestJoins.map((j) => `${j.to}::${normalizeWs(j.on)}`)); const newJoins = overlayJoins.filter((j) => !existingKeys.has(`${j.to}::${normalizeWs(j.on)}`)); result.joins = [...manifestJoins, ...newJoins]; diff --git a/packages/context/src/sl/sl-search.service.ts b/packages/context/src/sl/sl-search.service.ts index e351011f..47743ae1 100644 --- a/packages/context/src/sl/sl-search.service.ts +++ b/packages/context/src/sl/sl-search.service.ts @@ -1,6 +1,7 @@ import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js'; import { noopLogger } from '../core/index.js'; import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js'; +import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import type { SlSourcesIndexPort } from './ports.js'; import type { SemanticLayerSource } from './types.js'; @@ -8,6 +9,7 @@ export function buildSemanticLayerSourceSearchText( source: SemanticLayerSource, priority: string[] = DEFAULT_PRIORITY, ): string { + source = normalizeSemanticLayerDescriptions(source); const config = { priority }; const parts: string[] = [source.name.replace(/_/g, ' ')]; diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts index 5165112a..d90f0356 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts @@ -127,6 +127,39 @@ describe('SlEditSourceTool — session gating', () => { ); expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); }); + + it('fills missing descriptions when an ingest session edits a source', async () => { + const { tool } = makeTool(); + const session = makeSession({ + ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'dbt' }, + }); + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: session.connectionId, + sourceName: 'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + + expect(result.structured.success).toBe(true); + expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + descriptions: { ktx: expect.stringContaining('orders') }, + columns: [ + expect.objectContaining({ + descriptions: { ktx: expect.stringContaining('Identifier') }, + }), + ], + }), + expect.any(String), + expect.any(String), + expect.any(String), + ); + }); }); describe('SlEditSourceTool — manifest-backed source without overlay', () => { diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.ts b/packages/context/src/sl/tools/sl-edit-source.tool.ts index 29fa275d..17a85990 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.ts @@ -2,6 +2,7 @@ import YAML from 'yaml'; import { z } from 'zod'; import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js'; import { applySqlEdits } from '../../tools/sql-edit-replacer.js'; +import { normalizeSemanticLayerDescriptions } from '../description-normalization.js'; import type { SemanticLayerSource } from '../types.js'; import { BaseSemanticLayerTool, @@ -147,6 +148,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t } catch (e) { return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName); } + source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest }); // Re-serialize and write const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 }); diff --git a/packages/context/src/sl/tools/sl-write-source.tool.test.ts b/packages/context/src/sl/tools/sl-write-source.tool.test.ts index 4ad6bf53..1502c177 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.test.ts @@ -175,6 +175,89 @@ describe('SlWriteSourceTool — session gating', () => { ); expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); }); + + it('normalizes flat source and column descriptions before writing', async () => { + const { tool, semanticLayerService } = makeTool(); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'orders', + source: { + name: 'orders', + description: 'Finance orders used for invoice reconciliation.', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }], + measures: [], + joins: [], + } as any, + } as any, + baseContext, + ); + + expect(result.structured.success).toBe(true); + expect(semanticLayerService.writeSource).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + descriptions: { user: 'Finance orders used for invoice reconciliation.' }, + columns: [expect.objectContaining({ descriptions: { user: 'Stable order identifier.' } })], + }), + expect.any(String), + expect.any(String), + expect.any(String), + ); + }); + + it('fills missing descriptions for ingest-written overlays and columns', async () => { + const session = makeSession({ + ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'metabase' }, + semanticLayerService: { + loadSource: vi.fn().mockResolvedValue(null), + loadAllSources: vi.fn().mockResolvedValue([]), + validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), + deleteSource: vi.fn().mockResolvedValue(undefined), + listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']), + isManifestBacked: vi.fn().mockResolvedValue(false), + readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), + findManifestEntryByTableRef: vi.fn().mockResolvedValue(null), + } as any, + }); + const { tool } = makeTool(); + + const result = await tool.call( + { + connectionId: session.connectionId, + sourceName: 'mart_account_segments', + source: { + name: 'mart_account_segments', + columns: [{ name: 'is_large_contract', type: 'boolean', expr: 'contract_arr_cents >= 20000000' }], + measures: [{ name: 'account_count', expr: 'count(account_id)' }], + } as any, + } as any, + { ...baseContext, session }, + ); + + expect(result.structured.success).toBe(true); + expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + descriptions: { + ktx: expect.stringContaining('mart_account_segments'), + }, + columns: [ + expect.objectContaining({ + descriptions: { + ktx: expect.stringContaining('is large contract'), + }, + }), + ], + }), + expect.any(String), + expect.any(String), + expect.any(String), + ); + }); }); describe('SlWriteSourceTool — disconnected-components warning in markdown', () => { diff --git a/packages/context/src/sl/tools/sl-write-source.tool.ts b/packages/context/src/sl/tools/sl-write-source.tool.ts index 39a5ad5e..638b130e 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.ts @@ -10,6 +10,7 @@ import { type SemanticLayerStructured, sourceDefinitionSchema, } from './base-semantic-layer.tool.js'; +import { normalizeSemanticLayerDescriptions } from '../description-normalization.js'; import { slToolConnectionIdSchema } from './connection-id-schema.js'; const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]); @@ -154,14 +155,16 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co semanticLayerService: SemanticLayerService, skipIndex: boolean, ): Promise> { - const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql); + const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest }); + const isOverlay = + !('table' in normalizedSource && normalizedSource.table) && !('sql' in normalizedSource && normalizedSource.sql); const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName); const commitMessage = existing ? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}` : `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`; - const yamlContent = YAML.stringify(source); + const yamlContent = YAML.stringify(normalizedSource); const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent); if (orphanError) { @@ -172,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent }); } - const validatedSource = source as SemanticLayerSource; + const validatedSource = normalizedSource as SemanticLayerSource; const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource); const validationErrors = validationResult.errors; const validationWarnings = [...validationResult.warnings]; diff --git a/packages/context/src/wiki/tools/wiki-write.tool.test.ts b/packages/context/src/wiki/tools/wiki-write.tool.test.ts index 3b51c6e3..9e947d84 100644 --- a/packages/context/src/wiki/tools/wiki-write.tool.test.ts +++ b/packages/context/src/wiki/tools/wiki-write.tool.test.ts @@ -37,6 +37,42 @@ describe('WikiWriteTool', () => { expect(result.markdown).toMatch(/created/i); }); + it('normalizes accidentally escaped markdown newlines before writing', async () => { + const { tool, wikiService } = makeTool(); + + await tool.call( + { + key: 'large-contract-requesters', + summary: 'Cross-schema Metabase query', + content: + '# Large Contract Requesters\\n\\n**Source card:** Metabase #110\\n\\n## SQL\\n\\n```sql\\nselect * from orbit_analytics.mart_account_segments\\n```\\n', + } as any, + baseContext, + ); + + expect(wikiService.writePage.mock.calls[0][4]).toBe( + '# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n', + ); + expect(wikiService.syncSinglePage.mock.calls[0][4]).toBe( + '# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n', + ); + }); + + it('preserves intentional escaped newline examples in inline code', async () => { + const { tool, wikiService } = makeTool(); + + await tool.call( + { + key: 'newline-token', + summary: 'Escaped newline token', + content: 'Use `\\n\\n` when documenting the literal separator.', + } as any, + baseContext, + ); + + expect(wikiService.writePage.mock.calls[0][4]).toBe('Use `\\n\\n` when documenting the literal separator.'); + }); + it('skips syncSinglePage when session is worktree-scoped', async () => { const { tool, wikiService } = makeTool(); const session: ToolSession = { diff --git a/packages/context/src/wiki/tools/wiki-write.tool.ts b/packages/context/src/wiki/tools/wiki-write.tool.ts index f0ba954d..a2930fd8 100644 --- a/packages/context/src/wiki/tools/wiki-write.tool.ts +++ b/packages/context/src/wiki/tools/wiki-write.tool.ts @@ -47,6 +47,22 @@ interface WikiWriteStructured { action?: 'created' | 'updated'; } +function looksLikeEscapedMarkdown(content: string): boolean { + const withoutInlineCode = content.replace(/`[^`]*`/g, ''); + return /\\n\\n|(?:^|\\n)#{1,6}\s|\\n[-*]\s|\\n\d+\.\s|\\n```|\\n\|/.test(withoutInlineCode); +} + +function normalizeAccidentalEscapedMarkdownNewlines(content: string): string { + const escapedBreaks = content.match(/\\[rn]/g)?.length ?? 0; + if (escapedBreaks < 2) return content; + + const actualBreaks = content.match(/\r?\n/g)?.length ?? 0; + if (actualBreaks > 0 && escapedBreaks <= actualBreaks * 4) return content; + if (!looksLikeEscapedMarkdown(content)) return content; + + return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n'); +} + export class WikiWriteTool extends BaseTool { readonly name = 'wiki_write'; @@ -125,7 +141,7 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to }; if (input.content) { - finalContent = input.content; + finalContent = normalizeAccidentalEscapedMarkdownNewlines(input.content); } else { const editResult = applySqlEdits(existing?.content ?? '', input.replacements ?? []); if (!editResult.success) { diff --git a/python/ktx-sl/semantic_layer/models.py b/python/ktx-sl/semantic_layer/models.py index 9a6a514f..7e922933 100644 --- a/python/ktx-sl/semantic_layer/models.py +++ b/python/ktx-sl/semantic_layer/models.py @@ -36,6 +36,22 @@ class SourceColumnTests(BaseModel): dbt_by_package: dict[str, list[str]] | None = None +_DEFAULT_DESCRIPTION_PRIORITY = ["user", "ai", "dbt", "db"] + + +def _resolve_description_map(descriptions: dict[str, str] | None) -> str | None: + if not descriptions: + return None + for source in _DEFAULT_DESCRIPTION_PRIORITY: + text = descriptions.get(source) + if text: + return text + for text in descriptions.values(): + if text: + return text + return None + + class FreshnessDbt(BaseModel): raw: Any | None = None loaded_at_field: str | None = None @@ -47,12 +63,19 @@ class SourceColumn(BaseModel): visibility: ColumnVisibility = ColumnVisibility.PUBLIC role: ColumnRole = ColumnRole.DEFAULT description: str | None = None + descriptions: dict[str, str] | None = None expr: str | None = None natural_granularity: str | None = None constraints: dict[str, ColumnDbtConstraints] | None = None enum_values: dict[str, list[str]] | None = None tests: SourceColumnTests | None = None + @model_validator(mode="after") + def resolve_description(self) -> SourceColumn: + if self.description is None: + self.description = _resolve_description_map(self.descriptions) + return self + class JoinDeclaration(BaseModel): to: str @@ -84,6 +107,7 @@ class DefaultTimeDimensionDbt(BaseModel): class SourceDefinition(BaseModel): name: str description: str | None = None + descriptions: dict[str, str] | None = None table: str | None = None sql: str | None = None grain: list[str] @@ -97,6 +121,8 @@ class SourceDefinition(BaseModel): @model_validator(mode="after") def validate_source(self) -> SourceDefinition: + if self.description is None: + self.description = _resolve_description_map(self.descriptions) if self.table and self.sql: raise ValueError("'table' and 'sql' are mutually exclusive") if not self.grain: diff --git a/python/ktx-sl/tests/test_models.py b/python/ktx-sl/tests/test_models.py index b6468462..e227bef9 100644 --- a/python/ktx-sl/tests/test_models.py +++ b/python/ktx-sl/tests/test_models.py @@ -33,6 +33,14 @@ class TestSourceColumn: assert col.visibility == ColumnVisibility.HIDDEN assert col.role == ColumnRole.TIME + def test_descriptions_map_resolves_visible_description(self): + col = SourceColumn( + name="account_id", + type="string", + descriptions={"ktx": "Identifier for the related account."}, + ) + assert col.description == "Identifier for the related account." + def test_invalid_type(self): with pytest.raises(ValidationError): SourceColumn(name="id", type="integer") @@ -63,6 +71,16 @@ class TestSourceDefinition: assert src.is_sql_source assert not src.is_table_source + def test_descriptions_map_resolves_visible_description(self): + src = SourceDefinition( + name="orders", + descriptions={"ktx": "Semantic-layer source for orders."}, + table="public.orders", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + assert src.description == "Semantic-layer source for orders." + def test_table_and_sql_mutually_exclusive(self): with pytest.raises(ValidationError, match="mutually exclusive"): SourceDefinition( From 76fde8979812a5dfe9fc4818465f89b8f9cd4660 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Mon, 11 May 2026 09:55:42 +0200 Subject: [PATCH 8/8] [codex] Add Conductor workspace scripts (#2) * Add Conductor workspace scripts * Fix conductor boundary check fixture * Remove stale frontend conductor guard * Remove stale app service references * Optimize relationship discovery benchmarks * test: move slow suites to ci tier --- .github/workflows/ci.yml | 6 + conductor.json | 7 + examples/postgres-historic/README.md | 8 +- examples/postgres-historic/scripts/smoke.sh | 31 +- package.json | 1 + packages/cli/package.json | 3 +- packages/cli/src/ingest-viz.test.ts | 863 +++++++++ packages/cli/src/ingest.test-utils.ts | 746 ++++++++ packages/cli/src/ingest.test.ts | 1595 +---------------- packages/context/package.json | 4 +- .../src/scan/relationship-benchmarks.test.ts | 8 +- .../src/scan/relationship-candidates.ts | 50 +- .../context/src/scan/relationship-locality.ts | 35 +- packages/context/src/sl/schemas.ts | 2 +- .../http-sql-analysis-port.test.ts | 4 +- packages/context/src/tools/base-tool.ts | 2 +- scripts/build-benchmark-snapshot.test.mjs | 10 + scripts/check-boundaries.test.mjs | 2 +- scripts/ci-artifact-upload.test.mjs | 33 +- scripts/conductor-run.sh | 98 + scripts/conductor-scripts.test.mjs | 40 + scripts/conductor-setup.sh | 110 ++ scripts/examples-docs.test.mjs | 4 + scripts/precommit-check.test.mjs | 2 +- scripts/standalone-ci-workflow.test.mjs | 2 + scripts/test-tiering.test.mjs | 73 + 26 files changed, 2085 insertions(+), 1654 deletions(-) create mode 100644 conductor.json create mode 100644 packages/cli/src/ingest-viz.test.ts create mode 100644 packages/cli/src/ingest.test-utils.ts create mode 100755 scripts/conductor-run.sh create mode 100644 scripts/conductor-scripts.test.mjs create mode 100755 scripts/conductor-setup.sh create mode 100644 scripts/test-tiering.test.mjs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d37c6812..750c71d5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,12 @@ jobs: - name: Run TypeScript checks run: pnpm run check + - name: Run slow TypeScript tests + run: pnpm run test:slow + + - name: Run CLI smoke tests + run: pnpm run smoke + - name: Setup Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/conductor.json b/conductor.json new file mode 100644 index 00000000..e1a79ff9 --- /dev/null +++ b/conductor.json @@ -0,0 +1,7 @@ +{ + "scripts": { + "setup": "bash scripts/conductor-setup.sh", + "run": "bash scripts/conductor-run.sh" + }, + "runScriptMode": "nonconcurrent" +} diff --git a/examples/postgres-historic/README.md b/examples/postgres-historic/README.md index 3e27b462..1a97cba2 100644 --- a/examples/postgres-historic/README.md +++ b/examples/postgres-historic/README.md @@ -13,8 +13,8 @@ generates query workload under separate users, runs `ktx setup` with - Docker with Compose v2 - Node and pnpm matching the KTX workspace -- `python-service/.venv` already created, or `KTX_SQL_ANALYSIS_URL` pointing at - a running service that exposes `/api/sql/analyze-for-fingerprint` +- `KTX_SQL_ANALYSIS_URL` or `KTX_DAEMON_URL` pointing at a running SQL-analysis + service that exposes `/api/sql/analyze-for-fingerprint` ## Run @@ -111,5 +111,5 @@ The manifest should have `dialect: "postgres"`, `degraded: true`, - Missing grants: confirm `GRANT pg_read_all_stats TO ktx_reader;`. - Empty templates: rerun `scripts/generate-workload.sh base` and keep `--historic-sql-min-calls 2` for the smoke. -- SQL-analysis failures: set `KTX_SQL_ANALYSIS_URL` to the running service URL - or create `python-service/.venv` before running `scripts/smoke.sh`. +- SQL-analysis failures: set `KTX_SQL_ANALYSIS_URL` or `KTX_DAEMON_URL` to a + running service URL before running `scripts/smoke.sh`. diff --git a/examples/postgres-historic/scripts/smoke.sh b/examples/postgres-historic/scripts/smoke.sh index d948cf8e..5b1be929 100755 --- a/examples/postgres-historic/scripts/smoke.sh +++ b/examples/postgres-historic/scripts/smoke.sh @@ -4,46 +4,23 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" KTX_ROOT="$(cd "$EXAMPLE_DIR/../.." && pwd)" -REPO_ROOT="$(cd "$KTX_ROOT/.." && pwd)" COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml" PROJECT_PARENT="${KTX_POSTGRES_HISTORIC_PROJECT_PARENT:-$(mktemp -d)}" PROJECT_DIR="$PROJECT_PARENT/postgres-historic-ktx" KTX_BIN="$KTX_ROOT/packages/cli/dist/bin.js" -PYTHON_SERVICE_LOG="$PROJECT_PARENT/python-service.log" -PYTHON_SERVICE_PID="" cleanup() { - if [[ -n "$PYTHON_SERVICE_PID" ]]; then - kill "$PYTHON_SERVICE_PID" >/dev/null 2>&1 || true - fi if [[ "${KTX_POSTGRES_HISTORIC_KEEP_DOCKER:-0}" != "1" ]]; then docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true fi } trap cleanup EXIT -start_sql_analysis_if_needed() { - if [[ -n "${KTX_SQL_ANALYSIS_URL:-}" ]]; then +require_sql_analysis_url() { + if [[ -n "${KTX_SQL_ANALYSIS_URL:-}" || -n "${KTX_DAEMON_URL:-}" ]]; then return fi - if [[ ! -d "$REPO_ROOT/python-service/.venv" ]]; then - echo "Set KTX_SQL_ANALYSIS_URL or create python-service/.venv before running this smoke." >&2 - exit 1 - fi - ( - cd "$REPO_ROOT/python-service" - source .venv/bin/activate - uvicorn app.main:app --host 127.0.0.1 --port 18081 >"$PYTHON_SERVICE_LOG" 2>&1 - ) & - PYTHON_SERVICE_PID="$!" - export KTX_SQL_ANALYSIS_URL="http://127.0.0.1:18081" - for _ in $(seq 1 60); do - if curl -fsS "$KTX_SQL_ANALYSIS_URL/health" >/dev/null 2>&1; then - return - fi - sleep 1 - done - echo "SQL analysis service did not become healthy. Log: $PYTHON_SERVICE_LOG" >&2 + echo "Set KTX_SQL_ANALYSIS_URL or KTX_DAEMON_URL before running this smoke." >&2 exit 1 } @@ -111,7 +88,7 @@ NODE cd "$KTX_ROOT" pnpm --filter @ktx/context run build pnpm --filter @ktx/cli run build -start_sql_analysis_if_needed +require_sql_analysis_url docker compose -f "$COMPOSE_FILE" up -d --wait "$EXAMPLE_DIR/scripts/generate-workload.sh" base diff --git a/package.json b/package.json index 08fda8a6..bda78420 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "relationships:verify-orbit": "node scripts/relationship-orbit-verification.mjs", "smoke": "pnpm run build && pnpm --filter @ktx/cli run smoke", "test": "node --test scripts/*.test.mjs && pnpm --filter './packages/*' run test", + "test:slow": "pnpm --filter @ktx/context run test:slow && pnpm --filter @ktx/cli run test:slow", "type-check": "pnpm --filter './packages/*' run type-check" }, "devDependencies": { diff --git a/packages/cli/package.json b/packages/cli/package.json index e85986a4..e386e56f 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -28,7 +28,8 @@ "assets:demo": "node scripts/build-demo-assets.mjs", "build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs", "smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000", - "test": "vitest run", + "test": "vitest run --exclude src/standalone-smoke.test.ts --exclude src/example-smoke.test.ts --exclude src/setup-databases.test.ts --exclude src/scan.test.ts --exclude src/commands/connection-metabase-setup.test.ts --exclude src/setup-models.test.ts --exclude src/setup-sources.test.ts --exclude src/setup.test.ts --exclude src/connection.test.ts --exclude src/setup-embeddings.test.ts --exclude src/ingest.test.ts --exclude src/commands/connection-mapping.test.ts --exclude src/ingest-viz.test.ts --exclude src/demo.test.ts --exclude src/setup-project.test.ts --exclude src/sl.test.ts --exclude src/local-scan-connectors.test.ts --exclude src/commands/connection-notion.test.ts", + "test:slow": "vitest run src/setup-databases.test.ts src/scan.test.ts src/commands/connection-metabase-setup.test.ts src/setup-models.test.ts src/setup-sources.test.ts src/setup.test.ts src/connection.test.ts src/setup-embeddings.test.ts src/ingest.test.ts src/commands/connection-mapping.test.ts src/ingest-viz.test.ts src/demo.test.ts src/setup-project.test.ts src/sl.test.ts src/local-scan-connectors.test.ts src/commands/connection-notion.test.ts --testTimeout 30000", "type-check": "tsc -p tsconfig.json --noEmit" }, "dependencies": { diff --git a/packages/cli/src/ingest-viz.test.ts b/packages/cli/src/ingest-viz.test.ts new file mode 100644 index 00000000..936490d7 --- /dev/null +++ b/packages/cli/src/ingest-viz.test.ts @@ -0,0 +1,863 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { + type LocalIngestResult, + type MemoryFlowReplayInput, + type RunLocalIngestOptions, +} from '@ktx/context/ingest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runKtxIngest } from './ingest.js'; +import { + completedLocalBundleRun, + emitLiveLocalMemoryFlow, + localFakeBundleReport, + makeIo, + persistLocalBundleReport, + writeBundleReportFile, + writeWarehouseConfig, +} from './ingest.test-utils.js'; +import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; + +describe('runKtxIngest viz and replay', () => { + let tempDir: string; + let originalTerm: string | undefined; + + beforeEach(async () => { + resetVizFallbackWarningsForTest(); + originalTerm = process.env.TERM; + process.env.TERM = 'xterm-256color'; + tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-ingest-')); + }); + + afterEach(async () => { + if (originalTerm === undefined) { + delete process.env.TERM; + } else { + process.env.TERM = originalTerm; + } + await rm(tempDir, { recursive: true, force: true }); + }); + + it('renders live memory-flow frames for run --viz when stdout is interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); + input.memoryFlow?.update({ syncId: 'sync-live-1' }); + input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); + input.memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); + input.memoryFlow?.finish('done'); + + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + now: () => new Date('2026-04-30T14:00:00.000Z'), + }, + ), + ).resolves.toBe(0); + + expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.any(Object) })); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + expect((io.stdout().match(/KTX memory flow/g) ?? []).length).toBeGreaterThan(1); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('fake-orders'); + expect(io.stderr()).toBe(''); + }); + + it('uses the TUI live session for run --viz when stdin and stdout are interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + now: () => new Date('2026-04-30T14:00:00.000Z'), + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(startLiveMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'live-viz-run', + connectionId: 'warehouse', + adapter: 'fake', + status: 'running', + }); + expect(liveSession.update).toHaveBeenCalled(); + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toBe(''); + }); + + it('prints a final plain summary after live viz completes', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-summary'); + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'viz', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('Memory-flow summary: done'); + expect(io.stdout()).toContain('Connection: warehouse'); + }); + + it('falls back to text live rendering when the TUI live session is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('falls back to text live rendering when TUI startup fails with a redacted warning', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn( + async (_input: MemoryFlowReplayInput, ioArg: { stderr: { write(chunk: string): void } }) => { + ioArg.stderr.write('TUI visualization unavailable: Failed [redacted-url] [redacted]; using text renderer.\n'); + return null; + }, + ); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toContain('TUI visualization unavailable: Failed [redacted-url] [redacted]'); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + }); + + it('does not start live TUI when run --viz disables input', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + return completedLocalBundleRun(input, 'no-input-live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + })); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('does not attach a live memory-flow sink for plain run output', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'plain-run')); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + io.io, + { runLocalIngest: runLocal }, + ), + ).resolves.toBe(0); + + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('Job: plain-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + }); + + it('falls back to plain run output for run --viz when stdout is not interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const io = makeIo({ isTTY: false }); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'non-tty-viz-run')); + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'non-tty-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Job: non-tty-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('falls back to plain run output for run --viz when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'raw-missing-viz-run')); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + })); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'raw-missing-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('Job: raw-missing-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('returns an error code for missing status', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo(); + + await expect( + runKtxIngest({ command: 'status', projectDir, runId: 'missing-run', outputMode: 'plain' }, io.io), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found'); + }); + + it('uses the latest local ingest report when status has no run id', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + await persistLocalBundleReport(projectDir, localFakeBundleReport('older-run')); + await persistLocalBundleReport(projectDir, localFakeBundleReport('newer-run')); + const io = makeIo(); + + await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-newer-run'); + expect(io.stdout()).toContain('Job: newer-run'); + expect(io.stderr()).toBe(''); + }); + + it('renders the latest local ingest report through watch when run id is omitted', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + await persistLocalBundleReport(projectDir, localFakeBundleReport('watch-latest')); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest({ command: 'watch', projectDir, outputMode: 'viz', inputMode: 'disabled' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('Run: run-watch-latest'); + expect(io.stderr()).toBe(''); + }); + + it('renders report-file replay through the memory-flow TUI', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'job-1', + reportFile, + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/metabase done'); + expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); + expect(io.stdout()).toContain('Commit: abc12345 Run: run-1 Report: report-1'); + expect(io.stdout()).toContain('SOURCE'); + expect(io.stdout()).toContain('ACTIONS'); + expect(io.stdout()).toContain('SAVED'); + expect(io.stderr()).toBe(''); + }); + + it('prints report-file JSON without looking up local ingest status', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo(); + + await expect( + runKtxIngest({ command: 'status', projectDir, runId: 'report-1', reportFile, outputMode: 'json' }, io.io), + ).resolves.toBe(0); + + const parsed = JSON.parse(io.stdout()); + expect(parsed).toMatchObject({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + }); + expect(io.stderr()).toBe(''); + }); + + it('routes interactive report-file replay through the stored TUI renderer', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'run-1', + reportFile, + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'run-1', + reportId: 'report-1', + connectionId: 'warehouse', + adapter: 'metabase', + }); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); + + it('rejects report-file replay when the requested id does not match the report', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo(); + + await expect( + runKtxIngest({ command: 'replay', projectDir, runId: 'unrelated-id', reportFile, outputMode: 'plain' }, io.io), + ).resolves.toBe(1); + + expect(io.stderr()).toContain( + `Report file ${reportFile} does not match ingest replay id "unrelated-id"; expected one of report-1, run-1, job-1`, + ); + expect(io.stdout()).toBe(''); + }); + + it('renders memory-flow snapshot for status --viz when stdout is interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-1')); + + const io = makeIo({ isTTY: true }); + await expect( + runKtxIngest( + { command: 'status', projectDir, runId: 'viz-run-1', outputMode: 'viz', inputMode: 'disabled' }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('SOURCE'); + expect(io.stdout()).toContain('CHUNKS'); + expect(io.stdout()).toContain('WORKUNITS'); + expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); + expect(io.stderr()).toBe(''); + }); + + it('uses the TUI renderer for stored status --viz when stdin and stdout are interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + runId: 'tui-viz-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'run-tui-viz-run', + connectionId: 'warehouse', + adapter: 'fake', + }); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); + + it('falls back to the text renderer when TUI declines stored status --viz', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-fallback-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120, keypresses: [{ name: 'q' }] }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => false); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + runId: 'tui-fallback-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('does not use TUI for stored --viz when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-no-input-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'tui-no-input-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('falls back to plain status for stored --viz when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('raw-missing-stored-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'raw-missing-stored-viz-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Run: run-raw-missing-stored-viz-run'); + expect(io.stdout()).toContain('Job: raw-missing-stored-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('keeps stored --viz snapshot-only when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('no-input-viz-run')); + + const io = makeIo({ isTTY: true, columns: 120 }); + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'no-input-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stderr()).toBe(''); + }); + + it('keeps disabled-input stored --viz snapshot output even when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('disabled-raw-missing-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'disabled-raw-missing-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stderr()).toBe(''); + }); + + it('degrades stored --viz snapshots to plain status when stdout is redirected even when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('redirected-no-input-viz-run')); + + const io = makeIo({ isTTY: false }); + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'redirected-no-input-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-redirected-no-input-viz-run'); + expect(io.stdout()).toContain('Job: redirected-no-input-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('degrades ingest replay --viz to plain status when TERM is dumb', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('dumb-terminal-viz-run')); + + const io = makeIo({ isTTY: true }); + await expect( + runKtxIngest( + { command: 'replay', projectDir, runId: 'dumb-terminal-viz-run', outputMode: 'viz' }, + io.io, + { env: { ...process.env, TERM: 'dumb' } }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-dumb-terminal-viz-run'); + expect(io.stdout()).toContain('Job: dumb-terminal-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.', + ); + }); + + it('falls back to plain status for --viz when stdout is not interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-2')); + + const io = makeIo({ isTTY: false }); + await expect( + runKtxIngest({ command: 'replay', projectDir, runId: 'viz-run-2', outputMode: 'viz' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-viz-run-2'); + expect(io.stdout()).toContain('Job: viz-run-2'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('prints JSON for status --json', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('json-run-1')); + + const io = makeIo(); + await expect( + runKtxIngest({ command: 'status', projectDir, runId: 'json-run-1', outputMode: 'json' }, io.io), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + runId: 'run-json-run-1', + jobId: 'json-run-1', + sourceKey: 'fake', + connectionId: 'warehouse', + }); + expect(io.stderr()).toBe(''); + }); +}); diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts new file mode 100644 index 00000000..a83b38be --- /dev/null +++ b/packages/cli/src/ingest.test-utils.ts @@ -0,0 +1,746 @@ +import { EventEmitter } from 'node:events'; +import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent'; +import { + LocalLookerRuntimeStore, + LocalMetabaseSourceStateReader, + MetabaseSourceAdapter, + getLocalIngestStatus, + type ChunkResult, + type FetchContext, + type IngestReportSnapshot, + type LocalIngestResult, + type LocalMetabaseFanoutProgress, + type LookerMappingClient, + type LookerRuntimeClient, + type LookerTableIdentifierParser, + type MemoryFlowEventSink, + type MemoryFlowReplayInput, + type MetabaseCard, + type MetabaseCardSummary, + type MetabaseClientFactory, + type MetabaseRuntimeClient, + type RunLocalIngestOptions, + type SourceAdapter, + type SqliteBundleIngestStore, +} from '@ktx/context/ingest'; +import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; +import { expect, vi } from 'vitest'; +import { type KtxIngestArgs, runKtxIngest } from './ingest.js'; + +export function makeIo( + options: { + isTTY?: boolean; + stdinIsTTY?: boolean; + columns?: number; + rawMode?: boolean; + keypresses?: { name?: string; ctrl?: boolean }[]; + } = {}, +) { + let stdout = ''; + let stderr = ''; + type TestKey = { name?: string; ctrl?: boolean }; + + class TestStdin extends EventEmitter { + isTTY = options.stdinIsTTY ?? false; + isRaw = false; + + setRawMode = + options.rawMode === false + ? undefined + : (value: boolean): void => { + this.isRaw = value; + }; + + resume(): void { + return undefined; + } + + pause(): void { + return undefined; + } + + override on(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + const result = super.on(eventName, listener); + if (eventName === 'keypress') { + for (const key of options.keypresses ?? []) { + queueMicrotask(() => listener('', key)); + } + } + return result; + } + + override off(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + return super.off(eventName, listener); + } + + override removeListener(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + return super.removeListener(eventName, listener); + } + } + + const stdin = new TestStdin(); + + return { + io: { + stdin, + stdout: { + isTTY: options.isTTY, + columns: options.columns, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +export async function writeWarehouseConfig(projectDir: string): Promise { + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' warehouse_a:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - fake', + '', + ].join('\n'), + 'utf-8', + ); +} + +export async function writeMetabaseConfig(projectDir: string): Promise { + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); +} + +export function bundleReportSnapshot(): IngestReportSnapshot { + return { + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-04-30T12:00:00.000Z', + body: { + syncId: 'sync-1', + diffSummary: { added: 2, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: 'abc12345', + workUnits: [ + { + unitKey: 'cards', + rawFiles: ['cards/1.json', 'cards/2.json'], + status: 'success', + actions: [ + { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, + { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, + ], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], + }, + ], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'cards/1.json', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/revenue.md', + actionType: 'wiki_written', + }, + { + rawPath: 'cards/2.json', + artifactKind: 'sl', + artifactKey: 'warehouse.orders', + actionType: 'measure_added', + }, + ], + toolTranscripts: [ + { + unitKey: 'cards', + path: 'tool-transcripts/cards.jsonl', + toolCallCount: 4, + errorCount: 0, + toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'], + }, + ], + }, + }; +} + +export function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const nextReport = localFakeBundleReport(jobId, { + id: 'report-live-1', + runId: 'run-live-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + +export function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const failedWorkUnit = { + ...bundleReportSnapshot().body.workUnits[0], + status: 'failed' as const, + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }; + const nextReport = localFakeBundleReport(jobId, { + id: 'report-failed-1', + runId: 'run-failed-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + body: { + workUnits: [failedWorkUnit], + failedWorkUnits: [failedWorkUnit.unitKey], + }, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + +export class CliLookerSlWritingAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async (params: RunLoopParams) => { + if ( + params.telemetryTags?.operationName === 'ingest-bundle-wu' && + params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' + ) { + const slWrite = params.toolSet.sl_write_source; + if (!slWrite?.execute) { + throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); + } + const result = await slWrite.execute( + { + connectionId: 'prod-warehouse', + sourceName: 'looker__ecommerce__orders', + source: { + name: 'looker__ecommerce__orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'revenue', type: 'number' }, + ], + measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }], + }, + }, + { toolCallId: 'cli-looker-sl-write', messages: [] }, + ); + if (!result.structured.success) { + throw new Error(result.markdown); + } + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +export class CliMetabaseAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const })); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +export class CliMetabaseSourceAdapter implements SourceAdapter { + readonly source = 'metabase'; + readonly skillNames: string[] = []; + readonly fetchCalls: Array<{ metabaseConnectionId: string; metabaseDatabaseId: number; connectionId: string }> = []; + private readonly databaseByStagedDir = new Map(); + + detect(): Promise { + return Promise.resolve(true); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; + this.fetchCalls.push({ + metabaseConnectionId: config.metabaseConnectionId, + metabaseDatabaseId: config.metabaseDatabaseId, + connectionId: ctx.connectionId, + }); + this.databaseByStagedDir.set(stagedDir, config.metabaseDatabaseId); + await mkdir(join(stagedDir, 'cards'), { recursive: true }); + await mkdir(join(stagedDir, 'databases'), { recursive: true }); + await writeFile( + join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }), + 'utf-8', + ); + } + + async chunk(stagedDir: string): Promise { + const databaseId = this.databaseByStagedDir.get(stagedDir); + if (!databaseId) { + throw new Error(`Missing Metabase database id for staged dir ${stagedDir}`); + } + return { + workUnits: [ + { + unitKey: `metabase-db-${databaseId}`, + rawFiles: [`cards/${databaseId}.json`], + peerFileIndex: [], + dependencyPaths: [`databases/${databaseId}.json`], + }, + ], + }; + } +} + +const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ + { + id: 101, + name: 'Collection 12 Revenue', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 12, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, + parameters: [], + dashboard_count: 0, + }, + { + id: 102, + name: 'Collection 12 Margin', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 12, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, + parameters: [], + dashboard_count: 0, + }, + { + id: 103, + name: 'Collection 13 Pipeline', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 13, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, + parameters: [], + dashboard_count: 0, + }, +]; + +function metabaseCardSummary(card: MetabaseCard): MetabaseCardSummary { + return { + id: card.id, + name: card.name, + archived: card.archived, + database_id: card.database_id, + collection_id: card.collection_id, + }; +} + +function createSyncModeMetabaseClient(): MetabaseRuntimeClient { + const cardsById = new Map(SYNC_MODE_METABASE_CARDS.map((card) => [card.id, card])); + return { + testConnection: async () => ({ success: true }), + getCurrentUser: async () => ({ id: 1, email: 'local@example.test' }), + getDatabases: async () => [{ id: 1, name: 'Warehouse A', engine: 'postgres' }], + getDatabase: async (id) => ({ id, name: 'Warehouse A', engine: 'postgres' }), + getCollectionTree: async () => [ + { id: 12, name: 'Selected Collection', parent_id: 'root', children: [] }, + { id: 13, name: 'Other Collection', parent_id: 'root', children: [] }, + ], + getCollection: async (id) => ({ + id, + name: id === 12 ? 'Selected Collection' : 'Other Collection', + parent_id: 'root', + children: [], + }), + getCollectionItems: async (collectionId) => + SYNC_MODE_METABASE_CARDS.filter((card) => card.collection_id === collectionId).map((card) => ({ + id: card.id, + model: 'card', + name: card.name, + collection_id: card.collection_id, + database_id: card.database_id, + })), + getCard: async (id) => { + const card = cardsById.get(id); + if (!card) { + throw new Error(`unexpected card ${id}`); + } + return card; + }, + getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), + convertMbqlToNative: async () => ({ query: 'select 1' }), + getNativeSql: (card) => card.dataset_query?.native?.query ?? null, + getTemplateTags: () => ({}), + getCardSql: async (card) => card.dataset_query?.native?.query ?? null, + getResolvedSql: async (card) => ({ + resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`, + templateTags: [], + resolutionStatus: 'resolved', + }), + cleanup: async () => undefined, + }; +} + +export class StaticMetabaseClientFactory implements MetabaseClientFactory { + constructor(private readonly client: MetabaseRuntimeClient) {} + + createClient(): MetabaseRuntimeClient { + return this.client; + } +} + +type SyncModeCase = { + name: string; + syncMode: 'ALL' | 'ONLY' | 'EXCEPT'; + selections: Array<{ selectionType: 'collection' | 'item'; metabaseObjectId: number }>; + expectedRawFiles: string[]; + expectedWorkUnitKeys: string[]; +}; + +export async function runPublicMetabaseSyncModeCase(tempDir: string, input: SyncModeCase): Promise { + const projectDir = join(tempDir, `metabase-sync-mode-${input.name}`); + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + `project: metabase-sync-mode-${input.name}`, + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' api_url: https://metabase.example.test', + ' api_key: literal-test-key', + ' warehouse_a:', + ' driver: postgres', + ' url: postgresql://readonly@db.example.test/warehouse_a', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + + const project = await loadKtxProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: input.syncMode, + defaultTagNames: ['sync-mode-smoke'], + selections: input.selections, + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Warehouse A', + metabaseEngine: 'postgres', + metabaseHost: 'db.example.test', + metabaseDbName: 'warehouse_a', + targetConnectionId: 'warehouse_a', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + + const adapter = new MetabaseSourceAdapter({ + clientFactory: new StaticMetabaseClientFactory(createSyncModeMetabaseClient()), + sourceStateReader: store, + }); + const jobId = `metabase-sync-mode-${input.name}-child`; + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + createAdapters: vi.fn(() => [adapter]), + jobIdFactory: () => jobId, + localIngestOptions: { + agentRunner: new CliMetabaseAgentRunner(), + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stdout()).toContain(`target=warehouse_a database=1 status=done job=${jobId}`); + + const report = await getLocalIngestStatus(project, jobId); + expect(report).not.toBeNull(); + expect(report?.body.workUnits.map((wu) => wu.unitKey).sort()).toEqual(input.expectedWorkUnitKeys); + expect(report?.body.workUnits.flatMap((wu) => wu.rawFiles).sort()).toEqual(input.expectedRawFiles); +} + +type CliLookerRuntimeClient = LookerRuntimeClient & + Pick & { + cleanup: ReturnType>>; + }; + +export function makeCliLookerRuntimeClient(): CliLookerRuntimeClient { + const lookerModels = { + source: 'looker', + fetchedAt: '2026-05-05T00:00:00.000Z', + models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], + }; + const lookerExplore = { + source: 'looker', + modelName: 'ecommerce', + exploreName: 'orders', + label: 'Orders', + description: null, + connectionName: 'analytics', + viewName: 'orders', + rawSqlTableName: 'public.orders', + fields: { + dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }], + }, + joins: [ + { + name: 'users', + type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'public.users', + sqlOn: '${orders.user_id} = ${users.id}', + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, + }; + + return { + listLookerConnections: vi.fn().mockResolvedValue([ + { + name: 'analytics', + host: 'db.example.test', + database: 'analytics', + schema: null, + dialect: 'postgres', + }, + ]), + listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]), + getDashboard: vi.fn().mockResolvedValue({ + lookerId: '10', + title: 'Revenue Overview', + description: 'Revenue dashboard', + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:00:00.000Z', + tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }], + }), + listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]), + getLook: vi.fn().mockResolvedValue({ + lookerId: '20', + title: 'Revenue Look', + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:10:00.000Z', + query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] }, + }), + listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }), + listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]), + listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]), + listLookmlModels: vi.fn().mockResolvedValue(lookerModels), + getExplore: vi.fn().mockResolvedValue(lookerExplore), + getSignals: vi.fn().mockResolvedValue({ + dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }], + lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }], + scheduledPlans: [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 }, + ], + favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }], + }), + cleanup: vi.fn>().mockResolvedValue(undefined), + }; +} + +interface TestLookerTableIdentifierParser extends LookerTableIdentifierParser { + parse: ReturnType>; +} + +export function makeCliLookerParser(): TestLookerTableIdentifierParser { + return { + parse: vi.fn().mockResolvedValue({ + 'ecommerce.orders': { + ok: true, + catalog: null, + schema: 'public', + name: 'orders', + canonical_table: 'public.orders', + }, + 'ecommerce.orders.users': { + ok: true, + catalog: null, + schema: 'public', + name: 'users', + canonical_table: 'public.users', + }, + }), + }; +} + +export function localFakeBundleReport( + jobId: string, + overrides: Partial> & { body?: Partial } = {}, +): IngestReportSnapshot { + const report = bundleReportSnapshot(); + return { + ...report, + id: `report-${jobId}`, + runId: `run-${jobId}`, + jobId, + connectionId: 'warehouse', + sourceKey: 'fake', + ...overrides, + body: { + ...report.body, + syncId: 'sync-live-1', + ...(overrides.body ?? {}), + }, + }; +} + +export async function localBundleStore(projectDir: string, ids: [string, string]): Promise { + const { SqliteBundleIngestStore } = await import('@ktx/context/ingest'); + const project = await loadKtxProject({ projectDir }); + return new SqliteBundleIngestStore({ + dbPath: ktxLocalStateDbPath(project), + idFactory: (() => { + let index = 0; + return () => ids[index++] ?? `generated-${index}`; + })(), + }); +} + +export async function persistLocalBundleReport(projectDir: string, report = bundleReportSnapshot()): Promise { + const store = await localBundleStore(projectDir, [report.runId, report.id]); + const run = await store.create({ + jobId: report.jobId, + connectionId: report.connectionId, + sourceKey: report.sourceKey, + syncId: report.body.syncId, + trigger: 'manual_resync', + }); + await store.markCompleted(run.id, report.body.diffSummary); + await store.create({ + runId: run.id, + jobId: report.jobId, + connectionId: report.connectionId, + sourceKey: report.sourceKey, + body: report.body, + }); +} + +export async function writeBundleReportFile(tempDir: string, report = bundleReportSnapshot()): Promise { + const reportFile = join(tempDir, 'bundle-report.json'); + await writeFile(reportFile, `${JSON.stringify(report, null, 2)}\n`, 'utf-8'); + return reportFile; +} + +export function emitLiveLocalMemoryFlow(memoryFlow: MemoryFlowEventSink | undefined): void { + memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); + memoryFlow?.update({ syncId: 'sync-live-1' }); + memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); + memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); + memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); + memoryFlow?.finish('done'); +} diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 5a18938b..d1299d8a 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -1,737 +1,38 @@ -import { EventEmitter } from 'node:events'; import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent'; import { LocalLookerRuntimeStore, LocalMetabaseSourceStateReader, - MetabaseSourceAdapter, getLocalIngestStatus, - type ChunkResult, - type FetchContext, - type IngestReportSnapshot, type LocalIngestResult, type LocalMetabaseFanoutProgress, - type MemoryFlowEventSink, type MemoryFlowReplayInput, - type MetabaseCard, - type MetabaseCardSummary, - type MetabaseClientFactory, - type MetabaseRuntimeClient, type RunLocalIngestOptions, type SourceAdapter, - type SqliteBundleIngestStore, } from '@ktx/context/ingest'; -import { initKtxProject, ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; +import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { type KtxIngestArgs, runKtxIngest } from './ingest.js'; +import { + CliLookerSlWritingAgentRunner, + CliMetabaseAgentRunner, + CliMetabaseSourceAdapter, + completedLocalBundleRun, + emitLiveLocalMemoryFlow, + failedLocalBundleRun, + localFakeBundleReport, + makeCliLookerParser, + makeCliLookerRuntimeClient, + makeIo, + persistLocalBundleReport, + runPublicMetabaseSyncModeCase, + writeBundleReportFile, + writeMetabaseConfig, + writeWarehouseConfig, +} from './ingest.test-utils.js'; import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; -function makeIo( - options: { - isTTY?: boolean; - stdinIsTTY?: boolean; - columns?: number; - rawMode?: boolean; - keypresses?: { name?: string; ctrl?: boolean }[]; - } = {}, -) { - let stdout = ''; - let stderr = ''; - type TestKey = { name?: string; ctrl?: boolean }; - - class TestStdin extends EventEmitter { - isTTY = options.stdinIsTTY ?? false; - isRaw = false; - - setRawMode = - options.rawMode === false - ? undefined - : (value: boolean): void => { - this.isRaw = value; - }; - - resume(): void { - return undefined; - } - - pause(): void { - return undefined; - } - - override on(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { - const result = super.on(eventName, listener); - if (eventName === 'keypress') { - for (const key of options.keypresses ?? []) { - queueMicrotask(() => listener('', key)); - } - } - return result; - } - - override off(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { - return super.off(eventName, listener); - } - - override removeListener(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { - return super.removeListener(eventName, listener); - } - } - - const stdin = new TestStdin(); - - return { - io: { - stdin, - stdout: { - isTTY: options.isTTY, - columns: options.columns, - write: (chunk: string) => { - stdout += chunk; - }, - }, - stderr: { - write: (chunk: string) => { - stderr += chunk; - }, - }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -async function writeWarehouseConfig(projectDir: string): Promise { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' prod-metabase:', - ' driver: metabase', - ' warehouse_a:', - ' driver: postgres', - 'ingest:', - ' adapters:', - ' - fake', - '', - ].join('\n'), - 'utf-8', - ); -} - -async function writeMetabaseConfig(projectDir: string): Promise { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' warehouse:', - ' driver: postgres', - 'ingest:', - ' adapters:', - ' - metabase', - ' embeddings:', - ' backend: deterministic', - '', - ].join('\n'), - 'utf-8', - ); -} - -function bundleReportSnapshot(): IngestReportSnapshot { - return { - id: 'report-1', - runId: 'run-1', - jobId: 'job-1', - connectionId: 'warehouse', - sourceKey: 'metabase', - createdAt: '2026-04-30T12:00:00.000Z', - body: { - syncId: 'sync-1', - diffSummary: { added: 2, modified: 0, deleted: 0, unchanged: 0 }, - commitSha: 'abc12345', - workUnits: [ - { - unitKey: 'cards', - rawFiles: ['cards/1.json', 'cards/2.json'], - status: 'success', - actions: [ - { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, - { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, - ], - touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], - }, - ], - failedWorkUnits: [], - reconciliationSkipped: false, - conflictsResolved: [], - evictionsApplied: [], - unmappedFallbacks: [], - evictionInputs: [], - unresolvedCards: [], - supersededBy: null, - overrideOf: null, - provenanceRows: [ - { - rawPath: 'cards/1.json', - artifactKind: 'wiki', - artifactKey: 'knowledge/global/revenue.md', - actionType: 'wiki_written', - }, - { - rawPath: 'cards/2.json', - artifactKind: 'sl', - artifactKey: 'warehouse.orders', - actionType: 'measure_added', - }, - ], - toolTranscripts: [ - { - unitKey: 'cards', - path: 'tool-transcripts/cards.jsonl', - toolCallCount: 4, - errorCount: 0, - toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'], - }, - ], - }, - }; -} - -function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { - const nextReport = localFakeBundleReport(jobId, { - id: 'report-live-1', - runId: 'run-live-1', - connectionId: input.connectionId, - sourceKey: input.adapter, - }); - return { - result: { - jobId, - runId: nextReport.runId, - syncId: nextReport.body.syncId, - diffSummary: nextReport.body.diffSummary, - workUnitCount: nextReport.body.workUnits.length, - failedWorkUnits: nextReport.body.failedWorkUnits, - artifactsWritten: nextReport.body.provenanceRows.length, - commitSha: nextReport.body.commitSha, - }, - report: nextReport, - }; -} - -function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { - const failedWorkUnit = { - ...bundleReportSnapshot().body.workUnits[0], - status: 'failed' as const, - reason: 'writer tool failed', - actions: [], - touchedSlSources: [], - }; - const nextReport = localFakeBundleReport(jobId, { - id: 'report-failed-1', - runId: 'run-failed-1', - connectionId: input.connectionId, - sourceKey: input.adapter, - body: { - workUnits: [failedWorkUnit], - failedWorkUnits: [failedWorkUnit.unitKey], - }, - }); - return { - result: { - jobId, - runId: nextReport.runId, - syncId: nextReport.body.syncId, - diffSummary: nextReport.body.diffSummary, - workUnitCount: nextReport.body.workUnits.length, - failedWorkUnits: nextReport.body.failedWorkUnits, - artifactsWritten: nextReport.body.provenanceRows.length, - commitSha: nextReport.body.commitSha, - }, - report: nextReport, - }; -} - -class CliLookerSlWritingAgentRunner extends AgentRunnerService { - override runLoop = vi.fn(async (params: RunLoopParams) => { - if ( - params.telemetryTags?.operationName === 'ingest-bundle-wu' && - params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' - ) { - const slWrite = params.toolSet.sl_write_source; - if (!slWrite?.execute) { - throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); - } - const result = await slWrite.execute( - { - connectionId: 'prod-warehouse', - sourceName: 'looker__ecommerce__orders', - source: { - name: 'looker__ecommerce__orders', - table: 'public.orders', - grain: ['id'], - columns: [ - { name: 'id', type: 'number' }, - { name: 'revenue', type: 'number' }, - ], - measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }], - }, - }, - { toolCallId: 'cli-looker-sl-write', messages: [] }, - ); - if (!result.structured.success) { - throw new Error(result.markdown); - } - } - return { stopReason: 'natural' as const }; - }); - - constructor() { - super({ llmProvider: { getModel: () => ({}) as never } as never }); - } -} - -class CliMetabaseAgentRunner extends AgentRunnerService { - override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const })); - - constructor() { - super({ llmProvider: { getModel: () => ({}) as never } as never }); - } -} - -class CliMetabaseSourceAdapter implements SourceAdapter { - readonly source = 'metabase'; - readonly skillNames: string[] = []; - readonly fetchCalls: Array<{ metabaseConnectionId: string; metabaseDatabaseId: number; connectionId: string }> = []; - private readonly databaseByStagedDir = new Map(); - - detect(): Promise { - return Promise.resolve(true); - } - - async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { - const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; - this.fetchCalls.push({ - metabaseConnectionId: config.metabaseConnectionId, - metabaseDatabaseId: config.metabaseDatabaseId, - connectionId: ctx.connectionId, - }); - this.databaseByStagedDir.set(stagedDir, config.metabaseDatabaseId); - await mkdir(join(stagedDir, 'cards'), { recursive: true }); - await mkdir(join(stagedDir, 'databases'), { recursive: true }); - await writeFile( - join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`), - JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }), - 'utf-8', - ); - await writeFile( - join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`), - JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }), - 'utf-8', - ); - } - - async chunk(stagedDir: string): Promise { - const databaseId = this.databaseByStagedDir.get(stagedDir); - if (!databaseId) { - throw new Error(`Missing Metabase database id for staged dir ${stagedDir}`); - } - return { - workUnits: [ - { - unitKey: `metabase-db-${databaseId}`, - rawFiles: [`cards/${databaseId}.json`], - peerFileIndex: [], - dependencyPaths: [`databases/${databaseId}.json`], - }, - ], - }; - } -} - -const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ - { - id: 101, - name: 'Collection 12 Revenue', - description: null, - type: 'question', - query_type: 'native', - database_id: 1, - collection_id: 12, - archived: false, - result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, - parameters: [], - dashboard_count: 0, - }, - { - id: 102, - name: 'Collection 12 Margin', - description: null, - type: 'question', - query_type: 'native', - database_id: 1, - collection_id: 12, - archived: false, - result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, - parameters: [], - dashboard_count: 0, - }, - { - id: 103, - name: 'Collection 13 Pipeline', - description: null, - type: 'question', - query_type: 'native', - database_id: 1, - collection_id: 13, - archived: false, - result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, - parameters: [], - dashboard_count: 0, - }, -]; - -function metabaseCardSummary(card: MetabaseCard): MetabaseCardSummary { - return { - id: card.id, - name: card.name, - archived: card.archived, - database_id: card.database_id, - collection_id: card.collection_id, - }; -} - -function createSyncModeMetabaseClient(): MetabaseRuntimeClient { - const cardsById = new Map(SYNC_MODE_METABASE_CARDS.map((card) => [card.id, card])); - return { - testConnection: async () => ({ success: true }), - getCurrentUser: async () => ({ id: 1, email: 'local@example.test' }), - getDatabases: async () => [{ id: 1, name: 'Warehouse A', engine: 'postgres' }], - getDatabase: async (id) => ({ id, name: 'Warehouse A', engine: 'postgres' }), - getCollectionTree: async () => [ - { id: 12, name: 'Selected Collection', parent_id: 'root', children: [] }, - { id: 13, name: 'Other Collection', parent_id: 'root', children: [] }, - ], - getCollection: async (id) => ({ - id, - name: id === 12 ? 'Selected Collection' : 'Other Collection', - parent_id: 'root', - children: [], - }), - getCollectionItems: async (collectionId) => - SYNC_MODE_METABASE_CARDS.filter((card) => card.collection_id === collectionId).map((card) => ({ - id: card.id, - model: 'card', - name: card.name, - collection_id: card.collection_id, - database_id: card.database_id, - })), - getCard: async (id) => { - const card = cardsById.get(id); - if (!card) { - throw new Error(`unexpected card ${id}`); - } - return card; - }, - getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), - convertMbqlToNative: async () => ({ query: 'select 1' }), - getNativeSql: (card) => card.dataset_query?.native?.query ?? null, - getTemplateTags: () => ({}), - getCardSql: async (card) => card.dataset_query?.native?.query ?? null, - getResolvedSql: async (card) => ({ - resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`, - templateTags: [], - resolutionStatus: 'resolved', - }), - cleanup: async () => undefined, - }; -} - -class StaticMetabaseClientFactory implements MetabaseClientFactory { - constructor(private readonly client: MetabaseRuntimeClient) {} - - createClient(): MetabaseRuntimeClient { - return this.client; - } -} - -type SyncModeCase = { - name: string; - syncMode: 'ALL' | 'ONLY' | 'EXCEPT'; - selections: Array<{ selectionType: 'collection' | 'item'; metabaseObjectId: number }>; - expectedRawFiles: string[]; - expectedWorkUnitKeys: string[]; -}; - -async function runPublicMetabaseSyncModeCase(tempDir: string, input: SyncModeCase): Promise { - const projectDir = join(tempDir, `metabase-sync-mode-${input.name}`); - await initKtxProject({ projectDir, projectName: `metabase-sync-mode-${input.name}` }); - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - `project: metabase-sync-mode-${input.name}`, - 'connections:', - ' prod-metabase:', - ' driver: metabase', - ' api_url: https://metabase.example.test', - ' api_key: literal-test-key', - ' warehouse_a:', - ' driver: postgres', - ' url: postgresql://readonly@db.example.test/warehouse_a', - 'ingest:', - ' adapters:', - ' - metabase', - ' embeddings:', - ' backend: deterministic', - '', - ].join('\n'), - 'utf-8', - ); - - const project = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); - await store.replaceSourceState({ - connectionId: 'prod-metabase', - syncMode: input.syncMode, - defaultTagNames: ['sync-mode-smoke'], - selections: input.selections, - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Warehouse A', - metabaseEngine: 'postgres', - metabaseHost: 'db.example.test', - metabaseDbName: 'warehouse_a', - targetConnectionId: 'warehouse_a', - syncEnabled: true, - source: 'refresh', - }, - ], - }); - - const adapter = new MetabaseSourceAdapter({ - clientFactory: new StaticMetabaseClientFactory(createSyncModeMetabaseClient()), - sourceStateReader: store, - }); - const jobId = `metabase-sync-mode-${input.name}-child`; - const io = makeIo(); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'prod-metabase', - adapter: 'metabase', - outputMode: 'plain', - }, - io.io, - { - createAdapters: vi.fn(() => [adapter]), - jobIdFactory: () => jobId, - localIngestOptions: { - agentRunner: new CliMetabaseAgentRunner(), - }, - }, - ), - ).resolves.toBe(0); - - expect(io.stderr()).toBe(''); - expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); - expect(io.stdout()).toContain(`target=warehouse_a database=1 status=done job=${jobId}`); - - const report = await getLocalIngestStatus(project, jobId); - expect(report).not.toBeNull(); - expect(report?.body.workUnits.map((wu) => wu.unitKey).sort()).toEqual(input.expectedWorkUnitKeys); - expect(report?.body.workUnits.flatMap((wu) => wu.rawFiles).sort()).toEqual(input.expectedRawFiles); -} - -function makeCliLookerRuntimeClient() { - const lookerModels = { - source: 'looker', - fetchedAt: '2026-05-05T00:00:00.000Z', - models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], - }; - const lookerExplore = { - source: 'looker', - modelName: 'ecommerce', - exploreName: 'orders', - label: 'Orders', - description: null, - connectionName: 'analytics', - viewName: 'orders', - rawSqlTableName: 'public.orders', - fields: { - dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }], - measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }], - }, - joins: [ - { - name: 'users', - type: 'left_outer', - relationship: 'many_to_one', - rawSqlTableName: 'public.users', - sqlOn: '${orders.user_id} = ${users.id}', - from: null, - targetTable: null, - }, - ], - targetWarehouseConnectionId: null, - targetTable: null, - }; - - return { - listLookerConnections: vi.fn().mockResolvedValue([ - { - name: 'analytics', - host: 'db.example.test', - database: 'analytics', - schema: null, - dialect: 'postgres', - }, - ]), - listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]), - getDashboard: vi.fn().mockResolvedValue({ - lookerId: '10', - title: 'Revenue Overview', - description: 'Revenue dashboard', - folderId: '7', - ownerId: '3', - updatedAt: '2026-05-05T08:00:00.000Z', - tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }], - }), - listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]), - getLook: vi.fn().mockResolvedValue({ - lookerId: '20', - title: 'Revenue Look', - description: null, - folderId: '7', - ownerId: '3', - updatedAt: '2026-05-05T08:10:00.000Z', - query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] }, - }), - listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }), - listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]), - listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]), - listLookmlModels: vi.fn().mockResolvedValue(lookerModels), - getExplore: vi.fn().mockResolvedValue(lookerExplore), - getSignals: vi.fn().mockResolvedValue({ - dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }], - lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }], - scheduledPlans: [ - { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 }, - ], - favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }], - }), - cleanup: vi.fn().mockResolvedValue(undefined), - }; -} - -function makeCliLookerParser() { - return { - parse: vi.fn().mockResolvedValue({ - 'ecommerce.orders': { - ok: true, - catalog: null, - schema: 'public', - name: 'orders', - canonical_table: 'public.orders', - }, - 'ecommerce.orders.users': { - ok: true, - catalog: null, - schema: 'public', - name: 'users', - canonical_table: 'public.users', - }, - }), - }; -} - -function localFakeBundleReport( - jobId: string, - overrides: Partial> & { body?: Partial } = {}, -): IngestReportSnapshot { - const report = bundleReportSnapshot(); - return { - ...report, - id: `report-${jobId}`, - runId: `run-${jobId}`, - jobId, - connectionId: 'warehouse', - sourceKey: 'fake', - ...overrides, - body: { - ...report.body, - syncId: 'sync-live-1', - ...(overrides.body ?? {}), - }, - }; -} - -async function localBundleStore(projectDir: string, ids: [string, string]): Promise { - const { SqliteBundleIngestStore } = await import('@ktx/context/ingest'); - const project = await loadKtxProject({ projectDir }); - return new SqliteBundleIngestStore({ - dbPath: ktxLocalStateDbPath(project), - idFactory: (() => { - let index = 0; - return () => ids[index++] ?? `generated-${index}`; - })(), - }); -} - -async function persistLocalBundleReport(projectDir: string, report = bundleReportSnapshot()): Promise { - const store = await localBundleStore(projectDir, [report.runId, report.id]); - const run = await store.create({ - jobId: report.jobId, - connectionId: report.connectionId, - sourceKey: report.sourceKey, - syncId: report.body.syncId, - trigger: 'manual_resync', - }); - await store.markCompleted(run.id, report.body.diffSummary); - await store.create({ - runId: run.id, - jobId: report.jobId, - connectionId: report.connectionId, - sourceKey: report.sourceKey, - body: report.body, - }); -} - -async function writeBundleReportFile(tempDir: string, report = bundleReportSnapshot()): Promise { - const reportFile = join(tempDir, 'bundle-report.json'); - await writeFile(reportFile, `${JSON.stringify(report, null, 2)}\n`, 'utf-8'); - return reportFile; -} - -function emitLiveLocalMemoryFlow(memoryFlow: MemoryFlowEventSink | undefined): void { - memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); - memoryFlow?.update({ syncId: 'sync-live-1' }); - memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); - memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); - memoryFlow?.update({ - plannedWorkUnits: [ - { - unitKey: 'fake-orders', - rawFiles: ['orders/orders.json'], - peerFileCount: 0, - dependencyCount: 0, - }, - ], - }); - memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); - memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); - memoryFlow?.finish('done'); -} - describe('runKtxIngest', () => { let tempDir: string; let originalTerm: string | undefined; @@ -754,7 +55,6 @@ describe('runKtxIngest', () => { it('runs local ingest and reads status', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -806,7 +106,6 @@ describe('runKtxIngest', () => { it('routes metabase scheduled pulls to the fan-out runner and prints child summaries', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); const report = localFakeBundleReport('metabase-child-1', { @@ -864,7 +163,6 @@ describe('runKtxIngest', () => { it('returns a non-zero code when Metabase fan-out has failed children', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); const report = localFakeBundleReport('metabase-child-1', { @@ -935,7 +233,6 @@ describe('runKtxIngest', () => { it('prints Metabase fan-out progress before the final summary', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); const report = localFakeBundleReport('metabase-child-1', { @@ -1015,7 +312,7 @@ describe('runKtxIngest', () => { it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => { const projectDir = join(tempDir, 'metabase-cli-project'); - await initKtxProject({ projectDir, projectName: 'metabase-cli' }); + await writeWarehouseConfig(projectDir); await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -1153,7 +450,6 @@ describe('runKtxIngest', () => { it('prints metabase fan-out JSON results', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); @@ -1188,7 +484,6 @@ describe('runKtxIngest', () => { it('rejects source-dir uploads through the metabase fan-out route', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); @@ -1218,7 +513,6 @@ describe('runKtxIngest', () => { it('prints previous run and diff summary for local ingest results', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -1252,7 +546,6 @@ describe('runKtxIngest', () => { it('returns a non-zero code when local ingest reports failed work units', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -1284,7 +577,6 @@ describe('runKtxIngest', () => { it('passes the debug LLM request file to local ingest runs', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'job-debug'), @@ -1311,7 +603,6 @@ describe('runKtxIngest', () => { it('passes daemon database introspection URL to default local ingest adapters', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -1359,7 +650,7 @@ describe('runKtxIngest', () => { it('passes the target connection id when constructing local historic-sql adapters', async () => { const projectDir = join(tempDir, 'historic-sql-project'); - await initKtxProject({ projectDir, projectName: 'historic-sql-project' }); + await writeWarehouseConfig(projectDir); await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -1420,7 +711,6 @@ describe('runKtxIngest', () => { it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const pullConfigOptions = { looker: { @@ -1474,7 +764,7 @@ describe('runKtxIngest', () => { it('runs Looker scheduled ingest through the public CLI command path', async () => { const projectDir = join(tempDir, 'looker-project'); - await initKtxProject({ projectDir, projectName: 'looker-cli' }); + await writeWarehouseConfig(projectDir); await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -1568,847 +858,4 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); - it('renders live memory-flow frames for run --viz when stdout is interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); - input.memoryFlow?.update({ syncId: 'sync-live-1' }); - input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); - input.memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); - input.memoryFlow?.update({ - plannedWorkUnits: [ - { - unitKey: 'fake-orders', - rawFiles: ['orders/orders.json'], - peerFileCount: 0, - dependencyCount: 0, - }, - ], - }); - input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); - input.memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); - input.memoryFlow?.finish('done'); - - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - now: () => new Date('2026-04-30T14:00:00.000Z'), - }, - ), - ).resolves.toBe(0); - - expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.any(Object) })); - expect(io.stdout()).toContain('\u001b[2J\u001b[H'); - expect((io.stdout().match(/KTX memory flow/g) ?? []).length).toBeGreaterThan(1); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('fake-orders'); - expect(io.stderr()).toBe(''); - }); - - it('uses the TUI live session for run --viz when stdin and stdout are interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const liveSession = { - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - }; - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - now: () => new Date('2026-04-30T14:00:00.000Z'), - }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); - expect(startLiveMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ - runId: 'live-viz-run', - connectionId: 'warehouse', - adapter: 'fake', - status: 'running', - }); - expect(liveSession.update).toHaveBeenCalled(); - expect(liveSession.close).toHaveBeenCalledTimes(1); - expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toBe(''); - }); - - it('prints a final plain summary after live viz completes', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const liveSession = { - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - }; - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-summary'); - }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - outputMode: 'viz', - }, - io.io, - { runLocalIngest: runLocal, startLiveMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(liveSession.close).toHaveBeenCalledTimes(1); - expect(io.stdout()).toContain('Memory-flow summary: done'); - expect(io.stdout()).toContain('Connection: warehouse'); - }); - - it('falls back to text live rendering when the TUI live session is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); - expect(io.stdout()).toContain('\u001b[2J\u001b[H'); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('falls back to text live rendering when TUI startup fails with a redacted warning', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const startLiveMemoryFlow = vi.fn( - async (_input: MemoryFlowReplayInput, ioArg: { stderr: { write(chunk: string): void } }) => { - ioArg.stderr.write('TUI visualization unavailable: Failed [redacted-url] [redacted]; using text renderer.\n'); - return null; - }, - ); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(io.stderr()).toContain('TUI visualization unavailable: Failed [redacted-url] [redacted]'); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('\u001b[2J\u001b[H'); - }); - - it('does not start live TUI when run --viz disables input', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - return completedLocalBundleRun(input, 'no-input-live-viz-run'); - }); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - })); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - { runLocalIngest: runLocal, startLiveMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).not.toHaveBeenCalled(); - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('does not attach a live memory-flow sink for plain run output', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'plain-run')); - const io = makeIo({ isTTY: true }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'plain', - }, - io.io, - { runLocalIngest: runLocal }, - ), - ).resolves.toBe(0); - - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('Job: plain-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - }); - - it('falls back to plain run output for run --viz when stdout is not interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const io = makeIo({ isTTY: false }); - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'non-tty-viz-run')); - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - jobIdFactory: () => 'non-tty-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Job: non-tty-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdout is not an interactive terminal; printing plain output.', - ); - }); - - it('falls back to plain run output for run --viz when stdin raw mode is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'raw-missing-viz-run')); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - })); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'raw-missing-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).not.toHaveBeenCalled(); - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('Job: raw-missing-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdin raw mode is unavailable; printing plain output.', - ); - }); - - it('returns an error code for missing status', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const io = makeIo(); - - await expect( - runKtxIngest({ command: 'status', projectDir, runId: 'missing-run', outputMode: 'plain' }, io.io), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found'); - }); - - it('uses the latest local ingest report when status has no run id', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - await persistLocalBundleReport(projectDir, localFakeBundleReport('older-run')); - await persistLocalBundleReport(projectDir, localFakeBundleReport('newer-run')); - const io = makeIo(); - - await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-newer-run'); - expect(io.stdout()).toContain('Job: newer-run'); - expect(io.stderr()).toBe(''); - }); - - it('renders the latest local ingest report through watch when run id is omitted', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - await persistLocalBundleReport(projectDir, localFakeBundleReport('watch-latest')); - const io = makeIo({ isTTY: true }); - - await expect( - runKtxIngest({ command: 'watch', projectDir, outputMode: 'viz', inputMode: 'disabled' }, io.io), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('Run: run-watch-latest'); - expect(io.stderr()).toBe(''); - }); - - it('renders report-file replay through the memory-flow TUI', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo({ isTTY: true }); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'job-1', - reportFile, - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/metabase done'); - expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); - expect(io.stdout()).toContain('Commit: abc12345 Run: run-1 Report: report-1'); - expect(io.stdout()).toContain('SOURCE'); - expect(io.stdout()).toContain('ACTIONS'); - expect(io.stdout()).toContain('SAVED'); - expect(io.stderr()).toBe(''); - }); - - it('prints report-file JSON without looking up local ingest status', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo(); - - await expect( - runKtxIngest({ command: 'status', projectDir, runId: 'report-1', reportFile, outputMode: 'json' }, io.io), - ).resolves.toBe(0); - - const parsed = JSON.parse(io.stdout()); - expect(parsed).toMatchObject({ - id: 'report-1', - runId: 'run-1', - jobId: 'job-1', - connectionId: 'warehouse', - sourceKey: 'metabase', - }); - expect(io.stderr()).toBe(''); - }); - - it('routes interactive report-file replay through the stored TUI renderer', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'run-1', - reportFile, - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); - expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ - runId: 'run-1', - reportId: 'report-1', - connectionId: 'warehouse', - adapter: 'metabase', - }); - expect(io.stdout()).toBe(''); - expect(io.stderr()).toBe(''); - }); - - it('rejects report-file replay when the requested id does not match the report', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo(); - - await expect( - runKtxIngest({ command: 'replay', projectDir, runId: 'unrelated-id', reportFile, outputMode: 'plain' }, io.io), - ).resolves.toBe(1); - - expect(io.stderr()).toContain( - `Report file ${reportFile} does not match ingest replay id "unrelated-id"; expected one of report-1, run-1, job-1`, - ); - expect(io.stdout()).toBe(''); - }); - - it('renders memory-flow snapshot for status --viz when stdout is interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-1')); - - const io = makeIo({ isTTY: true }); - await expect( - runKtxIngest( - { command: 'status', projectDir, runId: 'viz-run-1', outputMode: 'viz', inputMode: 'disabled' }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('SOURCE'); - expect(io.stdout()).toContain('CHUNKS'); - expect(io.stdout()).toContain('WORKUNITS'); - expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); - expect(io.stderr()).toBe(''); - }); - - it('uses the TUI renderer for stored status --viz when stdin and stdout are interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-viz-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'status', - projectDir, - runId: 'tui-viz-run', - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); - expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ - runId: 'run-tui-viz-run', - connectionId: 'warehouse', - adapter: 'fake', - }); - expect(io.stdout()).toBe(''); - expect(io.stderr()).toBe(''); - }); - - it('falls back to the text renderer when TUI declines stored status --viz', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-fallback-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120, keypresses: [{ name: 'q' }] }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => false); - - await expect( - runKtxIngest( - { - command: 'status', - projectDir, - runId: 'tui-fallback-run', - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('does not use TUI for stored --viz when input is disabled', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-no-input-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'tui-no-input-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('falls back to plain status for stored --viz when stdin raw mode is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('raw-missing-stored-viz-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'raw-missing-stored-viz-run', - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('Run: run-raw-missing-stored-viz-run'); - expect(io.stdout()).toContain('Job: raw-missing-stored-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdin raw mode is unavailable; printing plain output.', - ); - }); - - it('keeps stored --viz snapshot-only when input is disabled', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('no-input-viz-run')); - - const io = makeIo({ isTTY: true, columns: 120 }); - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'no-input-viz-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); - expect(io.stderr()).toBe(''); - }); - - it('keeps disabled-input stored --viz snapshot output even when stdin raw mode is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('disabled-raw-missing-viz-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'disabled-raw-missing-viz-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); - expect(io.stderr()).toBe(''); - }); - - it('degrades stored --viz snapshots to plain status when stdout is redirected even when input is disabled', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('redirected-no-input-viz-run')); - - const io = makeIo({ isTTY: false }); - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'redirected-no-input-viz-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-redirected-no-input-viz-run'); - expect(io.stdout()).toContain('Job: redirected-no-input-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdout is not an interactive terminal; printing plain output.', - ); - }); - - it('degrades ingest replay --viz to plain status when TERM is dumb', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('dumb-terminal-viz-run')); - - const io = makeIo({ isTTY: true }); - await expect( - runKtxIngest( - { command: 'replay', projectDir, runId: 'dumb-terminal-viz-run', outputMode: 'viz' }, - io.io, - { env: { ...process.env, TERM: 'dumb' } }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-dumb-terminal-viz-run'); - expect(io.stdout()).toContain('Job: dumb-terminal-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.', - ); - }); - - it('falls back to plain status for --viz when stdout is not interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-2')); - - const io = makeIo({ isTTY: false }); - await expect( - runKtxIngest({ command: 'replay', projectDir, runId: 'viz-run-2', outputMode: 'viz' }, io.io), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-viz-run-2'); - expect(io.stdout()).toContain('Job: viz-run-2'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdout is not an interactive terminal; printing plain output.', - ); - }); - - it('prints JSON for status --json', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('json-run-1')); - - const io = makeIo(); - await expect( - runKtxIngest({ command: 'status', projectDir, runId: 'json-run-1', outputMode: 'json' }, io.io), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - runId: 'run-json-run-1', - jobId: 'json-run-1', - sourceKey: 'fake', - connectionId: 'warehouse', - }); - expect(io.stderr()).toBe(''); - }); }); diff --git a/packages/context/package.json b/packages/context/package.json index b85947e5..78d3c4de 100644 --- a/packages/context/package.json +++ b/packages/context/package.json @@ -120,10 +120,12 @@ "scripts": { "build": "tsc -p tsconfig.json", "relationships:benchmarks": "pnpm --silent run build && node scripts/relationship-benchmark-report.mjs", + "relationships:benchmarks:test": "KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts", "search:pglite-spike": "node scripts/pglite-hybrid-search-spike.mjs", "search:pglite-owner-prototype": "node scripts/pglite-owner-process-prototype.mjs", "search:pglite-sl-prototype": "node scripts/pglite-sl-search-prototype.mjs", - "test": "vitest run", + "test": "vitest run --exclude src/scan/relationship-benchmarks.test.ts --exclude src/scan/local-scan.test.ts --exclude src/mcp/local-project-ports.test.ts --exclude src/ingest/local-stage-ingest.test.ts --exclude src/sl/pglite-sl-search-prototype.test.ts --exclude src/core/git.service.test.ts --exclude src/ingest/local-adapters.test.ts --exclude src/ingest/local-bundle-ingest.test.ts --exclude src/ingest/local-metabase-ingest.test.ts --exclude src/sl/local-sl.test.ts --exclude src/search/pglite-owner-process.test.ts --exclude src/scan/local-enrichment-artifacts.test.ts --exclude src/search/pglite-spike.test.ts --exclude src/wiki/local-knowledge.test.ts --exclude src/sl/local-query.test.ts --exclude src/scan/relationship-review-decisions.test.ts --exclude src/scan/relationship-profiling.test.ts", + "test:slow": "vitest run src/scan/local-scan.test.ts src/mcp/local-project-ports.test.ts src/ingest/local-stage-ingest.test.ts src/sl/pglite-sl-search-prototype.test.ts src/core/git.service.test.ts src/ingest/local-adapters.test.ts src/ingest/local-bundle-ingest.test.ts src/ingest/local-metabase-ingest.test.ts src/sl/local-sl.test.ts src/search/pglite-owner-process.test.ts src/scan/local-enrichment-artifacts.test.ts src/search/pglite-spike.test.ts src/wiki/local-knowledge.test.ts src/sl/local-query.test.ts src/scan/relationship-review-decisions.test.ts src/scan/relationship-profiling.test.ts --testTimeout 30000", "type-check": "tsc -p tsconfig.json --noEmit" }, "dependencies": { diff --git a/packages/context/src/scan/relationship-benchmarks.test.ts b/packages/context/src/scan/relationship-benchmarks.test.ts index ecc9e4a9..b4e5c782 100644 --- a/packages/context/src/scan/relationship-benchmarks.test.ts +++ b/packages/context/src/scan/relationship-benchmarks.test.ts @@ -53,6 +53,12 @@ const CHECKED_IN_FIXTURE_ORIGINS = { semantic_embedding_aliases_no_declared_constraints: 'synthetic', } as const; +function runAdHocRelationshipBenchmarks(): boolean { + return process.env.KTX_RUN_RELATIONSHIP_BENCHMARKS === '1'; +} + +const adHocRelationshipBenchmarkIt = runAdHocRelationshipBenchmarks() ? it : it.skip; + function snapshot(): KtxSchemaSnapshot { return { connectionId: 'warehouse', @@ -644,7 +650,7 @@ describe('relationship benchmarks', () => { expect(fixture.expected.expectedLinks).toHaveLength(1900); }); - it('runs the scale stress fixture inside the benchmark validation budget', async () => { + adHocRelationshipBenchmarkIt('runs the scale stress fixture inside the benchmark validation budget', async () => { const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); const fixture = await loadKtxRelationshipBenchmarkFixture( join(fixtureRoot.pathname, 'scale_stress_no_declared_constraints'), diff --git a/packages/context/src/scan/relationship-candidates.ts b/packages/context/src/scan/relationship-candidates.ts index b10aa069..cd3b7767 100644 --- a/packages/context/src/scan/relationship-candidates.ts +++ b/packages/context/src/scan/relationship-candidates.ts @@ -7,6 +7,7 @@ import type { } from './enrichment-types.js'; import { localCandidateTables } from './relationship-locality.js'; import { + type KtxRelationshipNormalizedName, normalizeKtxRelationshipName, pluralizeKtxRelationshipToken, singularizeKtxRelationshipToken, @@ -97,9 +98,22 @@ const REFERENCE_SUFFIXES: Array<{ suffix: string; reason: string }> = [ { suffix: '_uuid', reason: 'foreign_key_uuid_suffix' }, ]; const RELATIONSHIP_KEY_TARGET_SUFFIXES = ['_id', '_key', '_code', '_uuid'] as const; +const tableAliasesCache = new WeakMap>(); +const parentTableNameAliasesCache = new WeakMap>(); +const normalizedColumnNameCache = new WeakMap(); + +function normalizedColumnName(column: KtxEnrichedColumn): KtxRelationshipNormalizedName { + const cached = normalizedColumnNameCache.get(column); + if (cached) { + return cached; + } + const normalized = normalizeKtxRelationshipName(column.name); + normalizedColumnNameCache.set(column, normalized); + return normalized; +} function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean { - const normalized = normalizeKtxRelationshipName(column.name); + const normalized = normalizedColumnName(column); return ( normalized.tokens.length >= 2 && RELATIONSHIP_KEY_TARGET_SUFFIXES.some((suffix) => normalized.normalized.endsWith(suffix)) @@ -107,8 +121,8 @@ function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean { } function columnSuffixMatchesTarget(input: { fromColumn: KtxEnrichedColumn; toColumn: KtxEnrichedColumn }): boolean { - const source = normalizeKtxRelationshipName(input.fromColumn.name).normalized; - const target = normalizeKtxRelationshipName(input.toColumn.name).normalized; + const source = normalizedColumnName(input.fromColumn).normalized; + const target = normalizedColumnName(input.toColumn).normalized; return source !== target && target.length > 0 && source.endsWith(`_${target}`); } @@ -160,7 +174,7 @@ function hasUsableEmbedding(column: KtxEnrichedColumn): boolean { } function sourceColumnReference(column: KtxEnrichedColumn): KtxRelationshipSourceColumnReference | null { - const normalized = normalizeKtxRelationshipName(column.name); + const normalized = normalizedColumnName(column); if (SELF_REFERENCE_NAMES.has(normalized.normalized)) { return { base: normalized.normalized.replace(/_id$/u, ''), reason: 'foreign_key_suffix' }; } @@ -192,6 +206,11 @@ function addNormalizedTableAlias(aliases: Set, name: string): void { } function tableAliases(table: KtxEnrichedTable): Set { + const cached = tableAliasesCache.get(table); + if (cached) { + return cached; + } + const normalized = normalizeKtxRelationshipName(table.ref.name); const aliases = new Set([normalized.normalized, normalized.singular, normalized.plural]); if (normalized.tokens.length > 1) { @@ -203,6 +222,7 @@ function tableAliases(table: KtxEnrichedTable): Set { aliases.add(pluralizeKtxRelationshipToken(singularLastToken)); } } + tableAliasesCache.set(table, aliases); return aliases; } @@ -212,13 +232,19 @@ function finalTableNamePart(table: KtxEnrichedTable): string { } function parentTableNameAliases(table: KtxEnrichedTable): Set { - const aliases = tableAliases(table); + const cached = parentTableNameAliasesCache.get(table); + if (cached) { + return cached; + } + + const aliases = new Set(tableAliases(table)); addNormalizedTableAlias(aliases, finalTableNamePart(table)); + parentTableNameAliasesCache.set(table, aliases); return aliases; } function targetKeyScore(table: KtxEnrichedTable, column: KtxEnrichedColumn): number { - const columnName = normalizeKtxRelationshipName(column.name).normalized; + const columnName = normalizedColumnName(column).normalized; const tableKeyBases = parentTableNameAliases(table); if (column.primaryKey) { return 1; @@ -338,7 +364,7 @@ function candidateParentTables(input: { maxParentTables, }).map((item) => item.table); - const normalizedColumn = normalizeKtxRelationshipName(input.fromColumn.name).normalized; + const normalizedColumn = normalizedColumnName(input.fromColumn).normalized; if (!SELF_REFERENCE_NAMES.has(normalizedColumn) || ranked.some((table) => table.id === input.fromTable.id)) { return ranked; } @@ -364,7 +390,7 @@ function targetKeyEvidence( return { score: 0, reasons: [] }; } - const columnName = normalizeKtxRelationshipName(column.name).normalized; + const columnName = normalizedColumnName(column).normalized; if (columnName === 'code' || columnName.endsWith('_code') || columnName === 'key' || columnName.endsWith('_key')) { return { score: 0.86, reasons: ['profile_unique_target'] }; } @@ -500,7 +526,7 @@ function createCandidate(input: { evidence: { sourceColumnBase: input.sourceBase, targetTableBase: input.targetBase, - targetColumnBase: normalizeKtxRelationshipName(input.toColumn.name).normalized, + targetColumnBase: normalizedColumnName(input.toColumn).normalized, targetKeyScore: input.targetKeyScore, nameScore: input.nameScore, reasons: input.reasons, @@ -553,7 +579,7 @@ function generateKtxEmbeddingRelationshipCandidates( continue; } - const sourceBase = normalizeKtxRelationshipName(fromColumn.name).normalized; + const sourceBase = normalizedColumnName(fromColumn).normalized; const targetBase = normalizeKtxRelationshipName(toTable.ref.name).singular; const reasons = ['embedding_similarity', ...keyEvidence.reasons]; const candidate = createCandidate({ @@ -620,7 +646,7 @@ export function generateKtxRelationshipDiscoveryCandidates( const sameTable = fromTable.id === toTable.id; const nameMatchesTarget = strictAliases.has(sourceBase); const parentTableNameMatcher = !sameTable && !nameMatchesTarget && parentAliases.has(sourceBase); - const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizeKtxRelationshipName(fromColumn.name).normalized); + const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizedColumnName(fromColumn).normalized); const strictTableMatcher = (!sameTable && nameMatchesTarget) || selfReference; for (const toColumn of toTable.columns) { @@ -675,7 +701,7 @@ export function generateKtxRelationshipDiscoveryCandidates( if ( !suffixMatcher && !parentTableNameMatcher && - normalizeKtxRelationshipName(fromColumn.name).normalized === normalizeKtxRelationshipName(toColumn.name).normalized + normalizedColumnName(fromColumn).normalized === normalizedColumnName(toColumn).normalized ) { reasons.push('exact_column_name'); nameScore = Math.max(nameScore, 0.9); diff --git a/packages/context/src/scan/relationship-locality.ts b/packages/context/src/scan/relationship-locality.ts index 5b180430..246ce84d 100644 --- a/packages/context/src/scan/relationship-locality.ts +++ b/packages/context/src/scan/relationship-locality.ts @@ -18,20 +18,28 @@ export interface LocalKtxRelationshipCandidateTablesInput { const DEFAULT_MAX_PARENT_TABLES = 20; const RELATIONSHIP_SUFFIX_TOKENS = new Set(['id', 'ids', 'key', 'keys', 'code', 'codes', 'uuid', 'uuids']); +const normalizedTokenVariantsCache = new Map(); function roundedScore(value: number): number { return Number(Math.max(0, Math.min(1, value)).toFixed(3)); } function normalizedTokenVariants(name: string): string[] { + const cached = normalizedTokenVariantsCache.get(name); + if (cached) { + return cached; + } + const normalized = normalizeKtxRelationshipName(name); - return Array.from( + const variants = Array.from( new Set([ ...normalized.tokens, ...tokenizeKtxRelationshipName(normalized.singular), ...tokenizeKtxRelationshipName(normalized.plural), ]), ).filter(Boolean); + normalizedTokenVariantsCache.set(name, variants); + return variants; } function childColumnLocalityTokens(column: KtxEnrichedColumn): string[] { @@ -91,24 +99,29 @@ function parentEmbeddingScore(childColumn: KtxEnrichedColumn, parentTable: KtxEn } function tableTokenScore(input: { - childTable: KtxEnrichedTable; - childColumn: KtxEnrichedColumn; + childTableId: string; + childTableTokens: readonly string[]; + childColumnTokens: readonly string[]; parentTable: KtxEnrichedTable; }): number { - const childTableTokens = normalizedTokenVariants(input.childTable.ref.name); - const childColumnTokens = childColumnLocalityTokens(input.childColumn); const parentTokens = normalizedTokenVariants(input.parentTable.ref.name); - const columnOnlyScore = jaccard(childColumnTokens, parentTokens); - if (input.parentTable.id === input.childTable.id) { + const columnOnlyScore = jaccard(input.childColumnTokens, parentTokens); + if (parentTokens.length === 0) { + return 0; + } + if (input.parentTable.id === input.childTableId) { return columnOnlyScore; } - const columnAndTableScore = jaccard(uniqueTokens([...childTableTokens, ...childColumnTokens]), parentTokens); + const columnAndTableScore = jaccard(uniqueTokens([...input.childTableTokens, ...input.childColumnTokens]), parentTokens); return Math.max(columnOnlyScore, columnAndTableScore * 0.6); } function localityScore(input: { childTable: KtxEnrichedTable; + childTableId: string; + childTableTokens: readonly string[]; childColumn: KtxEnrichedColumn; + childColumnTokens: readonly string[]; parentTable: KtxEnrichedTable; }): Omit { const tokenScore = roundedScore(tableTokenScore(input)); @@ -143,12 +156,18 @@ export function localCandidateTables( return []; } + const childTableTokens = normalizedTokenVariants(input.childTable.ref.name); + const childColumnTokens = childColumnLocalityTokens(input.childColumn); + return input.parentTables .map((table) => ({ table, ...localityScore({ childTable: input.childTable, + childTableId: input.childTable.id, + childTableTokens, childColumn: input.childColumn, + childColumnTokens, parentTable: table, }), })) diff --git a/packages/context/src/sl/schemas.ts b/packages/context/src/sl/schemas.ts index 56f415a8..218c0435 100644 --- a/packages/context/src/sl/schemas.ts +++ b/packages/context/src/sl/schemas.ts @@ -1,7 +1,7 @@ import { z } from 'zod'; // Literal vocabularies — kept in lockstep with the Python Pydantic model at -// python-service/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole / +// python/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole / // ColumnVisibility / JoinDeclaration). If these diverge, YAMLs can pass // TypeScript validation at ingest time but fail Python loading at query time. const columnTypeValues = ['string', 'number', 'time', 'boolean'] as const; diff --git a/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts b/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts index f6cdd3fe..f9bf513b 100644 --- a/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts +++ b/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts @@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest'; import { createHttpSqlAnalysisPort } from './http-sql-analysis-port.js'; describe('createHttpSqlAnalysisPort', () => { - it('calls the python-service fingerprint endpoint and maps snake_case response fields', async () => { + it('calls the SQL-analysis fingerprint endpoint and maps snake_case response fields', async () => { const requestJson = vi.fn(async () => ({ fingerprint: 'fingerprint-template', normalized_sql: 'SELECT * FROM analytics.orders WHERE status = ?', @@ -26,7 +26,7 @@ describe('createHttpSqlAnalysisPort', () => { }); }); - it('preserves python-service parse errors in the mapped result', async () => { + it('preserves SQL-analysis parse errors in the mapped result', async () => { const requestJson = vi.fn(async () => ({ fingerprint: '', normalized_sql: '', diff --git a/packages/context/src/tools/base-tool.ts b/packages/context/src/tools/base-tool.ts index 37da69a0..0566a0ca 100644 --- a/packages/context/src/tools/base-tool.ts +++ b/packages/context/src/tools/base-tool.ts @@ -151,7 +151,7 @@ export abstract class BaseTool { } } }, - // Send only markdown to LLM - frontend still receives full { markdown, structured } via stream + // Send only markdown to the LLM; tool callers still receive the structured output. toModelOutput: ({ output }) => { if (output && typeof output === 'object' && 'markdown' in output) { return { type: 'content', value: [{ type: 'text', text: output.markdown as string }] }; diff --git a/scripts/build-benchmark-snapshot.test.mjs b/scripts/build-benchmark-snapshot.test.mjs index adc30173..26ac6419 100644 --- a/scripts/build-benchmark-snapshot.test.mjs +++ b/scripts/build-benchmark-snapshot.test.mjs @@ -1,4 +1,5 @@ import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; import { createRequire } from 'node:module'; import { describe, it } from 'node:test'; import { buildBenchmarkSnapshot } from './build-benchmark-snapshot.mjs'; @@ -250,4 +251,13 @@ describe('buildBenchmarkSnapshot', () => { }, ]); }); + + it('exposes relationship benchmarks as an explicit context package script', async () => { + const packageJson = JSON.parse(await readFile(new URL('../packages/context/package.json', import.meta.url), 'utf8')); + + assert.equal( + packageJson.scripts['relationships:benchmarks:test'], + 'KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts', + ); + }); }); diff --git a/scripts/check-boundaries.test.mjs b/scripts/check-boundaries.test.mjs index fab2b87e..39946464 100644 --- a/scripts/check-boundaries.test.mjs +++ b/scripts/check-boundaries.test.mjs @@ -14,7 +14,7 @@ function lowerProductName() { describe('scanFileContent', () => { it('rejects source imports from application directories', () => { const serverAlias = '@' + 'server/contracts'; - const pythonAppPath = 'python-service/' + 'app/api/endpoints/semantic_layer.py'; + const pythonAppPath = `${['python', 'service'].join('-')}/app/api/endpoints/semantic_layer.py`; const violations = [ ...scanFileContent('packages/context/src/index.ts', `import { orpc } from '${serverAlias}';`), diff --git a/scripts/ci-artifact-upload.test.mjs b/scripts/ci-artifact-upload.test.mjs index d18db979..3fecdfbc 100644 --- a/scripts/ci-artifact-upload.test.mjs +++ b/scripts/ci-artifact-upload.test.mjs @@ -4,7 +4,7 @@ import { dirname, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { describe, it } from 'node:test'; -const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), '..', '..'); +const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), '..'); const ciWorkflowPath = resolve(repoRoot, '.github', 'workflows', 'ci.yml'); async function readCiWorkflowOrSkip(testContext) { @@ -21,7 +21,7 @@ async function readCiWorkflowOrSkip(testContext) { } describe('KTX CI artifact upload contract', () => { - it('uploads verified KTX package artifacts from check-ktx-subtree', async (testContext) => { + it('uploads verified KTX package artifacts from the standalone check job', async (testContext) => { const workflow = await readCiWorkflowOrSkip(testContext); if (workflow === null) { return; @@ -29,42 +29,35 @@ describe('KTX CI artifact upload contract', () => { assert.match( workflow, - /name: Build ktx package artifacts and verify public smoke\s+run: cd ktx && pnpm run artifacts:build && pnpm run artifacts:verify-manifest && pnpm run artifacts:verify-demo\s+- name: Upload ktx package artifacts/s, + /name: Build and verify package artifacts\s+run: pnpm run artifacts:check\s+- name: Upload package artifacts/s, ); assert.match(workflow, /uses: actions\/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f/); assert.match(workflow, /name: ktx-package-artifacts-\$\{\{ github\.sha \}\}/); - assert.match(workflow, /ktx\/dist\/artifacts\/manifest\.json/); - assert.match(workflow, /ktx\/dist\/artifacts\/npm\/\*\.tgz/); - assert.match(workflow, /ktx\/dist\/artifacts\/python\/\*\.whl/); - assert.match(workflow, /ktx\/dist\/artifacts\/python\/\*\.tar\.gz/); + assert.match(workflow, /dist\/artifacts\/manifest\.json/); + assert.match(workflow, /dist\/artifacts\/npm\/\*\.tgz/); + assert.match(workflow, /dist\/artifacts\/python\/\*\.whl/); + assert.match(workflow, /dist\/artifacts\/python\/\*\.tar\.gz/); assert.match(workflow, /if-no-files-found: error/); assert.match(workflow, /retention-days: 7/); }); - it('runs packed demo artifact smoke on Linux and macOS', async (testContext) => { + it('runs TypeScript and Python checks in the standalone workflow', async (testContext) => { const workflow = await readCiWorkflowOrSkip(testContext); if (workflow === null) { return; } - assert.match(workflow, /check-ktx-packed-demo:/); - assert.match(workflow, /matrix:\s+os: \[ubuntu-latest, macos-latest\]/s); - assert.match(workflow, /name: Download ktx package artifacts/); - assert.match(workflow, /path: ktx\/dist\/artifacts/); - assert.match(workflow, /run: cd ktx && pnpm run artifacts:verify-demo/); + assert.match(workflow, /run: pnpm run check/); + assert.match(workflow, /run: uv sync --all-packages/); + assert.match(workflow, /run: uv run pytest/); }); - it('includes packed demo artifact smoke in ci-success', async (testContext) => { + it('does not depend on host application CI jobs', async (testContext) => { const workflow = await readCiWorkflowOrSkip(testContext); if (workflow === null) { return; } - assert.match( - workflow, - /needs: \[check-ktx-subtree, check-ktx-packed-demo, build-python-service, test-server, build-frontend, run-pre-commit, build-docker-images\]/, - ); - assert.match(workflow, /needs\.check-ktx-packed-demo\.result.*== "failure"/); - assert.match(workflow, /needs\.check-ktx-packed-demo\.result.*== "cancelled"/); + assert.doesNotMatch(workflow, /build-python-service|test-server|build-frontend|build-docker-images/); }); }); diff --git a/scripts/conductor-run.sh b/scripts/conductor-run.sh new file mode 100755 index 00000000..33b40b20 --- /dev/null +++ b/scripts/conductor-run.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# conductor-run.sh - Starts the long-lived local KTX daemon for Conductor. +# +# Uses a fixed port because Conductor runs this workspace in nonconcurrent mode. + +set -e +set -o pipefail + +read_required_uv_version() { + local project_file="$1" + + if [ ! -f "$project_file" ]; then + return 1 + fi + + sed -nE 's/^[[:space:]]*required-version[[:space:]]*=[[:space:]]*"([^"]+)".*/\1/p' "$project_file" | head -n 1 +} + +uv_version() { + local uv_bin="$1" + + "$uv_bin" --version 2>/dev/null | awk '{print $2}' +} + +install_workspace_uv() { + local required_version="$1" + local install_dir="$PWD/.context/bin/uv-$required_version" + + mkdir -p "$install_dir" + + if [ ! -x "$install_dir/uv" ] || [ "$(uv_version "$install_dir/uv")" != "$required_version" ]; then + echo "Installing workspace-local uv $required_version..." >&2 + curl -LsSf "https://astral.sh/uv/$required_version/install.sh" | + env UV_INSTALL_DIR="$install_dir" UV_NO_MODIFY_PATH=1 sh >&2 + fi + + printf '%s\n' "$install_dir/uv" +} + +resolve_uv_for_project() { + local project_file="$1" + local required_version + local system_uv + local system_version + local workspace_uv + + required_version="$(read_required_uv_version "$project_file" || true)" + required_version="${required_version#==}" + + if [ -z "$required_version" ]; then + command -v uv + return + fi + + if ! [[ "$required_version" =~ ^[0-9]+[.][0-9]+[.][0-9]+$ ]]; then + echo "WARNING: Unsupported uv required-version '$required_version'; using uv from PATH." >&2 + command -v uv + return + fi + + if command -v uv >/dev/null 2>&1; then + system_uv="$(command -v uv)" + system_version="$(uv_version "$system_uv")" + + if [ "$system_version" = "$required_version" ]; then + printf '%s\n' "$system_uv" + return + fi + + echo "Found uv $system_version at $system_uv; $project_file requires uv $required_version." >&2 + else + echo "uv is not installed on PATH; $project_file requires uv $required_version." >&2 + fi + + workspace_uv="$(install_workspace_uv "$required_version")" + + if [ "$(uv_version "$workspace_uv")" != "$required_version" ]; then + echo "ERROR: Expected uv $required_version at $workspace_uv, got $("$workspace_uv" --version 2>&1 || true)." >&2 + return 1 + fi + + printf '%s\n' "$workspace_uv" +} + +echo "=== Starting KTX for Conductor ===" + +echo "Building KTX packages..." +pnpm run build + +KTX_UV_BIN="$(resolve_uv_for_project "pyproject.toml")" +export PATH="$(dirname "$KTX_UV_BIN"):$PATH" + +if [ -f ".venv/bin/activate" ]; then + source .venv/bin/activate +fi + +echo "KTX daemon: http://127.0.0.1:8765" +exec uv run ktx-daemon serve-http --host 127.0.0.1 --port 8765 diff --git a/scripts/conductor-scripts.test.mjs b/scripts/conductor-scripts.test.mjs new file mode 100644 index 00000000..38689def --- /dev/null +++ b/scripts/conductor-scripts.test.mjs @@ -0,0 +1,40 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +async function readText(relativePath) { + return readFile(new URL(`../${relativePath}`, import.meta.url), 'utf8'); +} + +describe('Conductor workspace scripts', () => { + it('registers setup and run scripts in nonconcurrent mode', async () => { + const manifest = JSON.parse(await readText('conductor.json')); + + assert.deepEqual(manifest.scripts, { + setup: 'bash scripts/conductor-setup.sh', + run: 'bash scripts/conductor-run.sh', + }); + assert.equal(manifest.runScriptMode, 'nonconcurrent'); + }); + + it('sets up exact uv, Python packages, JS packages, and the built CLI', async () => { + const setupScript = await readText('scripts/conductor-setup.sh'); + + assert.match(setupScript, /read_required_uv_version\(\)/); + assert.match(setupScript, /\.context\/bin\/uv-\$required_version/); + assert.match(setupScript, /uv sync --all-packages --all-groups/); + assert.match(setupScript, /pnpm install --frozen-lockfile --prefer-offline/); + assert.match(setupScript, /pnpm run native:rebuild/); + assert.match(setupScript, /pnpm run build/); + assert.match(setupScript, /packages\/cli\/dist\/bin\.js dev doctor setup --no-input/); + }); + + it('runs the KTX daemon on the documented fixed local port', async () => { + const runScript = await readText('scripts/conductor-run.sh'); + + assert.match(runScript, /pnpm run build/); + assert.match(runScript, /source \.venv\/bin\/activate/); + assert.match(runScript, /uv run ktx-daemon serve-http --host 127\.0\.0\.1 --port 8765/); + assert.doesNotMatch(runScript, /\bnpx\b/); + }); +}); diff --git a/scripts/conductor-setup.sh b/scripts/conductor-setup.sh new file mode 100755 index 00000000..729b03b0 --- /dev/null +++ b/scripts/conductor-setup.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# conductor-setup.sh - Runs once when Conductor creates a KTX workspace. +# +# Prepares the standalone pnpm + uv workspace and builds the local CLI. + +set -e +set -o pipefail + +read_required_uv_version() { + local project_file="$1" + + if [ ! -f "$project_file" ]; then + return 1 + fi + + sed -nE 's/^[[:space:]]*required-version[[:space:]]*=[[:space:]]*"([^"]+)".*/\1/p' "$project_file" | head -n 1 +} + +uv_version() { + local uv_bin="$1" + + "$uv_bin" --version 2>/dev/null | awk '{print $2}' +} + +install_workspace_uv() { + local required_version="$1" + local install_dir="$PWD/.context/bin/uv-$required_version" + + mkdir -p "$install_dir" + + if [ ! -x "$install_dir/uv" ] || [ "$(uv_version "$install_dir/uv")" != "$required_version" ]; then + echo "Installing workspace-local uv $required_version..." >&2 + curl -LsSf "https://astral.sh/uv/$required_version/install.sh" | + env UV_INSTALL_DIR="$install_dir" UV_NO_MODIFY_PATH=1 sh >&2 + fi + + printf '%s\n' "$install_dir/uv" +} + +resolve_uv_for_project() { + local project_file="$1" + local required_version + local system_uv + local system_version + local workspace_uv + + required_version="$(read_required_uv_version "$project_file" || true)" + required_version="${required_version#==}" + + if [ -z "$required_version" ]; then + command -v uv + return + fi + + if ! [[ "$required_version" =~ ^[0-9]+[.][0-9]+[.][0-9]+$ ]]; then + echo "WARNING: Unsupported uv required-version '$required_version'; using uv from PATH." >&2 + command -v uv + return + fi + + if command -v uv >/dev/null 2>&1; then + system_uv="$(command -v uv)" + system_version="$(uv_version "$system_uv")" + + if [ "$system_version" = "$required_version" ]; then + printf '%s\n' "$system_uv" + return + fi + + echo "Found uv $system_version at $system_uv; $project_file requires uv $required_version." >&2 + else + echo "uv is not installed on PATH; $project_file requires uv $required_version." >&2 + fi + + workspace_uv="$(install_workspace_uv "$required_version")" + + if [ "$(uv_version "$workspace_uv")" != "$required_version" ]; then + echo "ERROR: Expected uv $required_version at $workspace_uv, got $("$workspace_uv" --version 2>&1 || true)." >&2 + return 1 + fi + + printf '%s\n' "$workspace_uv" +} + +echo "=== Conductor KTX workspace setup ===" + +if [ -n "${CONDUCTOR_ROOT_PATH:-}" ] && [ -f "$CONDUCTOR_ROOT_PATH/.env" ]; then + ln -sf "$CONDUCTOR_ROOT_PATH/.env" .env + echo "Linked .env" +fi + +KTX_UV_BIN="$(resolve_uv_for_project "pyproject.toml")" +export PATH="$(dirname "$KTX_UV_BIN"):$PATH" + +echo "Installing KTX Python dependencies..." +uv sync --all-packages --all-groups + +echo "Installing KTX JS dependencies..." +pnpm install --frozen-lockfile --prefer-offline + +echo "Rebuilding native JS dependencies..." +pnpm run native:rebuild + +echo "Building KTX packages..." +pnpm run build + +echo "Running KTX setup doctor..." +node packages/cli/dist/bin.js dev doctor setup --no-input + +echo "=== Setup complete ===" diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index b2da21c8..6eeeb13c 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -66,6 +66,8 @@ describe('standalone example docs', () => { assert.match(smoke, /assert_manifest "\$FIRST_MANIFEST" true/); assert.match(smoke, /assert_manifest "\$SECOND_MANIFEST" false/); assert.match(smoke, /assert_manifest "\$RESET_MANIFEST" true/); + assert.doesNotMatch(readme, /python-service/); + assert.doesNotMatch(smoke, /python-service|PYTHON_SERVICE|REPO_ROOT/); }); it('lists every published TypeScript package in the package root README', async () => { @@ -140,6 +142,8 @@ describe('standalone example docs', () => { assert.match(packageJson.scripts.smoke, /src\/standalone-smoke\.test\.ts/); assert.match(packageJson.scripts.smoke, /src\/example-smoke\.test\.ts/); + assert.match(packageJson.scripts.test, /--exclude src\/standalone-smoke\.test\.ts/); + assert.match(packageJson.scripts.test, /--exclude src\/example-smoke\.test\.ts/); }); it('documents daemon HTTP database, source generation, LookML, embedding, and code execution support', async () => { diff --git a/scripts/precommit-check.test.mjs b/scripts/precommit-check.test.mjs index 94f71e09..55ef66bb 100644 --- a/scripts/precommit-check.test.mjs +++ b/scripts/precommit-check.test.mjs @@ -9,7 +9,7 @@ function commandKeys(files) { describe('precommit-check', () => { it('skips files outside ktx', () => { - assert.deepEqual(commandKeys(['server/src/app.ts']), []); + assert.deepEqual(commandKeys(['outside-workspace/src/app.ts']), []); }); it('runs only the touched package checks for package code', () => { diff --git a/scripts/standalone-ci-workflow.test.mjs b/scripts/standalone-ci-workflow.test.mjs index bfcc1b64..230b9e2f 100644 --- a/scripts/standalone-ci-workflow.test.mjs +++ b/scripts/standalone-ci-workflow.test.mjs @@ -27,6 +27,8 @@ describe('standalone KTX CI workflow', () => { 'cache-dependency-path: "pnpm-lock.yaml"', 'pnpm install --frozen-lockfile', 'pnpm run check', + 'pnpm run test:slow', + 'pnpm run smoke', 'actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405', 'python-version: "3.13"', 'astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b', diff --git a/scripts/test-tiering.test.mjs b/scripts/test-tiering.test.mjs new file mode 100644 index 00000000..5a984dd1 --- /dev/null +++ b/scripts/test-tiering.test.mjs @@ -0,0 +1,73 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +async function readJson(path) { + return JSON.parse(await readFile(new URL(path, import.meta.url), 'utf8')); +} + +function assertScriptContainsAll(script, expected) { + for (const item of expected) { + assert.match(script, new RegExp(item.replaceAll('/', '\\/').replaceAll('.', '\\.'))); + } +} + +describe('test tiering', () => { + const cliSlowTests = [ + 'src/setup-databases.test.ts', + 'src/scan.test.ts', + 'src/commands/connection-metabase-setup.test.ts', + 'src/setup-models.test.ts', + 'src/setup-sources.test.ts', + 'src/setup.test.ts', + 'src/connection.test.ts', + 'src/setup-embeddings.test.ts', + 'src/ingest.test.ts', + 'src/commands/connection-mapping.test.ts', + 'src/ingest-viz.test.ts', + 'src/demo.test.ts', + 'src/setup-project.test.ts', + 'src/sl.test.ts', + 'src/local-scan-connectors.test.ts', + 'src/commands/connection-notion.test.ts', + ]; + + const contextSlowTests = [ + 'src/scan/local-scan.test.ts', + 'src/mcp/local-project-ports.test.ts', + 'src/ingest/local-stage-ingest.test.ts', + 'src/sl/pglite-sl-search-prototype.test.ts', + 'src/core/git.service.test.ts', + 'src/ingest/local-adapters.test.ts', + 'src/ingest/local-bundle-ingest.test.ts', + 'src/ingest/local-metabase-ingest.test.ts', + 'src/sl/local-sl.test.ts', + 'src/search/pglite-owner-process.test.ts', + 'src/scan/local-enrichment-artifacts.test.ts', + 'src/search/pglite-spike.test.ts', + 'src/wiki/local-knowledge.test.ts', + 'src/sl/local-query.test.ts', + 'src/scan/relationship-review-decisions.test.ts', + 'src/scan/relationship-profiling.test.ts', + ]; + + it('keeps slow package tests out of default local package test scripts', async () => { + const cliPackage = await readJson('../packages/cli/package.json'); + const contextPackage = await readJson('../packages/context/package.json'); + + assertScriptContainsAll(cliPackage.scripts.test, cliSlowTests.map((file) => `--exclude ${file}`)); + assertScriptContainsAll(contextPackage.scripts.test, contextSlowTests.map((file) => `--exclude ${file}`)); + assert.match(contextPackage.scripts.test, /--exclude src\/scan\/relationship-benchmarks\.test\.ts/); + }); + + it('provides explicit slow package test scripts for CI', async () => { + const rootPackage = await readJson('../package.json'); + const cliPackage = await readJson('../packages/cli/package.json'); + const contextPackage = await readJson('../packages/context/package.json'); + + assert.equal(rootPackage.scripts['test:slow'], 'pnpm --filter @ktx/context run test:slow && pnpm --filter @ktx/cli run test:slow'); + assertScriptContainsAll(cliPackage.scripts['test:slow'], cliSlowTests); + assertScriptContainsAll(contextPackage.scripts['test:slow'], contextSlowTests); + assert.doesNotMatch(contextPackage.scripts['test:slow'], /relationship-benchmarks\.test\.ts/); + }); +});