diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d37c6812..750c71d5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,12 @@ jobs: - name: Run TypeScript checks run: pnpm run check + - name: Run slow TypeScript tests + run: pnpm run test:slow + + - name: Run CLI smoke tests + run: pnpm run smoke + - name: Setup Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/conductor.json b/conductor.json new file mode 100644 index 00000000..e1a79ff9 --- /dev/null +++ b/conductor.json @@ -0,0 +1,7 @@ +{ + "scripts": { + "setup": "bash scripts/conductor-setup.sh", + "run": "bash scripts/conductor-run.sh" + }, + "runScriptMode": "nonconcurrent" +} diff --git a/examples/postgres-historic/README.md b/examples/postgres-historic/README.md index 3e27b462..1a97cba2 100644 --- a/examples/postgres-historic/README.md +++ b/examples/postgres-historic/README.md @@ -13,8 +13,8 @@ generates query workload under separate users, runs `ktx setup` with - Docker with Compose v2 - Node and pnpm matching the KTX workspace -- `python-service/.venv` already created, or `KTX_SQL_ANALYSIS_URL` pointing at - a running service that exposes `/api/sql/analyze-for-fingerprint` +- `KTX_SQL_ANALYSIS_URL` or `KTX_DAEMON_URL` pointing at a running SQL-analysis + service that exposes `/api/sql/analyze-for-fingerprint` ## Run @@ -111,5 +111,5 @@ The manifest should have `dialect: "postgres"`, `degraded: true`, - Missing grants: confirm `GRANT pg_read_all_stats TO ktx_reader;`. - Empty templates: rerun `scripts/generate-workload.sh base` and keep `--historic-sql-min-calls 2` for the smoke. -- SQL-analysis failures: set `KTX_SQL_ANALYSIS_URL` to the running service URL - or create `python-service/.venv` before running `scripts/smoke.sh`. +- SQL-analysis failures: set `KTX_SQL_ANALYSIS_URL` or `KTX_DAEMON_URL` to a + running service URL before running `scripts/smoke.sh`. diff --git a/examples/postgres-historic/scripts/smoke.sh b/examples/postgres-historic/scripts/smoke.sh index d948cf8e..5b1be929 100755 --- a/examples/postgres-historic/scripts/smoke.sh +++ b/examples/postgres-historic/scripts/smoke.sh @@ -4,46 +4,23 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" KTX_ROOT="$(cd "$EXAMPLE_DIR/../.." && pwd)" -REPO_ROOT="$(cd "$KTX_ROOT/.." && pwd)" COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml" PROJECT_PARENT="${KTX_POSTGRES_HISTORIC_PROJECT_PARENT:-$(mktemp -d)}" PROJECT_DIR="$PROJECT_PARENT/postgres-historic-ktx" KTX_BIN="$KTX_ROOT/packages/cli/dist/bin.js" -PYTHON_SERVICE_LOG="$PROJECT_PARENT/python-service.log" -PYTHON_SERVICE_PID="" cleanup() { - if [[ -n "$PYTHON_SERVICE_PID" ]]; then - kill "$PYTHON_SERVICE_PID" >/dev/null 2>&1 || true - fi if [[ "${KTX_POSTGRES_HISTORIC_KEEP_DOCKER:-0}" != "1" ]]; then docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true fi } trap cleanup EXIT -start_sql_analysis_if_needed() { - if [[ -n "${KTX_SQL_ANALYSIS_URL:-}" ]]; then +require_sql_analysis_url() { + if [[ -n "${KTX_SQL_ANALYSIS_URL:-}" || -n "${KTX_DAEMON_URL:-}" ]]; then return fi - if [[ ! -d "$REPO_ROOT/python-service/.venv" ]]; then - echo "Set KTX_SQL_ANALYSIS_URL or create python-service/.venv before running this smoke." >&2 - exit 1 - fi - ( - cd "$REPO_ROOT/python-service" - source .venv/bin/activate - uvicorn app.main:app --host 127.0.0.1 --port 18081 >"$PYTHON_SERVICE_LOG" 2>&1 - ) & - PYTHON_SERVICE_PID="$!" - export KTX_SQL_ANALYSIS_URL="http://127.0.0.1:18081" - for _ in $(seq 1 60); do - if curl -fsS "$KTX_SQL_ANALYSIS_URL/health" >/dev/null 2>&1; then - return - fi - sleep 1 - done - echo "SQL analysis service did not become healthy. Log: $PYTHON_SERVICE_LOG" >&2 + echo "Set KTX_SQL_ANALYSIS_URL or KTX_DAEMON_URL before running this smoke." >&2 exit 1 } @@ -111,7 +88,7 @@ NODE cd "$KTX_ROOT" pnpm --filter @ktx/context run build pnpm --filter @ktx/cli run build -start_sql_analysis_if_needed +require_sql_analysis_url docker compose -f "$COMPOSE_FILE" up -d --wait "$EXAMPLE_DIR/scripts/generate-workload.sh" base diff --git a/package.json b/package.json index 08fda8a6..bda78420 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "relationships:verify-orbit": "node scripts/relationship-orbit-verification.mjs", "smoke": "pnpm run build && pnpm --filter @ktx/cli run smoke", "test": "node --test scripts/*.test.mjs && pnpm --filter './packages/*' run test", + "test:slow": "pnpm --filter @ktx/context run test:slow && pnpm --filter @ktx/cli run test:slow", "type-check": "pnpm --filter './packages/*' run type-check" }, "devDependencies": { diff --git a/packages/cli/package.json b/packages/cli/package.json index e85986a4..e386e56f 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -28,7 +28,8 @@ "assets:demo": "node scripts/build-demo-assets.mjs", "build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs", "smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000", - "test": "vitest run", + "test": "vitest run --exclude src/standalone-smoke.test.ts --exclude src/example-smoke.test.ts --exclude src/setup-databases.test.ts --exclude src/scan.test.ts --exclude src/commands/connection-metabase-setup.test.ts --exclude src/setup-models.test.ts --exclude src/setup-sources.test.ts --exclude src/setup.test.ts --exclude src/connection.test.ts --exclude src/setup-embeddings.test.ts --exclude src/ingest.test.ts --exclude src/commands/connection-mapping.test.ts --exclude src/ingest-viz.test.ts --exclude src/demo.test.ts --exclude src/setup-project.test.ts --exclude src/sl.test.ts --exclude src/local-scan-connectors.test.ts --exclude src/commands/connection-notion.test.ts", + "test:slow": "vitest run src/setup-databases.test.ts src/scan.test.ts src/commands/connection-metabase-setup.test.ts src/setup-models.test.ts src/setup-sources.test.ts src/setup.test.ts src/connection.test.ts src/setup-embeddings.test.ts src/ingest.test.ts src/commands/connection-mapping.test.ts src/ingest-viz.test.ts src/demo.test.ts src/setup-project.test.ts src/sl.test.ts src/local-scan-connectors.test.ts src/commands/connection-notion.test.ts --testTimeout 30000", "type-check": "tsc -p tsconfig.json --noEmit" }, "dependencies": { diff --git a/packages/cli/src/ingest-viz.test.ts b/packages/cli/src/ingest-viz.test.ts new file mode 100644 index 00000000..936490d7 --- /dev/null +++ b/packages/cli/src/ingest-viz.test.ts @@ -0,0 +1,863 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { + type LocalIngestResult, + type MemoryFlowReplayInput, + type RunLocalIngestOptions, +} from '@ktx/context/ingest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runKtxIngest } from './ingest.js'; +import { + completedLocalBundleRun, + emitLiveLocalMemoryFlow, + localFakeBundleReport, + makeIo, + persistLocalBundleReport, + writeBundleReportFile, + writeWarehouseConfig, +} from './ingest.test-utils.js'; +import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; + +describe('runKtxIngest viz and replay', () => { + let tempDir: string; + let originalTerm: string | undefined; + + beforeEach(async () => { + resetVizFallbackWarningsForTest(); + originalTerm = process.env.TERM; + process.env.TERM = 'xterm-256color'; + tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-ingest-')); + }); + + afterEach(async () => { + if (originalTerm === undefined) { + delete process.env.TERM; + } else { + process.env.TERM = originalTerm; + } + await rm(tempDir, { recursive: true, force: true }); + }); + + it('renders live memory-flow frames for run --viz when stdout is interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); + input.memoryFlow?.update({ syncId: 'sync-live-1' }); + input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); + input.memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); + input.memoryFlow?.finish('done'); + + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + now: () => new Date('2026-04-30T14:00:00.000Z'), + }, + ), + ).resolves.toBe(0); + + expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.any(Object) })); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + expect((io.stdout().match(/KTX memory flow/g) ?? []).length).toBeGreaterThan(1); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('fake-orders'); + expect(io.stderr()).toBe(''); + }); + + it('uses the TUI live session for run --viz when stdin and stdout are interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + now: () => new Date('2026-04-30T14:00:00.000Z'), + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(startLiveMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'live-viz-run', + connectionId: 'warehouse', + adapter: 'fake', + status: 'running', + }); + expect(liveSession.update).toHaveBeenCalled(); + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toBe(''); + }); + + it('prints a final plain summary after live viz completes', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-summary'); + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'viz', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('Memory-flow summary: done'); + expect(io.stdout()).toContain('Connection: warehouse'); + }); + + it('falls back to text live rendering when the TUI live session is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('falls back to text live rendering when TUI startup fails with a redacted warning', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn( + async (_input: MemoryFlowReplayInput, ioArg: { stderr: { write(chunk: string): void } }) => { + ioArg.stderr.write('TUI visualization unavailable: Failed [redacted-url] [redacted]; using text renderer.\n'); + return null; + }, + ); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toContain('TUI visualization unavailable: Failed [redacted-url] [redacted]'); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + }); + + it('does not start live TUI when run --viz disables input', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + return completedLocalBundleRun(input, 'no-input-live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + })); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('does not attach a live memory-flow sink for plain run output', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'plain-run')); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + io.io, + { runLocalIngest: runLocal }, + ), + ).resolves.toBe(0); + + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('Job: plain-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + }); + + it('falls back to plain run output for run --viz when stdout is not interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const io = makeIo({ isTTY: false }); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'non-tty-viz-run')); + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'non-tty-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Job: non-tty-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('falls back to plain run output for run --viz when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'raw-missing-viz-run')); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + })); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'raw-missing-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('Job: raw-missing-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('returns an error code for missing status', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo(); + + await expect( + runKtxIngest({ command: 'status', projectDir, runId: 'missing-run', outputMode: 'plain' }, io.io), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found'); + }); + + it('uses the latest local ingest report when status has no run id', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + await persistLocalBundleReport(projectDir, localFakeBundleReport('older-run')); + await persistLocalBundleReport(projectDir, localFakeBundleReport('newer-run')); + const io = makeIo(); + + await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-newer-run'); + expect(io.stdout()).toContain('Job: newer-run'); + expect(io.stderr()).toBe(''); + }); + + it('renders the latest local ingest report through watch when run id is omitted', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + await persistLocalBundleReport(projectDir, localFakeBundleReport('watch-latest')); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest({ command: 'watch', projectDir, outputMode: 'viz', inputMode: 'disabled' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('Run: run-watch-latest'); + expect(io.stderr()).toBe(''); + }); + + it('renders report-file replay through the memory-flow TUI', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'job-1', + reportFile, + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/metabase done'); + expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); + expect(io.stdout()).toContain('Commit: abc12345 Run: run-1 Report: report-1'); + expect(io.stdout()).toContain('SOURCE'); + expect(io.stdout()).toContain('ACTIONS'); + expect(io.stdout()).toContain('SAVED'); + expect(io.stderr()).toBe(''); + }); + + it('prints report-file JSON without looking up local ingest status', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo(); + + await expect( + runKtxIngest({ command: 'status', projectDir, runId: 'report-1', reportFile, outputMode: 'json' }, io.io), + ).resolves.toBe(0); + + const parsed = JSON.parse(io.stdout()); + expect(parsed).toMatchObject({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + }); + expect(io.stderr()).toBe(''); + }); + + it('routes interactive report-file replay through the stored TUI renderer', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'run-1', + reportFile, + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'run-1', + reportId: 'report-1', + connectionId: 'warehouse', + adapter: 'metabase', + }); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); + + it('rejects report-file replay when the requested id does not match the report', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo(); + + await expect( + runKtxIngest({ command: 'replay', projectDir, runId: 'unrelated-id', reportFile, outputMode: 'plain' }, io.io), + ).resolves.toBe(1); + + expect(io.stderr()).toContain( + `Report file ${reportFile} does not match ingest replay id "unrelated-id"; expected one of report-1, run-1, job-1`, + ); + expect(io.stdout()).toBe(''); + }); + + it('renders memory-flow snapshot for status --viz when stdout is interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-1')); + + const io = makeIo({ isTTY: true }); + await expect( + runKtxIngest( + { command: 'status', projectDir, runId: 'viz-run-1', outputMode: 'viz', inputMode: 'disabled' }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).toContain('SOURCE'); + expect(io.stdout()).toContain('CHUNKS'); + expect(io.stdout()).toContain('WORKUNITS'); + expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); + expect(io.stderr()).toBe(''); + }); + + it('uses the TUI renderer for stored status --viz when stdin and stdout are interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + runId: 'tui-viz-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'run-tui-viz-run', + connectionId: 'warehouse', + adapter: 'fake', + }); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); + + it('falls back to the text renderer when TUI declines stored status --viz', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-fallback-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120, keypresses: [{ name: 'q' }] }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => false); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + runId: 'tui-fallback-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('does not use TUI for stored --viz when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-no-input-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'tui-no-input-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + }); + + it('falls back to plain status for stored --viz when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('raw-missing-stored-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'raw-missing-stored-viz-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Run: run-raw-missing-stored-viz-run'); + expect(io.stdout()).toContain('Job: raw-missing-stored-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('keeps stored --viz snapshot-only when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('no-input-viz-run')); + + const io = makeIo({ isTTY: true, columns: 120 }); + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'no-input-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stderr()).toBe(''); + }); + + it('keeps disabled-input stored --viz snapshot output even when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('disabled-raw-missing-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'disabled-raw-missing-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stderr()).toBe(''); + }); + + it('degrades stored --viz snapshots to plain status when stdout is redirected even when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('redirected-no-input-viz-run')); + + const io = makeIo({ isTTY: false }); + await expect( + runKtxIngest( + { + command: 'replay', + projectDir, + runId: 'redirected-no-input-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-redirected-no-input-viz-run'); + expect(io.stdout()).toContain('Job: redirected-no-input-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('degrades ingest replay --viz to plain status when TERM is dumb', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('dumb-terminal-viz-run')); + + const io = makeIo({ isTTY: true }); + await expect( + runKtxIngest( + { command: 'replay', projectDir, runId: 'dumb-terminal-viz-run', outputMode: 'viz' }, + io.io, + { env: { ...process.env, TERM: 'dumb' } }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-dumb-terminal-viz-run'); + expect(io.stdout()).toContain('Job: dumb-terminal-viz-run'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.', + ); + }); + + it('falls back to plain status for --viz when stdout is not interactive', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-2')); + + const io = makeIo({ isTTY: false }); + await expect( + runKtxIngest({ command: 'replay', projectDir, runId: 'viz-run-2', outputMode: 'viz' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-viz-run-2'); + expect(io.stdout()).toContain('Job: viz-run-2'); + expect(io.stdout()).not.toContain('KTX memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('prints JSON for status --json', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('json-run-1')); + + const io = makeIo(); + await expect( + runKtxIngest({ command: 'status', projectDir, runId: 'json-run-1', outputMode: 'json' }, io.io), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + runId: 'run-json-run-1', + jobId: 'json-run-1', + sourceKey: 'fake', + connectionId: 'warehouse', + }); + expect(io.stderr()).toBe(''); + }); +}); diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts new file mode 100644 index 00000000..a83b38be --- /dev/null +++ b/packages/cli/src/ingest.test-utils.ts @@ -0,0 +1,746 @@ +import { EventEmitter } from 'node:events'; +import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent'; +import { + LocalLookerRuntimeStore, + LocalMetabaseSourceStateReader, + MetabaseSourceAdapter, + getLocalIngestStatus, + type ChunkResult, + type FetchContext, + type IngestReportSnapshot, + type LocalIngestResult, + type LocalMetabaseFanoutProgress, + type LookerMappingClient, + type LookerRuntimeClient, + type LookerTableIdentifierParser, + type MemoryFlowEventSink, + type MemoryFlowReplayInput, + type MetabaseCard, + type MetabaseCardSummary, + type MetabaseClientFactory, + type MetabaseRuntimeClient, + type RunLocalIngestOptions, + type SourceAdapter, + type SqliteBundleIngestStore, +} from '@ktx/context/ingest'; +import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; +import { expect, vi } from 'vitest'; +import { type KtxIngestArgs, runKtxIngest } from './ingest.js'; + +export function makeIo( + options: { + isTTY?: boolean; + stdinIsTTY?: boolean; + columns?: number; + rawMode?: boolean; + keypresses?: { name?: string; ctrl?: boolean }[]; + } = {}, +) { + let stdout = ''; + let stderr = ''; + type TestKey = { name?: string; ctrl?: boolean }; + + class TestStdin extends EventEmitter { + isTTY = options.stdinIsTTY ?? false; + isRaw = false; + + setRawMode = + options.rawMode === false + ? undefined + : (value: boolean): void => { + this.isRaw = value; + }; + + resume(): void { + return undefined; + } + + pause(): void { + return undefined; + } + + override on(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + const result = super.on(eventName, listener); + if (eventName === 'keypress') { + for (const key of options.keypresses ?? []) { + queueMicrotask(() => listener('', key)); + } + } + return result; + } + + override off(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + return super.off(eventName, listener); + } + + override removeListener(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + return super.removeListener(eventName, listener); + } + } + + const stdin = new TestStdin(); + + return { + io: { + stdin, + stdout: { + isTTY: options.isTTY, + columns: options.columns, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +export async function writeWarehouseConfig(projectDir: string): Promise { + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' warehouse_a:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - fake', + '', + ].join('\n'), + 'utf-8', + ); +} + +export async function writeMetabaseConfig(projectDir: string): Promise { + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); +} + +export function bundleReportSnapshot(): IngestReportSnapshot { + return { + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-04-30T12:00:00.000Z', + body: { + syncId: 'sync-1', + diffSummary: { added: 2, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: 'abc12345', + workUnits: [ + { + unitKey: 'cards', + rawFiles: ['cards/1.json', 'cards/2.json'], + status: 'success', + actions: [ + { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, + { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, + ], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], + }, + ], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'cards/1.json', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/revenue.md', + actionType: 'wiki_written', + }, + { + rawPath: 'cards/2.json', + artifactKind: 'sl', + artifactKey: 'warehouse.orders', + actionType: 'measure_added', + }, + ], + toolTranscripts: [ + { + unitKey: 'cards', + path: 'tool-transcripts/cards.jsonl', + toolCallCount: 4, + errorCount: 0, + toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'], + }, + ], + }, + }; +} + +export function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const nextReport = localFakeBundleReport(jobId, { + id: 'report-live-1', + runId: 'run-live-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + +export function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const failedWorkUnit = { + ...bundleReportSnapshot().body.workUnits[0], + status: 'failed' as const, + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }; + const nextReport = localFakeBundleReport(jobId, { + id: 'report-failed-1', + runId: 'run-failed-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + body: { + workUnits: [failedWorkUnit], + failedWorkUnits: [failedWorkUnit.unitKey], + }, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + +export class CliLookerSlWritingAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async (params: RunLoopParams) => { + if ( + params.telemetryTags?.operationName === 'ingest-bundle-wu' && + params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' + ) { + const slWrite = params.toolSet.sl_write_source; + if (!slWrite?.execute) { + throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); + } + const result = await slWrite.execute( + { + connectionId: 'prod-warehouse', + sourceName: 'looker__ecommerce__orders', + source: { + name: 'looker__ecommerce__orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'revenue', type: 'number' }, + ], + measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }], + }, + }, + { toolCallId: 'cli-looker-sl-write', messages: [] }, + ); + if (!result.structured.success) { + throw new Error(result.markdown); + } + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +export class CliMetabaseAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const })); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +export class CliMetabaseSourceAdapter implements SourceAdapter { + readonly source = 'metabase'; + readonly skillNames: string[] = []; + readonly fetchCalls: Array<{ metabaseConnectionId: string; metabaseDatabaseId: number; connectionId: string }> = []; + private readonly databaseByStagedDir = new Map(); + + detect(): Promise { + return Promise.resolve(true); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; + this.fetchCalls.push({ + metabaseConnectionId: config.metabaseConnectionId, + metabaseDatabaseId: config.metabaseDatabaseId, + connectionId: ctx.connectionId, + }); + this.databaseByStagedDir.set(stagedDir, config.metabaseDatabaseId); + await mkdir(join(stagedDir, 'cards'), { recursive: true }); + await mkdir(join(stagedDir, 'databases'), { recursive: true }); + await writeFile( + join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }), + 'utf-8', + ); + } + + async chunk(stagedDir: string): Promise { + const databaseId = this.databaseByStagedDir.get(stagedDir); + if (!databaseId) { + throw new Error(`Missing Metabase database id for staged dir ${stagedDir}`); + } + return { + workUnits: [ + { + unitKey: `metabase-db-${databaseId}`, + rawFiles: [`cards/${databaseId}.json`], + peerFileIndex: [], + dependencyPaths: [`databases/${databaseId}.json`], + }, + ], + }; + } +} + +const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ + { + id: 101, + name: 'Collection 12 Revenue', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 12, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, + parameters: [], + dashboard_count: 0, + }, + { + id: 102, + name: 'Collection 12 Margin', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 12, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, + parameters: [], + dashboard_count: 0, + }, + { + id: 103, + name: 'Collection 13 Pipeline', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 13, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, + parameters: [], + dashboard_count: 0, + }, +]; + +function metabaseCardSummary(card: MetabaseCard): MetabaseCardSummary { + return { + id: card.id, + name: card.name, + archived: card.archived, + database_id: card.database_id, + collection_id: card.collection_id, + }; +} + +function createSyncModeMetabaseClient(): MetabaseRuntimeClient { + const cardsById = new Map(SYNC_MODE_METABASE_CARDS.map((card) => [card.id, card])); + return { + testConnection: async () => ({ success: true }), + getCurrentUser: async () => ({ id: 1, email: 'local@example.test' }), + getDatabases: async () => [{ id: 1, name: 'Warehouse A', engine: 'postgres' }], + getDatabase: async (id) => ({ id, name: 'Warehouse A', engine: 'postgres' }), + getCollectionTree: async () => [ + { id: 12, name: 'Selected Collection', parent_id: 'root', children: [] }, + { id: 13, name: 'Other Collection', parent_id: 'root', children: [] }, + ], + getCollection: async (id) => ({ + id, + name: id === 12 ? 'Selected Collection' : 'Other Collection', + parent_id: 'root', + children: [], + }), + getCollectionItems: async (collectionId) => + SYNC_MODE_METABASE_CARDS.filter((card) => card.collection_id === collectionId).map((card) => ({ + id: card.id, + model: 'card', + name: card.name, + collection_id: card.collection_id, + database_id: card.database_id, + })), + getCard: async (id) => { + const card = cardsById.get(id); + if (!card) { + throw new Error(`unexpected card ${id}`); + } + return card; + }, + getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), + convertMbqlToNative: async () => ({ query: 'select 1' }), + getNativeSql: (card) => card.dataset_query?.native?.query ?? null, + getTemplateTags: () => ({}), + getCardSql: async (card) => card.dataset_query?.native?.query ?? null, + getResolvedSql: async (card) => ({ + resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`, + templateTags: [], + resolutionStatus: 'resolved', + }), + cleanup: async () => undefined, + }; +} + +export class StaticMetabaseClientFactory implements MetabaseClientFactory { + constructor(private readonly client: MetabaseRuntimeClient) {} + + createClient(): MetabaseRuntimeClient { + return this.client; + } +} + +type SyncModeCase = { + name: string; + syncMode: 'ALL' | 'ONLY' | 'EXCEPT'; + selections: Array<{ selectionType: 'collection' | 'item'; metabaseObjectId: number }>; + expectedRawFiles: string[]; + expectedWorkUnitKeys: string[]; +}; + +export async function runPublicMetabaseSyncModeCase(tempDir: string, input: SyncModeCase): Promise { + const projectDir = join(tempDir, `metabase-sync-mode-${input.name}`); + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + `project: metabase-sync-mode-${input.name}`, + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' api_url: https://metabase.example.test', + ' api_key: literal-test-key', + ' warehouse_a:', + ' driver: postgres', + ' url: postgresql://readonly@db.example.test/warehouse_a', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + + const project = await loadKtxProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: input.syncMode, + defaultTagNames: ['sync-mode-smoke'], + selections: input.selections, + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Warehouse A', + metabaseEngine: 'postgres', + metabaseHost: 'db.example.test', + metabaseDbName: 'warehouse_a', + targetConnectionId: 'warehouse_a', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + + const adapter = new MetabaseSourceAdapter({ + clientFactory: new StaticMetabaseClientFactory(createSyncModeMetabaseClient()), + sourceStateReader: store, + }); + const jobId = `metabase-sync-mode-${input.name}-child`; + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + createAdapters: vi.fn(() => [adapter]), + jobIdFactory: () => jobId, + localIngestOptions: { + agentRunner: new CliMetabaseAgentRunner(), + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stdout()).toContain(`target=warehouse_a database=1 status=done job=${jobId}`); + + const report = await getLocalIngestStatus(project, jobId); + expect(report).not.toBeNull(); + expect(report?.body.workUnits.map((wu) => wu.unitKey).sort()).toEqual(input.expectedWorkUnitKeys); + expect(report?.body.workUnits.flatMap((wu) => wu.rawFiles).sort()).toEqual(input.expectedRawFiles); +} + +type CliLookerRuntimeClient = LookerRuntimeClient & + Pick & { + cleanup: ReturnType>>; + }; + +export function makeCliLookerRuntimeClient(): CliLookerRuntimeClient { + const lookerModels = { + source: 'looker', + fetchedAt: '2026-05-05T00:00:00.000Z', + models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], + }; + const lookerExplore = { + source: 'looker', + modelName: 'ecommerce', + exploreName: 'orders', + label: 'Orders', + description: null, + connectionName: 'analytics', + viewName: 'orders', + rawSqlTableName: 'public.orders', + fields: { + dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }], + }, + joins: [ + { + name: 'users', + type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'public.users', + sqlOn: '${orders.user_id} = ${users.id}', + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, + }; + + return { + listLookerConnections: vi.fn().mockResolvedValue([ + { + name: 'analytics', + host: 'db.example.test', + database: 'analytics', + schema: null, + dialect: 'postgres', + }, + ]), + listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]), + getDashboard: vi.fn().mockResolvedValue({ + lookerId: '10', + title: 'Revenue Overview', + description: 'Revenue dashboard', + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:00:00.000Z', + tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }], + }), + listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]), + getLook: vi.fn().mockResolvedValue({ + lookerId: '20', + title: 'Revenue Look', + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:10:00.000Z', + query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] }, + }), + listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }), + listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]), + listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]), + listLookmlModels: vi.fn().mockResolvedValue(lookerModels), + getExplore: vi.fn().mockResolvedValue(lookerExplore), + getSignals: vi.fn().mockResolvedValue({ + dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }], + lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }], + scheduledPlans: [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 }, + ], + favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }], + }), + cleanup: vi.fn>().mockResolvedValue(undefined), + }; +} + +interface TestLookerTableIdentifierParser extends LookerTableIdentifierParser { + parse: ReturnType>; +} + +export function makeCliLookerParser(): TestLookerTableIdentifierParser { + return { + parse: vi.fn().mockResolvedValue({ + 'ecommerce.orders': { + ok: true, + catalog: null, + schema: 'public', + name: 'orders', + canonical_table: 'public.orders', + }, + 'ecommerce.orders.users': { + ok: true, + catalog: null, + schema: 'public', + name: 'users', + canonical_table: 'public.users', + }, + }), + }; +} + +export function localFakeBundleReport( + jobId: string, + overrides: Partial> & { body?: Partial } = {}, +): IngestReportSnapshot { + const report = bundleReportSnapshot(); + return { + ...report, + id: `report-${jobId}`, + runId: `run-${jobId}`, + jobId, + connectionId: 'warehouse', + sourceKey: 'fake', + ...overrides, + body: { + ...report.body, + syncId: 'sync-live-1', + ...(overrides.body ?? {}), + }, + }; +} + +export async function localBundleStore(projectDir: string, ids: [string, string]): Promise { + const { SqliteBundleIngestStore } = await import('@ktx/context/ingest'); + const project = await loadKtxProject({ projectDir }); + return new SqliteBundleIngestStore({ + dbPath: ktxLocalStateDbPath(project), + idFactory: (() => { + let index = 0; + return () => ids[index++] ?? `generated-${index}`; + })(), + }); +} + +export async function persistLocalBundleReport(projectDir: string, report = bundleReportSnapshot()): Promise { + const store = await localBundleStore(projectDir, [report.runId, report.id]); + const run = await store.create({ + jobId: report.jobId, + connectionId: report.connectionId, + sourceKey: report.sourceKey, + syncId: report.body.syncId, + trigger: 'manual_resync', + }); + await store.markCompleted(run.id, report.body.diffSummary); + await store.create({ + runId: run.id, + jobId: report.jobId, + connectionId: report.connectionId, + sourceKey: report.sourceKey, + body: report.body, + }); +} + +export async function writeBundleReportFile(tempDir: string, report = bundleReportSnapshot()): Promise { + const reportFile = join(tempDir, 'bundle-report.json'); + await writeFile(reportFile, `${JSON.stringify(report, null, 2)}\n`, 'utf-8'); + return reportFile; +} + +export function emitLiveLocalMemoryFlow(memoryFlow: MemoryFlowEventSink | undefined): void { + memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); + memoryFlow?.update({ syncId: 'sync-live-1' }); + memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); + memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); + memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); + memoryFlow?.finish('done'); +} diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 5a18938b..d1299d8a 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -1,737 +1,38 @@ -import { EventEmitter } from 'node:events'; import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent'; import { LocalLookerRuntimeStore, LocalMetabaseSourceStateReader, - MetabaseSourceAdapter, getLocalIngestStatus, - type ChunkResult, - type FetchContext, - type IngestReportSnapshot, type LocalIngestResult, type LocalMetabaseFanoutProgress, - type MemoryFlowEventSink, type MemoryFlowReplayInput, - type MetabaseCard, - type MetabaseCardSummary, - type MetabaseClientFactory, - type MetabaseRuntimeClient, type RunLocalIngestOptions, type SourceAdapter, - type SqliteBundleIngestStore, } from '@ktx/context/ingest'; -import { initKtxProject, ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; +import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { type KtxIngestArgs, runKtxIngest } from './ingest.js'; +import { + CliLookerSlWritingAgentRunner, + CliMetabaseAgentRunner, + CliMetabaseSourceAdapter, + completedLocalBundleRun, + emitLiveLocalMemoryFlow, + failedLocalBundleRun, + localFakeBundleReport, + makeCliLookerParser, + makeCliLookerRuntimeClient, + makeIo, + persistLocalBundleReport, + runPublicMetabaseSyncModeCase, + writeBundleReportFile, + writeMetabaseConfig, + writeWarehouseConfig, +} from './ingest.test-utils.js'; import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; -function makeIo( - options: { - isTTY?: boolean; - stdinIsTTY?: boolean; - columns?: number; - rawMode?: boolean; - keypresses?: { name?: string; ctrl?: boolean }[]; - } = {}, -) { - let stdout = ''; - let stderr = ''; - type TestKey = { name?: string; ctrl?: boolean }; - - class TestStdin extends EventEmitter { - isTTY = options.stdinIsTTY ?? false; - isRaw = false; - - setRawMode = - options.rawMode === false - ? undefined - : (value: boolean): void => { - this.isRaw = value; - }; - - resume(): void { - return undefined; - } - - pause(): void { - return undefined; - } - - override on(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { - const result = super.on(eventName, listener); - if (eventName === 'keypress') { - for (const key of options.keypresses ?? []) { - queueMicrotask(() => listener('', key)); - } - } - return result; - } - - override off(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { - return super.off(eventName, listener); - } - - override removeListener(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { - return super.removeListener(eventName, listener); - } - } - - const stdin = new TestStdin(); - - return { - io: { - stdin, - stdout: { - isTTY: options.isTTY, - columns: options.columns, - write: (chunk: string) => { - stdout += chunk; - }, - }, - stderr: { - write: (chunk: string) => { - stderr += chunk; - }, - }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -async function writeWarehouseConfig(projectDir: string): Promise { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' prod-metabase:', - ' driver: metabase', - ' warehouse_a:', - ' driver: postgres', - 'ingest:', - ' adapters:', - ' - fake', - '', - ].join('\n'), - 'utf-8', - ); -} - -async function writeMetabaseConfig(projectDir: string): Promise { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' warehouse:', - ' driver: postgres', - 'ingest:', - ' adapters:', - ' - metabase', - ' embeddings:', - ' backend: deterministic', - '', - ].join('\n'), - 'utf-8', - ); -} - -function bundleReportSnapshot(): IngestReportSnapshot { - return { - id: 'report-1', - runId: 'run-1', - jobId: 'job-1', - connectionId: 'warehouse', - sourceKey: 'metabase', - createdAt: '2026-04-30T12:00:00.000Z', - body: { - syncId: 'sync-1', - diffSummary: { added: 2, modified: 0, deleted: 0, unchanged: 0 }, - commitSha: 'abc12345', - workUnits: [ - { - unitKey: 'cards', - rawFiles: ['cards/1.json', 'cards/2.json'], - status: 'success', - actions: [ - { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, - { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, - ], - touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], - }, - ], - failedWorkUnits: [], - reconciliationSkipped: false, - conflictsResolved: [], - evictionsApplied: [], - unmappedFallbacks: [], - evictionInputs: [], - unresolvedCards: [], - supersededBy: null, - overrideOf: null, - provenanceRows: [ - { - rawPath: 'cards/1.json', - artifactKind: 'wiki', - artifactKey: 'knowledge/global/revenue.md', - actionType: 'wiki_written', - }, - { - rawPath: 'cards/2.json', - artifactKind: 'sl', - artifactKey: 'warehouse.orders', - actionType: 'measure_added', - }, - ], - toolTranscripts: [ - { - unitKey: 'cards', - path: 'tool-transcripts/cards.jsonl', - toolCallCount: 4, - errorCount: 0, - toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'], - }, - ], - }, - }; -} - -function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { - const nextReport = localFakeBundleReport(jobId, { - id: 'report-live-1', - runId: 'run-live-1', - connectionId: input.connectionId, - sourceKey: input.adapter, - }); - return { - result: { - jobId, - runId: nextReport.runId, - syncId: nextReport.body.syncId, - diffSummary: nextReport.body.diffSummary, - workUnitCount: nextReport.body.workUnits.length, - failedWorkUnits: nextReport.body.failedWorkUnits, - artifactsWritten: nextReport.body.provenanceRows.length, - commitSha: nextReport.body.commitSha, - }, - report: nextReport, - }; -} - -function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { - const failedWorkUnit = { - ...bundleReportSnapshot().body.workUnits[0], - status: 'failed' as const, - reason: 'writer tool failed', - actions: [], - touchedSlSources: [], - }; - const nextReport = localFakeBundleReport(jobId, { - id: 'report-failed-1', - runId: 'run-failed-1', - connectionId: input.connectionId, - sourceKey: input.adapter, - body: { - workUnits: [failedWorkUnit], - failedWorkUnits: [failedWorkUnit.unitKey], - }, - }); - return { - result: { - jobId, - runId: nextReport.runId, - syncId: nextReport.body.syncId, - diffSummary: nextReport.body.diffSummary, - workUnitCount: nextReport.body.workUnits.length, - failedWorkUnits: nextReport.body.failedWorkUnits, - artifactsWritten: nextReport.body.provenanceRows.length, - commitSha: nextReport.body.commitSha, - }, - report: nextReport, - }; -} - -class CliLookerSlWritingAgentRunner extends AgentRunnerService { - override runLoop = vi.fn(async (params: RunLoopParams) => { - if ( - params.telemetryTags?.operationName === 'ingest-bundle-wu' && - params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' - ) { - const slWrite = params.toolSet.sl_write_source; - if (!slWrite?.execute) { - throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); - } - const result = await slWrite.execute( - { - connectionId: 'prod-warehouse', - sourceName: 'looker__ecommerce__orders', - source: { - name: 'looker__ecommerce__orders', - table: 'public.orders', - grain: ['id'], - columns: [ - { name: 'id', type: 'number' }, - { name: 'revenue', type: 'number' }, - ], - measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }], - }, - }, - { toolCallId: 'cli-looker-sl-write', messages: [] }, - ); - if (!result.structured.success) { - throw new Error(result.markdown); - } - } - return { stopReason: 'natural' as const }; - }); - - constructor() { - super({ llmProvider: { getModel: () => ({}) as never } as never }); - } -} - -class CliMetabaseAgentRunner extends AgentRunnerService { - override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const })); - - constructor() { - super({ llmProvider: { getModel: () => ({}) as never } as never }); - } -} - -class CliMetabaseSourceAdapter implements SourceAdapter { - readonly source = 'metabase'; - readonly skillNames: string[] = []; - readonly fetchCalls: Array<{ metabaseConnectionId: string; metabaseDatabaseId: number; connectionId: string }> = []; - private readonly databaseByStagedDir = new Map(); - - detect(): Promise { - return Promise.resolve(true); - } - - async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { - const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; - this.fetchCalls.push({ - metabaseConnectionId: config.metabaseConnectionId, - metabaseDatabaseId: config.metabaseDatabaseId, - connectionId: ctx.connectionId, - }); - this.databaseByStagedDir.set(stagedDir, config.metabaseDatabaseId); - await mkdir(join(stagedDir, 'cards'), { recursive: true }); - await mkdir(join(stagedDir, 'databases'), { recursive: true }); - await writeFile( - join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`), - JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }), - 'utf-8', - ); - await writeFile( - join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`), - JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }), - 'utf-8', - ); - } - - async chunk(stagedDir: string): Promise { - const databaseId = this.databaseByStagedDir.get(stagedDir); - if (!databaseId) { - throw new Error(`Missing Metabase database id for staged dir ${stagedDir}`); - } - return { - workUnits: [ - { - unitKey: `metabase-db-${databaseId}`, - rawFiles: [`cards/${databaseId}.json`], - peerFileIndex: [], - dependencyPaths: [`databases/${databaseId}.json`], - }, - ], - }; - } -} - -const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ - { - id: 101, - name: 'Collection 12 Revenue', - description: null, - type: 'question', - query_type: 'native', - database_id: 1, - collection_id: 12, - archived: false, - result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, - parameters: [], - dashboard_count: 0, - }, - { - id: 102, - name: 'Collection 12 Margin', - description: null, - type: 'question', - query_type: 'native', - database_id: 1, - collection_id: 12, - archived: false, - result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, - parameters: [], - dashboard_count: 0, - }, - { - id: 103, - name: 'Collection 13 Pipeline', - description: null, - type: 'question', - query_type: 'native', - database_id: 1, - collection_id: 13, - archived: false, - result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, - parameters: [], - dashboard_count: 0, - }, -]; - -function metabaseCardSummary(card: MetabaseCard): MetabaseCardSummary { - return { - id: card.id, - name: card.name, - archived: card.archived, - database_id: card.database_id, - collection_id: card.collection_id, - }; -} - -function createSyncModeMetabaseClient(): MetabaseRuntimeClient { - const cardsById = new Map(SYNC_MODE_METABASE_CARDS.map((card) => [card.id, card])); - return { - testConnection: async () => ({ success: true }), - getCurrentUser: async () => ({ id: 1, email: 'local@example.test' }), - getDatabases: async () => [{ id: 1, name: 'Warehouse A', engine: 'postgres' }], - getDatabase: async (id) => ({ id, name: 'Warehouse A', engine: 'postgres' }), - getCollectionTree: async () => [ - { id: 12, name: 'Selected Collection', parent_id: 'root', children: [] }, - { id: 13, name: 'Other Collection', parent_id: 'root', children: [] }, - ], - getCollection: async (id) => ({ - id, - name: id === 12 ? 'Selected Collection' : 'Other Collection', - parent_id: 'root', - children: [], - }), - getCollectionItems: async (collectionId) => - SYNC_MODE_METABASE_CARDS.filter((card) => card.collection_id === collectionId).map((card) => ({ - id: card.id, - model: 'card', - name: card.name, - collection_id: card.collection_id, - database_id: card.database_id, - })), - getCard: async (id) => { - const card = cardsById.get(id); - if (!card) { - throw new Error(`unexpected card ${id}`); - } - return card; - }, - getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), - convertMbqlToNative: async () => ({ query: 'select 1' }), - getNativeSql: (card) => card.dataset_query?.native?.query ?? null, - getTemplateTags: () => ({}), - getCardSql: async (card) => card.dataset_query?.native?.query ?? null, - getResolvedSql: async (card) => ({ - resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`, - templateTags: [], - resolutionStatus: 'resolved', - }), - cleanup: async () => undefined, - }; -} - -class StaticMetabaseClientFactory implements MetabaseClientFactory { - constructor(private readonly client: MetabaseRuntimeClient) {} - - createClient(): MetabaseRuntimeClient { - return this.client; - } -} - -type SyncModeCase = { - name: string; - syncMode: 'ALL' | 'ONLY' | 'EXCEPT'; - selections: Array<{ selectionType: 'collection' | 'item'; metabaseObjectId: number }>; - expectedRawFiles: string[]; - expectedWorkUnitKeys: string[]; -}; - -async function runPublicMetabaseSyncModeCase(tempDir: string, input: SyncModeCase): Promise { - const projectDir = join(tempDir, `metabase-sync-mode-${input.name}`); - await initKtxProject({ projectDir, projectName: `metabase-sync-mode-${input.name}` }); - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - `project: metabase-sync-mode-${input.name}`, - 'connections:', - ' prod-metabase:', - ' driver: metabase', - ' api_url: https://metabase.example.test', - ' api_key: literal-test-key', - ' warehouse_a:', - ' driver: postgres', - ' url: postgresql://readonly@db.example.test/warehouse_a', - 'ingest:', - ' adapters:', - ' - metabase', - ' embeddings:', - ' backend: deterministic', - '', - ].join('\n'), - 'utf-8', - ); - - const project = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); - await store.replaceSourceState({ - connectionId: 'prod-metabase', - syncMode: input.syncMode, - defaultTagNames: ['sync-mode-smoke'], - selections: input.selections, - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Warehouse A', - metabaseEngine: 'postgres', - metabaseHost: 'db.example.test', - metabaseDbName: 'warehouse_a', - targetConnectionId: 'warehouse_a', - syncEnabled: true, - source: 'refresh', - }, - ], - }); - - const adapter = new MetabaseSourceAdapter({ - clientFactory: new StaticMetabaseClientFactory(createSyncModeMetabaseClient()), - sourceStateReader: store, - }); - const jobId = `metabase-sync-mode-${input.name}-child`; - const io = makeIo(); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'prod-metabase', - adapter: 'metabase', - outputMode: 'plain', - }, - io.io, - { - createAdapters: vi.fn(() => [adapter]), - jobIdFactory: () => jobId, - localIngestOptions: { - agentRunner: new CliMetabaseAgentRunner(), - }, - }, - ), - ).resolves.toBe(0); - - expect(io.stderr()).toBe(''); - expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); - expect(io.stdout()).toContain(`target=warehouse_a database=1 status=done job=${jobId}`); - - const report = await getLocalIngestStatus(project, jobId); - expect(report).not.toBeNull(); - expect(report?.body.workUnits.map((wu) => wu.unitKey).sort()).toEqual(input.expectedWorkUnitKeys); - expect(report?.body.workUnits.flatMap((wu) => wu.rawFiles).sort()).toEqual(input.expectedRawFiles); -} - -function makeCliLookerRuntimeClient() { - const lookerModels = { - source: 'looker', - fetchedAt: '2026-05-05T00:00:00.000Z', - models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], - }; - const lookerExplore = { - source: 'looker', - modelName: 'ecommerce', - exploreName: 'orders', - label: 'Orders', - description: null, - connectionName: 'analytics', - viewName: 'orders', - rawSqlTableName: 'public.orders', - fields: { - dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }], - measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }], - }, - joins: [ - { - name: 'users', - type: 'left_outer', - relationship: 'many_to_one', - rawSqlTableName: 'public.users', - sqlOn: '${orders.user_id} = ${users.id}', - from: null, - targetTable: null, - }, - ], - targetWarehouseConnectionId: null, - targetTable: null, - }; - - return { - listLookerConnections: vi.fn().mockResolvedValue([ - { - name: 'analytics', - host: 'db.example.test', - database: 'analytics', - schema: null, - dialect: 'postgres', - }, - ]), - listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]), - getDashboard: vi.fn().mockResolvedValue({ - lookerId: '10', - title: 'Revenue Overview', - description: 'Revenue dashboard', - folderId: '7', - ownerId: '3', - updatedAt: '2026-05-05T08:00:00.000Z', - tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }], - }), - listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]), - getLook: vi.fn().mockResolvedValue({ - lookerId: '20', - title: 'Revenue Look', - description: null, - folderId: '7', - ownerId: '3', - updatedAt: '2026-05-05T08:10:00.000Z', - query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] }, - }), - listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }), - listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]), - listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]), - listLookmlModels: vi.fn().mockResolvedValue(lookerModels), - getExplore: vi.fn().mockResolvedValue(lookerExplore), - getSignals: vi.fn().mockResolvedValue({ - dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }], - lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }], - scheduledPlans: [ - { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 }, - ], - favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }], - }), - cleanup: vi.fn().mockResolvedValue(undefined), - }; -} - -function makeCliLookerParser() { - return { - parse: vi.fn().mockResolvedValue({ - 'ecommerce.orders': { - ok: true, - catalog: null, - schema: 'public', - name: 'orders', - canonical_table: 'public.orders', - }, - 'ecommerce.orders.users': { - ok: true, - catalog: null, - schema: 'public', - name: 'users', - canonical_table: 'public.users', - }, - }), - }; -} - -function localFakeBundleReport( - jobId: string, - overrides: Partial> & { body?: Partial } = {}, -): IngestReportSnapshot { - const report = bundleReportSnapshot(); - return { - ...report, - id: `report-${jobId}`, - runId: `run-${jobId}`, - jobId, - connectionId: 'warehouse', - sourceKey: 'fake', - ...overrides, - body: { - ...report.body, - syncId: 'sync-live-1', - ...(overrides.body ?? {}), - }, - }; -} - -async function localBundleStore(projectDir: string, ids: [string, string]): Promise { - const { SqliteBundleIngestStore } = await import('@ktx/context/ingest'); - const project = await loadKtxProject({ projectDir }); - return new SqliteBundleIngestStore({ - dbPath: ktxLocalStateDbPath(project), - idFactory: (() => { - let index = 0; - return () => ids[index++] ?? `generated-${index}`; - })(), - }); -} - -async function persistLocalBundleReport(projectDir: string, report = bundleReportSnapshot()): Promise { - const store = await localBundleStore(projectDir, [report.runId, report.id]); - const run = await store.create({ - jobId: report.jobId, - connectionId: report.connectionId, - sourceKey: report.sourceKey, - syncId: report.body.syncId, - trigger: 'manual_resync', - }); - await store.markCompleted(run.id, report.body.diffSummary); - await store.create({ - runId: run.id, - jobId: report.jobId, - connectionId: report.connectionId, - sourceKey: report.sourceKey, - body: report.body, - }); -} - -async function writeBundleReportFile(tempDir: string, report = bundleReportSnapshot()): Promise { - const reportFile = join(tempDir, 'bundle-report.json'); - await writeFile(reportFile, `${JSON.stringify(report, null, 2)}\n`, 'utf-8'); - return reportFile; -} - -function emitLiveLocalMemoryFlow(memoryFlow: MemoryFlowEventSink | undefined): void { - memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); - memoryFlow?.update({ syncId: 'sync-live-1' }); - memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); - memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); - memoryFlow?.update({ - plannedWorkUnits: [ - { - unitKey: 'fake-orders', - rawFiles: ['orders/orders.json'], - peerFileCount: 0, - dependencyCount: 0, - }, - ], - }); - memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); - memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); - memoryFlow?.finish('done'); -} - describe('runKtxIngest', () => { let tempDir: string; let originalTerm: string | undefined; @@ -754,7 +55,6 @@ describe('runKtxIngest', () => { it('runs local ingest and reads status', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -806,7 +106,6 @@ describe('runKtxIngest', () => { it('routes metabase scheduled pulls to the fan-out runner and prints child summaries', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); const report = localFakeBundleReport('metabase-child-1', { @@ -864,7 +163,6 @@ describe('runKtxIngest', () => { it('returns a non-zero code when Metabase fan-out has failed children', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); const report = localFakeBundleReport('metabase-child-1', { @@ -935,7 +233,6 @@ describe('runKtxIngest', () => { it('prints Metabase fan-out progress before the final summary', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); const report = localFakeBundleReport('metabase-child-1', { @@ -1015,7 +312,7 @@ describe('runKtxIngest', () => { it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => { const projectDir = join(tempDir, 'metabase-cli-project'); - await initKtxProject({ projectDir, projectName: 'metabase-cli' }); + await writeWarehouseConfig(projectDir); await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -1153,7 +450,6 @@ describe('runKtxIngest', () => { it('prints metabase fan-out JSON results', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); @@ -1188,7 +484,6 @@ describe('runKtxIngest', () => { it('rejects source-dir uploads through the metabase fan-out route', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeMetabaseConfig(projectDir); const io = makeIo(); @@ -1218,7 +513,6 @@ describe('runKtxIngest', () => { it('prints previous run and diff summary for local ingest results', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -1252,7 +546,6 @@ describe('runKtxIngest', () => { it('returns a non-zero code when local ingest reports failed work units', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -1284,7 +577,6 @@ describe('runKtxIngest', () => { it('passes the debug LLM request file to local ingest runs', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'job-debug'), @@ -1311,7 +603,6 @@ describe('runKtxIngest', () => { it('passes daemon database introspection URL to default local ingest adapters', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); await mkdir(join(sourceDir, 'orders'), { recursive: true }); @@ -1359,7 +650,7 @@ describe('runKtxIngest', () => { it('passes the target connection id when constructing local historic-sql adapters', async () => { const projectDir = join(tempDir, 'historic-sql-project'); - await initKtxProject({ projectDir, projectName: 'historic-sql-project' }); + await writeWarehouseConfig(projectDir); await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -1420,7 +711,6 @@ describe('runKtxIngest', () => { it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeWarehouseConfig(projectDir); const pullConfigOptions = { looker: { @@ -1474,7 +764,7 @@ describe('runKtxIngest', () => { it('runs Looker scheduled ingest through the public CLI command path', async () => { const projectDir = join(tempDir, 'looker-project'); - await initKtxProject({ projectDir, projectName: 'looker-cli' }); + await writeWarehouseConfig(projectDir); await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -1568,847 +858,4 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); - it('renders live memory-flow frames for run --viz when stdout is interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); - input.memoryFlow?.update({ syncId: 'sync-live-1' }); - input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); - input.memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); - input.memoryFlow?.update({ - plannedWorkUnits: [ - { - unitKey: 'fake-orders', - rawFiles: ['orders/orders.json'], - peerFileCount: 0, - dependencyCount: 0, - }, - ], - }); - input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); - input.memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); - input.memoryFlow?.finish('done'); - - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - now: () => new Date('2026-04-30T14:00:00.000Z'), - }, - ), - ).resolves.toBe(0); - - expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.any(Object) })); - expect(io.stdout()).toContain('\u001b[2J\u001b[H'); - expect((io.stdout().match(/KTX memory flow/g) ?? []).length).toBeGreaterThan(1); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('fake-orders'); - expect(io.stderr()).toBe(''); - }); - - it('uses the TUI live session for run --viz when stdin and stdout are interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const liveSession = { - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - }; - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - now: () => new Date('2026-04-30T14:00:00.000Z'), - }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); - expect(startLiveMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ - runId: 'live-viz-run', - connectionId: 'warehouse', - adapter: 'fake', - status: 'running', - }); - expect(liveSession.update).toHaveBeenCalled(); - expect(liveSession.close).toHaveBeenCalledTimes(1); - expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toBe(''); - }); - - it('prints a final plain summary after live viz completes', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const liveSession = { - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - }; - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-summary'); - }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - outputMode: 'viz', - }, - io.io, - { runLocalIngest: runLocal, startLiveMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(liveSession.close).toHaveBeenCalledTimes(1); - expect(io.stdout()).toContain('Memory-flow summary: done'); - expect(io.stdout()).toContain('Connection: warehouse'); - }); - - it('falls back to text live rendering when the TUI live session is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); - expect(io.stdout()).toContain('\u001b[2J\u001b[H'); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('falls back to text live rendering when TUI startup fails with a redacted warning', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - emitLiveLocalMemoryFlow(input.memoryFlow); - return completedLocalBundleRun(input, 'live-viz-run'); - }); - const startLiveMemoryFlow = vi.fn( - async (_input: MemoryFlowReplayInput, ioArg: { stderr: { write(chunk: string): void } }) => { - ioArg.stderr.write('TUI visualization unavailable: Failed [redacted-url] [redacted]; using text renderer.\n'); - return null; - }, - ); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'live-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(io.stderr()).toContain('TUI visualization unavailable: Failed [redacted-url] [redacted]'); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('\u001b[2J\u001b[H'); - }); - - it('does not start live TUI when run --viz disables input', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { - return completedLocalBundleRun(input, 'no-input-live-viz-run'); - }); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - })); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - { runLocalIngest: runLocal, startLiveMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).not.toHaveBeenCalled(); - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('does not attach a live memory-flow sink for plain run output', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'plain-run')); - const io = makeIo({ isTTY: true }); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'plain', - }, - io.io, - { runLocalIngest: runLocal }, - ), - ).resolves.toBe(0); - - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('Job: plain-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - }); - - it('falls back to plain run output for run --viz when stdout is not interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const io = makeIo({ isTTY: false }); - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'non-tty-viz-run')); - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - jobIdFactory: () => 'non-tty-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Job: non-tty-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdout is not an interactive terminal; printing plain output.', - ); - }); - - it('falls back to plain run output for run --viz when stdin raw mode is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); - const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'raw-missing-viz-run')); - const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ - update: vi.fn(), - close: vi.fn(), - isClosed: vi.fn(() => false), - })); - - await expect( - runKtxIngest( - { - command: 'run', - projectDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir, - outputMode: 'viz', - }, - io.io, - { - runLocalIngest: runLocal, - startLiveMemoryFlow, - jobIdFactory: () => 'raw-missing-viz-run', - }, - ), - ).resolves.toBe(0); - - expect(startLiveMemoryFlow).not.toHaveBeenCalled(); - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('Job: raw-missing-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdin raw mode is unavailable; printing plain output.', - ); - }); - - it('returns an error code for missing status', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const io = makeIo(); - - await expect( - runKtxIngest({ command: 'status', projectDir, runId: 'missing-run', outputMode: 'plain' }, io.io), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found'); - }); - - it('uses the latest local ingest report when status has no run id', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - await persistLocalBundleReport(projectDir, localFakeBundleReport('older-run')); - await persistLocalBundleReport(projectDir, localFakeBundleReport('newer-run')); - const io = makeIo(); - - await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-newer-run'); - expect(io.stdout()).toContain('Job: newer-run'); - expect(io.stderr()).toBe(''); - }); - - it('renders the latest local ingest report through watch when run id is omitted', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - await persistLocalBundleReport(projectDir, localFakeBundleReport('watch-latest')); - const io = makeIo({ isTTY: true }); - - await expect( - runKtxIngest({ command: 'watch', projectDir, outputMode: 'viz', inputMode: 'disabled' }, io.io), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('Run: run-watch-latest'); - expect(io.stderr()).toBe(''); - }); - - it('renders report-file replay through the memory-flow TUI', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo({ isTTY: true }); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'job-1', - reportFile, - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/metabase done'); - expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); - expect(io.stdout()).toContain('Commit: abc12345 Run: run-1 Report: report-1'); - expect(io.stdout()).toContain('SOURCE'); - expect(io.stdout()).toContain('ACTIONS'); - expect(io.stdout()).toContain('SAVED'); - expect(io.stderr()).toBe(''); - }); - - it('prints report-file JSON without looking up local ingest status', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo(); - - await expect( - runKtxIngest({ command: 'status', projectDir, runId: 'report-1', reportFile, outputMode: 'json' }, io.io), - ).resolves.toBe(0); - - const parsed = JSON.parse(io.stdout()); - expect(parsed).toMatchObject({ - id: 'report-1', - runId: 'run-1', - jobId: 'job-1', - connectionId: 'warehouse', - sourceKey: 'metabase', - }); - expect(io.stderr()).toBe(''); - }); - - it('routes interactive report-file replay through the stored TUI renderer', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'run-1', - reportFile, - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); - expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ - runId: 'run-1', - reportId: 'report-1', - connectionId: 'warehouse', - adapter: 'metabase', - }); - expect(io.stdout()).toBe(''); - expect(io.stderr()).toBe(''); - }); - - it('rejects report-file replay when the requested id does not match the report', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const reportFile = await writeBundleReportFile(tempDir); - const io = makeIo(); - - await expect( - runKtxIngest({ command: 'replay', projectDir, runId: 'unrelated-id', reportFile, outputMode: 'plain' }, io.io), - ).resolves.toBe(1); - - expect(io.stderr()).toContain( - `Report file ${reportFile} does not match ingest replay id "unrelated-id"; expected one of report-1, run-1, job-1`, - ); - expect(io.stdout()).toBe(''); - }); - - it('renders memory-flow snapshot for status --viz when stdout is interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-1')); - - const io = makeIo({ isTTY: true }); - await expect( - runKtxIngest( - { command: 'status', projectDir, runId: 'viz-run-1', outputMode: 'viz', inputMode: 'disabled' }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).toContain('SOURCE'); - expect(io.stdout()).toContain('CHUNKS'); - expect(io.stdout()).toContain('WORKUNITS'); - expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); - expect(io.stderr()).toBe(''); - }); - - it('uses the TUI renderer for stored status --viz when stdin and stdout are interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-viz-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'status', - projectDir, - runId: 'tui-viz-run', - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); - expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ - runId: 'run-tui-viz-run', - connectionId: 'warehouse', - adapter: 'fake', - }); - expect(io.stdout()).toBe(''); - expect(io.stderr()).toBe(''); - }); - - it('falls back to the text renderer when TUI declines stored status --viz', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-fallback-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120, keypresses: [{ name: 'q' }] }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => false); - - await expect( - runKtxIngest( - { - command: 'status', - projectDir, - runId: 'tui-fallback-run', - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('does not use TUI for stored --viz when input is disabled', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-no-input-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'tui-no-input-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - }); - - it('falls back to plain status for stored --viz when stdin raw mode is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('raw-missing-stored-viz-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); - const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); - - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'raw-missing-stored-viz-run', - outputMode: 'viz', - }, - io.io, - { renderStoredMemoryFlow }, - ), - ).resolves.toBe(0); - - expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('Run: run-raw-missing-stored-viz-run'); - expect(io.stdout()).toContain('Job: raw-missing-stored-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdin raw mode is unavailable; printing plain output.', - ); - }); - - it('keeps stored --viz snapshot-only when input is disabled', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('no-input-viz-run')); - - const io = makeIo({ isTTY: true, columns: 120 }); - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'no-input-viz-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); - expect(io.stderr()).toBe(''); - }); - - it('keeps disabled-input stored --viz snapshot output even when stdin raw mode is unavailable', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('disabled-raw-missing-viz-run')); - - const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'disabled-raw-missing-viz-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); - expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); - expect(io.stderr()).toBe(''); - }); - - it('degrades stored --viz snapshots to plain status when stdout is redirected even when input is disabled', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('redirected-no-input-viz-run')); - - const io = makeIo({ isTTY: false }); - await expect( - runKtxIngest( - { - command: 'replay', - projectDir, - runId: 'redirected-no-input-viz-run', - outputMode: 'viz', - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-redirected-no-input-viz-run'); - expect(io.stdout()).toContain('Job: redirected-no-input-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdout is not an interactive terminal; printing plain output.', - ); - }); - - it('degrades ingest replay --viz to plain status when TERM is dumb', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('dumb-terminal-viz-run')); - - const io = makeIo({ isTTY: true }); - await expect( - runKtxIngest( - { command: 'replay', projectDir, runId: 'dumb-terminal-viz-run', outputMode: 'viz' }, - io.io, - { env: { ...process.env, TERM: 'dumb' } }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-dumb-terminal-viz-run'); - expect(io.stdout()).toContain('Job: dumb-terminal-viz-run'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.', - ); - }); - - it('falls back to plain status for --viz when stdout is not interactive', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-2')); - - const io = makeIo({ isTTY: false }); - await expect( - runKtxIngest({ command: 'replay', projectDir, runId: 'viz-run-2', outputMode: 'viz' }, io.io), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Run: run-viz-run-2'); - expect(io.stdout()).toContain('Job: viz-run-2'); - expect(io.stdout()).not.toContain('KTX memory flow'); - expect(io.stderr()).toContain( - 'Visualization requested but stdout is not an interactive terminal; printing plain output.', - ); - }); - - it('prints JSON for status --json', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeWarehouseConfig(projectDir); - const sourceDir = join(tempDir, 'source'); - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); - - await persistLocalBundleReport(projectDir, localFakeBundleReport('json-run-1')); - - const io = makeIo(); - await expect( - runKtxIngest({ command: 'status', projectDir, runId: 'json-run-1', outputMode: 'json' }, io.io), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - runId: 'run-json-run-1', - jobId: 'json-run-1', - sourceKey: 'fake', - connectionId: 'warehouse', - }); - expect(io.stderr()).toBe(''); - }); }); diff --git a/packages/context/package.json b/packages/context/package.json index b85947e5..78d3c4de 100644 --- a/packages/context/package.json +++ b/packages/context/package.json @@ -120,10 +120,12 @@ "scripts": { "build": "tsc -p tsconfig.json", "relationships:benchmarks": "pnpm --silent run build && node scripts/relationship-benchmark-report.mjs", + "relationships:benchmarks:test": "KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts", "search:pglite-spike": "node scripts/pglite-hybrid-search-spike.mjs", "search:pglite-owner-prototype": "node scripts/pglite-owner-process-prototype.mjs", "search:pglite-sl-prototype": "node scripts/pglite-sl-search-prototype.mjs", - "test": "vitest run", + "test": "vitest run --exclude src/scan/relationship-benchmarks.test.ts --exclude src/scan/local-scan.test.ts --exclude src/mcp/local-project-ports.test.ts --exclude src/ingest/local-stage-ingest.test.ts --exclude src/sl/pglite-sl-search-prototype.test.ts --exclude src/core/git.service.test.ts --exclude src/ingest/local-adapters.test.ts --exclude src/ingest/local-bundle-ingest.test.ts --exclude src/ingest/local-metabase-ingest.test.ts --exclude src/sl/local-sl.test.ts --exclude src/search/pglite-owner-process.test.ts --exclude src/scan/local-enrichment-artifacts.test.ts --exclude src/search/pglite-spike.test.ts --exclude src/wiki/local-knowledge.test.ts --exclude src/sl/local-query.test.ts --exclude src/scan/relationship-review-decisions.test.ts --exclude src/scan/relationship-profiling.test.ts", + "test:slow": "vitest run src/scan/local-scan.test.ts src/mcp/local-project-ports.test.ts src/ingest/local-stage-ingest.test.ts src/sl/pglite-sl-search-prototype.test.ts src/core/git.service.test.ts src/ingest/local-adapters.test.ts src/ingest/local-bundle-ingest.test.ts src/ingest/local-metabase-ingest.test.ts src/sl/local-sl.test.ts src/search/pglite-owner-process.test.ts src/scan/local-enrichment-artifacts.test.ts src/search/pglite-spike.test.ts src/wiki/local-knowledge.test.ts src/sl/local-query.test.ts src/scan/relationship-review-decisions.test.ts src/scan/relationship-profiling.test.ts --testTimeout 30000", "type-check": "tsc -p tsconfig.json --noEmit" }, "dependencies": { diff --git a/packages/context/src/scan/relationship-benchmarks.test.ts b/packages/context/src/scan/relationship-benchmarks.test.ts index ecc9e4a9..b4e5c782 100644 --- a/packages/context/src/scan/relationship-benchmarks.test.ts +++ b/packages/context/src/scan/relationship-benchmarks.test.ts @@ -53,6 +53,12 @@ const CHECKED_IN_FIXTURE_ORIGINS = { semantic_embedding_aliases_no_declared_constraints: 'synthetic', } as const; +function runAdHocRelationshipBenchmarks(): boolean { + return process.env.KTX_RUN_RELATIONSHIP_BENCHMARKS === '1'; +} + +const adHocRelationshipBenchmarkIt = runAdHocRelationshipBenchmarks() ? it : it.skip; + function snapshot(): KtxSchemaSnapshot { return { connectionId: 'warehouse', @@ -644,7 +650,7 @@ describe('relationship benchmarks', () => { expect(fixture.expected.expectedLinks).toHaveLength(1900); }); - it('runs the scale stress fixture inside the benchmark validation budget', async () => { + adHocRelationshipBenchmarkIt('runs the scale stress fixture inside the benchmark validation budget', async () => { const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); const fixture = await loadKtxRelationshipBenchmarkFixture( join(fixtureRoot.pathname, 'scale_stress_no_declared_constraints'), diff --git a/packages/context/src/scan/relationship-candidates.ts b/packages/context/src/scan/relationship-candidates.ts index b10aa069..cd3b7767 100644 --- a/packages/context/src/scan/relationship-candidates.ts +++ b/packages/context/src/scan/relationship-candidates.ts @@ -7,6 +7,7 @@ import type { } from './enrichment-types.js'; import { localCandidateTables } from './relationship-locality.js'; import { + type KtxRelationshipNormalizedName, normalizeKtxRelationshipName, pluralizeKtxRelationshipToken, singularizeKtxRelationshipToken, @@ -97,9 +98,22 @@ const REFERENCE_SUFFIXES: Array<{ suffix: string; reason: string }> = [ { suffix: '_uuid', reason: 'foreign_key_uuid_suffix' }, ]; const RELATIONSHIP_KEY_TARGET_SUFFIXES = ['_id', '_key', '_code', '_uuid'] as const; +const tableAliasesCache = new WeakMap>(); +const parentTableNameAliasesCache = new WeakMap>(); +const normalizedColumnNameCache = new WeakMap(); + +function normalizedColumnName(column: KtxEnrichedColumn): KtxRelationshipNormalizedName { + const cached = normalizedColumnNameCache.get(column); + if (cached) { + return cached; + } + const normalized = normalizeKtxRelationshipName(column.name); + normalizedColumnNameCache.set(column, normalized); + return normalized; +} function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean { - const normalized = normalizeKtxRelationshipName(column.name); + const normalized = normalizedColumnName(column); return ( normalized.tokens.length >= 2 && RELATIONSHIP_KEY_TARGET_SUFFIXES.some((suffix) => normalized.normalized.endsWith(suffix)) @@ -107,8 +121,8 @@ function isRelationshipKeyShapedTarget(column: KtxEnrichedColumn): boolean { } function columnSuffixMatchesTarget(input: { fromColumn: KtxEnrichedColumn; toColumn: KtxEnrichedColumn }): boolean { - const source = normalizeKtxRelationshipName(input.fromColumn.name).normalized; - const target = normalizeKtxRelationshipName(input.toColumn.name).normalized; + const source = normalizedColumnName(input.fromColumn).normalized; + const target = normalizedColumnName(input.toColumn).normalized; return source !== target && target.length > 0 && source.endsWith(`_${target}`); } @@ -160,7 +174,7 @@ function hasUsableEmbedding(column: KtxEnrichedColumn): boolean { } function sourceColumnReference(column: KtxEnrichedColumn): KtxRelationshipSourceColumnReference | null { - const normalized = normalizeKtxRelationshipName(column.name); + const normalized = normalizedColumnName(column); if (SELF_REFERENCE_NAMES.has(normalized.normalized)) { return { base: normalized.normalized.replace(/_id$/u, ''), reason: 'foreign_key_suffix' }; } @@ -192,6 +206,11 @@ function addNormalizedTableAlias(aliases: Set, name: string): void { } function tableAliases(table: KtxEnrichedTable): Set { + const cached = tableAliasesCache.get(table); + if (cached) { + return cached; + } + const normalized = normalizeKtxRelationshipName(table.ref.name); const aliases = new Set([normalized.normalized, normalized.singular, normalized.plural]); if (normalized.tokens.length > 1) { @@ -203,6 +222,7 @@ function tableAliases(table: KtxEnrichedTable): Set { aliases.add(pluralizeKtxRelationshipToken(singularLastToken)); } } + tableAliasesCache.set(table, aliases); return aliases; } @@ -212,13 +232,19 @@ function finalTableNamePart(table: KtxEnrichedTable): string { } function parentTableNameAliases(table: KtxEnrichedTable): Set { - const aliases = tableAliases(table); + const cached = parentTableNameAliasesCache.get(table); + if (cached) { + return cached; + } + + const aliases = new Set(tableAliases(table)); addNormalizedTableAlias(aliases, finalTableNamePart(table)); + parentTableNameAliasesCache.set(table, aliases); return aliases; } function targetKeyScore(table: KtxEnrichedTable, column: KtxEnrichedColumn): number { - const columnName = normalizeKtxRelationshipName(column.name).normalized; + const columnName = normalizedColumnName(column).normalized; const tableKeyBases = parentTableNameAliases(table); if (column.primaryKey) { return 1; @@ -338,7 +364,7 @@ function candidateParentTables(input: { maxParentTables, }).map((item) => item.table); - const normalizedColumn = normalizeKtxRelationshipName(input.fromColumn.name).normalized; + const normalizedColumn = normalizedColumnName(input.fromColumn).normalized; if (!SELF_REFERENCE_NAMES.has(normalizedColumn) || ranked.some((table) => table.id === input.fromTable.id)) { return ranked; } @@ -364,7 +390,7 @@ function targetKeyEvidence( return { score: 0, reasons: [] }; } - const columnName = normalizeKtxRelationshipName(column.name).normalized; + const columnName = normalizedColumnName(column).normalized; if (columnName === 'code' || columnName.endsWith('_code') || columnName === 'key' || columnName.endsWith('_key')) { return { score: 0.86, reasons: ['profile_unique_target'] }; } @@ -500,7 +526,7 @@ function createCandidate(input: { evidence: { sourceColumnBase: input.sourceBase, targetTableBase: input.targetBase, - targetColumnBase: normalizeKtxRelationshipName(input.toColumn.name).normalized, + targetColumnBase: normalizedColumnName(input.toColumn).normalized, targetKeyScore: input.targetKeyScore, nameScore: input.nameScore, reasons: input.reasons, @@ -553,7 +579,7 @@ function generateKtxEmbeddingRelationshipCandidates( continue; } - const sourceBase = normalizeKtxRelationshipName(fromColumn.name).normalized; + const sourceBase = normalizedColumnName(fromColumn).normalized; const targetBase = normalizeKtxRelationshipName(toTable.ref.name).singular; const reasons = ['embedding_similarity', ...keyEvidence.reasons]; const candidate = createCandidate({ @@ -620,7 +646,7 @@ export function generateKtxRelationshipDiscoveryCandidates( const sameTable = fromTable.id === toTable.id; const nameMatchesTarget = strictAliases.has(sourceBase); const parentTableNameMatcher = !sameTable && !nameMatchesTarget && parentAliases.has(sourceBase); - const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizeKtxRelationshipName(fromColumn.name).normalized); + const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizedColumnName(fromColumn).normalized); const strictTableMatcher = (!sameTable && nameMatchesTarget) || selfReference; for (const toColumn of toTable.columns) { @@ -675,7 +701,7 @@ export function generateKtxRelationshipDiscoveryCandidates( if ( !suffixMatcher && !parentTableNameMatcher && - normalizeKtxRelationshipName(fromColumn.name).normalized === normalizeKtxRelationshipName(toColumn.name).normalized + normalizedColumnName(fromColumn).normalized === normalizedColumnName(toColumn).normalized ) { reasons.push('exact_column_name'); nameScore = Math.max(nameScore, 0.9); diff --git a/packages/context/src/scan/relationship-locality.ts b/packages/context/src/scan/relationship-locality.ts index 5b180430..246ce84d 100644 --- a/packages/context/src/scan/relationship-locality.ts +++ b/packages/context/src/scan/relationship-locality.ts @@ -18,20 +18,28 @@ export interface LocalKtxRelationshipCandidateTablesInput { const DEFAULT_MAX_PARENT_TABLES = 20; const RELATIONSHIP_SUFFIX_TOKENS = new Set(['id', 'ids', 'key', 'keys', 'code', 'codes', 'uuid', 'uuids']); +const normalizedTokenVariantsCache = new Map(); function roundedScore(value: number): number { return Number(Math.max(0, Math.min(1, value)).toFixed(3)); } function normalizedTokenVariants(name: string): string[] { + const cached = normalizedTokenVariantsCache.get(name); + if (cached) { + return cached; + } + const normalized = normalizeKtxRelationshipName(name); - return Array.from( + const variants = Array.from( new Set([ ...normalized.tokens, ...tokenizeKtxRelationshipName(normalized.singular), ...tokenizeKtxRelationshipName(normalized.plural), ]), ).filter(Boolean); + normalizedTokenVariantsCache.set(name, variants); + return variants; } function childColumnLocalityTokens(column: KtxEnrichedColumn): string[] { @@ -91,24 +99,29 @@ function parentEmbeddingScore(childColumn: KtxEnrichedColumn, parentTable: KtxEn } function tableTokenScore(input: { - childTable: KtxEnrichedTable; - childColumn: KtxEnrichedColumn; + childTableId: string; + childTableTokens: readonly string[]; + childColumnTokens: readonly string[]; parentTable: KtxEnrichedTable; }): number { - const childTableTokens = normalizedTokenVariants(input.childTable.ref.name); - const childColumnTokens = childColumnLocalityTokens(input.childColumn); const parentTokens = normalizedTokenVariants(input.parentTable.ref.name); - const columnOnlyScore = jaccard(childColumnTokens, parentTokens); - if (input.parentTable.id === input.childTable.id) { + const columnOnlyScore = jaccard(input.childColumnTokens, parentTokens); + if (parentTokens.length === 0) { + return 0; + } + if (input.parentTable.id === input.childTableId) { return columnOnlyScore; } - const columnAndTableScore = jaccard(uniqueTokens([...childTableTokens, ...childColumnTokens]), parentTokens); + const columnAndTableScore = jaccard(uniqueTokens([...input.childTableTokens, ...input.childColumnTokens]), parentTokens); return Math.max(columnOnlyScore, columnAndTableScore * 0.6); } function localityScore(input: { childTable: KtxEnrichedTable; + childTableId: string; + childTableTokens: readonly string[]; childColumn: KtxEnrichedColumn; + childColumnTokens: readonly string[]; parentTable: KtxEnrichedTable; }): Omit { const tokenScore = roundedScore(tableTokenScore(input)); @@ -143,12 +156,18 @@ export function localCandidateTables( return []; } + const childTableTokens = normalizedTokenVariants(input.childTable.ref.name); + const childColumnTokens = childColumnLocalityTokens(input.childColumn); + return input.parentTables .map((table) => ({ table, ...localityScore({ childTable: input.childTable, + childTableId: input.childTable.id, + childTableTokens, childColumn: input.childColumn, + childColumnTokens, parentTable: table, }), })) diff --git a/packages/context/src/sl/schemas.ts b/packages/context/src/sl/schemas.ts index 56f415a8..218c0435 100644 --- a/packages/context/src/sl/schemas.ts +++ b/packages/context/src/sl/schemas.ts @@ -1,7 +1,7 @@ import { z } from 'zod'; // Literal vocabularies — kept in lockstep with the Python Pydantic model at -// python-service/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole / +// python/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole / // ColumnVisibility / JoinDeclaration). If these diverge, YAMLs can pass // TypeScript validation at ingest time but fail Python loading at query time. const columnTypeValues = ['string', 'number', 'time', 'boolean'] as const; diff --git a/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts b/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts index f6cdd3fe..f9bf513b 100644 --- a/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts +++ b/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts @@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest'; import { createHttpSqlAnalysisPort } from './http-sql-analysis-port.js'; describe('createHttpSqlAnalysisPort', () => { - it('calls the python-service fingerprint endpoint and maps snake_case response fields', async () => { + it('calls the SQL-analysis fingerprint endpoint and maps snake_case response fields', async () => { const requestJson = vi.fn(async () => ({ fingerprint: 'fingerprint-template', normalized_sql: 'SELECT * FROM analytics.orders WHERE status = ?', @@ -26,7 +26,7 @@ describe('createHttpSqlAnalysisPort', () => { }); }); - it('preserves python-service parse errors in the mapped result', async () => { + it('preserves SQL-analysis parse errors in the mapped result', async () => { const requestJson = vi.fn(async () => ({ fingerprint: '', normalized_sql: '', diff --git a/packages/context/src/tools/base-tool.ts b/packages/context/src/tools/base-tool.ts index 37da69a0..0566a0ca 100644 --- a/packages/context/src/tools/base-tool.ts +++ b/packages/context/src/tools/base-tool.ts @@ -151,7 +151,7 @@ export abstract class BaseTool { } } }, - // Send only markdown to LLM - frontend still receives full { markdown, structured } via stream + // Send only markdown to the LLM; tool callers still receive the structured output. toModelOutput: ({ output }) => { if (output && typeof output === 'object' && 'markdown' in output) { return { type: 'content', value: [{ type: 'text', text: output.markdown as string }] }; diff --git a/scripts/build-benchmark-snapshot.test.mjs b/scripts/build-benchmark-snapshot.test.mjs index adc30173..26ac6419 100644 --- a/scripts/build-benchmark-snapshot.test.mjs +++ b/scripts/build-benchmark-snapshot.test.mjs @@ -1,4 +1,5 @@ import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; import { createRequire } from 'node:module'; import { describe, it } from 'node:test'; import { buildBenchmarkSnapshot } from './build-benchmark-snapshot.mjs'; @@ -250,4 +251,13 @@ describe('buildBenchmarkSnapshot', () => { }, ]); }); + + it('exposes relationship benchmarks as an explicit context package script', async () => { + const packageJson = JSON.parse(await readFile(new URL('../packages/context/package.json', import.meta.url), 'utf8')); + + assert.equal( + packageJson.scripts['relationships:benchmarks:test'], + 'KTX_RUN_RELATIONSHIP_BENCHMARKS=1 vitest run src/scan/relationship-benchmarks.test.ts', + ); + }); }); diff --git a/scripts/check-boundaries.test.mjs b/scripts/check-boundaries.test.mjs index fab2b87e..39946464 100644 --- a/scripts/check-boundaries.test.mjs +++ b/scripts/check-boundaries.test.mjs @@ -14,7 +14,7 @@ function lowerProductName() { describe('scanFileContent', () => { it('rejects source imports from application directories', () => { const serverAlias = '@' + 'server/contracts'; - const pythonAppPath = 'python-service/' + 'app/api/endpoints/semantic_layer.py'; + const pythonAppPath = `${['python', 'service'].join('-')}/app/api/endpoints/semantic_layer.py`; const violations = [ ...scanFileContent('packages/context/src/index.ts', `import { orpc } from '${serverAlias}';`), diff --git a/scripts/ci-artifact-upload.test.mjs b/scripts/ci-artifact-upload.test.mjs index d18db979..3fecdfbc 100644 --- a/scripts/ci-artifact-upload.test.mjs +++ b/scripts/ci-artifact-upload.test.mjs @@ -4,7 +4,7 @@ import { dirname, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { describe, it } from 'node:test'; -const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), '..', '..'); +const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), '..'); const ciWorkflowPath = resolve(repoRoot, '.github', 'workflows', 'ci.yml'); async function readCiWorkflowOrSkip(testContext) { @@ -21,7 +21,7 @@ async function readCiWorkflowOrSkip(testContext) { } describe('KTX CI artifact upload contract', () => { - it('uploads verified KTX package artifacts from check-ktx-subtree', async (testContext) => { + it('uploads verified KTX package artifacts from the standalone check job', async (testContext) => { const workflow = await readCiWorkflowOrSkip(testContext); if (workflow === null) { return; @@ -29,42 +29,35 @@ describe('KTX CI artifact upload contract', () => { assert.match( workflow, - /name: Build ktx package artifacts and verify public smoke\s+run: cd ktx && pnpm run artifacts:build && pnpm run artifacts:verify-manifest && pnpm run artifacts:verify-demo\s+- name: Upload ktx package artifacts/s, + /name: Build and verify package artifacts\s+run: pnpm run artifacts:check\s+- name: Upload package artifacts/s, ); assert.match(workflow, /uses: actions\/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f/); assert.match(workflow, /name: ktx-package-artifacts-\$\{\{ github\.sha \}\}/); - assert.match(workflow, /ktx\/dist\/artifacts\/manifest\.json/); - assert.match(workflow, /ktx\/dist\/artifacts\/npm\/\*\.tgz/); - assert.match(workflow, /ktx\/dist\/artifacts\/python\/\*\.whl/); - assert.match(workflow, /ktx\/dist\/artifacts\/python\/\*\.tar\.gz/); + assert.match(workflow, /dist\/artifacts\/manifest\.json/); + assert.match(workflow, /dist\/artifacts\/npm\/\*\.tgz/); + assert.match(workflow, /dist\/artifacts\/python\/\*\.whl/); + assert.match(workflow, /dist\/artifacts\/python\/\*\.tar\.gz/); assert.match(workflow, /if-no-files-found: error/); assert.match(workflow, /retention-days: 7/); }); - it('runs packed demo artifact smoke on Linux and macOS', async (testContext) => { + it('runs TypeScript and Python checks in the standalone workflow', async (testContext) => { const workflow = await readCiWorkflowOrSkip(testContext); if (workflow === null) { return; } - assert.match(workflow, /check-ktx-packed-demo:/); - assert.match(workflow, /matrix:\s+os: \[ubuntu-latest, macos-latest\]/s); - assert.match(workflow, /name: Download ktx package artifacts/); - assert.match(workflow, /path: ktx\/dist\/artifacts/); - assert.match(workflow, /run: cd ktx && pnpm run artifacts:verify-demo/); + assert.match(workflow, /run: pnpm run check/); + assert.match(workflow, /run: uv sync --all-packages/); + assert.match(workflow, /run: uv run pytest/); }); - it('includes packed demo artifact smoke in ci-success', async (testContext) => { + it('does not depend on host application CI jobs', async (testContext) => { const workflow = await readCiWorkflowOrSkip(testContext); if (workflow === null) { return; } - assert.match( - workflow, - /needs: \[check-ktx-subtree, check-ktx-packed-demo, build-python-service, test-server, build-frontend, run-pre-commit, build-docker-images\]/, - ); - assert.match(workflow, /needs\.check-ktx-packed-demo\.result.*== "failure"/); - assert.match(workflow, /needs\.check-ktx-packed-demo\.result.*== "cancelled"/); + assert.doesNotMatch(workflow, /build-python-service|test-server|build-frontend|build-docker-images/); }); }); diff --git a/scripts/conductor-run.sh b/scripts/conductor-run.sh new file mode 100755 index 00000000..33b40b20 --- /dev/null +++ b/scripts/conductor-run.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# conductor-run.sh - Starts the long-lived local KTX daemon for Conductor. +# +# Uses a fixed port because Conductor runs this workspace in nonconcurrent mode. + +set -e +set -o pipefail + +read_required_uv_version() { + local project_file="$1" + + if [ ! -f "$project_file" ]; then + return 1 + fi + + sed -nE 's/^[[:space:]]*required-version[[:space:]]*=[[:space:]]*"([^"]+)".*/\1/p' "$project_file" | head -n 1 +} + +uv_version() { + local uv_bin="$1" + + "$uv_bin" --version 2>/dev/null | awk '{print $2}' +} + +install_workspace_uv() { + local required_version="$1" + local install_dir="$PWD/.context/bin/uv-$required_version" + + mkdir -p "$install_dir" + + if [ ! -x "$install_dir/uv" ] || [ "$(uv_version "$install_dir/uv")" != "$required_version" ]; then + echo "Installing workspace-local uv $required_version..." >&2 + curl -LsSf "https://astral.sh/uv/$required_version/install.sh" | + env UV_INSTALL_DIR="$install_dir" UV_NO_MODIFY_PATH=1 sh >&2 + fi + + printf '%s\n' "$install_dir/uv" +} + +resolve_uv_for_project() { + local project_file="$1" + local required_version + local system_uv + local system_version + local workspace_uv + + required_version="$(read_required_uv_version "$project_file" || true)" + required_version="${required_version#==}" + + if [ -z "$required_version" ]; then + command -v uv + return + fi + + if ! [[ "$required_version" =~ ^[0-9]+[.][0-9]+[.][0-9]+$ ]]; then + echo "WARNING: Unsupported uv required-version '$required_version'; using uv from PATH." >&2 + command -v uv + return + fi + + if command -v uv >/dev/null 2>&1; then + system_uv="$(command -v uv)" + system_version="$(uv_version "$system_uv")" + + if [ "$system_version" = "$required_version" ]; then + printf '%s\n' "$system_uv" + return + fi + + echo "Found uv $system_version at $system_uv; $project_file requires uv $required_version." >&2 + else + echo "uv is not installed on PATH; $project_file requires uv $required_version." >&2 + fi + + workspace_uv="$(install_workspace_uv "$required_version")" + + if [ "$(uv_version "$workspace_uv")" != "$required_version" ]; then + echo "ERROR: Expected uv $required_version at $workspace_uv, got $("$workspace_uv" --version 2>&1 || true)." >&2 + return 1 + fi + + printf '%s\n' "$workspace_uv" +} + +echo "=== Starting KTX for Conductor ===" + +echo "Building KTX packages..." +pnpm run build + +KTX_UV_BIN="$(resolve_uv_for_project "pyproject.toml")" +export PATH="$(dirname "$KTX_UV_BIN"):$PATH" + +if [ -f ".venv/bin/activate" ]; then + source .venv/bin/activate +fi + +echo "KTX daemon: http://127.0.0.1:8765" +exec uv run ktx-daemon serve-http --host 127.0.0.1 --port 8765 diff --git a/scripts/conductor-scripts.test.mjs b/scripts/conductor-scripts.test.mjs new file mode 100644 index 00000000..38689def --- /dev/null +++ b/scripts/conductor-scripts.test.mjs @@ -0,0 +1,40 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +async function readText(relativePath) { + return readFile(new URL(`../${relativePath}`, import.meta.url), 'utf8'); +} + +describe('Conductor workspace scripts', () => { + it('registers setup and run scripts in nonconcurrent mode', async () => { + const manifest = JSON.parse(await readText('conductor.json')); + + assert.deepEqual(manifest.scripts, { + setup: 'bash scripts/conductor-setup.sh', + run: 'bash scripts/conductor-run.sh', + }); + assert.equal(manifest.runScriptMode, 'nonconcurrent'); + }); + + it('sets up exact uv, Python packages, JS packages, and the built CLI', async () => { + const setupScript = await readText('scripts/conductor-setup.sh'); + + assert.match(setupScript, /read_required_uv_version\(\)/); + assert.match(setupScript, /\.context\/bin\/uv-\$required_version/); + assert.match(setupScript, /uv sync --all-packages --all-groups/); + assert.match(setupScript, /pnpm install --frozen-lockfile --prefer-offline/); + assert.match(setupScript, /pnpm run native:rebuild/); + assert.match(setupScript, /pnpm run build/); + assert.match(setupScript, /packages\/cli\/dist\/bin\.js dev doctor setup --no-input/); + }); + + it('runs the KTX daemon on the documented fixed local port', async () => { + const runScript = await readText('scripts/conductor-run.sh'); + + assert.match(runScript, /pnpm run build/); + assert.match(runScript, /source \.venv\/bin\/activate/); + assert.match(runScript, /uv run ktx-daemon serve-http --host 127\.0\.0\.1 --port 8765/); + assert.doesNotMatch(runScript, /\bnpx\b/); + }); +}); diff --git a/scripts/conductor-setup.sh b/scripts/conductor-setup.sh new file mode 100755 index 00000000..729b03b0 --- /dev/null +++ b/scripts/conductor-setup.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# conductor-setup.sh - Runs once when Conductor creates a KTX workspace. +# +# Prepares the standalone pnpm + uv workspace and builds the local CLI. + +set -e +set -o pipefail + +read_required_uv_version() { + local project_file="$1" + + if [ ! -f "$project_file" ]; then + return 1 + fi + + sed -nE 's/^[[:space:]]*required-version[[:space:]]*=[[:space:]]*"([^"]+)".*/\1/p' "$project_file" | head -n 1 +} + +uv_version() { + local uv_bin="$1" + + "$uv_bin" --version 2>/dev/null | awk '{print $2}' +} + +install_workspace_uv() { + local required_version="$1" + local install_dir="$PWD/.context/bin/uv-$required_version" + + mkdir -p "$install_dir" + + if [ ! -x "$install_dir/uv" ] || [ "$(uv_version "$install_dir/uv")" != "$required_version" ]; then + echo "Installing workspace-local uv $required_version..." >&2 + curl -LsSf "https://astral.sh/uv/$required_version/install.sh" | + env UV_INSTALL_DIR="$install_dir" UV_NO_MODIFY_PATH=1 sh >&2 + fi + + printf '%s\n' "$install_dir/uv" +} + +resolve_uv_for_project() { + local project_file="$1" + local required_version + local system_uv + local system_version + local workspace_uv + + required_version="$(read_required_uv_version "$project_file" || true)" + required_version="${required_version#==}" + + if [ -z "$required_version" ]; then + command -v uv + return + fi + + if ! [[ "$required_version" =~ ^[0-9]+[.][0-9]+[.][0-9]+$ ]]; then + echo "WARNING: Unsupported uv required-version '$required_version'; using uv from PATH." >&2 + command -v uv + return + fi + + if command -v uv >/dev/null 2>&1; then + system_uv="$(command -v uv)" + system_version="$(uv_version "$system_uv")" + + if [ "$system_version" = "$required_version" ]; then + printf '%s\n' "$system_uv" + return + fi + + echo "Found uv $system_version at $system_uv; $project_file requires uv $required_version." >&2 + else + echo "uv is not installed on PATH; $project_file requires uv $required_version." >&2 + fi + + workspace_uv="$(install_workspace_uv "$required_version")" + + if [ "$(uv_version "$workspace_uv")" != "$required_version" ]; then + echo "ERROR: Expected uv $required_version at $workspace_uv, got $("$workspace_uv" --version 2>&1 || true)." >&2 + return 1 + fi + + printf '%s\n' "$workspace_uv" +} + +echo "=== Conductor KTX workspace setup ===" + +if [ -n "${CONDUCTOR_ROOT_PATH:-}" ] && [ -f "$CONDUCTOR_ROOT_PATH/.env" ]; then + ln -sf "$CONDUCTOR_ROOT_PATH/.env" .env + echo "Linked .env" +fi + +KTX_UV_BIN="$(resolve_uv_for_project "pyproject.toml")" +export PATH="$(dirname "$KTX_UV_BIN"):$PATH" + +echo "Installing KTX Python dependencies..." +uv sync --all-packages --all-groups + +echo "Installing KTX JS dependencies..." +pnpm install --frozen-lockfile --prefer-offline + +echo "Rebuilding native JS dependencies..." +pnpm run native:rebuild + +echo "Building KTX packages..." +pnpm run build + +echo "Running KTX setup doctor..." +node packages/cli/dist/bin.js dev doctor setup --no-input + +echo "=== Setup complete ===" diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index b2da21c8..6eeeb13c 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -66,6 +66,8 @@ describe('standalone example docs', () => { assert.match(smoke, /assert_manifest "\$FIRST_MANIFEST" true/); assert.match(smoke, /assert_manifest "\$SECOND_MANIFEST" false/); assert.match(smoke, /assert_manifest "\$RESET_MANIFEST" true/); + assert.doesNotMatch(readme, /python-service/); + assert.doesNotMatch(smoke, /python-service|PYTHON_SERVICE|REPO_ROOT/); }); it('lists every published TypeScript package in the package root README', async () => { @@ -140,6 +142,8 @@ describe('standalone example docs', () => { assert.match(packageJson.scripts.smoke, /src\/standalone-smoke\.test\.ts/); assert.match(packageJson.scripts.smoke, /src\/example-smoke\.test\.ts/); + assert.match(packageJson.scripts.test, /--exclude src\/standalone-smoke\.test\.ts/); + assert.match(packageJson.scripts.test, /--exclude src\/example-smoke\.test\.ts/); }); it('documents daemon HTTP database, source generation, LookML, embedding, and code execution support', async () => { diff --git a/scripts/precommit-check.test.mjs b/scripts/precommit-check.test.mjs index 94f71e09..55ef66bb 100644 --- a/scripts/precommit-check.test.mjs +++ b/scripts/precommit-check.test.mjs @@ -9,7 +9,7 @@ function commandKeys(files) { describe('precommit-check', () => { it('skips files outside ktx', () => { - assert.deepEqual(commandKeys(['server/src/app.ts']), []); + assert.deepEqual(commandKeys(['outside-workspace/src/app.ts']), []); }); it('runs only the touched package checks for package code', () => { diff --git a/scripts/standalone-ci-workflow.test.mjs b/scripts/standalone-ci-workflow.test.mjs index bfcc1b64..230b9e2f 100644 --- a/scripts/standalone-ci-workflow.test.mjs +++ b/scripts/standalone-ci-workflow.test.mjs @@ -27,6 +27,8 @@ describe('standalone KTX CI workflow', () => { 'cache-dependency-path: "pnpm-lock.yaml"', 'pnpm install --frozen-lockfile', 'pnpm run check', + 'pnpm run test:slow', + 'pnpm run smoke', 'actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405', 'python-version: "3.13"', 'astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b', diff --git a/scripts/test-tiering.test.mjs b/scripts/test-tiering.test.mjs new file mode 100644 index 00000000..5a984dd1 --- /dev/null +++ b/scripts/test-tiering.test.mjs @@ -0,0 +1,73 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +async function readJson(path) { + return JSON.parse(await readFile(new URL(path, import.meta.url), 'utf8')); +} + +function assertScriptContainsAll(script, expected) { + for (const item of expected) { + assert.match(script, new RegExp(item.replaceAll('/', '\\/').replaceAll('.', '\\.'))); + } +} + +describe('test tiering', () => { + const cliSlowTests = [ + 'src/setup-databases.test.ts', + 'src/scan.test.ts', + 'src/commands/connection-metabase-setup.test.ts', + 'src/setup-models.test.ts', + 'src/setup-sources.test.ts', + 'src/setup.test.ts', + 'src/connection.test.ts', + 'src/setup-embeddings.test.ts', + 'src/ingest.test.ts', + 'src/commands/connection-mapping.test.ts', + 'src/ingest-viz.test.ts', + 'src/demo.test.ts', + 'src/setup-project.test.ts', + 'src/sl.test.ts', + 'src/local-scan-connectors.test.ts', + 'src/commands/connection-notion.test.ts', + ]; + + const contextSlowTests = [ + 'src/scan/local-scan.test.ts', + 'src/mcp/local-project-ports.test.ts', + 'src/ingest/local-stage-ingest.test.ts', + 'src/sl/pglite-sl-search-prototype.test.ts', + 'src/core/git.service.test.ts', + 'src/ingest/local-adapters.test.ts', + 'src/ingest/local-bundle-ingest.test.ts', + 'src/ingest/local-metabase-ingest.test.ts', + 'src/sl/local-sl.test.ts', + 'src/search/pglite-owner-process.test.ts', + 'src/scan/local-enrichment-artifacts.test.ts', + 'src/search/pglite-spike.test.ts', + 'src/wiki/local-knowledge.test.ts', + 'src/sl/local-query.test.ts', + 'src/scan/relationship-review-decisions.test.ts', + 'src/scan/relationship-profiling.test.ts', + ]; + + it('keeps slow package tests out of default local package test scripts', async () => { + const cliPackage = await readJson('../packages/cli/package.json'); + const contextPackage = await readJson('../packages/context/package.json'); + + assertScriptContainsAll(cliPackage.scripts.test, cliSlowTests.map((file) => `--exclude ${file}`)); + assertScriptContainsAll(contextPackage.scripts.test, contextSlowTests.map((file) => `--exclude ${file}`)); + assert.match(contextPackage.scripts.test, /--exclude src\/scan\/relationship-benchmarks\.test\.ts/); + }); + + it('provides explicit slow package test scripts for CI', async () => { + const rootPackage = await readJson('../package.json'); + const cliPackage = await readJson('../packages/cli/package.json'); + const contextPackage = await readJson('../packages/context/package.json'); + + assert.equal(rootPackage.scripts['test:slow'], 'pnpm --filter @ktx/context run test:slow && pnpm --filter @ktx/cli run test:slow'); + assertScriptContainsAll(cliPackage.scripts['test:slow'], cliSlowTests); + assertScriptContainsAll(contextPackage.scripts['test:slow'], contextSlowTests); + assert.doesNotMatch(contextPackage.scripts['test:slow'], /relationship-benchmarks\.test\.ts/); + }); +});