// Checks that scan warnings are staged in the warnings artifact alongside the connection
// metadata, and that the credential-bearing `url` metadata value is blanked in the output.
it('writes redacted scan warnings next to live database metadata', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-live-db-warning-stage-'));
  const unauthorizedWarning = {
    code: 'constraint_discovery_unauthorized' as const,
    message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
    recoverable: true,
    metadata: {
      schema: 'public',
      kind: 'primary_key',
      url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
    },
  };

  await writeLiveDatabaseSnapshot(stagedDir, { ...snapshot(), warnings: [unauthorizedWarning] });

  const persisted = await readFile(join(stagedDir, LIVE_DATABASE_WARNINGS_FILE), 'utf8');

  // Non-sensitive fields survive; the sensitive value is replaced by an empty string.
  const expectedFragments = ['"constraint_discovery_unauthorized"', '"schema": "public"', '"url": ""'];
  for (const fragment of expectedFragments) {
    expect(persisted).toContain(fragment);
  }
  expect(persisted).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
});
/**
 * Builds the payload written to the warnings artifact for a snapshot.
 *
 * The snapshot's warnings (or an empty list when it has none) are passed through
 * `redactKtxSensitiveMetadata` before being returned. If the redacted value no longer
 * exposes an array under `warnings`, an empty list is returned instead.
 *
 * @param snapshot - Snapshot whose warnings should be staged.
 * @returns An object with a single `warnings` array, ready for serialization.
 */
function warningArtifact(snapshot: KtxSchemaSnapshot): { warnings: KtxSchemaSnapshot['warnings'] } {
  const sourceWarnings = snapshot.warnings ?? [];
  const scrubbed = redactKtxSensitiveMetadata({ warnings: sourceWarnings });
  const candidate = scrubbed.warnings;

  if (!Array.isArray(candidate)) {
    return { warnings: [] };
  }
  return { warnings: candidate as KtxSchemaSnapshot['warnings'] };
}
// Checks that warnings carried on the fetched structural snapshot appear both on the in-memory
// scan report and in the scan-report.json written under the run's raw-sources directory.
it('threads structural snapshot warnings into the final scan report', async () => {
  // Factory so the adapter input and the expectation never share an object instance.
  const unauthorizedWarning = () => ({
    code: 'constraint_discovery_unauthorized' as const,
    message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
    recoverable: true,
    metadata: { schema: 'public', kind: 'primary_key' },
  });

  const adapter = fetchOnlyAdapter({
    snapshot: { ...defaultFetchSnapshot(), warnings: [unauthorizedWarning()] },
  });

  const { report } = await runLocalScan({
    project,
    adapters: [adapter],
    connectionId: 'warehouse',
    jobId: 'scan-run-structural-warnings',
    now: () => new Date('2026-04-29T09:01:00.000Z'),
  });

  expect(report.warnings).toEqual([unauthorizedWarning()]);

  const reportPath = join(
    project.projectDir,
    'raw-sources/warehouse/live-database/2026-04-29-090100-scan-run-structural-warnings/scan-report.json',
  );
  await expect(readFile(reportPath, 'utf-8')).resolves.toContain('"constraint_discovery_unauthorized"');
});
// Checks that a warnings.json seeded next to the connection and table artifacts is returned on
// the snapshot produced by readLocalScanStructuralSnapshot.
it('rebuilds scan warnings from persisted live-database warning files', async () => {
  const rawRoot = 'raw-sources/warehouse/live-database/sync-warnings';

  // Factory so the seeded artifact and the expectation are built from independent objects.
  const foreignKeyWarning = () => ({
    code: 'constraint_discovery_unauthorized',
    message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)',
    recoverable: true,
    metadata: { schema: 'public', kind: 'foreign_key' },
  });

  // Writes one artifact under rawRoot using the fixed author identity shared by every seed.
  const seedArtifact = (relativePath: string, content: string, commitMessage: string) =>
    project.fileStore.writeFile(`${rawRoot}/${relativePath}`, content, 'ktx', 'ktx@example.com', commitMessage);

  await seedArtifact('connection.json', '{"connectionId":"warehouse","metadata":{}}\n', 'Seed connection artifact');
  await seedArtifact(
    'warnings.json',
    `${JSON.stringify({ warnings: [foreignKeyWarning()] }, null, 2)}\n`,
    'Seed warning artifact',
  );
  await seedArtifact(
    'tables/orders.json',
    '{"name":"orders","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":false,"comment":null}],"foreignKeys":[]}\n',
    'Seed orders artifact',
  );

  const { warnings } = await readLocalScanStructuralSnapshot({
    project,
    connectionId: 'warehouse',
    driver: 'postgres',
    rawSourcesDir: rawRoot,
    extractedAtFallback: '2026-04-29T13:00:00.000Z',
  });

  expect(warnings).toEqual([foreignKeyWarning()]);
});
// Checks that a staged directory containing only connection.json and a table file (no
// warnings.json) still loads, with the snapshot reporting an empty warnings list.
it('tolerates older live-database staged directories without warnings.json', async () => {
  const rawRoot = 'raw-sources/warehouse/live-database/sync-no-warnings';

  // Writes one artifact under rawRoot using the fixed author identity shared by every seed.
  const seedArtifact = (relativePath: string, content: string, commitMessage: string) =>
    project.fileStore.writeFile(`${rawRoot}/${relativePath}`, content, 'ktx', 'ktx@example.com', commitMessage);

  await seedArtifact('connection.json', '{"connectionId":"warehouse","metadata":{}}\n', 'Seed connection artifact');
  await seedArtifact(
    'tables/orders.json',
    '{"name":"orders","catalog":null,"db":null,"kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n',
    'Seed orders artifact',
  );

  const { warnings } = await readLocalScanStructuralSnapshot({
    project,
    connectionId: 'warehouse',
    driver: 'postgres',
    rawSourcesDir: rawRoot,
    extractedAtFallback: '2026-04-29T13:00:00.000Z',
  });

  expect(warnings).toEqual([]);
});
/**
 * Warning codes accepted when reading a persisted warnings artifact. An entry whose `code`
 * is not in this set is rejected by `parseWarning`.
 */
const scanWarningCodes = new Set<KtxScanWarning['code']>([
  'connector_capability_missing',
  'sampling_failed',
  'statistics_failed',
  'llm_unavailable',
  'embedding_unavailable',
  'scan_enrichment_backend_not_configured',
  'relationship_validation_failed',
  'relationship_llm_invalid_reference',
  'relationship_llm_proposal_failed',
  'credential_redacted',
  'enrichment_failed',
  'description_fallback_used',
  'constraint_discovery_unauthorized',
]);

/**
 * Validates a single entry of a persisted warnings artifact and converts it to a `KtxScanWarning`.
 *
 * `code` (a known warning code), `message` (string) and `recoverable` (boolean) are required.
 * `table`, `column` and `metadata` are copied only when they have the expected type; any
 * other property on the raw entry is dropped.
 *
 * @param rawWarning - One element of the artifact's `warnings` array, not yet validated.
 * @param path - Artifact path, used only in the error message.
 * @returns The validated warning.
 * @throws Error when a required field is missing, mistyped, or carries an unknown code.
 */
function parseWarning(rawWarning: unknown, path: string): KtxScanWarning {
  if (
    !isRecord(rawWarning) ||
    typeof rawWarning.code !== 'string' ||
    !scanWarningCodes.has(rawWarning.code as KtxScanWarning['code']) ||
    typeof rawWarning.message !== 'string' ||
    typeof rawWarning.recoverable !== 'boolean'
  ) {
    throw new Error(`Invalid KTX schema warning artifact: ${path}`);
  }
  return {
    code: rawWarning.code as KtxScanWarning['code'],
    message: rawWarning.message,
    recoverable: rawWarning.recoverable,
    ...(typeof rawWarning.table === 'string' ? { table: rawWarning.table } : {}),
    ...(typeof rawWarning.column === 'string' ? { column: rawWarning.column } : {}),
    ...(isRecord(rawWarning.metadata) ? { metadata: rawWarning.metadata } : {}),
  };
}

/**
 * Reports whether an error thrown by the file store's read means "the file does not exist".
 *
 * The message patterns are kept because the file store's error contract is not visible here
 * (NOTE(review): confirm which errors it throws); the `ENOENT` code check additionally covers
 * Node-style filesystem errors.
 */
function isMissingFileError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  const { code } = error as Error & { code?: unknown };
  return code === 'ENOENT' || /not found|ENOENT|no such file/i.test(error.message);
}

/**
 * Reads `<rawSourcesDir>/warnings.json` and returns its validated warnings.
 *
 * A missing file yields an empty list so staged directories without the artifact still load.
 * Only the read itself is covered by that fallback: malformed JSON or an invalid payload
 * always throws, even when the artifact path happens to contain text such as "not found".
 *
 * @param input - Supplies the file store (`input.project.fileStore`) and the staged directory.
 * @returns The warnings stored in the artifact, or `[]` when the artifact is absent.
 * @throws Error when the artifact exists but is not valid JSON or fails validation; any
 *   read error not recognised as "missing file" is rethrown unchanged.
 */
async function readWarnings(input: ReadLocalScanStructuralSnapshotInput): Promise<KtxScanWarning[]> {
  const path = `${input.rawSourcesDir}/warnings.json`;

  let content: string;
  try {
    ({ content } = await input.project.fileStore.readFile(path));
  } catch (error) {
    if (isMissingFileError(error)) {
      return [];
    }
    throw error;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch (error) {
    // Attach the artifact path; a bare JSON syntax error does not say which file is corrupt.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid KTX schema warnings artifact: ${path} (${reason})`);
  }

  if (!isRecord(parsed) || !Array.isArray(parsed.warnings)) {
    throw new Error(`Invalid KTX schema warnings artifact: ${path}`);
  }
  return parsed.warnings.map((warning) => parseWarning(warning, path));
}