feat(scan): carry structural warnings through reports

This commit is contained in:
Andrey Avtomonov 2026-05-24 01:09:02 +02:00
parent 483d0d6c15
commit d7d4a57859
6 changed files with 226 additions and 0 deletions

View file

@ -6,6 +6,7 @@ import {
detectLiveDatabaseStagedDir,
LIVE_DATABASE_FOREIGN_KEYS_FILE,
LIVE_DATABASE_META_FILE,
LIVE_DATABASE_WARNINGS_FILE,
liveDatabaseTablePath,
readLiveDatabaseTableFiles,
writeLiveDatabaseSnapshot,
@ -145,6 +146,31 @@ describe('live-database staged snapshot files', () => {
expect(connectionJson).not.toContain('pem-value');
});
it('writes redacted scan warnings next to live database metadata', async () => {
  // Stage a snapshot whose only warning carries a credential-bearing URL in its metadata.
  const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-live-db-warning-stage-'));
  const credentialUrl = 'postgres://reader:secret@example.test/db'; // pragma: allowlist secret
  await writeLiveDatabaseSnapshot(stagedDir, {
    ...snapshot(),
    warnings: [
      {
        code: 'constraint_discovery_unauthorized',
        message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
        recoverable: true,
        metadata: { schema: 'public', kind: 'primary_key', url: credentialUrl },
      },
    ],
  });

  const persisted = await readFile(join(stagedDir, LIVE_DATABASE_WARNINGS_FILE), 'utf8');

  // The warning code and harmless metadata are kept; the URL value shows up as the placeholder.
  const expectedFragments = ['"constraint_discovery_unauthorized"', '"schema": "public"', '"url": "<redacted>"'];
  for (const fragment of expectedFragments) {
    expect(persisted).toContain(fragment);
  }
  // The raw URL (with its embedded password) must not appear anywhere in the file.
  expect(persisted).not.toContain(credentialUrl);
});
it('returns false for a directory that is missing live database metadata', async () => {
const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-empty-'));
expect(await detectLiveDatabaseStagedDir(dir)).toBe(false);

View file

@ -7,6 +7,7 @@ import type { KtxSchemaSnapshot, KtxSchemaTable, KtxTableRef } from '../../../sc
export const LIVE_DATABASE_META_FILE = 'connection.json';
export const LIVE_DATABASE_FOREIGN_KEYS_FILE = 'foreign-keys.json';
export const LIVE_DATABASE_WARNINGS_FILE = 'warnings.json';
const LIVE_DATABASE_TABLES_DIR = 'tables';
interface LiveDatabaseTableFile {
@ -89,6 +90,13 @@ function foreignKeyIndex(snapshot: KtxSchemaSnapshot): ForeignKeyIndexEntry[] {
return entries;
}
/**
 * Builds the payload written to the warnings file for a snapshot.
 *
 * The snapshot's warnings (or an empty list when it has none) are wrapped in
 * an object and passed through `redactKtxSensitiveMetadata` before being
 * returned, so the persisted artifact only ever contains the redacted form.
 *
 * @param snapshot - Snapshot whose `warnings` should be persisted.
 * @returns An object with a `warnings` array; empty when the redacted result
 *   does not expose an array under `warnings`.
 */
function warningArtifact(snapshot: KtxSchemaSnapshot): { warnings: KtxSchemaSnapshot['warnings'] } {
  const { warnings } = redactKtxSensitiveMetadata({ warnings: snapshot.warnings ?? [] });
  if (!Array.isArray(warnings)) {
    // Defensive fallback: never emit a non-array `warnings` field.
    return { warnings: [] };
  }
  return { warnings: warnings as KtxSchemaSnapshot['warnings'] };
}
export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: KtxSchemaSnapshot): Promise<void> {
await mkdir(join(stagedDir, LIVE_DATABASE_TABLES_DIR), { recursive: true });
const sortedTables = [...snapshot.tables].sort((a, b) => tableSortKey(a).localeCompare(tableSortKey(b)));
@ -105,6 +113,7 @@ export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: Ktx
join(stagedDir, LIVE_DATABASE_FOREIGN_KEYS_FILE),
stableJson({ foreignKeys: foreignKeyIndex(snapshot) }),
);
await writeFile(join(stagedDir, LIVE_DATABASE_WARNINGS_FILE), stableJson(warningArtifact(snapshot)));
for (const table of sortedTables) {
await writeFile(join(stagedDir, liveDatabaseTablePath(table)), stableJson(table));
}

View file

@ -180,6 +180,13 @@ function fetchOnlyAdapter(options: { extractedAt?: () => string; snapshot?: KtxS
'utf-8',
);
await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
if (scanSnapshot.warnings?.length) {
await writeFile(
join(stagedDir, 'warnings.json'),
`${JSON.stringify({ warnings: scanSnapshot.warnings })}\n`,
'utf-8',
);
}
for (const table of scanSnapshot.tables) {
await writeFile(join(stagedDir, 'tables', `${table.name}.json`), `${JSON.stringify(table)}\n`, 'utf-8');
}
@ -336,6 +343,48 @@ describe('local scan', () => {
});
});
it('threads structural snapshot warnings into the final scan report', async () => {
  // The adapter's fetched snapshot carries one structural warning; the scan must surface it.
  const { report } = await runLocalScan({
    project,
    adapters: [
      fetchOnlyAdapter({
        snapshot: {
          ...defaultFetchSnapshot(),
          warnings: [
            {
              code: 'constraint_discovery_unauthorized',
              message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
              recoverable: true,
              metadata: { schema: 'public', kind: 'primary_key' },
            },
          ],
        },
      }),
    ],
    connectionId: 'warehouse',
    jobId: 'scan-run-structural-warnings',
    now: () => new Date('2026-04-29T09:01:00.000Z'),
  });

  // In-memory report: the warning arrives unchanged.
  expect(report.warnings).toEqual([
    {
      code: 'constraint_discovery_unauthorized',
      message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
      recoverable: true,
      metadata: { schema: 'public', kind: 'primary_key' },
    },
  ]);

  // Persisted report: the same warning code is present in scan-report.json on disk.
  const reportPath = join(
    project.projectDir,
    'raw-sources/warehouse/live-database/2026-04-29-090100-scan-run-structural-warnings/scan-report.json',
  );
  const reportJson = await readFile(reportPath, 'utf-8');
  expect(reportJson).toContain('"constraint_discovery_unauthorized"');
});
it('passes enabled_tables as fetch context tableScope and does not post-filter staged snapshots', async () => {
project.config.connections.warehouse = {
...project.config.connections.warehouse,

View file

@ -469,6 +469,9 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
extractedAtFallback: report.createdAt,
});
enrichmentSnapshot = rawSnapshot;
if (rawSnapshot.warnings?.length) {
report.warnings.push(...rawSnapshot.warnings);
}
const manifestArtifacts = await writeLocalScanManifestShards({
project: options.project,
connectionId: options.connectionId,

View file

@ -165,6 +165,61 @@ describe('readLocalScanStructuralSnapshot', () => {
});
});
it('rebuilds scan warnings from persisted live-database warning files', async () => {
  const rawRoot = 'raw-sources/warehouse/live-database/sync-warnings';
  // Single fixture shared by the seeded warnings.json and the expectation below.
  const persistedWarning = {
    code: 'constraint_discovery_unauthorized',
    message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)',
    recoverable: true,
    metadata: { schema: 'public', kind: 'foreign_key' },
  };

  // [path, content, label] for each staged artifact, written in this order.
  const seedFiles: ReadonlyArray<readonly [string, string, string]> = [
    [`${rawRoot}/connection.json`, '{"connectionId":"warehouse","metadata":{}}\n', 'Seed connection artifact'],
    [
      `${rawRoot}/warnings.json`,
      `${JSON.stringify({ warnings: [persistedWarning] }, null, 2)}\n`,
      'Seed warning artifact',
    ],
    [
      `${rawRoot}/tables/orders.json`,
      '{"name":"orders","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":false,"comment":null}],"foreignKeys":[]}\n',
      'Seed orders artifact',
    ],
  ];
  for (const [path, content, label] of seedFiles) {
    await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', label);
  }

  const snapshot = await readLocalScanStructuralSnapshot({
    project,
    connectionId: 'warehouse',
    driver: 'postgres',
    rawSourcesDir: rawRoot,
    extractedAtFallback: '2026-04-29T13:00:00.000Z',
  });

  // The warning read back from warnings.json matches what was seeded.
  expect(snapshot.warnings).toEqual([persistedWarning]);
});
it('uses the scan report timestamp when connection.json omits extractedAt', async () => {
const rawRoot = 'raw-sources/warehouse/live-database/sync-2';
await project.fileStore.writeFile(
@ -192,4 +247,32 @@ describe('readLocalScanStructuralSnapshot', () => {
expect(snapshot.extractedAt).toBe('2026-04-29T13:00:00.000Z');
});
it('tolerates older live-database staged directories without warnings.json', async () => {
  const rawRoot = 'raw-sources/warehouse/live-database/sync-no-warnings';

  // Seed only connection.json and one table file; warnings.json is deliberately absent.
  const seedFiles: ReadonlyArray<readonly [string, string, string]> = [
    [`${rawRoot}/connection.json`, '{"connectionId":"warehouse","metadata":{}}\n', 'Seed connection artifact'],
    [
      `${rawRoot}/tables/orders.json`,
      '{"name":"orders","catalog":null,"db":null,"kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n',
      'Seed orders artifact',
    ],
  ];
  for (const [path, content, label] of seedFiles) {
    await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', label);
  }

  const snapshot = await readLocalScanStructuralSnapshot({
    project,
    connectionId: 'warehouse',
    driver: 'postgres',
    rawSourcesDir: rawRoot,
    extractedAtFallback: '2026-04-29T13:00:00.000Z',
  });

  // A missing warnings file yields an empty list rather than an error.
  expect(snapshot.warnings).toEqual([]);
});
});

View file

@ -1,6 +1,7 @@
import type { KtxLocalProject } from '../../context/project/project.js';
import type {
KtxConnectionDriver,
KtxScanWarning,
KtxSchemaColumn,
KtxSchemaForeignKey,
KtxSchemaSnapshot,
@ -30,6 +31,59 @@ function metadataRecord(value: unknown): Record<string, unknown> {
return isRecord(value) ? value : {};
}
/**
 * Warning codes accepted when parsing a persisted warnings artifact.
 * `parseWarning` rejects any entry whose `code` is not a member of this set.
 * NOTE(review): presumably mirrors the `KtxScanWarning['code']` union — keep the two in sync.
 */
const scanWarningCodes: ReadonlySet<KtxScanWarning['code']> = new Set([
  'connector_capability_missing',
  'sampling_failed',
  'statistics_failed',
  'llm_unavailable',
  'embedding_unavailable',
  'scan_enrichment_backend_not_configured',
  'relationship_validation_failed',
  'relationship_llm_invalid_reference',
  'relationship_llm_proposal_failed',
  'credential_redacted',
  'enrichment_failed',
  'description_fallback_used',
  'constraint_discovery_unauthorized',
]);
/**
 * Validates one entry of a persisted warnings artifact and converts it into a
 * `KtxScanWarning`.
 *
 * Required fields: `code` (a string contained in `scanWarningCodes`),
 * `message` (string) and `recoverable` (boolean). Optional fields `table`,
 * `column` (strings) and `metadata` (record) are copied only when they have
 * the expected type; anything else on the entry is dropped.
 *
 * @param rawWarning - Untrusted value taken from the parsed JSON.
 * @param path - Artifact path, used only to build the error message.
 * @returns The validated warning.
 * @throws Error when the entry is not a record or a required field is missing
 *   or has the wrong type.
 */
function parseWarning(rawWarning: unknown, path: string): KtxScanWarning {
  const failure = `Invalid KTX schema warning artifact: ${path}`;
  if (!isRecord(rawWarning)) {
    throw new Error(failure);
  }

  const { code, message, recoverable, table, column, metadata } = rawWarning;
  if (typeof code !== 'string' || !scanWarningCodes.has(code as KtxScanWarning['code'])) {
    throw new Error(failure);
  }
  if (typeof message !== 'string' || typeof recoverable !== 'boolean') {
    throw new Error(failure);
  }

  // Optional fields are included only when present with the right type, so
  // the result never carries explicit `undefined` properties.
  const tablePart = typeof table === 'string' ? { table } : {};
  const columnPart = typeof column === 'string' ? { column } : {};
  const metadataPart = isRecord(metadata) ? { metadata } : {};

  return {
    code: code as KtxScanWarning['code'],
    message,
    recoverable,
    ...tablePart,
    ...columnPart,
    ...metadataPart,
  };
}
/**
 * Reads the warnings stored at `<rawSourcesDir>/warnings.json`.
 *
 * A missing file is not an error and yields an empty list. A file that exists
 * but is malformed is always reported.
 *
 * @param input - Supplies the project file store and the staged directory.
 * @returns The parsed warnings, or `[]` when the file does not exist.
 * @throws SyntaxError when the file is not valid JSON.
 * @throws Error when the JSON lacks a `warnings` array, when an entry fails
 *   `parseWarning` validation, or when the read fails for any reason other
 *   than the file being absent.
 */
async function readWarnings(input: ReadLocalScanStructuralSnapshotInput): Promise<KtxScanWarning[]> {
  const path = `${input.rawSourcesDir}/warnings.json`;

  // Only the read itself is guarded. Parse and validation errors can carry
  // arbitrary text in their message (JSON syntax errors quote the input, and
  // the validation errors embed `path`), so they must never reach the
  // "missing file" message check below and be swallowed as an empty result.
  let content: string;
  try {
    content = (await input.project.fileStore.readFile(path)).content;
  } catch (error) {
    if (error instanceof Error && /not found|ENOENT|no such file/i.test(error.message)) {
      return [];
    }
    throw error;
  }

  const parsed = JSON.parse(content) as unknown;
  if (!isRecord(parsed) || !Array.isArray(parsed.warnings)) {
    throw new Error(`Invalid KTX schema warnings artifact: ${path}`);
  }
  return parsed.warnings.map((warning) => parseWarning(warning, path));
}
function optionalStringOrNull(value: unknown): string | null | undefined {
if (value === undefined) {
return undefined;
@ -113,6 +167,7 @@ export async function readLocalScanStructuralSnapshot(
const tableRaw = await input.project.fileStore.readFile(path);
tables.push(parseTable(tableRaw.content, path));
}
const warnings = await readWarnings(input);
return {
connectionId: typeof connection.connectionId === 'string' ? connection.connectionId : input.connectionId,
@ -121,5 +176,6 @@ export async function readLocalScanStructuralSnapshot(
scope: isRecord(connection.scope) ? connection.scope : {},
metadata: metadataRecord(connection.metadata),
tables,
warnings,
};
}