ktx/packages/context/src/ingest/finalization-scope.ts

146 lines
4.8 KiB
TypeScript
Raw Normal View History

feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
import type { SemanticLayerSource } from '../sl/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { IngestReportFinalizationMismatch } from './reports.js';
/** Inputs for {@link deriveFinalizationTouchedSources}. */
interface DeriveTouchedSourcesInput {
/** Paths to classify; only paths starting with `semantic-layer/` and ending in `.yaml` or `.yml` are considered. */
changedPaths: string[];
/** Sources keyed by connection ID; diffed against `afterSourcesByConnection` to resolve `_schema` paths. Presumably the pre-ingest snapshot (confirm with caller). */
beforeSourcesByConnection: Map<string, SemanticLayerSource[]>;
/** Sources keyed by connection ID; the other side of the `_schema` diff. Presumably the post-ingest snapshot (confirm with caller). */
afterSourcesByConnection: Map<string, SemanticLayerSource[]>;
}
/** Result of {@link deriveFinalizationTouchedSources}. */
interface DeriveTouchedSourcesResult {
/** De-duplicated touched sources, ordered by their `connectionId:sourceName` key. */
touchedSources: TouchedSlSource[];
/** Sorted, de-duplicated semantic-layer paths that could not be mapped to a connection or source. */
unresolvedPaths: string[];
}
/** Inputs for {@link compareFinalizationDeclarations}: the declared sets and the derived sets they are checked against. */
interface CompareFinalizationDeclarationsInput {
/** Semantic-layer sources listed in the adapter declaration. */
declaredTouchedSources: TouchedSlSource[];
/** Semantic-layer sources derived independently of the declaration. */
derivedTouchedSources: TouchedSlSource[];
/** Wiki page keys listed in the adapter declaration. */
declaredChangedWikiPageKeys: string[];
/** Wiki page keys derived independently of the declaration. */
derivedChangedWikiPageKeys: string[];
}
/**
 * Returns the distinct, non-empty strings from `values` in ascending default
 * (UTF-16 code unit) order. The input array is left untouched.
 */
function uniqueSorted(values: string[]): string[] {
  const distinct = new Set<string>();
  for (const entry of values) {
    if (entry.length > 0) {
      distinct.add(entry);
    }
  }
  const ordered = Array.from(distinct);
  ordered.sort();
  return ordered;
}
/** Builds the `connectionId:sourceName` identity string used to de-duplicate, compare and order touched sources. */
function touchedKey(source: TouchedSlSource): string {
  const { connectionId, sourceName } = source;
  return `${connectionId}:${sourceName}`;
}
/**
 * Serializes `value` to JSON text with object keys sorted recursively, so two
 * structurally equal values always yield the same string regardless of key
 * insertion order. Intended as a fingerprint for equality comparison.
 *
 * Apart from key ordering, the output follows `JSON.stringify` rules:
 * - objects exposing `toJSON` (e.g. `Date`) are serialized through it, so
 *   distinct dates no longer collapse to `{}`;
 * - `undefined`, functions and symbols are omitted as object members and
 *   rendered as `null` inside arrays (including array holes), so `[undefined]`
 *   and `[]` no longer collide and `{ a: undefined }` equals `{}`.
 *
 * @param value - Any JSON-like value.
 * @returns The canonical JSON text; `'null'` when the top-level value has no
 *   JSON representation (e.g. `undefined`).
 * @throws TypeError for values `JSON.stringify` rejects, such as `bigint`.
 */
function stableJson(value: unknown): string {
  // Returns undefined for values that JSON cannot represent, letting the
  // caller decide between omitting (object member) and `null` (array entry).
  const serialize = (input: unknown): string | undefined => {
    const toJson =
      input !== null && typeof input === 'object'
        ? (input as { toJSON?: unknown }).toJSON
        : undefined;
    // toJSON is applied once per value, never re-applied to its own result.
    const resolved: unknown = typeof toJson === 'function' ? toJson.call(input) : input;

    if (Array.isArray(resolved)) {
      // Array.from visits holes as undefined, unlike Array.prototype.map.
      const entries = Array.from(resolved, (entry: unknown) => serialize(entry) ?? 'null');
      return `[${entries.join(',')}]`;
    }
    if (resolved !== null && typeof resolved === 'object') {
      const record = resolved as Record<string, unknown>;
      const members: string[] = [];
      for (const key of Object.keys(record).sort()) {
        const encoded = serialize(record[key]);
        if (encoded !== undefined) {
          members.push(`${JSON.stringify(key)}:${encoded}`);
        }
      }
      return `{${members.join(',')}}`;
    }
    // At runtime JSON.stringify yields undefined for undefined/function/symbol.
    return JSON.stringify(resolved) as string | undefined;
  };

  return serialize(value) ?? 'null';
}
/**
 * Lists the names of sources whose serialized form differs between the two
 * lists. A source present on only one side counts as changed. When a list
 * contains the same name more than once, the last occurrence wins.
 *
 * @returns Distinct, non-empty source names in ascending order.
 */
function changedSourceNames(
  beforeSources: SemanticLayerSource[],
  afterSources: SemanticLayerSource[],
): string[] {
  const fingerprintByName = (sources: SemanticLayerSource[]): Map<string, string> => {
    const byName = new Map<string, string>();
    for (const source of sources) {
      byName.set(source.name, stableJson(source));
    }
    return byName;
  };

  const previous = fingerprintByName(beforeSources);
  const current = fingerprintByName(afterSources);

  // A name missing on one side yields undefined there, which never equals a fingerprint.
  const differing = [...previous.keys(), ...current.keys()].filter(
    (name) => previous.get(name) !== current.get(name),
  );
  return uniqueSorted(differing);
}
/**
 * Extracts wiki page keys from a list of paths.
 *
 * A path contributes a key only when it is a `.md` file located directly in
 * `wiki/global/`; the key is the file name without the extension. Files in
 * nested folders are ignored.
 *
 * @param paths - Candidate paths.
 * @returns Distinct, non-empty page keys in ascending order.
 */
export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
  const prefix = 'wiki/global/';
  const suffix = '.md';
  const pageKeys: string[] = [];
  for (const candidate of paths) {
    if (!candidate.startsWith(prefix) || !candidate.endsWith(suffix)) {
      continue;
    }
    const pageKey = candidate.slice(prefix.length, -suffix.length);
    if (pageKey.includes('/')) {
      continue;
    }
    pageKeys.push(pageKey);
  }
  return uniqueSorted(pageKeys);
}
/**
 * Derives which semantic-layer sources a set of changed paths touches.
 *
 * Only paths starting with `semantic-layer/` and ending in `.yaml` or `.yml`
 * are considered; all other paths are ignored. The second path segment is the
 * connection ID, and a path needs at least `semantic-layer/<connection>/<file>`
 * to be resolvable.
 *
 * - Outside `<connection>/_schema`, the file's base name (extension removed)
 *   is the touched source name.
 * - Under `<connection>/_schema`, the touched sources are those whose
 *   serialized form differs between the before and after lists for that
 *   connection. If nothing differs, the path is reported as unresolved.
 *
 * Declared `async` so callers keep receiving a Promise; nothing is awaited.
 *
 * @param input - Changed paths plus per-connection source lists to diff.
 * @returns Touched sources ordered by `connectionId:sourceName`, and the
 *   sorted, de-duplicated paths that could not be mapped to a source.
 */
export async function deriveFinalizationTouchedSources(
  input: DeriveTouchedSourcesInput,
): Promise<DeriveTouchedSourcesResult> {
  const touched = new Map<string, TouchedSlSource>();
  const unresolvedPaths: string[] = [];
  // The diff depends only on the connection, so compute it once per connection
  // rather than once per changed `_schema` path.
  const changedNamesByConnection = new Map<string, string[]>();

  const markTouched = (connectionId: string, sourceName: string): void => {
    const source: TouchedSlSource = { connectionId, sourceName };
    // Same key function as the final sort, so identity and order cannot drift apart.
    touched.set(touchedKey(source), source);
  };

  for (const path of input.changedPaths) {
    if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
      continue;
    }

    const parts = path.split('/');
    const connectionId = parts[1] ?? '';
    // A file sitting directly in `semantic-layer/` has no connection directory;
    // without this guard its file name would be mistaken for a connection ID.
    if (parts.length < 3 || !connectionId) {
      unresolvedPaths.push(path);
      continue;
    }

    if (parts[2] !== '_schema') {
      const fileName = parts.at(-1) ?? '';
      const sourceName = fileName.replace(/\.ya?ml$/, '');
      if (!sourceName) {
        unresolvedPaths.push(path);
        continue;
      }
      markTouched(connectionId, sourceName);
      continue;
    }

    let changedNames = changedNamesByConnection.get(connectionId);
    if (changedNames === undefined) {
      changedNames = changedSourceNames(
        input.beforeSourcesByConnection.get(connectionId) ?? [],
        input.afterSourcesByConnection.get(connectionId) ?? [],
      );
      changedNamesByConnection.set(connectionId, changedNames);
    }
    if (changedNames.length === 0) {
      unresolvedPaths.push(path);
      continue;
    }
    for (const sourceName of changedNames) {
      markTouched(connectionId, sourceName);
    }
  }

  return {
    touchedSources: [...touched.values()].sort((left, right) =>
      touchedKey(left).localeCompare(touchedKey(right)),
    ),
    unresolvedPaths: uniqueSorted(unresolvedPaths),
  };
}
/**
 * Compares the adapter's declared artifacts with the derived ones and reports
 * every disagreement.
 *
 * Semantic-layer sources (compared by `connectionId:sourceName`) are reported
 * first, then wiki page keys. Within each kind, keys that were derived but not
 * declared (`missing_from_adapter_declaration`) precede keys that were
 * declared but not derived (`extra_in_adapter_declaration`); each group is in
 * ascending key order.
 *
 * @param input - Declared and derived sources and wiki page keys.
 * @returns The mismatches, or an empty array when both sides agree.
 */
export function compareFinalizationDeclarations(
  input: CompareFinalizationDeclarationsInput,
): IngestReportFinalizationMismatch[] {
  const mismatches: IngestReportFinalizationMismatch[] = [];

  const collect = (
    artifactKind: 'sl' | 'wiki',
    declared: Set<string>,
    derived: Set<string>,
  ): void => {
    const undeclared = [...derived].sort().filter((key) => !declared.has(key));
    for (const key of undeclared) {
      mismatches.push({ artifactKind, key, direction: 'missing_from_adapter_declaration' });
    }
    const underived = [...declared].sort().filter((key) => !derived.has(key));
    for (const key of underived) {
      mismatches.push({ artifactKind, key, direction: 'extra_in_adapter_declaration' });
    }
  };

  collect(
    'sl',
    new Set(input.declaredTouchedSources.map(touchedKey)),
    new Set(input.derivedTouchedSources.map(touchedKey)),
  );
  collect(
    'wiki',
    new Set(input.declaredChangedWikiPageKeys),
    new Set(input.derivedChangedWikiPageKeys),
  );

  return mismatches;
}