ktx/packages/context/src/ingest/finalization-scope.ts
Andrey Avtomonov 4ec5903aa5
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136)
* Refine adapter-owned ingest finalization design after adversarial review iteration 1

* Refine adapter-owned ingest finalization design after adversarial review iteration 2

* Refine adapter-owned ingest finalization design after adversarial review iteration 3

* Implement adapter-owned ingest finalization v1

Moves finalization from runner-owned post-processors into typed
SourceAdapter.finalize() contracts. Adds finalization report schema,
scope derivation, override replay context, and migrates historic-SQL
projection. Removes IngestBundlePostProcessorPort wiring and
HistoricSqlProjectionPostProcessor.

* feat(ingest): export finalization adapter contract types

* test(ingest): exercise historic sql finalization locally

* docs(plans): add adapter-owned finalization v1 closure plan

* fix(setup): unblock clean Linux installs and add enabled_tables allowlist

- Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs
  don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install.
- Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR
  entries that httpx rejects with InvalidURL (OrbStack injects these by
  default).
- Add `enabled_tables` allowlist on warehouse connections (zod schema +
  live-database introspection filter) to scope ingest to specific tables.
- Add `getting-started/troubleshooting-linux` docs page covering the Python
  3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link
  it from the quickstart troubleshooting table and the llms-docs map.
- Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds
  can serve under host.docker.internal.

* Move docs changes to specs repo

* fix(cli): keep managed runtime python version private

* Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00

145 lines
4.8 KiB
TypeScript

import type { SemanticLayerSource } from '../sl/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { IngestReportFinalizationMismatch } from './reports.js';
/** Input consumed by `deriveFinalizationTouchedSources`. */
interface DeriveTouchedSourcesInput {
// Paths to inspect. Only paths under `semantic-layer/` that end in `.yaml` or
// `.yml` are considered; every other path is skipped.
changedPaths: string[];
// Sources per connection before the change, looked up by the connection id
// taken from the second path segment. Only consulted for `_schema` paths.
beforeSourcesByConnection: Map<string, SemanticLayerSource[]>;
// Sources per connection after the change; diffed against the "before" list
// to find which source names a `_schema` path affected.
afterSourcesByConnection: Map<string, SemanticLayerSource[]>;
}
/** Result produced by `deriveFinalizationTouchedSources`. */
interface DeriveTouchedSourcesResult {
// Distinct touched sources, ordered by their `connectionId:sourceName` key.
touchedSources: TouchedSlSource[];
// Semantic-layer YAML paths for which no connection id or source name could
// be determined; deduplicated and sorted.
unresolvedPaths: string[];
}
/**
 * Input consumed by `compareFinalizationDeclarations`: the "declared" and
 * "derived" views of which artifacts changed, for semantic-layer sources and
 * for wiki pages.
 */
interface CompareFinalizationDeclarationsInput {
// Touched sources as declared; entries absent from the derived list are
// reported as `extra_in_adapter_declaration`.
declaredTouchedSources: TouchedSlSource[];
// Touched sources as derived; entries absent from the declared list are
// reported as `missing_from_adapter_declaration`.
derivedTouchedSources: TouchedSlSource[];
// Wiki page keys as declared; compared the same way as the sources above.
declaredChangedWikiPageKeys: string[];
// Wiki page keys as derived; compared the same way as the sources above.
derivedChangedWikiPageKeys: string[];
}
/**
 * Returns the distinct, non-empty strings from `values` in ascending order.
 *
 * Ordering is the default `Array.prototype.sort` ordering (UTF-16 code units),
 * so upper-case ASCII letters sort before lower-case ones. The input array is
 * not modified.
 */
function uniqueSorted(values: string[]): string[] {
  const distinct = new Set<string>();
  for (const candidate of values) {
    // Empty strings carry no information for callers and are dropped.
    if (candidate.length > 0) {
      distinct.add(candidate);
    }
  }
  return Array.from(distinct).sort();
}
/**
 * Builds the `connectionId:sourceName` key used to identify, deduplicate and
 * order touched semantic-layer sources.
 */
function touchedKey(source: TouchedSlSource): string {
  const { connectionId, sourceName } = source;
  return `${connectionId}:${sourceName}`;
}
/**
 * Serialises `value` to JSON text with object keys emitted in sorted order, so
 * two structurally equal values always produce the same string regardless of
 * property insertion order.
 *
 * Values that JSON cannot represent (`undefined`, functions, symbols) are
 * handled the way `JSON.stringify` handles them: omitted when they are object
 * members, written as `null` when they are array elements or the top-level
 * value. Without this, an object member would be rendered as the invalid text
 * `"key":undefined`, and `[undefined]` would be indistinguishable from `[]`.
 *
 * @param value - Any JSON-like value.
 * @returns Deterministic JSON text for `value`.
 */
function stableJson(value: unknown): string {
  if (Array.isArray(value)) {
    // Array.from (unlike map) also visits holes in sparse arrays, so every
    // slot contributes exactly one entry to the output.
    return `[${Array.from(value, (entry) => stableJson(entry)).join(',')}]`;
  }
  if (value && typeof value === 'object') {
    const record = value as Record<string, unknown>;
    const members: string[] = [];
    for (const key of Object.keys(record).sort()) {
      const member = record[key];
      // Mirror JSON.stringify: members without a JSON representation are dropped.
      if (member === undefined || typeof member === 'function' || typeof member === 'symbol') {
        continue;
      }
      members.push(`${JSON.stringify(key)}:${stableJson(member)}`);
    }
    return `{${members.join(',')}}`;
  }
  // JSON.stringify yields undefined (not a string) for undefined, functions and
  // symbols; fall back to `null` so the declared return type always holds.
  const text: string | undefined = JSON.stringify(value);
  return text ?? 'null';
}
/**
 * Lists the names of sources that differ between two snapshots.
 *
 * A name is reported when the source was added, removed, or when its
 * key-order-independent JSON form (`stableJson`) differs between the
 * snapshots. If a snapshot contains the same name more than once, the last
 * entry wins.
 *
 * @param beforeSources - Sources as they were before the change.
 * @param afterSources - Sources as they are after the change.
 * @returns Distinct, non-empty changed names in sorted order.
 */
function changedSourceNames(
  beforeSources: SemanticLayerSource[],
  afterSources: SemanticLayerSource[],
): string[] {
  const fingerprintByName = (sources: SemanticLayerSource[]): Map<string, string> => {
    const fingerprints = new Map<string, string>();
    for (const source of sources) {
      fingerprints.set(source.name, stableJson(source));
    }
    return fingerprints;
  };

  const previous = fingerprintByName(beforeSources);
  const current = fingerprintByName(afterSources);

  const differing: string[] = [];
  for (const name of new Set([...previous.keys(), ...current.keys()])) {
    // A name missing on one side yields undefined there, which never equals
    // a fingerprint string, so additions and removals are caught too.
    if (previous.get(name) !== current.get(name)) {
      differing.push(name);
    }
  }
  return uniqueSorted(differing);
}
/**
 * Extracts wiki page keys from a list of paths.
 *
 * Only Markdown files located directly inside `wiki/global/` qualify; files in
 * nested directories and non-`.md` files are ignored. The page key is the file
 * name without the `.md` extension.
 *
 * @param paths - Candidate paths.
 * @returns Distinct, non-empty page keys in sorted order.
 */
export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
  const prefix = 'wiki/global/';
  const suffix = '.md';
  const pageKeys: string[] = [];

  for (const candidate of paths) {
    if (!candidate.startsWith(prefix) || !candidate.endsWith(suffix)) {
      continue;
    }
    const pageKey = candidate.slice(prefix.length, -suffix.length);
    // A remaining slash means the file sits in a subdirectory of wiki/global/.
    if (pageKey.includes('/')) {
      continue;
    }
    pageKeys.push(pageKey);
  }

  return uniqueSorted(pageKeys);
}
/**
 * Works out which semantic-layer sources a set of changed paths touched.
 *
 * Only paths under `semantic-layer/` ending in `.yaml` or `.yml` are examined;
 * the second path segment is taken as the connection id.
 * - When the third segment is `_schema`, the touched sources are the names
 *   that differ between the connection's before and after source lists.
 * - Otherwise the touched source is the file name without its YAML extension.
 *
 * A path is reported as unresolved when it has no connection id, when its file
 * name yields an empty source name, or when a `_schema` path comes with no
 * detectable source difference.
 *
 * @param input - Changed paths plus before/after sources per connection.
 * @returns Touched sources ordered by `connectionId:sourceName`, and the
 *   deduplicated, sorted unresolved paths.
 */
export async function deriveFinalizationTouchedSources(
  input: DeriveTouchedSourcesInput,
): Promise<DeriveTouchedSourcesResult> {
  const touchedByKey = new Map<string, TouchedSlSource>();
  const unresolved: string[] = [];

  for (const changedPath of input.changedPaths) {
    const hasYamlExtension = changedPath.endsWith('.yaml') || changedPath.endsWith('.yml');
    if (!(changedPath.startsWith('semantic-layer/') && hasYamlExtension)) {
      continue;
    }

    const segments = changedPath.split('/');
    const connectionId = segments[1] ?? '';
    if (connectionId === '') {
      unresolved.push(changedPath);
      continue;
    }

    // Resolve the candidate source names first, then record them uniformly.
    let sourceNames: string[];
    if (segments[2] === '_schema') {
      sourceNames = changedSourceNames(
        input.beforeSourcesByConnection.get(connectionId) ?? [],
        input.afterSourcesByConnection.get(connectionId) ?? [],
      );
    } else {
      const fileName = segments[segments.length - 1] ?? '';
      const stem = fileName.replace(/\.ya?ml$/, '');
      sourceNames = stem ? [stem] : [];
    }

    if (sourceNames.length === 0) {
      unresolved.push(changedPath);
      continue;
    }
    for (const sourceName of sourceNames) {
      const entry: TouchedSlSource = { connectionId, sourceName };
      touchedByKey.set(touchedKey(entry), entry);
    }
  }

  const touchedSources = Array.from(touchedByKey.values());
  touchedSources.sort((left, right) => touchedKey(left).localeCompare(touchedKey(right)));

  return { touchedSources, unresolvedPaths: uniqueSorted(unresolved) };
}
/**
 * Compares the declared artifact changes against the derived ones and reports
 * every disagreement.
 *
 * For each artifact kind, a key that was derived but not declared is reported
 * as `missing_from_adapter_declaration`, and a key that was declared but not
 * derived as `extra_in_adapter_declaration`. Semantic-layer sources are
 * compared by their `connectionId:sourceName` key, wiki pages by page key.
 *
 * @param input - Declared and derived touched sources and wiki page keys.
 * @returns Mismatches ordered as: sl missing, sl extra, wiki missing, wiki
 *   extra, with keys sorted inside each group. Empty when both views agree.
 */
export function compareFinalizationDeclarations(
  input: CompareFinalizationDeclarationsInput,
): IngestReportFinalizationMismatch[] {
  const diffKeys = (
    artifactKind: IngestReportFinalizationMismatch['artifactKind'],
    declaredKeys: string[],
    derivedKeys: string[],
  ): IngestReportFinalizationMismatch[] => {
    const declared = new Set(declaredKeys);
    const derived = new Set(derivedKeys);

    const missing = [...derived]
      .sort()
      .filter((key) => !declared.has(key))
      .map(
        (key): IngestReportFinalizationMismatch => ({
          artifactKind,
          key,
          direction: 'missing_from_adapter_declaration',
        }),
      );
    const extra = [...declared]
      .sort()
      .filter((key) => !derived.has(key))
      .map(
        (key): IngestReportFinalizationMismatch => ({
          artifactKind,
          key,
          direction: 'extra_in_adapter_declaration',
        }),
      );

    return [...missing, ...extra];
  };

  return [
    ...diffKeys(
      'sl',
      input.declaredTouchedSources.map(touchedKey),
      input.derivedTouchedSources.map(touchedKey),
    ),
    ...diffKeys('wiki', input.declaredChangedWikiPageKeys, input.derivedChangedWikiPageKeys),
  ];
}