mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
* Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
145 lines
4.8 KiB
TypeScript
145 lines
4.8 KiB
TypeScript
import type { SemanticLayerSource } from '../sl/index.js';
|
|
import type { TouchedSlSource } from '../tools/index.js';
|
|
import type { IngestReportFinalizationMismatch } from './reports.js';
|
|
|
|
/** Inputs used to derive which semantic-layer sources a set of changed paths touched. */
interface DeriveTouchedSourcesInput {
  /** Changed paths to inspect; the deriver skips anything that is not a YAML path under `semantic-layer/`. */
  changedPaths: string[];
  /** Sources keyed by connection id, as they were before the change (diffed against the "after" map). */
  beforeSourcesByConnection: Map<string, SemanticLayerSource[]>;
  /** Sources keyed by connection id, as they are after the change. */
  afterSourcesByConnection: Map<string, SemanticLayerSource[]>;
}
|
|
|
|
/** Outcome of deriving touched semantic-layer sources from changed paths. */
interface DeriveTouchedSourcesResult {
  /** One entry per distinct connection/source pair, ordered by its `connectionId:sourceName` key. */
  touchedSources: TouchedSlSource[];
  /** De-duplicated, sorted semantic-layer paths that could not be attributed to a source. */
  unresolvedPaths: string[];
}
|
|
|
|
/** The adapter-declared and the derived artifact sets that are compared against each other. */
interface CompareFinalizationDeclarationsInput {
  /** Semantic-layer sources the adapter declared as touched. */
  declaredTouchedSources: TouchedSlSource[];
  /** Semantic-layer sources derived as touched. */
  derivedTouchedSources: TouchedSlSource[];
  /** Wiki page keys the adapter declared as changed. */
  declaredChangedWikiPageKeys: string[];
  /** Wiki page keys derived as changed. */
  derivedChangedWikiPageKeys: string[];
}
|
|
|
|
/**
 * Returns the distinct, non-empty strings of `values` in ascending order.
 *
 * Ordering is the default `Array.prototype.sort` order (UTF-16 code units).
 * The input array is left untouched; a new array is returned.
 */
function uniqueSorted(values: string[]): string[] {
  const distinct = new Set<string>();
  for (const candidate of values) {
    // Empty strings carry no information for callers and are dropped.
    if (candidate.length > 0) {
      distinct.add(candidate);
    }
  }
  return Array.from(distinct).sort();
}
|
|
|
|
/**
 * Builds the identity key of a touched source: connection id and source name
 * joined by a colon, i.e. `connectionId:sourceName`.
 */
function touchedKey(source: TouchedSlSource): string {
  const { connectionId, sourceName } = source;
  return `${connectionId}:${sourceName}`;
}
|
|
|
|
/**
 * Serializes `value` to a canonical JSON string: object keys are emitted in
 * sorted order, so two structurally equal values always produce the same text
 * regardless of key insertion order.
 *
 * Follows `JSON.stringify` semantics for values JSON cannot represent:
 * - `undefined`, functions and symbols are omitted as object properties and
 *   become `null` inside arrays (so `{ a: undefined }` equals `{}`);
 * - objects exposing `toJSON` (for example `Date`) are serialized through it
 *   instead of collapsing to `{}`;
 * - a top-level unrepresentable value yields `'null'`, so the result is
 *   always a string.
 *
 * `bigint` values still throw a `TypeError`, exactly as `JSON.stringify` does.
 *
 * @param value - Any value to serialize.
 * @returns The canonical JSON text.
 */
function stableJson(value: unknown): string {
  return stableJsonFragment(value) ?? 'null';
}

/**
 * Serializes one value for {@link stableJson}; returns `undefined` when the
 * value has no JSON representation so the caller can omit or null it.
 */
function stableJsonFragment(value: unknown): string | undefined {
  if (value === undefined || typeof value === 'function' || typeof value === 'symbol') {
    return undefined;
  }
  if (value === null || typeof value !== 'object') {
    return JSON.stringify(value);
  }

  const toJson = (value as { toJSON?: unknown }).toJSON;
  if (typeof toJson === 'function') {
    const converted: unknown = toJson.call(value);
    // Guard against a toJSON that returns its own receiver, which would recurse forever.
    if (converted !== value) {
      return stableJsonFragment(converted);
    }
  }

  if (Array.isArray(value)) {
    // Array.from also visits holes in sparse arrays, which then serialize as null.
    return `[${Array.from(value, (entry) => stableJsonFragment(entry) ?? 'null').join(',')}]`;
  }

  const record = value as Record<string, unknown>;
  const members: string[] = [];
  for (const key of Object.keys(record).sort()) {
    const encoded = stableJsonFragment(record[key]);
    if (encoded !== undefined) {
      members.push(`${JSON.stringify(key)}:${encoded}`);
    }
  }
  return `{${members.join(',')}}`;
}
|
|
|
|
/**
 * Lists the names of sources that differ between two snapshots.
 *
 * A name is reported when the source was added, removed, or when its
 * canonical JSON form changed. Empty names are never reported. The result is
 * de-duplicated and sorted.
 *
 * @param beforeSources - Snapshot taken before the change.
 * @param afterSources - Snapshot taken after the change.
 * @returns Sorted names of the differing sources.
 */
function changedSourceNames(
  beforeSources: SemanticLayerSource[],
  afterSources: SemanticLayerSource[],
): string[] {
  // Maps each source name to its canonical JSON; a repeated name keeps the last occurrence.
  const fingerprintByName = (sources: SemanticLayerSource[]): Map<string, string> => {
    const fingerprints = new Map<string, string>();
    for (const source of sources) {
      fingerprints.set(source.name, stableJson(source));
    }
    return fingerprints;
  };

  const previous = fingerprintByName(beforeSources);
  const current = fingerprintByName(afterSources);
  const candidateNames = uniqueSorted([...previous.keys(), ...current.keys()]);

  // A name missing on one side yields `undefined` there and therefore counts as changed.
  // Filtering an already unique, sorted list keeps it unique and sorted.
  return candidateNames.filter((name) => previous.get(name) !== current.get(name));
}
|
|
|
|
/**
 * Derives wiki page keys from changed paths.
 *
 * Only Markdown files that sit directly in `wiki/global/` qualify; the key is
 * the file name without the `.md` extension. Files in sub-directories and
 * files whose key would be empty are ignored.
 *
 * @param paths - Changed paths to inspect.
 * @returns De-duplicated, sorted page keys.
 */
export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
  const prefix = 'wiki/global/';
  const suffix = '.md';
  const pageKeys: string[] = [];

  for (const path of paths) {
    if (!path.startsWith(prefix) || !path.endsWith(suffix)) {
      continue;
    }
    const pageKey = path.slice(prefix.length, -suffix.length);
    // A remaining slash means the file lives in a sub-directory.
    if (pageKey.includes('/')) {
      continue;
    }
    pageKeys.push(pageKey);
  }

  return uniqueSorted(pageKeys);
}
|
|
|
|
/**
 * Derives the semantic-layer sources touched by a set of changed paths.
 *
 * Only paths under `semantic-layer/` ending in `.yaml` or `.yml` are
 * considered; the second path segment is taken as the connection id.
 *
 * - For a file outside the connection's `_schema` directory, the source name
 *   is the file name without its YAML extension.
 * - For a file inside `_schema`, the touched sources are those whose content
 *   differs between the connection's before and after snapshots.
 *
 * A path is reported as unresolved when it has no connection directory (for
 * example a YAML file directly under `semantic-layer/`), when its source name
 * would be empty, or when a `_schema` change produced no differing source.
 *
 * @param input - Changed paths plus the before/after sources per connection.
 * @returns Touched sources ordered by their `connectionId:sourceName` key, and
 *   the de-duplicated, sorted unresolved paths.
 */
export async function deriveFinalizationTouchedSources(
  input: DeriveTouchedSourcesInput,
): Promise<DeriveTouchedSourcesResult> {
  const touched = new Map<string, TouchedSlSource>();
  const unresolvedPaths: string[] = [];
  // The snapshot diff depends only on the connection, so it is computed at most
  // once per connection no matter how many of its _schema files changed.
  const schemaDiffByConnection = new Map<string, string[]>();

  const markTouched = (connectionId: string, sourceName: string): void => {
    const entry: TouchedSlSource = { connectionId, sourceName };
    touched.set(touchedKey(entry), entry);
  };

  for (const path of input.changedPaths) {
    if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
      continue;
    }

    const parts = path.split('/');
    const connectionId = parts[1] ?? '';
    // Fewer than three segments means the file sits directly under
    // `semantic-layer/`; its file name must not be mistaken for a connection id.
    if (!connectionId || parts.length < 3) {
      unresolvedPaths.push(path);
      continue;
    }

    if (parts[2] !== '_schema') {
      const fileName = parts[parts.length - 1] ?? '';
      const sourceName = fileName.replace(/\.ya?ml$/, '');
      if (!sourceName) {
        unresolvedPaths.push(path);
        continue;
      }
      markTouched(connectionId, sourceName);
      continue;
    }

    let changedNames = schemaDiffByConnection.get(connectionId);
    if (changedNames === undefined) {
      changedNames = changedSourceNames(
        input.beforeSourcesByConnection.get(connectionId) ?? [],
        input.afterSourcesByConnection.get(connectionId) ?? [],
      );
      schemaDiffByConnection.set(connectionId, changedNames);
    }

    if (changedNames.length === 0) {
      // The schema file changed but no source differs, so the change cannot be attributed.
      unresolvedPaths.push(path);
      continue;
    }
    for (const sourceName of changedNames) {
      markTouched(connectionId, sourceName);
    }
  }

  return {
    touchedSources: [...touched.values()].sort((left, right) =>
      touchedKey(left).localeCompare(touchedKey(right)),
    ),
    unresolvedPaths: uniqueSorted(unresolvedPaths),
  };
}
|
|
|
|
/**
 * Compares what the adapter declared against what was derived and reports
 * every key present on only one side.
 *
 * Mismatches are emitted in a fixed order: semantic-layer keys first, then
 * wiki keys. Within each kind, keys missing from the adapter declaration come
 * before keys that are extra in it, each group sorted ascending.
 *
 * @param input - Declared and derived touched sources and wiki page keys.
 * @returns The mismatches; empty when both sides agree.
 */
export function compareFinalizationDeclarations(
  input: CompareFinalizationDeclarationsInput,
): IngestReportFinalizationMismatch[] {
  const mismatches: IngestReportFinalizationMismatch[] = [];

  // Sorted keys of `from` that `other` lacks.
  const keysOnlyIn = (from: Set<string>, other: Set<string>): string[] =>
    [...from].filter((key) => !other.has(key)).sort();

  const collect = (
    artifactKind: 'sl' | 'wiki',
    declared: Set<string>,
    derived: Set<string>,
  ): void => {
    for (const key of keysOnlyIn(derived, declared)) {
      mismatches.push({ artifactKind, key, direction: 'missing_from_adapter_declaration' });
    }
    for (const key of keysOnlyIn(declared, derived)) {
      mismatches.push({ artifactKind, key, direction: 'extra_in_adapter_declaration' });
    }
  };

  collect(
    'sl',
    new Set(input.declaredTouchedSources.map((source) => touchedKey(source))),
    new Set(input.derivedTouchedSources.map((source) => touchedKey(source))),
  );
  collect(
    'wiki',
    new Set(input.declaredChangedWikiPageKeys),
    new Set(input.derivedChangedWikiPageKeys),
  );

  return mismatches;
}
|