feat: preserve historic sql usage in manifest shards

This commit is contained in:
Andrey Avtomonov 2026-05-11 16:53:22 +02:00
parent f17053061d
commit c45d131a1f
4 changed files with 120 additions and 2 deletions

View file

@ -742,6 +742,13 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
orders: {
table: 'public.orders',
descriptions: { user: 'Pinned structural description', ai: 'Old generated text' },
usage: {
narrative: 'Orders are commonly filtered by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'Preserve analyst note',
},
columns: [
{
name: 'id',
@ -797,6 +804,7 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
tables: {
orders: {
descriptions: Record<string, string>;
usage?: Record<string, unknown>;
columns: Array<{ name: string; descriptions?: Record<string, string> }>;
joins: Array<{ to: string; on: string; source: string }>;
};
@ -807,6 +815,13 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
user: 'Pinned structural description',
db: 'DB orders table',
});
expect(manifest.tables.orders.usage).toEqual({
narrative: 'Orders are commonly filtered by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'Preserve analyst note',
});
expect(manifest.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({
user: 'Pinned structural id',
db: 'DB order id',

View file

@ -6,6 +6,7 @@ import {
type LiveDatabaseManifestJoinEntry,
type LiveDatabaseManifestShard,
type LiveDatabaseManifestTableData,
type TableUsageOutput,
} from '../ingest/index.js';
import type { KtxScanRelationshipConfig } from '../project/config.js';
import type { KtxLocalProject } from '../project/index.js';
@ -56,6 +57,7 @@ export interface WriteLocalScanEnrichmentArtifactsResult extends WriteLocalScanM
/**
 * State recovered from manifest shards that already exist on disk, as returned
 * by `loadExistingManifestState`. Each map is handed to the shard builder so
 * that a rewrite can carry the prior values forward.
 */
interface ExistingManifestState {
/**
 * Descriptions found in the existing shards.
 * NOTE(review): the map key is presumably the table name — confirm against the loader.
 */
descriptions: Map<string, LiveDatabaseManifestExistingDescriptions>;
/**
 * Join entries kept from the existing shards. The loader only considers joins
 * whose `source` is 'manual' or 'inferred' (subject to further checks).
 * NOTE(review): the map key is presumably the table name — confirm against the loader.
 */
preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>;
/**
 * Shallow copy of each existing table entry's `usage` block, keyed by table name.
 * A table has no key here when its existing entry carried no usage data.
 */
usage: Map<string, TableUsageOutput>;
}
type LocalDescriptionUpdates = KtxLocalScanEnrichmentResult['descriptionUpdates'];
@ -196,6 +198,7 @@ async function loadExistingManifestState(
): Promise<ExistingManifestState> {
const descriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>();
const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>();
const usage = new Map<string, TableUsageOutput>();
const validTableNames = new Set(snapshot.tables.map((table) => table.name));
const columnsByTable = validColumns(snapshot);
@ -203,7 +206,7 @@ async function loadExistingManifestState(
try {
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml'));
} catch {
return { descriptions, preservedJoins };
return { descriptions, preservedJoins, usage };
}
for (const file of files) {
@ -225,6 +228,9 @@ async function loadExistingManifestState(
),
),
});
if (entry.usage) {
usage.set(tableName, { ...entry.usage });
}
const joins = (entry.joins ?? []).filter((join) => {
return (
(join.source === 'manual' || join.source === 'inferred') &&
@ -241,7 +247,7 @@ async function loadExistingManifestState(
}
}
return { descriptions, preservedJoins };
return { descriptions, preservedJoins, usage };
}
async function writeJsonArtifact(
@ -276,6 +282,7 @@ export async function writeLocalScanManifestShards(
joins: relationshipJoins(input.snapshot, input.relationshipUpdate),
existingDescriptions: existing.descriptions,
existingPreservedJoins: existing.preservedJoins,
existingUsage: existing.usage,
mapColumnType: (dimensionType) => dimensionType,
});