feat: preserve historic sql usage in manifest shards

This commit is contained in:
Andrey Avtomonov 2026-05-11 16:53:22 +02:00
parent f17053061d
commit c45d131a1f
4 changed files with 120 additions and 2 deletions

View file

@ -186,6 +186,62 @@ describe('buildLiveDatabaseManifestShards', () => {
});
});
// Managed usage keys must come from the incoming table usage, while keys outside the
// managed set (here `ownerNote`) must be carried over from the existing usage map.
it('preserves external usage keys while replacing historic SQL managed keys', () => {
  // Previously stored usage for `orders`: stale generated fields plus one external key.
  const priorUsage = new Map([
    [
      'orders',
      {
        narrative: 'Old generated usage narrative.',
        frequencyTier: 'low' as const,
        commonFilters: ['old_status'],
        commonJoins: [],
        ownerNote: 'Pinned analyst note',
      },
    ],
  ]);

  const built = buildLiveDatabaseManifestShards({
    connectionType: 'POSTGRESQL',
    mapColumnType: (rawType) => rawType.toLowerCase(),
    existingUsage: priorUsage,
    tables: [
      {
        name: 'orders',
        catalog: null,
        db: 'public',
        usage: {
          narrative: 'Fresh generated usage narrative.',
          frequencyTier: 'high',
          commonFilters: ['status'],
          commonGroupBys: ['created_at'],
          commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
        },
        columns: [{ name: 'id', type: 'INTEGER' }],
      },
    ],
    joins: [],
  });

  // Expected: every generated field reflects the fresh usage; only `ownerNote` survives.
  const expectedShards = {
    public: {
      tables: {
        orders: {
          table: 'public.orders',
          usage: {
            ownerNote: 'Pinned analyst note',
            narrative: 'Fresh generated usage narrative.',
            frequencyTier: 'high',
            commonFilters: ['status'],
            commonGroupBys: ['created_at'],
            commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
          },
          columns: [{ name: 'id', type: 'integer' }],
        },
      },
    },
  };

  expect(shardObject(built.shards)).toEqual(expectedShards);
});
it('renders ordered multi-column joins in both directions', () => {
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',

View file

@ -1,3 +1,5 @@
import type { TableUsageOutput } from '../historic-sql/skill-schemas.js';
const RELATIONSHIP_MAP: Record<string, string> = {
MANY_TO_ONE: 'many_to_one',
ONE_TO_MANY: 'one_to_many',
@ -11,6 +13,14 @@ const RELATIONSHIP_INVERSE: Record<string, string> = {
};
const SCAN_MANAGED_DESCRIPTION_KEYS = new Set(['db', 'ai']);
/**
 * Usage keys treated as managed when usage records are merged: `mergeUsagePreservingExternal`
 * drops these keys from the existing record whenever incoming usage is supplied, so only
 * keys outside this set are carried over from the existing record.
 */
const HISTORIC_SQL_MANAGED_USAGE_KEYS = new Set<string>([
  'narrative',
  'frequencyTier',
  'commonFilters',
  'commonGroupBys',
  'commonJoins',
  'staleSince',
]);
export interface LiveDatabaseManifestColumn {
name: string;
@ -30,6 +40,7 @@ export interface LiveDatabaseManifestJoinEntry {
/**
 * One table's entry inside a manifest shard, as assembled by
 * `buildLiveDatabaseManifestShards`.
 */
export interface LiveDatabaseManifestTableEntry {
/** Table identifier; the tests show a schema-qualified form such as `public.orders`. */
table: string;
/** Optional description texts keyed by a string label; omitted when there are none to write. */
descriptions?: Record<string, string>;
/** Usage summary produced by `mergeUsagePreservingExternal`; only set when that merge yields a value. */
usage?: TableUsageOutput;
/** Column entries for the table. */
columns: LiveDatabaseManifestColumn[];
/** Join entries for the table; only set when the table has at least one join. */
joins?: LiveDatabaseManifestJoinEntry[];
}
@ -43,6 +54,7 @@ export interface LiveDatabaseManifestTableData {
catalog: string | null;
db: string | null;
descriptions?: Record<string, string>;
usage?: TableUsageOutput;
columns: Array<{
name: string;
type: string;
@ -73,6 +85,7 @@ export interface BuildLiveDatabaseManifestShardsInput {
mapColumnType: (nativeType: string) => string;
existingPreservedJoins?: Map<string, LiveDatabaseManifestJoinEntry[]>;
existingDescriptions?: Map<string, LiveDatabaseManifestExistingDescriptions>;
existingUsage?: Map<string, TableUsageOutput>;
}
export interface BuildLiveDatabaseManifestShardsResult {
@ -101,6 +114,28 @@ function mergeDescriptionsPreservingExternal(
return Object.keys(result).length > 0 ? result : undefined;
}
/**
 * Combines a table's existing usage record with freshly supplied usage.
 *
 * - No `incoming`: the existing record is returned as a shallow copy (managed keys
 *   included), or `undefined` when there is no existing record either.
 * - With `incoming`: keys of `existing` listed in `HISTORIC_SQL_MANAGED_USAGE_KEYS` are
 *   discarded, the remaining keys are kept, and every own enumerable key of `incoming`
 *   is then written on top.
 *
 * Copies are shallow, so nested arrays/objects are shared with the inputs.
 *
 * @param existing Usage already stored for the table, if any.
 * @param incoming Newly supplied usage for the table, if any.
 * @returns The merged usage, or `undefined` when the merge produces no keys.
 */
export function mergeUsagePreservingExternal(
  existing: TableUsageOutput | undefined,
  incoming: TableUsageOutput | undefined,
): TableUsageOutput | undefined {
  if (!incoming) {
    // Nothing new to apply: hand back a copy of whatever was stored, untouched.
    return existing ? { ...existing } : undefined;
  }

  const merged: Record<string, unknown> = {};

  // Carry over only the keys that are not in the managed set.
  const preserved = Object.entries(existing ?? {}).filter(
    ([key]) => !HISTORIC_SQL_MANAGED_USAGE_KEYS.has(key),
  );
  for (const [key, value] of preserved) {
    merged[key] = value;
  }

  // Incoming values win over anything carried over under the same key.
  Object.assign(merged, incoming);

  return Object.keys(merged).length > 0 ? (merged as TableUsageOutput) : undefined;
}
function getShardKey(connectionType: string, catalog: string | null, db: string | null): string {
const normalized = connectionType.toUpperCase();
@ -254,6 +289,11 @@ export function buildLiveDatabaseManifestShards(
entry.descriptions = tableDescriptions;
}
const usage = mergeUsagePreservingExternal(input.existingUsage?.get(table.name), table.usage);
if (usage) {
entry.usage = usage;
}
const tableJoins = joinsByTable.get(table.name);
if (tableJoins && tableJoins.length > 0) {
entry.joins = tableJoins;

View file

@ -742,6 +742,13 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
orders: {
table: 'public.orders',
descriptions: { user: 'Pinned structural description', ai: 'Old generated text' },
usage: {
narrative: 'Orders are commonly filtered by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'Preserve analyst note',
},
columns: [
{
name: 'id',
@ -797,6 +804,7 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
tables: {
orders: {
descriptions: Record<string, string>;
usage?: Record<string, unknown>;
columns: Array<{ name: string; descriptions?: Record<string, string> }>;
joins: Array<{ to: string; on: string; source: string }>;
};
@ -807,6 +815,13 @@ describe('writeLocalScanEnrichmentArtifacts', () => {
user: 'Pinned structural description',
db: 'DB orders table',
});
expect(manifest.tables.orders.usage).toEqual({
narrative: 'Orders are commonly filtered by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'Preserve analyst note',
});
expect(manifest.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({
user: 'Pinned structural id',
db: 'DB order id',

View file

@ -6,6 +6,7 @@ import {
type LiveDatabaseManifestJoinEntry,
type LiveDatabaseManifestShard,
type LiveDatabaseManifestTableData,
type TableUsageOutput,
} from '../ingest/index.js';
import type { KtxScanRelationshipConfig } from '../project/config.js';
import type { KtxLocalProject } from '../project/index.js';
@ -56,6 +57,7 @@ export interface WriteLocalScanEnrichmentArtifactsResult extends WriteLocalScanM
interface ExistingManifestState {
descriptions: Map<string, LiveDatabaseManifestExistingDescriptions>;
preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>;
usage: Map<string, TableUsageOutput>;
}
type LocalDescriptionUpdates = KtxLocalScanEnrichmentResult['descriptionUpdates'];
@ -196,6 +198,7 @@ async function loadExistingManifestState(
): Promise<ExistingManifestState> {
const descriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>();
const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>();
const usage = new Map<string, TableUsageOutput>();
const validTableNames = new Set(snapshot.tables.map((table) => table.name));
const columnsByTable = validColumns(snapshot);
@ -203,7 +206,7 @@ async function loadExistingManifestState(
try {
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml'));
} catch {
return { descriptions, preservedJoins };
return { descriptions, preservedJoins, usage };
}
for (const file of files) {
@ -225,6 +228,9 @@ async function loadExistingManifestState(
),
),
});
if (entry.usage) {
usage.set(tableName, { ...entry.usage });
}
const joins = (entry.joins ?? []).filter((join) => {
return (
(join.source === 'manual' || join.source === 'inferred') &&
@ -241,7 +247,7 @@ async function loadExistingManifestState(
}
}
return { descriptions, preservedJoins };
return { descriptions, preservedJoins, usage };
}
async function writeJsonArtifact(
@ -276,6 +282,7 @@ export async function writeLocalScanManifestShards(
joins: relationshipJoins(input.snapshot, input.relationshipUpdate),
existingDescriptions: existing.descriptions,
existingPreservedJoins: existing.preservedJoins,
existingUsage: existing.usage,
mapColumnType: (dimensionType) => dimensionType,
});