From 5232578d4471f75a26bfae873a6fa2c040976fda Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Mon, 8 Jun 2026 15:58:12 +0200 Subject: [PATCH] fix(sl): stop baking drift-prone counts into overlay summaries (#270) The auto-generated semantic-layer overlay description embedded measure/segment/column counts that were computed once and never recomputed, so the summary drifted and misreported its source after measures were later appended. Make the auto fallback count-free, since those counts are already rendered live from the body at `ktx sl list`/ `read` time; this removes the drift class without ever overwriting human-authored descriptions (the fill-only-when-empty guard is untouched). Adds a regression test that fails on main and passes after the fix, plus guards for description preservation and the no-measures fallback. --- .../context/sl/description-normalization.ts | 20 +---- .../sl/description-normalization.test.ts | 73 +++++++++++++++++++ 2 files changed, 77 insertions(+), 16 deletions(-) create mode 100644 packages/cli/test/context/sl/description-normalization.test.ts diff --git a/packages/cli/src/context/sl/description-normalization.ts b/packages/cli/src/context/sl/description-normalization.ts index ef657fdd..04bf41ba 100644 --- a/packages/cli/src/context/sl/description-normalization.ts +++ b/packages/cli/src/context/sl/description-normalization.ts @@ -50,13 +50,6 @@ function humanizeIdentifier(value: string): string { .toLowerCase(); } -function formatCount(count: number, singular: string, plural = `${singular}s`): string | null { - if (count <= 0) { - return null; - } - return `${count} ${count === 1 ? singular : plural}`; -} - function sourceFallback(source: Record, sourceName: string): string { const table = cleanText(source.table); const sql = cleanText(source.sql); @@ -66,15 +59,10 @@ function sourceFallback(source: Record, sourceName: string): st if (sql) { return `Semantic-layer source for ${sourceName} backed by curated SQL.`; } - - const counts = [ - formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'), - formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'), - formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'), - ].filter((item): item is string => Boolean(item)); - return counts.length > 0 - ? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.` - : `Semantic-layer overlay for ${sourceName}.`; + // Measure/segment/column counts are rendered live from the body at list/read + // time, so baking them into stored prose freezes a derived value that drifts + // as the source later gains measures. Keep the auto fallback count-free. + return `Semantic-layer overlay for ${sourceName}.`; } function columnFallback(column: Record, sourceName: string): string { diff --git a/packages/cli/test/context/sl/description-normalization.test.ts b/packages/cli/test/context/sl/description-normalization.test.ts new file mode 100644 index 00000000..00223183 --- /dev/null +++ b/packages/cli/test/context/sl/description-normalization.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, it } from 'vitest'; + +import { normalizeSemanticLayerDescriptions } from '../../../src/context/sl/description-normalization.js'; + +/** + * Build an overlay-shaped source (no `table`/`sql`) so the overlay fallback + * branch is exercised. Measure/segment counts are derived from array length, so + * the element contents are irrelevant to the summary. + */ +function overlaySource(measureCount: number, segmentCount = 0): Record { + return { + name: 'mart_customer_health', + measures: Array.from({ length: measureCount }, (_, i) => ({ name: `m${i}`, expr: 'count(*)' })), + segments: Array.from({ length: segmentCount }, (_, i) => ({ name: `s${i}`, expr: 'true' })), + }; +} + +function ktxSummary(source: Record): string | undefined { + const descriptions = source.descriptions; + if (descriptions && typeof descriptions === 'object' && !Array.isArray(descriptions)) { + const ktx = (descriptions as Record).ktx; + return typeof ktx === 'string' ? ktx : undefined; + } + return undefined; +} + +describe('normalizeSemanticLayerDescriptions', () => { + it('stores a count-free overlay summary so the count cannot drift', () => { + const normalized = normalizeSemanticLayerDescriptions(overlaySource(4, 3), { fillMissing: true }); + // The live count is rendered from the body at list/read time; it must not be + // frozen into the stored prose, where it would silently go stale. + expect(ktxSummary(normalized)).toBe('Semantic-layer overlay for mart_customer_health.'); + }); + + it('does not keep a stale measure count after measures are appended', () => { + // First ingest pass writes the auto summary for a 4-measure overlay. + const first = normalizeSemanticLayerDescriptions(overlaySource(4, 3), { fillMissing: true }); + + // A later ingest/reconcile pass appends 2 measures to the same source (now 6) + // and re-normalizes — exactly what sl_edit_source does with fillMissing. + (first.measures as unknown[]).push({ name: 'm4', expr: 'count(*)' }, { name: 'm5', expr: 'count(*)' }); + const second = normalizeSemanticLayerDescriptions(first, { fillMissing: true }); + + expect(ktxSummary(second)).not.toMatch(/4 measures/); + }); + + it('never overwrites a human-authored user description across re-normalization', () => { + const input: Record = { + ...overlaySource(4), + descriptions: { user: 'Health score per account, owned by RevOps.' }, + }; + const authored = normalizeSemanticLayerDescriptions(input, { fillMissing: true }); + expect(authored.descriptions).toEqual({ user: 'Health score per account, owned by RevOps.' }); + + (authored.measures as unknown[]).push({ name: 'm4', expr: 'count(*)' }); + const again = normalizeSemanticLayerDescriptions(authored, { fillMissing: true }); + expect(again.descriptions).toEqual({ user: 'Health score per account, owned by RevOps.' }); + }); + + it('never overwrites an authored ktx description even when it resembles the auto summary', () => { + const input: Record = { + ...overlaySource(2), + descriptions: { ktx: 'Curated overlay notes for the health mart.' }, + }; + const authored = normalizeSemanticLayerDescriptions(input, { fillMissing: true }); + expect(ktxSummary(authored)).toBe('Curated overlay notes for the health mart.'); + }); + + it('still produces a sensible fallback for a source with no measures', () => { + const normalized = normalizeSemanticLayerDescriptions({ name: 'mart_empty' }, { fillMissing: true }); + expect(ktxSummary(normalized)).toBe('Semantic-layer overlay for mart_empty.'); + }); +});