fix(sl): stop baking drift-prone counts into overlay summaries (#270)

The auto-generated semantic-layer overlay description embedded measure/segment/column counts that were computed once and never recomputed, so the summary drifted and misreported its source after measures were later appended. Make the auto fallback count-free, since those counts are already rendered live from the body at `ktx sl list`/ `read` time; this removes the drift class without ever overwriting human-authored descriptions (the fill-only-when-empty guard is untouched). Adds a regression test that fails on main and passes after the fix, plus guards for description preservation and the no-measures fallback.
2026-07-25 12:01:03 +02:00 · 2026-06-08 15:58:12 +02:00 · 2026-06-08 15:58:12 +02:00 · 5232578d44
commit 5232578d44
parent 2c18a62de4
2 changed files with 77 additions and 16 deletions
--- a/packages/cli/src/context/sl/description-normalization.ts
+++ b/packages/cli/src/context/sl/description-normalization.ts
@ -50,13 +50,6 @@ function humanizeIdentifier(value: string): string {
    .toLowerCase();
 }

-function formatCount(count: number, singular: string, plural = `${singular}s`): string | null {
-  if (count <= 0) {
-    return null;
-  }
-  return `${count} ${count === 1 ? singular : plural}`;
-}
-
 function sourceFallback(source: Record<string, unknown>, sourceName: string): string {
  const table = cleanText(source.table);
  const sql = cleanText(source.sql);
@ -66,15 +59,10 @@ function sourceFallback(source: Record<string, unknown>, sourceName: string): st
  if (sql) {
    return `Semantic-layer source for ${sourceName} backed by curated SQL.`;
  }
-
-  const counts = [
-    formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'),
-    formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'),
-    formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'),
-  ].filter((item): item is string => Boolean(item));
-  return counts.length > 0
-    ? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.`
-    : `Semantic-layer overlay for ${sourceName}.`;
+  // Measure/segment/column counts are rendered live from the body at list/read
+  // time, so baking them into stored prose freezes a derived value that drifts
+  // as the source later gains measures. Keep the auto fallback count-free.
+  return `Semantic-layer overlay for ${sourceName}.`;
 }

 function columnFallback(column: Record<string, unknown>, sourceName: string): string {
--- a/packages/cli/test/context/sl/description-normalization.test.ts
+++ b/packages/cli/test/context/sl/description-normalization.test.ts
@ -0,0 +1,73 @@
+import { describe, expect, it } from 'vitest';
+
+import { normalizeSemanticLayerDescriptions } from '../../../src/context/sl/description-normalization.js';
+
+/**
+ * Build an overlay-shaped source (no `table`/`sql`) so the overlay fallback
+ * branch is exercised. Measure/segment counts are derived from array length, so
+ * the element contents are irrelevant to the summary.
+ */
+function overlaySource(measureCount: number, segmentCount = 0): Record<string, unknown> {
+  return {
+    name: 'mart_customer_health',
+    measures: Array.from({ length: measureCount }, (_, i) => ({ name: `m${i}`, expr: 'count(*)' })),
+    segments: Array.from({ length: segmentCount }, (_, i) => ({ name: `s${i}`, expr: 'true' })),
+  };
+}
+
+function ktxSummary(source: Record<string, unknown>): string | undefined {
+  const descriptions = source.descriptions;
+  if (descriptions && typeof descriptions === 'object' && !Array.isArray(descriptions)) {
+    const ktx = (descriptions as Record<string, unknown>).ktx;
+    return typeof ktx === 'string' ? ktx : undefined;
+  }
+  return undefined;
+}
+
+describe('normalizeSemanticLayerDescriptions', () => {
+  it('stores a count-free overlay summary so the count cannot drift', () => {
+    const normalized = normalizeSemanticLayerDescriptions(overlaySource(4, 3), { fillMissing: true });
+    // The live count is rendered from the body at list/read time; it must not be
+    // frozen into the stored prose, where it would silently go stale.
+    expect(ktxSummary(normalized)).toBe('Semantic-layer overlay for mart_customer_health.');
+  });
+
+  it('does not keep a stale measure count after measures are appended', () => {
+    // First ingest pass writes the auto summary for a 4-measure overlay.
+    const first = normalizeSemanticLayerDescriptions(overlaySource(4, 3), { fillMissing: true });
+
+    // A later ingest/reconcile pass appends 2 measures to the same source (now 6)
+    // and re-normalizes — exactly what sl_edit_source does with fillMissing.
+    (first.measures as unknown[]).push({ name: 'm4', expr: 'count(*)' }, { name: 'm5', expr: 'count(*)' });
+    const second = normalizeSemanticLayerDescriptions(first, { fillMissing: true });
+
+    expect(ktxSummary(second)).not.toMatch(/4 measures/);
+  });
+
+  it('never overwrites a human-authored user description across re-normalization', () => {
+    const input: Record<string, unknown> = {
+      ...overlaySource(4),
+      descriptions: { user: 'Health score per account, owned by RevOps.' },
+    };
+    const authored = normalizeSemanticLayerDescriptions(input, { fillMissing: true });
+    expect(authored.descriptions).toEqual({ user: 'Health score per account, owned by RevOps.' });
+
+    (authored.measures as unknown[]).push({ name: 'm4', expr: 'count(*)' });
+    const again = normalizeSemanticLayerDescriptions(authored, { fillMissing: true });
+    expect(again.descriptions).toEqual({ user: 'Health score per account, owned by RevOps.' });
+  });
+
+  it('never overwrites an authored ktx description even when it resembles the auto summary', () => {
+    const input: Record<string, unknown> = {
+      ...overlaySource(2),
+      descriptions: { ktx: 'Curated overlay notes for the health mart.' },
+    };
+    const authored = normalizeSemanticLayerDescriptions(input, { fillMissing: true });
+    expect(ktxSummary(authored)).toBe('Curated overlay notes for the health mart.');
+  });
+
+  it('still produces a sensible fallback for a source with no measures', () => {
+    const normalized = normalizeSemanticLayerDescriptions({ name: 'mart_empty' }, { fillMissing: true });
+    expect(ktxSummary(normalized)).toBe('Semantic-layer overlay for mart_empty.');
+  });
+});