fix(sl): stop baking drift-prone counts into overlay summaries (#270)

The auto-generated semantic-layer overlay description embedded
measure/segment/column counts that were computed once and never
recomputed, so the summary drifted and misreported its source after
measures were later appended. Make the auto fallback count-free, since
those counts are already rendered live from the body at `ktx sl list`/
`read` time; this removes the drift class without ever overwriting
human-authored descriptions (the fill-only-when-empty guard is untouched).

Adds a regression test that fails on main and passes after the fix, plus
guards for description preservation and the no-measures fallback.
This commit is contained in:
Andrey Avtomonov 2026-06-08 15:58:12 +02:00 committed by GitHub
parent 2c18a62de4
commit 5232578d44
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 77 additions and 16 deletions

View file

@ -50,13 +50,6 @@ function humanizeIdentifier(value: string): string {
.toLowerCase();
}
/**
 * Render a count together with its noun, e.g. `1 measure` or `3 measures`.
 *
 * @param count - Number of items being described.
 * @param singular - Noun used when `count` is exactly 1.
 * @param plural - Noun used for any other positive count; defaults to
 *   `singular` with an `s` appended.
 * @returns The formatted phrase, or `null` when `count` is zero or negative.
 */
function formatCount(count: number, singular: string, plural = `${singular}s`): string | null {
  const noun = count === 1 ? singular : plural;
  // Zero and negative counts produce no phrase at all.
  return count <= 0 ? null : `${count} ${noun}`;
}
function sourceFallback(source: Record<string, unknown>, sourceName: string): string {
const table = cleanText(source.table);
const sql = cleanText(source.sql);
@ -66,15 +59,10 @@ function sourceFallback(source: Record<string, unknown>, sourceName: string): st
if (sql) {
return `Semantic-layer source for ${sourceName} backed by curated SQL.`;
}
const counts = [
formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'),
formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'),
formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'),
].filter((item): item is string => Boolean(item));
return counts.length > 0
? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.`
: `Semantic-layer overlay for ${sourceName}.`;
// Measure/segment/column counts are rendered live from the body at list/read
// time, so baking them into stored prose freezes a derived value that drifts
// as the source later gains measures. Keep the auto fallback count-free.
return `Semantic-layer overlay for ${sourceName}.`;
}
function columnFallback(column: Record<string, unknown>, sourceName: string): string {

View file

@ -0,0 +1,73 @@
import { describe, expect, it } from 'vitest';
import { normalizeSemanticLayerDescriptions } from '../../../src/context/sl/description-normalization.js';
/**
 * Test fixture: an overlay-shaped source named `mart_customer_health` that
 * carries no `table` or `sql` key, only `measures` and `segments` arrays.
 *
 * @param measureCount - Number of placeholder measures (`m0`, `m1`, …), each
 *   with the expression `count(*)`.
 * @param segmentCount - Number of placeholder segments (`s0`, `s1`, …), each
 *   with the expression `true`; defaults to 0.
 * @returns A fresh plain object on every call.
 */
function overlaySource(measureCount: number, segmentCount = 0): Record<string, unknown> {
  // One generator for both arrays: entries differ only by name prefix and expr.
  const stubs = (length: number, prefix: string, expr: string) =>
    Array.from({ length }, (_unused, index) => ({ name: `${prefix}${index}`, expr }));
  const source: Record<string, unknown> = {
    name: 'mart_customer_health',
    measures: stubs(measureCount, 'm', 'count(*)'),
    segments: stubs(segmentCount, 's', 'true'),
  };
  return source;
}
/**
 * Read the `ktx` entry from a source's `descriptions` map.
 *
 * @param source - Source object that may carry a `descriptions` property.
 * @returns The `ktx` value when `descriptions` is a non-array object and the
 *   entry is a string; `undefined` in every other case.
 */
function ktxSummary(source: Record<string, unknown>): string | undefined {
  const bag = source.descriptions;
  // Reject missing/null values, primitives and arrays up front.
  if (!bag || typeof bag !== 'object' || Array.isArray(bag)) {
    return undefined;
  }
  const candidate = (bag as Record<string, unknown>).ktx;
  if (typeof candidate !== 'string') {
    return undefined;
  }
  return candidate;
}
// Regression suite for the auto-generated overlay summary: every case calls
// normalizeSemanticLayerDescriptions with `fillMissing: true` and inspects the
// resulting `descriptions` map.
describe('normalizeSemanticLayerDescriptions', () => {
// 4 measures and 3 segments go in; the expected summary mentions neither number.
it('stores a count-free overlay summary so the count cannot drift', () => {
const normalized = normalizeSemanticLayerDescriptions(overlaySource(4, 3), { fillMissing: true });
// The live count is rendered from the body at list/read time; it must not be
// frozen into the stored prose, where it would silently go stale.
expect(ktxSummary(normalized)).toBe('Semantic-layer overlay for mart_customer_health.');
});
// Normalize, grow the measures array in place, normalize the result again, and
// check that the old "4 measures" phrasing is absent from the summary.
it('does not keep a stale measure count after measures are appended', () => {
// First ingest pass writes the auto summary for a 4-measure overlay.
const first = normalizeSemanticLayerDescriptions(overlaySource(4, 3), { fillMissing: true });
// A later ingest/reconcile pass appends 2 measures to the same source (now 6)
// and re-normalizes — exactly what sl_edit_source does with fillMissing.
(first.measures as unknown[]).push({ name: 'm4', expr: 'count(*)' }, { name: 'm5', expr: 'count(*)' });
const second = normalizeSemanticLayerDescriptions(first, { fillMissing: true });
expect(ktxSummary(second)).not.toMatch(/4 measures/);
});
// A source that already has a `user` description must come back with exactly
// that map (no `ktx` key added), both on the first pass and after a measure is
// appended and the source is normalized again.
it('never overwrites a human-authored user description across re-normalization', () => {
const input: Record<string, unknown> = {
...overlaySource(4),
descriptions: { user: 'Health score per account, owned by RevOps.' },
};
const authored = normalizeSemanticLayerDescriptions(input, { fillMissing: true });
expect(authored.descriptions).toEqual({ user: 'Health score per account, owned by RevOps.' });
(authored.measures as unknown[]).push({ name: 'm4', expr: 'count(*)' });
const again = normalizeSemanticLayerDescriptions(authored, { fillMissing: true });
expect(again.descriptions).toEqual({ user: 'Health score per account, owned by RevOps.' });
});
// A pre-existing `ktx` description must be returned unchanged.
// NOTE(review): the title says the text "resembles the auto summary", but the
// fixture string below shares no wording with 'Semantic-layer overlay for …' —
// consider a closer look-alike fixture or a more precise title.
it('never overwrites an authored ktx description even when it resembles the auto summary', () => {
const input: Record<string, unknown> = {
...overlaySource(2),
descriptions: { ktx: 'Curated overlay notes for the health mart.' },
};
const authored = normalizeSemanticLayerDescriptions(input, { fillMissing: true });
expect(ktxSummary(authored)).toBe('Curated overlay notes for the health mart.');
});
// Input has only a `name` (no measures/segments/columns keys at all); the same
// count-free sentence is expected.
it('still produces a sensible fallback for a source with no measures', () => {
const normalized = normalizeSemanticLayerDescriptions({ name: 'mart_empty' }, { fillMissing: true });
expect(ktxSummary(normalized)).toBe('Semantic-layer overlay for mart_empty.');
});
});