Normalize semantic layer descriptions

This commit is contained in:
Luca Martial 2026-05-11 00:31:15 -07:00
parent c82989119b
commit 86c818a454
21 changed files with 498 additions and 37 deletions

View file

@ -127,6 +127,39 @@ describe('SlEditSourceTool — session gating', () => {
);
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
});
// Editing a source from a session that carries `ingest` metadata should result in
// `writeSource` receiving `descriptions.ktx` entries on the source and on its column.
it('fills missing descriptions when an ingest session edits a source', async () => {
  const { tool } = makeTool();
  const ingestSession = makeSession({
    ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'dbt' },
  });
  const ingestContext: ToolContext = { ...baseContext, session: ingestSession };

  // The edit swaps a snippet for identical text, so the YAML content itself is unchanged.
  const editArgs = {
    connectionId: ingestSession.connectionId,
    sourceName: 'orders',
    yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
  } as any;

  const outcome = await tool.call(editArgs, ingestContext);
  expect(outcome.structured.success).toBe(true);

  // Only the second argument (the source object) is inspected; the rest just need to be strings.
  const anyString = expect.any(String);
  const describedColumn = expect.objectContaining({
    descriptions: { ktx: expect.stringContaining('Identifier') },
  });
  const describedSource = expect.objectContaining({
    descriptions: { ktx: expect.stringContaining('orders') },
    columns: [describedColumn],
  });

  expect((ingestSession.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
    anyString,
    describedSource,
    anyString,
    anyString,
    anyString,
  );
});
});
describe('SlEditSourceTool — manifest-backed source without overlay', () => {

View file

@ -2,6 +2,7 @@ import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
import type { SemanticLayerSource } from '../types.js';
import {
BaseSemanticLayerTool,
@ -147,6 +148,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
} catch (e) {
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
}
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
// Re-serialize and write
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });

View file

@ -175,6 +175,89 @@ describe('SlWriteSourceTool — session gating', () => {
);
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
});
// A flat `description` string on the source and on a column should reach `writeSource`
// as a `descriptions: { user: ... }` map carrying the same text.
it('normalizes flat source and column descriptions before writing', async () => {
  const { tool, semanticLayerService } = makeTool();

  const flatSource = {
    name: 'orders',
    description: 'Finance orders used for invoice reconciliation.',
    table: 'public.orders',
    grain: ['id'],
    columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }],
    measures: [],
    joins: [],
  } as any;
  const writeArgs = {
    connectionId: '11111111-1111-1111-1111-111111111111',
    sourceName: 'orders',
    source: flatSource,
  } as any;

  const outcome = await tool.call(writeArgs, baseContext);
  expect(outcome.structured.success).toBe(true);

  // Only the source payload (second argument) is checked in detail.
  const anyString = expect.any(String);
  const normalizedColumn = expect.objectContaining({
    descriptions: { user: 'Stable order identifier.' },
  });
  const normalizedSource = expect.objectContaining({
    descriptions: { user: 'Finance orders used for invoice reconciliation.' },
    columns: [normalizedColumn],
  });

  expect(semanticLayerService.writeSource).toHaveBeenCalledWith(
    anyString,
    normalizedSource,
    anyString,
    anyString,
    anyString,
  );
});
// Writing a source without `table`/`sql` and without any descriptions, from a session that
// carries `ingest` metadata, should yield `descriptions.ktx` on the source and on its column.
it('fills missing descriptions for ingest-written overlays and columns', async () => {
  // Stubbed service: no existing source on disk, and the name appears in the manifest listing.
  const serviceStub = {
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue([]),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
    findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
  } as any;
  const ingestSession = makeSession({
    ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'metabase' },
    semanticLayerService: serviceStub,
  });
  const { tool } = makeTool();

  const undescribedSource = {
    name: 'mart_account_segments',
    columns: [{ name: 'is_large_contract', type: 'boolean', expr: 'contract_arr_cents >= 20000000' }],
    measures: [{ name: 'account_count', expr: 'count(account_id)' }],
  } as any;
  const writeArgs = {
    connectionId: ingestSession.connectionId,
    sourceName: 'mart_account_segments',
    source: undescribedSource,
  } as any;

  const outcome = await tool.call(writeArgs, { ...baseContext, session: ingestSession });
  expect(outcome.structured.success).toBe(true);

  // The column text is expected to contain the column name with underscores rendered as spaces.
  const anyString = expect.any(String);
  const filledColumn = expect.objectContaining({
    descriptions: {
      ktx: expect.stringContaining('is large contract'),
    },
  });
  const filledSource = expect.objectContaining({
    descriptions: {
      ktx: expect.stringContaining('mart_account_segments'),
    },
    columns: [filledColumn],
  });

  expect((ingestSession.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
    anyString,
    filledSource,
    anyString,
    anyString,
    anyString,
  );
});
});
describe('SlWriteSourceTool — disconnected-components warning in markdown', () => {

View file

@ -10,6 +10,7 @@ import {
type SemanticLayerStructured,
sourceDefinitionSchema,
} from './base-semantic-layer.tool.js';
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]);
@ -154,14 +155,16 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
semanticLayerService: SemanticLayerService,
skipIndex: boolean,
): Promise<ToolOutput<SemanticLayerStructured>> {
const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql);
const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
const isOverlay =
!('table' in normalizedSource && normalizedSource.table) && !('sql' in normalizedSource && normalizedSource.sql);
const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName);
const commitMessage = existing
? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
: `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
const yamlContent = YAML.stringify(source);
const yamlContent = YAML.stringify(normalizedSource);
const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent);
if (orphanError) {
@ -172,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent });
}
const validatedSource = source as SemanticLayerSource;
const validatedSource = normalizedSource as SemanticLayerSource;
const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource);
const validationErrors = validationResult.errors;
const validationWarnings = [...validationResult.warnings];