import type { Mock } from 'vitest';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import {
  ColumnNameCollisionError,
  composeOverlay,
  ConflictingExcludeAndOverrideError,
  enrichColumnsFromManifest,
  findDanglingSegmentRefs,
  projectManifestEntry,
  SemanticLayerService,
  toResolvedWire,
  UnknownColumnOverrideError,
} from '../../../src/context/sl/semantic-layer.service.js';
import { resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from '../../../src/context/sl/schemas.js';
import type { SemanticLayerSource } from '../../../src/context/sl/types.js';

// Module-level stub handed to SemanticLayerService as its third constructor argument.
// It is shared by every suite, so suites that assert on call counts reset the mock they use.
const pythonPort = {
  validateSources: vi.fn(),
  generateSources: vi.fn(),
  query: vi.fn(),
};

/**
 * Builds a fresh connection-catalog stub for each test.
 *
 * @param connectionType value reported by `getConnectionById`; defaults to 'SNOWFLAKE'.
 * @returns an object with mocked `listEnabledConnections`, `getConnectionById` and `executeQuery`.
 */
function connectionCatalog(connectionType = 'SNOWFLAKE') {
  return {
    listEnabledConnections: vi.fn().mockResolvedValue([]),
    getConnectionById: vi.fn().mockResolvedValue({ id: 'conn-1', name: 'conn-1', connectionType }),
    executeQuery: vi.fn(),
  };
}

// Minimal table-backed source reused as the base for the composeOverlay tests.
const baseTable: SemanticLayerSource = {
  name: 'fct_labs',
  grain: ['lab_order_id'],
  table: 'analytics.fct_labs',
  columns: [
    { name: 'lab_order_id', type: 'string' },
    { name: 'admin_user_id', type: 'string' },
    { name: 'lab_type', type: 'string' },
  ],
  joins: [],
  measures: [],
};

describe('listConnectionIdsWithNames', () => {
  it('discovers local KTX connection ids from semantic-layer directories', async () => {
    const configService = {
      listFiles: vi.fn().mockResolvedValue({
        files: [
          'semantic-layer/warehouse/_schema/public.yaml',
          'semantic-layer/dbt-main/orders.yaml',
          'semantic-layer/.gitkeep',
        ],
      }),
    };
    const catalog = connectionCatalog();
    catalog.listEnabledConnections.mockImplementation(async (ids: string[]) =>
      ids.map((id) => ({ id, name: id, connectionType: id === 'warehouse' ? 'postgres' : 'dbt' })),
    );
    const service = new SemanticLayerService(configService as never, catalog, pythonPort);

    await expect(service.listConnectionIdsWithNames()).resolves.toEqual([
      { id: 'dbt-main', name: 'dbt-main', connectionType: 'dbt' },
      { id: 'warehouse', name: 'warehouse', connectionType: 'postgres' },
    ]);
    // '.gitkeep' sits directly under semantic-layer/ and must not be reported as a connection id.
    expect(catalog.listEnabledConnections).toHaveBeenCalledWith(['dbt-main', 'warehouse']);
  });
});

describe('loadSource', () => {
  it('warns and returns null when an existing source file has invalid YAML', async () => {
    const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
    const configService = {
      // 'name: [' is an unterminated flow sequence, i.e. deliberately malformed YAML.
      readFile: vi.fn().mockResolvedValue({ content: 'name: [' }),
    };
    const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);

    await expect(service.loadSource('warehouse', 'orders')).resolves.toBeNull();
    expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml');
    expect(logger.warn).toHaveBeenCalledWith(
      expect.stringContaining('[loadSource] warehouse/orders.yaml: YAML parse failed:'),
    );
  });
});

describe('composeOverlay', () => {
  it('carries top-level segments from overlay into the composed source', () => {
    const overlay = {
      name: 'fct_labs',
      segments: [{ name: 'byol', expr: "lab_type = 'byol'", description: 'BYOL cohort' }],
    };
    const composed = composeOverlay(baseTable, overlay);
    expect(composed.segments).toHaveLength(1);
    expect(composed.segments?.[0].name).toBe('byol');
    expect(composed.segments?.[0].expr).toBe("lab_type = 'byol'");
  });

  it('preserves measure-level segments references', () => {
    const overlay = {
      name: 'fct_labs',
      segments: [{ name: 'byol', expr: "lab_type = 'byol'" }],
      measures: [
        {
          name: 'byol_subscriber_count',
          expr: 'count(distinct admin_user_id)',
          segments: ['byol'],
          description: 'BYOL subscribers',
        },
      ],
    };
    const composed = composeOverlay(baseTable, overlay);
    expect(composed.measures).toHaveLength(1);
    expect(composed.measures[0].segments).toEqual(['byol']);
  });

  it('leaves base segments unchanged when overlay does not specify segments', () => {
    const baseWithSegments: SemanticLayerSource = {
      ...baseTable,
      segments: [{ name: 'pre_existing', expr: 'is_paid = true' }],
    };
    const overlay = { name: 'fct_labs', descriptions: { user: 'no segments here' } };
    const composed = composeOverlay(baseWithSegments, overlay);
    expect(composed.segments).toEqual([{ name: 'pre_existing', expr: 'is_paid = true' }]);
  });

  it('replaces base segments when overlay provides its own (even an empty array)', () => {
    const baseWithSegments: SemanticLayerSource = {
      ...baseTable,
      segments: [{ name: 'pre_existing', expr: 'is_paid = true' }],
    };
    const overlay = { name: 'fct_labs', segments: [] };
    const composed = composeOverlay(baseWithSegments, overlay);
    expect(composed.segments).toEqual([]);
  });

  it('throws on unknown top-level overlay keys with a pointed error', () => {
    const overlay = { name: 'fct_labs', frobnicate: true };
    expect(() => composeOverlay(baseTable, overlay)).toThrow(
      /overlay for 'fct_labs' has unhandled keys \[frobnicate\]/,
    );
  });

  it('lists every unknown key in the error message, not just the first', () => {
    const overlay = { name: 'fct_labs', foo: 1, bar: 2 };
    expect(() => composeOverlay(baseTable, overlay)).toThrow(/foo, bar/);
  });

  it('still handles existing known keys without regression', () => {
    const overlay = {
      name: 'fct_labs',
      descriptions: { user: 'patient lab orders' },
      exclude_columns: ['admin_user_id'],
      columns: [{ name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" }],
      measures: [{ name: 'count_all', expr: 'count(*)' }],
    };
    const composed = composeOverlay(baseTable, overlay);
    expect(composed.columns.find((c) => c.name === 'admin_user_id')).toBeUndefined();
    expect(composed.columns.find((c) => c.name === 'is_byol')).toBeDefined();
    expect(composed.measures).toHaveLength(1);
  });

  it('applies column_overrides to same-named manifest columns', () => {
    const overlay = {
      name: 'fct_labs',
      column_overrides: [
        { name: 'lab_order_id', descriptions: { user: 'Primary key' } },
        { name: 'admin_user_id', descriptions: { user: 'FK to admin_users' } },
      ],
    };
    const composed = composeOverlay(baseTable, overlay);
    // No duplicate columns appended — same-named overlay entries merged onto the base.
    expect(composed.columns).toHaveLength(3);
    const labOrder = composed.columns.find((c) => c.name === 'lab_order_id');
    expect(labOrder?.type).toBe('string');
    expect(labOrder?.descriptions).toEqual({ user: 'Primary key' });
    const adminUser = composed.columns.find((c) => c.name === 'admin_user_id');
    expect(adminUser?.type).toBe('string');
    expect(adminUser?.descriptions).toEqual({ user: 'FK to admin_users' });
  });

  it('appends computed columns alongside column overrides', () => {
    const overlay = {
      name: 'fct_labs',
      column_overrides: [
        { name: 'lab_order_id', descriptions: { user: 'PK doc' } },
      ],
      columns: [
        { name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" },
      ],
    };
    const composed = composeOverlay(baseTable, overlay);
    // Three base columns plus the one computed column.
    expect(composed.columns).toHaveLength(4);
    expect(composed.columns.find((c) => c.name === 'is_byol')?.expr).toBe("lab_type = 'byol'");
    expect(composed.columns.find((c) => c.name === 'lab_order_id')?.type).toBe('string');
  });

  it('rejects column_overrides that target unknown manifest columns', () => {
    expect(() =>
      composeOverlay(baseTable, {
        name: 'fct_labs',
        column_overrides: [{ name: 'missing', descriptions: { user: 'Nope' } }],
      }),
    ).toThrow(UnknownColumnOverrideError);
  });

  it('rejects computed columns whose names collide with manifest columns', () => {
    expect(() =>
      composeOverlay(baseTable, {
        name: 'fct_labs',
        columns: [{ name: 'lab_order_id', type: 'string', expr: 'lab_order_id' }],
      }),
    ).toThrow(ColumnNameCollisionError);
  });

  it('rejects exclude/override conflicts before applying exclusions', () => {
    expect(() =>
      composeOverlay(baseTable, {
        name: 'fct_labs',
        exclude_columns: ['lab_order_id'],
        column_overrides: [{ name: 'lab_order_id', descriptions: { user: 'Hidden PK' } }],
      }),
    ).toThrow(ConflictingExcludeAndOverrideError);
  });

  it('merges overlay descriptions (plural) with base descriptions keyed by source', () => {
    const baseWithDescriptions: SemanticLayerSource = {
      ...baseTable,
      descriptions: { db: 'scan-derived description', ai: 'AI description' },
    };
    const overlay = {
      name: 'fct_labs',
      descriptions: { dbt: 'dbt description', ai: 'AI description (overridden)' },
    };
    const composed = composeOverlay(baseWithDescriptions, overlay);
    // Keys only on the base survive, shared keys take the overlay value, new keys are added.
    expect(composed.descriptions).toEqual({
      db: 'scan-derived description',
      ai: 'AI description (overridden)',
      dbt: 'dbt description',
    });
  });

  it('replaces manifest usage only when an overlay explicitly provides usage', () => {
    const baseWithUsage: SemanticLayerSource = {
      ...baseTable,
      usage: {
        narrative: 'Orders are commonly queried by lifecycle status.',
        frequencyTier: 'high',
        commonFilters: ['status'],
        commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
      },
    };

    // An overlay without `usage` leaves the base usage untouched.
    expect(composeOverlay(baseWithUsage, { name: 'fct_labs', measures: [] }).usage).toEqual(baseWithUsage.usage);

    const composed = composeOverlay(baseWithUsage, {
      name: 'fct_labs',
      usage: {
        narrative: 'Overlay-curated usage note.',
        frequencyTier: 'mid',
        commonFilters: ['created_at'],
        commonGroupBys: ['created_at'],
        commonJoins: [],
      },
    });
    expect(composed.usage).toEqual({
      narrative: 'Overlay-curated usage note.',
      frequencyTier: 'mid',
      commonFilters: ['created_at'],
      commonGroupBys: ['created_at'],
      commonJoins: [],
    });
  });
});

describe('enrichColumnsFromManifest', () => {
  // Manifest entry that the standalone sources below inherit column metadata from.
  const manifest: SemanticLayerSource = {
    name: 'CONSIGNMENTS',
    table: 'ANALYTICS.MARTS.CONSIGNMENTS',
    grain: ['CONSIGNED_ITEM_ID'],
    columns: [
      {
        name: 'CONSIGNED_ITEM_ID',
        type: 'string',
        descriptions: { ai: 'Unique identifier for the consigned item record.' },
      },
      {
        name: 'CASH_ADV_AMOUNT',
        type: 'number',
        descriptions: { ai: 'Amount of cash advance disbursed to consigners.' },
      },
      {
        name: 'CONSIGNMENT_CREATED_AT',
        type: 'time',
        role: 'time',
        descriptions: { ai: 'Timestamp when the consignment was created.' },
      },
    ],
    joins: [],
    measures: [],
  };

  it('fills blank type and descriptions on source columns from the manifest', () => {
    const source: SemanticLayerSource = {
      name: 'aav_consignments',
      sql: 'SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT FROM MARTS.CONSIGNMENTS WHERE ...',
      inherits_columns_from: 'CONSIGNMENTS',
      grain: ['CONSIGNED_ITEM_ID'],
      columns: [
        { name: 'CONSIGNED_ITEM_ID', type: '' },
        { name: 'CASH_ADV_AMOUNT', type: '' },
      ],
      joins: [],
      measures: [],
    };
    const enriched = enrichColumnsFromManifest(source, manifest);
    expect(enriched.columns[0]).toEqual({
      name: 'CONSIGNED_ITEM_ID',
      type: 'string',
      descriptions: { ai: 'Unique identifier for the consigned item record.' },
    });
    expect(enriched.columns[1]).toEqual({
      name: 'CASH_ADV_AMOUNT',
      type: 'number',
      descriptions: { ai: 'Amount of cash advance disbursed to consigners.' },
    });
  });

  it('preserves a local description if the source already declared one', () => {
    const source: SemanticLayerSource = {
      name: 'aav_consignments',
      sql: 'SELECT CONSIGNED_ITEM_ID FROM ...',
      inherits_columns_from: 'CONSIGNMENTS',
      grain: ['CONSIGNED_ITEM_ID'],
      columns: [
        {
          name: 'CONSIGNED_ITEM_ID',
          type: 'string',
          descriptions: { ai: 'AAV-specific note: always non-null in this filtered view.' },
        },
      ],
      joins: [],
      measures: [],
    };
    const enriched = enrichColumnsFromManifest(source, manifest);
    expect(enriched.columns[0].descriptions).toEqual({
      ai: 'AAV-specific note: always non-null in this filtered view.',
    });
  });

  it('passes through columns absent from the manifest unchanged', () => {
    const source: SemanticLayerSource = {
      name: 'aav_consignments',
      sql: 'SELECT ALT_VALUE_COMBINED, my_derived FROM ...',
      inherits_columns_from: 'CONSIGNMENTS',
      grain: ['CONSIGNED_ITEM_ID'],
      columns: [{ name: 'my_derived', type: 'number', expr: 'CASH_ADV_AMOUNT * 2' }],
      joins: [],
      measures: [],
    };
    const enriched = enrichColumnsFromManifest(source, manifest);
    expect(enriched.columns[0]).toEqual({
      name: 'my_derived',
      type: 'number',
      expr: 'CASH_ADV_AMOUNT * 2',
    });
  });

  it('copies role from the manifest when the source omits it', () => {
    const source: SemanticLayerSource = {
      name: 'aav_consignments',
      sql: 'SELECT CONSIGNMENT_CREATED_AT FROM ...',
      inherits_columns_from: 'CONSIGNMENTS',
      grain: ['CONSIGNED_ITEM_ID'],
      columns: [{ name: 'CONSIGNMENT_CREATED_AT', type: '' }],
      joins: [],
      measures: [],
    };
    const enriched = enrichColumnsFromManifest(source, manifest);
    expect(enriched.columns[0].role).toBe('time');
    expect(enriched.columns[0].type).toBe('time');
  });

  it('returns the source unchanged when manifestEntry is null/undefined', () => {
    const source: SemanticLayerSource = {
      name: 'aav_consignments',
      sql: 'SELECT FOO FROM ...',
      grain: ['FOO'],
      columns: [{ name: 'FOO', type: '' }],
      joins: [],
      measures: [],
    };
    const enriched = enrichColumnsFromManifest(source, null);
    expect(enriched).toEqual(source);
  });
});

describe('sourceDefinitionSchema', () => {
  it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => {
    const result = sourceDefinitionSchema.safeParse({
      name: 'orders',
      descriptions: { dbt: 'Order facts from dbt.' },
      table: 'public.orders',
      grain: ['id'],
      columns: [
        {
          name: 'status',
          type: 'string',
          descriptions: { dbt: 'Order lifecycle status.' },
          constraints: { dbt: { not_null: true, unique: true } },
          enum_values: { dbt: ['placed', 'shipped'] },
          tests: {
            dbt: [{ name: 'accepted_values', package: 'dbt' }],
            dbt_by_package: { dbt: ['accepted_values'] },
          },
        },
      ],
      joins: [],
      measures: [],
      tags: { dbt: ['mart', 'finance'] },
      freshness: { dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } } },
      default_time_dimension: { dbt: 'updated_at' },
    });

    expect(result.success).toBe(true);
    // Early return narrows `result` to the success variant for the assertions below.
    if (!result.success) {
      return;
    }
    expect(result.data.descriptions).toEqual({ dbt: 'Order facts from dbt.' });
    expect(result.data.columns[0]).toMatchObject({
      descriptions: { dbt: 'Order lifecycle status.' },
      constraints: { dbt: { not_null: true, unique: true } },
      enum_values: { dbt: ['placed', 'shipped'] },
      tests: {
        dbt: [{ name: 'accepted_values', package: 'dbt' }],
        dbt_by_package: { dbt: ['accepted_values'] },
      },
    });
    expect(result.data.tags).toEqual({ dbt: ['mart', 'finance'] });
    expect(result.data.freshness).toEqual({
      dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } },
    });
  });

  it('accepts historic SQL usage on standalone sources', () => {
    const result = sourceDefinitionSchema.safeParse({
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'string' }],
      joins: [],
      measures: [],
      usage: {
        narrative: 'Orders are queried for fulfillment and revenue analysis.',
        frequencyTier: 'high',
        commonFilters: ['status', 'created_at'],
        commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
        externalOwner: 'analytics',
      },
    });

    expect(result.success).toBe(true);
    if (!result.success) {
      return;
    }
    expect(result.data.usage).toMatchObject({
      narrative: 'Orders are queried for fulfillment and revenue analysis.',
      frequencyTier: 'high',
      commonFilters: ['status', 'created_at'],
      commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
      externalOwner: 'analytics',
    });
  });

  it("rejects qualified grain names (e.g. 'activity.account_id')", () => {
    const result = sourceDefinitionSchema.safeParse({
      name: 'activity',
      table: 'public.activity',
      grain: ['activity.account_id'],
      columns: [{ name: 'account_id', type: 'number' }],
      joins: [],
      measures: [],
    });
    expect(result.success).toBe(false);
    if (result.success) return;
    expect(result.error.issues.some((i) => i.path.join('.').startsWith('grain'))).toBe(true);
  });

  it('rejects qualified column names', () => {
    const result = sourceDefinitionSchema.safeParse({
      name: 'activity',
      table: 'public.activity',
      grain: ['account_id'],
      columns: [{ name: 'activity.account_id', type: 'number' }],
      joins: [],
      measures: [],
    });
    expect(result.success).toBe(false);
    if (result.success) return;
    expect(result.error.issues.some((i) => i.path.join('.').startsWith('columns'))).toBe(true);
  });
});

describe('sourceOverlaySchema', () => {
  it('accepts column_overrides and keeps columns computed-only', () => {
    const result = sourceOverlaySchema.safeParse({
      name: 'orders',
      column_overrides: [{ name: 'status', descriptions: { user: 'Lifecycle status' } }],
      columns: [{ name: 'is_paid', type: 'boolean', expr: "status = 'paid'" }],
    });
    expect(result.success).toBe(true);
  });

  it('rejects typeless overlay columns and singular description on overrides', () => {
    const result = sourceOverlaySchema.safeParse({
      name: 'orders',
      column_overrides: [{ name: 'status', description: 'Lifecycle status' }],
      columns: [{ name: 'status', descriptions: { user: 'Lifecycle status' } }],
    });
    expect(result.success).toBe(false);
    if (!result.success) {
      const paths = result.error.issues.map((issue) => issue.path.join('.'));
      expect(paths).toContain('column_overrides.0');
      expect(paths).toContain('columns.0.type');
      expect(paths).toContain('columns.0.expr');
    }
  });
});

describe('toResolvedWire', () => {
  it('strips TS-only authoring and provenance fields before the Python boundary', () => {
    const wire = toResolvedWire({
      name: 'orders',
      table: 'public.orders',
      inherits_columns_from: 'orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'string' }],
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one', source: 'formal' }],
      measures: [],
      usage: {
        narrative: 'Frequently queried orders.',
        frequencyTier: 'high',
        commonFilters: ['status'],
        commonJoins: [],
      },
    });

    // `inherits_columns_from`, `usage` and the join-level `source` are expected to be dropped.
    expect(wire).toEqual({
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'string' }],
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
      measures: [],
    });
    expect(resolvedSourceSchema.parse(wire)).toEqual(wire);
  });
});

describe('projectManifestEntry', () => {
  it('projects manifest usage onto the semantic-layer source', () => {
    const source = projectManifestEntry('orders', {
      table: 'public.orders',
      usage: {
        narrative: 'Orders are frequently filtered by status.',
        frequencyTier: 'high',
        commonFilters: ['status'],
        commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
      },
      columns: [
        { name: 'id', type: 'string', pk: true },
        { name: 'status', type: 'string' },
      ],
    });

    expect(source.usage).toEqual({
      narrative: 'Orders are frequently filtered by status.',
      frequencyTier: 'high',
      commonFilters: ['status'],
      commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
    });
  });
});

describe('findManifestEntryByTableRef', () => {
  let configService: {
    listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>;
    readFile: Mock<(path: string) => Promise<{ content: string }>>;
  };
  let service: SemanticLayerService;

  beforeEach(() => {
    configService = {
      listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>().mockResolvedValue({
        files: ['semantic-layer/conn-1/_schema/marts.yaml'],
      }),
      readFile: vi.fn<(path: string) => Promise<{ content: string }>>().mockResolvedValue({
        // YAML nesting is significant: CONSIGNMENTS must be a child of `tables`,
        // and `table`/`columns` children of CONSIGNMENTS.
        content: [
          'tables:',
          '  CONSIGNMENTS:',
          '    table: ANALYTICS.MARTS.CONSIGNMENTS',
          '    columns:',
          '      - { name: CONSIGNED_ITEM_ID, type: string, pk: true }',
        ].join('\n'),
      }),
    };
    service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
  });

  it('finds by exact bare manifest key', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'CONSIGNMENTS');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('finds by fully-qualified table path', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'ANALYTICS.MARTS.CONSIGNMENTS');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('finds by schema-qualified suffix', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'MARTS.CONSIGNMENTS');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('matches case-insensitively on table path', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'analytics.marts.consignments');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('returns null when nothing matches', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'NOT_A_TABLE');
    expect(entry).toBeNull();
  });
});

describe('loadAllSources — standalone enrichment via inherits_columns_from', () => {
  let configService: {
    listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>;
    readFile: Mock<(path: string) => Promise<{ content: string }>>;
  };
  let service: SemanticLayerService;

  beforeEach(() => {
    configService = {
      listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>(),
      readFile: vi.fn<(path: string) => Promise<{ content: string }>>(),
    };
    service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
  });

  it('preserves dbt metadata when projecting manifest-backed sources', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1' || dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      // Keep `return` and its operand on one line: a line break here would trigger
      // automatic semicolon insertion and make the mock return undefined.
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockResolvedValue({
      content: [
        'tables:',
        '  orders:',
        '    table: public.orders',
        '    tags: { dbt: [mart] }',
        '    freshness:',
        '      dbt:',
        '        loaded_at_field: updated_at',
        '    columns:',
        '      - name: status',
        '        type: string',
        '        constraints: { dbt: { not_null: true } }',
        '        enum_values: { dbt: [placed, shipped] }',
        '        tests:',
        '          dbt:',
        '            - { name: accepted_values, package: dbt }',
      ].join('\n'),
    });

    const { sources, loadErrors } = await service.loadAllSources('conn-1');

    expect(loadErrors).toEqual([]);
    expect(sources[0]).toMatchObject({
      name: 'orders',
      tags: { dbt: ['mart'] },
      freshness: { dbt: { loaded_at_field: 'updated_at' } },
      columns: [
        {
          name: 'status',
          constraints: { dbt: { not_null: true } },
          enum_values: { dbt: ['placed', 'shipped'] },
          tests: { dbt: [{ name: 'accepted_values', package: 'dbt' }] },
        },
      ],
    });
  });

  it('fills blank columns on a standalone source from the manifest entry it points at', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [schemaPath, standalonePath] });
      }
      if (dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockImplementation((path: string) => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  CONSIGNMENTS:',
            '    table: ANALYTICS.MARTS.CONSIGNMENTS',
            '    columns:',
            '      - name: CONSIGNED_ITEM_ID',
            '        type: string',
            '        descriptions: { ai: "Unique consigned-item id." }',
            '      - name: CASH_ADV_AMOUNT',
            '        type: number',
            '        descriptions: { ai: "Cash advance amount." }',
          ].join('\n'),
        });
      }
      if (path === standalonePath) {
        return Promise.resolve({
          content: [
            'name: aav_consignments',
            'sql: |',
            '  SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT FROM ANALYTICS.MARTS.CONSIGNMENTS WHERE x',
            'inherits_columns_from: CONSIGNMENTS',
            'grain: [CONSIGNED_ITEM_ID]',
            'columns:',
            '  - { name: CONSIGNED_ITEM_ID }',
            '  - { name: CASH_ADV_AMOUNT }',
          ].join('\n'),
        });
      }
      return Promise.reject(new Error(`Unexpected readFile: ${path}`));
    });

    const { sources, loadErrors } = await service.loadAllSources('conn-1');

    expect(loadErrors).toEqual([]);
    const aav = sources.find((s) => s.name === 'aav_consignments');
    expect(aav).toBeDefined();
    expect(aav?.columns).toEqual([
      { name: 'CONSIGNED_ITEM_ID', type: 'string', descriptions: { ai: 'Unique consigned-item id.' } },
      { name: 'CASH_ADV_AMOUNT', type: 'number', descriptions: { ai: 'Cash advance amount.' } },
    ]);
  });

  it('accepts a fully-qualified path in inherits_columns_from', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [schemaPath, standalonePath] });
      }
      if (dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockImplementation((path: string) => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  CONSIGNMENTS:',
            '    table: ANALYTICS.MARTS.CONSIGNMENTS',
            '    columns:',
            '      - { name: CONSIGNED_ITEM_ID, type: string }',
          ].join('\n'),
        });
      }
      // Any other path is served the standalone source definition.
      return Promise.resolve({
        content: [
          'name: aav_consignments',
          'sql: SELECT 1',
          'inherits_columns_from: ANALYTICS.MARTS.CONSIGNMENTS',
          'grain: [CONSIGNED_ITEM_ID]',
          'columns:',
          '  - { name: CONSIGNED_ITEM_ID }',
        ].join('\n'),
      });
    });

    const { sources, loadErrors } = await service.loadAllSources('conn-1');

    expect(loadErrors).toEqual([]);
    const aav = sources.find((s) => s.name === 'aav_consignments');
    expect(aav?.columns[0].type).toBe('string');
  });

  it('passes the source through unchanged if inherits_columns_from misses', async () => {
    const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [standalonePath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockResolvedValue({
      content: [
        'name: aav_consignments',
        'sql: SELECT 1',
        'inherits_columns_from: NO_SUCH_TABLE',
        'grain: [FOO]',
        'columns:',
        '  - { name: FOO, type: string }',
      ].join('\n'),
    });

    const { sources, loadErrors } = await service.loadAllSources('conn-1');

    expect(loadErrors).toEqual([]);
    const aav = sources.find((s) => s.name === 'aav_consignments');
    expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]);
  });

  it('loads standalone source and column description maps', async () => {
    const standalonePath = 'semantic-layer/conn-1/orders.yaml';
    configService.listFiles.mockResolvedValue({ files: [standalonePath] });
    configService.readFile.mockResolvedValue({
      content: [
        'name: orders',
        'descriptions:',
        '  user: Finance orders used for invoice reconciliation.',
        'table: public.orders',
        'grain: [id]',
        'columns:',
        '  - name: id',
        '    type: string',
        '    descriptions:',
        '      user: Stable order identifier.',
      ].join('\n'),
    });

    const { sources, loadErrors } = await service.loadAllSources('conn-1');

    expect(loadErrors).toEqual([]);
    expect(sources[0]).toMatchObject({
      name: 'orders',
      descriptions: { user: 'Finance orders used for invoice reconciliation.' },
      columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }],
    });
  });

  it('reports file-attributed errors for overlay columns that shadow manifest columns', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    const overlayPath = 'semantic-layer/conn-1/orders.yaml';
    configService.listFiles.mockResolvedValue({ files: [schemaPath, overlayPath] });
    configService.readFile.mockImplementation((path: string) => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  orders:',
            '    table: public.orders',
            '    columns:',
            '      - { name: id, type: string, pk: true }',
          ].join('\n'),
        });
      }
      return Promise.resolve({
        content: ['name: orders', 'columns:', '  - name: id', '    descriptions: { user: "Stable id." }'].join('\n'),
      });
    });

    const { loadErrors } = await service.loadAllSources('conn-1');

    expect(loadErrors.join('\n')).toContain(overlayPath);
    expect(loadErrors.join('\n')).toContain("column 'id' in columns already exists on manifest source 'orders'");
    expect(loadErrors.join('\n')).not.toContain('column_overrides');
  });

  it('reports and logs directory listing failures instead of treating them as empty sources', async () => {
    const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
    configService.listFiles.mockRejectedValue(new Error('permission denied'));
    // Rebuild the service so the logger stub is injected.
    service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);

    const { sources, loadErrors } = await service.loadAllSources('conn-1');

    expect(sources).toEqual([]);
    expect(loadErrors).toEqual([
      'Failed to list semantic-layer files under semantic-layer/conn-1: permission denied',
    ]);
    expect(logger.warn).toHaveBeenCalledWith(
      'Failed to list semantic-layer files under semantic-layer/conn-1: permission denied',
    );
  });
});

describe('validateWithProposedSource', () => {
  let configService: {
    listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>;
    readFile: Mock<(path: string) => Promise<{ content: string }>>;
  };
  let service: SemanticLayerService;

  beforeEach(() => {
    // pythonPort is module-level, so clear recorded calls and stubbed results between tests.
    pythonPort.validateSources.mockReset();
    configService = {
      listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>().mockResolvedValue({
        files: [],
      }),
      readFile: vi.fn<(path: string) => Promise<{ content: string }>>(),
    };
    service = new SemanticLayerService(configService as never, connectionCatalog('BIGQUERY'), pythonPort);
  });

  it('uses the connection warehouse dialect, not hardcoded postgres', async () => {
    pythonPort.validateSources.mockResolvedValue({
      data: { errors: [], warnings: [] },
    });

    await service.validateWithProposedSource('conn-1', {
      name: 'std',
      table: 'analytics.std',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [],
    });

    expect(pythonPort.validateSources).toHaveBeenCalledWith(
      expect.objectContaining({
        dialect: 'bigquery',
      }),
    );
  });

  it('composes a bare overlay with its manifest base before validating', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/core.yaml';
    const listFilesImpl = (dir: string): Promise<{ files: string[] }> => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [schemaPath, 'semantic-layer/conn-1/fct_orders.yaml'] });
      }
      if (dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    };
    const readFileImpl = (path: string): Promise<{ content: string }> => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  fct_orders:',
            '    table: analytics.fct_orders',
            '    columns:',
            '      - { name: id, type: string, pk: true }',
            '      - { name: amount, type: number }',
          ].join('\n'),
        });
      }
      if (path === 'semantic-layer/conn-1/fct_orders.yaml') {
        return Promise.resolve({ content: 'name: fct_orders\nmeasures: []\n' });
      }
      return Promise.reject(new Error(`Unexpected readFile: ${path}`));
    };
    configService.listFiles.mockImplementation(listFilesImpl);
    configService.readFile.mockImplementation(readFileImpl);
    pythonPort.validateSources.mockResolvedValue({
      data: { errors: [], warnings: [] },
    });

    const overlay: SemanticLayerSource = {
      name: 'fct_orders',
      grain: ['id'],
      columns: [],
      joins: [],
      measures: [{ name: 'total_amount', expr: 'sum(amount)' }],
    };
    await service.validateWithProposedSource('conn-1', overlay);

    expect(pythonPort.validateSources).toHaveBeenCalledTimes(1);
    // The mock's call arguments are untyped; view the forwarded sources as loose records.
    const sources = (pythonPort.validateSources.mock.calls[0][0]?.sources ?? []) as Array<Record<string, unknown>>;
    const composed = sources.find((s) => s.name === 'fct_orders');
    expect(composed).toBeDefined();
    expect(composed?.table).toBe('analytics.fct_orders');
    expect(composed?.measures).toEqual([{ name: 'total_amount', expr: 'sum(amount)' }]);
  });

  it('returns a pointed error when a bare overlay has no manifest base', async () => {
    configService.listFiles.mockResolvedValue({ files: [] });
    const overlay: SemanticLayerSource = {
      name: 'orphan',
      grain: [],
      columns: [],
      joins: [],
      measures: [],
    };

    const result = await service.validateWithProposedSource('conn-1', overlay);

    expect(result.errors[0]).toMatch(/Overlay 'orphan' has no matching manifest entry/);
    expect(pythonPort.validateSources).not.toHaveBeenCalled();
  });

  it('rejects table-backed sources whose declared columns are absent from a matching physical manifest', async () => {
    const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/dbt-main') {
        return Promise.resolve({ files: [] });
      }
      if (dir === 'semantic-layer') {
        return Promise.resolve({ files: [schemaPath] });
      }
      if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
        return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockImplementation((path: string) => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  int_procurement_qualifying_actions:',
            '    table: orbit_analytics.int_procurement_qualifying_actions',
            '    columns:',
            '      - { name: action_id, type: string }',
            '      - { name: account_id, type: string }',
            '      - { name: user_id, type: string }',
            '      - { name: action_date, type: time }',
            '      - { name: action_type, type: string }',
          ].join('\n'),
        });
      }
      return Promise.reject(new Error(`Unexpected readFile: ${path}`));
    });
    pythonPort.validateSources.mockResolvedValue({
      data: { errors: [], warnings: [] },
    });

    const result = await service.validateWithProposedSource('dbt-main', {
      name: 'int_procurement_qualifying_actions',
      table: 'orbit_analytics.int_procurement_qualifying_actions',
      grain: ['purchase_request_id'],
      columns: [
        { name: 'purchase_request_id', type: 'string' },
        { name: 'account_id', type: 'string' },
        { name: 'requester_user_id', type: 'string' },
        { name: 'action_week', type: 'time' },
      ],
      joins: [],
      measures: [{ name: 'qualifying_action_count', expr: 'count(purchase_request_id)' }],
    });

    expect(result.errors.join('\n')).toMatch(/declared column\(s\) absent from physical table/);
    expect(result.errors.join('\n')).toMatch(/purchase_request_id/);
    expect(result.errors.join('\n')).toMatch(/requester_user_id/);
    expect(result.errors.join('\n')).toMatch(/action_week/);
    expect(result.errors.join('\n')).toMatch(/measure "qualifying_action_count" references unknown column\(s\)/);
  });

  it('keeps valid table-backed sources clean when a physical manifest matches', async () => {
    const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/dbt-main') {
        return Promise.resolve({ files: [] });
      }
      if (dir === 'semantic-layer') {
        return Promise.resolve({ files: [schemaPath] });
      }
      if (dir ===
'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') { return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] }); } return Promise.resolve({ files: [] }); }); configService.readFile.mockResolvedValue({ content: [ 'tables:', ' mart_revenue_daily:', ' table: orbit_analytics.mart_revenue_daily', ' columns:', ' - { name: revenue_date, type: time }', ' - { name: gross_revenue_cents, type: number }', ' - { name: credits_cents, type: number }', ' - { name: refunds_cents, type: number }', ' - { name: net_revenue_cents, type: number }', ].join('\n'), }); pythonPort.validateSources.mockResolvedValue({ data: { errors: [], warnings: [] }, }); const result = await service.validateWithProposedSource('dbt-main', { name: 'mart_revenue_daily', table: 'orbit_analytics.mart_revenue_daily', grain: ['revenue_date'], columns: [ { name: 'revenue_date', type: 'time' }, { name: 'gross_revenue_cents', type: 'number' }, { name: 'credits_cents', type: 'number' }, { name: 'refunds_cents', type: 'number' }, { name: 'net_revenue_cents', type: 'number' }, ], joins: [], measures: [{ name: 'net_revenue', expr: 'sum(net_revenue_cents)' }], }); expect(result.errors).toEqual([]); }); it('allows SQL syntax tokens and cast types in physical expression validation', async () => { const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml'; configService.listFiles.mockImplementation((dir: string) => { if (dir === 'semantic-layer/dbt-main') { return Promise.resolve({ files: [] }); } if (dir === 'semantic-layer') { return Promise.resolve({ files: [schemaPath] }); } if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') { return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? 
[schemaPath] : [] }); } return Promise.resolve({ files: [] }); }); configService.readFile.mockResolvedValue({ content: [ 'tables:', ' mart_revenue_daily:', ' table: orbit_analytics.mart_revenue_daily', ' columns:', ' - { name: order_id, type: string }', ' - { name: revenue_date, type: time }', ' - { name: amount, type: number }', ' - { name: status, type: string }', ' - { name: created_at, type: time }', ].join('\n'), }); pythonPort.validateSources.mockResolvedValue({ data: { errors: [], warnings: [] }, }); const result = await service.validateWithProposedSource('dbt-main', { name: 'mart_revenue_daily', table: 'orbit_analytics.mart_revenue_daily', grain: ['order_id'], columns: [ { name: 'order_id', type: 'string' }, { name: 'revenue_date', type: 'time' }, { name: 'amount', type: 'number' }, { name: 'status', type: 'string' }, { name: 'created_at', type: 'time' }, { name: 'status_text', type: 'string', expr: 'status::text' }, ], segments: [{ name: 'current_or_paid', expr: "created_at <= current_date OR status = 'paid'" }], joins: [], measures: [ { name: 'paid_amount', expr: "sum(amount) FILTER (WHERE status = 'paid')" }, { name: 'cast_amount_count', expr: 'count(cast(amount as text))' }, ], }); expect(result.errors).toEqual([]); }); it('rejects join keys that are absent from matched physical sources', async () => { const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml'; configService.listFiles.mockImplementation((dir: string) => { if (dir === 'semantic-layer/dbt-main') { return Promise.resolve({ files: [] }); } if (dir === 'semantic-layer') { return Promise.resolve({ files: [schemaPath] }); } if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') { return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? 
[schemaPath] : [] }); } return Promise.resolve({ files: [] }); }); configService.readFile.mockResolvedValue({ content: [ 'tables:', ' activity:', ' table: orbit_analytics.activity', ' columns:', ' - { name: account_id, type: string }', ' accounts:', ' table: orbit_analytics.accounts', ' columns:', ' - { name: account_id, type: string }', ].join('\n'), }); pythonPort.validateSources.mockResolvedValue({ data: { errors: [], warnings: [] }, }); const result = await service.validateWithProposedSource('dbt-main', { name: 'activity', table: 'orbit_analytics.activity', grain: ['account_id'], columns: [{ name: 'account_id', type: 'string' }], joins: [{ to: 'accounts', on: 'activity.account_name = accounts.account_uuid', relationship: 'many_to_one' }], measures: [], }); expect(result.errors.join('\n')).toMatch(/local column "account_name"/); expect(result.errors.join('\n')).toMatch(/target column "account_uuid"/); }); }); describe('findDanglingSegmentRefs', () => { it('returns empty when every measure segment resolves', () => { const source = { segments: [{ name: 'byol' }, { name: 'paid' }], measures: [ { name: 'byol_count', segments: ['byol'] }, { name: 'paid_count', segments: ['paid', 'byol'] }, ], }; expect(findDanglingSegmentRefs(source)).toEqual([]); }); it('flags measures whose segment reference does not exist on the source', () => { const source = { segments: [{ name: 'byol' }], measures: [{ name: 'broken', segments: ['byol', 'missing'] }], }; const refs = findDanglingSegmentRefs(source); expect(refs).toHaveLength(1); expect(refs[0]).toMatch(/measure 'broken' references unknown segment 'missing'/); }); it('flags when a source has zero segments but measures reference one', () => { const source = { measures: [{ name: 'broken', segments: ['byol'] }], }; const refs = findDanglingSegmentRefs(source); expect(refs).toHaveLength(1); expect(refs[0]).toMatch(/unknown segment 'byol'/); }); it('is a no-op for sources with no measures or no segment references', () => { 
expect(findDanglingSegmentRefs({ measures: [{ name: 'simple', expr: 'count(*)' }] })).toEqual([]); expect(findDanglingSegmentRefs({})).toEqual([]); }); });