mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat: carry historic sql usage in semantic sources
This commit is contained in:
parent
d73a54e8c7
commit
f17053061d
4 changed files with 103 additions and 0 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import { z } from 'zod';
|
||||
import { tableUsageOutputSchema } from '../ingest/adapters/historic-sql/skill-schemas.js';
|
||||
|
||||
// Literal vocabularies — kept in lockstep with the Python Pydantic model at
|
||||
// python/ktx-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
|
||||
|
|
@ -125,6 +126,7 @@ export const sourceDefinitionSchema = z
|
|||
default_time_dimension: defaultTimeDimensionDbtSchema.optional(),
|
||||
tags: sourceKeyedStringArraySchema.optional(),
|
||||
freshness: sourceFreshnessSchema.optional(),
|
||||
usage: tableUsageOutputSchema.optional(),
|
||||
})
|
||||
.strict()
|
||||
.refine((s) => (s.table || s.sql) && !(s.table && s.sql), {
|
||||
|
|
@ -145,6 +147,7 @@ export const sourceOverlaySchema = z
|
|||
exclude_columns: z.array(z.string()).optional(),
|
||||
disable_joins: z.array(z.string()).optional(),
|
||||
default_time_dimension: defaultTimeDimensionDbtSchema.optional(),
|
||||
usage: tableUsageOutputSchema.optional(),
|
||||
})
|
||||
.strict();
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import {
|
|||
composeOverlay,
|
||||
enrichColumnsFromManifest,
|
||||
findDanglingSegmentRefs,
|
||||
projectManifestEntry,
|
||||
SemanticLayerService,
|
||||
} from './semantic-layer.service.js';
|
||||
import { sourceDefinitionSchema } from './schemas.js';
|
||||
|
|
@ -129,6 +130,39 @@ describe('composeOverlay', () => {
|
|||
dbt: 'dbt description',
|
||||
});
|
||||
});
|
||||
|
||||
it('replaces manifest usage only when an overlay explicitly provides usage', () => {
|
||||
const baseWithUsage: SemanticLayerSource = {
|
||||
...baseTable,
|
||||
usage: {
|
||||
narrative: 'Orders are commonly queried by lifecycle status.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
|
||||
},
|
||||
};
|
||||
|
||||
expect(composeOverlay(baseWithUsage, { name: 'fct_labs', measures: [] }).usage).toEqual(baseWithUsage.usage);
|
||||
|
||||
const composed = composeOverlay(baseWithUsage, {
|
||||
name: 'fct_labs',
|
||||
usage: {
|
||||
narrative: 'Overlay-curated usage note.',
|
||||
frequencyTier: 'mid',
|
||||
commonFilters: ['created_at'],
|
||||
commonGroupBys: ['created_at'],
|
||||
commonJoins: [],
|
||||
},
|
||||
});
|
||||
|
||||
expect(composed.usage).toEqual({
|
||||
narrative: 'Overlay-curated usage note.',
|
||||
frequencyTier: 'mid',
|
||||
commonFilters: ['created_at'],
|
||||
commonGroupBys: ['created_at'],
|
||||
commonJoins: [],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('enrichColumnsFromManifest', () => {
|
||||
|
|
@ -299,6 +333,61 @@ describe('sourceDefinitionSchema', () => {
|
|||
dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } },
|
||||
});
|
||||
});
|
||||
|
||||
it('accepts historic SQL usage on standalone sources', () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'string' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
usage: {
|
||||
narrative: 'Orders are queried for fulfillment and revenue analysis.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status', 'created_at'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
|
||||
externalOwner: 'analytics',
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
if (!result.success) {
|
||||
return;
|
||||
}
|
||||
expect(result.data.usage).toMatchObject({
|
||||
narrative: 'Orders are queried for fulfillment and revenue analysis.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status', 'created_at'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
|
||||
externalOwner: 'analytics',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('projectManifestEntry', () => {
|
||||
it('projects manifest usage onto the semantic-layer source', () => {
|
||||
const source = projectManifestEntry('orders', {
|
||||
table: 'public.orders',
|
||||
usage: {
|
||||
narrative: 'Orders are frequently filtered by status.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
|
||||
},
|
||||
columns: [
|
||||
{ name: 'id', type: 'string', pk: true },
|
||||
{ name: 'status', type: 'string' },
|
||||
],
|
||||
});
|
||||
|
||||
expect(source.usage).toEqual({
|
||||
narrative: 'Orders are frequently filtered by status.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('findManifestEntryByTableRef', () => {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import YAML from 'yaml';
|
||||
import type { KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-schemas.js';
|
||||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { isOverlaySource, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
|
|
@ -884,6 +885,7 @@ export interface ManifestTableEntry {
|
|||
joins?: ManifestJoinEntry[];
|
||||
tags?: { dbt?: string[] };
|
||||
freshness?: { dbt?: { raw?: unknown; loaded_at_field?: string | null } };
|
||||
usage?: TableUsageOutput;
|
||||
}
|
||||
|
||||
/** Migrate legacy flat description/db_description fields to a descriptions map. */
|
||||
|
|
@ -930,6 +932,7 @@ export function projectManifestEntry(name: string, entry: ManifestTableEntry): S
|
|||
measures: [],
|
||||
...(entry.tags?.dbt?.length ? { tags: entry.tags } : {}),
|
||||
...(entry.freshness?.dbt ? { freshness: entry.freshness } : {}),
|
||||
...(entry.usage ? { usage: entry.usage } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -1005,6 +1008,7 @@ const COMPOSE_KNOWN_KEYS = new Set([
|
|||
'exclude_columns',
|
||||
'disable_joins',
|
||||
'default_time_dimension',
|
||||
'usage',
|
||||
]);
|
||||
|
||||
export function composeOverlay(base: SemanticLayerSource, overlay: Record<string, unknown>): SemanticLayerSource {
|
||||
|
|
@ -1028,6 +1032,10 @@ export function composeOverlay(base: SemanticLayerSource, overlay: Record<string
|
|||
};
|
||||
}
|
||||
|
||||
if (normalizedOverlay.usage !== undefined) {
|
||||
result.usage = normalizedOverlay.usage as SemanticLayerSource['usage'];
|
||||
}
|
||||
|
||||
// Filter out excluded columns
|
||||
const excluded = new Set((normalizedOverlay.exclude_columns as string[] | undefined) ?? []);
|
||||
let columns = result.columns.filter((c) => !excluded.has(c.name));
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-schemas.js';
|
||||
|
||||
export interface SemanticLayerSource {
|
||||
name: string;
|
||||
descriptions?: Record<string, string>;
|
||||
|
|
@ -42,6 +44,7 @@ export interface SemanticLayerSource {
|
|||
default_time_dimension?: { dbt?: string };
|
||||
tags?: { dbt?: string[] };
|
||||
freshness?: { dbt?: { raw?: unknown; loaded_at_field?: string | null } };
|
||||
usage?: TableUsageOutput;
|
||||
}
|
||||
|
||||
export interface SemanticLayerQueryInput {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue