mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
feat: validate wiki body semantic references
This commit is contained in:
parent
739d88420e
commit
ba76f2e5f0
2 changed files with 196 additions and 0 deletions
70
packages/context/src/ingest/wiki-body-refs.test.ts
Normal file
70
packages/context/src/ingest/wiki-body-refs.test.ts
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js';
|
||||
|
||||
// Fixture: the single semantic-layer source that every test case validates against.
// It exposes two string columns, one measure and one segment over one warehouse table.
const accountSegmentColumns = ['account_id', 'segment'].map((name) => ({ name, type: 'string' }));

const accountSegmentsSource = {
  name: 'mart_account_segments',
  grain: ['account_id'],
  columns: accountSegmentColumns,
  joins: [],
  measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
  segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }],
  table: 'analytics.mart_account_segments',
};

const sources = [accountSegmentsSource];
|
||||
|
||||
describe('wiki body refs', () => {
  // Arguments shared by the validation cases: one page, one visible connection,
  // and a loader that always answers with the fixture sources.
  const baseInput = {
    pageKey: 'account-segments',
    visibleConnectionIds: ['warehouse'],
    loadSources: async () => sources,
  };

  it('parses only explicit inline-code body references outside fenced blocks', () => {
    // Prose lines: four parseable references, one bare-prose mention, one single-token span.
    const proseLines = [
      'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.',
      'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.',
      'Ignore prose mart_account_segments.total_contract_arr_cents.',
      'Ignore `single_token`.',
    ];
    // A fenced block whose inline-code span must not be picked up.
    const fencedSql = ['```sql', 'select `mart_account_segments.total_contract_arr_cents`', '```'];
    const markdown = [...proseLines, ...fencedSql].join('\n');

    const parsed = parseWikiBodyRefs(markdown);

    expect(parsed).toEqual([
      { kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' },
      { kind: 'sl_source', connectionId: null, sourceName: 'mart_account_segments' },
      { kind: 'sl_entity', connectionId: 'warehouse', sourceName: 'mart_account_segments', entityName: 'segment' },
      { kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' },
    ]);
  });

  it('rejects stale inline-code semantic-layer references', async () => {
    const problems = findInvalidWikiBodyRefs({
      ...baseInput,
      body: 'ARR is documented as `mart_account_segments.total_contract_arr_cents`.',
      tableExists: async () => true,
    });

    await expect(problems).resolves.toEqual([
      'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
    ]);
  });

  it('validates source, dimension, segment, measure, and table references', async () => {
    // One reference of every supported flavour, each on its own line.
    const everyKindOfRef = [
      '`mart_account_segments.total_contract_arr`',
      '`mart_account_segments.segment`',
      '`mart_account_segments.enterprise`',
      '`source:mart_account_segments`',
      '`table:analytics.mart_account_segments`',
    ].join('\n');

    const problems = findInvalidWikiBodyRefs({
      ...baseInput,
      body: everyKindOfRef,
      tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments',
    });

    await expect(problems).resolves.toEqual([]);
  });
});
|
||||
126
packages/context/src/ingest/wiki-body-refs.ts
Normal file
126
packages/context/src/ingest/wiki-body-refs.ts
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
import type { SemanticLayerSource } from '../sl/index.js';
|
||||
|
||||
/** Inline-code reference to one named entity (`source.entity`) of a semantic-layer source. */
type WikiBodyEntityRef = {
  kind: 'sl_entity';
  connectionId: string | null;
  sourceName: string;
  entityName: string;
};

/** Inline-code reference to a whole semantic-layer source (`source:<name>`). */
type WikiBodySourceRef = {
  kind: 'sl_source';
  connectionId: string | null;
  sourceName: string;
};

/** Inline-code reference to a raw table (`table:<ref>`). */
type WikiBodyTableRef = {
  kind: 'table';
  connectionId: string | null;
  tableRef: string;
};

/**
 * A reference extracted from a wiki page body, discriminated by `kind`.
 * `connectionId` is `null` when the reference carried no `connection/` prefix.
 */
export type WikiBodyRef = WikiBodyEntityRef | WikiBodySourceRef | WikiBodyTableRef;
|
||||
|
||||
/**
 * Everything `findInvalidWikiBodyRefs` needs to validate one wiki page body:
 * the page itself plus the lookups used to resolve the references found in it.
 */
export interface WikiBodyRefValidationInput {
|
||||
/** Page identifier; prefixes every error message produced for this page. */
pageKey: string;
|
||||
/** Page text that is scanned for inline-code references. */
body: string;
|
||||
/** Connections tried, in order, for a reference that has no `connection/` prefix. */
visibleConnectionIds: string[];
|
||||
/** Resolves the semantic-layer sources of one connection; the validator caches the result per connection id within a run. */
loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
|
||||
/** Resolves whether `tableRef` exists in the given connection; consulted for `table:` references. */
tableExists(connectionId: string, tableRef: string): Promise<boolean>;
|
||||
}
|
||||
|
||||
// Matches a single-backtick inline-code span that stays on one line and captures its
// inner text; the global flag lets callers enumerate every span with `matchAll`.
const inlineCodePattern = /`([^`\n]+)`/g;
|
||||
|
||||
/**
 * Returns the lines of `body` that are not part of a fenced code block.
 *
 * Fence lines themselves are dropped along with the fenced content. Fence
 * handling follows the CommonMark rules that matter for reference scanning:
 * - a fence is a run of at least three backticks or three tildes;
 * - a backtick "fence" whose remainder contains a backtick is an inline-code
 *   span (e.g. "```x``` text"), not a fence, and stays visible;
 * - a block is closed only by a fence of the same character that is at least as
 *   long as the opener and has nothing after it, so a shorter fence nested
 *   inside a longer one stays hidden.
 * Leading whitespace of any width is tolerated so fences nested in lists are
 * still recognised. An unclosed fence hides everything up to the end of the body.
 *
 * @param body Markdown text, split on `\n`.
 * @returns The visible lines, unmodified and in their original order.
 */
function visibleLinesOutsideFences(body: string): string[] {
  const fenceStart = /^\s*(`{3,}|~{3,})/;
  const lines: string[] = [];
  let open: { marker: string; length: number } | null = null;

  for (const line of body.split('\n')) {
    const fence = fenceStart.exec(line);
    if (fence) {
      const run = fence[1] ?? '';
      const marker = run.charAt(0);
      const rest = line.slice(fence[0].length);

      if (open) {
        // Inside a block: only a matching, long-enough, bare fence closes it.
        if (marker === open.marker && run.length >= open.length && rest.trim() === '') {
          open = null;
        }
        continue;
      }

      // The info string of a backtick fence may not contain backticks; if it
      // does, this is ordinary text that happens to start with an inline span.
      if (marker !== '`' || !rest.includes('`')) {
        open = { marker, length: run.length };
        continue;
      }
    }

    if (!open) {
      lines.push(line);
    }
  }

  return lines;
}
|
||||
|
||||
/**
 * Splits an optional `connection/` prefix off an inline-code token.
 *
 * The prefix is everything before the first `/`. A token without a slash, or
 * one that starts with a slash, is treated as unscoped and returned whole.
 *
 * @param value Trimmed inline-code token.
 * @returns The connection id (or `null`) and the remainder of the token.
 */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const separatorIndex = value.indexOf('/');
  const hasConnectionPrefix = separatorIndex > 0;

  return hasConnectionPrefix
    ? { connectionId: value.substring(0, separatorIndex), body: value.substring(separatorIndex + 1) }
    : { connectionId: null, body: value };
}
|
||||
|
||||
/**
 * Classifies the text of one inline-code span.
 *
 * Recognised shapes, after an optional `connection/` prefix is removed:
 * `source:<name>`, `table:<ref>` and `<source>.<entity>` (exactly one dot with
 * a non-empty part on each side). Anything else yields `null`.
 */
function refFromInlineCode(rawToken: string): WikiBodyRef | null {
  const token = rawToken.trim();
  if (!token) {
    return null;
  }

  const { connectionId, body } = parseConnectionScoped(token);

  if (body.startsWith('source:')) {
    const sourceName = body.slice('source:'.length).trim();
    return sourceName ? { kind: 'sl_source', connectionId, sourceName } : null;
  }

  if (body.startsWith('table:')) {
    const tableRef = body.slice('table:'.length).trim();
    return tableRef ? { kind: 'table', connectionId, tableRef } : null;
  }

  const [sourceName, entityName, ...extraParts] = body.split('.');
  if (extraParts.length > 0 || !sourceName || !entityName) {
    return null;
  }
  return { kind: 'sl_entity', connectionId, sourceName, entityName };
}

/**
 * Extracts every semantic reference written as inline code in a wiki body.
 *
 * Only single-backtick spans on lines outside fenced code blocks are examined;
 * plain prose and spans that match none of the reference shapes are ignored.
 *
 * @param body Wiki page text.
 * @returns The references in document order (duplicates are kept).
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const collected: WikiBodyRef[] = [];

  for (const visibleLine of visibleLinesOutsideFences(body)) {
    for (const span of visibleLine.matchAll(inlineCodePattern)) {
      const parsed = refFromInlineCode(span[1] ?? '');
      if (parsed !== null) {
        collected.push(parsed);
      }
    }
  }

  return collected;
}
|
||||
|
||||
/**
 * Collects the names a `<source>.<entity>` reference may point at: the
 * source's measures, columns and segments. Missing lists count as empty.
 *
 * @param source Semantic-layer source to inspect.
 * @returns The set of measure, column and segment names.
 */
function entityNames(source: SemanticLayerSource): Set<string> {
  const names = new Set<string>();

  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }

  return names;
}
|
||||
|
||||
/**
 * Validates every inline-code reference in a wiki page body and reports the
 * ones that do not resolve.
 *
 * A reference with a `connection/` prefix is checked against that connection
 * only; an unprefixed reference is checked against `visibleConnectionIds` and
 * is valid as soon as ANY of them resolves it. This holds for tables, sources
 * and entities alike: an entity reference is not rejected just because the
 * first connection exposing a same-named source lacks the entity while a later
 * one defines it.
 *
 * Sources are loaded at most once per connection within a call.
 *
 * @param input Page key and body plus the lookups used for resolution.
 * @returns One message per unresolved reference, in document order, each
 *   prefixed with the page key and naming the reference as written (including
 *   its `connection/` scope when it had one). Empty when everything resolves.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const errors: string[] = [];
  const sourceCache = new Map<string, SemanticLayerSource[]>();

  const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => {
    const cached = sourceCache.get(connectionId);
    if (cached !== undefined) {
      return cached;
    }
    const sources = await input.loadSources(connectionId);
    sourceCache.set(connectionId, sources);
    return sources;
  };

  for (const ref of parseWikiBodyRefs(input.body)) {
    // NOTE(review): an explicitly scoped reference is looked up even when its
    // connection is not listed in visibleConnectionIds — confirm that is intended.
    const connectionIds = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;
    // Echo the scope in messages so the author can find the exact token.
    const scope = ref.connectionId ? `${ref.connectionId}/` : '';

    if (ref.kind === 'table') {
      const found = await Promise.all(
        connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef)),
      );
      if (!found.some(Boolean)) {
        errors.push(`${input.pageKey}: unknown raw table ${scope}${ref.tableRef}`);
      }
      continue;
    }

    let sourceFound = false;
    let entityFound = false;
    for (const connectionId of connectionIds) {
      const source = (await loadSources(connectionId)).find((candidate) => candidate.name === ref.sourceName);
      if (!source) {
        continue;
      }
      sourceFound = true;
      if (ref.kind === 'sl_source') {
        break;
      }
      if (entityNames(source).has(ref.entityName)) {
        entityFound = true;
        break;
      }
      // Same-named source without the entity: keep looking in later connections.
    }

    if (!sourceFound) {
      errors.push(`${input.pageKey}: unknown semantic-layer source ${scope}${ref.sourceName}`);
      continue;
    }
    if (ref.kind === 'sl_entity' && !entityFound) {
      errors.push(`${input.pageKey}: unknown semantic-layer entity ${scope}${ref.sourceName}.${ref.entityName}`);
    }
  }

  return errors;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue