feat: validate wiki body semantic references

This commit is contained in:
Andrey Avtomonov 2026-05-17 21:22:43 +02:00
parent 739d88420e
commit ba76f2e5f0
2 changed files with 196 additions and 0 deletions

View file

@ -0,0 +1,70 @@
import { describe, expect, it } from 'vitest';
import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js';
// Shared semantic-layer fixture returned by the `loadSources` stubs below: a single source
// exposing two columns, one measure, and one segment, backed by `analytics.mart_account_segments`.
const sources = [
{
name: 'mart_account_segments',
grain: ['account_id'],
columns: [
{ name: 'account_id', type: 'string' },
{ name: 'segment', type: 'string' },
],
joins: [],
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }],
table: 'analytics.mart_account_segments',
},
];
describe('wiki body refs', () => {
  // Parser contract: only backtick-delimited tokens on non-fenced lines become references.
  it('parses only explicit inline-code body references outside fenced blocks', () => {
    const markdownLines = [
      'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.',
      'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.',
      'Ignore prose mart_account_segments.total_contract_arr_cents.',
      'Ignore `single_token`.',
      '```sql',
      'select `mart_account_segments.total_contract_arr_cents`',
      '```',
    ];
    const expectedRefs = [
      { kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' },
      { kind: 'sl_source', connectionId: null, sourceName: 'mart_account_segments' },
      { kind: 'sl_entity', connectionId: 'warehouse', sourceName: 'mart_account_segments', entityName: 'segment' },
      { kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' },
    ];

    const parsed = parseWikiBodyRefs(markdownLines.join('\n'));

    expect(parsed).toEqual(expectedRefs);
  });

  // An entity name that is not a measure, column, or segment of the source must be reported.
  it('rejects stale inline-code semantic-layer references', async () => {
    const staleBody = 'ARR is documented as `mart_account_segments.total_contract_arr_cents`.';

    const problems = await findInvalidWikiBodyRefs({
      pageKey: 'account-segments',
      body: staleBody,
      visibleConnectionIds: ['warehouse'],
      loadSources: async () => sources,
      tableExists: async () => true,
    });

    expect(problems).toEqual([
      'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
    ]);
  });

  // Every supported reference form resolves against the fixture, so no errors are expected.
  it('validates source, dimension, segment, measure, and table references', async () => {
    const knownTable = 'analytics.mart_account_segments';
    const allReferenceForms = [
      '`mart_account_segments.total_contract_arr`',
      '`mart_account_segments.segment`',
      '`mart_account_segments.enterprise`',
      '`source:mart_account_segments`',
      '`table:analytics.mart_account_segments`',
    ];

    const problems = await findInvalidWikiBodyRefs({
      pageKey: 'account-segments',
      body: allReferenceForms.join('\n'),
      visibleConnectionIds: ['warehouse'],
      loadSources: async () => sources,
      tableExists: async (_connectionId, tableRef) => tableRef === knownTable,
    });

    expect(problems).toEqual([]);
  });
});

View file

@ -0,0 +1,126 @@
import type { SemanticLayerSource } from '../sl/index.js';
/**
 * A reference extracted from an inline-code span of a wiki page body.
 *
 * - `sl_entity`: a `source.entity` token.
 * - `sl_source`: a `source:<name>` token.
 * - `table`: a `table:<ref>` token.
 *
 * `connectionId` is the text before the first `/` of the token, or `null` when the token
 * has no such prefix.
 */
export type WikiBodyRef =
| { kind: 'sl_entity'; connectionId: string | null; sourceName: string; entityName: string }
| { kind: 'sl_source'; connectionId: string | null; sourceName: string }
| { kind: 'table'; connectionId: string | null; tableRef: string };
/** Everything `findInvalidWikiBodyRefs` needs to validate one wiki page body. */
export interface WikiBodyRefValidationInput {
/** Identifier of the page; used as the prefix of every error message. */
pageKey: string;
/** Page body text that is scanned for inline-code references. */
body: string;
/** Connections searched when a reference does not carry its own `connection/` prefix. */
visibleConnectionIds: string[];
/** Loads the semantic-layer sources of one connection; results are cached per validation call. */
loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
/** Reports whether `tableRef` exists in the given connection. */
tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
// Matches a backtick-delimited span on a single line and captures its non-empty content,
// which may not itself contain a backtick or a newline.
const inlineCodePattern = /`([^`\n]+)`/g;
/**
 * Returns the lines of `body` that are not part of a fenced code block.
 *
 * Fence handling follows the CommonMark rules that matter when scanning for references:
 * - a fence marker is a run of three or more backticks or tildes, optionally preceded by whitespace;
 * - a backtick run followed by more backticks on the same line is an inline code span, not a fence;
 * - an open block is closed only by a marker of the same character that is at least as long as
 *   the opening marker and is followed by nothing but whitespace;
 * - a block that is never closed extends to the end of the body.
 *
 * @param body - Full page body; lines are split on `\n`.
 * @returns The non-fenced lines in their original order. Fence marker lines are never included.
 */
function visibleLinesOutsideFences(body: string): string[] {
  const fenceMarkerPattern = /^\s*(`{3,}|~{3,})/;
  const visible: string[] = [];
  let openFence: { char: string; length: number } | null = null;

  for (const line of body.split('\n')) {
    const match = fenceMarkerPattern.exec(line);
    if (!match) {
      if (!openFence) {
        visible.push(line);
      }
      continue;
    }

    const marker = match[1] ?? '';
    const markerChar = marker.charAt(0);
    // Slice instead of capturing with `.*` so a trailing `\r` (CRLF input) is still seen as whitespace.
    const afterMarker = line.slice(match[0].length);

    if (openFence) {
      const closesBlock =
        markerChar === openFence.char && marker.length >= openFence.length && afterMarker.trim() === '';
      if (closesBlock) {
        openFence = null;
      }
      // Either way the line belongs to the fenced block (content or closing marker).
      continue;
    }

    if (markerChar === '`' && afterMarker.includes('`')) {
      // e.g. "```code``` text": an inline span that happens to start the line.
      visible.push(line);
      continue;
    }

    openFence = { char: markerChar, length: marker.length };
  }

  return visible;
}
/**
 * Splits an optional `connection/` prefix off a reference token.
 *
 * Only the first `/` is significant. A token with no `/`, or one that starts with `/`,
 * is returned unchanged with a `null` connection.
 *
 * @param value - Trimmed inline-code token.
 * @returns The connection id (or `null`) and the remainder of the token after the first `/`.
 */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const [head, ...tail] = value.split('/');
  const hasPrefix = Boolean(head) && tail.length > 0;
  if (!hasPrefix || !head) {
    return { connectionId: null, body: value };
  }
  // Re-join so that any further slashes stay part of the body.
  return { connectionId: head, body: tail.join('/') };
}
/**
 * Extracts semantic references from the inline-code spans of a wiki page body.
 *
 * Lines inside fenced code blocks are skipped. Each remaining inline-code token is trimmed,
 * stripped of an optional `connection/` prefix, and then classified:
 * - `source:<name>` becomes an `sl_source` reference,
 * - `table:<ref>` becomes a `table` reference,
 * - `<source>.<entity>` (exactly one dot, both sides non-empty) becomes an `sl_entity` reference.
 * Any other token is ignored.
 *
 * @param body - Full page body.
 * @returns References in the order they appear in the body.
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const sourcePrefix = 'source:';
  const tablePrefix = 'table:';

  // Classifies one raw inline-code token; `null` means "not a reference".
  const toRef = (rawToken: string): WikiBodyRef | null => {
    const token = rawToken.trim();
    if (token === '') {
      return null;
    }
    const { connectionId, body: target } = parseConnectionScoped(token);

    if (target.startsWith(sourcePrefix)) {
      const sourceName = target.slice(sourcePrefix.length).trim();
      return sourceName ? { kind: 'sl_source', connectionId, sourceName } : null;
    }
    if (target.startsWith(tablePrefix)) {
      const tableRef = target.slice(tablePrefix.length).trim();
      return tableRef ? { kind: 'table', connectionId, tableRef } : null;
    }

    const [sourceName, entityName, ...extra] = target.split('.');
    if (extra.length > 0 || !sourceName || !entityName) {
      return null;
    }
    return { kind: 'sl_entity', connectionId, sourceName, entityName };
  };

  const found: WikiBodyRef[] = [];
  visibleLinesOutsideFences(body).forEach((line) => {
    for (const span of line.matchAll(inlineCodePattern)) {
      const ref = toRef(span[1] ?? '');
      if (ref !== null) {
        found.push(ref);
      }
    }
  });
  return found;
}
/**
 * Collects every name an `sl_entity` reference may point at within one source:
 * its measures, columns, and segments. Missing collections are treated as empty.
 *
 * @param source - Semantic-layer source to inspect.
 * @returns The set of measure, column, and segment names.
 */
function entityNames(source: SemanticLayerSource): Set<string> {
  const names = new Set<string>();
  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }
  return names;
}
/**
 * Validates every semantic reference found in a wiki page body.
 *
 * References are extracted with `parseWikiBodyRefs`. A reference that carries its own connection
 * prefix is checked against that connection only; otherwise every id in
 * `input.visibleConnectionIds` is a candidate and the reference is valid as soon as ANY candidate
 * resolves it. For `sl_entity` references this means a same-named source in an earlier connection
 * that lacks the entity does not mask a later connection whose source defines it.
 *
 * Rejections from `input.loadSources` or `input.tableExists` are not caught and propagate to the caller.
 *
 * @param input - Page key, body text, candidate connections, and the lookup callbacks.
 * @returns One message per unresolved reference, in body order, each prefixed with
 *   `input.pageKey`; empty when every reference resolves.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const errors: string[] = [];

  // Each connection's sources are loaded at most once per validation call.
  const sourceCache = new Map<string, SemanticLayerSource[]>();
  const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => {
    const cached = sourceCache.get(connectionId);
    if (cached !== undefined) {
      return cached;
    }
    const sources = await input.loadSources(connectionId);
    sourceCache.set(connectionId, sources);
    return sources;
  };

  for (const ref of parseWikiBodyRefs(input.body)) {
    // NOTE(review): an explicit connection prefix is used as-is and is not checked against
    // `visibleConnectionIds` — confirm that this is the intended visibility rule.
    const connectionIds = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;

    if (ref.kind === 'table') {
      const found = await Promise.all(
        connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef)),
      );
      if (!found.some(Boolean)) {
        errors.push(`${input.pageKey}: unknown raw table ${ref.connectionId ? `${ref.connectionId}/` : ''}${ref.tableRef}`);
      }
      continue;
    }

    let sourceFound = false;
    let resolved = false;
    // Sequential on purpose: stop loading further connections once the reference resolves.
    for (const connectionId of connectionIds) {
      const matches = (await loadSources(connectionId)).filter((candidate) => candidate.name === ref.sourceName);
      for (const source of matches) {
        sourceFound = true;
        if (ref.kind === 'sl_source' || entityNames(source).has(ref.entityName)) {
          resolved = true;
          break;
        }
      }
      if (resolved) {
        break;
      }
    }

    if (!sourceFound) {
      errors.push(`${input.pageKey}: unknown semantic-layer source ${ref.sourceName}`);
      continue;
    }
    // Source exists somewhere, but no matching source defines the entity.
    if (ref.kind === 'sl_entity' && !resolved) {
      errors.push(`${input.pageKey}: unknown semantic-layer entity ${ref.sourceName}.${ref.entityName}`);
    }
  }

  return errors;
}