mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
Normalize semantic layer descriptions
This commit is contained in:
parent
c82989119b
commit
86c818a454
21 changed files with 498 additions and 37 deletions
|
|
@ -22,7 +22,7 @@ export interface KtxCliPackageInfo {
|
|||
}
|
||||
|
||||
export interface KtxCliIo {
|
||||
stdout: { isTTY?: boolean; write(chunk: string): void };
|
||||
stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void };
|
||||
stderr: { write(chunk: string): void };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { describe, expect, it, vi } from 'vitest';
|
|||
import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js';
|
||||
import {
|
||||
extractProgressMessage,
|
||||
createRepainter,
|
||||
initViewState,
|
||||
parseIngestSummary,
|
||||
parseScanSummary,
|
||||
|
|
@ -11,13 +12,14 @@ import {
|
|||
viewStateFromSourceProgress,
|
||||
} from './context-build-view.js';
|
||||
|
||||
function makeIo(options: { isTTY?: boolean } = {}) {
|
||||
function makeIo(options: { isTTY?: boolean; columns?: number } = {}) {
|
||||
let stdout = '';
|
||||
let stderr = '';
|
||||
return {
|
||||
io: {
|
||||
stdout: {
|
||||
isTTY: options.isTTY,
|
||||
columns: options.columns,
|
||||
write: (chunk: string) => {
|
||||
stdout += chunk;
|
||||
},
|
||||
|
|
@ -305,6 +307,31 @@ describe('renderContextBuildView', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('createRepainter', () => {
|
||||
it('moves up visual rows, not just newline count, when content wraps', () => {
|
||||
const io = makeIo({ isTTY: true, columns: 5 });
|
||||
const repainter = createRepainter(io.io);
|
||||
|
||||
repainter.paint('abcdefghijk\n');
|
||||
repainter.paint('updated\n');
|
||||
repainter.paint('done\n');
|
||||
|
||||
const cursorMoves = [...io.stdout().matchAll(/\u001b\[(\d+)A\r/g)].map((match) => Number(match[1]));
|
||||
expect(cursorMoves).toEqual([3, 2]);
|
||||
});
|
||||
|
||||
it('returns to the start of a single-line frame without moving up when content has no newline', () => {
|
||||
const io = makeIo({ isTTY: true, columns: 80 });
|
||||
const repainter = createRepainter(io.io);
|
||||
|
||||
repainter.paint('hello');
|
||||
repainter.paint('bye');
|
||||
|
||||
expect(io.stdout()).toContain('\rbye');
|
||||
expect(io.stdout()).not.toContain('\u001b[1A\rbye');
|
||||
});
|
||||
});
|
||||
|
||||
describe('runContextBuild', () => {
|
||||
it('executes scan targets before source-ingest targets', async () => {
|
||||
const io = makeIo();
|
||||
|
|
|
|||
|
|
@ -226,6 +226,7 @@ export function renderContextBuildView(
|
|||
// --- IO Capture ---
|
||||
|
||||
const ESC_K_RE = new RegExp(`${ESC.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\[K`, 'g');
|
||||
const ANSI_RE = /\x1b\[[0-9;]*m/g;
|
||||
|
||||
export function extractProgressMessage(chunk: string): string | null {
|
||||
const cleaned = chunk.replace(/^\r/, '').replace(ESC_K_RE, '').replace(/\n$/, '').trim();
|
||||
|
|
@ -342,16 +343,45 @@ export function viewStateFromSourceProgress(
|
|||
// --- Repaint ---
|
||||
|
||||
export function createRepainter(io: KtxCliIo) {
|
||||
let lastLineCount = 0;
|
||||
let hasPainted = false;
|
||||
let lastCursorUpRows = 0;
|
||||
|
||||
const terminalColumns = () => {
|
||||
for (const columns of [io.stdout.columns, process.stdout.columns]) {
|
||||
if (typeof columns === 'number' && Number.isFinite(columns) && columns > 0) return columns;
|
||||
}
|
||||
return 80;
|
||||
};
|
||||
|
||||
const visualRows = (line: string, columns: number) => {
|
||||
const plainLength = line.replace(ANSI_RE, '').length;
|
||||
return Math.max(1, Math.ceil(plainLength / columns));
|
||||
};
|
||||
|
||||
const cursorUpRowsAfterWrite = (content: string) => {
|
||||
const columns = terminalColumns();
|
||||
const endsWithNewline = content.endsWith('\n');
|
||||
const lines = content.split('\n');
|
||||
return lines.reduce((sum, line, index) => {
|
||||
if (index === lines.length - 1) {
|
||||
return endsWithNewline ? sum : sum + Math.max(0, visualRows(line, columns) - 1);
|
||||
}
|
||||
return sum + visualRows(line, columns);
|
||||
}, 0);
|
||||
};
|
||||
|
||||
return {
|
||||
paint(content: string) {
|
||||
if (lastLineCount > 0) {
|
||||
io.stdout.write(`${ESC}[${lastLineCount}A\r`);
|
||||
if (hasPainted) {
|
||||
if (lastCursorUpRows > 0) {
|
||||
io.stdout.write(`${ESC}[${lastCursorUpRows}A`);
|
||||
}
|
||||
io.stdout.write('\r');
|
||||
}
|
||||
io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`));
|
||||
io.stdout.write(`${ESC}[J`);
|
||||
lastLineCount = (content.match(/\n/g) ?? []).length;
|
||||
hasPainted = true;
|
||||
lastCursorUpRows = cursorUpRowsAfterWrite(content);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ describe('KTX local project runtime', () => {
|
|||
const gitignore = await readFile(join(projectDir, '.ktx/.gitignore'), 'utf-8');
|
||||
expect(gitignore).toContain('cache/');
|
||||
expect(gitignore).toContain('db.sqlite');
|
||||
expect(gitignore).toContain('db.sqlite-*');
|
||||
expect(gitignore).toContain('ingest-transcripts/');
|
||||
expect(gitignore).toContain('secrets/');
|
||||
expect(gitignore).toContain('setup/');
|
||||
expect(gitignore).toContain('agents/');
|
||||
|
|
|
|||
|
|
@ -35,7 +35,10 @@ export interface InitKtxProjectResult extends KtxLocalProject {
|
|||
}
|
||||
|
||||
const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [
|
||||
{ path: '.ktx/.gitignore', content: 'cache/\ndb.sqlite\nsecrets/\nsetup/\nagents/\n' },
|
||||
{
|
||||
path: '.ktx/.gitignore',
|
||||
content: 'cache/\ndb.sqlite\ndb.sqlite-*\ningest-transcripts/\nsecrets/\nsetup/\nagents/\n',
|
||||
},
|
||||
{ path: '.ktx/prompts/.gitkeep', content: '' },
|
||||
{ path: '.ktx/skills/.gitkeep', content: '' },
|
||||
{ path: 'knowledge/global/.gitkeep', content: '' },
|
||||
|
|
|
|||
|
|
@ -67,10 +67,10 @@ describe('KTX setup config helpers', () => {
|
|||
|
||||
it('merges setup-local gitignore entries without removing existing lines', () => {
|
||||
expect(mergeKtxSetupGitignoreEntries('cache/\ndb.sqlite\n')).toBe(
|
||||
['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
|
||||
['cache/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
|
||||
);
|
||||
expect(mergeKtxSetupGitignoreEntries('cache/\nsecrets/\n')).toBe(
|
||||
['cache/', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
|
||||
['cache/', 'secrets/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'setup/', 'agents/', ''].join('\n'),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,7 +4,15 @@ export const KTX_SETUP_STEPS = ['project', 'llm', 'embeddings', 'databases', 'so
|
|||
|
||||
export type KtxSetupStep = (typeof KTX_SETUP_STEPS)[number];
|
||||
|
||||
const SETUP_GITIGNORE_ENTRIES = ['secrets/', 'setup/', 'agents/'] as const;
|
||||
const SETUP_GITIGNORE_ENTRIES = [
|
||||
'cache/',
|
||||
'db.sqlite',
|
||||
'db.sqlite-*',
|
||||
'ingest-transcripts/',
|
||||
'secrets/',
|
||||
'setup/',
|
||||
'agents/',
|
||||
] as const;
|
||||
|
||||
export function markKtxSetupStepComplete(config: KtxProjectConfig, step: KtxSetupStep): KtxProjectConfig {
|
||||
const databaseConnectionIds = config.setup?.database_connection_ids ?? [];
|
||||
|
|
|
|||
136
packages/context/src/sl/description-normalization.ts
Normal file
136
packages/context/src/sl/description-normalization.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
type DescriptionMap = Record<string, string>;
|
||||
|
||||
interface NormalizeDescriptionOptions {
|
||||
fillMissing?: boolean;
|
||||
}
|
||||
|
||||
function cleanText(value: unknown): string | null {
|
||||
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null;
|
||||
}
|
||||
|
||||
function cleanDescriptionMap(value: unknown): DescriptionMap {
|
||||
const result: DescriptionMap = {};
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
return result;
|
||||
}
|
||||
for (const [key, text] of Object.entries(value)) {
|
||||
const cleaned = cleanText(text);
|
||||
if (cleaned) {
|
||||
result[key] = cleaned;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function hasDescriptions(descriptions: DescriptionMap): boolean {
|
||||
return Object.keys(descriptions).length > 0;
|
||||
}
|
||||
|
||||
function withDescriptionMap(record: Record<string, unknown>, fallback: string | null): Record<string, unknown> {
|
||||
const descriptions = cleanDescriptionMap(record.descriptions);
|
||||
const flatDescription = cleanText(record.description);
|
||||
if (flatDescription && !descriptions.user) {
|
||||
descriptions.user = flatDescription;
|
||||
}
|
||||
if (!hasDescriptions(descriptions) && fallback) {
|
||||
descriptions.ktx = fallback;
|
||||
}
|
||||
|
||||
const next = { ...record };
|
||||
delete next.description;
|
||||
if (hasDescriptions(descriptions)) {
|
||||
next.descriptions = descriptions;
|
||||
} else {
|
||||
delete next.descriptions;
|
||||
}
|
||||
return next;
|
||||
}
|
||||
|
||||
function humanizeIdentifier(value: string): string {
|
||||
return value
|
||||
.replace(/([a-z0-9])([A-Z])/g, '$1 $2')
|
||||
.replace(/[_-]+/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
function formatCount(count: number, singular: string, plural = `${singular}s`): string | null {
|
||||
if (count <= 0) {
|
||||
return null;
|
||||
}
|
||||
return `${count} ${count === 1 ? singular : plural}`;
|
||||
}
|
||||
|
||||
function sourceFallback(source: Record<string, unknown>, sourceName: string): string {
|
||||
const table = cleanText(source.table);
|
||||
const sql = cleanText(source.sql);
|
||||
if (table) {
|
||||
return `Semantic-layer source for ${sourceName} backed by ${table}.`;
|
||||
}
|
||||
if (sql) {
|
||||
return `Semantic-layer source for ${sourceName} backed by curated SQL.`;
|
||||
}
|
||||
|
||||
const counts = [
|
||||
formatCount(Array.isArray(source.measures) ? source.measures.length : 0, 'measure'),
|
||||
formatCount(Array.isArray(source.segments) ? source.segments.length : 0, 'segment'),
|
||||
formatCount(Array.isArray(source.columns) ? source.columns.length : 0, 'computed column'),
|
||||
].filter((item): item is string => Boolean(item));
|
||||
return counts.length > 0
|
||||
? `Semantic-layer overlay for ${sourceName} defining ${counts.join(', ')}.`
|
||||
: `Semantic-layer overlay for ${sourceName}.`;
|
||||
}
|
||||
|
||||
function columnFallback(column: Record<string, unknown>, sourceName: string): string {
|
||||
const columnName = cleanText(column.name) ?? 'column';
|
||||
const label = humanizeIdentifier(columnName) || columnName;
|
||||
const expr = cleanText(column.expr);
|
||||
|
||||
if (expr) {
|
||||
return `Computed ${label} value for ${sourceName}.`;
|
||||
}
|
||||
|
||||
if (columnName.toLowerCase() === 'id') {
|
||||
return `Identifier column for ${sourceName}.`;
|
||||
}
|
||||
|
||||
const idMatch = columnName.match(/^(.+)_id$/i);
|
||||
if (idMatch) {
|
||||
const entity = humanizeIdentifier(idMatch[1] ?? '');
|
||||
return entity ? `Identifier for the related ${entity} on ${sourceName}.` : `Identifier column for ${sourceName}.`;
|
||||
}
|
||||
|
||||
if (/(^|_)(date|time|timestamp|created_at|updated_at|week_start|month_start)($|_)/i.test(columnName)) {
|
||||
return `Date or time value for ${label} on ${sourceName}.`;
|
||||
}
|
||||
|
||||
return `Column ${label} from ${sourceName}.`;
|
||||
}
|
||||
|
||||
export function normalizeSemanticLayerDescriptions<T extends object>(
|
||||
source: T,
|
||||
options: NormalizeDescriptionOptions = {},
|
||||
): T {
|
||||
const sourceRecord = source as Record<string, unknown>;
|
||||
const sourceName = cleanText(sourceRecord.name) ?? 'source';
|
||||
const normalized = withDescriptionMap(
|
||||
sourceRecord,
|
||||
options.fillMissing ? sourceFallback(sourceRecord, sourceName) : null,
|
||||
);
|
||||
|
||||
if (Array.isArray(sourceRecord.columns)) {
|
||||
normalized.columns = sourceRecord.columns.map((column) => {
|
||||
if (!column || typeof column !== 'object' || Array.isArray(column)) {
|
||||
return column;
|
||||
}
|
||||
const columnRecord = column as Record<string, unknown>;
|
||||
return withDescriptionMap(
|
||||
columnRecord,
|
||||
options.fillMissing ? columnFallback(columnRecord, sourceName) : null,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
return normalized as T;
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js';
|
|||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
|
||||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js';
|
||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||
|
|
@ -180,14 +181,14 @@ function manifestTables(value: Record<string, unknown>): Record<string, Manifest
|
|||
|
||||
function parsedStandaloneSource(parsed: Record<string, unknown>, name: string): SemanticLayerSource {
|
||||
const source = parsed as Partial<SemanticLayerSource>;
|
||||
return {
|
||||
return normalizeSemanticLayerDescriptions({
|
||||
...source,
|
||||
name,
|
||||
grain: Array.isArray(parsed.grain) ? (parsed.grain.filter((item) => typeof item === 'string') as string[]) : [],
|
||||
columns: Array.isArray(parsed.columns) ? (parsed.columns as SemanticLayerSource['columns']) : [],
|
||||
joins: Array.isArray(parsed.joins) ? (parsed.joins as SemanticLayerSource['joins']) : [],
|
||||
measures: Array.isArray(parsed.measures) ? (parsed.measures as SemanticLayerSource['measures']) : [],
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
export async function loadLocalSlSourceRecords(
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ const segmentDefinitionSchema = z.object({
|
|||
description: z.string().optional(),
|
||||
});
|
||||
|
||||
const descriptionsSchema = z.record(z.string(), z.string().min(1));
|
||||
|
||||
const defaultTimeDimensionDbtSchema = z.object({
|
||||
dbt: z.string().optional(),
|
||||
});
|
||||
|
|
@ -77,6 +79,7 @@ const sourceColumnSchema = z.object({
|
|||
role: z.enum(columnRoleValues).optional(),
|
||||
visibility: z.enum(columnVisibilityValues).optional(),
|
||||
description: z.string().optional(),
|
||||
descriptions: descriptionsSchema.optional(),
|
||||
expr: z.string().optional(),
|
||||
constraints: sourceKeyedColumnConstraintsSchema.optional(),
|
||||
enum_values: sourceKeyedStringArraySchema.optional(),
|
||||
|
|
@ -91,6 +94,7 @@ const overlayColumnSchema = z
|
|||
role: z.enum(columnRoleValues).optional(),
|
||||
visibility: z.enum(columnVisibilityValues).optional(),
|
||||
description: z.string().optional(),
|
||||
descriptions: descriptionsSchema.optional(),
|
||||
expr: z.string().optional(),
|
||||
})
|
||||
.refine((col) => !col.type || col.expr, {
|
||||
|
|
@ -102,6 +106,7 @@ export const sourceDefinitionSchema = z
|
|||
.object({
|
||||
name: z.string().min(1),
|
||||
description: z.string().optional(),
|
||||
descriptions: descriptionsSchema.optional(),
|
||||
// Accepted for documentation parity with the Python spec; behavior is driven
|
||||
// by the `table` / `sql` fields, not by this discriminator.
|
||||
source_type: z.enum(['table', 'sql']).optional(),
|
||||
|
|
|
|||
|
|
@ -257,12 +257,14 @@ describe('sourceDefinitionSchema', () => {
|
|||
it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => {
|
||||
const result = sourceDefinitionSchema.safeParse({
|
||||
name: 'orders',
|
||||
descriptions: { dbt: 'Order facts from dbt.' },
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [
|
||||
{
|
||||
name: 'status',
|
||||
type: 'string',
|
||||
descriptions: { dbt: 'Order lifecycle status.' },
|
||||
constraints: { dbt: { not_null: true, unique: true } },
|
||||
enum_values: { dbt: ['placed', 'shipped'] },
|
||||
tests: {
|
||||
|
|
@ -282,7 +284,9 @@ describe('sourceDefinitionSchema', () => {
|
|||
if (!result.success) {
|
||||
return;
|
||||
}
|
||||
expect(result.data.descriptions).toEqual({ dbt: 'Order facts from dbt.' });
|
||||
expect(result.data.columns[0]).toMatchObject({
|
||||
descriptions: { dbt: 'Order lifecycle status.' },
|
||||
constraints: { dbt: { not_null: true, unique: true } },
|
||||
enum_values: { dbt: ['placed', 'shipped'] },
|
||||
tests: {
|
||||
|
|
@ -528,6 +532,31 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', (
|
|||
const aav = sources.find((s) => s.name === 'aav_consignments');
|
||||
expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]);
|
||||
});
|
||||
|
||||
it('normalizes legacy flat source and column descriptions when loading standalone files', async () => {
|
||||
const standalonePath = 'semantic-layer/conn-1/orders.yaml';
|
||||
configService.listFiles.mockResolvedValue({ files: [standalonePath] });
|
||||
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'name: orders',
|
||||
'description: Finance orders used for invoice reconciliation.',
|
||||
'table: public.orders',
|
||||
'grain: [id]',
|
||||
'columns:',
|
||||
' - name: id',
|
||||
' type: string',
|
||||
' description: Stable order identifier.',
|
||||
].join('\n'),
|
||||
});
|
||||
|
||||
const sources = await service.loadAllSources('conn-1');
|
||||
|
||||
expect(sources[0]).toMatchObject({
|
||||
name: 'orders',
|
||||
descriptions: { user: 'Finance orders used for invoice reconciliation.' },
|
||||
columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('validateWithProposedSource', () => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import YAML from 'yaml';
|
|||
import type { KtxFileStorePort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { isOverlaySource, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput, SemanticLayerSource } from './types.js';
|
||||
|
||||
|
|
@ -101,6 +102,7 @@ export class SemanticLayerService {
|
|||
const warnings: string[] = [];
|
||||
|
||||
if (!options?.skipValidation) {
|
||||
source = normalizeSemanticLayerDescriptions(source);
|
||||
const sourceData: Record<string, unknown> = { ...source };
|
||||
|
||||
if ((sourceData.table || sourceData.sql) && (await this.isManifestBacked(connectionId, source.name))) {
|
||||
|
|
@ -129,7 +131,8 @@ export class SemanticLayerService {
|
|||
}
|
||||
|
||||
const path = this.sourcePath(connectionId, source.name);
|
||||
const content = YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0 });
|
||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||
skipLock: options?.skipLock,
|
||||
|
|
@ -199,14 +202,14 @@ export class SemanticLayerService {
|
|||
if (sources.has(name)) {
|
||||
this.logger.warn(`Standalone source '${name}' in ${filePath} overrides manifest entry of the same name`);
|
||||
}
|
||||
let standalone: SemanticLayerSource = {
|
||||
let standalone: SemanticLayerSource = normalizeSemanticLayerDescriptions({
|
||||
...(data as Partial<SemanticLayerSource>),
|
||||
name,
|
||||
grain: Array.isArray(data.grain) ? (data.grain as string[]) : [],
|
||||
columns: Array.isArray(data.columns) ? (data.columns as SemanticLayerSource['columns']) : [],
|
||||
joins: Array.isArray(data.joins) ? (data.joins as SemanticLayerSource['joins']) : [],
|
||||
measures: Array.isArray(data.measures) ? (data.measures as SemanticLayerSource['measures']) : [],
|
||||
};
|
||||
});
|
||||
// If the source declares `inherits_columns_from`, fill any blank
|
||||
// type/descriptions/role from the matching manifest entry. Lets the
|
||||
// agent write `columns: [{name: FOO}]` without redeclaring known fields.
|
||||
|
|
@ -1005,7 +1008,8 @@ const COMPOSE_KNOWN_KEYS = new Set([
|
|||
]);
|
||||
|
||||
export function composeOverlay(base: SemanticLayerSource, overlay: Record<string, unknown>): SemanticLayerSource {
|
||||
const unknownKeys = Object.keys(overlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k));
|
||||
const normalizedOverlay = normalizeSemanticLayerDescriptions(overlay);
|
||||
const unknownKeys = Object.keys(normalizedOverlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k));
|
||||
if (unknownKeys.length > 0) {
|
||||
throw new Error(
|
||||
`composeOverlay: overlay for '${base.name}' has unhandled keys [${unknownKeys.join(', ')}]. ` +
|
||||
|
|
@ -1015,50 +1019,47 @@ export function composeOverlay(base: SemanticLayerSource, overlay: Record<string
|
|||
|
||||
const result = { ...base };
|
||||
|
||||
if (overlay.description) {
|
||||
result.descriptions = { ...(result.descriptions ?? {}), user: overlay.description as string };
|
||||
}
|
||||
|
||||
// Descriptions (plural) merge keyed by source (e.g. `dbt`, `ai`, `db`). Overlay keys
|
||||
// win over matching base keys but unrelated base keys are preserved.
|
||||
if (overlay.descriptions) {
|
||||
if (normalizedOverlay.descriptions) {
|
||||
result.descriptions = {
|
||||
...(result.descriptions ?? {}),
|
||||
...(overlay.descriptions as Record<string, string>),
|
||||
...(normalizedOverlay.descriptions as Record<string, string>),
|
||||
};
|
||||
}
|
||||
|
||||
// Filter out excluded columns
|
||||
const excluded = new Set((overlay.exclude_columns as string[] | undefined) ?? []);
|
||||
const excluded = new Set((normalizedOverlay.exclude_columns as string[] | undefined) ?? []);
|
||||
let columns = result.columns.filter((c) => !excluded.has(c.name));
|
||||
|
||||
// Append overlay computed columns
|
||||
const overlayColumns = (overlay.columns as SemanticLayerSource['columns'] | undefined) ?? [];
|
||||
const overlayColumns = (normalizedOverlay.columns as SemanticLayerSource['columns'] | undefined) ?? [];
|
||||
columns = [...columns, ...overlayColumns];
|
||||
result.columns = columns;
|
||||
|
||||
// Measures from overlay only
|
||||
result.measures = (overlay.measures as SemanticLayerSource['measures'] | undefined) ?? [];
|
||||
result.measures = (normalizedOverlay.measures as SemanticLayerSource['measures'] | undefined) ?? [];
|
||||
|
||||
// Segments: overlay-replaces semantics. Manifest tables don't carry segments today;
|
||||
// if that changes, add a union branch here.
|
||||
if (overlay.segments !== undefined) {
|
||||
result.segments = overlay.segments as SemanticLayerSource['segments'];
|
||||
if (normalizedOverlay.segments !== undefined) {
|
||||
result.segments = normalizedOverlay.segments as SemanticLayerSource['segments'];
|
||||
}
|
||||
|
||||
// Override grain
|
||||
if (overlay.grain) {
|
||||
result.grain = overlay.grain as string[];
|
||||
if (normalizedOverlay.grain) {
|
||||
result.grain = normalizedOverlay.grain as string[];
|
||||
}
|
||||
|
||||
if (overlay.default_time_dimension !== undefined) {
|
||||
result.default_time_dimension = overlay.default_time_dimension as SemanticLayerSource['default_time_dimension'];
|
||||
if (normalizedOverlay.default_time_dimension !== undefined) {
|
||||
result.default_time_dimension =
|
||||
normalizedOverlay.default_time_dimension as SemanticLayerSource['default_time_dimension'];
|
||||
}
|
||||
|
||||
// Union + dedupe joins, apply suppressions
|
||||
const disabled = new Set(((overlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs));
|
||||
const disabled = new Set(((normalizedOverlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs));
|
||||
const manifestJoins = result.joins.filter((j) => !disabled.has(normalizeWs(j.on)));
|
||||
const overlayJoins = (overlay.joins as SemanticLayerSource['joins'] | undefined) ?? [];
|
||||
const overlayJoins = (normalizedOverlay.joins as SemanticLayerSource['joins'] | undefined) ?? [];
|
||||
const existingKeys = new Set(manifestJoins.map((j) => `${j.to}::${normalizeWs(j.on)}`));
|
||||
const newJoins = overlayJoins.filter((j) => !existingKeys.has(`${j.to}::${normalizeWs(j.on)}`));
|
||||
result.joins = [...manifestJoins, ...newJoins];
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger } from '../core/index.js';
|
||||
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import type { SlSourcesIndexPort } from './ports.js';
|
||||
import type { SemanticLayerSource } from './types.js';
|
||||
|
||||
|
|
@ -8,6 +9,7 @@ export function buildSemanticLayerSourceSearchText(
|
|||
source: SemanticLayerSource,
|
||||
priority: string[] = DEFAULT_PRIORITY,
|
||||
): string {
|
||||
source = normalizeSemanticLayerDescriptions(source);
|
||||
const config = { priority };
|
||||
const parts: string[] = [source.name.replace(/_/g, ' ')];
|
||||
|
||||
|
|
|
|||
|
|
@ -127,6 +127,39 @@ describe('SlEditSourceTool — session gating', () => {
|
|||
);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('fills missing descriptions when an ingest session edits a source', async () => {
|
||||
const { tool } = makeTool();
|
||||
const session = makeSession({
|
||||
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'dbt' },
|
||||
});
|
||||
const context: ToolContext = { ...baseContext, session };
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: session.connectionId,
|
||||
sourceName: 'orders',
|
||||
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
|
||||
} as any,
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
descriptions: { ktx: expect.stringContaining('orders') },
|
||||
columns: [
|
||||
expect.objectContaining({
|
||||
descriptions: { ktx: expect.stringContaining('Identifier') },
|
||||
}),
|
||||
],
|
||||
}),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import YAML from 'yaml';
|
|||
import { z } from 'zod';
|
||||
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
import {
|
||||
BaseSemanticLayerTool,
|
||||
|
|
@ -147,6 +148,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
} catch (e) {
|
||||
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
|
||||
}
|
||||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
|
||||
// Re-serialize and write
|
||||
const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });
|
||||
|
|
|
|||
|
|
@ -175,6 +175,89 @@ describe('SlWriteSourceTool — session gating', () => {
|
|||
);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('normalizes flat source and column descriptions before writing', async () => {
|
||||
const { tool, semanticLayerService } = makeTool();
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: '11111111-1111-1111-1111-111111111111',
|
||||
sourceName: 'orders',
|
||||
source: {
|
||||
name: 'orders',
|
||||
description: 'Finance orders used for invoice reconciliation.',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }],
|
||||
measures: [],
|
||||
joins: [],
|
||||
} as any,
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect(semanticLayerService.writeSource).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
descriptions: { user: 'Finance orders used for invoice reconciliation.' },
|
||||
columns: [expect.objectContaining({ descriptions: { user: 'Stable order identifier.' } })],
|
||||
}),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
|
||||
it('fills missing descriptions for ingest-written overlays and columns', async () => {
|
||||
const session = makeSession({
|
||||
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'metabase' },
|
||||
semanticLayerService: {
|
||||
loadSource: vi.fn().mockResolvedValue(null),
|
||||
loadAllSources: vi.fn().mockResolvedValue([]),
|
||||
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
|
||||
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
|
||||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||
listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
|
||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
||||
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
||||
} as any,
|
||||
});
|
||||
const { tool } = makeTool();
|
||||
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: session.connectionId,
|
||||
sourceName: 'mart_account_segments',
|
||||
source: {
|
||||
name: 'mart_account_segments',
|
||||
columns: [{ name: 'is_large_contract', type: 'boolean', expr: 'contract_arr_cents >= 20000000' }],
|
||||
measures: [{ name: 'account_count', expr: 'count(account_id)' }],
|
||||
} as any,
|
||||
} as any,
|
||||
{ ...baseContext, session },
|
||||
);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
descriptions: {
|
||||
ktx: expect.stringContaining('mart_account_segments'),
|
||||
},
|
||||
columns: [
|
||||
expect.objectContaining({
|
||||
descriptions: {
|
||||
ktx: expect.stringContaining('is large contract'),
|
||||
},
|
||||
}),
|
||||
],
|
||||
}),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('SlWriteSourceTool — disconnected-components warning in markdown', () => {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import {
|
|||
type SemanticLayerStructured,
|
||||
sourceDefinitionSchema,
|
||||
} from './base-semantic-layer.tool.js';
|
||||
import { normalizeSemanticLayerDescriptions } from '../description-normalization.js';
|
||||
import { slToolConnectionIdSchema } from './connection-id-schema.js';
|
||||
|
||||
const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]);
|
||||
|
|
@ -154,14 +155,16 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
semanticLayerService: SemanticLayerService,
|
||||
skipIndex: boolean,
|
||||
): Promise<ToolOutput<SemanticLayerStructured>> {
|
||||
const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql);
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
const isOverlay =
|
||||
!('table' in normalizedSource && normalizedSource.table) && !('sql' in normalizedSource && normalizedSource.sql);
|
||||
|
||||
const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName);
|
||||
const commitMessage = existing
|
||||
? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
|
||||
: `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
|
||||
|
||||
const yamlContent = YAML.stringify(source);
|
||||
const yamlContent = YAML.stringify(normalizedSource);
|
||||
|
||||
const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent);
|
||||
if (orphanError) {
|
||||
|
|
@ -172,7 +175,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent });
|
||||
}
|
||||
|
||||
const validatedSource = source as SemanticLayerSource;
|
||||
const validatedSource = normalizedSource as SemanticLayerSource;
|
||||
const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource);
|
||||
const validationErrors = validationResult.errors;
|
||||
const validationWarnings = [...validationResult.warnings];
|
||||
|
|
|
|||
|
|
@ -37,6 +37,42 @@ describe('WikiWriteTool', () => {
|
|||
expect(result.markdown).toMatch(/created/i);
|
||||
});
|
||||
|
||||
it('normalizes accidentally escaped markdown newlines before writing', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
|
||||
await tool.call(
|
||||
{
|
||||
key: 'large-contract-requesters',
|
||||
summary: 'Cross-schema Metabase query',
|
||||
content:
|
||||
'# Large Contract Requesters\\n\\n**Source card:** Metabase #110\\n\\n## SQL\\n\\n```sql\\nselect * from orbit_analytics.mart_account_segments\\n```\\n',
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(wikiService.writePage.mock.calls[0][4]).toBe(
|
||||
'# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n',
|
||||
);
|
||||
expect(wikiService.syncSinglePage.mock.calls[0][4]).toBe(
|
||||
'# Large Contract Requesters\n\n**Source card:** Metabase #110\n\n## SQL\n\n```sql\nselect * from orbit_analytics.mart_account_segments\n```\n',
|
||||
);
|
||||
});
|
||||
|
||||
it('preserves intentional escaped newline examples in inline code', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
|
||||
await tool.call(
|
||||
{
|
||||
key: 'newline-token',
|
||||
summary: 'Escaped newline token',
|
||||
content: 'Use `\\n\\n` when documenting the literal separator.',
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
|
||||
expect(wikiService.writePage.mock.calls[0][4]).toBe('Use `\\n\\n` when documenting the literal separator.');
|
||||
});
|
||||
|
||||
it('skips syncSinglePage when session is worktree-scoped', async () => {
|
||||
const { tool, wikiService } = makeTool();
|
||||
const session: ToolSession = {
|
||||
|
|
|
|||
|
|
@ -47,6 +47,22 @@ interface WikiWriteStructured {
|
|||
action?: 'created' | 'updated';
|
||||
}
|
||||
|
||||
function looksLikeEscapedMarkdown(content: string): boolean {
|
||||
const withoutInlineCode = content.replace(/`[^`]*`/g, '');
|
||||
return /\\n\\n|(?:^|\\n)#{1,6}\s|\\n[-*]\s|\\n\d+\.\s|\\n```|\\n\|/.test(withoutInlineCode);
|
||||
}
|
||||
|
||||
function normalizeAccidentalEscapedMarkdownNewlines(content: string): string {
|
||||
const escapedBreaks = content.match(/\\[rn]/g)?.length ?? 0;
|
||||
if (escapedBreaks < 2) return content;
|
||||
|
||||
const actualBreaks = content.match(/\r?\n/g)?.length ?? 0;
|
||||
if (actualBreaks > 0 && escapedBreaks <= actualBreaks * 4) return content;
|
||||
if (!looksLikeEscapedMarkdown(content)) return content;
|
||||
|
||||
return content.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n').replace(/\\r/g, '\n');
|
||||
}
|
||||
|
||||
export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
|
||||
readonly name = 'wiki_write';
|
||||
|
||||
|
|
@ -125,7 +141,7 @@ tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to
|
|||
};
|
||||
|
||||
if (input.content) {
|
||||
finalContent = input.content;
|
||||
finalContent = normalizeAccidentalEscapedMarkdownNewlines(input.content);
|
||||
} else {
|
||||
const editResult = applySqlEdits(existing?.content ?? '', input.replacements ?? []);
|
||||
if (!editResult.success) {
|
||||
|
|
|
|||
|
|
@ -36,6 +36,22 @@ class SourceColumnTests(BaseModel):
|
|||
dbt_by_package: dict[str, list[str]] | None = None
|
||||
|
||||
|
||||
_DEFAULT_DESCRIPTION_PRIORITY = ["user", "ai", "dbt", "db"]
|
||||
|
||||
|
||||
def _resolve_description_map(descriptions: dict[str, str] | None) -> str | None:
|
||||
if not descriptions:
|
||||
return None
|
||||
for source in _DEFAULT_DESCRIPTION_PRIORITY:
|
||||
text = descriptions.get(source)
|
||||
if text:
|
||||
return text
|
||||
for text in descriptions.values():
|
||||
if text:
|
||||
return text
|
||||
return None
|
||||
|
||||
|
||||
class FreshnessDbt(BaseModel):
|
||||
raw: Any | None = None
|
||||
loaded_at_field: str | None = None
|
||||
|
|
@ -47,12 +63,19 @@ class SourceColumn(BaseModel):
|
|||
visibility: ColumnVisibility = ColumnVisibility.PUBLIC
|
||||
role: ColumnRole = ColumnRole.DEFAULT
|
||||
description: str | None = None
|
||||
descriptions: dict[str, str] | None = None
|
||||
expr: str | None = None
|
||||
natural_granularity: str | None = None
|
||||
constraints: dict[str, ColumnDbtConstraints] | None = None
|
||||
enum_values: dict[str, list[str]] | None = None
|
||||
tests: SourceColumnTests | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def resolve_description(self) -> SourceColumn:
|
||||
if self.description is None:
|
||||
self.description = _resolve_description_map(self.descriptions)
|
||||
return self
|
||||
|
||||
|
||||
class JoinDeclaration(BaseModel):
|
||||
to: str
|
||||
|
|
@ -84,6 +107,7 @@ class DefaultTimeDimensionDbt(BaseModel):
|
|||
class SourceDefinition(BaseModel):
|
||||
name: str
|
||||
description: str | None = None
|
||||
descriptions: dict[str, str] | None = None
|
||||
table: str | None = None
|
||||
sql: str | None = None
|
||||
grain: list[str]
|
||||
|
|
@ -97,6 +121,8 @@ class SourceDefinition(BaseModel):
|
|||
|
||||
@model_validator(mode="after")
|
||||
def validate_source(self) -> SourceDefinition:
|
||||
if self.description is None:
|
||||
self.description = _resolve_description_map(self.descriptions)
|
||||
if self.table and self.sql:
|
||||
raise ValueError("'table' and 'sql' are mutually exclusive")
|
||||
if not self.grain:
|
||||
|
|
|
|||
|
|
@ -33,6 +33,14 @@ class TestSourceColumn:
|
|||
assert col.visibility == ColumnVisibility.HIDDEN
|
||||
assert col.role == ColumnRole.TIME
|
||||
|
||||
def test_descriptions_map_resolves_visible_description(self):
|
||||
col = SourceColumn(
|
||||
name="account_id",
|
||||
type="string",
|
||||
descriptions={"ktx": "Identifier for the related account."},
|
||||
)
|
||||
assert col.description == "Identifier for the related account."
|
||||
|
||||
def test_invalid_type(self):
|
||||
with pytest.raises(ValidationError):
|
||||
SourceColumn(name="id", type="integer")
|
||||
|
|
@ -63,6 +71,16 @@ class TestSourceDefinition:
|
|||
assert src.is_sql_source
|
||||
assert not src.is_table_source
|
||||
|
||||
def test_descriptions_map_resolves_visible_description(self):
|
||||
src = SourceDefinition(
|
||||
name="orders",
|
||||
descriptions={"ktx": "Semantic-layer source for orders."},
|
||||
table="public.orders",
|
||||
grain=["id"],
|
||||
columns=[SourceColumn(name="id", type="number")],
|
||||
)
|
||||
assert src.description == "Semantic-layer source for orders."
|
||||
|
||||
def test_table_and_sql_mutually_exclusive(self):
|
||||
with pytest.raises(ValidationError, match="mutually exclusive"):
|
||||
SourceDefinition(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue