Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,34 @@
/** Known description origins, in the order used for the default priority. */
const DESCRIPTION_SOURCES = ['user', 'ai', 'dbt', 'db'] as const;

/** Union of the literal origin names listed in `DESCRIPTION_SOURCES`. */
type DescriptionSource = (typeof DESCRIPTION_SOURCES)[number];

/** Description text keyed by origin name; keys are not limited to the known origins. */
type DescriptionSources = Record<string, string>;

/** Settings that decide which description wins when several are present. */
interface DescriptionResolutionConfig {
  /** Origin names to try, first to last. */
  priority: string[];
}

/** Default lookup order: a fresh mutable copy of every known origin. */
export const DEFAULT_PRIORITY: DescriptionSource[] = Array.from(DESCRIPTION_SOURCES);
/**
 * Resolves which description to surface based on a priority list.
 *
 * Walks `config.priority` in order and returns the first entry whose value is
 * a non-blank string. If no priority key matches, falls back to the first
 * non-blank string value in the map (covers unknown future sources).
 *
 * The map may originate from unvalidated YAML, so the lookup is defensive:
 * only own properties are considered, and values that are not strings or
 * contain nothing but whitespace are treated as absent.
 *
 * @param descriptions - Description text keyed by source name, or `undefined`.
 * @param config - Holds the ordered list of source names to try.
 * @returns The selected description text (untrimmed), or `null` when none qualifies.
 */
export function resolveDescription(
  descriptions: DescriptionSources | undefined,
  config: DescriptionResolutionConfig,
): string | null {
  // Reject anything that is not a plain keyed object (e.g. a bare string or an array from loose YAML).
  if (!descriptions || typeof descriptions !== 'object' || Array.isArray(descriptions)) {
    return null;
  }

  const hasText = (value: unknown): value is string => typeof value === 'string' && value.trim().length > 0;

  for (const source of config.priority) {
    // Own-property check keeps keys such as "constructor" from resolving to Object.prototype members.
    if (!Object.prototype.hasOwnProperty.call(descriptions, source)) {
      continue;
    }
    const text: unknown = descriptions[source];
    if (hasText(text)) {
      return text;
    }
  }

  // Fallback: first available value (for unknown future sources)
  const fallback = Object.values(descriptions as Record<string, unknown>).find(hasText);
  return fallback ?? null;
}

View file

@ -0,0 +1,32 @@
// Barrel module: re-exports the semantic-layer types, schemas, services and helpers
// from their individual modules so they can be imported from a single entry point.

// Type-only re-exports (erased at compile time).
export type { SlValidationResult, SlValidatorPort } from './sl-validator.port.js';
export type {
SemanticLayerQueryExecutionResult,
SemanticLayerQueryInput,
SemanticLayerSource,
SlDictionaryMatch,
SlSearchLaneSummary,
SlSearchMatchReason,
SlSearchMetadata,
} from './types.js';
export type {
KloConnectionInfo,
KloQueryResult,
SlConnectionCatalogPort,
SlPythonPort,
SlSourcesIndexPort,
} from './ports.js';
// Runtime re-exports.
export { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
export { isOverlaySource, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
export {
composeOverlay,
enrichColumnsFromManifest,
findDanglingSegmentRefs,
SemanticLayerService,
} from './semantic-layer.service.js';
export { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
export type { SlDictionaryEntry } from './sl-dictionary-profile.js';
export { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
export { SqliteSlSourcesIndex, type SqliteSlSourcesIndexOptions } from './sqlite-sl-sources-index.js';
// Wildcard re-exports: everything exported by these modules is part of this entry point.
export * from './local-sl.js';
export * from './local-query.js';
export * from './tools/index.js';

View file

@ -0,0 +1,260 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KloSemanticLayerComputePort } from '../daemon/index.js';
import { initKloProject, type KloLocalProject } from '../project/index.js';
import { compileLocalSlQuery } from './local-query.js';
// Test suite for compileLocalSlQuery: covers compile-only mode, manifest-backed
// sources, implicit connection resolution, executed mode, and the error paths.
describe('compileLocalSlQuery', () => {
let tempDir: string;
let project: KloLocalProject;
let compute: KloSemanticLayerComputePort;
// Each test starts from a fresh temporary project that has a single postgres
// connection ("warehouse"), a standalone `orders` source, an `orders_overlay`
// file without table/sql, and a stubbed compute port returning fixed SQL.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-local-query-'));
project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' });
project.config.connections.warehouse = { driver: 'postgres', readonly: true };
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: public.orders
grain:
- id
columns:
- name: id
type: number
- name: status
type: string
measures:
- name: order_count
expr: count(*)
joins: []
`,
'klo',
'klo@example.com',
'Add orders source',
);
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders_overlay.yaml',
`name: orders_overlay
inherits_columns_from: orders
columns:
- name: paid_at
type: timestamp
joins: []
measures: []
grain: []
`,
'klo',
'klo@example.com',
'Add overlay source',
);
// The stub echoes the requested dialect and the query's measures/dimensions back in the plan.
compute = {
query: vi.fn(async (input) => ({
sql: 'select status, count(*) as order_count from public.orders group by status',
dialect: input.dialect,
columns: [{ name: 'orders.status' }, { name: 'orders.order_count' }],
plan: { measures: input.query.measures, dimensions: input.query.dimensions },
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
};
});
// Remove the temporary project directory after every test.
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Only `orders` is expected in the compute call; the overlay file is not forwarded.
it('compiles a local semantic-layer query with computable sources only', async () => {
const result = await compileLocalSlQuery(project, {
connectionId: 'warehouse',
query: {
measures: ['orders.order_count'],
dimensions: ['orders.status'],
limit: 25,
},
compute,
});
expect(compute.query).toHaveBeenCalledWith({
sources: [
{
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'status', type: 'string' },
],
measures: [{ name: 'order_count', expr: 'count(*)' }],
joins: [],
},
],
dialect: 'postgres',
query: {
measures: ['orders.order_count'],
dimensions: ['orders.status'],
limit: 25,
},
});
expect(result).toEqual({
connectionId: 'warehouse',
dialect: 'postgres',
sql: 'select status, count(*) as order_count from public.orders group by status',
headers: ['orders.status', 'orders.order_count'],
rows: [],
totalRows: 0,
plan: {
measures: ['orders.order_count'],
dimensions: ['orders.status'],
execution: {
mode: 'compile_only',
reason: 'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.',
},
},
});
});
// A table declared in a `_schema` manifest shard is expected to reach the compute port as a source.
it('compiles a local semantic-layer query from manifest-backed scan sources', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
`tables:
payments:
table: public.payments
columns:
- name: payment_id
type: number
pk: true
- name: amount
type: number
`,
'klo',
'klo@example.com',
'Add manifest shard',
);
await compileLocalSlQuery(project, {
connectionId: 'warehouse',
query: {
measures: ['sum(payments.amount)'],
dimensions: [],
},
compute,
});
expect(compute.query).toHaveBeenLastCalledWith({
sources: expect.arrayContaining([
{
name: 'payments',
table: 'public.payments',
grain: ['payment_id'],
columns: [
{
name: 'payment_id',
type: 'number',
role: undefined,
descriptions: undefined,
constraints: undefined,
enum_values: undefined,
tests: undefined,
},
{
name: 'amount',
type: 'number',
role: undefined,
descriptions: undefined,
constraints: undefined,
enum_values: undefined,
tests: undefined,
},
],
joins: [],
measures: [],
},
]),
dialect: 'postgres',
query: {
measures: ['sum(payments.amount)'],
dimensions: [],
},
});
});
// With exactly one configured connection, connectionId may be left out.
it('resolves the only configured connection when connectionId is omitted', async () => {
await compileLocalSlQuery(project, {
query: { measures: ['orders.order_count'], dimensions: [] },
compute,
});
expect(compute.query).toHaveBeenCalledWith(
expect.objectContaining({
dialect: 'postgres',
}),
);
});
// Executed mode: the compiled SQL is handed to the executor and its rows are returned.
it('executes compiled SQL through a local query executor when requested', async () => {
const queryExecutor = {
execute: vi.fn(async () => ({
headers: ['status', 'order_count'],
rows: [['paid', 2]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const result = await compileLocalSlQuery(project, {
connectionId: 'warehouse',
query: {
measures: ['orders.order_count'],
dimensions: ['orders.status'],
limit: 25,
},
compute,
execute: true,
maxRows: 10,
queryExecutor,
});
// maxRows (10) is passed through to the executor rather than the query limit (25).
expect(queryExecutor.execute).toHaveBeenCalledWith({
connectionId: 'warehouse',
projectDir: project.projectDir,
connection: { driver: 'postgres', readonly: true },
sql: 'select status, count(*) as order_count from public.orders group by status',
maxRows: 10,
});
expect(result.rows).toEqual([['paid', 2]]);
expect(result.totalRows).toBe(1);
expect(result.plan.execution).toEqual({
mode: 'executed',
driver: 'postgres',
maxRows: 10,
rowCount: 1,
});
});
// execute: true without a queryExecutor must reject.
it('requires a query executor for executed mode', async () => {
await expect(
compileLocalSlQuery(project, {
connectionId: 'warehouse',
query: { measures: ['orders.order_count'], dimensions: [] },
compute,
execute: true,
}),
).rejects.toThrow('Local semantic-layer execution requires a query executor.');
});
// A second connection makes the implicit choice ambiguous, so the call must reject.
it('requires connectionId when multiple connections are configured', async () => {
project.config.connections.analytics = { driver: 'bigquery', readonly: true };
await expect(
compileLocalSlQuery(project, {
query: { measures: ['orders.order_count'], dimensions: [] },
compute,
}),
).rejects.toThrow('connectionId is required when the local project has zero or multiple connections.');
});
});

View file

@ -0,0 +1,150 @@
import type { KloSqlQueryExecutorPort } from '../connections/index.js';
import type { KloSemanticLayerComputePort } from '../daemon/index.js';
import type { KloLocalProject } from '../project/index.js';
import { loadLocalSlSourceRecords } from './local-sl.js';
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
/** Reason recorded in the returned plan when SQL was compiled but not executed. */
const COMPILE_ONLY_REASON =
'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.';
/** Options accepted by `compileLocalSlQuery`. */
export interface CompileLocalSlQueryOptions {
/** Connection to compile against; when omitted, the project's only configured connection is used. */
connectionId?: string;
/** The semantic-layer query to compile. */
query: SemanticLayerQueryInput;
/** Compute port whose `query` method produces the SQL. */
compute: KloSemanticLayerComputePort;
/** When truthy, the compiled SQL is also run through `queryExecutor`. */
execute?: boolean;
/** Row cap passed to the executor; falls back to `query.limit` when omitted. */
maxRows?: number;
/** Executor used in executed mode; required when `execute` is truthy. */
queryExecutor?: KloSqlQueryExecutorPort;
}
/** Result of `compileLocalSlQuery`: the execution result plus the resolved connection and dialect. */
export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionResult {
connectionId: string;
dialect: string;
}
/**
 * Validates a value that will be embedded in a project-relative path.
 *
 * @param kind - Human-readable label used in the error message.
 * @param value - Candidate token.
 * @returns `value` unchanged when it passes every check.
 * @throws Error when the value is blank, contains `..`, a backslash or `//`,
 *   or starts with `/` or `.`.
 */
function assertSafePathToken(kind: string, value: string): string {
  const isBlank = value.trim().length === 0;
  const hasForbiddenSequence = ['..', '\\', '//'].some((fragment) => value.includes(fragment));
  const hasForbiddenPrefix = value.startsWith('/') || value.startsWith('.');

  if (isBlank || hasForbiddenSequence || hasForbiddenPrefix) {
    throw new Error(`Unsafe ${kind}: ${value}`);
  }
  return value;
}
/**
 * Validates a connection id before it is used to build a path.
 *
 * The id must start with an ASCII letter or digit and continue with letters,
 * digits, `_` or `-`; it is then also run through the generic path-token check.
 *
 * @returns The id unchanged.
 * @throws Error when the id does not match the allowed pattern.
 */
function assertSafeConnectionId(connectionId: string): string {
  const wellFormed = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
  if (wellFormed) {
    return assertSafePathToken('connection id', connectionId);
  }
  throw new Error(`Unsafe connection id: ${connectionId}`);
}
/**
 * Maps a connection driver name to the SQL dialect name passed to the compute port.
 *
 * Matching is case-insensitive. A missing driver and any unrecognised driver
 * both resolve to `'postgres'`.
 */
function dialectForDriver(driver: string | undefined): string {
  switch ((driver ?? 'postgres').toUpperCase()) {
    case 'POSTGRESQL':
    case 'POSTGRES':
      return 'postgres';
    case 'BIGQUERY':
      return 'bigquery';
    case 'SNOWFLAKE':
      return 'snowflake';
    case 'MYSQL':
      return 'mysql';
    case 'SQLSERVER':
    case 'MSSQL':
      return 'tsql';
    case 'SQLITE':
      return 'sqlite';
    case 'DUCKDB':
      return 'duckdb';
    case 'CLICKHOUSE':
      return 'clickhouse';
    case 'REDSHIFT':
      return 'redshift';
    case 'DATABRICKS':
      return 'databricks';
    default:
      return 'postgres';
  }
}
/**
 * Picks the connection id to compile against.
 *
 * An explicitly requested id is validated and returned. Otherwise the project
 * must have exactly one configured connection, which is validated and returned.
 *
 * @throws Error when no id is requested and the project has zero or several connections,
 *   or when the chosen id fails validation.
 */
function resolveLocalConnectionId(project: KloLocalProject, requested: string | undefined): string {
  if (!requested) {
    const [onlyId, ...others] = Object.keys(project.config.connections).sort();
    if (onlyId === undefined || others.length > 0) {
      throw new Error('connectionId is required when the local project has zero or multiple connections.');
    }
    return assertSafeConnectionId(onlyId);
  }
  return assertSafeConnectionId(requested);
}
/**
 * Loads the connection's semantic-layer sources and keeps the ones that can be
 * compiled, i.e. those with a truthy `table` or `sql`.
 *
 * @returns Shallow copies of the matching source objects, in the order the loader returned them.
 */
async function loadComputableSources(
  project: KloLocalProject,
  connectionId: string,
): Promise<Record<string, unknown>[]> {
  const records = await loadLocalSlSourceRecords(project, {
    connectionId: assertSafeConnectionId(connectionId),
  });

  const computable: Record<string, unknown>[] = [];
  for (const { source } of records) {
    if (source.table || source.sql) {
      // Copy so callers never hold a reference to the loader's own object.
      computable.push({ ...source });
    }
  }
  return computable;
}
/**
 * Extracts result headers from compiled column descriptors.
 *
 * Entries whose `name` is missing, not a string, or empty are dropped.
 */
function headersFromColumns(columns: Array<Record<string, unknown>>): string[] {
  const headers: string[] = [];
  for (const { name } of columns) {
    if (typeof name === 'string' && name.length > 0) {
      headers.push(name);
    }
  }
  return headers;
}
/**
 * Compiles a semantic-layer query for a local project and optionally executes it.
 *
 * Steps: resolve the connection, load its computable sources, ask the compute
 * port for SQL, then either return the compiled SQL with no rows (compile-only)
 * or run it through `options.queryExecutor` (when `options.execute` is truthy).
 *
 * The executor precondition is checked before any sources are loaded or
 * compiled, so an unusable call fails without doing that work first.
 *
 * @param project - Local project providing the connection config and file store.
 * @param options - Query, compute port, and optional execution settings.
 * @returns The compiled SQL, headers, rows and a plan annotated with an `execution` section.
 * @throws Error when `execute` is truthy but no `queryExecutor` is supplied, or
 *   when the connection id cannot be resolved or is unsafe.
 */
export async function compileLocalSlQuery(
  project: KloLocalProject,
  options: CompileLocalSlQueryOptions,
): Promise<CompileLocalSlQueryResult> {
  // An executor supplied without `execute` is ignored, matching compile-only mode.
  const executor = options.execute ? options.queryExecutor : undefined;
  if (options.execute && !executor) {
    throw new Error('Local semantic-layer execution requires a query executor.');
  }

  const connectionId = resolveLocalConnectionId(project, options.connectionId);
  // May be undefined when an explicitly requested id is not configured in the project.
  const connection = project.config.connections[connectionId];

  const response = await options.compute.query({
    sources: await loadComputableSources(project, connectionId),
    dialect: dialectForDriver(connection?.driver),
    query: options.query,
  });

  if (!executor) {
    return {
      connectionId,
      dialect: response.dialect,
      sql: response.sql,
      headers: headersFromColumns(response.columns),
      rows: [],
      totalRows: 0,
      plan: {
        ...response.plan,
        execution: {
          mode: 'compile_only',
          reason: COMPILE_ONLY_REASON,
        },
      },
    };
  }

  // An explicit maxRows wins; otherwise reuse the query's own limit (possibly undefined).
  const maxRows = options.maxRows ?? options.query.limit;
  const execution = await executor.execute({
    connectionId,
    projectDir: project.projectDir,
    connection,
    sql: response.sql,
    maxRows,
  });

  return {
    connectionId,
    dialect: response.dialect,
    sql: response.sql,
    headers: execution.headers,
    rows: execution.rows,
    totalRows: execution.totalRows,
    plan: {
      ...response.plan,
      execution: {
        mode: 'executed',
        driver: connection?.driver ?? 'unknown',
        maxRows,
        rowCount: execution.rowCount,
      },
    },
  };
}

View file

@ -0,0 +1,321 @@
import { access, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKloProject, type KloLocalProject } from '../project/index.js';
import {
listLocalSlSources,
readLocalSlSource,
searchLocalSlSources,
validateLocalSlSource,
writeLocalSlSource,
} from './local-sl.js';
// Valid standalone source fixture: an `orders` table with two columns and one measure.
// Every line is newline-terminated, so the document ends with a single trailing newline.
const ORDERS_YAML = [
  'name: orders',
  'table: public.orders',
  'grain:',
  ' - order_id',
  'columns:',
  ' - name: order_id',
  ' type: string',
  ' - name: revenue',
  ' type: number',
  'measures:',
  ' - name: total_revenue',
  ' expr: sum(revenue)',
]
  .map((line) => `${line}\n`)
  .join('');
// Valid standalone source fixture: a described `tickets` table with two columns and one measure.
// Every line is newline-terminated, so the document ends with a single trailing newline.
const SUPPORT_YAML = [
  'name: tickets',
  'description: Support tickets grouped by priority.',
  'table: public.tickets',
  'grain:',
  ' - ticket_id',
  'columns:',
  ' - name: ticket_id',
  ' type: string',
  ' - name: priority',
  ' type: string',
  'measures:',
  ' - name: ticket_count',
  ' expr: count(*)',
]
  .map((line) => `${line}\n`)
  .join('');
// Test suite for the local semantic-layer helpers: write/read/list/validate,
// manifest-backed sources, search (lexical, dictionary and token lanes), and
// rejection of invalid YAML and unsafe source names.
describe('local semantic-layer helpers', () => {
let tempDir: string;
let project: KloLocalProject;
// Each test runs against a freshly initialised project in its own temp directory.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-local-sl-'));
project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' });
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Round trip: a written source is readable byte-for-byte and shows up in the listing.
it('writes, reads, lists, and validates semantic-layer sources', async () => {
const write = await writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'orders',
yaml: ORDERS_YAML,
});
expect(write.path).toBe('semantic-layer/warehouse/orders.yaml');
await expect(
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' }),
).resolves.toMatchObject({
connectionId: 'warehouse',
name: 'orders',
path: 'semantic-layer/warehouse/orders.yaml',
yaml: ORDERS_YAML,
});
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
{
columnCount: 2,
connectionId: 'warehouse',
joinCount: 0,
measureCount: 1,
name: 'orders',
path: 'semantic-layer/warehouse/orders.yaml',
},
]);
await expect(validateLocalSlSource(ORDERS_YAML)).resolves.toEqual({ valid: true, errors: [] });
});
// Tables declared in a `_schema` manifest shard are exposed as sources whose
// path is the shard path followed by `#<table name>`.
it('lists and reads manifest-backed scan sources as queryable sources', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
`tables:
payments:
table: public.payments
columns:
- name: payment_id
type: number
pk: true
- name: amount
type: number
`,
'klo',
'klo@example.com',
'Add manifest shard',
);
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
{
columnCount: 2,
connectionId: 'warehouse',
joinCount: 0,
measureCount: 0,
name: 'payments',
path: 'semantic-layer/warehouse/_schema/public.yaml#payments',
},
]);
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'payments' })).resolves.toEqual(
expect.objectContaining({
columnCount: 2,
connectionId: 'warehouse',
joinCount: 0,
measureCount: 0,
name: 'payments',
path: 'semantic-layer/warehouse/_schema/public.yaml#payments',
yaml: expect.stringContaining('table: public.payments'),
}),
);
});
// Same manifest expansion, but listing without a connectionId filter.
it('expands manifest-backed scan sources when listing all connections', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
`tables:
payments:
table: public.payments
columns:
- name: payment_id
type: number
pk: true
- name: amount
type: number
`,
'klo',
'klo@example.com',
'Add manifest shard',
);
await expect(listLocalSlSources(project)).resolves.toEqual([
{
columnCount: 2,
connectionId: 'warehouse',
joinCount: 0,
measureCount: 0,
name: 'payments',
path: 'semantic-layer/warehouse/_schema/public.yaml#payments',
},
]);
});
// Only `orders` mentions revenue, so it must be the single hit; searching also
// leaves a SQLite database file at `.klo/db.sqlite` inside the project.
it('searches local semantic-layer source text through SQLite FTS', async () => {
await writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'orders',
yaml: ORDERS_YAML,
});
await writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'tickets',
yaml: SUPPORT_YAML,
});
const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'total revenue' });
expect(results).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
name: 'orders',
path: 'semantic-layer/warehouse/orders.yaml',
score: expect.any(Number),
}),
]);
expect(results[0]?.score).toBeGreaterThan(0);
await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined();
});
// Two connections each define an `orders` source; a search without connectionId returns both.
it('searches all connections with one global hybrid ranking pass', async () => {
await writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'orders',
yaml: ORDERS_YAML,
});
await writeLocalSlSource(project, {
connectionId: 'finance',
sourceName: 'orders',
yaml: [
'name: orders',
'description: Finance orders used for invoice reconciliation.',
'table: finance.orders',
'grain:',
' - order_id',
'columns:',
' - name: order_id',
' type: string',
' - name: invoice_status',
' type: string',
'',
].join('\n'),
});
const results = await searchLocalSlSources(project, { query: 'orders' });
expect(results.map((result) => `${result.connectionId}/${result.name}`)).toEqual([
'finance/orders',
'warehouse/orders',
]);
expect(results[0]).toMatchObject({
score: expect.any(Number),
matchReasons: expect.arrayContaining(['lexical']),
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'lexical', status: 'available' })]),
});
});
// "refunded" appears only in the seeded profile's sample values, not in the source YAML,
// so the hit must be attributed to the dictionary lane.
it('returns dictionary evidence when collected sample values explain a match', async () => {
await writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'orders',
yaml: ORDERS_YAML,
});
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
`${JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
sqlAvailable: true,
queryCount: 2,
tables: [],
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
rowCount: 10,
nullCount: 0,
distinctCount: 2,
uniquenessRatio: 0.2,
nullRate: 0,
sampleValues: ['paid', 'refunded'],
minTextLength: 4,
maxTextLength: 8,
},
},
warnings: [],
},
null,
2,
)}\n`,
'klo',
'klo@example.com',
'Seed dictionary profile',
);
const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'refunded' });
expect(results).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
name: 'orders',
matchReasons: ['dictionary'],
dictionaryMatches: [{ column: 'status', values: ['refunded'] }],
}),
]);
});
// A query with trailing punctuation ("orders---") must still produce a token-lane match.
it('adds the token lane alongside lexical matches for normalized query terms', async () => {
await writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'orders',
yaml: ORDERS_YAML,
});
const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'orders---' });
expect(results[0]).toMatchObject({
connectionId: 'warehouse',
name: 'orders',
matchReasons: expect.arrayContaining(['token']),
});
});
// The fixture has no `grain`, so validation must report it and the write must be refused.
it('reports schema validation errors without writing invalid YAML', async () => {
const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n');
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
valid: false,
errors: [expect.stringContaining('grain')],
});
await expect(
writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'broken',
yaml: invalidYaml,
}),
).rejects.toThrow('Invalid semantic-layer source');
});
// A source name containing a parent-directory segment must be rejected.
it('rejects unsafe source paths', async () => {
await expect(
readLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: '../orders',
}),
).rejects.toThrow('Unsafe semantic-layer source name');
});
});

View file

@ -0,0 +1,595 @@
import { join } from 'node:path';
import YAML from 'yaml';
import { z } from 'zod';
import type { KloEmbeddingPort, KloFileWriteResult } from '../core/index.js';
import type { KloLocalProject } from '../project/index.js';
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js';
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
/** Lightweight description of one semantic-layer source, without its YAML body. */
export interface LocalSlSourceSummary {
connectionId: string;
name: string;
/** Location of the source; manifest-projected sources use `<manifest file>#<table name>`. */
path: string;
/** Resolved description text; omitted when none is available. */
description?: string;
columnCount: number;
measureCount: number;
joinCount: number;
}
/** A source summary returned from search, with its score and optional match evidence. */
export interface LocalSlSourceSearchResult extends LocalSlSourceSummary {
score: number;
matchReasons?: SlSearchMatchReason[];
dictionaryMatches?: SlDictionaryMatch[];
lanes?: SlSearchLaneSummary[];
}
/** Input accepted by the local semantic-layer search. */
export interface LocalSlSearchInput {
/** Restricts the search to one connection when set. */
connectionId?: string;
query: string;
embeddingService?: KloEmbeddingPort | null;
limit?: number;
backend?: 'pglite-owner-prototype';
pglite?: PgliteSlSearchPrototypeOwnerOptions;
}
/** A source summary together with its YAML text. */
export interface LocalSlSource extends LocalSlSourceSummary {
yaml: string;
}
/** A loaded source: summary, YAML text, and the source object itself. */
export interface LocalSlSourceRecord extends LocalSlSource {
source: SemanticLayerSource;
}
/** Outcome of validating a source's YAML; `errors` is empty when `valid` is true. */
export interface LocalSlValidationResult {
valid: boolean;
errors: string[];
}
// Author name and email passed to the file store for writes made by these helpers.
const LOCAL_AUTHOR = 'klo';
const LOCAL_AUTHOR_EMAIL = 'klo@example.com';
/**
 * Guards a value that is about to be interpolated into a project-relative path.
 *
 * @param kind - Label for the error message (e.g. "connection id").
 * @param value - Candidate token.
 * @returns `value` unchanged when it is acceptable.
 * @throws Error when the value is blank, starts with `/` or `.`, or contains
 *   `..`, a backslash, or `//`.
 */
function assertSafePathToken(kind: string, value: string): string {
  const rejected =
    value.trim() === '' ||
    /^[/.]/.test(value) ||
    value.indexOf('..') !== -1 ||
    value.indexOf('\\') !== -1 ||
    value.indexOf('//') !== -1;

  if (!rejected) {
    return value;
  }
  throw new Error(`Unsafe ${kind}: ${value}`);
}
/**
 * Validates a connection id before it becomes a directory name.
 *
 * Accepts an ASCII letter or digit followed by letters, digits, `_` or `-`,
 * then applies the generic path-token check as well.
 *
 * @returns The id unchanged.
 * @throws Error when the id does not match the allowed pattern.
 */
function assertSafeConnectionId(connectionId: string): string {
  const matchesPattern = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
  if (matchesPattern) {
    return assertSafePathToken('connection id', connectionId);
  }
  throw new Error(`Unsafe connection id: ${connectionId}`);
}
/**
 * Non-throwing check for the connection-id pattern; also narrows away `undefined`.
 * Intended for filtering path segments discovered on disk.
 */
function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
  if (typeof connectionId !== 'string') {
    return false;
  }
  return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
}
/**
 * Validates a source name before it becomes a file name.
 *
 * Accepts a lowercase ASCII letter or digit followed by lowercase letters,
 * digits or `_`, then applies the generic path-token check as well.
 *
 * @returns The name unchanged.
 * @throws Error when the name does not match the allowed pattern.
 */
function assertSafeSourceName(sourceName: string): string {
  const matchesPattern = /^[a-z0-9][a-z0-9_]*$/.test(sourceName);
  if (matchesPattern) {
    return assertSafePathToken('semantic-layer source name', sourceName);
  }
  throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Builds the project-relative YAML path for a standalone source.
 *
 * Both parts are validated first (connection id, then source name).
 *
 * @throws Error when either part is unsafe.
 */
function slPath(connectionId: string, sourceName: string): string {
  const safeConnectionId = assertSafeConnectionId(connectionId);
  const safeSourceName = assertSafeSourceName(sourceName);
  return `semantic-layer/${safeConnectionId}/${safeSourceName}.yaml`;
}
/**
 * Derives a source name from a file path: the final `/`-separated segment with
 * a trailing `.yaml` or `.yml` extension removed.
 */
function sourceNameFromPath(path: string): string {
  // lastIndexOf returns -1 when there is no slash, so the slice then starts at 0.
  const fileName = path.slice(path.lastIndexOf('/') + 1);
  return fileName.replace(/\.ya?ml$/, '');
}
/**
 * Parses YAML text and requires the top-level value to be a mapping.
 *
 * @throws Error when the document parses to anything other than a non-array object;
 *   parser errors propagate unchanged.
 */
function parseYamlRecord(raw: string): Record<string, unknown> {
  const document: unknown = YAML.parse(raw);
  if (isRecord(document)) {
    return document;
  }
  throw new Error('Semantic-layer source YAML must contain an object');
}
/**
 * Collects the description texts declared on a parsed source.
 *
 * Non-blank string entries of the `descriptions` mapping are copied over. A
 * non-blank flat `description` is then used as the `user` entry, unless a
 * `user` entry was already collected.
 *
 * @returns The collected map, or `undefined` when nothing usable was found.
 */
function descriptionMap(value: Record<string, unknown>): Record<string, string> | undefined {
  const hasText = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.trim().length > 0;

  const collected: Record<string, string> = {};

  const nested = value.descriptions;
  if (isRecord(nested)) {
    for (const key of Object.keys(nested)) {
      const text = nested[key];
      if (hasText(text)) {
        collected[key] = text;
      }
    }
  }

  const flat = value.description;
  if (!collected.user && hasText(flat)) {
    collected.user = flat;
  }

  return Object.keys(collected).length === 0 ? undefined : collected;
}
/**
 * Converts a caught error into a list of readable messages.
 *
 * Zod errors yield one `path: message` line per issue, with `<root>` standing in
 * for an empty path. Any other value yields a single message.
 */
function validationErrors(error: unknown): string[] {
  if (!(error instanceof z.ZodError)) {
    const message = error instanceof Error ? error.message : String(error);
    return [message];
  }
  return error.issues.map((issue) => {
    const location = issue.path.join('.') || '<root>';
    return `${location}: ${issue.message}`;
  });
}
/**
 * Builds a summary directly from a source's raw YAML text.
 *
 * The name comes from the YAML `name` field when it is a non-empty string,
 * otherwise from the file name. Counts are array lengths, or 0 when the field
 * is not an array. `description` is only present when one could be resolved.
 *
 * @throws Error when the YAML does not parse to an object.
 */
function summarizeSource(args: { connectionId: string; path: string; raw: string }): LocalSlSourceSummary {
  const { connectionId, path, raw } = args;
  const parsed = parseYamlRecord(raw);
  const lengthOf = (field: unknown): number => (Array.isArray(field) ? field.length : 0);

  const declaredName = parsed.name;
  const name =
    typeof declaredName === 'string' && declaredName.length > 0 ? declaredName : sourceNameFromPath(path);
  const description = resolveDescription(descriptionMap(parsed), { priority: DEFAULT_PRIORITY });

  return {
    connectionId,
    name,
    path,
    ...(description ? { description } : {}),
    columnCount: lengthOf(parsed.columns),
    measureCount: lengthOf(parsed.measures),
    joinCount: lengthOf(parsed.joins),
  };
}
/** Serialises a source object to YAML with a 2-space indent and `lineWidth` set to 0. */
function sourceToYaml(source: SemanticLayerSource): string {
  const stringifyOptions = { indent: 2, lineWidth: 0 };
  return YAML.stringify(source, stringifyOptions);
}
/**
 * Builds a summary from an already materialised source object.
 *
 * Counts are taken from the source's `columns`, `measures` and `joins` arrays.
 * `description` is only present when one could be resolved from `descriptions`.
 */
function summarizeSemanticSource(args: {
  connectionId: string;
  path: string;
  source: SemanticLayerSource;
}): LocalSlSourceSummary {
  const { connectionId, path, source } = args;
  const description = resolveDescription(source.descriptions, { priority: DEFAULT_PRIORITY });

  return {
    connectionId,
    name: source.name,
    path,
    ...(description ? { description } : {}),
    columnCount: source.columns.length,
    measureCount: source.measures.length,
    joinCount: source.joins.length,
  };
}
/**
 * Returns the `tables` mapping of a parsed manifest shard, or `null` when the
 * field is missing or not an object. Individual entries are not validated here.
 */
function manifestTables(value: Record<string, unknown>): Record<string, ManifestTableEntry> | null {
  const { tables } = value;
  if (!isRecord(tables)) {
    return null;
  }
  return tables as Record<string, ManifestTableEntry>;
}
/**
 * Turns a parsed standalone YAML record into a source object.
 *
 * All parsed fields are carried over, `name` is overridden with the given name,
 * and the list fields are normalised: `grain` keeps only its string items,
 * while `columns`, `joins` and `measures` become `[]` when they are not arrays.
 * Element shapes are not validated here.
 */
function parsedStandaloneSource(parsed: Record<string, unknown>, name: string): SemanticLayerSource {
  const arrayOrEmpty = (candidate: unknown): unknown[] => (Array.isArray(candidate) ? candidate : []);
  const grain = arrayOrEmpty(parsed.grain).filter((item): item is string => typeof item === 'string');

  return {
    ...(parsed as Partial<SemanticLayerSource>),
    name,
    grain,
    columns: arrayOrEmpty(parsed.columns) as SemanticLayerSource['columns'],
    joins: arrayOrEmpty(parsed.joins) as SemanticLayerSource['joins'],
    measures: arrayOrEmpty(parsed.measures) as SemanticLayerSource['measures'],
  };
}
/**
 * Loads every semantic-layer source of one connection.
 *
 * YAML files under `semantic-layer/<connectionId>` are processed in sorted
 * order, in two passes:
 *  1. Files under `_schema/` are treated as manifest shards; each entry of
 *     their `tables` mapping is projected into a source whose path is
 *     `<file>#<table>`.
 *  2. Remaining files are standalone. A file with a truthy `table` or `sql`
 *     defines (or replaces) the source of that name. Any other file is treated
 *     as an overlay and composed onto the already-loaded source with the same
 *     name; overlays without such a base are skipped.
 *
 * @returns The records sorted by name.
 * @throws Error when the connection id is unsafe or a file does not parse to an object.
 */
export async function loadLocalSlSourceRecords(
  project: KloLocalProject,
  input: { connectionId: string },
): Promise<LocalSlSourceRecord[]> {
  const connectionId = assertSafeConnectionId(input.connectionId);
  const connectionDir = `semantic-layer/${connectionId}`;
  const manifestPrefix = `${connectionDir}/_schema/`;

  const { files } = await project.fileStore.listFiles(connectionDir);
  const yamlPaths = files.filter((file) => ['.yaml', '.yml'].some((extension) => file.endsWith(extension))).sort();

  // Split once, keeping the sorted order within each group.
  const manifestPaths: string[] = [];
  const standalonePaths: string[] = [];
  for (const path of yamlPaths) {
    (path.startsWith(manifestPrefix) ? manifestPaths : standalonePaths).push(path);
  }

  const byName = new Map<string, LocalSlSourceRecord>();

  for (const path of manifestPaths) {
    const { content } = await project.fileStore.readFile(path);
    const tables = manifestTables(parseYamlRecord(content));
    if (tables) {
      for (const [tableName, entry] of Object.entries(tables)) {
        const source = projectManifestEntry(tableName, entry);
        byName.set(tableName, {
          ...summarizeSemanticSource({ connectionId, path: `${path}#${tableName}`, source }),
          yaml: sourceToYaml(source),
          source,
        });
      }
    }
  }

  for (const path of standalonePaths) {
    const { content } = await project.fileStore.readFile(path);
    const parsed = parseYamlRecord(content);
    const declaredName = parsed.name;
    const name =
      typeof declaredName === 'string' && declaredName.length > 0 ? declaredName : sourceNameFromPath(path);

    if (parsed.table || parsed.sql) {
      // Full definition: keep the file's own YAML text verbatim.
      byName.set(name, {
        ...summarizeSource({ connectionId, path, raw: content }),
        yaml: content,
        source: parsedStandaloneSource(parsed, name),
      });
      continue;
    }

    // Overlay: only meaningful when a base with the same name was loaded earlier.
    const base = byName.get(name);
    if (base) {
      const source = composeOverlay(base.source, parsed);
      byName.set(name, {
        ...summarizeSemanticSource({ connectionId, path, source }),
        yaml: sourceToYaml(source),
        source,
      });
    }
  }

  const records = [...byName.values()];
  records.sort((left, right) => left.name.localeCompare(right.name));
  return records;
}
/**
 * Validates raw source YAML without writing anything.
 *
 * Documents with a truthy `table` or `sql` are checked against the definition
 * schema, all others against the overlay schema. Parse and schema failures are
 * reported in the result instead of being thrown.
 */
export async function validateLocalSlSource(rawYaml: string): Promise<LocalSlValidationResult> {
  try {
    const parsed = parseYamlRecord(rawYaml);
    if (parsed.table || parsed.sql) {
      sourceDefinitionSchema.parse(parsed);
    } else {
      sourceOverlaySchema.parse(parsed);
    }
  } catch (error) {
    return { valid: false, errors: validationErrors(error) };
  }
  return { valid: true, errors: [] };
}
/**
 * Validates and writes a standalone source file for a connection.
 *
 * The YAML must pass schema validation, and a `name` declared inside it must
 * equal `input.sourceName`. The stored content always ends with a newline.
 *
 * @returns The file store's write result.
 * @throws Error when validation fails, the declared name differs from the
 *   requested one, or the connection id / source name is unsafe.
 */
export async function writeLocalSlSource(
  project: KloLocalProject,
  input: { connectionId: string; sourceName: string; yaml: string },
): Promise<KloFileWriteResult> {
  const { connectionId, sourceName, yaml } = input;

  const { valid, errors } = await validateLocalSlSource(yaml);
  if (!valid) {
    throw new Error(`Invalid semantic-layer source: ${errors.join('; ')}`);
  }

  const declaredName = parseYamlRecord(yaml).name;
  if (typeof declaredName === 'string' && declaredName !== sourceName) {
    throw new Error(`Semantic-layer source name "${declaredName}" does not match requested path "${sourceName}"`);
  }

  const path = slPath(connectionId, sourceName);
  const content = yaml.endsWith('\n') ? yaml : `${yaml}\n`;
  const commitMessage = `Write semantic-layer source: ${connectionId}/${sourceName}`;
  return project.fileStore.writeFile(path, content, LOCAL_AUTHOR, LOCAL_AUTHOR_EMAIL, commitMessage);
}
/**
 * Reads one semantic-layer source by name.
 *
 * The standalone file `semantic-layer/<connectionId>/<sourceName>.yaml` is
 * tried first. If it cannot be read, the connection's full record set (which
 * includes manifest-projected sources) is searched by name instead.
 *
 * Only the file read itself is treated as best-effort. Once a file has been
 * read, a failure to summarise it (e.g. malformed YAML) is thrown directly
 * rather than being retried through the directory scan.
 *
 * @returns The source summary plus its YAML text, or `null` when no source of that name exists.
 * @throws Error when the connection id or source name is unsafe, or when a
 *   source file does not parse to an object.
 */
export async function readLocalSlSource(
  project: KloLocalProject,
  input: { connectionId: string; sourceName: string },
): Promise<LocalSlSource | null> {
  const path = slPath(input.connectionId, input.sourceName);

  let content: string | undefined;
  try {
    content = (await project.fileStore.readFile(path)).content;
  } catch {
    // Any read failure is treated as "no standalone file" and triggers the fallback below.
    content = undefined;
  }

  if (content !== undefined) {
    return {
      ...summarizeSource({ connectionId: input.connectionId, path, raw: content }),
      yaml: content,
    };
  }

  const records = await loadLocalSlSourceRecords(project, {
    connectionId: input.connectionId,
  });
  const match = records.find((record) => record.name === input.sourceName);
  if (!match) {
    return null;
  }
  // Drop the internal `source` object so both branches return the same LocalSlSource shape.
  const { source: _source, ...localSource } = match;
  return localSource;
}
/**
 * Lists source summaries for one connection or, without `connectionId`, for
 * every connection directory found under `semantic-layer/`.
 *
 * Connection ids are discovered from the second path segment of the listed
 * files; segments that are not valid connection ids are ignored.
 *
 * @returns Summaries sorted by name (single connection) or by connection id, then name.
 */
export async function listLocalSlSources(
  project: KloLocalProject,
  input: { connectionId?: string } = {},
): Promise<LocalSlSourceSummary[]> {
  const toSummary = ({ source: _source, yaml: _yaml, ...summary }: LocalSlSourceRecord): LocalSlSourceSummary =>
    summary;

  if (input.connectionId) {
    const records = await loadLocalSlSourceRecords(project, { connectionId: input.connectionId });
    return records.map(toSummary);
  }

  const { files } = await project.fileStore.listFiles('semantic-layer');
  const discovered = new Set<string>();
  for (const file of files) {
    const segment = file.split('/')[1];
    if (isSafeConnectionId(segment)) {
      discovered.add(segment);
    }
  }

  const summaries: LocalSlSourceSummary[] = [];
  for (const connectionId of [...discovered].sort()) {
    const records = await loadLocalSlSourceRecords(project, { connectionId });
    summaries.push(...records.map(toSummary));
  }

  return summaries.sort(
    (left, right) => left.connectionId.localeCompare(right.connectionId) || left.name.localeCompare(right.name),
  );
}
/** One searchable source: its summary, the source object, and the text built from it for matching. */
interface LocalSlSearchCandidate {
summary: LocalSlSourceSummary;
source: SemanticLayerSource;
/** Text produced by `buildSemanticLayerSourceSearchText` for this source. */
searchText: string;
}
/** Returns the path `<projectDir>/.klo/db.sqlite` for the given project. */
function sqliteSlDbPath(project: KloLocalProject): string {
  const { projectDir } = project;
  return join(projectDir, '.klo', 'db.sqlite');
}
/**
 * Loads search candidates for one connection or, without `connectionId`, for
 * every connection directory found under `semantic-layer/`.
 *
 * Each candidate carries a plain summary, the source object, and the search
 * text built from that source.
 *
 * @returns Candidates in record order (single connection) or sorted by
 *   connection id, then name (all connections).
 */
async function loadLocalSlSearchCandidates(
  project: KloLocalProject,
  input: { connectionId?: string } = {},
): Promise<LocalSlSearchCandidate[]> {
  const requestedId = input.connectionId;

  if (!requestedId) {
    const { files } = await project.fileStore.listFiles('semantic-layer');
    const discovered = new Set<string>();
    for (const file of files) {
      const segment = file.split('/')[1];
      if (isSafeConnectionId(segment)) {
        discovered.add(segment);
      }
    }

    const combined: LocalSlSearchCandidate[] = [];
    for (const connectionId of [...discovered].sort()) {
      const perConnection = await loadLocalSlSearchCandidates(project, { connectionId });
      combined.push(...perConnection);
    }
    return combined.sort(
      (left, right) =>
        left.summary.connectionId.localeCompare(right.summary.connectionId) ||
        left.summary.name.localeCompare(right.summary.name),
    );
  }

  const records = await loadLocalSlSourceRecords(project, { connectionId: requestedId });
  return records.map((record) => {
    const { connectionId, name, path, description, columnCount, measureCount, joinCount, source } = record;
    return {
      summary: {
        connectionId,
        name,
        path,
        ...(description ? { description } : {}),
        columnCount,
        measureCount,
        joinCount,
      },
      source,
      searchText: buildSemanticLayerSourceSearchText(source),
    };
  });
}
/** Builds the `<connectionId>/<name>` identifier used to key candidates across search lanes. */
function candidateKey(summary: LocalSlSourceSummary): string {
  const { connectionId, name } = summary;
  return connectionId + '/' + name;
}
/**
 * Scores candidates by the fraction of query terms found as substrings of their
 * lower-cased search text.
 *
 * Candidates matching no term are dropped. The rest are ordered by descending
 * score, then connection id, then source name.
 *
 * @param candidates Candidates to scan.
 * @param terms Query terms; compared as-is against the lower-cased search text.
 * @returns Scored candidates, or an empty array when there are no terms.
 */
function tokenLaneCandidates(candidates: LocalSlSearchCandidate[], terms: readonly string[]) {
  const termCount = terms.length;
  if (termCount === 0) {
    return [];
  }

  const scored: Array<{ candidate: LocalSlSearchCandidate; score: number }> = [];
  for (const candidate of candidates) {
    const haystack = candidate.searchText.toLowerCase();
    let hits = 0;
    for (const term of terms) {
      if (haystack.includes(term)) {
        hits += 1;
      }
    }
    if (hits > 0) {
      scored.push({ candidate, score: hits / termCount });
    }
  }

  scored.sort((a, b) => {
    const byScore = b.score - a.score;
    if (byScore) {
      return byScore;
    }
    const byConnection = a.candidate.summary.connectionId.localeCompare(b.candidate.summary.connectionId);
    return byConnection || a.candidate.summary.name.localeCompare(b.candidate.summary.name);
  });
  return scored;
}
/**
 * Brings the SQLite source index and dictionary entries in line with the
 * candidates that were just loaded from disk.
 *
 * Per connection:
 * - with an embedding service, indexing is delegated to
 *   `SlSearchService.indexSources`;
 * - without one, rows are upserted with a `null` embedding and
 *   `deleteStale` is called with the names that are still present.
 *
 * Afterwards the latest dictionary entries are loaded once for all connections
 * and each connection's entries are replaced in the index.
 *
 * Connections with no candidates are not visited, so rows belonging to a
 * connection whose sources were all removed are left untouched here.
 *
 * @param input.index Index to refresh.
 * @param input.project Project used to load dictionary entries.
 * @param input.candidates Candidates loaded for the current search.
 * @param input.embeddingService Optional embedding service; when absent only lexical rows are written.
 */
async function refreshHybridSlIndexes(input: {
  index: SqliteSlSourcesIndex;
  project: KloLocalProject;
  candidates: LocalSlSearchCandidate[];
  embeddingService?: KloEmbeddingPort | null;
}): Promise<void> {
  // Group by connection with in-place pushes; re-spreading the group for every
  // candidate copied the array each time and was quadratic in group size.
  const candidatesByConnection = new Map<string, LocalSlSearchCandidate[]>();
  for (const candidate of input.candidates) {
    const connectionId = candidate.summary.connectionId;
    const group = candidatesByConnection.get(connectionId);
    if (group) {
      group.push(candidate);
    } else {
      candidatesByConnection.set(connectionId, [candidate]);
    }
  }

  for (const [connectionId, group] of candidatesByConnection) {
    if (input.embeddingService) {
      const service = new SlSearchService(input.embeddingService, input.index);
      await service.indexSources(
        connectionId,
        group.map((candidate) => candidate.source),
      );
    } else {
      await input.index.upsertSources(
        connectionId,
        group.map((candidate) => ({
          sourceName: candidate.summary.name,
          searchText: candidate.searchText,
          embedding: null,
        })),
      );
      await input.index.deleteStale(
        connectionId,
        group.map((candidate) => candidate.summary.name),
      );
    }
  }

  const connectionIds = [...candidatesByConnection.keys()];
  const dictionaryEntries = await loadLatestSlDictionaryEntries(input.project, connectionIds);

  // Bucket entries once instead of filtering the full list per connection.
  // Entries for connections outside `connectionIds` are ignored, as before.
  const entriesByConnection = new Map<string, Array<(typeof dictionaryEntries)[number]>>();
  for (const connectionId of connectionIds) {
    entriesByConnection.set(connectionId, []);
  }
  for (const entry of dictionaryEntries) {
    entriesByConnection.get(entry.connectionId)?.push(entry);
  }

  for (const connectionId of connectionIds) {
    await input.index.replaceDictionaryEntries(connectionId, entriesByConnection.get(connectionId) ?? []);
  }
}
/**
 * Searches the project's local semantic-layer sources.
 *
 * Behaviour by case:
 * - Blank query: every source (optionally filtered by connection) is returned
 *   with `score: 1`; `limit` is not applied.
 * - `backend === 'pglite-owner-prototype'`: delegates to the lazily imported
 *   PGlite prototype; `input.pglite` is required.
 * - `project.config.storage.search !== 'sqlite-fts5'`: in-memory token scan
 *   over each candidate's search text, ranked by fraction of matched terms.
 * - Otherwise: refreshes the SQLite index, then fuses the lexical, dictionary,
 *   token and semantic lanes through `HybridSearchCore`.
 *
 * `input.limit` caps the number of results on every non-blank path. The token
 * scan fallback previously ignored it, unlike the hybrid and PGlite paths.
 *
 * @param project Local project to search.
 * @param input Query, optional connection filter, limit, embedding service and backend options.
 * @returns Ranked results; hybrid results also carry match reasons, dictionary evidence and lane summaries.
 * @throws Error when the PGlite backend is requested without `input.pglite`.
 */
export async function searchLocalSlSources(
  project: KloLocalProject,
  input: LocalSlSearchInput,
): Promise<LocalSlSourceSearchResult[]> {
  const query = input.query.trim();
  if (!query) {
    return (await listLocalSlSources(project, { connectionId: input.connectionId })).map((source) => ({
      ...source,
      score: 1,
    }));
  }

  if (input.backend === 'pglite-owner-prototype') {
    if (!input.pglite) {
      throw new Error('PGlite semantic-layer search prototype requires pglite owner-process options.');
    }
    // Imported lazily so the PGlite dependency is only loaded when opted into.
    const { searchLocalSlSourcesWithPglitePrototype } = await import('./pglite-sl-search-prototype.js');
    return searchLocalSlSourcesWithPglitePrototype(project, {
      connectionId: input.connectionId,
      query,
      embeddingService: input.embeddingService ?? null,
      limit: input.limit,
      pglite: input.pglite,
    });
  }

  const candidates = await loadLocalSlSearchCandidates(project, { connectionId: input.connectionId });

  if (project.config.storage.search !== 'sqlite-fts5') {
    // Tokenise the query once; it does not depend on the candidate.
    const terms = query
      .toLowerCase()
      .split(/\s+/)
      .map((term) => term.trim())
      .filter(Boolean);
    const ranked = candidates
      .map((candidate) => {
        const haystack = candidate.searchText.toLowerCase();
        return {
          candidate,
          score: terms.length === 0 ? 0 : terms.filter((term) => haystack.includes(term)).length / terms.length,
        };
      })
      .filter((result) => result.score > 0)
      .map((result) => ({
        ...result.candidate.summary,
        score: result.score,
        matchReasons: ['token'],
      }))
      .sort(
        (left, right) =>
          right.score - left.score ||
          left.connectionId.localeCompare(right.connectionId) ||
          left.path.localeCompare(right.path),
      );
    // Honour the caller's limit, matching the hybrid and PGlite backends.
    return input.limit == null ? ranked : ranked.slice(0, Math.max(0, input.limit));
  }

  const index = new SqliteSlSourcesIndex({ dbPath: sqliteSlDbPath(project) });
  await refreshHybridSlIndexes({ index, project, candidates, embeddingService: input.embeddingService ?? null });

  const candidateById = new Map(candidates.map((candidate) => [candidateKey(candidate.summary), candidate]));
  const connectionIds = input.connectionId ? [input.connectionId] : undefined;
  const finalLimit = input.limit ?? candidates.length;
  const core = new HybridSearchCore();
  // Filled by the dictionary lane so evidence can be attached during hydration.
  const dictionaryEvidence = new Map<string, SlDictionaryMatch[]>();

  const generators: SearchCandidateGenerator[] = [
    {
      lane: 'lexical',
      async generate(args) {
        const rows = await index.searchLexicalCandidates({
          connectionIds,
          queryText: args.queryText,
          limit: args.laneCandidatePoolLimit,
        });
        return {
          candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
        };
      },
    },
    {
      lane: 'dictionary',
      async generate(args) {
        const rows = await index.searchDictionaryCandidates({
          connectionIds,
          queryText: args.queryText,
          limit: args.laneCandidatePoolLimit,
        });
        for (const row of rows) {
          dictionaryEvidence.set(row.id, row.matches);
        }
        return {
          candidates: rows.map((row) => ({
            id: row.id,
            rank: row.rank,
            rawScore: row.rawScore,
            evidence: row.matches,
          })),
        };
      },
    },
    {
      lane: 'token',
      async generate(args) {
        const rows = tokenLaneCandidates(candidates, args.normalizedQuery.terms).slice(0, args.laneCandidatePoolLimit);
        return {
          // `position` rather than `index`, which would shadow the SQLite index above.
          candidates: rows.map((row, position) => ({
            id: candidateKey(row.candidate.summary),
            rank: position + 1,
            rawScore: row.score,
          })),
        };
      },
    },
    {
      lane: 'semantic',
      async generate(args) {
        if (!input.embeddingService) {
          return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' };
        }
        try {
          const queryEmbedding = await input.embeddingService.computeEmbedding(args.queryText);
          const rows = await index.searchSemanticCandidates({
            connectionIds,
            queryEmbedding,
            limit: args.laneCandidatePoolLimit,
          });
          return {
            candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
          };
        } catch (error) {
          // A failing embedding call degrades this lane instead of failing the search.
          return {
            status: 'skipped',
            candidates: [],
            reason: `embedding_unhealthy:${error instanceof Error ? error.message : String(error)}`,
          };
        }
      },
    },
  ];

  const result = await core.search({ queryText: query, limit: finalLimit, generators });

  const hydrated: LocalSlSourceSearchResult[] = [];
  for (const fused of result.results) {
    const candidate = candidateById.get(fused.id);
    if (!candidate) {
      // The index returned an id that is not among the candidates loaded from disk.
      continue;
    }
    const dictionaryMatches = dictionaryEvidence.get(fused.id);
    hydrated.push({
      ...candidate.summary,
      score: fused.score,
      matchReasons: fused.matchReasons as SlSearchMatchReason[],
      ...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
      lanes: result.lanes,
    });
  }
  return hydrated;
}

View file

@ -0,0 +1,268 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { createServer } from 'node:net';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKloProject, type KloLocalProject } from '../project/index.js';
import { assertSearchBackendConformanceCase } from '../search/index.js';
import { searchLocalSlSources, writeLocalSlSource, type LocalSlSourceSearchResult } from './local-sl.js';
import { searchLocalSlSourcesWithPglitePrototype } from './pglite-sl-search-prototype.js';
// Fixture: `orders` source written under the `warehouse` connection by
// seedSemanticLayerProject. Its description mentions paid revenue and refund
// status, and it is the only fixture that declares a measure.
const ORDERS_YAML = [
'name: orders',
'description: Orders with paid revenue and refund status.',
'table: public.orders',
'grain:',
' - order_id',
'columns:',
' - name: order_id',
' type: string',
' - name: status',
' type: string',
' - name: revenue',
' type: number',
'measures:',
' - name: total_revenue',
' expr: sum(revenue)',
'',
].join('\n');
// Fixture: a second source also named `orders`, written under the `finance`
// connection, so the same source name exists in two connections.
const FINANCE_ORDERS_YAML = [
'name: orders',
'description: Finance orders used for invoice reconciliation.',
'table: finance.orders',
'grain:',
' - order_id',
'columns:',
' - name: order_id',
' type: string',
' - name: invoice_status',
' type: string',
'',
].join('\n');
// Fixture: `customers` source written under the `warehouse` connection.
const CUSTOMERS_YAML = [
'name: customers',
'description: Customer lifecycle accounts by region.',
'table: public.customers',
'grain:',
' - customer_id',
'columns:',
' - name: customer_id',
' type: string',
' - name: region',
' type: string',
'',
].join('\n');
/**
 * Deterministic embedding stub for tests. Maps text to one of three fixed
 * 3-dimensional vectors based on case-insensitive phrase matching.
 */
class FakeEmbeddingPort {
  readonly maxBatchSize = 16;

  /**
   * Returns `[1, 0, 0]` for text mentioning "semantic revenue" or
   * "orders with paid revenue", `[0.72, 0.28, 0]` for "finance orders",
   * and `[0, 1, 0]` for anything else.
   */
  async computeEmbedding(text: string): Promise<number[]> {
    const lowered = text.toLowerCase();
    const mentionsAny = (...phrases: string[]): boolean => phrases.some((phrase) => lowered.includes(phrase));

    if (mentionsAny('semantic revenue', 'orders with paid revenue')) {
      return [1, 0, 0];
    }
    if (mentionsAny('finance orders')) {
      return [0.72, 0.28, 0];
    }
    return [0, 1, 0];
  }

  /** Embeds each text with `computeEmbedding`, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pending: Array<Promise<number[]>> = [];
    for (const text of texts) {
      pending.push(this.computeEmbedding(text));
    }
    return Promise.all(pending);
  }
}
/**
 * Asks the OS for a free TCP port on 127.0.0.1 by binding a throwaway server
 * to port 0, reading the assigned port, and closing the server again.
 *
 * The port is released before this function returns, so another process could
 * claim it before the caller binds it.
 *
 * @returns The port number that was assigned.
 * @throws Error when the server cannot bind, reports no TCP address, or fails to close.
 */
async function allocatePort(): Promise<number> {
  const server = createServer();

  // Reject on bind failure; without an 'error' listener a failed listen()
  // would leave this promise pending forever.
  await new Promise<void>((resolve, reject) => {
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.off('error', reject);
      resolve();
    });
  });

  const closeServer = (): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      server.close((error) => {
        if (error) {
          reject(error);
          return;
        }
        resolve();
      });
    });

  const address = server.address();
  if (typeof address !== 'object' || address === null) {
    // Do not leak the listening socket when the address is unusable.
    await closeServer();
    throw new Error('Expected TCP server address while allocating a PGlite SL prototype port.');
  }

  await closeServer();
  return address.port;
}
/**
 * Converts a search result into the shape passed to
 * `assertSearchBackendConformanceCase`: the id becomes `<connectionId>/<name>`
 * and missing match reasons default to an empty array.
 */
function toConformanceResult(result: LocalSlSourceSearchResult) {
  const { connectionId, name, score, matchReasons, lanes, dictionaryMatches } = result;
  return {
    id: `${connectionId}/${name}`,
    score,
    matchReasons: matchReasons ?? [],
    lanes,
    dictionaryMatches,
  };
}
/**
 * Seeds the project with the three YAML fixtures (two under `warehouse`, one
 * under `finance`) and writes a relationship-profile JSON for the `warehouse`
 * connection containing sample values for `orders.status` and
 * `customers.region`.
 */
async function seedSemanticLayerProject(project: KloLocalProject): Promise<void> {
  const fixtures = [
    { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML },
    { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML },
    { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML },
  ];
  for (const fixture of fixtures) {
    await writeLocalSlSource(project, fixture);
  }

  // Builds one text-column profile. Key order matters: it fixes the JSON layout.
  const textColumn = (
    tableName: string,
    column: string,
    stats: {
      distinctCount: number;
      uniquenessRatio: number;
      sampleValues: string[];
      minTextLength: number;
      maxTextLength: number;
    },
  ) => ({
    table: { catalog: null, db: 'public', name: tableName },
    column,
    nativeType: 'text',
    normalizedType: 'string',
    rowCount: 10,
    nullCount: 0,
    distinctCount: stats.distinctCount,
    uniquenessRatio: stats.uniquenessRatio,
    nullRate: 0,
    sampleValues: stats.sampleValues,
    minTextLength: stats.minTextLength,
    maxTextLength: stats.maxTextLength,
  });

  const profile = {
    connectionId: 'warehouse',
    driver: 'postgres',
    sqlAvailable: true,
    queryCount: 2,
    tables: [],
    columns: {
      'orders.status': textColumn('orders', 'status', {
        distinctCount: 2,
        uniquenessRatio: 0.2,
        sampleValues: ['paid', 'refunded'],
        minTextLength: 4,
        maxTextLength: 8,
      }),
      'customers.region': textColumn('customers', 'region', {
        distinctCount: 3,
        uniquenessRatio: 0.3,
        sampleValues: ['emea', 'amer', 'apac'],
        minTextLength: 4,
        maxTextLength: 4,
      }),
    },
    warnings: [],
  };

  const serializedProfile = JSON.stringify(profile, null, 2) + '\n';
  await project.fileStore.writeFile(
    'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
    serializedProfile,
    'klo',
    'klo@example.com',
    'Seed PGlite dictionary profile',
  );
}
// Conformance suite for the PGlite owner-process search prototype. Each test
// gets a fresh temp directory, a freshly initialised and seeded project with
// 3-dimensional embeddings configured, and a newly allocated port.
describe('PGlite semantic-layer search prototype', () => {
  let scratchDir: string;
  let project: KloLocalProject;
  let pgliteDataDir: string;
  let port: number;

  // Owner-process options shared by every test; read lazily so they pick up
  // the values assigned in beforeEach.
  const ownerOptions = () => ({ dataDir: pgliteDataDir, host: '127.0.0.1', port });

  beforeEach(async () => {
    scratchDir = await mkdtemp(join(tmpdir(), 'klo-pglite-sl-prototype-'));
    project = await initKloProject({ projectDir: join(scratchDir, 'project'), projectName: 'warehouse' });
    project.config.ingest.embeddings.dimensions = 3;
    pgliteDataDir = join(scratchDir, 'pglite-search');
    port = await allocatePort();
    await seedSemanticLayerProject(project);
  });

  afterEach(async () => {
    await rm(scratchDir, { recursive: true, force: true });
  });

  it('returns lexical semantic-layer matches through PGlite FTS', async () => {
    const found = await searchLocalSlSourcesWithPglitePrototype(project, {
      query: 'paid revenue',
      limit: 5,
      pglite: ownerOptions(),
    });
    const results = found.map(toConformanceResult);

    assertSearchBackendConformanceCase({
      backendName: 'pglite-owner-prototype',
      surface: 'semantic-layer',
      caseName: 'pglite lexical source ranking',
      results,
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['lexical'],
      },
      expectedLanes: {
        lexical: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
    });
  });

  it('returns dictionary evidence through PGlite pg_trgm and exact matching', async () => {
    const found = await searchLocalSlSourcesWithPglitePrototype(project, {
      connectionId: 'warehouse',
      query: 'refund',
      limit: 5,
      pglite: ownerOptions(),
    });
    const results = found.map(toConformanceResult);

    assertSearchBackendConformanceCase({
      backendName: 'pglite-owner-prototype',
      surface: 'semantic-layer',
      caseName: 'pglite dictionary source evidence',
      results,
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['dictionary'],
      },
      expectedLanes: {
        dictionary: { status: 'available' },
        semantic: { status: 'skipped', reason: 'embedding_unconfigured' },
      },
      expectedDictionaryMatchesById: {
        'warehouse/orders': [{ column: 'status', values: ['refunded'] }],
      },
    });
  });

  it('returns semantic matches through PGlite vector ordering when embeddings are configured', async () => {
    const found = await searchLocalSlSourcesWithPglitePrototype(project, {
      query: 'semantic revenue',
      limit: 5,
      embeddingService: new FakeEmbeddingPort(),
      pglite: ownerOptions(),
    });
    const results = found.map(toConformanceResult);

    assertSearchBackendConformanceCase({
      backendName: 'pglite-owner-prototype',
      surface: 'semantic-layer',
      caseName: 'pglite semantic source ranking',
      results,
      expectedTopIds: ['warehouse/orders'],
      expectedReasonsById: {
        'warehouse/orders': ['semantic'],
      },
      expectedLanes: {
        semantic: { status: 'available' },
      },
    });
  });

  it('routes through PGlite only when the private local search input opts in', async () => {
    // Misspelled on purpose: the expected hit is the dictionary value "refunded".
    const found = await searchLocalSlSources(project, {
      query: 'refnd',
      limit: 5,
      backend: 'pglite-owner-prototype',
      pglite: ownerOptions(),
    });
    const [top] = found;

    expect(top).toMatchObject({
      connectionId: 'warehouse',
      name: 'orders',
      matchReasons: expect.arrayContaining(['dictionary']),
      dictionaryMatches: [{ column: 'status', values: ['refunded'] }],
    });
  });
});

View file

@ -0,0 +1,569 @@
import { mkdir } from 'node:fs/promises';
import { join } from 'node:path';
import type { KloEmbeddingPort } from '../core/index.js';
import type { KloLocalProject } from '../project/index.js';
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
import { KloPGliteOwnerProcess } from '../search/pglite-owner-process.js';
import {
listLocalSlSources,
loadLocalSlSourceRecords,
type LocalSlSourceSearchResult,
type LocalSlSourceSummary,
} from './local-sl.js';
import { loadLatestSlDictionaryEntries, type SlDictionaryEntry } from './sl-dictionary-profile.js';
import { buildSemanticLayerSourceSearchText } from './sl-search.service.js';
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchMatchReason } from './types.js';
/** Options for the PGlite owner process started for a prototype search. */
export interface PgliteSlSearchPrototypeOwnerOptions {
/** Data directory for PGlite; when omitted, `<projectDir>/.klo/pglite-search-prototype` is used. */
dataDir?: string;
/** Host passed to `KloPGliteOwnerProcess.start`. */
host: string;
/** Port passed to `KloPGliteOwnerProcess.start`. */
port: number;
}
/** Input for `searchLocalSlSourcesWithPglitePrototype`. */
export interface PgliteSlSearchPrototypeInput {
/** Restricts candidate loading and every lane query to this connection when set. */
connectionId?: string;
/** Search text; a blank query returns the full source listing with `score: 1`. */
query: string;
/** Embedding service; when null or omitted the semantic lane is skipped as `embedding_unconfigured`. */
embeddingService?: KloEmbeddingPort | null;
/** Maximum number of fused results; defaults to the number of loaded candidates. */
limit?: number;
/** Owner-process connection options. */
pglite: PgliteSlSearchPrototypeOwnerOptions;
}
/** Search-ready view of one local semantic-layer source. */
interface LocalSlSearchCandidate {
/** Summary fields inserted into `prototype_sl_sources` and spread into search results. */
summary: LocalSlSourceSummary;
/** Parsed source definition the search text was built from. */
source: SemanticLayerSource;
/** Text produced by `buildSemanticLayerSourceSearchText(source)`; embedded, stored for FTS and scanned by the token lane. */
searchText: string;
}
/** Row shape selected by the lexical and semantic lane queries. */
interface PgliteLaneRow {
/** `connection_id || '/' || source_name`, matching `candidateKey`. */
id: string;
connection_id: string;
source_name: string;
/** Lane score as returned by the driver; callers convert it with `Number(...)`. */
score: number | string;
}
/** Row shape selected by the dictionary lane query: one matched value of one column. */
interface PgliteDictionaryRow extends PgliteLaneRow {
/** Column the matched value belongs to. */
column_name: string;
/** The matched dictionary value, as stored. */
value: string;
}
/** Builds the `<connectionId>/<name>` identifier; the lane SQL queries build the same id. */
function candidateKey(summary: LocalSlSourceSummary): string {
  const { connectionId, name } = summary;
  return connectionId + '/' + name;
}
/**
 * Resolves the PGlite data directory: the explicit `input.dataDir` when it is
 * not null/undefined, otherwise `<projectDir>/.klo/pglite-search-prototype`.
 */
function pgliteDataDir(project: KloLocalProject, input: PgliteSlSearchPrototypeOwnerOptions): string {
  const { dataDir } = input;
  if (dataDir != null) {
    return dataDir;
  }
  return join(project.projectDir, '.klo', 'pglite-search-prototype');
}
/**
 * Reads the configured embedding dimension from
 * `project.config.ingest.embeddings.dimensions`.
 *
 * @returns The dimension when it is a positive integer.
 * @throws Error when the configured value is not a positive integer.
 */
function vectorDimensions(project: KloLocalProject): number {
  const configured = project.config.ingest.embeddings.dimensions;
  const isPositiveInteger = Number.isInteger(configured) && configured > 0;
  if (isPositiveInteger) {
    return configured;
  }
  throw new Error(`PGlite SL search prototype needs a positive embedding dimension, got ${String(configured)}.`);
}
/**
 * Returns the connection filter bound to the lane queries: a one-element array
 * for a non-empty `connectionId`, or `null` (no filter) otherwise.
 */
function connectionIdsForSearch(input: { connectionId?: string }): string[] | null {
  if (!input.connectionId) {
    return null;
  }
  return [input.connectionId];
}
/**
 * Loads local semantic-layer sources as search candidates.
 *
 * With `input.connectionId` set, only that connection's records are loaded and
 * mapped. Otherwise connection ids are taken from the second path segment of
 * every file under `semantic-layer/`, keeping those that match
 * `/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/`; each is loaded in turn and the merged list
 * is sorted by connection id, then name.
 *
 * @param project Local project to read from.
 * @param input Optional connection filter.
 * @returns Candidates carrying a summary, the parsed source and its search text.
 */
async function loadCandidates(
  project: KloLocalProject,
  input: { connectionId?: string } = {},
): Promise<LocalSlSearchCandidate[]> {
  const scopedConnectionId = input.connectionId;

  if (!scopedConnectionId) {
    const safeConnectionId = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;
    const { files } = await project.fileStore.listFiles('semantic-layer');
    const segments = files
      .map((filePath) => filePath.split('/')[1])
      .filter((segment): segment is string => typeof segment === 'string' && safeConnectionId.test(segment));
    const connectionIds = Array.from(new Set(segments)).sort();

    const merged: LocalSlSearchCandidate[] = [];
    for (const connectionId of connectionIds) {
      const perConnection = await loadCandidates(project, { connectionId });
      merged.push(...perConnection);
    }
    merged.sort((a, b) => {
      const byConnection = a.summary.connectionId.localeCompare(b.summary.connectionId);
      return byConnection || a.summary.name.localeCompare(b.summary.name);
    });
    return merged;
  }

  const records = await loadLocalSlSourceRecords(project, { connectionId: scopedConnectionId });
  return records.map((record): LocalSlSearchCandidate => {
    // `description` is only present on the summary when the record has a non-empty one.
    const optionalDescription = record.description ? { description: record.description } : {};
    return {
      summary: {
        connectionId: record.connectionId,
        name: record.name,
        path: record.path,
        ...optionalDescription,
        columnCount: record.columnCount,
        measureCount: record.measureCount,
        joinCount: record.joinCount,
      },
      source: record.source,
      searchText: buildSemanticLayerSourceSearchText(record.source),
    };
  });
}
/**
 * Scores candidates by the fraction of query terms found as substrings of their
 * lower-cased search text.
 *
 * Candidates matching no term are dropped. The rest are ordered by descending
 * score, then connection id, then source name.
 *
 * @param candidates Candidates to scan.
 * @param terms Query terms; compared as-is against the lower-cased search text.
 * @returns Scored candidates, or an empty array when there are no terms.
 */
function tokenLaneCandidates(candidates: LocalSlSearchCandidate[], terms: readonly string[]) {
  const termCount = terms.length;
  if (termCount === 0) {
    return [];
  }

  const scored: Array<{ candidate: LocalSlSearchCandidate; score: number }> = [];
  for (const candidate of candidates) {
    const haystack = candidate.searchText.toLowerCase();
    let hits = 0;
    for (const term of terms) {
      if (haystack.includes(term)) {
        hits += 1;
      }
    }
    if (hits > 0) {
      scored.push({ candidate, score: hits / termCount });
    }
  }

  scored.sort((a, b) => {
    const byScore = b.score - a.score;
    if (byScore) {
      return byScore;
    }
    const byConnection = a.candidate.summary.connectionId.localeCompare(b.candidate.summary.connectionId);
    return byConnection || a.candidate.summary.name.localeCompare(b.candidate.summary.name);
  });
  return scored;
}
/**
 * Turns free text into an OR-joined `to_tsquery` expression.
 *
 * The query is lower-cased and every maximal run of `[a-z0-9_]` becomes a
 * term; all other characters act as separators. Duplicate terms are removed,
 * keeping first-occurrence order, and the rest are joined with ` | `.
 *
 * @returns The tsquery text, or an empty string when no term remains.
 */
function postgresqlOrTsQuery(query: string): string {
  const tokens = query.toLowerCase().match(/[a-z0-9_]+/g) ?? [];
  const unique: string[] = [];
  const seen = new Set<string>();
  for (const token of tokens) {
    if (!seen.has(token)) {
      seen.add(token);
      unique.push(token);
    }
  }
  return unique.join(' | ');
}
/**
 * Drops and recreates the two prototype tables and their indexes in a single
 * multi-statement query:
 * - `prototype_sl_sources` with a GIN full-text index over `search_text` and
 *   an ivfflat cosine index over `embedding`;
 * - `prototype_sl_dictionary_values` with a GIN trigram index over `value`.
 *
 * @param owner Owner process the DDL is sent to.
 * @param dimensions Width of the `embedding` vector column; interpolated into the DDL.
 */
async function resetPrototypeSchema(owner: KloPGliteOwnerProcess, dimensions: number): Promise<void> {
  const ddl = `
DROP TABLE IF EXISTS prototype_sl_dictionary_values;
DROP TABLE IF EXISTS prototype_sl_sources;
CREATE TABLE prototype_sl_sources (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
path TEXT NOT NULL,
description TEXT,
column_count INTEGER NOT NULL,
measure_count INTEGER NOT NULL,
join_count INTEGER NOT NULL,
search_text TEXT NOT NULL,
embedding vector(${dimensions}),
PRIMARY KEY (connection_id, source_name)
);
CREATE INDEX prototype_sl_sources_fts_idx
ON prototype_sl_sources
USING GIN (to_tsvector('english', search_text));
CREATE INDEX prototype_sl_sources_vector_idx
ON prototype_sl_sources
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 1);
CREATE TABLE prototype_sl_dictionary_values (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
column_name TEXT NOT NULL,
value TEXT NOT NULL,
value_lower TEXT NOT NULL,
cardinality INTEGER,
PRIMARY KEY (connection_id, source_name, column_name, value)
);
CREATE INDEX prototype_sl_dictionary_values_trgm_idx
ON prototype_sl_dictionary_values
USING GIN (value gin_trgm_ops);
`;
  await owner.query(ddl);
}
/**
 * Embeds every candidate's search text in one bulk call and keys the vectors
 * by candidate id.
 *
 * @param input.candidates Candidates whose `searchText` is embedded, in order.
 * @param input.embeddingService Embedding service; when absent nothing is embedded.
 * @param input.dimensions Required length of every returned vector.
 * @returns A map from candidate key to vector, or `null` without an embedding service.
 * @throws Error when any returned vector's length differs from `input.dimensions`.
 */
async function sourceEmbeddings(input: {
  candidates: LocalSlSearchCandidate[];
  embeddingService?: KloEmbeddingPort | null;
  dimensions: number;
}): Promise<Map<string, number[]> | null> {
  const { candidates, embeddingService, dimensions } = input;
  if (!embeddingService) {
    return null;
  }

  const vectors = await embeddingService.computeEmbeddingsBulk(candidates.map((candidate) => candidate.searchText));

  const vectorsById = new Map<string, number[]>();
  for (const [position, vector] of vectors.entries()) {
    if (vector.length !== dimensions) {
      throw new Error(`PGlite SL search prototype expected ${dimensions} embedding dimensions, got ${vector.length}.`);
    }
    // Vectors are matched to candidates by position; extras are ignored.
    const owner = candidates[position];
    if (owner) {
      vectorsById.set(candidateKey(owner.summary), vector);
    }
  }
  return vectorsById;
}
/**
 * Inserts one `prototype_sl_sources` row per candidate, one statement at a
 * time. The embedding is bound as JSON text cast to `vector`, or `NULL` when
 * no vector exists for the candidate.
 *
 * @param input.owner Owner process the inserts are sent to.
 * @param input.candidates Candidates to insert.
 * @param input.embeddings Vectors keyed by candidate key, or `null` when embeddings are not configured.
 */
async function insertSourceRows(input: {
  owner: KloPGliteOwnerProcess;
  candidates: LocalSlSearchCandidate[];
  embeddings: Map<string, number[]> | null;
}): Promise<void> {
  const { owner, candidates, embeddings } = input;
  const insertSql = `
INSERT INTO prototype_sl_sources (
connection_id,
source_name,
path,
description,
column_count,
measure_count,
join_count,
search_text,
embedding
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::vector)
`;

  for (const { summary, searchText } of candidates) {
    const vector = embeddings?.get(candidateKey(summary));
    const params = [
      summary.connectionId,
      summary.name,
      summary.path,
      summary.description ?? null,
      summary.columnCount,
      summary.measureCount,
      summary.joinCount,
      searchText,
      vector ? JSON.stringify(vector) : null,
    ];
    await owner.query(insertSql, params);
  }
}
/**
 * Inserts one `prototype_sl_dictionary_values` row per dictionary entry, one
 * statement at a time. `value_lower` is computed in SQL from the bound value;
 * a missing cardinality is stored as `NULL`.
 *
 * @param owner Owner process the inserts are sent to.
 * @param entries Dictionary entries to insert.
 */
async function insertDictionaryRows(owner: KloPGliteOwnerProcess, entries: SlDictionaryEntry[]): Promise<void> {
  const insertSql = `
INSERT INTO prototype_sl_dictionary_values (
connection_id,
source_name,
column_name,
value,
value_lower,
cardinality
)
VALUES ($1, $2, $3, $4, lower($4), $5)
`;

  for (const { connectionId, sourceName, columnName, value, cardinality } of entries) {
    const params = [connectionId, sourceName, columnName, value, cardinality ?? null];
    await owner.query(insertSql, params);
  }
}
/**
 * Collapses dictionary hit rows into one ranked candidate per source.
 *
 * For each source id, rows are sorted by column name then value and grouped by
 * column. Each column keeps at most five values and reports the remainder as
 * `overflowCount`. `rawScore` is the number of kept values across columns.
 * Candidates are ordered by descending `rawScore`, then number of matched
 * columns, then connection id, then source name. At most `max(1, limit)` are
 * returned, each with a 1-based `rank`.
 *
 * @param rows Dictionary lane rows, one per matched value.
 * @param limit Maximum number of sources to return (values below 1 are treated as 1).
 */
function groupDictionaryRows(rows: PgliteDictionaryRow[], limit: number) {
  const rowsBySource = new Map<string, PgliteDictionaryRow[]>();
  for (const row of rows) {
    const bucket = rowsBySource.get(row.id);
    if (bucket) {
      bucket.push(row);
    } else {
      rowsBySource.set(row.id, [row]);
    }
  }

  const perSource = Array.from(rowsBySource, ([id, bucket]) => {
    // Taken before sorting: identifies the source from the first row as received.
    const first = bucket[0];
    bucket.sort((a, b) => a.column_name.localeCompare(b.column_name) || a.value.localeCompare(b.value));

    const valuesByColumn = new Map<string, string[]>();
    for (const row of bucket) {
      const values = valuesByColumn.get(row.column_name);
      if (values) {
        values.push(row.value);
      } else {
        valuesByColumn.set(row.column_name, [row.value]);
      }
    }

    const matches: SlDictionaryMatch[] = [];
    let rawScore = 0;
    for (const [column, values] of valuesByColumn) {
      const kept = values.slice(0, 5);
      rawScore += kept.length;
      matches.push({
        column,
        values: kept,
        ...(values.length > 5 ? { overflowCount: values.length - 5 } : {}),
      });
    }

    return {
      id,
      connectionId: first?.connection_id ?? '',
      sourceName: first?.source_name ?? '',
      rawScore,
      matches,
    };
  });

  perSource.sort(
    (a, b) =>
      b.rawScore - a.rawScore ||
      b.matches.length - a.matches.length ||
      a.connectionId.localeCompare(b.connectionId) ||
      a.sourceName.localeCompare(b.sourceName),
  );

  const cap = Math.max(1, limit);
  return perSource.slice(0, cap).map((candidate, position) => ({ ...candidate, rank: position + 1 }));
}
/**
 * Runs the full-text lane: ranks sources whose `search_text` matches any query
 * term using `ts_rank_cd`, ordered by score, then connection id, then name.
 *
 * @param input.owner Owner process to query.
 * @param input.queryText Raw query text; converted with `postgresqlOrTsQuery`.
 * @param input.connectionIds Connection filter, or `null` for all connections.
 * @param input.limit Maximum rows; values below 1 are treated as 1.
 * @returns Ranked candidates with 1-based `rank`, or an empty array when the query yields no terms.
 */
async function queryLexicalCandidates(input: {
  owner: KloPGliteOwnerProcess;
  queryText: string;
  connectionIds: string[] | null;
  limit: number;
}) {
  const { owner, queryText, connectionIds, limit } = input;

  const tsQuery = postgresqlOrTsQuery(queryText);
  if (!tsQuery) {
    return [];
  }

  const sql = `
SELECT
connection_id || '/' || source_name AS id,
connection_id,
source_name,
ts_rank_cd(to_tsvector('english', search_text), to_tsquery('english', $1)) AS score
FROM prototype_sl_sources
WHERE to_tsvector('english', search_text) @@ to_tsquery('english', $1)
AND ($2::text[] IS NULL OR connection_id = ANY($2::text[]))
ORDER BY score DESC, connection_id ASC, source_name ASC
LIMIT $3
`;
  const rowLimit = Math.max(1, limit);
  const { rows } = await owner.query<PgliteLaneRow>(sql, [tsQuery, connectionIds, rowLimit]);

  return rows.map((row, position) => ({
    id: row.id,
    connectionId: row.connection_id,
    sourceName: row.source_name,
    rank: position + 1,
    rawScore: Number(row.score),
  }));
}
/**
 * Runs the vector lane: embeds the query and orders sources by cosine distance
 * (`<=>`), reporting `1 - distance` as the score.
 *
 * The lane never throws. It returns `status: 'skipped'` with a reason when no
 * embedding service is configured (`embedding_unconfigured`), when the query
 * vector has the wrong length, or when embedding or querying fails
 * (`embedding_unhealthy:<detail>`).
 *
 * @param input.owner Owner process to query.
 * @param input.queryText Text to embed.
 * @param input.connectionIds Connection filter, or `null` for all connections.
 * @param input.embeddingService Embedding service, if configured.
 * @param input.dimensions Expected length of the query vector.
 * @param input.limit Maximum rows; values below 1 are treated as 1.
 */
async function querySemanticCandidates(input: {
  owner: KloPGliteOwnerProcess;
  queryText: string;
  connectionIds: string[] | null;
  embeddingService?: KloEmbeddingPort | null;
  dimensions: number;
  limit: number;
}) {
  const skipped = (reason: string) => ({ status: 'skipped' as const, candidates: [], reason });

  const { owner, queryText, connectionIds, embeddingService, dimensions, limit } = input;
  if (!embeddingService) {
    return skipped('embedding_unconfigured');
  }

  try {
    const queryEmbedding = await embeddingService.computeEmbedding(queryText);
    if (queryEmbedding.length !== dimensions) {
      return skipped(`embedding_unhealthy:expected ${dimensions} dimensions, got ${queryEmbedding.length}`);
    }

    const sql = `
SELECT
connection_id || '/' || source_name AS id,
connection_id,
source_name,
1 - (embedding <=> $1::vector) AS score
FROM prototype_sl_sources
WHERE embedding IS NOT NULL
AND ($2::text[] IS NULL OR connection_id = ANY($2::text[]))
ORDER BY embedding <=> $1::vector, connection_id ASC, source_name ASC
LIMIT $3
`;
    const params = [JSON.stringify(queryEmbedding), connectionIds, Math.max(1, limit)];
    const { rows } = await owner.query<PgliteLaneRow>(sql, params);

    const candidates = rows.map((row, position) => ({
      id: row.id,
      connectionId: row.connection_id,
      sourceName: row.source_name,
      rank: position + 1,
      rawScore: Number(row.score),
    }));
    return { candidates };
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    return skipped(`embedding_unhealthy:${detail}`);
  }
}
/**
 * Runs the dictionary lane: finds stored column values that resemble the query
 * and groups the hits per source.
 *
 * A value matches when it shares trigram similarity with the query, equals it
 * case-insensitively, or contains it case-insensitively. The row score is the
 * greatest of the trigram similarity, 1 for an exact match and 0.75 for a
 * substring match. Up to `max(25, limit * 4)` value rows are fetched before
 * `groupDictionaryRows` reduces them to at most `limit` sources.
 *
 * The substring test uses `strpos` rather than `LIKE`, so `%`, `_` and `\` in
 * the query are compared literally instead of acting as pattern wildcards.
 *
 * @param input.owner Owner process to query.
 * @param input.queryText Raw query text; trimmed before use.
 * @param input.connectionIds Connection filter, or `null` for all connections.
 * @param input.limit Maximum number of sources to return.
 * @returns Grouped, ranked dictionary candidates, or an empty array for a blank query.
 */
async function queryDictionaryCandidates(input: {
  owner: KloPGliteOwnerProcess;
  queryText: string;
  connectionIds: string[] | null;
  limit: number;
}) {
  const query = input.queryText.trim();
  if (!query) {
    return [];
  }
  const result = await input.owner.query<PgliteDictionaryRow>(
    `
SELECT
connection_id || '/' || source_name AS id,
connection_id,
source_name,
column_name,
value,
GREATEST(
similarity(value, $1),
CASE WHEN value_lower = lower($1) THEN 1 ELSE 0 END,
CASE WHEN strpos(value_lower, lower($1)) > 0 THEN 0.75 ELSE 0 END
) AS score
FROM prototype_sl_dictionary_values
WHERE (
similarity(value, $1) > 0
OR value_lower = lower($1)
OR strpos(value_lower, lower($1)) > 0
)
AND ($2::text[] IS NULL OR connection_id = ANY($2::text[]))
ORDER BY score DESC, connection_id ASC, source_name ASC, column_name ASC, value ASC
LIMIT $3
`,
    // Over-fetch value rows: several rows can collapse into one source.
    [query, input.connectionIds, Math.max(25, input.limit * 4)],
  );
  return groupDictionaryRows(result.rows, input.limit);
}
/**
 * Prototype search backend that answers one query through a PGlite owner
 * process.
 *
 * Each call loads the candidates from disk, starts an owner process, rebuilds
 * the prototype tables from scratch, inserts the source and dictionary rows,
 * runs the lexical, dictionary, token and semantic lanes through
 * `HybridSearchCore`, and stops the owner process again, also on failure.
 *
 * A blank query skips all of that and returns the plain source listing with
 * `score: 1`.
 *
 * @param project Local project to search.
 * @param input Query, optional connection filter, limit, embedding service and owner options.
 * @returns Fused results carrying summary fields, score, match reasons, optional dictionary evidence and lane summaries.
 * @throws Error when the configured embedding dimension is not a positive integer, or when a source embedding has the wrong length.
 */
export async function searchLocalSlSourcesWithPglitePrototype(
project: KloLocalProject,
input: PgliteSlSearchPrototypeInput,
): Promise<LocalSlSourceSearchResult[]> {
const query = input.query.trim();
if (!query) {
return (await listLocalSlSources(project, { connectionId: input.connectionId })).map((source) => ({
...source,
score: 1,
}));
}
// Candidates and the vector width are resolved before the owner process is
// started, so a bad dimension setting fails without spawning anything.
const candidates = await loadCandidates(project, { connectionId: input.connectionId });
const dimensions = vectorDimensions(project);
const dataDir = pgliteDataDir(project, input.pglite);
await mkdir(dataDir, { recursive: true });
const owner = await KloPGliteOwnerProcess.start({
dataDir,
host: input.pglite.host,
port: input.pglite.port,
});
try {
// Unlike the semantic lane below, a failure while embedding the sources is
// not caught here and fails the whole search.
const embeddings = await sourceEmbeddings({
candidates,
embeddingService: input.embeddingService ?? null,
dimensions,
});
// Tables are dropped and recreated on every search, then refilled.
await resetPrototypeSchema(owner, dimensions);
await insertSourceRows({ owner, candidates, embeddings });
const candidateConnectionIds = [...new Set(candidates.map((candidate) => candidate.summary.connectionId))].sort();
const dictionaryEntries = await loadLatestSlDictionaryEntries(project, candidateConnectionIds);
await insertDictionaryRows(owner, dictionaryEntries);
const candidateById = new Map(candidates.map((candidate) => [candidateKey(candidate.summary), candidate]));
const connectionIds = connectionIdsForSearch(input);
const finalLimit = input.limit ?? candidates.length;
// Filled by the dictionary lane so evidence can be attached during hydration.
const dictionaryEvidence = new Map<string, SlDictionaryMatch[]>();
const core = new HybridSearchCore();
const generators: SearchCandidateGenerator[] = [
{
lane: 'lexical',
async generate(args) {
const rows = await queryLexicalCandidates({
owner,
queryText: args.queryText,
connectionIds,
limit: args.laneCandidatePoolLimit,
});
return {
candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
};
},
},
{
lane: 'dictionary',
async generate(args) {
const rows = await queryDictionaryCandidates({
owner,
queryText: args.queryText,
connectionIds,
limit: args.laneCandidatePoolLimit,
});
for (const row of rows) {
dictionaryEvidence.set(row.id, row.matches);
}
return {
candidates: rows.map((row) => ({
id: row.id,
rank: row.rank,
rawScore: row.rawScore,
evidence: row.matches,
})),
};
},
},
{
// In-memory substring scan over the loaded candidates; does not touch PGlite.
lane: 'token',
async generate(args) {
const rows = tokenLaneCandidates(candidates, args.normalizedQuery.terms).slice(
0,
args.laneCandidatePoolLimit,
);
return {
candidates: rows.map((row, index) => ({
id: candidateKey(row.candidate.summary),
rank: index + 1,
rawScore: row.score,
})),
};
},
},
{
lane: 'semantic',
async generate(args) {
return querySemanticCandidates({
owner,
queryText: args.queryText,
connectionIds,
embeddingService: input.embeddingService ?? null,
dimensions,
limit: args.laneCandidatePoolLimit,
});
},
},
];
const fused = await core.search({ queryText: query, limit: finalLimit, generators });
const hydrated: LocalSlSourceSearchResult[] = [];
for (const result of fused.results) {
const candidate = candidateById.get(result.id);
// Skip ids that do not correspond to a candidate loaded from disk.
if (!candidate) {
continue;
}
const dictionaryMatches = dictionaryEvidence.get(result.id);
hydrated.push({
...candidate.summary,
score: result.score,
matchReasons: result.matchReasons as SlSearchMatchReason[],
...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
lanes: fused.lanes,
});
}
return hydrated;
} finally {
// Always stop the owner process, including when a step above throws.
await owner.stop();
}
}

View file

@ -0,0 +1,53 @@
import type { SemanticLayerQueryInput, SemanticLayerSource } from './types.js';
/** Identifying information about a connection, as returned by `SlConnectionCatalogPort`. */
export interface KloConnectionInfo {
id: string;
name: string;
// NOTE(review): presumably a driver/engine identifier — confirm the allowed values against the catalog implementation.
connectionType: string;
}
/** Result of `SlConnectionCatalogPort.executeQuery`; every field is optional. */
export interface KloQueryResult {
headers?: string[];
/** Rows as arrays of untyped cell values. */
rows?: unknown[][];
totalRows?: number;
}
/** Port for looking up connections and running SQL against one of them. */
export interface SlConnectionCatalogPort {
// NOTE(review): presumably returns only the enabled connections among `ids` — confirm with implementations.
listEnabledConnections(ids: string[]): Promise<KloConnectionInfo[]>;
/** Resolves to `null` when no connection is returned for the id. */
getConnectionById(connectionId: string): Promise<KloConnectionInfo | null>;
executeQuery(connectionId: string, sql: string): Promise<KloQueryResult>;
}
/**
 * Port for source validation and query compilation. Both calls resolve to an
 * envelope with optional `data` and `error` fields.
 */
// NOTE(review): the name suggests this is backed by the Python semantic-layer service — confirm.
export interface SlPythonPort {
/** Validates the given sources for a dialect; `data` may carry errors, warnings and per-source warnings. */
validateSources(input: {
sources: SemanticLayerSource[];
dialect: string;
recently_touched?: string[];
}): Promise<{
data?: { errors?: string[]; warnings?: string[]; per_source_warnings?: Record<string, string[]> } | null;
error?: unknown;
}>;
/** Runs a semantic-layer query against the given sources; `data` may carry the SQL text and a plan object. */
query(input: {
sources: SemanticLayerSource[];
query: SemanticLayerQueryInput;
dialect: string;
}): Promise<{ data?: { sql?: string; plan?: Record<string, unknown> } | null; error?: unknown }>;
}
/** Port for a per-connection search index over semantic-layer sources. */
export interface SlSourcesIndexPort {
/** Writes index rows for a connection; `embedding` may be `null` when no vector is available. */
upsertSources(
connectionId: string,
sources: Array<{ sourceName: string; searchText: string; embedding: number[] | null; contentHash?: string | null }>,
): Promise<void>;
/** Returns, per source name, the stored search text and whether an embedding is stored. */
getExistingSearchTexts(connectionId: string): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>>;
// NOTE(review): presumably removes the connection's rows whose name is not in `keepNames` — confirm with implementations.
deleteStale(connectionId: string, keepNames: string[]): Promise<void>;
deleteByConnection(connectionId: string): Promise<void>;
deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void>;
/**
 * Searches one connection's sources and returns source names with an `rrfScore`.
 * `queryEmbedding` may be `null`; `minRrfScore` is optional.
 */
// NOTE(review): `rrfScore` suggests reciprocal rank fusion of text and vector rankings — confirm with implementations.
search(
connectionId: string,
queryEmbedding: number[] | null,
queryText: string,
limit: number,
minRrfScore?: number,
): Promise<Array<{ sourceName: string; rrfScore: number }>>;
}

View file

@ -0,0 +1,149 @@
import { z } from 'zod';
// Literal vocabularies — kept in lockstep with the Python Pydantic model at
// python-service/klo-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
// ColumnVisibility / JoinDeclaration). If these diverge, YAMLs can pass
// TypeScript validation at ingest time but fail Python loading at query time.
// Allowed values of a source column's `type`.
const columnTypeValues = ['string', 'number', 'time', 'boolean'] as const;
// Allowed values of a source column's `role`.
const columnRoleValues = ['time', 'default'] as const;
// Allowed values of a source column's `visibility`.
const columnVisibilityValues = ['public', 'internal', 'hidden'] as const;
// Allowed values of a join declaration's `relationship`.
const joinRelationshipValues = ['many_to_one', 'one_to_many', 'one_to_one'] as const;
/**
 * Measure entry: `name` and `expr` are required and non-empty; `filter`,
 * `segments` (a list of non-empty strings) and `description` are optional.
 */
const slMeasureDefinitionSchema = z.object({
name: z.string().min(1),
expr: z.string().min(1),
filter: z.string().optional(),
segments: z.array(z.string().min(1)).optional(),
description: z.string().optional(),
});
/** Segment entry: required non-empty `name` and `expr`, optional `description`. */
const segmentDefinitionSchema = z.object({
name: z.string().min(1),
expr: z.string().min(1),
description: z.string().optional(),
});
/** Shape of `default_time_dimension`: an object with an optional `dbt` string. */
const defaultTimeDimensionDbtSchema = z.object({
dbt: z.string().optional(),
});
/** Column constraint flags stored under the `dbt` key: optional `not_null` and `unique` booleans. */
const dbtColumnConstraintsSchema = z.object({
not_null: z.boolean().optional(),
unique: z.boolean().optional(),
});
/** Reference to a data test: non-empty `name` and `package`, plus optional free-form `kwargs`. */
const dbtDataTestRefSchema = z.object({
name: z.string().min(1),
package: z.string().min(1),
kwargs: z.record(z.string(), z.unknown()).optional(),
});
/**
 * Column `tests` block: an optional `dbt` list of test references and an
 * optional `dbt_by_package` map from a string key to non-empty test names.
 */
const dbtColumnTestsSchema = z.object({
dbt: z.array(dbtDataTestRefSchema).optional(),
dbt_by_package: z.record(z.string(), z.array(z.string().min(1))).optional(),
});
/** Object with an optional `dbt` list of non-empty strings; reused for column `enum_values` and source `tags`. */
const sourceKeyedStringArraySchema = z.object({
dbt: z.array(z.string().min(1)).optional(),
});
/** Column `constraints` block: constraint flags nested under an optional `dbt` key. */
const sourceKeyedColumnConstraintsSchema = z.object({
dbt: dbtColumnConstraintsSchema.optional(),
});
/** Freshness details: an unvalidated `raw` payload and an optional, nullable `loaded_at_field` string. */
const freshnessDbtSchema = z.object({
raw: z.unknown().optional(),
loaded_at_field: z.string().nullable().optional(),
});
/** Source `freshness` block: freshness details nested under an optional `dbt` key. */
const sourceFreshnessSchema = z.object({
dbt: freshnessDbtSchema.optional(),
});
/**
 * Join declaration: non-empty `to` and `on`, a `relationship` drawn from
 * `joinRelationshipValues`, and an optional `alias`.
 */
const joinDeclarationSchema = z.object({
to: z.string().min(1),
on: z.string().min(1),
relationship: z.enum(joinRelationshipValues),
alias: z.string().optional(),
});
/**
 * Column entry of a standalone source. Only `name` is required by this schema;
 * every other field, including the dbt-keyed `constraints`, `enum_values`
 * and `tests` blocks, is optional.
 */
const sourceColumnSchema = z.object({
name: z.string().min(1),
// type/description optional on standalone sources: compose-time enrichment fills them
// from the manifest entry named in `inherits_columns_from`. If the agent does not set
// `inherits_columns_from`, or the column is not in the manifest, type must be present
// — surfaced by sl_validate.
type: z.enum(columnTypeValues).optional(),
role: z.enum(columnRoleValues).optional(),
visibility: z.enum(columnVisibilityValues).optional(),
description: z.string().optional(),
expr: z.string().optional(),
constraints: sourceKeyedColumnConstraintsSchema.optional(),
enum_values: sourceKeyedStringArraySchema.optional(),
tests: dbtColumnTestsSchema.optional(),
});
/**
 * Column entry inside an overlay source. Only `name` is required. A column
 * may declare a `type` only together with a non-empty `expr`, because
 * structural column types are inherited from the manifest.
 */
const overlayColumnSchema = z
  .object({
    name: z.string().min(1),
    type: z.enum(columnTypeValues).optional(),
    role: z.enum(columnRoleValues).optional(),
    visibility: z.enum(columnVisibilityValues).optional(),
    description: z.string().optional(),
    expr: z.string().optional(),
  })
  .refine(
    (column) => {
      // Untyped columns always pass; a typed column passes only with a truthy expr.
      if (!column.type) {
        return true;
      }
      return Boolean(column.expr);
    },
    {
      message: "Overlay column with 'type' must also have 'expr' (only computed columns may specify a type)",
    },
  );
/**
 * Standalone source definition. Unknown top-level keys are rejected
 * (`.strict()`), `grain` needs at least one entry, and exactly one of
 * `table` / `sql` must be set to a non-empty value.
 */
export const sourceDefinitionSchema = z
  .object({
    name: z.string().min(1),
    description: z.string().optional(),
    // Accepted for documentation parity with the Python spec; the `table` / `sql`
    // fields drive behavior, this discriminator does not.
    source_type: z.enum(['table', 'sql']).optional(),
    table: z.string().optional(),
    sql: z.string().optional(),
    // Manifest key (e.g. "CONSIGNMENTS") whose column metadata fills in any blank
    // type/descriptions/role on this source's columns at compose time, so the agent
    // can write `columns: [{name: FOO}]` without redeclaring known fields.
    // Lookup is fuzzy: bare key, fully-qualified table path, or any suffix all match.
    inherits_columns_from: z.string().optional(),
    grain: z.array(z.string()).min(1),
    columns: z.array(sourceColumnSchema).default([]),
    joins: z.array(joinDeclarationSchema).default([]),
    measures: z.array(slMeasureDefinitionSchema).default([]),
    segments: z.array(segmentDefinitionSchema).optional(),
    default_time_dimension: defaultTimeDimensionDbtSchema.optional(),
    tags: sourceKeyedStringArraySchema.optional(),
    freshness: sourceFreshnessSchema.optional(),
  })
  .strict()
  .refine(
    // Exclusive-or on presence: an empty string counts as absent.
    (source) => Boolean(source.table) !== Boolean(source.sql),
    {
      message: "Standalone source must have exactly one of 'table' or 'sql' (not both)",
    },
  );
/**
 * Overlay source: no table/sql, all fields optional except name.
 * Unknown top-level keys are rejected (`.strict()`). Besides the fields shared
 * with standalone sources it accepts a `descriptions` string map plus
 * `exclude_columns` and `disable_joins` string lists.
 */
export const sourceOverlaySchema = z
.object({
name: z.string().min(1),
description: z.string().optional(),
descriptions: z.record(z.string(), z.string()).optional(),
grain: z.array(z.string()).optional(),
columns: z.array(overlayColumnSchema).optional(),
joins: z.array(joinDeclarationSchema).optional(),
measures: z.array(slMeasureDefinitionSchema).optional(),
segments: z.array(segmentDefinitionSchema).optional(),
exclude_columns: z.array(z.string()).optional(),
disable_joins: z.array(z.string()).optional(),
default_time_dimension: defaultTimeDimensionDbtSchema.optional(),
})
.strict();
/**
 * Tells whether raw source data describes an overlay.
 *
 * @param source - Parsed source data, not yet validated.
 * @returns `true` when neither `table` nor `sql` holds a truthy value
 *   (missing and empty-string values both count as absent).
 */
export function isOverlaySource(source: Record<string, unknown>): boolean {
  const hasPhysicalBacking = Boolean(source.table) || Boolean(source.sql);
  return !hasPhysicalBacking;
}

View file

@ -0,0 +1,678 @@
import type { Mock } from 'vitest';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import {
composeOverlay,
enrichColumnsFromManifest,
findDanglingSegmentRefs,
SemanticLayerService,
} from './semantic-layer.service.js';
import { sourceDefinitionSchema } from './schemas.js';
import type { SemanticLayerSource } from './types.js';
// Shared mock passed as the third constructor argument of SemanticLayerService.
// It is module-level, so call history persists across suites unless a suite
// resets it (only `validateSources` is reset, in the validateWithProposedSource suite).
const pythonPort = {
validateSources: vi.fn(),
generateSources: vi.fn(),
query: vi.fn(),
};
/**
 * Builds a stubbed connection catalog for the service under test.
 *
 * @param connectionType - Value reported as `connectionType` by the stubbed
 *   `getConnectionById`; defaults to `'SNOWFLAKE'`.
 * @returns Fresh mocks: `listEnabledConnections` resolves to an empty list,
 *   `getConnectionById` resolves to the fixed `conn-1` connection, and
 *   `executeQuery` is a bare mock.
 */
function connectionCatalog(connectionType = 'SNOWFLAKE') {
  const connection = { id: 'conn-1', name: 'conn-1', connectionType };
  const listEnabledConnections = vi.fn().mockResolvedValue([]);
  const getConnectionById = vi.fn().mockResolvedValue(connection);
  const executeQuery = vi.fn();
  return { listEnabledConnections, getConnectionById, executeQuery };
}
// Base source used by the composeOverlay suite: a table-backed source with
// three string columns and no joins or measures.
const baseTable: SemanticLayerSource = {
name: 'fct_labs',
grain: ['lab_order_id'],
table: 'analytics.fct_labs',
columns: [
{ name: 'lab_order_id', type: 'string' },
{ name: 'admin_user_id', type: 'string' },
{ name: 'lab_type', type: 'string' },
],
joins: [],
measures: [],
};
// Suite for composeOverlay(base, overlay): checks how overlay segments,
// measures, columns and descriptions are combined with `baseTable`, and that
// unknown overlay keys are rejected with an error naming every offending key.
describe('composeOverlay', () => {
it('carries top-level segments from overlay into the composed source', () => {
const overlay = {
name: 'fct_labs',
segments: [{ name: 'byol', expr: "lab_type = 'byol'", description: 'BYOL cohort' }],
};
const composed = composeOverlay(baseTable, overlay);
expect(composed.segments).toHaveLength(1);
expect(composed.segments?.[0].name).toBe('byol');
expect(composed.segments?.[0].expr).toBe("lab_type = 'byol'");
});
it('preserves measure-level segments references', () => {
const overlay = {
name: 'fct_labs',
segments: [{ name: 'byol', expr: "lab_type = 'byol'" }],
measures: [
{
name: 'byol_subscriber_count',
expr: 'count(distinct admin_user_id)',
segments: ['byol'],
description: 'BYOL subscribers',
},
],
};
const composed = composeOverlay(baseTable, overlay);
expect(composed.measures).toHaveLength(1);
expect(composed.measures[0].segments).toEqual(['byol']);
});
it('leaves base segments unchanged when overlay does not specify segments', () => {
const baseWithSegments: SemanticLayerSource = {
...baseTable,
segments: [{ name: 'pre_existing', expr: 'is_paid = true' }],
};
const overlay = { name: 'fct_labs', description: 'no segments here' };
const composed = composeOverlay(baseWithSegments, overlay);
expect(composed.segments).toEqual([{ name: 'pre_existing', expr: 'is_paid = true' }]);
});
it('replaces base segments when overlay provides its own (even an empty array)', () => {
const baseWithSegments: SemanticLayerSource = {
...baseTable,
segments: [{ name: 'pre_existing', expr: 'is_paid = true' }],
};
const overlay = { name: 'fct_labs', segments: [] };
const composed = composeOverlay(baseWithSegments, overlay);
expect(composed.segments).toEqual([]);
});
it('throws on unknown top-level overlay keys with a pointed error', () => {
const overlay = { name: 'fct_labs', frobnicate: true };
expect(() => composeOverlay(baseTable, overlay)).toThrow(
/overlay for 'fct_labs' has unhandled keys \[frobnicate\]/,
);
});
it('lists every unknown key in the error message, not just the first', () => {
const overlay = { name: 'fct_labs', foo: 1, bar: 2 };
expect(() => composeOverlay(baseTable, overlay)).toThrow(/foo, bar/);
});
it('still handles existing known keys without regression', () => {
const overlay = {
name: 'fct_labs',
description: 'patient lab orders',
exclude_columns: ['admin_user_id'],
columns: [{ name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" }],
measures: [{ name: 'count_all', expr: 'count(*)' }],
};
const composed = composeOverlay(baseTable, overlay);
expect(composed.columns.find((c) => c.name === 'admin_user_id')).toBeUndefined();
expect(composed.columns.find((c) => c.name === 'is_byol')).toBeDefined();
expect(composed.measures).toHaveLength(1);
});
it('merges overlay descriptions (plural) with base descriptions keyed by source', () => {
const baseWithDescriptions: SemanticLayerSource = {
...baseTable,
descriptions: { db: 'scan-derived description', ai: 'AI description' },
};
const overlay = {
name: 'fct_labs',
descriptions: { dbt: 'dbt description', ai: 'AI description (overridden)' },
};
const composed = composeOverlay(baseWithDescriptions, overlay);
// Expected: base-only keys kept, overlay-only keys added, shared keys take the overlay value.
expect(composed.descriptions).toEqual({
db: 'scan-derived description',
ai: 'AI description (overridden)',
dbt: 'dbt description',
});
});
});
// Suite for enrichColumnsFromManifest(source, manifestEntry): checks that blank
// column fields are filled from the manifest entry, that locally declared
// values win, and that a null manifest entry leaves the source as-is.
describe('enrichColumnsFromManifest', () => {
// Manifest entry shared by every case below.
const manifest: SemanticLayerSource = {
name: 'CONSIGNMENTS',
table: 'ANALYTICS.MARTS.CONSIGNMENTS',
grain: ['CONSIGNED_ITEM_ID'],
columns: [
{
name: 'CONSIGNED_ITEM_ID',
type: 'string',
descriptions: { ai: 'Unique identifier for the consigned item record.' },
},
{
name: 'CASH_ADV_AMOUNT',
type: 'number',
descriptions: { ai: 'Amount of cash advance disbursed to consigners.' },
},
{
name: 'CONSIGNMENT_CREATED_AT',
type: 'time',
role: 'time',
descriptions: { ai: 'Timestamp when the consignment was created.' },
},
],
joins: [],
measures: [],
};
it('fills blank type and descriptions on source columns from the manifest', () => {
const source: SemanticLayerSource = {
name: 'aav_consignments',
sql: 'SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT FROM MARTS.CONSIGNMENTS WHERE ...',
inherits_columns_from: 'CONSIGNMENTS',
grain: ['CONSIGNED_ITEM_ID'],
columns: [
{ name: 'CONSIGNED_ITEM_ID', type: '' },
{ name: 'CASH_ADV_AMOUNT', type: '' },
],
joins: [],
measures: [],
};
const enriched = enrichColumnsFromManifest(source, manifest);
expect(enriched.columns[0]).toEqual({
name: 'CONSIGNED_ITEM_ID',
type: 'string',
descriptions: { ai: 'Unique identifier for the consigned item record.' },
});
expect(enriched.columns[1]).toEqual({
name: 'CASH_ADV_AMOUNT',
type: 'number',
descriptions: { ai: 'Amount of cash advance disbursed to consigners.' },
});
});
it('preserves a local description if the source already declared one', () => {
const source: SemanticLayerSource = {
name: 'aav_consignments',
sql: 'SELECT CONSIGNED_ITEM_ID FROM ...',
inherits_columns_from: 'CONSIGNMENTS',
grain: ['CONSIGNED_ITEM_ID'],
columns: [
{
name: 'CONSIGNED_ITEM_ID',
type: 'string',
descriptions: { ai: 'AAV-specific note: always non-null in this filtered view.' },
},
],
joins: [],
measures: [],
};
const enriched = enrichColumnsFromManifest(source, manifest);
expect(enriched.columns[0].descriptions).toEqual({
ai: 'AAV-specific note: always non-null in this filtered view.',
});
});
it('passes through columns absent from the manifest unchanged', () => {
const source: SemanticLayerSource = {
name: 'aav_consignments',
sql: 'SELECT ALT_VALUE_COMBINED, my_derived FROM ...',
inherits_columns_from: 'CONSIGNMENTS',
grain: ['CONSIGNED_ITEM_ID'],
columns: [{ name: 'my_derived', type: 'number', expr: 'CASH_ADV_AMOUNT * 2' }],
joins: [],
measures: [],
};
const enriched = enrichColumnsFromManifest(source, manifest);
expect(enriched.columns[0]).toEqual({
name: 'my_derived',
type: 'number',
expr: 'CASH_ADV_AMOUNT * 2',
});
});
it('copies role from the manifest when the source omits it', () => {
const source: SemanticLayerSource = {
name: 'aav_consignments',
sql: 'SELECT CONSIGNMENT_CREATED_AT FROM ...',
inherits_columns_from: 'CONSIGNMENTS',
grain: ['CONSIGNED_ITEM_ID'],
columns: [{ name: 'CONSIGNMENT_CREATED_AT', type: '' }],
joins: [],
measures: [],
};
const enriched = enrichColumnsFromManifest(source, manifest);
expect(enriched.columns[0].role).toBe('time');
expect(enriched.columns[0].type).toBe('time');
});
it('returns the source unchanged when manifestEntry is null/undefined', () => {
const source: SemanticLayerSource = {
name: 'aav_consignments',
sql: 'SELECT FOO FROM ...',
grain: ['FOO'],
columns: [{ name: 'FOO', type: '' }],
joins: [],
measures: [],
};
const enriched = enrichColumnsFromManifest(source, null);
expect(enriched).toEqual(source);
});
});
// Suite for sourceDefinitionSchema: parses a table-backed source carrying
// dbt-keyed metadata and checks those fields survive in the parsed output.
describe('sourceDefinitionSchema', () => {
it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => {
const result = sourceDefinitionSchema.safeParse({
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [
{
name: 'status',
type: 'string',
constraints: { dbt: { not_null: true, unique: true } },
enum_values: { dbt: ['placed', 'shipped'] },
tests: {
dbt: [{ name: 'accepted_values', package: 'dbt' }],
dbt_by_package: { dbt: ['accepted_values'] },
},
},
],
joins: [],
measures: [],
tags: { dbt: ['mart', 'finance'] },
freshness: { dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } } },
default_time_dimension: { dbt: 'updated_at' },
});
expect(result.success).toBe(true);
// Early return narrows `result` to the success variant for the assertions below.
if (!result.success) {
return;
}
expect(result.data.columns[0]).toMatchObject({
constraints: { dbt: { not_null: true, unique: true } },
enum_values: { dbt: ['placed', 'shipped'] },
tests: {
dbt: [{ name: 'accepted_values', package: 'dbt' }],
dbt_by_package: { dbt: ['accepted_values'] },
},
});
expect(result.data.tags).toEqual({ dbt: ['mart', 'finance'] });
expect(result.data.freshness).toEqual({
dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } },
});
});
});
// Suite for SemanticLayerService.findManifestEntryByTableRef: a single-table
// manifest is served through a mocked config service, then looked up by bare
// key, fully-qualified path, suffix and lower-cased path.
describe('findManifestEntryByTableRef', () => {
  let configService: {
    listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>;
    readFile: Mock<(path: string) => Promise<{ content: string }>>;
  };
  let service: SemanticLayerService;

  beforeEach(() => {
    configService = {
      listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>().mockResolvedValue({
        files: ['semantic-layer/conn-1/_schema/marts.yaml'],
      }),
      readFile: vi.fn<(path: string) => Promise<{ content: string }>>().mockResolvedValue({
        // YAML nesting is significant: `table` and `columns` must be indented
        // under CONSIGNMENTS, otherwise they parse as sibling keys of `tables`
        // and the manifest entry ends up empty.
        content: [
          'tables:',
          '  CONSIGNMENTS:',
          '    table: ANALYTICS.MARTS.CONSIGNMENTS',
          '    columns:',
          '      - { name: CONSIGNED_ITEM_ID, type: string, pk: true }',
        ].join('\n'),
      }),
    };
    service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
  });

  it('finds by exact bare manifest key', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'CONSIGNMENTS');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('finds by fully-qualified table path', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'ANALYTICS.MARTS.CONSIGNMENTS');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('finds by schema-qualified suffix', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'MARTS.CONSIGNMENTS');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('matches case-insensitively on table path', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'analytics.marts.consignments');
    expect(entry?.name).toBe('CONSIGNMENTS');
  });

  it('returns null when nothing matches', async () => {
    const entry = await service.findManifestEntryByTableRef('conn-1', 'NOT_A_TABLE');
    expect(entry).toBeNull();
  });
});
// Suite for SemanticLayerService.loadAllSources: manifest YAML and standalone
// source YAML are served through a mocked config service, and the loaded
// sources are checked for preserved dbt metadata and manifest-based column
// enrichment via `inherits_columns_from`.
//
// The YAML fixtures rely on indentation for nesting: table fields sit under
// the table key, and the fields of a multi-line column item sit under its
// `- name:` line. Flattening them changes the parsed structure.
describe('loadAllSources — standalone enrichment via inherits_columns_from', () => {
  let configService: {
    listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>;
    readFile: Mock<(path: string) => Promise<{ content: string }>>;
  };
  let service: SemanticLayerService;

  beforeEach(() => {
    configService = {
      listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>(),
      readFile: vi.fn<(path: string) => Promise<{ content: string }>>(),
    };
    service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
  });

  it('preserves dbt metadata when projecting manifest-backed sources', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1' || dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockResolvedValue({
      content: [
        'tables:',
        '  orders:',
        '    table: public.orders',
        '    tags: { dbt: [mart] }',
        '    freshness:',
        '      dbt:',
        '        loaded_at_field: updated_at',
        '    columns:',
        '      - name: status',
        '        type: string',
        '        constraints: { dbt: { not_null: true } }',
        '        enum_values: { dbt: [placed, shipped] }',
        '        tests:',
        '          dbt:',
        '            - { name: accepted_values, package: dbt }',
      ].join('\n'),
    });
    const sources = await service.loadAllSources('conn-1');
    expect(sources[0]).toMatchObject({
      name: 'orders',
      tags: { dbt: ['mart'] },
      freshness: { dbt: { loaded_at_field: 'updated_at' } },
      columns: [
        {
          name: 'status',
          constraints: { dbt: { not_null: true } },
          enum_values: { dbt: ['placed', 'shipped'] },
          tests: { dbt: [{ name: 'accepted_values', package: 'dbt' }] },
        },
      ],
    });
  });

  it('fills blank columns on a standalone source from the manifest entry it points at', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [schemaPath, standalonePath] });
      }
      if (dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockImplementation((path: string) => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  CONSIGNMENTS:',
            '    table: ANALYTICS.MARTS.CONSIGNMENTS',
            '    columns:',
            '      - name: CONSIGNED_ITEM_ID',
            '        type: string',
            '        descriptions: { ai: "Unique consigned-item id." }',
            '      - name: CASH_ADV_AMOUNT',
            '        type: number',
            '        descriptions: { ai: "Cash advance amount." }',
          ].join('\n'),
        });
      }
      if (path === standalonePath) {
        return Promise.resolve({
          content: [
            'name: aav_consignments',
            'sql: |',
            '  SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT FROM ANALYTICS.MARTS.CONSIGNMENTS WHERE x',
            'inherits_columns_from: CONSIGNMENTS',
            'grain: [CONSIGNED_ITEM_ID]',
            'columns:',
            '  - { name: CONSIGNED_ITEM_ID }',
            '  - { name: CASH_ADV_AMOUNT }',
          ].join('\n'),
        });
      }
      return Promise.reject(new Error(`Unexpected readFile: ${path}`));
    });
    const sources = await service.loadAllSources('conn-1');
    const aav = sources.find((s) => s.name === 'aav_consignments');
    expect(aav).toBeDefined();
    expect(aav?.columns).toEqual([
      { name: 'CONSIGNED_ITEM_ID', type: 'string', descriptions: { ai: 'Unique consigned-item id.' } },
      { name: 'CASH_ADV_AMOUNT', type: 'number', descriptions: { ai: 'Cash advance amount.' } },
    ]);
  });

  it('accepts a fully-qualified path in inherits_columns_from', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml';
    const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [schemaPath, standalonePath] });
      }
      if (dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockImplementation((path: string) => {
      if (path === schemaPath) {
        return Promise.resolve({
          content: [
            'tables:',
            '  CONSIGNMENTS:',
            '    table: ANALYTICS.MARTS.CONSIGNMENTS',
            '    columns:',
            '      - { name: CONSIGNED_ITEM_ID, type: string }',
          ].join('\n'),
        });
      }
      // Any other path is answered with the standalone source definition.
      return Promise.resolve({
        content: [
          'name: aav_consignments',
          'sql: SELECT 1',
          'inherits_columns_from: ANALYTICS.MARTS.CONSIGNMENTS',
          'grain: [CONSIGNED_ITEM_ID]',
          'columns:',
          '  - { name: CONSIGNED_ITEM_ID }',
        ].join('\n'),
      });
    });
    const sources = await service.loadAllSources('conn-1');
    const aav = sources.find((s) => s.name === 'aav_consignments');
    expect(aav?.columns[0].type).toBe('string');
  });

  it('passes the source through unchanged if inherits_columns_from misses', async () => {
    const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml';
    configService.listFiles.mockImplementation((dir: string) => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [standalonePath] });
      }
      return Promise.resolve({ files: [] });
    });
    configService.readFile.mockResolvedValue({
      content: [
        'name: aav_consignments',
        'sql: SELECT 1',
        'inherits_columns_from: NO_SUCH_TABLE',
        'grain: [FOO]',
        'columns:',
        '  - { name: FOO, type: string }',
      ].join('\n'),
    });
    const sources = await service.loadAllSources('conn-1');
    const aav = sources.find((s) => s.name === 'aav_consignments');
    expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]);
  });
});
// Suite for SemanticLayerService.validateWithProposedSource: checks the
// dialect handed to the validator port, that a bare overlay is composed with
// its manifest base first, and that an overlay without a base is reported
// without calling the validator.
describe('validateWithProposedSource', () => {
  let configService: {
    listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>;
    readFile: Mock<(path: string) => Promise<{ content: string }>>;
  };
  let service: SemanticLayerService;

  beforeEach(() => {
    // `pythonPort` is shared across suites; clear call history and stubbed results.
    pythonPort.validateSources.mockReset();
    configService = {
      listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>().mockResolvedValue({
        files: [],
      }),
      readFile: vi.fn<(path: string) => Promise<{ content: string }>>(),
    };
    service = new SemanticLayerService(configService as never, connectionCatalog('BIGQUERY'), pythonPort);
  });

  it('uses the connection warehouse dialect, not hardcoded postgres', async () => {
    pythonPort.validateSources.mockResolvedValue({
      data: { errors: [], warnings: [] },
    });
    await service.validateWithProposedSource('conn-1', {
      name: 'std',
      table: 'analytics.std',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [],
    });
    expect(pythonPort.validateSources).toHaveBeenCalledWith(
      expect.objectContaining({
        dialect: 'bigquery',
      }),
    );
  });

  it('composes a bare overlay with its manifest base before validating', async () => {
    const schemaPath = 'semantic-layer/conn-1/_schema/core.yaml';
    const listFilesImpl = (dir: string): Promise<{ files: string[] }> => {
      if (dir === 'semantic-layer/conn-1') {
        return Promise.resolve({ files: [schemaPath, 'semantic-layer/conn-1/fct_orders.yaml'] });
      }
      if (dir === 'semantic-layer/conn-1/_schema') {
        return Promise.resolve({ files: [schemaPath] });
      }
      return Promise.resolve({ files: [] });
    };
    const readFileImpl = (path: string): Promise<{ content: string }> => {
      if (path === schemaPath) {
        return Promise.resolve({
          // YAML nesting is significant: `table` and `columns` belong under
          // `fct_orders`, which the `composed?.table` assertion below depends on.
          content: [
            'tables:',
            '  fct_orders:',
            '    table: analytics.fct_orders',
            '    columns:',
            '      - { name: id, type: string, pk: true }',
            '      - { name: amount, type: number }',
          ].join('\n'),
        });
      }
      if (path === 'semantic-layer/conn-1/fct_orders.yaml') {
        return Promise.resolve({ content: 'name: fct_orders\nmeasures: []\n' });
      }
      return Promise.reject(new Error(`Unexpected readFile: ${path}`));
    };
    configService.listFiles.mockImplementation(listFilesImpl);
    configService.readFile.mockImplementation(readFileImpl);
    pythonPort.validateSources.mockResolvedValue({
      data: { errors: [], warnings: [] },
    });
    const overlay: SemanticLayerSource = {
      name: 'fct_orders',
      grain: ['id'],
      columns: [],
      joins: [],
      measures: [{ name: 'total_amount', expr: 'sum(amount)' }],
    };
    await service.validateWithProposedSource('conn-1', overlay);
    expect(pythonPort.validateSources).toHaveBeenCalledTimes(1);
    const sources = (pythonPort.validateSources.mock.calls[0][0]?.sources ?? []) as Array<Record<string, unknown>>;
    const composed = sources.find((s) => s.name === 'fct_orders');
    expect(composed).toBeDefined();
    expect(composed?.table).toBe('analytics.fct_orders');
    expect(composed?.measures).toEqual([{ name: 'total_amount', expr: 'sum(amount)' }]);
  });

  it('returns a pointed error when a bare overlay has no manifest base', async () => {
    configService.listFiles.mockResolvedValue({ files: [] });
    const overlay: SemanticLayerSource = {
      name: 'orphan',
      grain: [],
      columns: [],
      joins: [],
      measures: [],
    };
    const result = await service.validateWithProposedSource('conn-1', overlay);
    expect(result.errors[0]).toMatch(/Overlay 'orphan' has no matching manifest entry/);
    expect(pythonPort.validateSources).not.toHaveBeenCalled();
  });
});
// Suite for findDanglingSegmentRefs(source): expects one message per measure
// segment reference that does not match a segment declared on the source,
// and an empty list when everything resolves or nothing is referenced.
describe('findDanglingSegmentRefs', () => {
it('returns empty when every measure segment resolves', () => {
const source = {
segments: [{ name: 'byol' }, { name: 'paid' }],
measures: [
{ name: 'byol_count', segments: ['byol'] },
{ name: 'paid_count', segments: ['paid', 'byol'] },
],
};
expect(findDanglingSegmentRefs(source)).toEqual([]);
});
it('flags measures whose segment reference does not exist on the source', () => {
const source = {
segments: [{ name: 'byol' }],
measures: [{ name: 'broken', segments: ['byol', 'missing'] }],
};
const refs = findDanglingSegmentRefs(source);
expect(refs).toHaveLength(1);
expect(refs[0]).toMatch(/measure 'broken' references unknown segment 'missing'/);
});
it('flags when a source has zero segments but measures reference one', () => {
const source = {
measures: [{ name: 'broken', segments: ['byol'] }],
};
const refs = findDanglingSegmentRefs(source);
expect(refs).toHaveLength(1);
expect(refs[0]).toMatch(/unknown segment 'byol'/);
});
it('is a no-op for sources with no measures or no segment references', () => {
expect(findDanglingSegmentRefs({ measures: [{ name: 'simple', expr: 'count(*)' }] })).toEqual([]);
expect(findDanglingSegmentRefs({})).toEqual([]);
});
});

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,115 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKloProject, type KloLocalProject } from '../project/index.js';
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
// Suite for loadLatestSlDictionaryEntries: each test gets a fresh project in a
// temporary directory, which is removed afterwards.
describe('loadLatestSlDictionaryEntries', () => {
let tempDir: string;
let project: KloLocalProject;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-sl-dictionary-profile-'));
project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' });
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('loads latest relationship-profile sample values for dictionary candidate columns', async () => {
// Older profile (sync-1): its values must NOT show up in the result.
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
`${JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
sqlAvailable: true,
queryCount: 4,
tables: [],
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
rowCount: 20,
nullCount: 0,
distinctCount: 3,
uniquenessRatio: 0.15,
nullRate: 0,
sampleValues: ['paid', 'refunded', 'pending'],
minTextLength: 4,
maxTextLength: 8,
},
'orders.customer_id': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'customer_id',
nativeType: 'text',
normalizedType: 'string',
rowCount: 20,
nullCount: 0,
distinctCount: 20,
uniquenessRatio: 1,
nullRate: 0,
sampleValues: ['cus_1'],
minTextLength: 5,
maxTextLength: 5,
},
},
warnings: [],
},
null,
2,
)}\n`,
'klo',
'klo@example.com',
'Seed profile',
);
// Newer profile (sync-2): the only one expected to feed the result.
await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-2/enrichment/relationship-profile.json',
`${JSON.stringify(
{
connectionId: 'warehouse',
driver: 'postgres',
sqlAvailable: true,
queryCount: 4,
tables: [],
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
rowCount: 20,
nullCount: 0,
distinctCount: 2,
uniquenessRatio: 0.1,
nullRate: 0,
sampleValues: ['settled', 'voided'],
minTextLength: 6,
maxTextLength: 7,
},
},
warnings: [],
},
null,
2,
)}\n`,
'klo',
'klo@example.com',
'Seed newer profile',
);
await expect(loadLatestSlDictionaryEntries(project, ['warehouse'])).resolves.toEqual([
{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'settled', cardinality: 2 },
{ connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'voided', cardinality: 2 },
]);
});
it('returns an empty list when no relationship profile exists', async () => {
await expect(loadLatestSlDictionaryEntries(project, ['warehouse'])).resolves.toEqual([]);
});
});

View file

@ -0,0 +1,120 @@
import type { KloLocalProject } from '../project/index.js';
import { defaultKloDataDictionarySettings, isKloDataDictionaryCandidate } from '../scan/index.js';
/**
 * One dictionary value observed for a column: produced per sample value of a
 * relationship-profile column.
 */
export interface SlDictionaryEntry {
/** Connection the value belongs to (the profile's own id when it declares one). */
connectionId: string;
/** Taken from the profile column's `table.name`. */
sourceName: string;
columnName: string;
/** Trimmed sample value. */
value: string;
/** The column's `distinctCount` when it is a number, otherwise `null`. */
cardinality: number | null;
}
/** Subset of a relationship-profile column record that dictionary loading reads. */
interface RelationshipProfileColumn {
  table?: { name?: string };
  column?: string;
  nativeType?: string;
  normalizedType?: string;
  distinctCount?: number;
  sampleValues?: unknown[];
}

/** Subset of a relationship-profile artifact that dictionary loading reads. */
interface RelationshipProfileArtifact {
  connectionId?: string;
  columns?: Record<string, RelationshipProfileColumn>;
}

/** Narrows `value` to a plain, non-array, non-null object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Copies the fields of one raw column record that have the expected runtime
 * type; wrongly typed fields are dropped so later code can rely on the
 * declared types. Returns `null` when `value` is not an object at all.
 */
function toProfileColumn(value: unknown): RelationshipProfileColumn | null {
  if (!isRecord(value)) {
    return null;
  }
  const column: RelationshipProfileColumn = {};
  if (isRecord(value.table)) {
    column.table = typeof value.table.name === 'string' ? { name: value.table.name } : {};
  }
  if (typeof value.column === 'string') {
    column.column = value.column;
  }
  if (typeof value.nativeType === 'string') {
    column.nativeType = value.nativeType;
  }
  if (typeof value.normalizedType === 'string') {
    column.normalizedType = value.normalizedType;
  }
  if (typeof value.distinctCount === 'number') {
    column.distinctCount = value.distinctCount;
  }
  if (Array.isArray(value.sampleValues)) {
    column.sampleValues = value.sampleValues;
  }
  return column;
}

/**
 * Parses the JSON text of a relationship-profile artifact.
 *
 * The artifact is external data, so nothing about its shape is trusted: only
 * fields with the expected runtime type are kept, and column entries that are
 * not objects are skipped.
 *
 * @param raw - JSON text of the artifact.
 * @returns The validated artifact, or `null` when `raw` is not valid JSON or
 *   its top-level value is not an object.
 */
function parseProfile(raw: string): RelationshipProfileArtifact | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Malformed JSON is treated like any other unusable artifact.
    return null;
  }
  if (!isRecord(parsed)) {
    return null;
  }
  const artifact: RelationshipProfileArtifact = {};
  if (typeof parsed.connectionId === 'string') {
    artifact.connectionId = parsed.connectionId;
  }
  if (isRecord(parsed.columns)) {
    const entries: [string, RelationshipProfileColumn][] = [];
    for (const [key, value] of Object.entries(parsed.columns)) {
      const column = toProfileColumn(value);
      if (column) {
        entries.push([key, column]);
      }
    }
    // fromEntries defines own properties, so a key such as "__proto__" stays data.
    artifact.columns = Object.fromEntries(entries);
  }
  return artifact;
}
/**
 * Turns raw sample values into a sorted list of distinct, trimmed strings.
 *
 * Only primitive values (string, number, boolean, bigint) are kept. `null`,
 * `undefined` and objects are skipped, because stringifying them would add
 * meaningless entries such as "null" or "[object Object]".
 * Duplicates are detected case-insensitively and the first spelling wins.
 *
 * @param values - Raw sample values; `undefined` is treated as an empty list.
 * @returns Distinct non-empty values ordered by `localeCompare`.
 */
function normalizedValues(values: unknown[] | undefined): string[] {
  const seen = new Set<string>();
  const result: string[] = [];
  for (const value of values ?? []) {
    const isPrimitive =
      typeof value === 'string' ||
      typeof value === 'number' ||
      typeof value === 'boolean' ||
      typeof value === 'bigint';
    if (!isPrimitive) {
      continue;
    }
    const text = String(value).trim();
    const key = text.toLowerCase();
    if (text.length === 0 || seen.has(key)) {
      continue;
    }
    seen.add(key);
    result.push(text);
  }
  return result.sort((left, right) => left.localeCompare(right));
}
/**
 * Builds the dictionary entries contributed by one profiled column.
 *
 * @param connectionId - Connection id stamped on every produced entry.
 * @param column - Profiled column record.
 * @returns One entry per normalized sample value, or an empty list when the
 *   column lacks a table or column name, is not a dictionary candidate, or
 *   has a known distinct count above the configured cardinality threshold.
 */
function columnEntries(connectionId: string, column: RelationshipProfileColumn): SlDictionaryEntry[] {
  const { table, column: columnName, normalizedType, nativeType, distinctCount, sampleValues } = column;
  const sourceName = table?.name;
  if (!sourceName || !columnName) {
    return [];
  }

  // Prefer the normalized type, then the native one, then an empty string.
  if (!isKloDataDictionaryCandidate(normalizedType ?? nativeType ?? '', columnName)) {
    return [];
  }

  // An unknown cardinality (null) never trips the threshold.
  const cardinality = typeof distinctCount === 'number' ? distinctCount : null;
  const exceedsThreshold =
    cardinality !== null && cardinality > defaultKloDataDictionarySettings.cardinalityThreshold;
  if (exceedsThreshold) {
    return [];
  }

  const entries: SlDictionaryEntry[] = [];
  for (const value of normalizedValues(sampleValues)) {
    entries.push({ connectionId, sourceName, columnName, value, cardinality });
  }
  return entries;
}
/**
 * Finds the relationship-profile artifact to use for a connection.
 *
 * Lists `raw-sources/<connectionId>/live-database`, keeps the paths ending in
 * `/enrichment/relationship-profile.json`, and picks the one that sorts last
 * under `localeCompare`.
 * NOTE(review): this is plain string ordering, so it ranks `sync-10` before
 * `sync-2` — confirm that sync ids are generated so that they sort by recency.
 *
 * @returns The selected path, or `null` when listing fails or nothing matches.
 */
async function latestProfilePath(project: KloLocalProject, connectionId: string): Promise<string | null> {
  const profileSuffix = '/enrichment/relationship-profile.json';
  let listed: string[];
  try {
    const listing = await project.fileStore.listFiles(`raw-sources/${connectionId}/live-database`);
    listed = listing.files;
  } catch {
    // A failed listing is treated as "no profile available".
    return null;
  }
  const candidates = listed.filter((candidate) => candidate.endsWith(profileSuffix));
  candidates.sort((a, b) => a.localeCompare(b));
  const newest = candidates[candidates.length - 1];
  return newest ?? null;
}
/**
 * Loads dictionary entries from the selected relationship profile of each
 * connection.
 *
 * Connection ids are de-duplicated and processed one after another in sorted
 * order; connections without a profile are skipped. Entries carry the
 * profile's own `connectionId` when it declares one, otherwise the requested id.
 *
 * @param project - Project whose file store holds the profile artifacts.
 * @param connectionIds - Connections to load entries for.
 * @returns All entries ordered by connection id, source name, column name,
 *   then value (each compared with `localeCompare`).
 */
export async function loadLatestSlDictionaryEntries(
  project: KloLocalProject,
  connectionIds: readonly string[],
): Promise<SlDictionaryEntry[]> {
  const uniqueIds = Array.from(new Set(connectionIds)).sort();
  const collected: SlDictionaryEntry[] = [];

  for (const requestedId of uniqueIds) {
    const profilePath = await latestProfilePath(project, requestedId);
    if (profilePath) {
      const { content } = await project.fileStore.readFile(profilePath);
      const profile = parseProfile(content);
      const effectiveId = profile?.connectionId ?? requestedId;
      const columns = Object.values(profile?.columns ?? {});
      for (const column of columns) {
        collected.push(...columnEntries(effectiveId, column));
      }
    }
  }

  // Compare field by field; the first field that differs decides the order.
  const sortKeys = ['connectionId', 'sourceName', 'columnName', 'value'] as const;
  return collected.sort((left, right) => {
    for (const key of sortKeys) {
      const order = left[key].localeCompare(right[key]);
      if (order !== 0) {
        return order;
      }
    }
    return 0;
  });
}

View file

@ -0,0 +1,165 @@
import { describe, expect, it, vi } from 'vitest';
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
import type { SemanticLayerSource } from './types.js';
describe('SlSearchService', () => {
  // Every test only exercises search-text building, so both ports are plain inert mocks.
  const createService = (): SlSearchService =>
    new SlSearchService(
      { maxBatchSize: 16, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() },
      {
        upsertSources: vi.fn(),
        getExistingSearchTexts: vi.fn(),
        deleteStale: vi.fn(),
        deleteByConnection: vi.fn(),
        deleteByConnectionAndName: vi.fn(),
        search: vi.fn(),
      },
    );

  it('builds search text from source, columns, measures, and joins', () => {
    const service = createService();
    const orders: SemanticLayerSource = {
      name: 'orders',
      descriptions: { user: 'Customer orders' },
      table: 'public.orders',
      grain: ['id'],
      columns: [
        { name: 'id', type: 'string' },
        { name: 'amount', type: 'number', descriptions: { user: 'Order amount' } },
      ],
      measures: [{ name: 'revenue', expr: 'sum(amount)', description: 'Gross revenue' }],
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
    };

    const searchText = service.buildSearchText(orders);

    expect(searchText).toContain('orders');
    expect(searchText).toContain('Customer orders');
    expect(searchText).toContain('amount (number) Order amount');
    expect(searchText).toContain('measure: revenue sum(amount) Gross revenue');
    expect(searchText).toContain('join: customers (many_to_one)');
  });

  it('exports the same canonical search text builder used by SlSearchService', () => {
    const service = createService();
    const orders: SemanticLayerSource = {
      name: 'orders',
      descriptions: { user: 'Customer orders' },
      table: 'public.orders',
      grain: ['id'],
      columns: [
        {
          name: 'status',
          type: 'string',
          enum_values: { dbt: ['paid', 'refunded'] },
          constraints: { dbt: { not_null: true } },
        },
      ],
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
      measures: [{ name: 'total_revenue', expr: 'sum(revenue)', description: 'Gross revenue' }],
      tags: { dbt: ['finance'] },
    };

    const standalone = buildSemanticLayerSourceSearchText(orders);

    expect(standalone).toBe(service.buildSearchText(orders));
    expect(standalone).toContain('dbt values: paid, refunded');
    expect(standalone).toContain('measure: total_revenue sum(revenue) Gross revenue');
    expect(standalone).toContain('dbt tags: finance');
  });

  it('includes dbt enum, not_null, and unique tokens for columns', () => {
    const service = createService();
    const srcOrders: SemanticLayerSource = {
      name: 'src_orders',
      table: 'public.orders',
      grain: [],
      columns: [
        {
          name: 'status',
          type: 'string',
          descriptions: {},
          enum_values: { dbt: ['a', 'b'] },
          constraints: { dbt: { not_null: true, unique: true } },
        },
      ],
      joins: [],
      measures: [],
    };

    const searchText = service.buildSearchText(srcOrders);

    expect(searchText).toContain('dbt values: a, b');
    expect(searchText).toContain('not_null');
    expect(searchText).toContain('unique');
  });

  it('includes dbt default time token for MetricFlow agg_time_dimension', () => {
    const service = createService();
    const orders: SemanticLayerSource = {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [],
      default_time_dimension: { dbt: 'order_date' },
    };

    const searchText = service.buildSearchText(orders);

    expect(searchText).toContain('dbt default time: order_date');
  });

  it('includes dbt table tags and freshness from manifest-backed source', () => {
    const service = createService();
    const customers: SemanticLayerSource = {
      name: 'customers',
      table: 'jaffle.customers',
      grain: ['id'],
      columns: [{ name: 'id', type: 'number' }],
      joins: [],
      measures: [],
      tags: { dbt: ['raw', 'core'] },
      freshness: {
        dbt: {
          loaded_at_field: 'updated_at',
          raw: { warn_after: { count: 12, period: 'hour' } },
        },
      },
    };

    const searchText = service.buildSearchText(customers);

    expect(searchText).toContain('dbt tags: raw, core');
    expect(searchText).toContain('dbt freshness:');
    expect(searchText).toContain('loaded_at=updated_at');
    expect(searchText).toContain('warn_after');
  });
});

View file

@ -0,0 +1,168 @@
import type { KloEmbeddingPort, KloLogger } from '../core/index.js';
import { noopLogger } from '../core/index.js';
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
import type { SlSourcesIndexPort } from './ports.js';
import type { SemanticLayerSource } from './types.js';
/**
 * Flattens a semantic-layer source into one searchable string.
 *
 * Segments are emitted in a fixed order and joined with `'. '`: the source name (underscores
 * replaced by spaces), the resolved source description, the table, the dbt default time
 * dimension, one segment per column, per measure and per join, the dbt tags, and finally the
 * dbt freshness summary.
 *
 * @param source - Source definition to flatten.
 * @param priority - Description-source priority handed to `resolveDescription`.
 * @returns The search text.
 */
export function buildSemanticLayerSourceSearchText(
  source: SemanticLayerSource,
  priority: string[] = DEFAULT_PRIORITY,
): string {
  const resolution = { priority };
  const segments: string[] = [source.name.replace(/_/g, ' ')];

  const sourceDescription = resolveDescription(source.descriptions, resolution);
  if (sourceDescription) {
    segments.push(sourceDescription);
  }

  if (source.table) {
    segments.push(`table: ${source.table}`);
  }

  const defaultTime = source.default_time_dimension?.dbt;
  if (defaultTime) {
    segments.push(`dbt default time: ${defaultTime}`);
  }

  for (const column of source.columns ?? []) {
    const description = resolveDescription(column.descriptions, resolution);
    const suffixes: string[] = [];

    const enumValues = column.enum_values?.dbt;
    if (enumValues?.length) {
      suffixes.push(` [dbt values: ${enumValues.join(', ')}]`);
    }
    const dbtConstraints = column.constraints?.dbt;
    if (dbtConstraints?.not_null) {
      suffixes.push(' not_null');
    }
    if (dbtConstraints?.unique) {
      suffixes.push(' unique');
    }

    const describedPart = description ? ` ${description}` : '';
    segments.push(`${column.name} (${column.type})${describedPart}${suffixes.join('')}`);
  }

  for (const measure of source.measures ?? []) {
    const describedPart = measure.description ? ` ${measure.description}` : '';
    segments.push(`measure: ${measure.name} ${measure.expr}${describedPart}`);
  }

  for (const join of source.joins ?? []) {
    segments.push(`join: ${join.to} (${join.relationship})`);
  }

  const dbtTags = source.tags?.dbt;
  if (dbtTags?.length) {
    segments.push(`dbt tags: ${dbtTags.join(', ')}`);
  }

  const freshness = source.freshness?.dbt;
  if (freshness) {
    const details: string[] = [];
    if (freshness.loaded_at_field) {
      details.push(`loaded_at=${freshness.loaded_at_field}`);
    }
    if (freshness.raw !== undefined) {
      // Cap the serialized raw config at 120 characters (117 plus an ellipsis).
      const serialized = JSON.stringify(freshness.raw);
      details.push(serialized.length > 120 ? `${serialized.slice(0, 117)}...` : serialized);
    }
    if (details.length > 0) {
      segments.push(`dbt freshness: ${details.join(' ')}`);
    }
  }

  return segments.join('. ');
}
/**
 * Keeps the semantic-layer source search index in sync with the current source definitions
 * and runs searches against it.
 */
export class SlSearchService {
  /**
   * @param embeddingService - Port used to embed source search texts and queries.
   * @param slSourcesRepository - Port that stores and searches the indexed sources.
   * @param logger - Receives progress and warning messages; defaults to `noopLogger`.
   */
  constructor(
    private readonly embeddingService: KloEmbeddingPort,
    private readonly slSourcesRepository: SlSourcesIndexPort,
    private readonly logger: KloLogger = noopLogger,
  ) {}

  /**
   * Re-indexes the sources of one connection.
   *
   * Only sources whose search text changed, that are new, or that have no stored embedding
   * are re-embedded and upserted. Index rows for sources missing from `sources` are removed.
   * An empty `sources` array clears the whole connection.
   *
   * If embedding fails, the changed sources are still upserted with a `null` embedding and a
   * warning is logged.
   *
   * @param connectionId - Connection whose index is updated.
   * @param sources - The complete, current list of sources for that connection.
   */
  async indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<void> {
    if (sources.length === 0) {
      await this.slSourcesRepository.deleteByConnection(connectionId);
      return;
    }

    // Detect which sources actually changed by comparing search_text
    const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
    const searchTexts = sources.map((s) => this.buildSearchText(s));
    const changedIndices: number[] = [];
    for (let i = 0; i < sources.length; i++) {
      const prev = existing.get(sources[i].name);
      if (!prev || prev.searchText !== searchTexts[i] || !prev.hasEmbedding) {
        changedIndices.push(i);
      }
    }

    const keepNames = sources.map((s) => s.name);

    if (changedIndices.length === 0) {
      // Still clean up stale sources even if nothing changed
      await this.slSourcesRepository.deleteStale(connectionId, keepNames);
      this.logger.log(`SL sources for connection ${connectionId}: all ${sources.length} up to date, 0 reindexed`);
      return;
    }

    // Compute embeddings only for changed sources
    const changedTexts = changedIndices.map((i) => searchTexts[i]);
    let changedEmbeddings: (number[] | null)[];
    try {
      changedEmbeddings = await this.computeEmbeddingsInBatches(changedTexts);
    } catch (error) {
      this.logger.warn(
        `Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`,
      );
      changedEmbeddings = changedIndices.map(() => null);
    }

    const rows = changedIndices.map((srcIdx, i) => ({
      sourceName: sources[srcIdx].name,
      searchText: searchTexts[srcIdx],
      embedding: changedEmbeddings[i],
    }));
    await this.slSourcesRepository.upsertSources(connectionId, rows);

    // Remove sources that no longer exist in YAML
    await this.slSourcesRepository.deleteStale(connectionId, keepNames);

    this.logger.log(
      `SL sources for connection ${connectionId}: ${changedIndices.length}/${sources.length} reindexed, ${sources.length - changedIndices.length} unchanged`,
    );
  }

  /**
   * Searches the indexed sources of one connection.
   *
   * When the query embedding cannot be computed, a warning is logged and the repository is
   * queried with a `null` embedding.
   *
   * @param connectionId - Connection to search within.
   * @param query - Free-text query.
   * @param limit - Maximum number of results requested from the repository.
   * @param minRrfScore - Minimum score passed through to the repository.
   * @returns Source names with the repository's `rrfScore` exposed as `score`.
   */
  async search(
    connectionId: string,
    query: string,
    limit = 15,
    minRrfScore = 0,
  ): Promise<Array<{ sourceName: string; score: number }>> {
    let queryEmbedding: number[] | null = null;
    try {
      queryEmbedding = await this.embeddingService.computeEmbedding(query);
    } catch (error) {
      this.logger.warn(
        `Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
      );
    }

    const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
    return results.map((r) => ({ sourceName: r.sourceName, score: r.rrfScore }));
  }

  /**
   * Builds the canonical search text for a source by delegating to
   * `buildSemanticLayerSourceSearchText`.
   */
  buildSearchText(source: SemanticLayerSource, priority: string[] = DEFAULT_PRIORITY): string {
    return buildSemanticLayerSourceSearchText(source, priority);
  }

  /**
   * Embeds `texts` in provider-sized batches and returns one vector per text, in order.
   *
   * @throws Error when a batch returns a different number of vectors than texts were sent,
   *   because appending such a batch would attach embeddings to the wrong sources.
   */
  private async computeEmbeddingsInBatches(texts: string[]): Promise<number[][]> {
    // A zero, negative, NaN or fractional batch size would make the stepping loop hang, skip
    // work, or produce overlapping slices; clamp it to a whole number of at least one.
    const configured = this.embeddingService.maxBatchSize;
    const batchSize = configured >= 1 ? Math.floor(configured) : 1;

    const embeddings: number[][] = [];
    for (let start = 0; start < texts.length; start += batchSize) {
      const batch = texts.slice(start, start + batchSize);
      const batchEmbeddings = await this.embeddingService.computeEmbeddingsBulk(batch);
      if (batchEmbeddings.length !== batch.length) {
        throw new Error(`expected ${batch.length} embeddings for batch, received ${batchEmbeddings.length}`);
      }
      embeddings.push(...batchEmbeddings);
    }
    return embeddings;
  }
}

View file

@ -0,0 +1,8 @@
/** Result of validating a semantic-layer source: the error and warning messages it produced. */
export interface SlValidationResult {
/** Error messages reported by validation. */
errors: string[];
/** Warning messages reported by validation. */
warnings: string[];
}
/**
 * Port for validating a single semantic-layer source.
 *
 * `TDeps` is an implementation-defined dependency bag that the caller passes on every call.
 */
export interface SlValidatorPort<TDeps = unknown> {
/**
 * Validates the source named `sourceName` on connection `connectionId`.
 *
 * @returns The errors and warnings found for that source.
 */
validateSingleSource(deps: TDeps, connectionId: string, sourceName: string): Promise<SlValidationResult>;
}

View file

@ -0,0 +1,164 @@
import { access, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
describe('SqliteSlSourcesIndex', () => {
  let workDir: string;
  let dbPath: string;

  // Opens an index on the database file of the current test.
  const openIndex = (): SqliteSlSourcesIndex => new SqliteSlSourcesIndex({ dbPath });

  beforeEach(async () => {
    workDir = await mkdtemp(join(tmpdir(), 'klo-sqlite-sl-index-'));
    dbPath = join(workDir, 'db.sqlite');
  });

  afterEach(async () => {
    await rm(workDir, { recursive: true, force: true });
  });

  it('creates SQLite tables and searches indexed source text', async () => {
    const index = openIndex();
    await index.upsertSources('warehouse', [
      {
        sourceName: 'orders',
        searchText: 'orders table: public.orders measure: total_revenue sum(revenue) gross revenue',
        embedding: null,
      },
      {
        sourceName: 'tickets',
        searchText: 'tickets table: public.tickets measure: ticket_count count(*) support queue',
        embedding: null,
      },
    ]);

    await expect(access(dbPath)).resolves.toBeUndefined();

    const hits = await index.search('warehouse', null, 'gross revenue', 10);
    expect(hits).toEqual([
      expect.objectContaining({
        sourceName: 'orders',
        rrfScore: expect.any(Number),
      }),
    ]);
  });

  it('reports existing search text and embedding presence', async () => {
    const index = openIndex();
    await index.upsertSources('warehouse', [
      {
        sourceName: 'orders',
        searchText: 'orders gross revenue',
        embedding: [0.1, 0.2, 0.3],
      },
      {
        sourceName: 'tickets',
        searchText: 'tickets support queue',
        embedding: null,
      },
    ]);

    const expected = new Map([
      ['orders', { searchText: 'orders gross revenue', hasEmbedding: true }],
      ['tickets', { searchText: 'tickets support queue', hasEmbedding: false }],
    ]);
    await expect(index.getExistingSearchTexts('warehouse')).resolves.toEqual(expected);
  });

  it('deletes stale, named, and connection-scoped rows from the FTS index', async () => {
    const index = openIndex();
    await index.upsertSources('warehouse', [
      { sourceName: 'orders', searchText: 'orders revenue', embedding: null },
      { sourceName: 'tickets', searchText: 'tickets support', embedding: null },
    ]);
    await index.upsertSources('finance', [{ sourceName: 'invoices', searchText: 'invoices revenue', embedding: null }]);

    // Stale cleanup keeps only "orders" in warehouse and leaves finance untouched.
    await index.deleteStale('warehouse', ['orders']);
    expect(await index.search('warehouse', null, 'support', 10)).toEqual([]);
    expect(await index.search('warehouse', null, 'revenue', 10)).toEqual([
      expect.objectContaining({ sourceName: 'orders' }),
    ]);
    expect(await index.search('finance', null, 'revenue', 10)).toEqual([
      expect.objectContaining({ sourceName: 'invoices' }),
    ]);

    await index.deleteByConnectionAndName('warehouse', 'orders');
    expect(await index.search('warehouse', null, 'revenue', 10)).toEqual([]);

    await index.deleteByConnection('finance');
    expect(await index.search('finance', null, 'revenue', 10)).toEqual([]);
  });

  it('returns lane candidates with stable connection-scoped IDs', async () => {
    const index = openIndex();
    await index.upsertSources('warehouse', [
      { sourceName: 'orders', searchText: 'orders gross revenue paid status', embedding: [1, 0] },
    ]);
    await index.upsertSources('finance', [
      { sourceName: 'orders', searchText: 'finance orders invoices', embedding: [0, 1] },
    ]);

    const lexical = index.searchLexicalCandidates({ queryText: 'gross revenue', limit: 25 });
    await expect(lexical).resolves.toEqual([
      expect.objectContaining({
        id: 'warehouse/orders',
        connectionId: 'warehouse',
        sourceName: 'orders',
        rank: 1,
        rawScore: expect.any(Number),
      }),
    ]);

    const semantic = index.searchSemanticCandidates({ queryEmbedding: [0, 1], limit: 25 });
    await expect(semantic).resolves.toEqual([
      expect.objectContaining({ id: 'finance/orders', connectionId: 'finance', sourceName: 'orders', rank: 1 }),
      expect.objectContaining({ id: 'warehouse/orders', connectionId: 'warehouse', sourceName: 'orders', rank: 2 }),
    ]);
  });

  it('aggregates dictionary matches to one source-level lane candidate', async () => {
    const index = openIndex();
    await index.replaceDictionaryEntries('warehouse', [
      { connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'paid', cardinality: 3 },
      { connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'refunded', cardinality: 3 },
      { connectionId: 'warehouse', sourceName: 'orders', columnName: 'channel', value: 'paid search', cardinality: 4 },
      {
        connectionId: 'warehouse',
        sourceName: 'tickets',
        columnName: 'priority',
        value: 'paid support',
        cardinality: 5,
      },
    ]);

    const candidates = index.searchDictionaryCandidates({ queryText: 'paid', limit: 25 });
    await expect(candidates).resolves.toEqual([
      expect.objectContaining({
        id: 'warehouse/orders',
        connectionId: 'warehouse',
        sourceName: 'orders',
        rank: 1,
        matches: [
          { column: 'channel', values: ['paid search'] },
          { column: 'status', values: ['paid'] },
        ],
      }),
      expect.objectContaining({
        id: 'warehouse/tickets',
        connectionId: 'warehouse',
        sourceName: 'tickets',
        rank: 2,
        matches: [{ column: 'priority', values: ['paid support'] }],
      }),
    ]);
  });

  it('returns an empty result for blank or punctuation-only queries', async () => {
    const index = openIndex();
    await index.upsertSources('warehouse', [{ sourceName: 'orders', searchText: 'orders revenue', embedding: null }]);

    expect(await index.search('warehouse', null, ' ', 10)).toEqual([]);
    expect(await index.search('warehouse', null, '---', 10)).toEqual([]);
  });
});

View file

@ -0,0 +1,549 @@
import { mkdirSync } from 'node:fs';
import { dirname } from 'node:path';
import Database from 'better-sqlite3';
import type { SlSourcesIndexPort } from './ports.js';
import type { SlDictionaryEntry } from './sl-dictionary-profile.js';
import type { SlDictionaryMatch } from './types.js';
/** Construction options for `SqliteSlSourcesIndex`. */
export interface SqliteSlSourcesIndexOptions {
/** Path of the SQLite database file; its parent directory is created if missing. */
dbPath: string;
}
/** Row shape selected from `local_sl_sources` when reading the existing search texts. */
type ExistingRow = {
source_name: string;
search_text: string;
// JSON-serialized embedding, or null when the source was stored without one.
embedding_json: string | null;
};
/** Row shape returned by the full-text queries over `local_sl_sources_fts`. */
type SearchRow = {
// Only selected by the cross-connection lexical search.
connection_id?: string;
source_name: string;
// Value of `bm25(local_sl_sources_fts)` for the row; queries order by it ascending.
rank: number;
};
/** One ranked source returned by a single search lane (lexical, semantic or dictionary). */
export interface SlSqliteLaneCandidate {
/** Connection-scoped identifier in the form `<connectionId>/<sourceName>`. */
id: string;
connectionId: string;
sourceName: string;
/** 1-based position of the candidate within its lane's result list. */
rank: number;
/** Lane-specific score: bm25 value (lexical), cosine similarity (semantic) or matched value count (dictionary). */
rawScore: number;
}
/** Dictionary-lane candidate: a lane candidate plus the column values that matched the query. */
export interface SlSqliteDictionaryCandidate extends SlSqliteLaneCandidate {
/** Matched values grouped per column. */
matches: SlDictionaryMatch[];
}
/** Row shape selected from `local_sl_sources` by the semantic (embedding) search. */
type IndexedSourceRow = {
connection_id: string;
source_name: string;
// JSON-serialized embedding, or null when the source has none.
embedding_json: string | null;
};
/** Row shape shared by the full-text and substring dictionary-value queries. */
type DictionarySearchRow = {
connection_id: string;
source_name: string;
column_name: string;
value: string;
// bm25 value for full-text hits; null for rows found by the substring (LIKE) query.
rank: number | null;
};
/** Builds the connection-scoped candidate id `<connectionId>/<sourceName>`. */
function candidateId(connectionId: string, sourceName: string): string {
  return [connectionId, sourceName].join('/');
}
/**
 * Cosine similarity of two vectors.
 *
 * @returns 0 when either vector is empty, the lengths differ, or either vector has zero
 *   magnitude; otherwise the dot product divided by the product of the magnitudes.
 */
function cosineSimilarity(left: number[], right: number[]): number {
  const size = left.length;
  if (size === 0 || size !== right.length) {
    return 0;
  }

  let dotProduct = 0;
  let leftSquares = 0;
  let rightSquares = 0;
  let index = 0;
  while (index < size) {
    // Missing slots count as zero.
    const a = left[index] ?? 0;
    const b = right[index] ?? 0;
    dotProduct += a * b;
    leftSquares += a * a;
    rightSquares += b * b;
    index += 1;
  }

  const hasMagnitude = leftSquares !== 0 && rightSquares !== 0;
  return hasMagnitude ? dotProduct / (Math.sqrt(leftSquares) * Math.sqrt(rightSquares)) : 0;
}
/**
 * Turns free text into an FTS5 MATCH expression: the distinct lower-cased terms, each
 * double-quoted, joined with `OR`.
 *
 * Terms are runs of Unicode letters, combining marks, digits and underscores, so accented
 * and non-Latin words stay intact instead of being cut at every non-ASCII character.
 *
 * @param query - Raw user query.
 * @returns The MATCH expression, or an empty string when the query contains no terms
 *   (blank or punctuation-only input).
 */
function normalizeFtsQuery(query: string): string {
  const terms = query
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter(Boolean);
  // Quotes cannot survive the split above; doubling them is kept as a defensive escape.
  return [...new Set(terms)].map((term) => `"${term.replace(/"/g, '""')}"`).join(' OR ');
}
/**
 * Maps a rank value to a score of `1 / (1 + |rank|)`, rounded to six decimal places.
 * A rank of 0 yields 1; larger magnitudes yield smaller scores.
 */
function scoreFromRank(rank: number): number {
  const magnitude = Math.abs(rank);
  const score = 1 / (1 + magnitude);
  return Number(score.toFixed(6));
}
/**
 * SQLite-backed implementation of `SlSourcesIndexPort`.
 *
 * Sources live in `local_sl_sources` with a parallel FTS5 table `local_sl_sources_fts`;
 * dictionary values live in `local_sl_dictionary_values` with a parallel FTS5 table
 * `local_sl_dictionary_values_fts`. Each write updates the base table and its FTS table in
 * one transaction.
 */
export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
  private readonly db: Database.Database;

  /**
   * Opens the database at `options.dbPath`, creating the parent directory, the file and the
   * schema when they do not exist yet.
   */
  constructor(options: SqliteSlSourcesIndexOptions) {
    mkdirSync(dirname(options.dbPath), { recursive: true });
    this.db = new Database(options.dbPath);
    this.db.pragma('journal_mode = WAL');
    this.db.pragma('foreign_keys = ON');
    this.db.exec(`
CREATE TABLE IF NOT EXISTS local_sl_sources (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
search_text TEXT NOT NULL,
embedding_json TEXT,
content_hash TEXT,
updated_at TEXT NOT NULL,
PRIMARY KEY (connection_id, source_name)
);
CREATE VIRTUAL TABLE IF NOT EXISTS local_sl_sources_fts USING fts5(
connection_id UNINDEXED,
source_name UNINDEXED,
search_text
);
CREATE TABLE IF NOT EXISTS local_sl_dictionary_values (
connection_id TEXT NOT NULL,
source_name TEXT NOT NULL,
column_name TEXT NOT NULL,
value TEXT NOT NULL,
value_lower TEXT NOT NULL,
cardinality INTEGER,
updated_at TEXT NOT NULL,
PRIMARY KEY (connection_id, source_name, column_name, value)
);
CREATE VIRTUAL TABLE IF NOT EXISTS local_sl_dictionary_values_fts USING fts5(
connection_id UNINDEXED,
source_name UNINDEXED,
column_name UNINDEXED,
value
);
`);
  }

  /**
   * Closes the underlying database handle. The instance must not be used afterwards.
   */
  close(): void {
    this.db.close();
  }

  /**
   * Inserts or updates the given sources for a connection and refreshes their FTS rows.
   *
   * On update, a missing `contentHash` keeps the previously stored hash. An empty `sources`
   * array is a no-op.
   */
  async upsertSources(
    connectionId: string,
    sources: Array<{ sourceName: string; searchText: string; embedding: number[] | null; contentHash?: string | null }>,
  ): Promise<void> {
    if (sources.length === 0) {
      return;
    }

    const upsertRow = this.db.prepare(`
INSERT INTO local_sl_sources (
connection_id,
source_name,
search_text,
embedding_json,
content_hash,
updated_at
)
VALUES (
@connectionId,
@sourceName,
@searchText,
@embeddingJson,
@contentHash,
@updatedAt
)
ON CONFLICT(connection_id, source_name) DO UPDATE SET
search_text = excluded.search_text,
embedding_json = excluded.embedding_json,
content_hash = COALESCE(excluded.content_hash, local_sl_sources.content_hash),
updated_at = excluded.updated_at
`);
    const deleteFts = this.db.prepare(`
DELETE FROM local_sl_sources_fts
WHERE connection_id = @connectionId
AND source_name = @sourceName
`);
    const insertFts = this.db.prepare(`
INSERT INTO local_sl_sources_fts (connection_id, source_name, search_text)
VALUES (@connectionId, @sourceName, @searchText)
`);

    const transaction = this.db.transaction(
      (
        rows: Array<{
          sourceName: string;
          searchText: string;
          embedding: number[] | null;
          contentHash?: string | null;
        }>,
      ) => {
        const updatedAt = new Date().toISOString();
        for (const source of rows) {
          const row = {
            connectionId,
            sourceName: source.sourceName,
            searchText: source.searchText,
            embeddingJson: source.embedding ? JSON.stringify(source.embedding) : null,
            contentHash: source.contentHash ?? null,
            updatedAt,
          };
          upsertRow.run(row);
          // The FTS table has no upsert, so the row is replaced by delete + insert.
          deleteFts.run(row);
          insertFts.run(row);
        }
      },
    );
    transaction(sources);
  }

  /**
   * Returns, per source name of the connection, the stored search text and whether an
   * embedding is stored for it.
   */
  async getExistingSearchTexts(
    connectionId: string,
  ): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
    const rows = this.db
      .prepare(
        `
SELECT source_name, search_text, embedding_json
FROM local_sl_sources
WHERE connection_id = ?
ORDER BY source_name ASC
`,
      )
      .all(connectionId) as ExistingRow[];
    return new Map(
      rows.map((row) => [row.source_name, { searchText: row.search_text, hasEmbedding: row.embedding_json !== null }]),
    );
  }

  /**
   * Removes every source of the connection whose name is not in `keepNames`.
   * An empty `keepNames` removes all sources of the connection.
   */
  async deleteStale(connectionId: string, keepNames: string[]): Promise<void> {
    if (keepNames.length === 0) {
      await this.deleteByConnection(connectionId);
      return;
    }

    const placeholders = keepNames.map(() => '?').join(', ');
    const stale = this.db
      .prepare(
        `
SELECT source_name
FROM local_sl_sources
WHERE connection_id = ?
AND source_name NOT IN (${placeholders})
`,
      )
      .all(connectionId, ...keepNames) as Array<{ source_name: string }>;

    const deleteFts = this.db.prepare(`
DELETE FROM local_sl_sources_fts
WHERE connection_id = ?
AND source_name = ?
`);
    const deleteRow = this.db.prepare(`
DELETE FROM local_sl_sources
WHERE connection_id = ?
AND source_name = ?
`);
    const remove = this.db.transaction((sourceNames: string[]) => {
      for (const sourceName of sourceNames) {
        deleteFts.run(connectionId, sourceName);
        deleteRow.run(connectionId, sourceName);
      }
    });
    remove(stale.map((row) => row.source_name));
  }

  /** Removes all indexed sources (base and FTS rows) of a connection. */
  async deleteByConnection(connectionId: string): Promise<void> {
    const remove = this.db.transaction(() => {
      this.db.prepare('DELETE FROM local_sl_sources_fts WHERE connection_id = ?').run(connectionId);
      this.db.prepare('DELETE FROM local_sl_sources WHERE connection_id = ?').run(connectionId);
    });
    remove();
  }

  /** Removes one indexed source (base and FTS rows) of a connection. */
  async deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void> {
    this.deleteByConnectionAndNameSync(connectionId, sourceName);
  }

  /**
   * Replaces all dictionary values of a connection with `entries`.
   *
   * Entries whose `connectionId` differs from the given one are ignored. The delete and the
   * inserts run in a single transaction, so a failing insert rolls back and leaves the
   * previous dictionary in place instead of an emptied one.
   */
  async replaceDictionaryEntries(connectionId: string, entries: SlDictionaryEntry[]): Promise<void> {
    const deleteFts = this.db.prepare('DELETE FROM local_sl_dictionary_values_fts WHERE connection_id = ?');
    const deleteRows = this.db.prepare('DELETE FROM local_sl_dictionary_values WHERE connection_id = ?');
    const insertRow = this.db.prepare(`
INSERT INTO local_sl_dictionary_values (
connection_id,
source_name,
column_name,
value,
value_lower,
cardinality,
updated_at
)
VALUES (
@connectionId,
@sourceName,
@columnName,
@value,
@valueLower,
@cardinality,
@updatedAt
)
`);
    const insertFts = this.db.prepare(`
INSERT INTO local_sl_dictionary_values_fts (connection_id, source_name, column_name, value)
VALUES (@connectionId, @sourceName, @columnName, @value)
`);

    const replace = this.db.transaction((rows: SlDictionaryEntry[]) => {
      deleteFts.run(connectionId);
      deleteRows.run(connectionId);

      const updatedAt = new Date().toISOString();
      for (const entry of rows) {
        if (entry.connectionId !== connectionId) {
          continue;
        }
        const row = {
          connectionId: entry.connectionId,
          sourceName: entry.sourceName,
          columnName: entry.columnName,
          value: entry.value,
          valueLower: entry.value.toLowerCase(),
          cardinality: entry.cardinality,
          updatedAt,
        };
        insertRow.run(row);
        insertFts.run(row);
      }
    });
    replace(entries);
  }

  /**
   * Full-text search over source search texts, optionally restricted to some connections.
   *
   * @returns Candidates ordered by bm25 (best first) with 1-based `rank`; `rawScore` is the
   *   bm25 value. Empty when the query contains no searchable terms.
   */
  async searchLexicalCandidates(input: {
    connectionIds?: readonly string[];
    queryText: string;
    limit: number;
  }): Promise<SlSqliteLaneCandidate[]> {
    const ftsQuery = normalizeFtsQuery(input.queryText);
    if (!ftsQuery) {
      return [];
    }

    const connectionIds = [...new Set(input.connectionIds ?? [])].sort();
    const connectionPredicate =
      connectionIds.length > 0 ? `AND connection_id IN (${connectionIds.map(() => '?').join(', ')})` : '';
    const rows = this.db
      .prepare(
        `
SELECT connection_id, source_name, bm25(local_sl_sources_fts) AS rank
FROM local_sl_sources_fts
WHERE local_sl_sources_fts MATCH ?
${connectionPredicate}
ORDER BY rank ASC, connection_id ASC, source_name ASC
LIMIT ?
`,
      )
      .all(ftsQuery, ...connectionIds, Math.max(1, input.limit)) as Array<SearchRow & { connection_id: string }>;

    return rows.map((row, index) => ({
      id: candidateId(row.connection_id, row.source_name),
      connectionId: row.connection_id,
      sourceName: row.source_name,
      rank: index + 1,
      rawScore: Number(row.rank),
    }));
  }

  /**
   * Embedding search: scores every stored embedding against `queryEmbedding` by cosine
   * similarity, optionally restricted to some connections.
   *
   * Rows without an embedding, or whose stored JSON is not an array of numbers, are skipped.
   *
   * @returns Candidates ordered by similarity (highest first) with 1-based `rank`.
   */
  async searchSemanticCandidates(input: {
    connectionIds?: readonly string[];
    queryEmbedding: number[];
    limit: number;
  }): Promise<SlSqliteLaneCandidate[]> {
    const connectionIds = [...new Set(input.connectionIds ?? [])].sort();
    const connectionPredicate =
      connectionIds.length > 0 ? `WHERE connection_id IN (${connectionIds.map(() => '?').join(', ')})` : '';
    const rows = this.db
      .prepare(
        `
SELECT connection_id, source_name, embedding_json
FROM local_sl_sources
${connectionPredicate}
ORDER BY connection_id ASC, source_name ASC
`,
      )
      .all(...connectionIds) as IndexedSourceRow[];

    return rows
      .flatMap((row) => {
        if (!row.embedding_json) {
          return [];
        }
        try {
          const embedding = JSON.parse(row.embedding_json) as unknown;
          if (!Array.isArray(embedding) || !embedding.every((value) => typeof value === 'number')) {
            return [];
          }
          return [
            {
              id: candidateId(row.connection_id, row.source_name),
              connectionId: row.connection_id,
              sourceName: row.source_name,
              rank: 0,
              rawScore: cosineSimilarity(input.queryEmbedding, embedding),
            },
          ];
        } catch {
          // Unparseable stored embeddings are ignored rather than failing the search.
          return [];
        }
      })
      .sort(
        (left, right) =>
          right.rawScore - left.rawScore ||
          left.connectionId.localeCompare(right.connectionId) ||
          left.sourceName.localeCompare(right.sourceName),
      )
      .slice(0, Math.max(1, input.limit))
      .map((candidate, index) => ({ ...candidate, rank: index + 1 }));
  }

  /**
   * Searches dictionary values by full-text match and by case-insensitive substring, then
   * aggregates the hits to one candidate per source.
   *
   * Each candidate lists up to five matched values per column; `rawScore` is the number of
   * listed values. Candidates are ordered by score, then by number of matched columns, then
   * by connection and source name.
   */
  async searchDictionaryCandidates(input: {
    connectionIds?: readonly string[];
    queryText: string;
    limit: number;
  }): Promise<SlSqliteDictionaryCandidate[]> {
    const ftsQuery = normalizeFtsQuery(input.queryText);
    const normalizedQuery = input.queryText.trim().toLowerCase();
    if (!ftsQuery && !normalizedQuery) {
      return [];
    }

    const connectionIds = [...new Set(input.connectionIds ?? [])].sort();
    const connectionPredicate =
      connectionIds.length > 0 ? `AND connection_id IN (${connectionIds.map(() => '?').join(', ')})` : '';
    const rowLimit = Math.max(25, input.limit * 4);

    const ftsRows = ftsQuery
      ? (this.db
          .prepare(
            `
SELECT connection_id, source_name, column_name, value, bm25(local_sl_dictionary_values_fts) AS rank
FROM local_sl_dictionary_values_fts
WHERE local_sl_dictionary_values_fts MATCH ?
${connectionPredicate}
ORDER BY rank ASC, connection_id ASC, source_name ASC, column_name ASC, value ASC
LIMIT ?
`,
          )
          .all(ftsQuery, ...connectionIds, rowLimit) as DictionarySearchRow[])
      : [];

    // Escape LIKE wildcards so a query containing % or _ is matched literally.
    const likePattern = `%${normalizedQuery.replace(/[\\%_]/g, '\\$&')}%`;
    const substringRows = normalizedQuery
      ? (this.db
          .prepare(
            `
SELECT connection_id, source_name, column_name, value, NULL AS rank
FROM local_sl_dictionary_values
WHERE value_lower LIKE ? ESCAPE '\\'
${connectionPredicate}
ORDER BY connection_id ASC, source_name ASC, column_name ASC, value ASC
LIMIT ?
`,
          )
          .all(likePattern, ...connectionIds, rowLimit) as DictionarySearchRow[])
      : [];

    // De-duplicate hits found by both queries; the full-text row wins because it comes first.
    // The key is a JSON tuple so names containing "/" cannot collide.
    const rowsByKey = new Map<string, DictionarySearchRow>();
    for (const row of [...ftsRows, ...substringRows]) {
      const key = JSON.stringify([row.connection_id, row.source_name, row.column_name, row.value]);
      if (!rowsByKey.has(key)) {
        rowsByKey.set(key, row);
      }
    }

    const grouped = new Map<string, DictionarySearchRow[]>();
    for (const row of rowsByKey.values()) {
      const key = candidateId(row.connection_id, row.source_name);
      const bucket = grouped.get(key);
      if (bucket) {
        bucket.push(row);
      } else {
        grouped.set(key, [row]);
      }
    }

    return [...grouped.entries()]
      .map(([id, rows]) => {
        const [first] = rows;
        const byColumn = new Map<string, string[]>();
        const ordered = [...rows].sort(
          (left, right) => left.column_name.localeCompare(right.column_name) || left.value.localeCompare(right.value),
        );
        for (const row of ordered) {
          const values = byColumn.get(row.column_name);
          if (values) {
            values.push(row.value);
          } else {
            byColumn.set(row.column_name, [row.value]);
          }
        }
        const matches = [...byColumn.entries()].map(([column, values]) => ({ column, values: values.slice(0, 5) }));
        return {
          id,
          connectionId: first?.connection_id ?? '',
          sourceName: first?.source_name ?? '',
          rank: 0,
          rawScore: matches.reduce((total, match) => total + match.values.length, 0),
          matches,
        };
      })
      .sort(
        (left, right) =>
          right.rawScore - left.rawScore ||
          right.matches.length - left.matches.length ||
          left.connectionId.localeCompare(right.connectionId) ||
          left.sourceName.localeCompare(right.sourceName),
      )
      .slice(0, Math.max(1, input.limit))
      .map((candidate, index) => ({ ...candidate, rank: index + 1 }));
  }

  /**
   * Full-text search within one connection.
   *
   * The query embedding is accepted for interface compatibility but not used. The returned
   * `rrfScore` is derived from the bm25 value via `scoreFromRank`; rows scoring below
   * `minRrfScore` are dropped.
   */
  async search(
    connectionId: string,
    _queryEmbedding: number[] | null,
    queryText: string,
    limit: number,
    minRrfScore = 0,
  ): Promise<Array<{ sourceName: string; rrfScore: number }>> {
    const ftsQuery = normalizeFtsQuery(queryText);
    if (!ftsQuery) {
      return [];
    }

    const rows = this.db
      .prepare(
        `
SELECT source_name, bm25(local_sl_sources_fts) AS rank
FROM local_sl_sources_fts
WHERE connection_id = ?
AND local_sl_sources_fts MATCH ?
ORDER BY rank ASC, source_name ASC
LIMIT ?
`,
      )
      .all(connectionId, ftsQuery, Math.max(1, limit)) as SearchRow[];

    return rows
      .map((row) => ({ sourceName: row.source_name, rrfScore: scoreFromRank(row.rank) }))
      .filter((row) => row.rrfScore >= minRrfScore);
  }

  /** Synchronous implementation behind `deleteByConnectionAndName`. */
  private deleteByConnectionAndNameSync(connectionId: string, sourceName: string): void {
    const remove = this.db.transaction(() => {
      this.db
        .prepare(
          `
DELETE FROM local_sl_sources_fts
WHERE connection_id = ?
AND source_name = ?
`,
        )
        .run(connectionId, sourceName);
      this.db
        .prepare(
          `
DELETE FROM local_sl_sources
WHERE connection_id = ?
AND source_name = ?
`,
        )
        .run(connectionId, sourceName);
    });
    remove();
  }
}

View file

@ -0,0 +1,154 @@
import type { ZodType } from 'zod';
import type { GitAuthorResolverPort, ToolContext, ToolOutput } from '../../tools/index.js';
import { BaseTool } from '../../tools/index.js';
import { sourceDefinitionSchema } from '../schemas.js';
import { SemanticLayerService } from '../semantic-layer.service.js';
import { SlSearchService } from '../sl-search.service.js';
export { sourceDefinitionSchema };
// ── Shared output types ──
/** Structured (machine-readable) result of a semantic-layer tool operation on one source. */
export interface SemanticLayerStructured {
/** Whether the operation succeeded. */
success: boolean;
/** Name of the source the operation targeted. */
sourceName: string;
/** YAML text of the source, when provided by the tool. */
yaml?: string;
/** Commit hash associated with the operation, when provided by the tool. */
commitHash?: string;
/** Operation error messages. */
errors?: string[];
/** Validation error messages. */
validationErrors?: string[];
/** Validation warning messages. */
validationWarnings?: string[];
/** Warnings rendered under the "Action required" heading of the markdown output. */
actionRequiredWarnings?: string[];
}
/** Dependencies required to construct a `BaseSemanticLayerTool`. */
export interface BaseSemanticLayerToolDeps {
/** Service used to read source files (and, presumably, other source operations). */
semanticLayerService: SemanticLayerService;
/** Search/index service for semantic-layer sources. */
slSearchService: SlSearchService;
/** Git author resolver; how it is used is not visible in this part of the file. */
authorResolver: GitAuthorResolverPort;
}
// ── Abstract base class ──
export abstract class BaseSemanticLayerTool<TInput extends ZodType = ZodType> extends BaseTool<TInput> {
protected readonly semanticLayerService: SemanticLayerService;
protected readonly slSearchService: SlSearchService;
protected readonly authorResolver: GitAuthorResolverPort;
/** Stores the injected services on the tool instance. */
constructor(deps: BaseSemanticLayerToolDeps) {
  super();
  const { semanticLayerService, slSearchService, authorResolver } = deps;
  this.semanticLayerService = semanticLayerService;
  this.slSearchService = slSearchService;
  this.authorResolver = authorResolver;
}
/**
 * Reads the YAML content of a source file.
 *
 * Uses the session-scoped semantic layer service from `context` when one is present,
 * otherwise the service injected at construction.
 *
 * @returns The file content, or `null` when reading fails for any reason.
 */
protected async readSourceYaml(
  connectionId: string,
  sourceName: string,
  context?: ToolContext,
): Promise<string | null> {
  const service = context?.session?.semanticLayerService ?? this.semanticLayerService;
  try {
    const file = await service.readSourceFile(connectionId, sourceName);
    return file.content;
  } catch {
    // Any read failure is reported to the caller as "no content".
    return null;
  }
}
/**
 * Renders the human-readable markdown summary of a source operation.
 *
 * The output is a status line (plus one bullet per error on failure), followed by the
 * optional commit hash, action-required warnings, validation errors, validation warnings
 * and a YAML code block truncated to 2000 characters.
 *
 * @param success - Whether the operation succeeded.
 * @param errors - Error messages listed when `success` is false.
 * @param sourceName - Name of the affected source.
 * @param extra - Optional details appended after the status line.
 * @returns The markdown text, with lines joined by `\n`.
 */
protected buildMarkdown(
  success: boolean,
  errors: string[],
  sourceName: string,
  extra?: {
    yaml?: string;
    commitHash?: string;
    validationErrors?: string[];
    validationWarnings?: string[];
    actionRequiredWarnings?: string[];
    editCount?: number;
  },
): string {
  const lines: string[] = [];
  const pushBullets = (items: string[]): void => {
    for (const item of items) {
      lines.push(`- ${item}`);
    }
  };
  // Emits a heading followed by bullets, but only for a non-empty list.
  const pushSection = (heading: string, items?: string[]): void => {
    if (items && items.length > 0) {
      lines.push(heading);
      pushBullets(items);
    }
  };

  if (!success) {
    lines.push(`Source **${sourceName}** update completed with ${errors.length} error(s):`);
    pushBullets(errors);
  } else {
    const action = extra?.editCount != null ? `applied ${extra.editCount} edit(s) to` : 'saved';
    lines.push(`Source **${sourceName}** ${action} successfully.`);
  }

  if (extra?.commitHash) {
    lines.push(`Commit: \`${extra.commitHash}\``);
  }

  pushSection('\n**Action required:**', extra?.actionRequiredWarnings);
  pushSection('\n**Validation errors:**', extra?.validationErrors);
  pushSection('\n**Validation warnings:**', extra?.validationWarnings);

  const yaml = extra?.yaml;
  if (yaml) {
    const MAX_YAML = 2000;
    const block =
      yaml.length > MAX_YAML
        ? `\n**YAML** (${yaml.length} chars, truncated):\n\`\`\`yaml\n${yaml.slice(0, MAX_YAML)}...\n\`\`\``
        : `\n**YAML**:\n\`\`\`yaml\n${yaml}\n\`\`\``;
    lines.push(block);
  }

  return lines.join('\n');
}
protected buildOutput(
success: boolean,
errors: string[],
sourceName: string,
extra?: {
yaml?: string;
commitHash?: string;
validationErrors?: string[];
validationWarnings?: string[];
actionRequiredWarnings?: string[];
editCount?: number;
},
): ToolOutput<SemanticLayerStructured> {
return {
markdown: this.buildMarkdown(success, errors, sourceName, extra),
structured: {
success,
sourceName,
yaml: extra?.yaml,
commitHash: extra?.commitHash,
...(errors.length > 0 ? { errors } : {}),
...(extra?.validationErrors && extra.validationErrors.length > 0
? { validationErrors: extra.validationErrors }
: {}),
...(extra?.validationWarnings && extra.validationWarnings.length > 0
? { validationWarnings: extra.validationWarnings }
: {}),
...(extra?.actionRequiredWarnings && extra.actionRequiredWarnings.length > 0
? { actionRequiredWarnings: extra.actionRequiredWarnings }
: {}),
},
};
}
}

View file

@ -0,0 +1,18 @@
import { describe, expect, it } from 'vitest';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
// Spec for the connection-id schema shared by the semantic layer tools.
describe('slToolConnectionIdSchema', () => {
// Valid ids parse to themselves: UUIDs and plain names with `_` / `-`.
it('accepts app UUIDs and local project connection ids', () => {
expect(slToolConnectionIdSchema.parse('00000000-0000-4000-8000-000000000001')).toBe(
'00000000-0000-4000-8000-000000000001',
);
expect(slToolConnectionIdSchema.parse('warehouse')).toBe('warehouse');
expect(slToolConnectionIdSchema.parse('warehouse_prod-1')).toBe('warehouse_prod-1');
});
// Empty strings, path separators/traversal, a leading dot and whitespace must all be rejected.
it('rejects empty, path-like, and hidden connection ids', () => {
for (const value of ['', '../warehouse', 'warehouse/prod', '.warehouse', 'warehouse prod']) {
expect(() => slToolConnectionIdSchema.parse(value)).toThrow();
}
});
});

View file

@ -0,0 +1,6 @@
import { z } from 'zod';
/** A connection id starts with an ASCII letter or digit, followed by letters, digits, `_` or `-`. */
const CONNECTION_ID_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;

/**
 * Schema for the `connectionId` argument of semantic layer tools.
 * Accepts non-empty ids made of ASCII letters, digits, `_` and `-` that do not
 * start with `_` or `-`; everything else (dots, slashes, spaces, ...) fails.
 */
export const slToolConnectionIdSchema = z
  .string()
  .min(1)
  .regex(CONNECTION_ID_PATTERN, 'Connection id must be alphanumeric and may contain _ or -');

View file

@ -0,0 +1,11 @@
export type { BaseSemanticLayerToolDeps, SemanticLayerStructured } from './base-semantic-layer.tool.js';
export { BaseSemanticLayerTool, sourceDefinitionSchema } from './base-semantic-layer.tool.js';
export type { SlDiscoverySettings } from './sl-discover.tool.js';
export { SlDiscoverTool } from './sl-discover.tool.js';
export { SlEditSourceTool } from './sl-edit-source.tool.js';
export { SlReadSourceTool } from './sl-read-source.tool.js';
export { SlRollbackTool } from './sl-rollback.tool.js';
export { SlValidateTool, validateSemanticLayerEndpoint } from './sl-validate.tool.js';
export { SlWriteSourceTool } from './sl-write-source.tool.js';
export type { SlValidationDeps, SourceValidationResult } from './sl-warehouse-validation.js';
export { revertSourceToPreHead, validateSingleSource } from './sl-warehouse-validation.js';

View file

@ -0,0 +1,337 @@
import { z } from 'zod';
import { DEFAULT_PRIORITY, resolveDescription } from '../descriptions.js';
import type { SemanticLayerSource } from '../types.js';
import type { ToolContext, ToolOutput } from '../../tools/index.js';
import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/** Tuning knobs for `sl_discover` search and rendering. */
export interface SlDiscoverySettings {
/** Passed as the third argument to `slSearchService.search` — presumably the cap on ranked results; confirm against the search service. */
maxSources: number;
/** Passed as the fourth argument to `slSearchService.search` — presumably a minimum rank-fusion score; confirm against the search service. */
minRrfScore: number;
/** How many top-ranked sources are rendered in full detail when a query is present. */
maxDetailedSources: number;
}
// Input accepted by `sl_discover`. Every field is optional: with nothing set the
// tool lists sources; `query` filters them; `sourceName` switches to a detail view.
const slDiscoverInputSchema = z.object({
connectionId: slToolConnectionIdSchema
.optional()
.describe('Data source connection ID (omit to discover across all data sources)'),
query: z.string().optional().describe('Search query to filter sources/columns/measures by name or description'),
sourceName: z
.string()
.optional()
.describe('Inspect a specific source in full detail (requires connectionId if multiple data sources)'),
});
// Parsed input type derived from the schema above.
type SlDiscoverInput = z.infer<typeof slDiscoverInputSchema>;
/** Structured payload returned by `sl_discover` next to its markdown. */
interface SlDiscoverStructured {
/** One summary entry per source included in the result. */
sources: Array<{
/** Id of the connection the source belongs to. */
connectionId: string;
/** Display name of that connection. */
connectionName: string;
/** Source name. */
name: string;
/** Description resolved from the source's description map, when one exists. */
description?: string;
columnCount: number;
measureCount: number;
joinCount: number;
}>;
// NOTE(review): `detail` is not populated by any code visible in this file section — confirm whether it is still needed.
detail?: Record<string, unknown>;
/** Number of sources represented in this result. */
totalSources: number;
}
/**
 * `sl_discover` — read-only exploration of semantic layer sources.
 *
 * Modes:
 * - no `connectionId`: auto-selects the only connection, or aggregates across
 *   all connections when there are several;
 * - `sourceName`: renders one source in detail (columns, measures, joins, grain);
 * - `query`: filters sources through the search service, falling back to a
 *   plain term match when the search returns nothing.
 */
export class SlDiscoverTool extends BaseSemanticLayerTool<typeof slDiscoverInputSchema> {
  readonly name = 'sl_discover';

  constructor(
    deps: BaseSemanticLayerToolDeps,
    private readonly discoverySettings: SlDiscoverySettings,
  ) {
    super(deps);
  }

  get description(): string {
    return `<purpose>
Discover available semantic layer sources, columns, measures, and joins.
When called without a connectionId, discovers sources across ALL data sources grouped by data source name and ID.
Use this to understand what data is available before writing a semantic_query.
</purpose>
<when_to_use>
- Before querying: understand available sources across all data sources
- To inspect a specific source in detail (columns, joins, measures, grain) requires connectionId when multiple data sources exist
- To search for sources related to a concept (e.g., "revenue", "customers") across all data sources
</when_to_use>`;
  }

  get inputSchema() {
    return slDiscoverInputSchema;
  }

  /**
   * Entry point: resolves the target connection, then dispatches to the
   * detail view, the single-connection listing or the cross-connection listing.
   */
  async call(input: SlDiscoverInput, _context: ToolContext): Promise<ToolOutput<SlDiscoverStructured>> {
    const { query, sourceName } = input;

    // Resolve connectionId: use the provided value, or auto-detect.
    let connectionId = input.connectionId;
    // Display name, filled in when the auto-detection already fetched it.
    let connectionName: string | undefined;

    if (!connectionId) {
      const connections = await this.semanticLayerService.listConnectionIdsWithNames();
      if (connections.length === 0) {
        return {
          markdown: 'No semantic layer sources found. Run a schema scan first.',
          structured: { sources: [], totalSources: 0 },
        };
      }
      if (connections.length > 1) {
        // Multiple connections — a detail view is ambiguous, a listing can aggregate.
        if (sourceName) {
          const connectionList = connections
            .map((c) => `- **${c.name}** (${c.connectionType}): \`${c.id}\``)
            .join('\n');
          return {
            markdown: `Multiple data sources have semantic layer sources. Specify a connectionId to inspect source "${sourceName}":\n\n${connectionList}`,
            structured: { sources: [], totalSources: 0 },
          };
        }
        return this.discoverAcrossConnections(connections, query);
      }
      connectionId = connections[0].id;
      connectionName = connections[0].name;
    }

    // Detail view: shows the SL interface (columns, measures, joins) without the
    // raw SQL. Use `sl_read_source` to see the full YAML including SQL.
    if (sourceName) {
      return this.inspectSource(connectionId, connectionName, sourceName);
    }

    // Single connection: list all sources.
    const resolvedName = connectionName ?? (await this.lookupConnectionName(connectionId));
    return this.discoverForConnection(connectionId, resolvedName, query);
  }

  /** Returns the display name of a connection, or the id itself when the connection is not listed. */
  private async lookupConnectionName(connectionId: string): Promise<string> {
    const connections = await this.semanticLayerService.listConnectionIdsWithNames();
    return connections.find((c) => c.id === connectionId)?.name ?? connectionId;
  }

  /** Resolves a description map using the default source priority. */
  private resolveDefaultDescription(descriptions: Parameters<typeof resolveDescription>[0]): string | null {
    return resolveDescription(descriptions, { priority: DEFAULT_PRIORITY });
  }

  /** Builds the structured summary entry for one source; tolerates sources missing columns/measures/joins. */
  private summarizeSource(
    connectionId: string,
    connectionName: string,
    source: SemanticLayerSource,
  ): SlDiscoverStructured['sources'][number] {
    return {
      connectionId,
      connectionName,
      name: source.name,
      description: this.resolveDefaultDescription(source.descriptions) ?? undefined,
      columnCount: (source.columns ?? []).length,
      measureCount: (source.measures ?? []).length,
      joinCount: (source.joins ?? []).length,
    };
  }

  /** Renders the detail view for a single named source of one connection. */
  private async inspectSource(
    connectionId: string,
    knownConnectionName: string | undefined,
    sourceName: string,
  ): Promise<ToolOutput<SlDiscoverStructured>> {
    const sources = await this.semanticLayerService.loadAllSources(connectionId);
    const source = sources.find((s) => s.name === sourceName);
    if (!source) {
      return {
        markdown: `Source **${sourceName}** not found for this connection.`,
        structured: { sources: [], totalSources: 0 },
      };
    }

    // The name is display-only: if the lookup fails, fall back to the id rather
    // than failing an inspection that otherwise succeeded.
    const connectionName =
      knownConnectionName ?? (await this.lookupConnectionName(connectionId).catch(() => connectionId));

    const parts: string[] = [];
    this.appendSourceDetail(parts, source);
    if (source.grain?.length) {
      parts.push(`Grain: ${source.grain.join(', ')}`);
    }
    return {
      markdown: parts.join('\n'),
      structured: {
        sources: [this.summarizeSource(connectionId, connectionName, source)],
        totalSources: 1,
      },
    };
  }

  /** Lists (optionally query-filtered) sources of every connection, grouped per connection. */
  private async discoverAcrossConnections(
    connections: Array<{ id: string; name: string; connectionType: string }>,
    query?: string,
  ): Promise<ToolOutput<SlDiscoverStructured>> {
    // Load sources from all connections in parallel.
    const results = await Promise.all(
      connections.map(async (conn) => {
        const sources = await this.semanticLayerService.loadAllSources(conn.id);
        const filtered = query ? await this.filterByQuery(conn.id, sources, query) : sources;
        return { conn, sources: filtered };
      }),
    );

    const nonEmpty = results.filter((r) => r.sources.length > 0);
    const totalSources = nonEmpty.reduce((sum, r) => sum + r.sources.length, 0);
    if (totalSources === 0) {
      return {
        markdown: query
          ? `No semantic layer sources found matching "${query}".`
          : 'No semantic layer sources found. Run a schema scan first, or create sources with sl_write_source.',
        structured: { sources: [], totalSources: 0 },
      };
    }

    const allSummaries: SlDiscoverStructured['sources'] = [];
    const parts: string[] = [
      `**${totalSources} source(s) found across ${nonEmpty.length} data source(s)**${query ? ` matching "${query}"` : ''}:\n`,
    ];
    for (const { conn, sources } of nonEmpty) {
      parts.push(`## ${conn.name} (${conn.connectionType}) — \`${conn.id}\``);
      parts.push('');
      for (const s of sources) {
        allSummaries.push(this.summarizeSource(conn.id, conn.name, s));
      }
      this.appendTieredSources(parts, sources, !!query);
    }

    return {
      markdown: parts.join('\n'),
      structured: { sources: allSummaries, totalSources },
    };
  }

  /** Lists (optionally query-filtered) sources of a single connection. */
  private async discoverForConnection(
    connectionId: string,
    connectionName: string,
    query?: string,
  ): Promise<ToolOutput<SlDiscoverStructured>> {
    const sources = await this.semanticLayerService.loadAllSources(connectionId);
    if (sources.length === 0) {
      return {
        markdown: 'No semantic layer sources found. Run a schema scan first, or create sources with sl_write_source.',
        structured: { sources: [], totalSources: 0 },
      };
    }

    const filtered = query ? await this.filterByQuery(connectionId, sources, query) : sources;
    if (query && filtered.length === 0) {
      // Same wording as the cross-connection path instead of a "0 source(s) found" header.
      return {
        markdown: `No semantic layer sources found matching "${query}".`,
        structured: { sources: [], totalSources: 0 },
      };
    }

    const summaries = filtered.map((s) => this.summarizeSource(connectionId, connectionName, s));
    const parts: string[] = [`**${filtered.length} source(s) found**${query ? ` matching "${query}"` : ''}:\n`];
    this.appendTieredSources(parts, filtered, !!query);
    return {
      markdown: parts.join('\n'),
      structured: { sources: summaries, totalSources: filtered.length },
    };
  }

  /**
   * Keeps the sources returned by the search service, in the search's order.
   * Falls back to {@link fallbackTermMatch} when the search returns no results.
   */
  private async filterByQuery(
    connectionId: string,
    sources: SemanticLayerSource[],
    query: string,
  ): Promise<SemanticLayerSource[]> {
    const config = this.discoverySettings;
    const searchResults = await this.slSearchService.search(connectionId, query, config.maxSources, config.minRrfScore);
    if (searchResults.length > 0) {
      const nameOrder = new Map(searchResults.map((r, i) => [r.sourceName, i]));
      return sources
        .filter((s) => nameOrder.has(s.name))
        .sort((a, b) => (nameOrder.get(a.name) ?? 0) - (nameOrder.get(b.name) ?? 0));
    }
    return this.fallbackTermMatch(sources, query);
  }

  /**
   * Case-insensitive substring match of each whitespace-separated query term
   * against source/column/measure names and descriptions; sources matching
   * more terms come first, sources matching none are dropped.
   */
  private fallbackTermMatch(sources: SemanticLayerSource[], query: string): SemanticLayerSource[] {
    const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
    const scored = sources
      .map((s) => {
        const searchText = [
          s.name,
          this.resolveDefaultDescription(s.descriptions) ?? '',
          ...(s.columns ?? []).map((c) => `${c.name} ${this.resolveDefaultDescription(c.descriptions) ?? ''}`),
          ...(s.measures ?? []).map((m) => `${m.name} ${m.description ?? ''}`),
        ]
          .join(' ')
          .toLowerCase();
        const matchCount = terms.filter((term) => searchText.includes(term)).length;
        return { source: s, matchCount };
      })
      .filter((x) => x.matchCount > 0)
      .sort((a, b) => b.matchCount - a.matchCount);
    return scored.map((x) => x.source);
  }

  /**
   * Render sources in two tiers:
   * - with a query, the first `maxDetailedSources` sources get full detail;
   *   without a query none do;
   * - remaining sources get a one-liner with name, description and counts.
   */
  private appendTieredSources(parts: string[], sources: SemanticLayerSource[], hasQuery: boolean): void {
    const detailLimit = hasQuery ? this.discoverySettings.maxDetailedSources : 0;
    const detailed = sources.slice(0, detailLimit);
    const rest = sources.slice(detailLimit);

    for (const s of detailed) {
      this.appendSourceDetail(parts, s);
    }
    if (rest.length === 0) {
      return;
    }

    if (detailed.length > 0) {
      parts.push('**Other sources** (pass `sourceName` to inspect):');
    }
    for (const s of rest) {
      const description = this.resolveDefaultDescription(s.descriptions);
      // Separate name and description so they do not run together.
      const suffix = description ? ` — ${description}` : '';
      const measureCount = (s.measures ?? []).length;
      const stats = [measureCount > 0 ? `${measureCount} measures` : null, `${(s.columns ?? []).length} cols`]
        .filter(Boolean)
        .join(', ');
      parts.push(`- **${s.name}**${suffix} (${stats})`);
    }
    parts.push('');
  }

  /** Full detail for a single source: metadata, measures, joins, all non-hidden columns. */
  private appendSourceDetail(parts: string[], s: SemanticLayerSource): void {
    const columns = s.columns ?? [];
    const measures = s.measures ?? [];
    const joins = s.joins ?? [];

    const description = this.resolveDefaultDescription(s.descriptions);
    parts.push(`### ${s.name}${description ? ` — ${description}` : ''}`);
    parts.push(
      `Type: ${s.sql ? 'sql' : 'table'} | Columns: ${columns.length} | Measures: ${measures.length} | Joins: ${joins.length}`,
    );
    if (measures.length > 0) {
      parts.push(`Measures: ${measures.map((m) => `\`${m.name}\` (${m.expr})`).join(', ')}`);
    }
    if (joins.length > 0) {
      parts.push(`Joins: ${joins.map((j) => `${j.to} (${j.relationship})`).join(', ')}`);
    }
    const publicCols = columns.filter((c) => c.visibility !== 'hidden');
    if (publicCols.length > 0) {
      parts.push(`Columns: ${publicCols.map((c) => `\`${s.name}.${c.name}\` (${c.type})`).join(', ')}`);
    }
    parts.push('');
  }
}

View file

@ -0,0 +1,187 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js';
import { SlEditSourceTool } from './sl-edit-source.tool.js';
/**
 * Builds an `SlEditSourceTool` wired to mocked services.
 * `overrides.semanticLayerService` / `overrides.slSearchService` replace
 * individual mocked methods; the mocks are returned for assertions.
 */
function makeTool(overrides: any = {}) {
  // Source YAML served by the default `readSourceFile` mock.
  const ordersYaml =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n';

  const semanticLayerService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: ordersYaml }),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    loadAllSources: vi.fn().mockResolvedValue([]),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    ...overrides.semanticLayerService,
  };
  const slSearchService = {
    indexSources: vi.fn().mockResolvedValue(undefined),
    ...overrides.slSearchService,
  };
  const authorResolver = { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) };

  const tool = new SlEditSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: slSearchService as never,
    authorResolver,
  });
  return { tool, semanticLayerService, slSearchService };
}
/** Minimal tool context without a session. */
const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };

/**
 * Creates a worktree-scoped session whose semantic layer service is fully
 * mocked; any field can be replaced through `overrides`.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const sessionService = {
    readSourceFile: vi.fn().mockResolvedValue({
      content:
        'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n',
    }),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    loadAllSources: vi.fn().mockResolvedValue([]),
  };

  const defaults: ToolSession = {
    connectionId: '11111111-1111-1111-1111-111111111111',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: sessionService as any,
    wikiService: {} as any,
    configService: {} as any,
    gitService: {} as any,
  };
  return { ...defaults, ...overrides };
}
// How SlEditSourceTool behaves with and without a session on the tool context:
// which service performs the write, whether the search index is refreshed, and
// what gets recorded on the session.
describe('SlEditSourceTool — session gating', () => {
// Worktree-scoped session: no re-indexing, but the edit is tracked on the session.
it('skips slSearchService.indexSources when session is worktree-scoped', async () => {
const { tool, slSearchService } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: session.connectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).not.toHaveBeenCalled();
expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'orders')).toBe(true);
expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
});
// Editing a source on a connection other than the session's own one must tag the
// recorded action with the edited connection id.
it('records cross-connection SL edits with targetConnectionId', async () => {
const { tool } = makeTool();
const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' });
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: warehouseConnectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'orders')).toBe(true);
expect(session.actions).toContainEqual(
expect.objectContaining({
target: 'sl',
type: 'updated',
key: 'orders',
targetConnectionId: warehouseConnectionId,
}),
);
});
// Without a session the tool refreshes the search index exactly once after writing.
it('indexes normally when no session is present', async () => {
const { tool, slSearchService } = makeTool();
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
baseContext,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).toHaveBeenCalledTimes(1);
});
// The session's own service, not the injected default, must perform the write.
it('uses session.semanticLayerService when session is present', async () => {
const { tool } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
await tool.call(
{
connectionId: session.connectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
});
});
// Error reporting when the source file cannot be read: a manifest-backed source
// gets a directed hint, anything else gets the plain "not found" error.
describe('SlEditSourceTool — manifest-backed source without overlay', () => {
// Read fails + isManifestBacked → true: the error must name the source and steer
// the caller towards sl_write_source with an overlay; nothing is written.
it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
const { tool, semanticLayerService } = makeTool({
semanticLayerService: {
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
isManifestBacked: vi.fn().mockResolvedValue(true),
},
});
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'CONSIGNMENTS',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures:\n - name: aav_count\n expr: count(*)' }],
} as any,
baseContext,
);
expect(result.structured.success).toBe(false);
expect(semanticLayerService.isManifestBacked).toHaveBeenCalledWith(
'11111111-1111-1111-1111-111111111111',
'CONSIGNMENTS',
);
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
const joinedErrors = (result.structured.errors ?? []).join('\n');
expect(joinedErrors).toContain('CONSIGNMENTS');
expect(joinedErrors).toContain('manifest');
expect(joinedErrors).toContain('sl_write_source');
expect(joinedErrors).toContain('overlay');
// Overlay shape: only name + measures/segments/description
expect(joinedErrors).toContain('measures');
expect(joinedErrors).toContain('segments');
});
// Read fails + isManifestBacked → false: exactly one plain error, manifest checked
// once, nothing written.
it('still returns the plain "Source not found" error for truly-missing names', async () => {
const { tool, semanticLayerService } = makeTool({
semanticLayerService: {
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
isManifestBacked: vi.fn().mockResolvedValue(false),
},
});
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'does_not_exist',
yaml_edits: [{ oldText: 'x', newText: 'y' }],
} as any,
baseContext,
);
expect(result.structured.success).toBe(false);
expect(result.structured.errors).toEqual(['Source not found. Use sl_write_source to create it.']);
expect(semanticLayerService.isManifestBacked).toHaveBeenCalledTimes(1);
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
});
});

View file

@ -0,0 +1,200 @@
import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
import type { SemanticLayerSource } from '../types.js';
import {
BaseSemanticLayerTool,
type BaseSemanticLayerToolDeps,
type SemanticLayerStructured,
} from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
// Input accepted by `sl_edit_source`: the target source plus either a list of
// exact-match text edits on its YAML or a delete flag. When `delete` is set the
// tool deletes the source and does not apply `yaml_edits`.
const slEditSourceInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
sourceName: z.string().describe('Name of the source to edit'),
yaml_edits: z
.array(
z.object({
oldText: z.string().describe('Exact text to find in the current YAML. Must match exactly (byte-for-byte).'),
newText: z.string().describe('Replacement text. Use empty string to delete.'),
reason: z.string().optional().describe('Brief reason for this edit.'),
}),
)
.optional()
.describe('Targeted exact-match search/replace edits on the raw YAML content.'),
delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
});
// Parsed input type derived from the schema above.
type SlEditSourceInput = z.infer<typeof slEditSourceInputSchema>;
/**
 * Decides which connection id to attach to a recorded session action.
 *
 * @param runConnectionId Connection the session/run is bound to, if any.
 * @param actionConnectionId Connection the action actually touched.
 * @returns `actionConnectionId` when the run has a connection and it differs
 *   from the action's connection; otherwise `null`.
 */
function actionTargetConnectionId(
  runConnectionId: string | null | undefined,
  actionConnectionId: string,
): string | null {
  if (!runConnectionId) {
    return null;
  }
  if (runConnectionId === actionConnectionId) {
    return null;
  }
  return actionConnectionId;
}
/**
 * `sl_edit_source` — applies exact-match text edits to an existing source's
 * YAML (or deletes the source), validates the result and writes it back.
 *
 * When the tool context carries a session, the session's semantic layer
 * service is used and every successful change is recorded on the session;
 * for worktree-scoped sessions the search index is not refreshed.
 */
export class SlEditSourceTool extends BaseSemanticLayerTool<typeof slEditSourceInputSchema> {
  readonly name = 'sl_edit_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  get description(): string {
    return `<purpose>
Make targeted edits to an existing semantic layer source using exact-match search/replace on YAML content.
If no source exists yet, use sl_write_source instead this tool will reject the call.
</purpose>
<when_to_use>
- Adding/removing a measure on an existing source
- Adding/updating a join relationship
- Updating column descriptions
- Removing an obsolete source (set delete: true)
- Consolidation: delete redundant sources, edit the surviving one
</when_to_use>
<edit_guidelines>
- yaml_edits: exact-match search/replace on raw YAML. oldText must match byte-for-byte (no whitespace normalization or fuzzy matching).
Include enough surrounding context in oldText for a unique match.
- Read the source first with sl_read_source to copy the exact text you want to replace.
- Keep edits scoped to the user's request — don't proactively regenerate all measures.
</edit_guidelines>`;
  }

  get inputSchema() {
    return slEditSourceInputSchema;
  }

  /**
   * Runs the tool.
   *
   * Flow: delete (if requested) → read current YAML → apply edits → parse →
   * validate → write → best-effort re-index → record on the session.
   * Failures are returned as `success: false` outputs; validation errors
   * prevent the write entirely.
   */
  async call(input: SlEditSourceInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId, sourceName } = input;
    const { name: author, email: authorEmail } = await this.authorResolver.resolve(context.userId);
    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
    const skipIndex = context.session?.isWorktreeScoped === true;

    // Handle delete
    if (input.delete) {
      try {
        await semanticLayerService.deleteSource(connectionId, sourceName, author, authorEmail);
        this.recordSessionAction(context, connectionId, sourceName, 'removed', 'Deleted source');
        const output = this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
        // The shared success headline says "saved"; report the deletion explicitly.
        return { ...output, markdown: `Source **${sourceName}** deleted successfully.` };
      } catch (error) {
        return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
      }
    }

    // Read existing source; any read failure is treated as "no file".
    let currentYaml: string | null = null;
    try {
      const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
      currentYaml = content;
    } catch {
      currentYaml = null;
    }

    if (!currentYaml) {
      const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
      if (manifestBacked) {
        return this.buildOutput(
          false,
          [
            [
              `Source "${sourceName}" exists in the schema manifest but has no overlay file yet — sl_edit_source cannot edit it directly.`,
              `Bootstrap an overlay with sl_write_source, then re-run sl_edit_source on subsequent changes:`,
              ` name: ${sourceName}`,
              ` measures:`,
              ` - name: <measure_name>`,
              ` expr: "<expression>"`,
              ` description: "<what it measures>"`,
              `Overlay shape: "name:" plus any of "measures:", "segments:", "description:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`,
            ].join('\n'),
          ],
          sourceName,
        );
      }
      return this.buildOutput(false, ['Source not found. Use sl_write_source to create it.'], sourceName);
    }

    const errors: string[] = [];
    let yaml = currentYaml;
    let editCount = 0;

    // Apply yaml_edits (text-level search/replace, exact-match only).
    // Edits that fail to apply are reported, while the rest of the flow continues
    // with whatever text the replacer returned.
    if (input.yaml_edits && input.yaml_edits.length > 0) {
      const editResult = applySqlEdits(yaml, input.yaml_edits, { exactOnly: true });
      yaml = editResult.sql;
      editCount = editResult.appliedEdits;
      if (!editResult.success) {
        errors.push(...editResult.errors);
      }
    }

    // Parse resulting YAML
    let parsed: unknown;
    try {
      parsed = YAML.parse(yaml);
    } catch (e) {
      return this.buildOutput(false, [...errors, `YAML parse error after edits: ${String(e)}`], sourceName);
    }
    // An empty document parses to null and a bare scalar/list is not a source;
    // refuse to validate or write anything that is not a mapping.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return this.buildOutput(
        false,
        [...errors, 'YAML parse error after edits: the document is not a mapping of source fields.'],
        sourceName,
      );
    }
    const source = parsed as SemanticLayerSource;

    // Re-serialize and validate before writing.
    const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });
    const { errors: validationErrors, warnings: validationWarnings } =
      await semanticLayerService.validateWithProposedSource(connectionId, source);
    if (validationErrors.length > 0) {
      return this.buildOutput(
        false,
        [...errors, 'Validation failed — edits were NOT saved:', ...validationErrors],
        sourceName,
        { yaml: updatedYaml, editCount, validationErrors, validationWarnings },
      );
    }

    const commitMessage = `Edit source ${sourceName}: ${
      input.yaml_edits ? `${input.yaml_edits.length} YAML edit(s)` : 'update'
    }`;

    try {
      const result = await semanticLayerService.writeSource(connectionId, source, author, authorEmail, commitMessage);

      if (!skipIndex) {
        // Index refresh is best-effort: the write is already committed, so neither
        // loading the sources nor indexing them may turn the result into a failure.
        try {
          const allSources = await semanticLayerService.loadAllSources(connectionId);
          await this.slSearchService.indexSources(connectionId, allSources);
        } catch {
          // Intentionally ignored.
        }
      }

      this.recordSessionAction(context, connectionId, sourceName, 'updated', `Applied ${editCount} edit(s)`);

      return this.buildOutput(errors.length === 0, errors, sourceName, {
        yaml: updatedYaml,
        commitHash: result.commitHash ?? undefined,
        editCount,
        validationErrors,
        validationWarnings,
      });
    } catch (error) {
      errors.push(error instanceof Error ? error.message : String(error));
      return this.buildOutput(false, errors, sourceName, { yaml: updatedYaml, editCount });
    }
  }

  /**
   * Marks the source as touched and appends an action to the session, if the
   * context has one. `targetConnectionId` is set only when the edited
   * connection differs from the session's own connection.
   */
  private recordSessionAction(
    context: ToolContext,
    connectionId: string,
    sourceName: string,
    type: 'removed' | 'updated',
    detail: string,
  ): void {
    const session = context.session;
    if (!session) {
      return;
    }
    addTouchedSlSource(session.touchedSlSources, connectionId, sourceName);
    session.actions.push({
      target: 'sl',
      type,
      key: sourceName,
      detail,
      targetConnectionId: actionTargetConnectionId(session.connectionId, connectionId),
    });
  }
}

View file

@ -0,0 +1,75 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, type ToolContext } from '../../tools/index.js';
import { SlReadSourceTool } from './sl-read-source.tool.js';
/**
 * Builds an `SlReadSourceTool` whose default semantic layer service returns a
 * recognisable "default" YAML; `overrides.semanticLayerService` can replace
 * individual mocked methods.
 */
function makeTool(overrides: Partial<Record<string, any>> = {}) {
  const defaultRead = vi.fn().mockResolvedValue({ content: 'name: foo_default\n', path: 'default' });
  const semanticLayerService = {
    readSourceFile: defaultRead,
    ...overrides.semanticLayerService,
  };

  const deps = {
    semanticLayerService: semanticLayerService as never,
    slSearchService: {} as never,
    authorResolver: { resolve: vi.fn() },
  };
  const tool = new SlReadSourceTool(deps);
  return { tool, semanticLayerService };
}
/** Creates a minimal tool context; fields in `overrides` win over the defaults. */
function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
  const defaults: ToolContext = { sourceId: 'src', messageId: 'msg', userId: 'user' };
  return { ...defaults, ...overrides };
}
/**
 * Creates a worktree-scoped session whose semantic layer service returns a
 * recognisable "session" YAML, so tests can tell which service served a read.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const sessionService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: 'name: foo_session\n', path: 'session' }),
  };

  const defaults: ToolSession = {
    connectionId: '11111111-1111-1111-1111-111111111111',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: sessionService as any,
    wikiService: {} as any,
    configService: {} as any,
    gitService: {} as any,
  };
  return { ...defaults, ...overrides };
}
// Verifies which semantic layer service SlReadSourceTool reads through.
describe('SlReadSourceTool - session-scoped reads', () => {
// With a session, the session's service serves the read and the default one is untouched.
it('reads through context.session.semanticLayerService when a session is present', async () => {
const { tool, semanticLayerService } = makeTool();
const session = makeSession();
const result = await tool.call(
{ connectionId: '11111111-1111-1111-1111-111111111111', sourceName: 'foo' },
makeContext({ session }),
);
expect((session.semanticLayerService as any).readSourceFile).toHaveBeenCalledWith(
'11111111-1111-1111-1111-111111111111',
'foo',
);
expect(semanticLayerService.readSourceFile).not.toHaveBeenCalled();
expect(result.structured.yaml).toContain('foo_session');
});
// Without a session, the service injected at construction time serves the read.
it('reads through the default service when no session is present', async () => {
const { tool, semanticLayerService } = makeTool();
const result = await tool.call(
{ connectionId: '11111111-1111-1111-1111-111111111111', sourceName: 'foo' },
makeContext(),
);
expect(semanticLayerService.readSourceFile).toHaveBeenCalledWith('11111111-1111-1111-1111-111111111111', 'foo');
expect(result.structured.yaml).toContain('foo_default');
});
});

View file

@ -0,0 +1,63 @@
import { z } from 'zod';
import type { ToolContext, ToolOutput } from '../../tools/index.js';
import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
// Input accepted by `sl_read_source`: both fields are required.
const slReadSourceInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
sourceName: z.string().describe('Name of the source to read'),
});
// Parsed input type derived from the schema above.
type SlReadSourceInput = z.infer<typeof slReadSourceInputSchema>;
/** Structured payload of `sl_read_source`. */
interface SlReadSourceStructured {
/** Name of the requested source. */
sourceName: string;
/** Raw YAML of the source; the empty string when the source could not be read. */
yaml: string;
}
/**
 * `sl_read_source` — returns the raw YAML of one semantic layer source.
 * A source that cannot be read (or has empty content) is reported as
 * "not found" with an empty `yaml` in the structured output.
 */
export class SlReadSourceTool extends BaseSemanticLayerTool<typeof slReadSourceInputSchema> {
  readonly name = 'sl_read_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  get description(): string {
    return `<purpose>
Read the raw YAML definition of a semantic layer source, including its SQL implementation.
Use this when you need to understand how a source is built e.g., before editing it with sl_edit_source or sl_write_source.
</purpose>
<when_to_use>
- Before editing a source: understand its full definition (SQL, columns, measures, joins)
- When debugging a source: see the underlying SQL query
- When creating a new source based on an existing one
</when_to_use>
<when_not_to_use>
- To discover what sources/measures/dimensions are available for querying use sl_discover instead
- To query data use semantic_query or create_widget with slQuery
</when_not_to_use>`;
  }

  get inputSchema() {
    return slReadSourceInputSchema;
  }

  /** Reads the source via the base-class helper and wraps the YAML in a fenced markdown block. */
  async call(input: SlReadSourceInput, context: ToolContext): Promise<ToolOutput<SlReadSourceStructured>> {
    const sourceName = input.sourceName;
    const connectionId = input.connectionId;

    const yaml = await this.readSourceYaml(connectionId, sourceName, context);
    if (yaml) {
      const markdown = `## Source: ${sourceName}\n\n\`\`\`yaml\n${yaml}\n\`\`\``;
      return { markdown, structured: { sourceName, yaml } };
    }

    // null (read failed) and '' (empty file) both land here.
    const notFound = `Source **${sourceName}** not found for connection ${connectionId}.`;
    return { markdown: notFound, structured: { sourceName, yaml: '' } };
  }
}

View file

@ -0,0 +1,67 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js';
import { SlRollbackTool } from './sl-rollback.tool.js';
/**
 * Builds a worktree-scoped ToolSession fixture for connection `conn-1` in which
 * the `orders` source has already been touched and recorded as an action.
 * Any field can be replaced through `overrides`.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const configService = {
    writeFile: vi.fn().mockResolvedValue(undefined),
    deleteFile: vi.fn().mockResolvedValue(undefined),
  } as any;
  // Git stub: whatever path/commit is requested, the pre-session content is 'pre: content'.
  const gitService = { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') } as any;

  const defaults: ToolSession = {
    connectionId: 'conn-1',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]),
    actions: [{ target: 'sl', type: 'updated', key: 'orders', detail: 'x' }],
    semanticLayerService: {} as any,
    wikiService: {} as any,
    configService,
    gitService,
  };

  return { ...defaults, ...overrides };
}
// Unit tests for SlRollbackTool.call: the two guard branches (no session, no connection) and the restore path.
describe('SlRollbackTool', () => {
// Connection catalog stub shared by every case; the visible tests never assert on it.
const connections = {
getConnectionById: vi.fn(),
listEnabledConnections: vi.fn(),
executeQuery: vi.fn(),
};
// Without a session the tool must fail with a message that mentions "session".
it('errors when context.session is absent', async () => {
const tool = new SlRollbackTool({} as never, connections as never, 1);
const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };
const result = await tool.call({ sourceName: 'orders' } as any, context);
expect(result.structured.success).toBe(false);
expect(result.markdown).toMatch(/session/i);
});
// A session with a null connectionId is rejected before any git lookup or session mutation happens.
it('errors when session has no connectionId (wiki-only turn)', async () => {
const tool = new SlRollbackTool({} as never, connections as never, 1);
const session = makeSession({ connectionId: null });
const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };
const result = await tool.call({ sourceName: 'orders' } as any, context);
expect(result.structured.success).toBe(false);
expect(result.markdown).toMatch(/connection-scoped session/i);
// Session state untouched
expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(true);
expect((session.gitService as any).getFileAtCommit).not.toHaveBeenCalled();
});
// Happy path: content is fetched at preHead ('base'), written back, and the session bookkeeping for the source is cleared.
it('restores the source content from preHead, clears touched set, prunes actions', async () => {
const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };
const tool = new SlRollbackTool(slSourcesRepository as never, connections as never, 1);
const session = makeSession();
const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };
const result = await tool.call({ sourceName: 'orders' } as any, context);
expect(result.structured.success).toBe(true);
expect((session.gitService as any).getFileAtCommit).toHaveBeenCalledWith(
expect.stringContaining('orders.yaml'),
'base',
);
expect((session.configService as any).writeFile).toHaveBeenCalled();
expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
expect(session.actions).toEqual([]);
});
});

View file

@ -0,0 +1,87 @@
import { z } from 'zod';
import { BaseTool, deleteTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
import { revertSourceToPreHead } from './sl-warehouse-validation.js';
// Zod input contract for sl_rollback: only the source name; the connection comes from the session.
const slRollbackInputSchema = z.object({
sourceName: z.string().describe('Name of the source to roll back'),
});
// Call-time input type derived from the schema.
type SlRollbackInput = z.infer<typeof slRollbackInputSchema>;
// Machine-readable result of a rollback attempt.
interface SlRollbackStructured {
// false when the call was rejected because of a missing session or connection.
success: boolean;
sourceName: string;
// Short description of what the revert did; only set on success.
outcome?: string;
}
/**
 * Tool that discards the current session's changes to one semantic-layer source
 * by reverting it to its state at the session's `preHead` commit, then removes
 * the source from the session's touched set and action log.
 */
export class SlRollbackTool extends BaseTool<typeof slRollbackInputSchema> {
  readonly name = 'sl_rollback';

  constructor(
    private readonly slSourcesRepository: SlSourcesIndexPort,
    private readonly connections: SlConnectionCatalogPort,
    private readonly probeRowCount: number,
  ) {
    super();
  }

  /** Prompt-facing description of the tool. */
  get description(): string {
    return [
      '<purpose>',
      'Abandon this-session changes to a source and restore it to its pre-session state.',
      'Use when a write/edit failed validation in a way you cannot fix in-session (e.g. the source requires elevated warehouse permissions).',
      '</purpose>',
    ].join('\n');
  }

  get inputSchema() {
    return slRollbackInputSchema;
  }

  /**
   * Reverts `input.sourceName` for the session's connection.
   *
   * @param input - name of the source to roll back
   * @param context - must carry a session that has a `connectionId`
   * @returns `success: false` with an explanatory message when the session or
   *   its connection is missing; otherwise `success: true` plus the revert outcome
   */
  async call(input: SlRollbackInput, context: ToolContext): Promise<ToolOutput<SlRollbackStructured>> {
    const { sourceName } = input;
    const rejected = (markdown: string): ToolOutput<SlRollbackStructured> => ({
      markdown,
      structured: { success: false, sourceName },
    });

    const session = context.session;
    if (!session) {
      return rejected(
        'Error: sl_rollback requires an active session (ingest WU or memory-agent). Use git revert for interactive rollback.',
      );
    }

    const connectionId = session.connectionId;
    if (!connectionId) {
      return rejected(
        'Error: sl_rollback requires a connection-scoped session; this session has no warehouse connection.',
      );
    }

    const revertDeps = {
      semanticLayerService: session.semanticLayerService,
      connections: this.connections,
      configService: session.configService,
      gitService: session.gitService,
      slSourcesRepository: this.slSourcesRepository,
      probeRowCount: this.probeRowCount,
    };
    const outcome = await revertSourceToPreHead(revertDeps, connectionId, session.preHead, sourceName);

    deleteTouchedSlSource(session.touchedSlSources, connectionId, sourceName);

    // Prune matching actions in place, walking from the end so splicing never
    // shifts an index that has not been visited yet.
    let index = session.actions.length;
    while (index-- > 0) {
      const action = session.actions[index];
      const isThisSource = action.target === 'sl' && action.key === sourceName;
      // Actions without an explicit target connection belong to the session's own connection.
      const owningConnection = action.targetConnectionId ?? connectionId;
      if (isThisSource && owningConnection === connectionId) {
        session.actions.splice(index, 1);
      }
    }

    return {
      markdown: `Source "${sourceName}" rolled back: ${outcome}.`,
      structured: { success: true, sourceName, outcome },
    };
  }
}

View file

@ -0,0 +1,66 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, type ToolContext } from '../../tools/index.js';
import type { SemanticLayerService } from '../semantic-layer.service.js';
import type { SemanticLayerSource } from '../types.js';
import { SlValidateTool, validateSemanticLayerEndpoint } from './sl-validate.tool.js';
// Tests for validateSemanticLayerEndpoint, using a service mock that only implements validateSourcesForConnection.
describe('validateSemanticLayerEndpoint', () => {
// Asserts that the endpoint delegates with just the connection id (no dialect argument is passed).
// NOTE(review): the title talks about dialect resolution, but the assertion only checks the delegated call.
it('uses the connection warehouse dialect, not hardcoded postgres', async () => {
const serviceMock = {
validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
};
await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService);
expect(serviceMock.validateSourcesForConnection).toHaveBeenCalledWith('conn-1');
});
// Asserts that an empty report from the service is returned unchanged.
// NOTE(review): nothing here exercises a short-circuit; the mock simply resolves with an empty report.
it('short-circuits when there are no validatable sources', async () => {
const serviceMock = {
validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
};
const result = await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService);
expect(result).toEqual({ errors: [], warnings: [] });
});
});
// Verifies that, inside a session, SlValidateTool only reports messages that mention a touched source.
describe('SlValidateTool — session-aware touched-set filtering', () => {
it('when session present, only returns errors/warnings that mention touched sources', async () => {
// Two sources exist on the connection, but only 'orders' is marked as touched in the session below.
const sources: SemanticLayerSource[] = [
{ name: 'orders', table: 'x.orders', grain: ['id'], columns: [], joins: [], measures: [] },
{ name: 'customers', table: 'x.customers', grain: ['id'], columns: [], joins: [], measures: [] },
];
// The validation report contains messages for both sources; the 'customers' error must be filtered out.
const serviceMock = {
loadAllSources: vi.fn().mockResolvedValue(sources),
validateSourcesForConnection: vi.fn().mockResolvedValue({
errors: ['orders: missing join target', 'customers: invalid grain'],
warnings: ['orders: disconnected-components warning'],
}),
};
const tool = new SlValidateTool({
semanticLayerService: serviceMock as never,
slSearchService: {} as never,
authorResolver: { resolve: vi.fn() },
});
const session: ToolSession = {
connectionId: 'conn-1',
isWorktreeScoped: true,
preHead: null,
touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]),
actions: [],
semanticLayerService: serviceMock as any,
wikiService: {} as any,
configService: {} as any,
gitService: {} as any,
};
const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };
const result = await tool.call({ connectionId: 'conn-1' } as any, context);
expect(result.structured.validationErrors).toEqual(['orders: missing join target']);
expect(result.structured.validationWarnings).toEqual(['orders: disconnected-components warning']);
});
});

View file

@ -0,0 +1,130 @@
import { z } from 'zod';
import { type ToolContext, type ToolOutput, touchedSlSourceNamesForConnection } from '../../tools/index.js';
import { SemanticLayerService } from '../semantic-layer.service.js';
import {
BaseSemanticLayerTool,
type BaseSemanticLayerToolDeps,
type SemanticLayerStructured,
} from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
// Zod input contract for sl_validate: validation is always scoped to one connection.
const slValidateInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
});
// Call-time input type derived from the schema.
type SlValidateInput = z.infer<typeof slValidateInputSchema>;
// Shape returned by validateSemanticLayerEndpoint: human-readable error and warning messages.
type ValidationReport = {
errors: string[];
warnings: string[];
};
/**
 * Runs connection-wide validation through the given service and never rejects.
 *
 * @param connectionId - connection whose sources are validated
 * @param semanticLayerService - service providing `validateSourcesForConnection`
 * @returns the service's report, or — if the call throws — a report whose single
 *   error is `Validation call failed: <reason>` and whose warnings are empty
 */
export async function validateSemanticLayerEndpoint(
  connectionId: string,
  semanticLayerService: SemanticLayerService,
): Promise<ValidationReport> {
  let report: ValidationReport;
  try {
    report = await semanticLayerService.validateSourcesForConnection(connectionId);
  } catch (failure) {
    // Convert the failure into a report so callers can treat it like any other validation error.
    const reason = failure instanceof Error ? failure.message : String(failure);
    report = { errors: [`Validation call failed: ${reason}`], warnings: [] };
  }
  return report;
}
// Prefix of the synthetic error produced when the validation call itself throws.
// Such an error names no source, so it must survive touched-source filtering.
const VALIDATION_CALL_FAILED_PREFIX = 'Validation call failed:';

/**
 * Tool that validates every semantic-layer source of a connection and renders
 * the resulting errors/warnings together with a per-source summary.
 *
 * Inside a session that has touched sources on the connection, only messages
 * mentioning one of those sources are reported. A failure of the validation
 * call itself is always reported, so a broken validator cannot masquerade as a
 * clean result.
 */
export class SlValidateTool extends BaseSemanticLayerTool<typeof slValidateInputSchema> {
  readonly name = 'sl_validate';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  /** Prompt-facing description of the tool. */
  get description(): string {
    return `<purpose>
Validate that all semantic layer sources for a connection form a consistent model.
Checks: all join targets exist, grain is valid, no missing references.
</purpose>
<when_to_use>
- After making edits with sl_write_source
- Before querying, to ensure the model is healthy
- When troubleshooting query failures
</when_to_use>`;
  }

  get inputSchema() {
    return slValidateInputSchema;
  }

  /**
   * Validates the connection's sources.
   *
   * @param input - the connection to validate
   * @param context - when a session is present its semantic-layer service is
   *   used and its touched-source set narrows the reported messages
   * @returns markdown report plus structured errors/warnings; `success` is
   *   true only when no (reported) errors remain
   */
  async call(input: SlValidateInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId } = input;
    // Prefer the session-scoped service so validation sees the session's view of the sources.
    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
    const sources = await semanticLayerService.loadAllSources(connectionId);
    if (sources.length === 0) {
      return this.buildOutput(true, [], '(all)', {
        validationErrors: ['No sources found for this connection.'],
      });
    }

    let { errors, warnings } = await validateSemanticLayerEndpoint(connectionId, semanticLayerService);

    const touched = context.session?.touchedSlSources;
    if (touched && touched.size > 0) {
      const touchedArr = touchedSlSourceNamesForConnection(touched, connectionId);
      if (touchedArr.length > 0) {
        // Substring match: a message is kept when it mentions any touched source name.
        const mentionsTouched = (message: string): boolean => touchedArr.some((n) => message.includes(n));
        // Keep call-level failures even though they name no source; dropping them
        // would report a failed validation run as success.
        errors = errors.filter((e) => e.startsWith(VALIDATION_CALL_FAILED_PREFIX) || mentionsTouched(e));
        warnings = warnings.filter((w) => mentionsTouched(w));
      }
    }

    const valid = errors.length === 0;
    const parts: string[] = [];
    parts.push(`**Semantic layer validation** for ${sources.length} source(s):`);
    if (valid && warnings.length === 0) {
      parts.push('All sources are valid. Join graph is consistent.');
    } else {
      const summary: string[] = [];
      if (errors.length > 0) {
        summary.push(`${errors.length} error(s)`);
      }
      if (warnings.length > 0) {
        summary.push(`${warnings.length} warning(s)`);
      }
      parts.push(`Found ${summary.join(' and ')}:`);
      if (errors.length > 0) {
        parts.push('', '**Errors:**');
        for (const err of errors) {
          parts.push(`- ${err}`);
        }
      }
      if (warnings.length > 0) {
        parts.push('', '**Warnings:**');
        for (const warn of warnings) {
          parts.push(`- ${warn}`);
        }
      }
    }

    // List sources summary
    parts.push('\n**Sources:**');
    for (const s of sources) {
      parts.push(
        `- **${s.name}** (${s.sql ? 'sql' : 'table'}): ${s.columns.length} cols, ${s.measures.length} measures, ${s.joins.length} joins`,
      );
    }

    return {
      markdown: parts.join('\n'),
      structured: {
        success: valid,
        sourceName: '(all)',
        validationErrors: errors.length > 0 ? errors : undefined,
        validationWarnings: warnings.length > 0 ? warnings : undefined,
      },
    };
  }
}

View file

@ -0,0 +1,120 @@
import { describe, expect, it, vi } from 'vitest';
import { validateSingleSource } from './sl-warehouse-validation.js';
/**
 * Assembles the dependency bundle passed to `validateSingleSource` in these tests.
 *
 * @param opts.sourceYaml - YAML text that `readSourceFile` resolves with
 * @param opts.executeQuery - mock standing in for the warehouse probe call
 * @returns deps whose connection lookup always reports a `bigquery` connection
 *   and whose `probeRowCount` is 1
 */
function makeDeps(opts: { sourceYaml: string; executeQuery: ReturnType<typeof vi.fn> }) {
  const { sourceYaml, executeQuery } = opts;

  const semanticLayerService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: sourceYaml, path: 'x' }),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    listManifestSourceNames: vi.fn().mockResolvedValue([]),
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue([]),
  } as never;

  const connections = {
    executeQuery,
    getConnectionById: vi.fn().mockResolvedValue({ id: 'conn-1', name: 'conn-1', connectionType: 'bigquery' }),
    listEnabledConnections: vi.fn().mockResolvedValue([]),
  } as never;

  const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) } as never;

  return {
    semanticLayerService,
    connections,
    configService: {} as never,
    gitService: {} as never,
    slSourcesRepository,
    probeRowCount: 1,
  };
}
// Tests for the warehouse probe step of validateSingleSource, driven through makeDeps with a mocked executeQuery.
describe('validateSingleSource warehouse dry-run', () => {
// A rejected probe must surface both the warehouse message and the "embedded sql dry-run failed" headline.
it('surfaces warehouse error when dry-run fails on unknown column', async () => {
const yaml = `name: fct_arr_delta
source_type: sql
sql: |
SELECT * FROM analytics.fct_arr_delta WHERE date_date < CURRENT_DATE()
grain: [date_date]
columns:
- name: date_date
type: time
measures:
- name: count_delta_events
expr: count(*)
joins: []
`;
const executeQuery = vi.fn().mockRejectedValue(new Error('Unrecognized name: date_date at [1:42]'));
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta');
expect(result.errors.join('\n')).toMatch(/Unrecognized name: date_date/);
expect(result.errors.join('\n')).toMatch(/embedded sql dry-run failed/);
});
// The YAML declares date_date, but the probe only returns date and customer_id, so date_date must be reported missing.
it('flags declared columns missing from the dry-run result', async () => {
const yaml = `name: fct_arr_delta
source_type: sql
sql: |
SELECT date, customer_id FROM analytics.fct_arr_delta
columns:
- name: date_date
type: time
- name: customer_id
type: string
measures:
- name: count_delta
expr: count(*)
joins: []
grain: [customer_id]
`;
const executeQuery = vi.fn().mockResolvedValue({
headers: ['date', 'customer_id'],
rows: [],
totalRows: 0,
error: null,
});
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta');
expect(result.errors.join('\n')).toMatch(/declared columns absent from sql result — date_date/);
expect(result.errors.join('\n')).toMatch(/warehouse returned:/);
});
// Declared columns and probe headers agree, so no errors are expected.
it('passes cleanly when dry-run succeeds and declared columns match', async () => {
const yaml = `name: lab_results
source_type: sql
sql: |
SELECT lab_order_id, admin_user_id FROM analytics.raw_lab_results
grain: [lab_order_id]
columns:
- name: lab_order_id
type: string
- name: admin_user_id
type: string
measures:
- name: count_lab_results
expr: count(lab_order_id)
joins: []
`;
const executeQuery = vi.fn().mockResolvedValue({
headers: ['lab_order_id', 'admin_user_id'],
rows: [],
totalRows: 0,
error: null,
});
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'lab_results');
expect(result.errors).toEqual([]);
});
// Inspects the SQL text handed to executeQuery (second argument of the first call); makeDeps sets probeRowCount to 1.
it('uses LIMIT 1 (not LIMIT 0) so runtime policies fire', async () => {
const yaml = `name: foo
source_type: sql
sql: |
SELECT a FROM analytics.bar
grain: [a]
columns:
- {name: a, type: string}
measures: []
joins: []
`;
const executeQuery = vi.fn().mockResolvedValue({ headers: ['a'], rows: [], totalRows: 0, error: null });
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
await validateSingleSource(deps, 'conn-1', 'foo');
const probeSql = executeQuery.mock.calls[0][1] as string;
expect(probeSql).toMatch(/LIMIT 1\b/);
expect(probeSql).not.toMatch(/LIMIT 0\b/);
});
});

View file

@ -0,0 +1,325 @@
import YAML from 'yaml';
import type { GitService, KloFileStorePort } from '../../core/index.js';
import { SYSTEM_GIT_AUTHOR } from '../../tools/index.js';
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
import { sourceOverlaySchema } from '../schemas.js';
import { SemanticLayerService } from '../semantic-layer.service.js';
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
// Collaborators needed to validate or revert a single semantic-layer source.
export interface SlValidationDeps {
// Reads source files, answers manifest questions and runs composed queries for overlay measure probes.
semanticLayerService: SemanticLayerService;
// Looks up the connection type and executes probe SQL.
connections: SlConnectionCatalogPort;
// Writes or deletes the source YAML file during a revert.
configService: KloFileStorePort;
// Supplies a file's content at a given commit during a revert.
gitService: GitService;
// Source index; the entry is deleted when a source file is dropped by a revert.
slSourcesRepository: SlSourcesIndexPort;
// Probe size: 0 selects the zero-row wrapper; with a resolved dialect any other value yields a
// single-row probe, and without a dialect the value is used directly as the LIMIT.
probeRowCount: number;
}
// Outcome of validating one source: blocking errors plus hint-style warnings.
export interface SourceValidationResult {
errors: string[];
warnings: string[];
}
/** Repository-relative path of a source's YAML file: `semantic-layer/<connectionId>/<sourceName>.yaml`. */
const slSourcePath = (connectionId: string, sourceName: string): string => {
  const directory = 'semantic-layer/' + connectionId;
  return directory + '/' + sourceName + '.yaml';
};
/**
 * Maps a warehouse/connection type to a SQL dialect name via
 * `SemanticLayerService.mapDialect`.
 *
 * @param warehouse - connection type, or null when it could not be determined
 * @returns the mapped dialect, or null when `warehouse` is null or empty
 */
function resolveDialect(warehouse: string | null): string | null {
  return warehouse ? SemanticLayerService.mapDialect(warehouse) : null;
}
/**
 * Wraps `sql` in an outer SELECT that returns no rows, so only the result
 * shape is produced. Uses `TOP 0` for the `tsql` dialect and `LIMIT 0` otherwise.
 */
function wrapWithZeroRowQuery(sql: string, dialect: string): string {
  const subquery = `(${sql}) AS _discovery`;
  return dialect === 'tsql' ? `SELECT TOP 0 * FROM ${subquery}` : `SELECT * FROM ${subquery} LIMIT 0`;
}
/**
 * Wraps `sql` in an outer SELECT that returns at most one row.
 * Uses `TOP 1` for the `tsql` dialect and `LIMIT 1` otherwise.
 */
function wrapWithSingleRowQuery(sql: string, dialect: string): string {
  const subquery = `(${sql}) AS _base`;
  return dialect === 'tsql' ? `SELECT TOP 1 * FROM ${subquery}` : `SELECT * FROM ${subquery} LIMIT 1`;
}
/**
 * Validate one SL source end-to-end: YAML parse, manifest-shadow check, Zod schema,
 * duplicate-measure detection, then a warehouse probe.
 *
 * Sources with an embedded `sql` are probed by wrapping that sql in an outer SELECT
 * (zero-row when `deps.probeRowCount` is 0, otherwise row-limited) and comparing the
 * returned headers with the declared columns. Overlays (neither `table` nor `sql`)
 * have their measures probed through `probeOverlayMeasures` instead.
 *
 * Returns errors and hint-style warnings. An empty errors array means the YAML is
 * structurally valid AND no probe that was run reported a failure.
 *
 * @param deps - services used to read the file, inspect the manifest and run probes
 * @param connectionId - connection the source belongs to
 * @param sourceName - source name; also used as the `<name>.yaml` prefix of every message
 * @returns collected errors and warnings; failures are reported in the result rather than handled here
 */
export async function validateSingleSource(
deps: SlValidationDeps,
connectionId: string,
sourceName: string,
): Promise<SourceValidationResult> {
const errors: string[] = [];
const warnings: string[] = [];
let content: string;
// Step 1: read the file. Any read failure, whatever its cause, is reported as "file not found".
try {
const result = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
content = result.content;
} catch {
errors.push(`${sourceName}.yaml: file not found`);
return { errors, warnings };
}
// Step 2: parse the YAML and require an object at the top level.
let parsed: Record<string, unknown>;
try {
parsed = YAML.parse(content);
} catch (e) {
errors.push(`${sourceName}.yaml: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
return { errors, warnings };
}
if (!parsed || typeof parsed !== 'object') {
errors.push(`${sourceName}.yaml: top-level content is not an object`);
return { errors, warnings };
}
// A document with neither `table` nor `sql` is treated as an overlay.
const isOverlay = !parsed.table && !parsed.sql;
if (!isOverlay) {
// Step 3: a standalone definition must not reuse the name of a manifest-backed source.
const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
if (isManifestBacked) {
errors.push(
`${sourceName}.yaml: standalone source shadows an existing manifest entry — ` +
`writing it as-is drops the manifest's columns and joins. ` +
`Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` +
`"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` +
`that inherits the manifest schema. Call sl_describe_table to see it first.`,
);
return { errors, warnings };
}
}
// Step 4: schema validation, using the overlay or the full-definition schema as appropriate.
const schema = isOverlay ? sourceOverlaySchema : sourceDefinitionSchema;
const result = schema.safeParse(parsed);
if (!result.success) {
const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
errors.push(`${sourceName}.yaml: schema — ${issues}`);
// Attach a format hint for each top-level section that has at least one schema issue.
const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
if (errorPaths.has('joins')) {
warnings.push(
`${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
);
}
if (errorPaths.has('columns')) {
warnings.push(
`${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Do NOT include base table columns.`,
);
}
if (errorPaths.has('measures')) {
warnings.push(
`${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`,
);
}
return { errors, warnings };
}
// Step 5: duplicate measure names. This does not return early, so the probe below still runs.
const measures = (parsed.measures as Array<{ name: string }> | undefined) ?? [];
const seenMeasures = new Set<string>();
for (const m of measures) {
if (seenMeasures.has(m.name)) {
errors.push(`${sourceName}.yaml: duplicate measure name "${m.name}"`);
}
seenMeasures.add(m.name);
}
// Step 6: resolve the warehouse type. A failed lookup is tolerated and leaves it null.
let warehouse: string | null = null;
try {
const connection = await deps.connections.getConnectionById(connectionId);
warehouse = connection?.connectionType ?? null;
} catch {
warehouse = null;
}
if (typeof parsed.sql === 'string' && parsed.sql.trim().length > 0) {
// Step 7a: probe the embedded sql. Trailing semicolons are stripped so it can be used as a subquery.
const innerSql = parsed.sql.trim().replace(/;+\s*$/, '');
const probeRowCount = deps.probeRowCount;
const dialect = resolveDialect(warehouse);
let probeSql: string;
if (dialect) {
// With a known dialect, any non-zero probeRowCount results in a single-row probe.
probeSql =
probeRowCount === 0 ? wrapWithZeroRowQuery(innerSql, dialect) : wrapWithSingleRowQuery(innerSql, dialect);
} else {
// Unknown dialect: fall back to a LIMIT clause with the configured row count.
probeSql = `SELECT * FROM (${innerSql}) AS _probe LIMIT ${probeRowCount}`;
}
// Declared columns, with unnamed entries dropped.
const sourceColumns = ((parsed.columns as Array<{ name?: string; type?: string }> | undefined) ?? [])
.map((c) => ({ name: c.name ?? '', type: c.type ?? '' }))
.filter((c) => c.name);
try {
const probe = await deps.connections.executeQuery(connectionId, probeSql);
// NOTE(review): only `headers` is inspected. If executeQuery can resolve with an error field
// instead of throwing, that error is not surfaced here — confirm against the port contract.
// Header comparison is case-insensitive.
const actual = new Set((probe.headers ?? []).map((h) => h.toLowerCase()));
const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
if (missing.length > 0) {
// At most ten returned headers are listed to keep the message short.
errors.push(
`${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
);
}
} catch (e) {
errors.push(
formatProbeError({
sourceName,
measureName: null,
probeSql,
warehouse,
sourceColumns,
error: e,
headline: 'embedded sql dry-run failed',
}),
);
}
} else if (isOverlay) {
// Step 7b: overlays carry no sql of their own, so each measure is probed through the composed source.
const measureErrors = await probeOverlayMeasures(deps, connectionId, sourceName, warehouse);
errors.push(...measureErrors);
}
return { errors, warnings };
}
/**
 * Renders a multi-line diagnostic for a failed warehouse probe.
 *
 * The first line names the source (and the measure, when given) followed by the
 * headline. Subsequent lines list the warehouse (if known), the probe SQL, the
 * declared columns that the probe SQL mentions (if any) and the error text.
 *
 * @param args.error - thrown value; an Error contributes its message, anything else is stringified
 * @returns the lines joined with newlines
 */
function formatProbeError(args: {
  sourceName: string;
  measureName: string | null;
  probeSql: string;
  warehouse: string | null;
  sourceColumns: Array<{ name: string; type: string }>;
  error: unknown;
  headline: string;
}): string {
  const reason = args.error instanceof Error ? args.error.message : String(args.error);
  const mentioned = args.sourceColumns.filter((column) => referencesColumn(args.probeSql, column.name));

  const subject = args.measureName
    ? `${args.sourceName}.yaml: measure "${args.measureName}" ${args.headline}.`
    : `${args.sourceName}.yaml: ${args.headline}.`;

  const report: string[] = [subject];
  if (args.warehouse) {
    report.push(` Warehouse: ${args.warehouse}`);
  }
  report.push(` Probe SQL: ${args.probeSql}`);
  if (mentioned.length > 0) {
    // Columns without a declared type are shown with a '?' placeholder.
    const described = mentioned.map((column) => `${column.name} (${column.type || '?'})`);
    report.push(` Referenced columns: ${described.join(', ')}`);
  }
  report.push(` Error: ${reason}`);
  return report.join('\n');
}
/**
 * Reports whether `sql` mentions `columnName` as a whole identifier.
 *
 * Matching is case-insensitive, in line with the case-insensitive header
 * comparison used for probe results. Identifier boundaries are expressed with
 * lookarounds rather than `\b`, so names that begin or end with a non-word
 * character (for example `total$`) can still match.
 *
 * @param sql - SQL text to search
 * @param columnName - declared column name; an empty name never matches
 * @returns true when the name occurs with no identifier character directly before or after it
 */
function referencesColumn(sql: string, columnName: string): boolean {
  if (!columnName) {
    return false;
  }
  // Escape regex metacharacters so the name is matched literally.
  const escaped = columnName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`(?<![A-Za-z0-9_])${escaped}(?![A-Za-z0-9_])`, 'i');
  return pattern.test(sql);
}
/**
 * Probes every measure of an overlay source by running a one-row query for it
 * through the semantic-layer service.
 *
 * The composed source is looked up by name among all sources of the connection.
 * Nothing is probed when that source is missing, has no `table`, or has no measures.
 *
 * @param deps - provides `semanticLayerService` for loading sources and executing queries
 * @param connectionId - connection to probe against
 * @param sourceName - overlay source whose measures are probed
 * @param warehouse - connection type, used only in failure messages
 * @returns one formatted error per failing measure, or a single load-failure message
 */
async function probeOverlayMeasures(
  deps: SlValidationDeps,
  connectionId: string,
  sourceName: string,
  warehouse: string | null,
): Promise<string[]> {
  type ComposedSource = {
    name: string;
    table?: string;
    sql?: string;
    columns?: Array<{ name?: string; type?: string }>;
    measures: Array<{ name: string; expr: string; filter?: string; segments?: string[] }>;
    segments?: Array<{ name: string; expr: string }>;
  };

  const failures: string[] = [];
  let composed: ComposedSource | undefined;

  try {
    const everySource = await deps.semanticLayerService.loadAllSources(connectionId);
    composed = everySource.find((candidate) => candidate.name === sourceName);
  } catch (loadFailure) {
    const reason = loadFailure instanceof Error ? loadFailure.message : String(loadFailure);
    failures.push(`${sourceName}.yaml: failed to load composed source for probe — ${reason}`);
    return failures;
  }

  if (!composed?.table || composed.measures.length === 0) {
    return failures;
  }

  // Declared columns with a non-empty name; passed along for the failure report.
  const knownColumns: Array<{ name: string; type: string }> = [];
  for (const column of composed.columns ?? []) {
    const name = column.name ?? '';
    const type = column.type ?? '';
    if (name) {
      knownColumns.push({ name, type });
    }
  }

  // Measures are probed one after another, not concurrently.
  for (const measure of composed.measures) {
    const qualifiedName = `${sourceName}.${measure.name}`;
    // Placeholder reported when the query fails; the generated SQL is only
    // available after a successful query, so failures always carry this placeholder.
    let probeSql = `<composed via semantic-layer engine for ${qualifiedName}>`;
    try {
      const outcome = await deps.semanticLayerService.executeQuery(connectionId, {
        measures: [qualifiedName],
        dimensions: [],
        filters: [],
        limit: 1,
      });
      probeSql = outcome.sql ?? probeSql;
    } catch (probeFailure) {
      const report = formatProbeError({
        sourceName,
        measureName: measure.name,
        probeSql,
        warehouse,
        sourceColumns: knownColumns,
        error: probeFailure,
        headline: 'dry-run failed',
      });
      failures.push(report);
    }
  }

  return failures;
}
/**
 * Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
 * exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
 * (automatic).
 *
 * When the source is dropped, its index entry is removed exactly once, whether or
 * not the file deletion succeeded. The file deletion and the index deletion are
 * handled separately so that an index failure is never mistaken for "file already
 * absent".
 *
 * @param deps - git/file/index services used for the revert
 * @param connectionId - connection the source belongs to
 * @param preHead - commit to restore from; null means there is no pre-session commit
 * @param sourceName - source to revert
 * @returns a short human-readable description of what happened
 * @throws whatever `writeFile` or `deleteByConnectionAndName` throws
 */
export async function revertSourceToPreHead(
  deps: SlValidationDeps,
  connectionId: string,
  preHead: string | null,
  sourceName: string,
): Promise<string> {
  const relPath = slSourcePath(connectionId, sourceName);

  let preContent: string | null = null;
  if (preHead) {
    try {
      preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
    } catch {
      // NOTE(review): every lookup failure is treated as "file absent at preHead",
      // which leads to the delete branch below — confirm that getFileAtCommit only
      // throws for missing files.
      preContent = null;
    }
  }

  if (preContent !== null) {
    await deps.configService.writeFile(
      relPath,
      preContent,
      SYSTEM_GIT_AUTHOR.name,
      SYSTEM_GIT_AUTHOR.email,
      `Revert SL source to pre-session state: ${sourceName}`,
      { skipLock: true },
    );
    return 'restored to pre-session content';
  }

  // The source did not exist at session start: drop the file (best effort) and its index entry.
  let fileDeleted = true;
  try {
    await deps.configService.deleteFile(
      relPath,
      SYSTEM_GIT_AUTHOR.name,
      SYSTEM_GIT_AUTHOR.email,
      `Drop SL source (not present at session start): ${sourceName}`,
      { skipLock: true },
    );
  } catch {
    // A failed delete is treated as "nothing to delete".
    fileDeleted = false;
  }

  await deps.slSourcesRepository.deleteByConnectionAndName(connectionId, sourceName);
  return fileDeleted ? 'deleted (did not exist at session start)' : 'no-op (already absent)';
}

View file

@ -0,0 +1,267 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js';
import { SlWriteSourceTool } from './sl-write-source.tool.js';
/**
 * Creates an SlWriteSourceTool wired to mocked services.
 *
 * By default the manifest lists ACCOUNTS and ORDERS, nothing is manifest-backed,
 * validation is clean, writes resolve with commit `c1` and reading a source file
 * rejects with "not found". Individual mocks can be replaced through
 * `overrides.semanticLayerService` / `overrides.slSearchService`.
 *
 * @returns the tool together with both service mocks so tests can assert on them
 */
function makeTool(overrides: Partial<Record<string, any>> = {}) {
  const cleanReport = { errors: [], warnings: [] };

  const semanticLayerDefaults = {
    listManifestSourceNames: vi.fn().mockResolvedValue(['ACCOUNTS', 'ORDERS']),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue([]),
    validateWithProposedSource: vi.fn().mockResolvedValue(cleanReport),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
  };
  const semanticLayerService = { ...semanticLayerDefaults, ...overrides.semanticLayerService };

  const slSearchDefaults = { indexSources: vi.fn().mockResolvedValue(undefined) };
  const slSearchService = { ...slSearchDefaults, ...overrides.slSearchService };

  const tool = new SlWriteSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: slSearchService as never,
    authorResolver: { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) },
  });

  return { tool, semanticLayerService, slSearchService };
}
// Session-less tool context reused by the tests in this file.
const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };
// An overlay (no table/sql) may only be written for a name that exists in the manifest.
describe('SlWriteSourceTool — orphan overlay guard', () => {
// 'does_not_exist' is not among the mocked manifest names (ACCOUNTS, ORDERS), so the write must be
// rejected and the message must mention at least one of the known names.
it('rejects overlay YAMLs targeting a name absent from the manifest', async () => {
const { tool } = makeTool();
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'does_not_exist',
source: {
name: 'does_not_exist',
measures: [{ name: 'count_rows', expr: 'count(*)' }],
} as any,
} as any,
baseContext,
);
expect(result.structured.success).toBe(false);
expect(result.markdown).toMatch(/no manifest entry with that name exists/i);
expect(result.markdown).toMatch(/ACCOUNTS|ORDERS/);
});
});
// Behaviour of SlWriteSourceTool when a ToolSession is (or is not) attached to the context.
describe('SlWriteSourceTool — session gating', () => {
// Worktree-scoped session fixture with its own semantic-layer service mock and an empty touched set.
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
return {
connectionId: '11111111-1111-1111-1111-111111111111',
isWorktreeScoped: true,
preHead: 'base',
touchedSlSources: createTouchedSlSources(),
actions: [],
semanticLayerService: {
loadSource: vi.fn().mockResolvedValue(null),
loadAllSources: vi.fn().mockResolvedValue([]),
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
deleteSource: vi.fn().mockResolvedValue(undefined),
listManifestSourceNames: vi.fn().mockResolvedValue([]),
isManifestBacked: vi.fn().mockResolvedValue(false),
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
} as any,
wikiService: {} as any,
configService: {} as any,
gitService: {} as any,
...overrides,
};
}
// In a worktree-scoped session the write succeeds and is recorded in the session, but search indexing is skipped.
it('skips slSearchService.indexSources when session is worktree-scoped', async () => {
const { tool, slSearchService } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: session.connectionId,
sourceName: 'my_source',
source: {
name: 'my_source',
sql: 'select 1 as id',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).not.toHaveBeenCalled();
expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'my_source')).toBe(true);
expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'my_source' }));
});
// Writing to a connection other than the session's own must tag the touched entry and the action with that connection.
it('records cross-connection SL writes with targetConnectionId', async () => {
const { tool } = makeTool();
const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' });
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: warehouseConnectionId,
sourceName: 'mapped_orders',
source: {
name: 'mapped_orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'mapped_orders')).toBe(true);
expect(session.actions).toContainEqual(
expect.objectContaining({
target: 'sl',
key: 'mapped_orders',
targetConnectionId: warehouseConnectionId,
}),
);
});
// Without a session the tool indexes the written source exactly once.
it('indexes normally when no session is present', async () => {
const { tool, slSearchService } = makeTool();
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'my_source',
source: {
name: 'my_source',
sql: 'select 1 as id',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
baseContext,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).toHaveBeenCalledTimes(1);
});
// The write must go through the session's own service mock.
it('uses session.semanticLayerService when session is present', async () => {
const { tool } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
await tool.call(
{
connectionId: session.connectionId,
sourceName: 'my_source',
source: {
name: 'my_source',
sql: 'select 1 as id',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
context,
);
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
});
});
// Validation warnings returned by validateWithProposedSource must be visible in the tool's markdown output.
describe('SlWriteSourceTool — disconnected-components warning in markdown', () => {
// A model-level warning returned by validation must appear in the markdown.
it('surfaces validation warnings (including disconnected-components) in the markdown body', async () => {
const { tool } = makeTool({
semanticLayerService: {
validateWithProposedSource: vi.fn().mockResolvedValue({
errors: [],
warnings: ['orders: disconnected-components — no join path to ACCOUNTS'],
}),
},
});
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'orders',
source: {
name: 'orders',
sql: 'select 1 as id',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
baseContext,
);
expect(result.markdown).toMatch(/disconnected-components/i);
});
// Warnings keyed by the written source (perSourceWarnings) must be rendered under an "Action required" heading.
it('renders per-source warnings prominently when the just-written source becomes a singleton component', async () => {
const { tool } = makeTool({
semanticLayerService: {
validateWithProposedSource: vi.fn().mockResolvedValue({
errors: [],
warnings: ['Model has 2 disconnected components.'],
perSourceWarnings: {
foo: ["Source 'foo' is now a singleton component (no joins to any other source)."],
},
}),
},
});
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'foo',
source: {
name: 'foo',
sql: 'select 1 as id',
grain: ['id'],
columns: [{ name: 'id', type: 'string' }],
measures: [],
joins: [],
} as any,
} as any,
baseContext,
);
expect(result.markdown).toMatch(/Action required/i);
expect(result.markdown).toContain("Source 'foo' is now a singleton component");
});
});
describe('SlWriteSourceTool — standalone shadow guard', () => {
  it('rejects standalone YAMLs that shadow a manifest entry', async () => {
    // Arrange: the service reports that a manifest entry already backs this source name.
    const isManifestBacked = vi.fn().mockResolvedValue(true);
    const { tool } = makeTool({ semanticLayerService: { isManifestBacked } });

    // A table-backed (standalone) definition reusing the manifest's name.
    const shadowingSource = {
      name: 'ACCOUNTS',
      table: 'raw.accounts',
      grain: ['id'],
      columns: [{ name: 'id', type: 'string' }],
      measures: [],
      joins: [],
    } as any;
    const input = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'ACCOUNTS',
      source: shadowingSource,
    } as any;

    // Act
    const { structured, markdown } = await tool.call(input, baseContext);

    // Assert: the write is refused and the message explains the name clash.
    expect(structured.success).toBe(false);
    expect(markdown).toMatch(/shadows an existing manifest entry|already exists/i);
  });
});

View file

@ -0,0 +1,380 @@
import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import { sourceOverlaySchema } from '../schemas.js';
import type { SemanticLayerService } from '../semantic-layer.service.js';
import type { SemanticLayerSource } from '../types.js';
import {
BaseSemanticLayerTool,
type BaseSemanticLayerToolDeps,
type SemanticLayerStructured,
sourceDefinitionSchema,
} from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/** A `source` payload is either a standalone definition or an overlay. */
const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]);

/** Allowed shape of `sourceName`: starts with a lowercase letter or digit, then lowercase letters, digits or underscores. */
const SOURCE_NAME_PATTERN = /^[a-z0-9][a-z0-9_]*$/;
const SOURCE_NAME_PATTERN_MESSAGE = 'Source name must be snake_case (lowercase alphanumeric and underscores)';

const sourceNameField = z
  .string()
  .regex(SOURCE_NAME_PATTERN, SOURCE_NAME_PATTERN_MESSAGE)
  .describe('Name of the source to create, edit, or delete');

const sourceField = sourceInputSchema
  .optional()
  .describe('Source definition (standalone with table/sql) or overlay (measures, computed columns, etc.)');

const deleteField = z.boolean().optional().describe('Set to true to delete this source entirely');

/** Input contract of the `sl_write_source` tool. */
const slWriteSourceInputSchema = z.object({
  connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
  sourceName: sourceNameField,
  source: sourceField,
  delete: deleteField,
});

type SlWriteSourceInput = z.infer<typeof slWriteSourceInputSchema>;
/**
 * Decides which connection ID, if any, to record on a session action.
 *
 * @param runConnectionId - Connection the run/session is bound to; may be absent.
 * @param actionConnectionId - Connection the action actually targeted.
 * @returns `actionConnectionId` only when the run has a (non-empty) connection that
 *   differs from it; `null` when they are equal or the run has no connection.
 */
function actionTargetConnectionId(
  runConnectionId: string | null | undefined,
  actionConnectionId: string,
): string | null {
  if (!runConnectionId) {
    return null;
  }
  if (runConnectionId === actionConnectionId) {
    return null;
  }
  return actionConnectionId;
}
/**
 * Tool that creates, fully rewrites, or deletes a semantic layer source.
 *
 * Writes are guarded by two name checks (an overlay must match a manifest entry; a
 * standalone source must not shadow one) and by `validateWithProposedSource`; a source
 * with validation errors is not saved. After a successful write or delete the search
 * index is refreshed on a best-effort basis and, when a session is attached to the
 * context, the change is recorded on it.
 */
export class SlWriteSourceTool extends BaseSemanticLayerTool<typeof slWriteSourceInputSchema> {
  readonly name = 'sl_write_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  /** Prompt-facing description of the tool. The text is part of runtime behavior; edit with care. */
  get description(): string {
    return `<purpose>
Create a new semantic layer source or fully rewrite an existing one.
If the source already exists, this tool will overwrite it with the new definition.
</purpose>
<when_to_use>
- First time creating a source definition
- When modeling a new SQL-backed source (e.g., churn risk view, ARR calculation)
- When the user asks to start over / fully rewrite a source
- Consolidating multiple sources into one (write merged definition)
- For targeted edits to existing sources (add/remove measures, update joins), prefer sl_edit_source instead
</when_to_use>
<editing_approach>
- New source: provide \`source\` with full definition
- Full rewrite: provide \`source\` (overwrites existing)
- Targeted edits on an existing source: use sl_edit_source instead
- Delete: set \`delete: true\`
</editing_approach>
<source_definition>
- name: Unique identifier for the source
- table: For physical table/view sources (e.g., "public.orders"). Mutually exclusive with sql.
- sql: For SQL-based sources (the SQL query). Mutually exclusive with table.
- grain: What one row represents (e.g., ["id"], ["customer_id", "product_id"])
- columns: All columns with type (string/number/time/boolean) and optional descriptions
- joins: Relationships to other sources (to, on, relationship: many_to_one/one_to_many/one_to_one)
- measures: Pre-defined aggregations (name, expr like "sum(amount)", optional filter, optional segments bare names of segments defined on the same source, optional description)
- segments: Named, reusable boolean predicates scoped to this source (name, expr a SQL boolean over this source's columns, optional description). A measure references one with \`segments: [name]\`; a query references one with the dotted form \`source.segment_name\`. Use when the same predicate appears on 3+ measures — e.g. extract \`is_paid = true and is_refunded = '0'\` as \`segments: [{name: paid_non_refunded, expr: "..."}]\` and have each measure use \`segments: [paid_non_refunded]\` instead of re-typing the predicate inside \`sum(case when ... then x end)\`. Segments are predicates only — they cannot be selected as dimensions or grouped by; if you need to group by the predicate, add a \`columns[]\` entry instead.
</source_definition>
<join_requirements>
Sources with joins: [] are disconnected from the semantic layer join graph and cannot be composed with other sources in semantic queries.
Before writing, use discover_data to check existing sources and their grain columns.
For each grain/key column in your source (e.g., account_id, item_id), find the matching dimension source (e.g., ACCOUNTS, ITEMS) and declare a many_to_one join.
Example: a source graining on [account_id] should declare:
joins:
- to: ACCOUNTS
on: source_name.account_id = ACCOUNTS.ACCOUNT_ID
relationship: many_to_one
The on condition format: local_column = TARGET_SOURCE.target_column (right side must include target source name).
Do NOT join back to a table that the SQL already aggregates from if the grain column is not in the output (the relationship is already baked into the SQL).
</join_requirements>`;
  }

  get inputSchema() {
    return slWriteSourceInputSchema;
  }

  /**
   * Entry point: deletes the source when `input.delete` is set, otherwise creates or
   * rewrites it from `input.source`.
   *
   * The session's `semanticLayerService` is preferred over the tool's own when the
   * context carries a session, and search indexing is skipped entirely when the
   * session is worktree-scoped.
   *
   * @param input - Parsed tool input; `delete` takes precedence over `source`.
   * @param context - Tool context; `context.session`, when present, receives the
   *   touched-source marker and an action entry after a successful change.
   * @returns A tool output whose success flag is false when the delete/write failed,
   *   when `source` is missing, or when a guard/validation rejected the source.
   */
  async call(input: SlWriteSourceInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId, sourceName } = input;
    const { name: author, email: authorEmail } = await this.authorResolver.resolve(context.userId);
    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
    const skipIndex = context.session?.isWorktreeScoped === true;

    // Handle delete
    if (input.delete) {
      try {
        await semanticLayerService.deleteSource(connectionId, sourceName, author, authorEmail);
        if (!skipIndex) {
          await this.reindexSourcesBestEffort(semanticLayerService, connectionId);
        }
        if (context.session) {
          addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName);
          context.session.actions.push({
            target: 'sl',
            type: 'removed',
            key: sourceName,
            detail: 'Deleted source',
            targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
          });
        }
        return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
      } catch (error) {
        return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
      }
    }

    // Require source for create/rewrite
    if (!input.source) {
      return this.buildOutput(
        false,
        ['Provide `source` to create or rewrite. For targeted edits, use sl_edit_source.'],
        sourceName,
      );
    }

    return this.writeFullSource(
      connectionId,
      input.source,
      sourceName,
      author,
      authorEmail,
      context,
      semanticLayerService,
      skipIndex,
    );
  }

  /**
   * Runs the guards and validation for `source` and, if they pass, writes it.
   *
   * Order of checks: orphan-overlay guard, standalone-shadow guard, then
   * `validateWithProposedSource`. Any validation error aborts before the write.
   * Whether the source already existed (decided by reading its file) only affects
   * the commit message and the recorded action type/detail.
   */
  private async writeFullSource(
    connectionId: string,
    source: z.infer<typeof sourceInputSchema>,
    sourceName: string,
    author: string,
    authorEmail: string,
    context: ToolContext,
    semanticLayerService: SemanticLayerService,
    skipIndex: boolean,
  ): Promise<ToolOutput<SemanticLayerStructured>> {
    const isOverlay = this.isOverlayShape(source);
    const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName);
    const commitMessage = existing
      ? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
      : `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
    const yamlContent = YAML.stringify(source);

    const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent);
    if (orphanError) {
      return this.buildOutput(false, [orphanError], sourceName, { yaml: yamlContent });
    }
    const shadowError = await this.rejectStandaloneShadow(semanticLayerService, connectionId, sourceName, yamlContent);
    if (shadowError) {
      return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent });
    }

    // NOTE(review): `source.name` is not compared with `sourceName` here — confirm the service reconciles them.
    const validatedSource = source as SemanticLayerSource;
    const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource);
    const validationErrors = validationResult.errors;
    const validationWarnings = [...validationResult.warnings];
    // Warnings keyed by this source's name are passed on separately as "action required".
    const actionRequiredWarnings = validationResult.perSourceWarnings?.[sourceName] ?? [];
    if (validationErrors.length > 0) {
      return this.buildOutput(false, ['Validation failed — source was NOT saved:', ...validationErrors], sourceName, {
        yaml: yamlContent,
        validationErrors,
        validationWarnings,
        actionRequiredWarnings,
      });
    }

    try {
      const result = await semanticLayerService.writeSource(
        connectionId,
        validatedSource,
        author,
        authorEmail,
        commitMessage,
      );
      if (!skipIndex) {
        await this.reindexSourcesBestEffort(semanticLayerService, connectionId);
      }
      if (context.session) {
        addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName);
        context.session.actions.push({
          target: 'sl',
          type: existing ? 'updated' : 'created',
          key: sourceName,
          detail: existing ? `Rewrote source` : `Created source`,
          targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
        });
      }
      return this.buildOutput(true, [], sourceName, {
        yaml: yamlContent,
        commitHash: result.commitHash ?? undefined,
        validationErrors,
        validationWarnings,
        actionRequiredWarnings,
      });
    } catch (error) {
      return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
    }
  }

  /**
   * Refreshes the search index for a connection without ever failing the caller.
   *
   * Runs only after the delete/write itself has succeeded, so a failure while loading
   * or indexing sources must not turn that success into a reported failure or skip the
   * session bookkeeping that follows. Both steps are therefore covered by one catch.
   */
  private async reindexSourcesBestEffort(
    semanticLayerService: SemanticLayerService,
    connectionId: string,
  ): Promise<void> {
    try {
      const allSources = await semanticLayerService.loadAllSources(connectionId);
      await this.slSearchService.indexSources(connectionId, allSources);
    } catch {
      // Deliberately ignored: indexing is best-effort and the change is already saved.
    }
  }

  /** Reads the stored file content of a source; any read failure is reported as `null`. */
  private async readSourceYamlFromService(
    service: SemanticLayerService,
    connectionId: string,
    sourceName: string,
  ): Promise<string | null> {
    try {
      const { content } = await service.readSourceFile(connectionId, sourceName);
      return content;
    } catch {
      return null;
    }
  }

  /** True when the value has neither a truthy `table` nor a truthy `sql`, i.e. it is an overlay. */
  private isOverlayShape(value: object): boolean {
    const { table, sql } = value as { table?: unknown; sql?: unknown };
    return !table && !sql;
  }

  /**
   * Classifies YAML text as an overlay or a standalone source.
   *
   * @returns `null` when the text does not parse or does not parse to an object,
   *   in which case the guards below stay silent.
   */
  private classifySourceYaml(content: string): 'overlay' | 'standalone' | null {
    let parsed: unknown;
    try {
      parsed = YAML.parse(content);
    } catch {
      return null;
    }
    if (!parsed || typeof parsed !== 'object') {
      return null;
    }
    return this.isOverlayShape(parsed) ? 'overlay' : 'standalone';
  }

  /**
   * Guard: an overlay is only valid when a manifest entry with the same name exists.
   *
   * @returns An error message (with up to three similarly named manifest entries) when
   *   the overlay has no matching manifest entry, otherwise `null`.
   */
  private async rejectOrphanOverlay(
    semanticLayerService: SemanticLayerService,
    connectionId: string,
    sourceName: string,
    content: string,
  ): Promise<string | null> {
    if (this.classifySourceYaml(content) !== 'overlay') {
      return null;
    }
    const manifestNames = await semanticLayerService.listManifestSourceNames(connectionId);
    if (manifestNames.includes(sourceName)) {
      return null;
    }
    const suggestions = this.nearestMatches(sourceName, manifestNames, 3);
    return [
      `Error: cannot write "${sourceName}" as an overlay — no manifest entry with that name exists.`,
      suggestions.length > 0
        ? ` Nearest manifest matches: ${suggestions.join(', ')}.`
        : ` No manifest entries resemble "${sourceName}".`,
      `To customize an existing base table, retarget the overlay at one of the nearest matches.`,
      `For a LookML derived_table or any source backed by inline SQL, rewrite as a standalone`,
      `curated source with a top-level "sql:" block plus explicit "grain:" and "columns:".`,
    ].join('\n');
  }

  /**
   * Guard: a standalone source (one with `table` or `sql`) must not reuse the name of a
   * manifest-backed source.
   *
   * @returns An error message explaining how to rewrite the YAML as an overlay when the
   *   name is manifest-backed, otherwise `null`.
   */
  private async rejectStandaloneShadow(
    semanticLayerService: SemanticLayerService,
    connectionId: string,
    sourceName: string,
    content: string,
  ): Promise<string | null> {
    if (this.classifySourceYaml(content) !== 'standalone') {
      return null;
    }
    const isManifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
    if (!isManifestBacked) {
      return null;
    }
    return [
      `Error: cannot write "${sourceName}" as a standalone source — a manifest entry with that name already exists.`,
      ` Writing standalone would drop the manifest's columns and joins, leaving only what you list here.`,
      `To add measures/segments on top of the manifest, rewrite this YAML as an overlay:`,
      ` - Remove "sql:", "table:", "grain:", "columns:", and "joins:".`,
      ` - Keep only "name:", plus "measures:", "segments:", and/or "description:".`,
      ` - The manifest's schema is inherited automatically.`,
      `If you really need a different base table, use a different source name.`,
    ].join('\n');
  }

  /**
   * Returns up to `limit` candidates from `haystack` that most resemble `needle`,
   * best first.
   *
   * The score is the case-insensitive Jaro-Winkler similarity plus 0.2 when either
   * string is a prefix of the other and 0.1 when either contains the other; candidates
   * scoring 0.4 or less are dropped.
   */
  private nearestMatches(needle: string, haystack: string[], limit: number): string[] {
    if (haystack.length === 0) {
      return [];
    }
    const lowerNeedle = needle.toLowerCase();
    const scored = haystack.map((candidate) => {
      const lower = candidate.toLowerCase();
      const prefixBoost = lower.startsWith(lowerNeedle) || lowerNeedle.startsWith(lower) ? 0.2 : 0;
      const substringBoost = lower.includes(lowerNeedle) || lowerNeedle.includes(lower) ? 0.1 : 0;
      const score = jaroWinkler(lowerNeedle, lower) + prefixBoost + substringBoost;
      return { candidate, score };
    });
    return scored
      .filter((entry) => entry.score > 0.4)
      .sort((left, right) => right.score - left.score)
      .slice(0, limit)
      .map((entry) => entry.candidate);
  }
}
/**
 * Jaro-Winkler similarity between two strings.
 *
 * Characters are compared by UTF-16 code unit. Identical strings score 1 and strings
 * with no matching characters score 0. The common-prefix bonus considers at most four
 * characters with a scaling factor of 0.1.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A similarity score between 0 and 1.
 */
function jaroWinkler(a: string, b: string): number {
  if (a === b) {
    return 1;
  }

  const lenA = a.length;
  const lenB = b.length;
  // Two characters only count as a match when their positions are at most `reach` apart.
  const reach = Math.max(0, Math.floor(Math.max(lenA, lenB) / 2) - 1);
  const takenA: boolean[] = Array.from({ length: lenA }, () => false);
  const takenB: boolean[] = Array.from({ length: lenB }, () => false);

  let matches = 0;
  for (let ai = 0; ai < lenA; ai++) {
    const upper = Math.min(ai + reach + 1, lenB);
    let bi = Math.max(0, ai - reach);
    // Advance to the first not-yet-taken position in the window holding the same character.
    while (bi < upper && (takenB[bi] || a.charAt(ai) !== b.charAt(bi))) {
      bi++;
    }
    if (bi < upper) {
      takenA[ai] = true;
      takenB[bi] = true;
      matches++;
    }
  }
  if (matches === 0) {
    return 0;
  }

  // Matched characters of each string, in their own original order.
  const matchedA: string[] = [];
  for (let i = 0; i < lenA; i++) {
    if (takenA[i]) {
      matchedA.push(a.charAt(i));
    }
  }
  const matchedB: string[] = [];
  for (let i = 0; i < lenB; i++) {
    if (takenB[i]) {
      matchedB.push(b.charAt(i));
    }
  }

  // Positions where the two matched sequences disagree; halved in the formula below.
  let transpositions = 0;
  matchedA.forEach((ch, idx) => {
    if (ch !== matchedB[idx]) {
      transpositions++;
    }
  });

  const jaro = (matches / a.length + matches / b.length + (matches - transpositions / 2) / matches) / 3;

  const maxPrefix = Math.min(4, lenA, lenB);
  let prefix = 0;
  for (; prefix < maxPrefix; prefix++) {
    if (a.charAt(prefix) !== b.charAt(prefix)) {
      break;
    }
  }
  return jaro + prefix * 0.1 * (1 - jaro);
}

View file

@ -0,0 +1,88 @@
/**
 * Definition of a single semantic layer source.
 *
 * `grain`, `columns`, `joins` and `measures` are required here; everything else is optional.
 * Several optional fields are namespaced under a `dbt` key.
 */
export interface SemanticLayerSource {
/** Name of the source. */
name: string;
/** Description texts keyed by string. NOTE(review): keys are presumably description origins (e.g. user/ai/dbt/db) — confirm. */
descriptions?: Record<string, string>;
/** Physical table/view backing the source. NOTE(review): expected to be mutually exclusive with `sql`, but the type does not enforce it. */
table?: string;
/** SQL query backing the source. NOTE(review): expected to be mutually exclusive with `table`, but the type does not enforce it. */
sql?: string;
/** NOTE(review): presumably names another source whose columns are reused — confirm. */
inherits_columns_from?: string;
/** Column names that together identify what one row represents. */
grain: string[];
/** Columns exposed by the source. */
columns: Array<{
name: string;
/** Column data type as a free-form string. NOTE(review): presumably one of string/number/time/boolean — confirm. */
type: string;
role?: string;
visibility?: string;
/** Description texts keyed by string, same shape as the source-level `descriptions`. */
descriptions?: Record<string, string>;
/** Optional expression. NOTE(review): presumably the SQL for a computed column — confirm. */
expr?: string;
natural_granularity?: string;
/** Column constraints recorded under the `dbt` key. */
constraints?: { dbt?: { not_null?: boolean; unique?: boolean } };
/** Enumerated values recorded under the `dbt` key. */
enum_values?: { dbt?: string[] };
/** Tests recorded under `dbt` (full entries) and `dbt_by_package` (string lists keyed by string). */
tests?: {
dbt?: Array<{ name: string; package: string; kwargs?: Record<string, unknown> }>;
dbt_by_package?: Record<string, string[]>;
};
}>;
/** Relationships to other sources. */
joins: Array<{
/** Name of the target source. */
to: string;
/** Join condition. */
on: string;
/** Relationship kind as a free-form string. NOTE(review): presumably many_to_one/one_to_many/one_to_one — confirm. */
relationship: string;
alias?: string;
source?: string;
}>;
/** Named aggregations defined on the source. */
measures: Array<{
name: string;
/** Aggregation expression. */
expr: string;
filter?: string;
/** Names of segments applied to this measure. */
segments?: string[];
description?: string;
}>;
/** Named expressions that measures and queries can reference by name. */
segments?: Array<{
name: string;
expr: string;
description?: string;
}>;
/** Default time dimension recorded under the `dbt` key. */
default_time_dimension?: { dbt?: string };
/** Tags recorded under the `dbt` key. */
tags?: { dbt?: string[] };
/** Freshness metadata recorded under the `dbt` key; `raw` is left untyped. */
freshness?: { dbt?: { raw?: unknown; loaded_at_field?: string | null } };
}
/** Input of a semantic layer query. */
export interface SemanticLayerQueryInput {
/** Measures to compute: either a string reference or an inline `{ expr, name }` definition. */
measures: Array<string | { expr: string; name: string }>;
/** Dimensions to select: either a string reference or `{ field, granularity? }`. */
dimensions: Array<string | { field: string; granularity?: string }>;
/** Optional filter expressions. */
filters?: string[];
/** Optional segment references. NOTE(review): presumably in dotted `source.segment_name` form — confirm. */
segments?: string[];
/** Optional ordering: either a string or `{ field, direction? }`. */
order_by?: Array<string | { field: string; direction?: string }>;
/** Optional row limit. */
limit?: number;
/** NOTE(review): exact semantics not visible here — confirm against the query engine. */
include_empty?: boolean;
}
/** Result of executing a semantic layer query. */
export interface SemanticLayerQueryExecutionResult {
/** SQL text associated with the query. NOTE(review): presumably the generated SQL that was executed — confirm. */
sql: string;
/** Column headers. NOTE(review): presumably aligned positionally with each entry of `rows` — confirm. */
headers: string[];
/** Result rows, each an array of untyped cell values. */
rows: unknown[][];
/** Row count. NOTE(review): may differ from `rows.length` if results are truncated — confirm. */
totalRows: number;
/** Query plan as an untyped record. */
plan: Record<string, unknown>;
}
/**
 * Reason a search result matched.
 *
 * The `(string & {})` member keeps the type open to any string while preserving the
 * listed literals as distinct union members.
 */
export type SlSearchMatchReason = 'lexical' | 'semantic' | 'dictionary' | 'token' | (string & {});
/** Dictionary values that matched for one column. */
export interface SlDictionaryMatch {
/** Column the values belong to. */
column: string;
/** Matched values. */
values: string[];
/** NOTE(review): presumably the number of further matches not included in `values` — confirm. */
overflowCount?: number;
}
/** Per-lane summary attached to search metadata. */
export interface SlSearchLaneSummary {
/** Lane identifier. */
lane: string;
/** Whether the lane was available, skipped, or failed. */
status: 'available' | 'skipped' | 'failed';
/** Candidate pool limit that was requested for the lane. */
requestedCandidatePoolLimit: number;
/** Candidate pool limit that was actually applied. */
effectiveCandidatePoolLimit: number;
/** Number of candidates the lane returned. */
returnedCandidateCount: number;
/** Lane weight. NOTE(review): presumably used when combining lane scores — confirm. */
weight: number;
/** Optional explanation. NOTE(review): presumably set for skipped/failed lanes — confirm. */
reason?: string;
}
/** Search metadata attached to a search result. */
export interface SlSearchMetadata {
/** Relevance score. NOTE(review): scale and direction are not visible here — confirm. */
score: number;
/** Reasons the result matched. */
matchReasons: SlSearchMatchReason[];
/** Optional per-column dictionary matches. */
dictionaryMatches?: SlDictionaryMatch[];
/** Optional per-lane summaries. */
lanes?: SlSearchLaneSummary[];
}