Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,154 @@
import type { ZodType } from 'zod';
import type { GitAuthorResolverPort, ToolContext, ToolOutput } from '../../tools/index.js';
import { BaseTool } from '../../tools/index.js';
import { sourceDefinitionSchema } from '../schemas.js';
import { SemanticLayerService } from '../semantic-layer.service.js';
import { SlSearchService } from '../sl-search.service.js';
export { sourceDefinitionSchema };
// ── Shared output types ──
/**
 * Machine-readable result that accompanies the markdown summary produced by
 * semantic layer write/edit tools.
 */
export interface SemanticLayerStructured {
/** Whether the operation was reported as successful. */
success: boolean;
/** Name of the source the operation targeted. */
sourceName: string;
/** YAML text of the source, when the caller supplied it. */
yaml?: string;
/** Commit hash of the write, when the caller supplied one. */
commitHash?: string;
/** Error messages; only present when at least one error was reported. */
errors?: string[];
/** Validation errors; only present when non-empty. */
validationErrors?: string[];
/** Validation warnings; only present when non-empty. */
validationWarnings?: string[];
/** Warnings rendered under an "Action required" heading; only present when non-empty. */
actionRequiredWarnings?: string[];
}
/** Services handed to the constructor of every semantic layer tool. */
export interface BaseSemanticLayerToolDeps {
/** Default semantic layer service; a session-scoped service may take precedence at call time. */
semanticLayerService: SemanticLayerService;
/** Search service used by tools that query or re-index sources. */
slSearchService: SlSearchService;
/** Resolves a user id to the author name/email used for writes. */
authorResolver: GitAuthorResolverPort;
}
// ── Abstract base class ──
/**
 * Shared base for semantic layer tools.
 *
 * Holds the injected services and offers helpers for reading a source's YAML
 * and for turning an operation result into markdown + structured output.
 */
export abstract class BaseSemanticLayerTool<TInput extends ZodType = ZodType> extends BaseTool<TInput> {
  protected readonly semanticLayerService: SemanticLayerService;
  protected readonly slSearchService: SlSearchService;
  protected readonly authorResolver: GitAuthorResolverPort;

  constructor(deps: BaseSemanticLayerToolDeps) {
    super();
    this.semanticLayerService = deps.semanticLayerService;
    this.slSearchService = deps.slSearchService;
    this.authorResolver = deps.authorResolver;
  }

  /**
   * Reads the YAML content of a source.
   *
   * Uses the session's semantic layer service when the context carries one,
   * otherwise the service injected at construction time.
   *
   * @returns the file content, or `null` when reading fails for any reason.
   */
  protected async readSourceYaml(
    connectionId: string,
    sourceName: string,
    context?: ToolContext,
  ): Promise<string | null> {
    const service = context?.session?.semanticLayerService ?? this.semanticLayerService;
    try {
      const file = await service.readSourceFile(connectionId, sourceName);
      return file.content;
    } catch {
      // Any read failure is reported to the caller as "no YAML available".
      return null;
    }
  }

  /**
   * Renders a human-readable markdown summary of an operation.
   *
   * Layout, in order: status line (plus one bullet per error on failure),
   * commit hash, action-required warnings, validation errors, validation
   * warnings, and finally the YAML (cut to 2000 characters when longer).
   */
  protected buildMarkdown(
    success: boolean,
    errors: string[],
    sourceName: string,
    extra?: {
      yaml?: string;
      commitHash?: string;
      validationErrors?: string[];
      validationWarnings?: string[];
      actionRequiredWarnings?: string[];
      editCount?: number;
    },
  ): string {
    const lines: string[] = [];

    // Emits a bold heading followed by one bullet per item; no-op for empty lists.
    const addSection = (heading: string, items: string[] | undefined): void => {
      if (!items || items.length === 0) {
        return;
      }
      lines.push(`\n**${heading}:**`);
      items.forEach((item) => lines.push(`- ${item}`));
    };

    if (!success) {
      lines.push(`Source **${sourceName}** update completed with ${errors.length} error(s):`);
      errors.forEach((message) => lines.push(`- ${message}`));
    } else {
      const action = extra?.editCount == null ? 'saved' : `applied ${extra.editCount} edit(s) to`;
      lines.push(`Source **${sourceName}** ${action} successfully.`);
    }

    if (extra?.commitHash) {
      lines.push(`Commit: \`${extra.commitHash}\``);
    }

    addSection('Action required', extra?.actionRequiredWarnings);
    addSection('Validation errors', extra?.validationErrors);
    addSection('Validation warnings', extra?.validationWarnings);

    const yamlText = extra?.yaml;
    if (yamlText) {
      const limit = 2000;
      const isTruncated = yamlText.length > limit;
      const label = isTruncated ? `**YAML** (${yamlText.length} chars, truncated)` : '**YAML**';
      const shown = isTruncated ? `${yamlText.slice(0, limit)}...` : yamlText;
      lines.push(`\n${label}:\n\`\`\`yaml\n${shown}\n\`\`\``);
    }

    return lines.join('\n');
  }

  /**
   * Builds the full tool output: the markdown summary plus the structured
   * payload. List fields are only added to the payload when they are non-empty.
   */
  protected buildOutput(
    success: boolean,
    errors: string[],
    sourceName: string,
    extra?: {
      yaml?: string;
      commitHash?: string;
      validationErrors?: string[];
      validationWarnings?: string[];
      actionRequiredWarnings?: string[];
      editCount?: number;
    },
  ): ToolOutput<SemanticLayerStructured> {
    const structured: SemanticLayerStructured = {
      success,
      sourceName,
      yaml: extra?.yaml,
      commitHash: extra?.commitHash,
    };
    if (errors.length > 0) {
      structured.errors = errors;
    }
    if (extra?.validationErrors?.length) {
      structured.validationErrors = extra.validationErrors;
    }
    if (extra?.validationWarnings?.length) {
      structured.validationWarnings = extra.validationWarnings;
    }
    if (extra?.actionRequiredWarnings?.length) {
      structured.actionRequiredWarnings = extra.actionRequiredWarnings;
    }

    return {
      markdown: this.buildMarkdown(success, errors, sourceName, extra),
      structured,
    };
  }
}

View file

@ -0,0 +1,18 @@
import { describe, expect, it } from 'vitest';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
// Pins which connection id strings the schema lets through and which it refuses.
describe('slToolConnectionIdSchema', () => {
// Valid ids are returned unchanged by parse().
it('accepts app UUIDs and local project connection ids', () => {
expect(slToolConnectionIdSchema.parse('00000000-0000-4000-8000-000000000001')).toBe(
'00000000-0000-4000-8000-000000000001',
);
expect(slToolConnectionIdSchema.parse('warehouse')).toBe('warehouse');
expect(slToolConnectionIdSchema.parse('warehouse_prod-1')).toBe('warehouse_prod-1');
});
// Empty string, path traversal, path separators, a leading dot, and whitespace must all throw.
it('rejects empty, path-like, and hidden connection ids', () => {
for (const value of ['', '../warehouse', 'warehouse/prod', '.warehouse', 'warehouse prod']) {
expect(() => slToolConnectionIdSchema.parse(value)).toThrow();
}
});
});

View file

@ -0,0 +1,6 @@
import { z } from 'zod';
/** First character must be a letter or digit; the rest may also be `_` or `-`. */
const CONNECTION_ID_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;

/**
 * Validates connection ids accepted by semantic layer tools: a non-empty
 * string matching {@link CONNECTION_ID_PATTERN}.
 */
export const slToolConnectionIdSchema = z
  .string()
  .min(1)
  .regex(CONNECTION_ID_PATTERN, 'Connection id must be alphanumeric and may contain _ or -');

View file

@ -0,0 +1,11 @@
export type { BaseSemanticLayerToolDeps, SemanticLayerStructured } from './base-semantic-layer.tool.js';
export { BaseSemanticLayerTool, sourceDefinitionSchema } from './base-semantic-layer.tool.js';
export type { SlDiscoverySettings } from './sl-discover.tool.js';
export { SlDiscoverTool } from './sl-discover.tool.js';
export { SlEditSourceTool } from './sl-edit-source.tool.js';
export { SlReadSourceTool } from './sl-read-source.tool.js';
export { SlRollbackTool } from './sl-rollback.tool.js';
export { SlValidateTool, validateSemanticLayerEndpoint } from './sl-validate.tool.js';
export { SlWriteSourceTool } from './sl-write-source.tool.js';
export type { SlValidationDeps, SourceValidationResult } from './sl-warehouse-validation.js';
export { revertSourceToPreHead, validateSingleSource } from './sl-warehouse-validation.js';

View file

@ -0,0 +1,337 @@
import { z } from 'zod';
import { DEFAULT_PRIORITY, resolveDescription } from '../descriptions.js';
import type { SemanticLayerSource } from '../types.js';
import type { ToolContext, ToolOutput } from '../../tools/index.js';
import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/** Tuning knobs for `SlDiscoverTool`. */
export interface SlDiscoverySettings {
/** Passed to the search service as the result limit when filtering by query. */
maxSources: number;
/** Passed to the search service as the minimum score when filtering by query. */
minRrfScore: number;
/** How many leading sources are rendered in full detail when a query is present. */
maxDetailedSources: number;
}
/**
 * Input accepted by `sl_discover`. Every field is optional: with nothing set
 * the tool lists sources, `query` filters them, and `sourceName` switches to
 * the single-source detail view.
 */
const slDiscoverInputSchema = z.object({
connectionId: slToolConnectionIdSchema
.optional()
.describe('Data source connection ID (omit to discover across all data sources)'),
query: z.string().optional().describe('Search query to filter sources/columns/measures by name or description'),
sourceName: z
.string()
.optional()
.describe('Inspect a specific source in full detail (requires connectionId if multiple data sources)'),
});
/** Parsed input type inferred from `slDiscoverInputSchema`. */
type SlDiscoverInput = z.infer<typeof slDiscoverInputSchema>;
/** Structured payload returned by `sl_discover`. */
interface SlDiscoverStructured {
/** One summary entry per source included in the response. */
sources: Array<{
connectionId: string;
connectionName: string;
name: string;
description?: string;
columnCount: number;
measureCount: number;
joinCount: number;
}>;
/** Optional free-form detail payload. NOTE(review): nothing in the visible tool code sets this — confirm whether it is still used. */
detail?: Record<string, unknown>;
/** Number of sources reported (after any query filtering). */
totalSources: number;
}
/** Connection entry as consumed by this tool: id, display name, and connection type. */
type SlDiscoverConnection = { id: string; name: string; connectionType: string };

/**
 * `sl_discover`: lists semantic layer sources (optionally filtered by a search
 * query) for one connection or across all of them, or shows the interface of
 * a single named source.
 */
export class SlDiscoverTool extends BaseSemanticLayerTool<typeof slDiscoverInputSchema> {
  readonly name = 'sl_discover';

  constructor(
    deps: BaseSemanticLayerToolDeps,
    private readonly discoverySettings: SlDiscoverySettings,
  ) {
    super(deps);
  }

  get description(): string {
    return `<purpose>
Discover available semantic layer sources, columns, measures, and joins.
When called without a connectionId, discovers sources across ALL data sources grouped by data source name and ID.
Use this to understand what data is available before writing a semantic_query.
</purpose>
<when_to_use>
- Before querying: understand available sources across all data sources
- To inspect a specific source in detail (columns, joins, measures, grain) — requires connectionId when multiple data sources exist
- To search for sources related to a concept (e.g., "revenue", "customers") across all data sources
</when_to_use>`;
  }

  get inputSchema() {
    return slDiscoverInputSchema;
  }

  /**
   * Entry point.
   *
   * - No `connectionId`: auto-detects. Zero connections yields an empty
   *   result, exactly one is used implicitly, several either aggregate
   *   (listing/search) or ask for a `connectionId` (when `sourceName` is set).
   * - `sourceName` set: returns the detail view of that source.
   * - Otherwise: lists (and optionally filters) the connection's sources.
   */
  async call(input: SlDiscoverInput, _context: ToolContext): Promise<ToolOutput<SlDiscoverStructured>> {
    const { query, sourceName } = input;

    // Resolve connectionId: use provided value, or auto-detect.
    let connectionId = input.connectionId;
    // Listing fetched during auto-detection; reused below to avoid a second lookup.
    let listedConnections: SlDiscoverConnection[] | undefined;
    if (!connectionId) {
      const connections = await this.semanticLayerService.listConnectionIdsWithNames();
      listedConnections = connections;
      if (connections.length === 0) {
        return {
          markdown: 'No semantic layer sources found. Run a schema scan first.',
          structured: { sources: [], totalSources: 0 },
        };
      }
      if (connections.length === 1) {
        connectionId = connections[0].id;
      } else {
        // Multiple connections — aggregate or prompt depending on operation.
        if (sourceName) {
          const connectionList = connections
            .map((c) => `- **${c.name}** (${c.connectionType}): \`${c.id}\``)
            .join('\n');
          return {
            markdown: `Multiple data sources have semantic layer sources. Specify a connectionId to inspect source "${sourceName}":\n\n${connectionList}`,
            structured: { sources: [], totalSources: 0 },
          };
        }
        return this.discoverAcrossConnections(connections, query);
      }
    }
    const resolvedConnectionId = connectionId;

    // Inspecting a specific source — show the SL interface (columns, measures, joins)
    // without the raw SQL. Use `sl_read_source` to see the full YAML including SQL.
    if (sourceName) {
      const sources = await this.semanticLayerService.loadAllSources(resolvedConnectionId);
      const source = sources.find((s) => s.name === sourceName);
      if (!source) {
        return {
          markdown: `Source **${sourceName}** not found for this connection.`,
          structured: { sources: [], totalSources: 0 },
        };
      }
      const parts: string[] = [];
      this.appendSourceDetail(parts, source);
      if (source.grain?.length) {
        parts.push(`Grain: ${source.grain.join(', ')}`);
      }
      // Use the display name when the listing is already at hand; otherwise fall
      // back to the id rather than paying for an extra lookup.
      const connectionName =
        listedConnections?.find((c) => c.id === resolvedConnectionId)?.name ?? resolvedConnectionId;
      return {
        markdown: parts.join('\n'),
        structured: {
          sources: [this.summarizeSource(resolvedConnectionId, connectionName, source)],
          totalSources: 1,
        },
      };
    }

    // Single connection: list all sources.
    const connections = listedConnections ?? (await this.semanticLayerService.listConnectionIdsWithNames());
    const connInfo = connections.find((c) => c.id === resolvedConnectionId);
    return this.discoverForConnection(resolvedConnectionId, connInfo?.name ?? resolvedConnectionId, query);
  }

  /** Lists (and optionally filters) sources for every connection, grouped per connection. */
  private async discoverAcrossConnections(
    connections: SlDiscoverConnection[],
    query?: string,
  ): Promise<ToolOutput<SlDiscoverStructured>> {
    // Load sources from all connections in parallel.
    const results = await Promise.all(
      connections.map(async (conn) => {
        const sources = await this.semanticLayerService.loadAllSources(conn.id);
        const filtered = query ? await this.filterByQuery(conn.id, sources, query) : sources;
        return { conn, sources: filtered };
      }),
    );

    const allSummaries: SlDiscoverStructured['sources'] = [];
    const parts: string[] = [];
    let totalSources = 0;
    let connectionsWithSources = 0;
    for (const { conn, sources } of results) {
      if (sources.length === 0) {
        continue;
      }
      connectionsWithSources += 1;
      totalSources += sources.length;
      parts.push(`## ${conn.name} (${conn.connectionType}) — \`${conn.id}\``);
      parts.push('');
      for (const s of sources) {
        allSummaries.push(this.summarizeSource(conn.id, conn.name, s));
      }
      this.appendTieredSources(parts, sources, !!query);
    }

    if (totalSources === 0) {
      return {
        markdown: query
          ? `No semantic layer sources found matching "${query}".`
          : 'No semantic layer sources found. Run a schema scan first, or create sources with sl_write_source.',
        structured: { sources: [], totalSources: 0 },
      };
    }

    const header = `**${totalSources} source(s) found across ${connectionsWithSources} data source(s)**${query ? ` matching "${query}"` : ''}:\n`;
    parts.unshift(header);
    return {
      markdown: parts.join('\n'),
      structured: { sources: allSummaries, totalSources },
    };
  }

  /** Lists (and optionally filters) the sources of a single connection. */
  private async discoverForConnection(
    connectionId: string,
    connectionName: string,
    query?: string,
  ): Promise<ToolOutput<SlDiscoverStructured>> {
    const sources = await this.semanticLayerService.loadAllSources(connectionId);
    if (sources.length === 0) {
      return {
        markdown: 'No semantic layer sources found. Run a schema scan first, or create sources with sl_write_source.',
        structured: { sources: [], totalSources: 0 },
      };
    }

    const filtered = query ? await this.filterByQuery(connectionId, sources, query) : sources;
    if (filtered.length === 0) {
      // Only reachable with a query; mirror the message used by the cross-connection path
      // instead of printing a "0 source(s) found" header with nothing under it.
      return {
        markdown: `No semantic layer sources found matching "${query}".`,
        structured: { sources: [], totalSources: 0 },
      };
    }

    const summaries = filtered.map((s) => this.summarizeSource(connectionId, connectionName, s));
    const parts: string[] = [`**${filtered.length} source(s) found**${query ? ` matching "${query}"` : ''}:\n`];
    this.appendTieredSources(parts, filtered, !!query);
    return {
      markdown: parts.join('\n'),
      structured: { sources: summaries, totalSources: filtered.length },
    };
  }

  /** Builds the structured summary entry for one source. */
  private summarizeSource(
    connectionId: string,
    connectionName: string,
    source: SemanticLayerSource,
  ): SlDiscoverStructured['sources'][number] {
    return {
      connectionId,
      connectionName,
      name: source.name,
      description: resolveDescription(source.descriptions, { priority: DEFAULT_PRIORITY }) ?? undefined,
      columnCount: (source.columns ?? []).length,
      measureCount: (source.measures ?? []).length,
      joinCount: (source.joins ?? []).length,
    };
  }

  /**
   * Narrows `sources` to those matching `query`.
   *
   * Prefers the search service's ranking (results keep its order); when it
   * returns nothing, falls back to a plain term match over names/descriptions.
   */
  private async filterByQuery(
    connectionId: string,
    sources: SemanticLayerSource[],
    query: string,
  ): Promise<SemanticLayerSource[]> {
    const { maxSources, minRrfScore } = this.discoverySettings;
    const searchResults = await this.slSearchService.search(connectionId, query, maxSources, minRrfScore);
    if (searchResults.length === 0) {
      return this.fallbackTermMatch(sources, query);
    }
    const rankByName = new Map(searchResults.map((r, i) => [r.sourceName, i]));
    return sources
      .filter((s) => rankByName.has(s.name))
      .sort((a, b) => (rankByName.get(a.name) ?? 0) - (rankByName.get(b.name) ?? 0));
  }

  /**
   * Case-insensitive substring match of each whitespace-separated query term
   * against the source's name, description, columns and measures; sources are
   * ordered by the number of terms they match, and non-matching ones dropped.
   */
  private fallbackTermMatch(sources: SemanticLayerSource[], query: string): SemanticLayerSource[] {
    const config = { priority: DEFAULT_PRIORITY };
    const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
    const scored = sources
      .map((s) => {
        const searchText = [
          s.name,
          resolveDescription(s.descriptions, config) ?? '',
          ...(s.columns ?? []).map((c) => `${c.name} ${resolveDescription(c.descriptions, config) ?? ''}`),
          ...(s.measures ?? []).map((m) => `${m.name} ${m.description ?? ''}`),
        ]
          .join(' ')
          .toLowerCase();
        const matchCount = terms.filter((term) => searchText.includes(term)).length;
        return { source: s, matchCount };
      })
      .filter((x) => x.matchCount > 0)
      .sort((a, b) => b.matchCount - a.matchCount);
    return scored.map((x) => x.source);
  }

  /**
   * Render sources in two tiers:
   * - With a query, the first `maxDetailedSources` entries get full detail.
   * - Everything else (all sources when there is no query) gets a one-liner
   *   with name, description, and measure/column counts.
   */
  private appendTieredSources(parts: string[], sources: SemanticLayerSource[], hasQuery: boolean): void {
    const detailLimit = hasQuery ? this.discoverySettings.maxDetailedSources : 0;
    const detailed = sources.slice(0, detailLimit);
    const rest = sources.slice(detailLimit);

    for (const s of detailed) {
      this.appendSourceDetail(parts, s);
    }
    if (rest.length === 0) {
      return;
    }

    if (detailed.length > 0) {
      parts.push('**Other sources** (pass `sourceName` to inspect):');
    }
    const defaultConfig = { priority: DEFAULT_PRIORITY };
    for (const s of rest) {
      const resolvedDesc = resolveDescription(s.descriptions, defaultConfig);
      // Separate the description from the bold name so the two do not run together.
      const desc = resolvedDesc ? ` — ${resolvedDesc}` : '';
      const measureCount = (s.measures ?? []).length;
      const columnCount = (s.columns ?? []).length;
      const stats = [measureCount > 0 ? `${measureCount} measures` : null, `${columnCount} cols`]
        .filter(Boolean)
        .join(', ');
      parts.push(`- **${s.name}**${desc} (${stats})`);
    }
    parts.push('');
  }

  /** Full detail for a single source: metadata, measures, joins, all non-hidden columns. */
  private appendSourceDetail(parts: string[], s: SemanticLayerSource): void {
    const columns = s.columns ?? [];
    const measures = s.measures ?? [];
    const joins = s.joins ?? [];

    const detailDesc = resolveDescription(s.descriptions, { priority: DEFAULT_PRIORITY });
    parts.push(`### ${s.name}${detailDesc ? ` — ${detailDesc}` : ''}`);
    parts.push(
      `Type: ${s.sql ? 'sql' : 'table'} | Columns: ${columns.length} | Measures: ${measures.length} | Joins: ${joins.length}`,
    );
    if (measures.length > 0) {
      parts.push(`Measures: ${measures.map((m) => `\`${m.name}\` (${m.expr})`).join(', ')}`);
    }
    if (joins.length > 0) {
      parts.push(`Joins: ${joins.map((j) => `${j.to} (${j.relationship})`).join(', ')}`);
    }
    const publicCols = columns.filter((c) => c.visibility !== 'hidden');
    if (publicCols.length > 0) {
      parts.push(`Columns: ${publicCols.map((c) => `\`${s.name}.${c.name}\` (${c.type})`).join(', ')}`);
    }
    parts.push('');
  }
}

View file

@ -0,0 +1,187 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js';
import { SlEditSourceTool } from './sl-edit-source.tool.js';
/**
 * Creates an `SlEditSourceTool` backed by vitest mocks.
 *
 * `overrides.semanticLayerService` / `overrides.slSearchService` replace
 * individual mocked methods; the mocks are returned so tests can assert on them.
 */
function makeTool(overrides: any = {}) {
  const existingYaml =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n';

  const semanticLayerService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: existingYaml }),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    loadAllSources: vi.fn().mockResolvedValue([]),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    ...overrides.semanticLayerService,
  };

  const slSearchService = {
    indexSources: vi.fn().mockResolvedValue(undefined),
    ...overrides.slSearchService,
  };

  const tool = new SlEditSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: slSearchService as never,
    authorResolver: { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) },
  });

  return { tool, semanticLayerService, slSearchService };
}
// Minimal session-less context shared by the tests below.
const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };
/**
 * Creates a worktree-scoped `ToolSession` whose semantic layer service is a
 * set of vitest mocks; any field can be replaced through `overrides`.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const existingYaml =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n';

  const sessionService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: existingYaml }),
    validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    loadAllSources: vi.fn().mockResolvedValue([]),
  };

  return {
    connectionId: '11111111-1111-1111-1111-111111111111',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: sessionService as any,
    wikiService: {} as any,
    configService: {} as any,
    gitService: {} as any,
    ...overrides,
  };
}
// Covers how the edit tool behaves with and without a ToolSession on the context:
// which service performs the write, whether the search index is refreshed, and
// what gets recorded on the session.
describe('SlEditSourceTool — session gating', () => {
// A worktree-scoped session must not trigger re-indexing, but the edit is still recorded on the session.
it('skips slSearchService.indexSources when session is worktree-scoped', async () => {
const { tool, slSearchService } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: session.connectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).not.toHaveBeenCalled();
expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'orders')).toBe(true);
expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
});
// Editing a connection other than the session's own must tag the action with that connection id.
it('records cross-connection SL edits with targetConnectionId', async () => {
const { tool } = makeTool();
const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' });
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
const context: ToolContext = { ...baseContext, session };
const result = await tool.call(
{
connectionId: warehouseConnectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect(result.structured.success).toBe(true);
expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'orders')).toBe(true);
expect(session.actions).toContainEqual(
expect.objectContaining({
target: 'sl',
type: 'updated',
key: 'orders',
targetConnectionId: warehouseConnectionId,
}),
);
});
// Without a session the tool re-indexes exactly once after the write.
it('indexes normally when no session is present', async () => {
const { tool, slSearchService } = makeTool();
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
baseContext,
);
expect(result.structured.success).toBe(true);
expect(slSearchService.indexSources).toHaveBeenCalledTimes(1);
});
// The session's own service, not the injected default, must perform the write.
it('uses session.semanticLayerService when session is present', async () => {
const { tool } = makeTool();
const session = makeSession();
const context: ToolContext = { ...baseContext, session };
await tool.call(
{
connectionId: session.connectionId,
sourceName: 'orders',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }],
} as any,
context,
);
expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
});
});
// Covers the "source file cannot be read" branch: the error differs depending on
// whether the service reports the source as manifest-backed.
describe('SlEditSourceTool — manifest-backed source without overlay', () => {
// Manifest-backed + unreadable file: the error must steer the caller to sl_write_source and describe the overlay shape.
it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
const { tool, semanticLayerService } = makeTool({
semanticLayerService: {
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
isManifestBacked: vi.fn().mockResolvedValue(true),
},
});
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'CONSIGNMENTS',
yaml_edits: [{ oldText: 'measures: []', newText: 'measures:\n - name: aav_count\n expr: count(*)' }],
} as any,
baseContext,
);
expect(result.structured.success).toBe(false);
expect(semanticLayerService.isManifestBacked).toHaveBeenCalledWith(
'11111111-1111-1111-1111-111111111111',
'CONSIGNMENTS',
);
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
const joinedErrors = (result.structured.errors ?? []).join('\n');
expect(joinedErrors).toContain('CONSIGNMENTS');
expect(joinedErrors).toContain('manifest');
expect(joinedErrors).toContain('sl_write_source');
expect(joinedErrors).toContain('overlay');
// Overlay shape: only name + measures/segments/description
expect(joinedErrors).toContain('measures');
expect(joinedErrors).toContain('segments');
});
// Not manifest-backed + unreadable file: the generic not-found error is returned verbatim.
it('still returns the plain "Source not found" error for truly-missing names', async () => {
const { tool, semanticLayerService } = makeTool({
semanticLayerService: {
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
isManifestBacked: vi.fn().mockResolvedValue(false),
},
});
const result = await tool.call(
{
connectionId: '11111111-1111-1111-1111-111111111111',
sourceName: 'does_not_exist',
yaml_edits: [{ oldText: 'x', newText: 'y' }],
} as any,
baseContext,
);
expect(result.structured.success).toBe(false);
expect(result.structured.errors).toEqual(['Source not found. Use sl_write_source to create it.']);
expect(semanticLayerService.isManifestBacked).toHaveBeenCalledTimes(1);
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
});
});

View file

@ -0,0 +1,200 @@
import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import { applySqlEdits } from '../../tools/sql-edit-replacer.js';
import type { SemanticLayerSource } from '../types.js';
import {
BaseSemanticLayerTool,
type BaseSemanticLayerToolDeps,
type SemanticLayerStructured,
} from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/**
 * Input accepted by `sl_edit_source`: the target source plus either a list of
 * exact-match text edits or the `delete` flag.
 */
const slEditSourceInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
sourceName: z.string().describe('Name of the source to edit'),
yaml_edits: z
.array(
z.object({
oldText: z.string().describe('Exact text to find in the current YAML. Must match exactly (byte-for-byte).'),
newText: z.string().describe('Replacement text. Use empty string to delete.'),
reason: z.string().optional().describe('Brief reason for this edit.'),
}),
)
.optional()
.describe('Targeted exact-match search/replace edits on the raw YAML content.'),
delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
});
/** Parsed input type inferred from `slEditSourceInputSchema`. */
type SlEditSourceInput = z.infer<typeof slEditSourceInputSchema>;
/**
 * Decides what to record as an action's `targetConnectionId`.
 *
 * @param runConnectionId connection the session is running against (may be absent)
 * @param actionConnectionId connection the action actually touched
 * @returns `actionConnectionId` when the session has a connection and it differs
 *          from the touched one; otherwise `null`.
 */
function actionTargetConnectionId(
  runConnectionId: string | null | undefined,
  actionConnectionId: string,
): string | null {
  if (!runConnectionId) {
    return null;
  }
  if (runConnectionId === actionConnectionId) {
    return null;
  }
  return actionConnectionId;
}
/**
 * `sl_edit_source`: applies exact-match text edits to an existing source's
 * YAML (or deletes the source), validates the result, and writes it back.
 */
export class SlEditSourceTool extends BaseSemanticLayerTool<typeof slEditSourceInputSchema> {
  readonly name = 'sl_edit_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  get description(): string {
    return `<purpose>
Make targeted edits to an existing semantic layer source using exact-match search/replace on YAML content.
If no source exists yet, use sl_write_source instead — this tool will reject the call.
</purpose>
<when_to_use>
- Adding/removing a measure on an existing source
- Adding/updating a join relationship
- Updating column descriptions
- Removing an obsolete source (set delete: true)
- Consolidation: delete redundant sources, edit the surviving one
</when_to_use>
<edit_guidelines>
- yaml_edits: exact-match search/replace on raw YAML. oldText must match byte-for-byte (no whitespace normalization or fuzzy matching).
Include enough surrounding context in oldText for a unique match.
- Read the source first with sl_read_source to copy the exact text you want to replace.
- Keep edits scoped to the user's request — don't proactively regenerate all measures.
</edit_guidelines>`;
  }

  get inputSchema() {
    return slEditSourceInputSchema;
  }

  /**
   * Runs the edit.
   *
   * Flow: resolve the author → (delete and return, if requested) → read the
   * current YAML → apply `yaml_edits` → parse → validate → write → refresh the
   * search index (skipped for worktree-scoped sessions) → record the action on
   * the session.
   *
   * Failures are reported through the returned output (`success: false` plus
   * `errors`) rather than thrown, except for author resolution.
   */
  async call(input: SlEditSourceInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId, sourceName } = input;
    const { name: author, email: authorEmail } = await this.authorResolver.resolve(context.userId);
    // A session brings its own service; fall back to the injected default.
    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
    const skipIndex = context.session?.isWorktreeScoped === true;

    // Handle delete
    if (input.delete) {
      try {
        await semanticLayerService.deleteSource(connectionId, sourceName, author, authorEmail);
        if (context.session) {
          addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName);
          context.session.actions.push({
            target: 'sl',
            type: 'removed',
            key: sourceName,
            detail: 'Deleted source',
            targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
          });
        }
        return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
      } catch (error) {
        return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
      }
    }

    // Read existing source; any read failure is treated as "no file".
    let currentYaml: string | null = null;
    try {
      const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
      currentYaml = content;
    } catch {
      currentYaml = null;
    }

    if (!currentYaml) {
      const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
      if (manifestBacked) {
        return this.buildOutput(
          false,
          [
            [
              `Source "${sourceName}" exists in the schema manifest but has no overlay file yet — sl_edit_source cannot edit it directly.`,
              `Bootstrap an overlay with sl_write_source, then re-run sl_edit_source on subsequent changes:`,
              `  name: ${sourceName}`,
              `  measures:`,
              `  - name: <measure_name>`,
              `  expr: "<expression>"`,
              `  description: "<what it measures>"`,
              `Overlay shape: "name:" plus any of "measures:", "segments:", "description:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`,
            ].join('\n'),
          ],
          sourceName,
        );
      }
      return this.buildOutput(false, ['Source not found. Use sl_write_source to create it.'], sourceName);
    }

    const errors: string[] = [];
    let yaml = currentYaml;
    let editCount = 0;

    // Apply yaml_edits (text-level search/replace, exact-match only).
    // Edit failures are collected, not fatal: processing continues with whatever was applied.
    if (input.yaml_edits && input.yaml_edits.length > 0) {
      const editResult = applySqlEdits(yaml, input.yaml_edits, { exactOnly: true });
      yaml = editResult.sql;
      editCount = editResult.appliedEdits;
      if (!editResult.success) {
        errors.push(...editResult.errors);
      }
    }

    // Parse resulting YAML
    let parsed: unknown;
    try {
      parsed = YAML.parse(yaml);
    } catch (e) {
      // Keep any edit errors gathered so far so the caller sees the whole picture.
      return this.buildOutput(false, [...errors, `YAML parse error after edits: ${e}`], sourceName);
    }
    // YAML.parse yields null for an empty document and scalars/arrays for non-mapping
    // documents; none of those is a source definition, so stop before validating or writing.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return this.buildOutput(
        false,
        [...errors, 'YAML after edits is not a mapping (source definition) — edits were NOT saved.'],
        sourceName,
        { yaml, editCount },
      );
    }
    const source = parsed as SemanticLayerSource;

    // Re-serialize and write
    const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });
    const { errors: validationErrors, warnings: validationWarnings } =
      await semanticLayerService.validateWithProposedSource(connectionId, source);
    if (validationErrors.length > 0) {
      return this.buildOutput(
        false,
        [...errors, 'Validation failed — edits were NOT saved:', ...validationErrors],
        sourceName,
        { yaml: updatedYaml, editCount, validationErrors, validationWarnings },
      );
    }

    const commitMessage = `Edit source ${sourceName}: ${
      input.yaml_edits ? `${input.yaml_edits.length} YAML edit(s)` : 'update'
    }`;
    try {
      const result = await semanticLayerService.writeSource(connectionId, source, author, authorEmail, commitMessage);

      if (!skipIndex) {
        // Index refresh is best-effort: the write above has already succeeded, so a
        // failure while loading or indexing must not be reported as a failed save.
        try {
          const allSources = await semanticLayerService.loadAllSources(connectionId);
          await this.slSearchService.indexSources(connectionId, allSources);
        } catch {
          // Intentionally ignored — the search index is refreshed on a later write.
        }
      }

      if (context.session) {
        addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName);
        context.session.actions.push({
          target: 'sl',
          type: 'updated',
          key: sourceName,
          detail: `Applied ${editCount} edit(s)`,
          targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
        });
      }

      // Success only when every requested edit applied cleanly.
      return this.buildOutput(errors.length === 0, errors, sourceName, {
        yaml: updatedYaml,
        commitHash: result.commitHash ?? undefined,
        editCount,
        validationErrors,
        validationWarnings,
      });
    } catch (error) {
      errors.push(error instanceof Error ? error.message : String(error));
      return this.buildOutput(false, errors, sourceName, { yaml: updatedYaml, editCount });
    }
  }
}

View file

@ -0,0 +1,75 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, type ToolContext } from '../../tools/index.js';
import { SlReadSourceTool } from './sl-read-source.tool.js';
/**
 * Creates an `SlReadSourceTool` whose default semantic layer service is a
 * vitest mock returning `foo_default` YAML; `overrides.semanticLayerService`
 * can replace its methods.
 */
function makeTool(overrides: Partial<Record<string, any>> = {}) {
  const defaultRead = vi.fn().mockResolvedValue({ content: 'name: foo_default\n', path: 'default' });
  const semanticLayerService = {
    readSourceFile: defaultRead,
    ...overrides.semanticLayerService,
  };

  const tool = new SlReadSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: {} as never,
    authorResolver: { resolve: vi.fn() },
  });

  return { tool, semanticLayerService };
}
/** Returns a minimal `ToolContext`, with any field replaceable via `overrides`. */
function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
  const defaults = { sourceId: 'src', messageId: 'msg', userId: 'user' };
  return { ...defaults, ...overrides };
}
/**
 * Creates a worktree-scoped `ToolSession` whose semantic layer service is a
 * vitest mock returning `foo_session` YAML; any field can be overridden.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const sessionRead = vi.fn().mockResolvedValue({ content: 'name: foo_session\n', path: 'session' });

  return {
    connectionId: '11111111-1111-1111-1111-111111111111',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    semanticLayerService: { readSourceFile: sessionRead } as any,
    wikiService: {} as any,
    configService: {} as any,
    gitService: {} as any,
    ...overrides,
  };
}
// Verifies which semantic layer service the read tool uses depending on whether
// the context carries a session.
describe('SlReadSourceTool - session-scoped reads', () => {
// With a session, only the session's service may be hit and its YAML is returned.
it('reads through context.session.semanticLayerService when a session is present', async () => {
const { tool, semanticLayerService } = makeTool();
const session = makeSession();
const result = await tool.call(
{ connectionId: '11111111-1111-1111-1111-111111111111', sourceName: 'foo' },
makeContext({ session }),
);
expect((session.semanticLayerService as any).readSourceFile).toHaveBeenCalledWith(
'11111111-1111-1111-1111-111111111111',
'foo',
);
expect(semanticLayerService.readSourceFile).not.toHaveBeenCalled();
expect(result.structured.yaml).toContain('foo_session');
});
// Without a session, the service injected into the tool is used.
it('reads through the default service when no session is present', async () => {
const { tool, semanticLayerService } = makeTool();
const result = await tool.call(
{ connectionId: '11111111-1111-1111-1111-111111111111', sourceName: 'foo' },
makeContext(),
);
expect(semanticLayerService.readSourceFile).toHaveBeenCalledWith('11111111-1111-1111-1111-111111111111', 'foo');
expect(result.structured.yaml).toContain('foo_default');
});
});

View file

@ -0,0 +1,63 @@
import { z } from 'zod';
import type { ToolContext, ToolOutput } from '../../tools/index.js';
import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/** Zod schema for the `sl_read_source` tool input: which connection and which source to read. */
const slReadSourceInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
sourceName: z.string().describe('Name of the source to read'),
});
/** Input type inferred from {@link slReadSourceInputSchema}. */
type SlReadSourceInput = z.infer<typeof slReadSourceInputSchema>;
/** Structured payload returned by `sl_read_source`. */
interface SlReadSourceStructured {
// Echo of the requested source name.
sourceName: string;
// Raw YAML of the source; the tool sets this to '' when the source could not be read.
yaml: string;
}
/**
 * Tool that returns the raw YAML definition of one semantic-layer source.
 *
 * The read goes through `readSourceYaml` on the base class, which is handed the tool
 * context alongside the connection id and source name.
 */
export class SlReadSourceTool extends BaseSemanticLayerTool<typeof slReadSourceInputSchema> {
  readonly name = 'sl_read_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  /** Prompt-facing description of what the tool does and when (not) to use it. */
  get description(): string {
    return `<purpose>
Read the raw YAML definition of a semantic layer source, including its SQL implementation.
Use this when you need to understand how a source is built e.g., before editing it with sl_edit_source or sl_write_source.
</purpose>
<when_to_use>
- Before editing a source: understand its full definition (SQL, columns, measures, joins)
- When debugging a source: see the underlying SQL query
- When creating a new source based on an existing one
</when_to_use>
<when_not_to_use>
- To discover what sources/measures/dimensions are available for querying use sl_discover instead
- To query data use semantic_query or create_widget with slQuery
</when_not_to_use>`;
  }

  /** Zod schema used to validate the tool input. */
  get inputSchema() {
    return slReadSourceInputSchema;
  }

  /**
   * Reads the source YAML and renders it as a fenced markdown block.
   *
   * @param input connection id and source name to read.
   * @param context tool context, forwarded to `readSourceYaml`.
   * @returns the YAML in both markdown and structured form; when nothing (or an empty
   *   document) comes back, a "not found" message with an empty `yaml` field.
   */
  async call(input: SlReadSourceInput, context: ToolContext): Promise<ToolOutput<SlReadSourceStructured>> {
    const yaml = await this.readSourceYaml(input.connectionId, input.sourceName, context);

    if (yaml) {
      const rendered = [`## Source: ${input.sourceName}`, '', '```yaml', yaml, '```'].join('\n');
      return {
        markdown: rendered,
        structured: { sourceName: input.sourceName, yaml },
      };
    }

    // Falsy covers both `null` (read failed) and an empty document.
    return {
      markdown: `Source **${input.sourceName}** not found for connection ${input.connectionId}.`,
      structured: { sourceName: input.sourceName, yaml: '' },
    };
  }
}

View file

@ -0,0 +1,67 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js';
import { SlRollbackTool } from './sl-rollback.tool.js';
/**
 * Creates a session that has already touched the `orders` source on `conn-1` and recorded
 * one SL action for it, with stubbed config and git services so a rollback can run.
 *
 * `gitService.getFileAtCommit` resolves to `'pre: content'`, i.e. the file existed at
 * `preHead`.
 *
 * @param overrides session fields that replace the defaults.
 */
function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
  const configService = {
    writeFile: vi.fn().mockResolvedValue(undefined),
    deleteFile: vi.fn().mockResolvedValue(undefined),
  };
  const gitService = { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') };

  const defaults: ToolSession = {
    connectionId: 'conn-1',
    isWorktreeScoped: true,
    preHead: 'base',
    touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]),
    actions: [{ target: 'sl', type: 'updated', key: 'orders', detail: 'x' }],
    semanticLayerService: {} as any,
    wikiService: {} as any,
    configService: configService as any,
    gitService: gitService as any,
  };

  return { ...defaults, ...overrides };
}
describe('SlRollbackTool', () => {
  // Connection catalog stub shared by all specs; none of them configure its return values.
  const connections = {
    getConnectionById: vi.fn(),
    listEnabledConnections: vi.fn(),
    executeQuery: vi.fn(),
  };
  const rollbackInput = { sourceName: 'orders' } as any;

  // Builds a context with fixed ids, optionally carrying a session.
  const contextFor = (session?: ToolSession): ToolContext =>
    session ? { sourceId: 's', messageId: 'm', userId: 'u', session } : { sourceId: 's', messageId: 'm', userId: 'u' };

  it('errors when context.session is absent', async () => {
    const tool = new SlRollbackTool({} as never, connections as never, 1);

    const output = await tool.call(rollbackInput, contextFor());

    expect(output.structured.success).toBe(false);
    expect(output.markdown).toMatch(/session/i);
  });

  it('errors when session has no connectionId (wiki-only turn)', async () => {
    const session = makeSession({ connectionId: null });
    const tool = new SlRollbackTool({} as never, connections as never, 1);

    const output = await tool.call(rollbackInput, contextFor(session));

    expect(output.structured.success).toBe(false);
    expect(output.markdown).toMatch(/connection-scoped session/i);
    // Session state untouched
    expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(true);
    expect((session.gitService as any).getFileAtCommit).not.toHaveBeenCalled();
  });

  it('restores the source content from preHead, clears touched set, prunes actions', async () => {
    const session = makeSession();
    const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };
    const tool = new SlRollbackTool(slSourcesRepository as never, connections as never, 1);

    const output = await tool.call(rollbackInput, contextFor(session));

    expect(output.structured.success).toBe(true);
    const git = session.gitService as any;
    const config = session.configService as any;
    expect(git.getFileAtCommit).toHaveBeenCalledWith(expect.stringContaining('orders.yaml'), 'base');
    expect(config.writeFile).toHaveBeenCalled();
    expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
    expect(session.actions).toEqual([]);
  });
});

View file

@ -0,0 +1,87 @@
import { z } from 'zod';
import { BaseTool, deleteTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
import { revertSourceToPreHead } from './sl-warehouse-validation.js';
/** Zod schema for the `sl_rollback` tool input; the connection is taken from the session, not the input. */
const slRollbackInputSchema = z.object({
sourceName: z.string().describe('Name of the source to roll back'),
});
/** Input type inferred from {@link slRollbackInputSchema}. */
type SlRollbackInput = z.infer<typeof slRollbackInputSchema>;
/** Structured payload returned by `sl_rollback`. */
interface SlRollbackStructured {
// False when the tool refused to run (no session, or no connection on the session).
success: boolean;
// Echo of the requested source name.
sourceName: string;
// Short description returned by revertSourceToPreHead; only set when the rollback ran.
outcome?: string;
}
/**
 * Tool that abandons the current session's changes to one semantic-layer source by
 * reverting it to its content at the session's `preHead`, then forgets the source in the
 * session's touched set and action log.
 */
export class SlRollbackTool extends BaseTool<typeof slRollbackInputSchema> {
  readonly name = 'sl_rollback';

  constructor(
    private readonly slSourcesRepository: SlSourcesIndexPort,
    private readonly connections: SlConnectionCatalogPort,
    private readonly probeRowCount: number,
  ) {
    super();
  }

  /** Prompt-facing description of the tool. */
  get description(): string {
    return `<purpose>
Abandon this-session changes to a source and restore it to its pre-session state.
Use when a write/edit failed validation in a way you cannot fix in-session (e.g. the source requires elevated warehouse permissions).
</purpose>`;
  }

  /** Zod schema used to validate the tool input. */
  get inputSchema() {
    return slRollbackInputSchema;
  }

  /**
   * Reverts `input.sourceName` for the session's connection.
   *
   * @param input name of the source to roll back.
   * @param context tool context; must carry a session with a non-empty `connectionId`.
   * @returns a failure output when there is no session or no connection on it; otherwise a
   *   success output whose `outcome` is the description from `revertSourceToPreHead`.
   */
  async call(input: SlRollbackInput, context: ToolContext): Promise<ToolOutput<SlRollbackStructured>> {
    const { sourceName } = input;
    const refuse = (markdown: string): ToolOutput<SlRollbackStructured> => ({
      markdown,
      structured: { success: false, sourceName },
    });

    const session = context.session;
    if (!session) {
      return refuse(
        'Error: sl_rollback requires an active session (ingest WU or memory-agent). Use git revert for interactive rollback.',
      );
    }

    const connectionId = session.connectionId;
    if (!connectionId) {
      return refuse(
        'Error: sl_rollback requires a connection-scoped session; this session has no warehouse connection.',
      );
    }

    const outcome = await revertSourceToPreHead(
      {
        semanticLayerService: session.semanticLayerService,
        connections: this.connections,
        configService: session.configService,
        gitService: session.gitService,
        slSourcesRepository: this.slSourcesRepository,
        probeRowCount: this.probeRowCount,
      },
      connectionId,
      session.preHead,
      sourceName,
    );

    deleteTouchedSlSource(session.touchedSlSources, connectionId, sourceName);

    // Drop every SL action recorded for this source on this connection. An action without
    // an explicit targetConnectionId counts as belonging to the session's connection.
    // The array is rewritten in place so existing references to `session.actions` stay valid.
    const remaining = session.actions.filter(
      (action) =>
        !(
          action.target === 'sl' &&
          action.key === sourceName &&
          (action.targetConnectionId ?? connectionId) === connectionId
        ),
    );
    session.actions.splice(0, session.actions.length, ...remaining);

    return {
      markdown: `Source "${sourceName}" rolled back: ${outcome}.`,
      structured: { success: true, sourceName, outcome },
    };
  }
}

View file

@ -0,0 +1,66 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, type ToolContext } from '../../tools/index.js';
import type { SemanticLayerService } from '../semantic-layer.service.js';
import type { SemanticLayerSource } from '../types.js';
import { SlValidateTool, validateSemanticLayerEndpoint } from './sl-validate.tool.js';
describe('validateSemanticLayerEndpoint', () => {
  it('uses the connection warehouse dialect, not hardcoded postgres', async () => {
    const serviceMock = {
      validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    };

    await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService);

    // Only the connection id is forwarded; no dialect argument is passed along.
    expect(serviceMock.validateSourcesForConnection).toHaveBeenCalledWith('conn-1');
  });

  it('short-circuits when there are no validatable sources', async () => {
    const serviceMock = {
      validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
    };

    const result = await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService);

    expect(result).toEqual({ errors: [], warnings: [] });
  });

  // Covers the catch branch: a throwing service must surface as a report, not a rejection.
  it('reports a thrown validation failure as a single error instead of rejecting', async () => {
    const serviceMock = {
      validateSourcesForConnection: vi.fn().mockRejectedValue(new Error('boom')),
    };

    const result = await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService);

    expect(result).toEqual({ errors: ['Validation call failed: boom'], warnings: [] });
  });
});
describe('SlValidateTool — session-aware touched-set filtering', () => {
  // Minimal table-backed source fixture.
  const tableSource = (name: string): SemanticLayerSource => ({
    name,
    table: `x.${name}`,
    grain: ['id'],
    columns: [],
    joins: [],
    measures: [],
  });

  it('when session present, only returns errors/warnings that mention touched sources', async () => {
    const sources: SemanticLayerSource[] = [tableSource('orders'), tableSource('customers')];
    const report = {
      errors: ['orders: missing join target', 'customers: invalid grain'],
      warnings: ['orders: disconnected-components warning'],
    };
    const serviceMock = {
      loadAllSources: vi.fn().mockResolvedValue(sources),
      validateSourcesForConnection: vi.fn().mockResolvedValue(report),
    };

    const tool = new SlValidateTool({
      semanticLayerService: serviceMock as never,
      slSearchService: {} as never,
      authorResolver: { resolve: vi.fn() },
    });

    // Only `orders` was touched in this session, so the `customers` error must be dropped.
    const session: ToolSession = {
      connectionId: 'conn-1',
      isWorktreeScoped: true,
      preHead: null,
      touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]),
      actions: [],
      semanticLayerService: serviceMock as any,
      wikiService: {} as any,
      configService: {} as any,
      gitService: {} as any,
    };
    const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };

    const output = await tool.call({ connectionId: 'conn-1' } as any, context);

    expect(output.structured.validationErrors).toEqual(['orders: missing join target']);
    expect(output.structured.validationWarnings).toEqual(['orders: disconnected-components warning']);
  });
});

View file

@ -0,0 +1,130 @@
import { z } from 'zod';
import { type ToolContext, type ToolOutput, touchedSlSourceNamesForConnection } from '../../tools/index.js';
import { SemanticLayerService } from '../semantic-layer.service.js';
import {
BaseSemanticLayerTool,
type BaseSemanticLayerToolDeps,
type SemanticLayerStructured,
} from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/** Zod schema for the `sl_validate` tool input: the connection whose sources are validated. */
const slValidateInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
});
/** Input type inferred from {@link slValidateInputSchema}. */
type SlValidateInput = z.infer<typeof slValidateInputSchema>;
/** Result of a validation pass: human-readable error and warning messages. */
type ValidationReport = {
errors: string[];
warnings: string[];
};
/**
 * Runs connection-wide validation through the given service and never rejects.
 *
 * @param connectionId connection whose sources are validated.
 * @param semanticLayerService service whose `validateSourcesForConnection` is invoked.
 * @returns the service's report, or — if the call throws — a report holding a single
 *   `Validation call failed: …` error and no warnings.
 */
export async function validateSemanticLayerEndpoint(
  connectionId: string,
  semanticLayerService: SemanticLayerService,
): Promise<ValidationReport> {
  try {
    const report = await semanticLayerService.validateSourcesForConnection(connectionId);
    return report;
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    return { errors: [`Validation call failed: ${reason}`], warnings: [] };
  }
}
/**
 * Tool that validates all semantic-layer sources of a connection and reports the findings.
 *
 * When the tool context carries a session, the session's semantic-layer service is used
 * and findings are narrowed to the sources the session has touched on that connection.
 */
export class SlValidateTool extends BaseSemanticLayerTool<typeof slValidateInputSchema> {
  readonly name = 'sl_validate';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  /** Prompt-facing description of what the tool checks and when to use it. */
  get description(): string {
    return `<purpose>
Validate that all semantic layer sources for a connection form a consistent model.
Checks: all join targets exist, grain is valid, no missing references.
</purpose>
<when_to_use>
- After making edits with sl_write_source
- Before querying, to ensure the model is healthy
- When troubleshooting query failures
</when_to_use>`;
  }

  /** Zod schema used to validate the tool input. */
  get inputSchema() {
    return slValidateInputSchema;
  }

  /**
   * Validates the connection's sources and renders a markdown + structured report.
   *
   * @param input connection to validate.
   * @param context tool context; an attached session supplies the service and touched set.
   * @returns `success: true` only when no errors remain after filtering. A failure of the
   *   validation call itself is always reported, even when it names no touched source.
   */
  async call(input: SlValidateInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    // Prefix of the synthetic error produced when the validation call itself throws.
    // Must match the message built by validateSemanticLayerEndpoint in this module.
    const callFailurePrefix = 'Validation call failed:';

    const { connectionId } = input;
    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;

    const sources = await semanticLayerService.loadAllSources(connectionId);
    if (sources.length === 0) {
      return this.buildOutput(true, [], '(all)', {
        validationErrors: ['No sources found for this connection.'],
      });
    }

    let { errors, warnings } = await validateSemanticLayerEndpoint(connectionId, semanticLayerService);

    const touched = context.session?.touchedSlSources;
    if (touched && touched.size > 0) {
      const touchedNames = touchedSlSourceNamesForConnection(touched, connectionId);
      if (touchedNames.length > 0) {
        const mentionsTouched = (message: string): boolean => touchedNames.some((n) => message.includes(n));
        // Keep the call-failure error: it names no source, so filtering it out would
        // report a validation that never ran as a clean pass.
        errors = errors.filter((e) => e.startsWith(callFailurePrefix) || mentionsTouched(e));
        warnings = warnings.filter(mentionsTouched);
      }
    }

    const valid = errors.length === 0;
    const parts: string[] = [];
    parts.push(`**Semantic layer validation** for ${sources.length} source(s):`);

    if (valid && warnings.length === 0) {
      parts.push('All sources are valid. Join graph is consistent.');
    } else {
      const summary: string[] = [];
      if (errors.length > 0) {
        summary.push(`${errors.length} error(s)`);
      }
      if (warnings.length > 0) {
        summary.push(`${warnings.length} warning(s)`);
      }
      parts.push(`Found ${summary.join(' and ')}:`);

      if (errors.length > 0) {
        parts.push('', '**Errors:**');
        for (const err of errors) {
          parts.push(`- ${err}`);
        }
      }
      if (warnings.length > 0) {
        parts.push('', '**Warnings:**');
        for (const warn of warnings) {
          parts.push(`- ${warn}`);
        }
      }
    }

    // List sources summary
    parts.push('\n**Sources:**');
    for (const s of sources) {
      parts.push(
        `- **${s.name}** (${s.sql ? 'sql' : 'table'}): ${s.columns.length} cols, ${s.measures.length} measures, ${s.joins.length} joins`,
      );
    }

    return {
      markdown: parts.join('\n'),
      structured: {
        success: valid,
        sourceName: '(all)',
        validationErrors: errors.length > 0 ? errors : undefined,
        validationWarnings: warnings.length > 0 ? warnings : undefined,
      },
    };
  }
}

View file

@ -0,0 +1,120 @@
import { describe, expect, it, vi } from 'vitest';
import { validateSingleSource } from './sl-warehouse-validation.js';
/**
 * Builds the dependency bag for `validateSingleSource` specs.
 *
 * @param opts.sourceYaml YAML text the stubbed `readSourceFile` resolves with.
 * @param opts.executeQuery mock used as the connection catalog's `executeQuery`, i.e. the
 *   warehouse probe.
 * @returns stubs for every dependency; the connection lookup reports a `bigquery`
 *   connection and `probeRowCount` is 1.
 */
function makeDeps(opts: { sourceYaml: string; executeQuery: ReturnType<typeof vi.fn> }) {
  const { sourceYaml, executeQuery } = opts;

  const semanticLayerService = {
    readSourceFile: vi.fn().mockResolvedValue({ content: sourceYaml, path: 'x' }),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    listManifestSourceNames: vi.fn().mockResolvedValue([]),
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue([]),
  } as never;

  const connections = {
    executeQuery,
    getConnectionById: vi.fn().mockResolvedValue({ id: 'conn-1', name: 'conn-1', connectionType: 'bigquery' }),
    listEnabledConnections: vi.fn().mockResolvedValue([]),
  } as never;

  const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) } as never;

  return {
    semanticLayerService,
    connections,
    configService: {} as never,
    gitService: {} as never,
    slSourcesRepository,
    probeRowCount: 1,
  };
}
// Specs for the warehouse probe step of validateSingleSource. Each case feeds a YAML
// fixture through makeDeps() and controls the probe outcome via the executeQuery mock.
describe('validateSingleSource warehouse dry-run', () => {
// Probe rejects: the warehouse message and the dry-run headline must both reach the errors.
it('surfaces warehouse error when dry-run fails on unknown column', async () => {
const yaml = `name: fct_arr_delta
source_type: sql
sql: |
SELECT * FROM analytics.fct_arr_delta WHERE date_date < CURRENT_DATE()
grain: [date_date]
columns:
- name: date_date
type: time
measures:
- name: count_delta_events
expr: count(*)
joins: []
`;
const executeQuery = vi.fn().mockRejectedValue(new Error('Unrecognized name: date_date at [1:42]'));
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta');
expect(result.errors.join('\n')).toMatch(/Unrecognized name: date_date/);
expect(result.errors.join('\n')).toMatch(/embedded sql dry-run failed/);
});
// Probe succeeds but returns `date` where the YAML declares `date_date`.
it('flags declared columns missing from the dry-run result', async () => {
const yaml = `name: fct_arr_delta
source_type: sql
sql: |
SELECT date, customer_id FROM analytics.fct_arr_delta
columns:
- name: date_date
type: time
- name: customer_id
type: string
measures:
- name: count_delta
expr: count(*)
joins: []
grain: [customer_id]
`;
const executeQuery = vi.fn().mockResolvedValue({
headers: ['date', 'customer_id'],
rows: [],
totalRows: 0,
error: null,
});
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta');
expect(result.errors.join('\n')).toMatch(/declared columns absent from sql result — date_date/);
expect(result.errors.join('\n')).toMatch(/warehouse returned:/);
});
// Happy path: probe headers cover every declared column, so no errors are expected.
it('passes cleanly when dry-run succeeds and declared columns match', async () => {
const yaml = `name: lab_results
source_type: sql
sql: |
SELECT lab_order_id, admin_user_id FROM analytics.raw_lab_results
grain: [lab_order_id]
columns:
- name: lab_order_id
type: string
- name: admin_user_id
type: string
measures:
- name: count_lab_results
expr: count(lab_order_id)
joins: []
`;
const executeQuery = vi.fn().mockResolvedValue({
headers: ['lab_order_id', 'admin_user_id'],
rows: [],
totalRows: 0,
error: null,
});
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
const result = await validateSingleSource(deps, 'conn-1', 'lab_results');
expect(result.errors).toEqual([]);
});
// makeDeps() sets probeRowCount to 1, so the probe SQL must use the single-row wrapper.
it('uses LIMIT 1 (not LIMIT 0) so runtime policies fire', async () => {
const yaml = `name: foo
source_type: sql
sql: |
SELECT a FROM analytics.bar
grain: [a]
columns:
- {name: a, type: string}
measures: []
joins: []
`;
const executeQuery = vi.fn().mockResolvedValue({ headers: ['a'], rows: [], totalRows: 0, error: null });
const deps = makeDeps({ sourceYaml: yaml, executeQuery });
await validateSingleSource(deps, 'conn-1', 'foo');
// Second positional argument of the first executeQuery call is the probe SQL text.
const probeSql = executeQuery.mock.calls[0][1] as string;
expect(probeSql).toMatch(/LIMIT 1\b/);
expect(probeSql).not.toMatch(/LIMIT 0\b/);
});
});

View file

@ -0,0 +1,325 @@
import YAML from 'yaml';
import type { GitService, KloFileStorePort } from '../../core/index.js';
import { SYSTEM_GIT_AUTHOR } from '../../tools/index.js';
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
import { sourceOverlaySchema } from '../schemas.js';
import { SemanticLayerService } from '../semantic-layer.service.js';
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
/** Collaborators needed by the single-source validation and revert helpers in this module. */
export interface SlValidationDeps {
// Reads source files, answers manifest questions, and runs composed measure probes.
semanticLayerService: SemanticLayerService;
// Looks up connections (for the warehouse type) and executes probe SQL.
connections: SlConnectionCatalogPort;
// File store used to write back or delete source files during a revert.
configService: KloFileStorePort;
// Used to fetch a source file's content at a given commit.
gitService: GitService;
// Source index; entries are removed when a revert deletes a source.
slSourcesRepository: SlSourcesIndexPort;
// Probe size: 0 selects the zero-row wrapper, any other value the single-row wrapper
// (when the dialect is known); with an unknown dialect it is used as the LIMIT value.
probeRowCount: number;
}
/** Outcome of validating one source: blocking errors plus hint-style warnings. */
export interface SourceValidationResult {
errors: string[];
warnings: string[];
}
/**
 * Repository-relative path of a source's YAML file:
 * `semantic-layer/<connectionId>/<sourceName>.yaml`.
 */
function slSourcePath(connectionId: string, sourceName: string): string {
  const segments = ['semantic-layer', connectionId, `${sourceName}.yaml`];
  return segments.join('/');
}
/**
 * Maps a warehouse type to a SQL dialect via `SemanticLayerService.mapDialect`.
 *
 * @returns `null` when no warehouse type is known (null or empty string).
 */
function resolveDialect(warehouse: string | null): string | null {
  return warehouse ? SemanticLayerService.mapDialect(warehouse) : null;
}
/**
 * Wraps `sql` as a subquery that returns its columns but no rows.
 *
 * Uses `TOP 0` for the `tsql` dialect and `LIMIT 0` for every other dialect.
 *
 * @param sql inner query, without a trailing semicolon.
 * @param dialect SQL dialect name.
 */
function wrapWithZeroRowQuery(sql: string, dialect: string): string {
  // The newline before ")" keeps a trailing `-- comment` in `sql` from commenting out the
  // closing parenthesis and alias.
  if (dialect === 'tsql') {
    return `SELECT TOP 0 * FROM (${sql}\n) AS _discovery`;
  }
  return `SELECT * FROM (${sql}\n) AS _discovery LIMIT 0`;
}
/**
 * Wraps `sql` as a subquery limited to a single row.
 *
 * Uses `TOP 1` for the `tsql` dialect and `LIMIT 1` for every other dialect.
 *
 * @param sql inner query, without a trailing semicolon.
 * @param dialect SQL dialect name.
 */
function wrapWithSingleRowQuery(sql: string, dialect: string): string {
  // The newline before ")" keeps a trailing `-- comment` in `sql` from commenting out the
  // closing parenthesis and alias.
  if (dialect === 'tsql') {
    return `SELECT TOP 1 * FROM (${sql}\n) AS _base`;
  }
  return `SELECT * FROM (${sql}\n) AS _base LIMIT 1`;
}
/**
 * Validate one SL source end-to-end: YAML parse, Zod schema, duplicate-measure detection,
 * warehouse dry-run (`SELECT * FROM (sql) LIMIT 1` forces runtime policy enforcement).
 *
 * Returns errors and hint-style warnings. An empty errors array means the YAML is
 * structurally valid AND the warehouse can execute a probe against its embedded sql.
 *
 * Steps, each of which returns early on failure unless noted:
 * 1. read the source file;
 * 2. parse the YAML and require a top-level mapping;
 * 3. reject a standalone source (has `table`/`sql`) that shadows a manifest entry;
 * 4. validate against the overlay or full source schema, adding format hints as warnings;
 * 5. flag duplicate measure names (does not return early);
 * 6. probe the warehouse: embedded `sql` is wrapped and executed and the declared columns
 *    are compared case-insensitively with the returned headers; overlays instead have
 *    each measure probed through the semantic-layer engine.
 *
 * @param deps collaborators used for file access, connection lookup and probing.
 * @param connectionId connection the source belongs to.
 * @param sourceName name of the source (its file is `<sourceName>.yaml`).
 */
export async function validateSingleSource(
  deps: SlValidationDeps,
  connectionId: string,
  sourceName: string,
): Promise<SourceValidationResult> {
  const errors: string[] = [];
  const warnings: string[] = [];

  let content: string;
  try {
    const result = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
    content = result.content;
  } catch {
    errors.push(`${sourceName}.yaml: file not found`);
    return { errors, warnings };
  }

  let parsed: Record<string, unknown>;
  try {
    parsed = YAML.parse(content);
  } catch (e) {
    errors.push(`${sourceName}.yaml: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
    return { errors, warnings };
  }
  // A top-level list is `typeof 'object'` too, but it is not a source definition.
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
    errors.push(`${sourceName}.yaml: top-level content is not an object`);
    return { errors, warnings };
  }

  // A document with neither `table` nor `sql` is treated as an overlay.
  const isOverlay = !parsed.table && !parsed.sql;
  if (!isOverlay) {
    const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
    if (isManifestBacked) {
      errors.push(
        `${sourceName}.yaml: standalone source shadows an existing manifest entry — ` +
          `writing it as-is drops the manifest's columns and joins. ` +
          `Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` +
          `"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` +
          `that inherits the manifest schema. Call sl_describe_table to see it first.`,
      );
      return { errors, warnings };
    }
  }

  const schema = isOverlay ? sourceOverlaySchema : sourceDefinitionSchema;
  const result = schema.safeParse(parsed);
  if (!result.success) {
    const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
    errors.push(`${sourceName}.yaml: schema — ${issues}`);

    // Attach a format hint for each top-level section that has at least one issue.
    const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
    if (errorPaths.has('joins')) {
      warnings.push(
        `${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
      );
    }
    if (errorPaths.has('columns')) {
      warnings.push(
        `${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Do NOT include base table columns.`,
      );
    }
    if (errorPaths.has('measures')) {
      warnings.push(
        `${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`,
      );
    }
    return { errors, warnings };
  }

  const measures = (parsed.measures as Array<{ name: string }> | undefined) ?? [];
  const seenMeasures = new Set<string>();
  for (const m of measures) {
    if (seenMeasures.has(m.name)) {
      errors.push(`${sourceName}.yaml: duplicate measure name "${m.name}"`);
    }
    seenMeasures.add(m.name);
  }

  // Best effort: an unknown warehouse type only changes how the probe is wrapped.
  let warehouse: string | null = null;
  try {
    const connection = await deps.connections.getConnectionById(connectionId);
    warehouse = connection?.connectionType ?? null;
  } catch {
    warehouse = null;
  }

  if (typeof parsed.sql === 'string' && parsed.sql.trim().length > 0) {
    // Trailing semicolons would be a syntax error inside the wrapping subquery.
    const innerSql = parsed.sql.trim().replace(/;+\s*$/, '');
    const probeRowCount = deps.probeRowCount;
    const dialect = resolveDialect(warehouse);

    let probeSql: string;
    if (dialect) {
      probeSql =
        probeRowCount === 0 ? wrapWithZeroRowQuery(innerSql, dialect) : wrapWithSingleRowQuery(innerSql, dialect);
    } else {
      // The newline before ")" keeps a trailing `-- comment` from swallowing the wrapper.
      probeSql = `SELECT * FROM (${innerSql}\n) AS _probe LIMIT ${probeRowCount}`;
    }

    const sourceColumns = ((parsed.columns as Array<{ name?: string; type?: string }> | undefined) ?? [])
      .map((c) => ({ name: c.name ?? '', type: c.type ?? '' }))
      .filter((c) => c.name);

    try {
      const probe = await deps.connections.executeQuery(connectionId, probeSql);

      // NOTE(review): the result fixtures in the specs carry an `error` field; a non-empty
      // value is treated as a failed probe rather than compared against empty headers.
      // Confirm against the executeQuery result type.
      const reportedError = (probe as { error?: unknown }).error;
      if (reportedError) {
        throw reportedError instanceof Error
          ? reportedError
          : new Error(typeof reportedError === 'string' ? reportedError : JSON.stringify(reportedError));
      }

      const actual = new Set((probe.headers ?? []).map((h) => h.toLowerCase()));
      const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
      if (missing.length > 0) {
        errors.push(
          `${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
        );
      }
    } catch (e) {
      errors.push(
        formatProbeError({
          sourceName,
          measureName: null,
          probeSql,
          warehouse,
          sourceColumns,
          error: e,
          headline: 'embedded sql dry-run failed',
        }),
      );
    }
  } else if (isOverlay) {
    const measureErrors = await probeOverlayMeasures(deps, connectionId, sourceName, warehouse);
    errors.push(...measureErrors);
  }

  return { errors, warnings };
}
/**
 * Renders a multi-line description of a failed warehouse probe.
 *
 * The first line names the source (and the measure, when given) followed by `headline`;
 * the following lines list the warehouse (if known), the probe SQL, the declared columns
 * that the probe SQL references (if any), and the error message.
 */
function formatProbeError(args: {
  sourceName: string;
  measureName: string | null;
  probeSql: string;
  warehouse: string | null;
  sourceColumns: Array<{ name: string; type: string }>;
  error: unknown;
  headline: string;
}): string {
  const message = args.error instanceof Error ? args.error.message : String(args.error);

  const subject = args.measureName
    ? `${args.sourceName}.yaml: measure "${args.measureName}" ${args.headline}.`
    : `${args.sourceName}.yaml: ${args.headline}.`;

  // Only columns that actually occur in the probe SQL are worth listing.
  const referenced = args.sourceColumns
    .filter((column) => referencesColumn(args.probeSql, column.name))
    .map((column) => `${column.name} (${column.type || '?'})`);

  const report: string[] = [subject];
  if (args.warehouse) {
    report.push(` Warehouse: ${args.warehouse}`);
  }
  report.push(` Probe SQL: ${args.probeSql}`);
  if (referenced.length > 0) {
    report.push(` Referenced columns: ${referenced.join(', ')}`);
  }
  report.push(` Error: ${message}`);

  return report.join('\n');
}
/**
 * Tells whether `sql` mentions `columnName` as a whole word.
 *
 * The match ignores case, in line with the case-insensitive comparison of declared
 * columns against warehouse headers elsewhere in this module. This is a textual check
 * only; it does not parse the SQL.
 *
 * @returns `false` for an empty column name.
 */
function referencesColumn(sql: string, columnName: string): boolean {
  if (!columnName) {
    return false;
  }
  // Escape regex metacharacters so the name is matched literally.
  const escaped = columnName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`\\b${escaped}\\b`, 'i').test(sql);
}
/**
 * Probes every measure of an overlay source by running a one-row query for it through
 * the semantic-layer engine, collecting one formatted error per failing measure.
 *
 * Nothing is probed when the composed source cannot be found, has no `table`, or has no
 * measures.
 *
 * @param deps collaborators; only `semanticLayerService` is used here.
 * @param connectionId connection the source belongs to.
 * @param sourceName overlay source to probe.
 * @param warehouse warehouse type, included in error reports when known.
 * @returns error messages; empty when every probe succeeded or nothing was probed.
 */
async function probeOverlayMeasures(
  deps: SlValidationDeps,
  connectionId: string,
  sourceName: string,
  warehouse: string | null,
): Promise<string[]> {
  const errors: string[] = [];

  let composed:
    | {
        name: string;
        table?: string;
        sql?: string;
        columns?: Array<{ name?: string; type?: string }>;
        measures: Array<{ name: string; expr: string; filter?: string; segments?: string[] }>;
        segments?: Array<{ name: string; expr: string }>;
      }
    | undefined;
  try {
    const all = await deps.semanticLayerService.loadAllSources(connectionId);
    composed = all.find((s) => s.name === sourceName);
  } catch (e) {
    errors.push(
      `${sourceName}.yaml: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
    );
    return errors;
  }

  if (!composed?.table || composed.measures.length === 0) {
    return errors;
  }

  const sourceColumns = (composed.columns ?? [])
    .map((c) => ({ name: c.name ?? '', type: c.type ?? '' }))
    .filter((c) => c.name);

  // Measures are probed one at a time, in declaration order.
  for (const measure of composed.measures) {
    const measureRef = `${sourceName}.${measure.name}`;
    try {
      await deps.semanticLayerService.executeQuery(connectionId, {
        measures: [measureRef],
        dimensions: [],
        filters: [],
        limit: 1,
      });
    } catch (e) {
      errors.push(
        formatProbeError({
          sourceName,
          measureName: measure.name,
          // The generated SQL is not available when the engine call fails, so a
          // placeholder stands in for it in the report.
          probeSql: `<composed via semantic-layer engine for ${measureRef}>`,
          warehouse,
          sourceColumns,
          error: e,
          headline: 'dry-run failed',
        }),
      );
    }
  }

  return errors;
}
/**
 * Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
 * exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
 * (automatic). Returns a short human-readable description of what happened.
 *
 * @param deps collaborators; `gitService`, `configService` and `slSourcesRepository` are used.
 * @param connectionId connection the source belongs to.
 * @param preHead commit to restore from; `null` means there is nothing to restore.
 * @param sourceName source to revert.
 * @returns one of `'restored to pre-session content'`,
 *   `'deleted (did not exist at session start)'` or `'no-op (already absent)'`.
 * @throws whatever the file write or the index delete throws; neither is swallowed.
 */
export async function revertSourceToPreHead(
  deps: SlValidationDeps,
  connectionId: string,
  preHead: string | null,
  sourceName: string,
): Promise<string> {
  const relPath = slSourcePath(connectionId, sourceName);

  let preContent: string | null = null;
  if (preHead) {
    try {
      preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
    } catch {
      // NOTE(review): every lookup failure is treated as "file did not exist at preHead",
      // which leads to the delete path below — confirm that getFileAtCommit only throws
      // for a missing file.
      preContent = null;
    }
  }

  if (preContent !== null) {
    await deps.configService.writeFile(
      relPath,
      preContent,
      SYSTEM_GIT_AUTHOR.name,
      SYSTEM_GIT_AUTHOR.email,
      `Revert SL source to pre-session state: ${sourceName}`,
      { skipLock: true },
    );
    return 'restored to pre-session content';
  }

  // Only a failing file delete means "already absent". The index cleanup runs exactly
  // once afterwards, so an index failure is neither retried nor mistaken for a missing file.
  let fileDeleted = true;
  try {
    await deps.configService.deleteFile(
      relPath,
      SYSTEM_GIT_AUTHOR.name,
      SYSTEM_GIT_AUTHOR.email,
      `Drop SL source (not present at session start): ${sourceName}`,
      { skipLock: true },
    );
  } catch {
    fileDeleted = false;
  }

  await deps.slSourcesRepository.deleteByConnectionAndName(connectionId, sourceName);
  return fileDeleted ? 'deleted (did not exist at session start)' : 'no-op (already absent)';
}

View file

@ -0,0 +1,267 @@
import { describe, expect, it, vi } from 'vitest';
import type { ToolSession } from '../../tools/index.js';
import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js';
import { SlWriteSourceTool } from './sl-write-source.tool.js';
/**
 * Builds an `SlWriteSourceTool` with stubbed semantic-layer and search services.
 *
 * By default the manifest lists `ACCOUNTS` and `ORDERS`, nothing is manifest-backed,
 * validation reports no findings, writes resolve with commit `c1`, and reading an existing
 * source file rejects with "not found". `overrides.semanticLayerService` and
 * `overrides.slSearchService` are spread over the respective stubs.
 *
 * @returns the tool and both stubs for call assertions.
 */
function makeTool(overrides: Partial<Record<string, any>> = {}) {
  const noFindings = { errors: [], warnings: [] };

  const semanticLayerService = {
    listManifestSourceNames: vi.fn().mockResolvedValue(['ACCOUNTS', 'ORDERS']),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    loadSource: vi.fn().mockResolvedValue(null),
    loadAllSources: vi.fn().mockResolvedValue([]),
    validateWithProposedSource: vi.fn().mockResolvedValue(noFindings),
    writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
    deleteSource: vi.fn().mockResolvedValue(undefined),
    readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
    ...overrides.semanticLayerService,
  };

  const slSearchService = {
    indexSources: vi.fn().mockResolvedValue(undefined),
    ...overrides.slSearchService,
  };

  const resolvedAuthor = { name: 'T U', email: 't@u.com' };
  const tool = new SlWriteSourceTool({
    semanticLayerService: semanticLayerService as never,
    slSearchService: slSearchService as never,
    authorResolver: { resolve: vi.fn().mockResolvedValue(resolvedAuthor) },
  });

  return { tool, semanticLayerService, slSearchService };
}
// Session-less tool context shared by the specs below; session cases spread it and add `session`.
const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };
describe('SlWriteSourceTool — orphan overlay guard', () => {
  it('rejects overlay YAMLs targeting a name absent from the manifest', async () => {
    // Overlay shape: only `name` and `measures`, no `table`/`sql`. The name is not among
    // the manifest names stubbed by makeTool() (ACCOUNTS, ORDERS).
    const orphanOverlay = {
      name: 'does_not_exist',
      measures: [{ name: 'count_rows', expr: 'count(*)' }],
    } as any;
    const input = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'does_not_exist',
      source: orphanOverlay,
    } as any;
    const { tool } = makeTool();

    const output = await tool.call(input, baseContext);

    expect(output.structured.success).toBe(false);
    expect(output.markdown).toMatch(/no manifest entry with that name exists/i);
    // The message is expected to list available manifest names.
    expect(output.markdown).toMatch(/ACCOUNTS|ORDERS/);
  });
});
describe('SlWriteSourceTool — session gating', () => {
  /**
   * Builds a worktree-scoped session whose semantic layer service is a bag of stubs.
   * Any field supplied in `overrides` replaces the corresponding default.
   */
  function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
    const stubbedSemanticLayerService = {
      loadSource: vi.fn().mockResolvedValue(null),
      loadAllSources: vi.fn().mockResolvedValue([]),
      validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
      writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
      deleteSource: vi.fn().mockResolvedValue(undefined),
      listManifestSourceNames: vi.fn().mockResolvedValue([]),
      isManifestBacked: vi.fn().mockResolvedValue(false),
      readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
      findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
    } as any;

    const defaults: ToolSession = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      isWorktreeScoped: true,
      preHead: 'base',
      touchedSlSources: createTouchedSlSources(),
      actions: [],
      semanticLayerService: stubbedSemanticLayerService,
      wikiService: {} as any,
      configService: {} as any,
      gitService: {} as any,
    };

    return { ...defaults, ...overrides };
  }

  /** `tool.call` input for a minimal SQL-backed source named `my_source`. */
  function mySourceInput(connectionId: unknown): any {
    return {
      connectionId,
      sourceName: 'my_source',
      source: {
        name: 'my_source',
        sql: 'select 1 as id',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    } as any;
  }

  it('skips slSearchService.indexSources when session is worktree-scoped', async () => {
    const { tool, slSearchService } = makeTool();
    const session = makeSession();
    const context: ToolContext = { ...baseContext, session };

    const result = await tool.call(mySourceInput(session.connectionId), context);

    expect(result.structured.success).toBe(true);
    expect(slSearchService.indexSources).not.toHaveBeenCalled();
    // The write must still be tracked on the session even though indexing was skipped.
    expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'my_source')).toBe(true);
    expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'my_source' }));
  });

  it('records cross-connection SL writes with targetConnectionId', async () => {
    const { tool } = makeTool();
    const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
    const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' });
    const context: ToolContext = { ...baseContext, session };

    // The write targets a connection other than the one the session runs on.
    const crossConnectionInput = {
      connectionId: warehouseConnectionId,
      sourceName: 'mapped_orders',
      source: {
        name: 'mapped_orders',
        table: 'public.orders',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    } as any;
    const result = await tool.call(crossConnectionInput, context);

    expect(result.structured.success).toBe(true);
    expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'mapped_orders')).toBe(true);
    const expectedAction = expect.objectContaining({
      target: 'sl',
      key: 'mapped_orders',
      targetConnectionId: warehouseConnectionId,
    });
    expect(session.actions).toContainEqual(expectedAction);
  });

  it('indexes normally when no session is present', async () => {
    const { tool, slSearchService } = makeTool();

    const result = await tool.call(mySourceInput('11111111-1111-1111-1111-111111111111'), baseContext);

    expect(result.structured.success).toBe(true);
    expect(slSearchService.indexSources).toHaveBeenCalledTimes(1);
  });

  it('uses session.semanticLayerService when session is present', async () => {
    const { tool } = makeTool();
    const session = makeSession();
    const context: ToolContext = { ...baseContext, session };

    await tool.call(mySourceInput(session.connectionId), context);

    expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled();
  });
});
describe('SlWriteSourceTool — disconnected-components warning in markdown', () => {
  /**
   * Writes a minimal SQL-backed source called `sourceName` through a tool whose
   * `validateWithProposedSource` stub resolves to `validationResult`.
   */
  function writeWithValidationResult(sourceName: string, validationResult: Record<string, unknown>) {
    const { tool } = makeTool({
      semanticLayerService: {
        validateWithProposedSource: vi.fn().mockResolvedValue(validationResult),
      },
    });
    const input = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName,
      source: {
        name: sourceName,
        sql: 'select 1 as id',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    } as any;
    return tool.call(input, baseContext);
  }

  it('surfaces validation warnings (including disconnected-components) in the markdown body', async () => {
    const { markdown } = await writeWithValidationResult('orders', {
      errors: [],
      warnings: ['orders: disconnected-components — no join path to ACCOUNTS'],
    });

    expect(markdown).toMatch(/disconnected-components/i);
  });

  it('renders per-source warnings prominently when the just-written source becomes a singleton component', async () => {
    const { markdown } = await writeWithValidationResult('foo', {
      errors: [],
      warnings: ['Model has 2 disconnected components.'],
      // Keyed by source name; the tool picks the entry for the source it just wrote.
      perSourceWarnings: {
        foo: ["Source 'foo' is now a singleton component (no joins to any other source)."],
      },
    });

    expect(markdown).toMatch(/Action required/i);
    expect(markdown).toContain("Source 'foo' is now a singleton component");
  });
});
describe('SlWriteSourceTool — standalone shadow guard', () => {
  it('rejects standalone YAMLs that shadow a manifest entry', async () => {
    // Every name is reported as manifest-backed, so any standalone write collides.
    const manifestBackedService = { isManifestBacked: vi.fn().mockResolvedValue(true) };
    const { tool } = makeTool({ semanticLayerService: manifestBackedService });

    // `table` is set, which makes this a standalone definition rather than an overlay.
    const standaloneInput = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'ACCOUNTS',
      source: {
        name: 'ACCOUNTS',
        table: 'raw.accounts',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        measures: [],
        joins: [],
      } as any,
    } as any;

    const { structured, markdown } = await tool.call(standaloneInput, baseContext);

    expect(structured.success).toBe(false);
    expect(markdown).toMatch(/shadows an existing manifest entry|already exists/i);
  });
});

View file

@ -0,0 +1,380 @@
import YAML from 'yaml';
import { z } from 'zod';
import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js';
import { sourceOverlaySchema } from '../schemas.js';
import type { SemanticLayerService } from '../semantic-layer.service.js';
import type { SemanticLayerSource } from '../types.js';
import {
BaseSemanticLayerTool,
type BaseSemanticLayerToolDeps,
type SemanticLayerStructured,
sourceDefinitionSchema,
} from './base-semantic-layer.tool.js';
import { slToolConnectionIdSchema } from './connection-id-schema.js';
/** A source payload is either a standalone definition (table/sql) or an overlay. */
const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]);

/**
 * Source identifier: lowercase letters, digits and underscores, starting with a letter
 * or digit.
 * NOTE(review): other parts of this tool mention upper-case manifest names such as
 * ACCOUNTS — confirm whether those are expected to pass through this schema.
 */
const sourceNameSchema = z
  .string()
  .regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case (lowercase alphanumeric and underscores)')
  .describe('Name of the source to create, edit, or delete');

/** Input accepted by `sl_write_source`: target connection + name, and either a payload or a delete flag. */
const slWriteSourceInputSchema = z.object({
  connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
  sourceName: sourceNameSchema,
  source: sourceInputSchema
    .optional()
    .describe('Source definition (standalone with table/sql) or overlay (measures, computed columns, etc.)'),
  delete: z.boolean().optional().describe('Set to true to delete this source entirely'),
});

type SlWriteSourceInput = z.infer<typeof slWriteSourceInputSchema>;
/**
 * Decides which connection id, if any, to attach to a session action.
 *
 * @param runConnectionId - Connection the session itself runs against (may be absent).
 * @param actionConnectionId - Connection the action was actually applied to.
 * @returns `actionConnectionId` when the session has a connection and it differs from the
 *   action's connection; otherwise `null`.
 */
function actionTargetConnectionId(
  runConnectionId: string | null | undefined,
  actionConnectionId: string,
): string | null {
  // No (or empty) run connection: there is nothing to contrast the action against.
  if (!runConnectionId) {
    return null;
  }
  // Same connection: the action is not cross-connection.
  if (runConnectionId === actionConnectionId) {
    return null;
  }
  return actionConnectionId;
}
/**
 * Agent tool `sl_write_source`: creates a semantic layer source, fully rewrites an
 * existing one, or deletes it.
 *
 * When the tool context carries a session, the session's semantic layer service is used
 * instead of the injected one and every successful write/delete is recorded on the
 * session (touched sources + action log). Search re-indexing is skipped for
 * worktree-scoped sessions and is otherwise strictly best-effort: a failure while
 * refreshing the index never turns an already-committed change into a reported failure.
 */
export class SlWriteSourceTool extends BaseSemanticLayerTool<typeof slWriteSourceInputSchema> {
  readonly name = 'sl_write_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  /** Prompt-facing description of the tool (purpose, usage guidance, schema notes). */
  get description(): string {
    return `<purpose>
Create a new semantic layer source or fully rewrite an existing one.
If the source already exists, this tool will overwrite it with the new definition.
</purpose>
<when_to_use>
- First time creating a source definition
- When modeling a new SQL-backed source (e.g., churn risk view, ARR calculation)
- When the user asks to start over / fully rewrite a source
- Consolidating multiple sources into one (write merged definition)
- For targeted edits to existing sources (add/remove measures, update joins), prefer sl_edit_source instead
</when_to_use>
<editing_approach>
- New source: provide \`source\` with full definition
- Full rewrite: provide \`source\` (overwrites existing)
- Targeted edits on an existing source: use sl_edit_source instead
- Delete: set \`delete: true\`
</editing_approach>
<source_definition>
- name: Unique identifier for the source
- table: For physical table/view sources (e.g., "public.orders"). Mutually exclusive with sql.
- sql: For SQL-based sources (the SQL query). Mutually exclusive with table.
- grain: What one row represents (e.g., ["id"], ["customer_id", "product_id"])
- columns: All columns with type (string/number/time/boolean) and optional descriptions
- joins: Relationships to other sources (to, on, relationship: many_to_one/one_to_many/one_to_one)
- measures: Pre-defined aggregations (name, expr like "sum(amount)", optional filter, optional segments bare names of segments defined on the same source, optional description)
- segments: Named, reusable boolean predicates scoped to this source (name, expr a SQL boolean over this source's columns, optional description). A measure references one with \`segments: [name]\`; a query references one with the dotted form \`source.segment_name\`. Use when the same predicate appears on 3+ measures — e.g. extract \`is_paid = true and is_refunded = '0'\` as \`segments: [{name: paid_non_refunded, expr: "..."}]\` and have each measure use \`segments: [paid_non_refunded]\` instead of re-typing the predicate inside \`sum(case when ... then x end)\`. Segments are predicates only — they cannot be selected as dimensions or grouped by; if you need to group by the predicate, add a \`columns[]\` entry instead.
</source_definition>
<join_requirements>
Sources with joins: [] are disconnected from the semantic layer join graph and cannot be composed with other sources in semantic queries.
Before writing, use discover_data to check existing sources and their grain columns.
For each grain/key column in your source (e.g., account_id, item_id), find the matching dimension source (e.g., ACCOUNTS, ITEMS) and declare a many_to_one join.
Example: a source graining on [account_id] should declare:
joins:
- to: ACCOUNTS
on: source_name.account_id = ACCOUNTS.ACCOUNT_ID
relationship: many_to_one
The on condition format: local_column = TARGET_SOURCE.target_column (right side must include target source name).
Do NOT join back to a table that the SQL already aggregates from if the grain column is not in the output (the relationship is already baked into the SQL).
</join_requirements>`;
  }

  /** Zod schema used to validate the tool input. */
  get inputSchema() {
    return slWriteSourceInputSchema;
  }

  /**
   * Entry point: deletes the source when `input.delete` is set, otherwise writes
   * `input.source` as a new or fully rewritten source.
   *
   * @param input - Validated tool input.
   * @param context - Tool context; `context.session`, when present, supplies the
   *   semantic layer service and receives the bookkeeping for successful changes.
   * @returns A success or failure output; service errors are reported in the output
   *   rather than thrown. (Author resolution happens up front and is not guarded.)
   */
  async call(input: SlWriteSourceInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId, sourceName } = input;
    const { name: author, email: authorEmail } = await this.authorResolver.resolve(context.userId);
    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
    const skipIndex = context.session?.isWorktreeScoped === true;

    // `delete` takes precedence over any `source` payload supplied alongside it.
    if (input.delete) {
      try {
        await semanticLayerService.deleteSource(connectionId, sourceName, author, authorEmail);
        if (!skipIndex) {
          await this.refreshSearchIndex(semanticLayerService, connectionId);
        }
        this.recordSessionAction(context, connectionId, sourceName, 'removed', 'Deleted source');
        return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
      } catch (error) {
        return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
      }
    }

    // Create/rewrite needs a payload.
    if (!input.source) {
      return this.buildOutput(
        false,
        ['Provide `source` to create or rewrite. For targeted edits, use sl_edit_source.'],
        sourceName,
      );
    }

    return this.writeFullSource(
      connectionId,
      input.source,
      sourceName,
      author,
      authorEmail,
      context,
      semanticLayerService,
      skipIndex,
    );
  }

  /**
   * Runs the overlay/standalone guards and model validation, then persists `source`.
   * Nothing is written when a guard or validation reports an error.
   *
   * NOTE(review): the guards, the "existing" lookup and the session bookkeeping key off
   * `sourceName`, while `writeSource` only receives the source object. Presumably the
   * service derives the target from `source.name` — confirm the two are required to match.
   */
  private async writeFullSource(
    connectionId: string,
    source: z.infer<typeof sourceInputSchema>,
    sourceName: string,
    author: string,
    authorEmail: string,
    context: ToolContext,
    semanticLayerService: SemanticLayerService,
    skipIndex: boolean,
  ): Promise<ToolOutput<SemanticLayerStructured>> {
    // An overlay is a source that declares neither a (non-empty) `table` nor `sql`.
    const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql);
    const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName);
    const commitMessage = existing
      ? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}`
      : `${isOverlay ? 'Create overlay' : 'Create source'}: ${sourceName}`;
    const yamlContent = YAML.stringify(source);

    const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, isOverlay);
    if (orphanError) {
      return this.buildOutput(false, [orphanError], sourceName, { yaml: yamlContent });
    }
    const shadowError = await this.rejectStandaloneShadow(semanticLayerService, connectionId, sourceName, isOverlay);
    if (shadowError) {
      return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent });
    }

    const validatedSource = source as SemanticLayerSource;
    const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource);
    const validationErrors = validationResult.errors;
    const validationWarnings = [...validationResult.warnings];
    // Warnings specific to the source being written are surfaced separately.
    const actionRequiredWarnings = validationResult.perSourceWarnings?.[sourceName] ?? [];

    if (validationErrors.length > 0) {
      return this.buildOutput(false, ['Validation failed — source was NOT saved:', ...validationErrors], sourceName, {
        yaml: yamlContent,
        validationErrors,
        validationWarnings,
        actionRequiredWarnings,
      });
    }

    try {
      const result = await semanticLayerService.writeSource(
        connectionId,
        validatedSource,
        author,
        authorEmail,
        commitMessage,
      );
      if (!skipIndex) {
        await this.refreshSearchIndex(semanticLayerService, connectionId);
      }
      this.recordSessionAction(
        context,
        connectionId,
        sourceName,
        existing ? 'updated' : 'created',
        existing ? `Rewrote source` : `Created source`,
      );
      return this.buildOutput(true, [], sourceName, {
        yaml: yamlContent,
        commitHash: result.commitHash ?? undefined,
        validationErrors,
        validationWarnings,
        actionRequiredWarnings,
      });
    } catch (error) {
      return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
    }
  }

  /**
   * Re-indexes all sources of `connectionId` for search. Best-effort: any failure while
   * loading or indexing is swallowed, because the change that triggered the refresh has
   * already been committed and must not be reported as failed.
   */
  private async refreshSearchIndex(semanticLayerService: SemanticLayerService, connectionId: string): Promise<void> {
    try {
      const allSources = await semanticLayerService.loadAllSources(connectionId);
      await this.slSearchService.indexSources(connectionId, allSources);
    } catch {
      // Intentionally ignored — see method comment.
    }
  }

  /**
   * Records a successful change on the session, if there is one: marks the source as
   * touched and appends an entry to the session action log.
   */
  private recordSessionAction(
    context: ToolContext,
    connectionId: string,
    sourceName: string,
    type: 'created' | 'updated' | 'removed',
    detail: string,
  ): void {
    const session = context.session;
    if (!session) {
      return;
    }
    addTouchedSlSource(session.touchedSlSources, connectionId, sourceName);
    session.actions.push({
      target: 'sl',
      type,
      key: sourceName,
      detail,
      targetConnectionId: actionTargetConnectionId(session.connectionId, connectionId),
    });
  }

  /** Returns the stored file content for the source, or `null` when reading it fails. */
  private async readSourceYamlFromService(
    service: SemanticLayerService,
    connectionId: string,
    sourceName: string,
  ): Promise<string | null> {
    try {
      const { content } = await service.readSourceFile(connectionId, sourceName);
      return content;
    } catch {
      return null;
    }
  }

  /**
   * Guard for overlays: an overlay must target a name present in the manifest.
   *
   * @returns An error message (with nearest-name suggestions) when `isOverlay` is true
   *   and `sourceName` is not a manifest name; `null` otherwise.
   */
  private async rejectOrphanOverlay(
    semanticLayerService: SemanticLayerService,
    connectionId: string,
    sourceName: string,
    isOverlay: boolean,
  ): Promise<string | null> {
    if (!isOverlay) {
      return null;
    }
    const manifestNames = await semanticLayerService.listManifestSourceNames(connectionId);
    if (manifestNames.includes(sourceName)) {
      return null;
    }
    const suggestions = this.nearestMatches(sourceName, manifestNames, 3);
    return [
      `Error: cannot write "${sourceName}" as an overlay — no manifest entry with that name exists.`,
      suggestions.length > 0
        ? ` Nearest manifest matches: ${suggestions.join(', ')}.`
        : ` No manifest entries resemble "${sourceName}".`,
      `To customize an existing base table, retarget the overlay at one of the nearest matches.`,
      `For a LookML derived_table or any source backed by inline SQL, rewrite as a standalone`,
      `curated source with a top-level "sql:" block plus explicit "grain:" and "columns:".`,
    ].join('\n');
  }

  /**
   * Guard for standalone sources: a standalone definition must not reuse the name of a
   * manifest-backed source.
   *
   * @returns An error message explaining how to convert to an overlay when `isOverlay`
   *   is false and the name is manifest-backed; `null` otherwise.
   */
  private async rejectStandaloneShadow(
    semanticLayerService: SemanticLayerService,
    connectionId: string,
    sourceName: string,
    isOverlay: boolean,
  ): Promise<string | null> {
    if (isOverlay) {
      return null;
    }
    const isManifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
    if (!isManifestBacked) {
      return null;
    }
    return [
      `Error: cannot write "${sourceName}" as a standalone source — a manifest entry with that name already exists.`,
      ` Writing standalone would drop the manifest's columns and joins, leaving only what you list here.`,
      `To add measures/segments on top of the manifest, rewrite this YAML as an overlay:`,
      ` - Remove "sql:", "table:", "grain:", "columns:", and "joins:".`,
      ` - Keep only "name:", plus "measures:", "segments:", and/or "description:".`,
      ` - The manifest's schema is inherited automatically.`,
      `If you really need a different base table, use a different source name.`,
    ].join('\n');
  }

  /**
   * Ranks `haystack` entries by case-insensitive similarity to `needle`
   * (Jaro-Winkler plus small bonuses for prefix and substring containment) and returns
   * up to `limit` candidates whose score exceeds 0.4, best first.
   */
  private nearestMatches(needle: string, haystack: string[], limit: number): string[] {
    if (haystack.length === 0) {
      return [];
    }
    const lowerNeedle = needle.toLowerCase();
    const scored = haystack.map((candidate) => {
      const lower = candidate.toLowerCase();
      const prefixBoost = lower.startsWith(lowerNeedle) || lowerNeedle.startsWith(lower) ? 0.2 : 0;
      const substringBoost = lower.includes(lowerNeedle) || lowerNeedle.includes(lower) ? 0.1 : 0;
      const score = jaroWinkler(lowerNeedle, lower) + prefixBoost + substringBoost;
      return { candidate, score };
    });
    // Filtering first keeps the sort small; Array.prototype.sort is stable, so the
    // resulting order is the same as sorting everything and filtering afterwards.
    return scored
      .filter((s) => s.score > 0.4)
      .sort((a, b) => b.score - a.score)
      .slice(0, limit)
      .map((s) => s.candidate);
  }
}
/**
 * Jaro-Winkler similarity of two strings, compared by UTF-16 code unit.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns 1 for identical strings, 0 when no characters match, otherwise the Jaro
 *   score raised by a bonus for a shared prefix of up to four characters.
 */
function jaroWinkler(a: string, b: string): number {
  if (a === b) {
    return 1;
  }

  // Two equal characters only count as a match when their positions are at most
  // `reach` apart.
  const reach = Math.max(0, Math.floor(Math.max(a.length, b.length) / 2) - 1);
  const usedInA: boolean[] = Array.from({ length: a.length }, () => false);
  const usedInB: boolean[] = Array.from({ length: b.length }, () => false);

  let matches = 0;
  for (let i = 0; i < a.length; i++) {
    const upper = Math.min(i + reach + 1, b.length);
    let j = Math.max(0, i - reach);
    // Advance to the first unused position in b's window holding the same character.
    while (j < upper && (usedInB[j] || a[i] !== b[j])) {
      j++;
    }
    if (j < upper) {
      usedInA[i] = true;
      usedInB[j] = true;
      matches++;
    }
  }
  if (matches === 0) {
    return 0;
  }

  // Line up the matched characters of each string in order; every position where the
  // two sequences disagree counts as one (half-)transposition.
  const matchedA = a.split('').filter((_, idx) => usedInA[idx]);
  const matchedB = b.split('').filter((_, idx) => usedInB[idx]);
  const transpositions = matchedA.reduce((count, ch, idx) => (ch === matchedB[idx] ? count : count + 1), 0);

  const jaro = (matches / a.length + matches / b.length + (matches - transpositions / 2) / matches) / 3;

  // Winkler adjustment: reward a common prefix, capped at four characters.
  const prefixCap = Math.min(4, a.length, b.length);
  let prefix = 0;
  for (; prefix < prefixCap; prefix++) {
    if (a[prefix] !== b[prefix]) {
      break;
    }
  }
  return jaro + prefix * 0.1 * (1 - jaro);
}