fix: read semantic sources safely (#284)

* fix: read semantic sources safely

* test: retarget reindex per-scope error case to a broken manifest

Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.

* fix(sl): decouple semantic-layer file names from warehouse naming rules

The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.

- New `source-files.ts`: one total filename derivation (safe lowercase
  names verbatim; otherwise slug + sha256-hash suffix, immune to
  case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
  `assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
  name (human renames survive); new sources get a derived filename; a
  derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
  caller to launder IO errors; `deleteSource` distinguishes manifest-backed
  sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
  now a recommendation for new sources) and rejects sourceName/source.name
  mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
  derivation use resolved paths / in-file names instead of interpolating
  `<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
  helpers onto the shared module; dropped dead service methods.

* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator

- Key broken/renamed semantic-layer files by their recoverable in-file
  name (slSourceNameForFile) so mid-edit sources stay reachable under
  their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
  recover deleted files' declared names from the pre-change commit
  instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
  human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
  validateReadOnly in the connection's dialect, sharing one
  driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
  reject smuggled second statements, and strip trailing
  semicolons/comments before row-limit wrapping
This commit is contained in:
Andrey Avtomonov 2026-06-10 14:06:13 +02:00 committed by GitHub
parent 853f39a7c3
commit f3f893bf01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
51 changed files with 1797 additions and 476 deletions

View file

@ -44,12 +44,17 @@ Use this order for most context changes:
Semantic sources are YAML files for queryable tables or custom SQL. They define Semantic sources are YAML files for queryable tables or custom SQL. They define
agent-facing measures, dimensions, segments, joins, and grain. agent-facing measures, dimensions, segments, joins, and grain.
Semantic source files live at: Semantic source files live under:
```text ```text
semantic-layer/<connection-id>/<source-name>.yaml semantic-layer/<connection-id>/
``` ```
The file's `name:` field is the source's identity — it carries the warehouse
identifier verbatim, including case. The filename is a derived label: simple
lowercase names get `<source-name>.yaml`, anything else gets a slugged
filename. Renaming a file does not rename the source.
### Minimal source ### Minimal source
```yaml ```yaml
@ -152,7 +157,7 @@ joins:
| Field | Required | Description | | Field | Required | Description |
|-------|----------|-------------| |-------|----------|-------------|
| `name` | Yes | Source identifier. Use lowercase words and underscores. | | `name` | Yes | Source identity (not the filename). When overlaying an ingested table, match the manifest identifier verbatim, including case (e.g. `SIGNED_UP`); for a new standalone source, lowercase words and underscores are recommended. |
| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. | | `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
| `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. | | `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. |
| `grain` | Yes | Columns that uniquely identify a row at the source grain. | | `grain` | Yes | Columns that uniquely identify a row at the source grain. |

View file

@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string {
return url; return url;
} }
/**
 * Drops leading whitespace, `-- ...` line comments, and block comments from
 * `sql`, returning the text that starts at the first other character.
 *
 * An unterminated block comment is not consumed: the result then starts at
 * its opening delimiter. A line comment with no newline consumes the rest of
 * the input, yielding an empty string.
 */
function stripLeadingSqlComments(sql: string): string {
  let pos = 0;
  while (pos < sql.length) {
    // charAt returns '' past the end, which never matches \s.
    while (/\s/.test(sql.charAt(pos))) {
      pos += 1;
    }
    const isLineComment = sql.startsWith('--', pos);
    const isBlockComment = !isLineComment && sql.startsWith('/*', pos);
    if (!isLineComment && !isBlockComment) {
      break;
    }
    const terminator = isLineComment ? '\n' : '*/';
    const found = sql.indexOf(terminator, pos + 2);
    if (found === -1) {
      // Line comment to end of input -> nothing left; open block comment -> keep as-is.
      return isLineComment ? '' : sql.slice(pos);
    }
    pos = found + terminator.length;
  }
  return sql.slice(pos);
}
export function isKtxSqliteConnectionConfig( export function isKtxSqliteConnectionConfig(
connection: KtxSqliteConnectionConfig | undefined, connection: KtxSqliteConnectionConfig | undefined,
): connection is KtxSqliteConnectionConfig { ): connection is KtxSqliteConnectionConfig {
@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> { async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId); this.assertConnection(input.connectionId);
const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params); const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params);
return { ...result, rowCount: result.rows.length }; return { ...result, rowCount: result.rows.length };
} }

View file

@ -1,4 +1,4 @@
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js'; import { assertReadOnlySql, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
import { getDialectForDriver } from '../../context/connections/dialects.js'; import { getDialectForDriver } from '../../context/connections/dialects.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js'; import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -284,7 +284,7 @@ function isDeniedError(error: unknown): boolean {
} }
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string { function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, ''); const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sqlText));
if (!maxRows) { if (!maxRows) {
return trimmed; return trimmed;
} }

View file

@ -2,16 +2,133 @@ const MUTATING_SQL =
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i; /^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
const READ_SQL = /^\s*(select|with)\b/i; const READ_SQL = /^\s*(select|with)\b/i;
// Read-only queries often arrive with a `-- ...` or `/* ... */` prefix. The
// SELECT/WITH prefix check must look at the first real token, so leading
// whitespace and comments are removed before it runs. An unterminated block
// comment is left in place (the result starts at its opening delimiter); a
// line comment without a newline swallows the remainder of the input.
function stripLeadingSqlComments(sql: string): string {
  let cursor = 0;
  for (;;) {
    while (cursor < sql.length && /\s/.test(sql.charAt(cursor))) {
      cursor += 1;
    }
    if (sql.startsWith('--', cursor)) {
      const newline = sql.indexOf('\n', cursor + 2);
      if (newline === -1) {
        return '';
      }
      cursor = newline + 1;
    } else if (sql.startsWith('/*', cursor)) {
      const close = sql.indexOf('*/', cursor + 2);
      if (close === -1) {
        return sql.slice(cursor);
      }
      cursor = close + 2;
    } else {
      return sql.slice(cursor);
    }
  }
}
// Lexes past one string literal, quoted identifier, or comment starting at
// `index`, using standard-SQL rules ('' and "" escapes; no dialect extensions
// such as backslash escapes or dollar quoting). Returns the index after the
// token, or `index` unchanged when no quoted/comment token starts there.
// Unterminated tokens run to the end of the input.
function skipQuotedOrComment(sql: string, index: number): number {
  const quote = sql[index];
  if (quote === "'" || quote === '"') {
    let i = index + 1;
    while (i < sql.length) {
      if (sql[i] === quote) {
        if (sql[i + 1] === quote) {
          // Doubled quote is an escaped quote character, not the terminator.
          i += 2;
          continue;
        }
        return i + 1;
      }
      i += 1;
    }
    return sql.length;
  }
  if (sql.startsWith('--', index)) {
    const end = sql.indexOf('\n', index + 2);
    return end === -1 ? sql.length : end + 1;
  }
  if (sql.startsWith('/*', index)) {
    const end = sql.indexOf('*/', index + 2);
    return end === -1 ? sql.length : end + 2;
  }
  return index;
}

// Backstop against statement smuggling (`select 1; drop table x`): reject any
// semicolon that is followed by real content. Semicolons inside string
// literals, quoted identifiers, and comments are fine, as are trailing
// semicolons (optionally followed by whitespace and comments). A string
// literal or quoted identifier that appears AFTER a semicolon counts as real
// content and is rejected too. Only standard-SQL quoting is modelled, so
// dialect-specific escapes (e.g. backslash-escaped quotes) can make this lexer
// disagree with the warehouse's own lexer; treat this as a best-effort
// backstop — the canonical gate is the daemon's sqlglot-backed validateReadOnly.
//
// Throws Error('Only one SQL statement can be executed.') on violation.
function assertSingleSqlStatement(sql: string): void {
  let index = 0;
  let sawSemicolon = false;
  while (index < sql.length) {
    const ch = sql.charAt(index);
    // Check quotes before skipping: a quoted token after the terminator would
    // otherwise be swallowed whole and never reach the content check below.
    if (sawSemicolon && (ch === "'" || ch === '"')) {
      throw new Error('Only one SQL statement can be executed.');
    }
    const skipped = skipQuotedOrComment(sql, index);
    if (skipped > index) {
      index = skipped;
      continue;
    }
    if (ch === ';') {
      sawSemicolon = true;
    } else if (sawSemicolon && !/\s/.test(ch)) {
      throw new Error('Only one SQL statement can be executed.');
    }
    index += 1;
  }
}
export function assertReadOnlySql(sql: string): string { export function assertReadOnlySql(sql: string): string {
const trimmed = sql.trim(); const trimmed = stripLeadingSqlComments(sql).trim();
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) { if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
throw new Error('Only read-only SELECT/WITH queries can be executed locally.'); throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
} }
assertSingleSqlStatement(trimmed);
return trimmed; return trimmed;
} }
// Returns `sql` cut off right after its last meaningful token, discarding any
// trailing semicolons, comments, and whitespace. `assertReadOnlySql` leaves
// that tail in place (e.g. `select 1; -- done`), which is fine when the text is
// executed directly but not when it is wrapped in a row-limit subquery: a
// trailing `;` would end up inside the subquery, and a trailing line comment
// would comment out the closing paren and limit clause. The scan walks forward
// with `skipQuotedOrComment`, so string literals and quoted identifiers count
// as meaningful in full — a `;` or `--` inside one is never treated as a
// terminator (something a plain regex cannot distinguish). Comments never
// extend the kept range.
export function stripTrailingSqlNoise(sql: string): string {
  let keepUntil = 0;
  let cursor = 0;
  while (cursor < sql.length) {
    const next = skipQuotedOrComment(sql, cursor);
    if (next > cursor) {
      // A token was skipped: quoted tokens are content, comments are noise.
      const isComment = sql.startsWith('--', cursor) || sql.startsWith('/*', cursor);
      if (!isComment) {
        keepUntil = next;
      }
      cursor = next;
      continue;
    }
    const ch = sql.charAt(cursor);
    const isNoise = ch === ';' || /\s/.test(ch);
    if (!isNoise) {
      keepUntil = cursor + 1;
    }
    cursor += 1;
  }
  return sql.slice(0, keepUntil);
}
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string { export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
const trimmed = assertReadOnlySql(sql).replace(/;+\s*$/, ''); const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sql));
if (!maxRows) { if (!maxRows) {
return trimmed; return trimmed;
} }

View file

@ -557,12 +557,13 @@ export class GitService {
} }
/** /**
* List all paths under the working tree that match `pathSpec`, scoped to HEAD. * List all paths matching `pathSpec` as they exist at `commitHash`. Reads from
* Used for the reconciler's first-ever run when there's no watermark to diff from. * git object storage, so it's safe against concurrent working-tree mutations
* and can recover paths (e.g. a human-renamed file) that no longer exist on disk.
*/ */
async listFilesAtHead(pathSpec: string): Promise<string[]> { async listFilesAtCommit(pathSpec: string, commitHash: string): Promise<string[]> {
try { try {
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', 'HEAD', '--', pathSpec]); const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', commitHash, '--', pathSpec]);
if (!raw) { if (!raw) {
return []; return [];
} }
@ -572,6 +573,14 @@ export class GitService {
} }
} }
/**
 * List all paths matching `pathSpec` as they exist at HEAD, by delegating to
 * `listFilesAtCommit` with the `'HEAD'` ref.
 * Used for the reconciler's first-ever run when there's no watermark to diff from.
 */
async listFilesAtHead(pathSpec: string): Promise<string[]> {
  const headPaths = await this.listFilesAtCommit(pathSpec, 'HEAD');
  return headPaths;
}
/** /**
* Collapse all commits between `preHead` and current HEAD into a single commit with the given * Collapse all commits between `preHead` and current HEAD into a single commit with the given
* message. Used by the memory agent to squash N per-tool-call commits into one ingest commit. * message. Used by the memory agent to squash N per-tool-call commits into one ingest commit.

View file

@ -3,6 +3,7 @@ import { dirname, join, relative } from 'node:path';
import YAML from 'yaml'; import YAML from 'yaml';
import type { MemoryAction } from '../../../../context/memory/types.js'; import type { MemoryAction } from '../../../../context/memory/types.js';
import { rawSourcesDirForSync } from '../../raw-sources-paths.js'; import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
import { isSlYamlPath } from '../../../sl/source-files.js';
import type { FinalizationOverrideReplay } from '../../types.js'; import type { FinalizationOverrideReplay } from '../../types.js';
import { mergeUsagePreservingExternal } from '../live-database/manifest.js'; import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js'; import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
@ -251,7 +252,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern'); const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
const schemaRoot = join(input.workdir, 'semantic-layer', input.connectionId, '_schema'); const schemaRoot = join(input.workdir, 'semantic-layer', input.connectionId, '_schema');
for (const file of (await walkFiles(schemaRoot)).filter((candidate) => candidate.endsWith('.yaml') || candidate.endsWith('.yml'))) { for (const file of (await walkFiles(schemaRoot)).filter(isSlYamlPath)) {
const path = join(schemaRoot, file); const path = join(schemaRoot, file);
const before = await readFile(path, 'utf-8'); const before = await readFile(path, 'utf-8');
const shard = (YAML.parse(before) ?? {}) as ManifestShard; const shard = (YAML.parse(before) ?? {}) as ManifestShard;

View file

@ -2,7 +2,6 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path'; import { dirname, join } from 'node:path';
import { z } from 'zod'; import { z } from 'zod';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../context/llm/runtime-port.js'; import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
import type { IngestTraceWriter } from './ingest-trace.js'; import type { IngestTraceWriter } from './ingest-trace.js';
import { traceTimed } from './ingest-trace.js'; import { traceTimed } from './ingest-trace.js';
@ -149,11 +148,13 @@ function buildToolSet(input: {
export function finalGateRepairPaths(input: { export function finalGateRepairPaths(input: {
changedWikiPageKeys: string[]; changedWikiPageKeys: string[];
touchedSlSources: TouchedSlSource[]; // Resolved by the caller: SL filenames are derived labels, so the repair
// allowlist must carry the real on-disk paths, not name-interpolated ones.
touchedSlSourcePaths: string[];
}): string[] { }): string[] {
return [ return [
...new Set([ ...new Set([
...input.touchedSlSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`), ...input.touchedSlSourcePaths,
...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`), ...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
]), ]),
].sort(); ].sort();

View file

@ -1,3 +1,4 @@
import { isSlYamlPath } from '../../context/sl/source-files.js';
import type { SemanticLayerSource } from '../../context/sl/types.js'; import type { SemanticLayerSource } from '../../context/sl/types.js';
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js'; import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
import type { IngestReportFinalizationMismatch } from './reports.js'; import type { IngestReportFinalizationMismatch } from './reports.js';
@ -64,39 +65,36 @@ export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
); );
} }
export async function deriveFinalizationTouchedSources( // Source identity is the in-file `name:`; filenames are derived labels (see
input: DeriveTouchedSourcesInput, // source-files.ts), so a changed path — manifest shard or standalone file —
): Promise<DeriveTouchedSourcesResult> { // cannot be mapped to a source by parsing its filename. Instead, every changed
// semantic-layer file is attributed through the before/after diff of its
// connection's composed sources. A changed file whose connection diff is empty
// cannot be attributed to any source and is surfaced as unresolved.
export function deriveFinalizationTouchedSources(input: DeriveTouchedSourcesInput): DeriveTouchedSourcesResult {
const touched = new Map<string, TouchedSlSource>(); const touched = new Map<string, TouchedSlSource>();
const unresolvedPaths: string[] = []; const unresolvedPaths: string[] = [];
const pathsByConnection = new Map<string, string[]>();
for (const path of input.changedPaths) { for (const path of input.changedPaths) {
if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) { if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path)) {
continue; continue;
} }
const parts = path.split('/'); const connectionId = path.split('/')[1] ?? '';
const connectionId = parts[1] ?? '';
if (!connectionId) { if (!connectionId) {
unresolvedPaths.push(path); unresolvedPaths.push(path);
continue; continue;
} }
if (parts[2] !== '_schema') { pathsByConnection.set(connectionId, [...(pathsByConnection.get(connectionId) ?? []), path]);
const fileName = parts.at(-1) ?? '';
const sourceName = fileName.replace(/\.ya?ml$/, '');
if (!sourceName) {
unresolvedPaths.push(path);
continue;
}
touched.set(`${connectionId}:${sourceName}`, { connectionId, sourceName });
continue;
} }
for (const [connectionId, paths] of pathsByConnection) {
const changedNames = changedSourceNames( const changedNames = changedSourceNames(
input.beforeSourcesByConnection.get(connectionId) ?? [], input.beforeSourcesByConnection.get(connectionId) ?? [],
input.afterSourcesByConnection.get(connectionId) ?? [], input.afterSourcesByConnection.get(connectionId) ?? [],
); );
if (changedNames.length === 0) { if (changedNames.length === 0) {
unresolvedPaths.push(path); unresolvedPaths.push(...paths);
continue; continue;
} }
for (const sourceName of changedNames) { for (const sourceName of changedNames) {

View file

@ -8,6 +8,7 @@ import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime
import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js'; import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
import type { CaptureSession, MemoryAction } from '../../context/memory/types.js'; import type { CaptureSession, MemoryAction } from '../../context/memory/types.js';
import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js'; import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js';
import { isSlYamlPath, slSourceFilePath, slSourceNameForFile, sourceNameFromPath } from '../../context/sl/source-files.js';
import type { SemanticLayerSource } from '../../context/sl/types.js'; import type { SemanticLayerSource } from '../../context/sl/types.js';
import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js'; import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js';
import { createTouchedSlSources, type TouchedSlSource } from '../../context/tools/touched-sl-sources.js'; import { createTouchedSlSources, type TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
@ -498,7 +499,7 @@ export class IngestBundleRunner {
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId); const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
const names = files const names = files
.filter((f) => !f.startsWith('_schema/')) .filter((f) => !f.startsWith('_schema/'))
.map((f) => f.replace(/\.yaml$/, '')) .map((f) => sourceNameFromPath(f))
.sort((left, right) => left.localeCompare(right)); .sort((left, right) => left.localeCompare(right));
const body = names.length > 0 ? names.join('\n') : '(no sources yet)'; const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
return `## ${connectionId}\n${body}`; return `## ${connectionId}\n${body}`;
@ -791,14 +792,52 @@ export class IngestBundleRunner {
].sort(); ].sort();
} }
private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] { private async touchedSlSourcesFromPaths(
return paths worktree: IngestSessionWorktree,
.filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/')) paths: string[],
.map((path) => { deletedFileSha: string,
const [, connectionId, fileName] = path.split('/'); ): Promise<TouchedSlSource[]> {
return { connectionId: connectionId ?? '', sourceName: (fileName ?? '').replace(/\.yaml$/, '') }; const sources: TouchedSlSource[] = [];
}) for (const path of paths) {
.filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0); if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path) || path.includes('/_schema/')) {
continue;
}
const [, connectionId] = path.split('/');
if (!connectionId) {
continue;
}
// Source identity is the in-file `name:`, never the filename — an uppercase
// warehouse source like `WIDGET_SALES` lives in a hash-derived
// `widget_sales-<hash>.yaml`, so parsing the basename yields a phantom name.
// Read the live file; when it was deleted this run, recover its declared
// name from the pre-change commit the way `revertSourceToPreHead` resolves a
// gone file from history. The filename is a last resort only when the content
// is unrecoverable from both.
let content: string | null;
try {
content = await readFile(join(worktree.workdir, path), 'utf-8');
} catch {
content = await worktree.git.getFileAtCommit(path, deletedFileSha).catch(() => null);
}
const sourceName = content === null ? sourceNameFromPath(path) : slSourceNameForFile(path, content);
if (sourceName.length > 0) {
sources.push({ connectionId, sourceName });
}
}
return sources;
}
// Inverse direction for commits and repair allowlists: resolve each touched
// source to its real on-disk path, falling back to the writer's derived
// filename when the file was deleted in this run.
private async touchedSlSourcePaths(workdir: string, touched: TouchedSlSource[]): Promise<string[]> {
const service = this.deps.semanticLayerService.forWorktree(workdir);
const paths: string[] = [];
for (const source of touched) {
const file = await service.readSourceFile(source.connectionId, source.sourceName);
paths.push(file?.path ?? slSourceFilePath(source.connectionId, source.sourceName));
}
return paths;
} }
private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] { private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] {
@ -1558,7 +1597,7 @@ export class IngestBundleRunner {
projectionTouchedSources = projection.touchedSources; projectionTouchedSources = projection.touchedSources;
projectionChangedWikiPageKeys = projection.changedWikiPageKeys; projectionChangedWikiPageKeys = projection.changedWikiPageKeys;
const projectionPaths = [ const projectionPaths = [
...projection.touchedSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`), ...(await this.touchedSlSourcePaths(sessionWorktree.workdir, projection.touchedSources)),
...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`), ...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
]; ];
projectionTouchedPaths = projectionPaths; projectionTouchedPaths = projectionPaths;
@ -1740,7 +1779,11 @@ export class IngestBundleRunner {
await validateFinalIngestArtifacts({ await validateFinalIngestArtifacts({
connectionIds: slConnectionIds, connectionIds: slConnectionIds,
changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths), changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths),
touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths), touchedSlSources: await this.touchedSlSourcesFromPaths(
sessionWorktree,
touchedPaths,
await sessionWorktree.git.revParseHead(),
),
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
validateTouchedSources: (touched) => validateTouchedSources: (touched) =>
@ -2289,20 +2332,34 @@ export class IngestBundleRunner {
) )
: []; : [];
const changedConnectionIds = [ // Validate the write scope before deriving touched sources: attribution
...new Set([ // by before/after diff is only defined for connections whose
...slConnectionIds, // pre-finalization snapshot was loaded (slConnectionIds), and an
...finalizationTouchedPaths // out-of-scope write would otherwise surface downstream as a bogus
.filter((path) => path.startsWith('semantic-layer/')) // unresolved-path or declaration-mismatch failure instead of the real
.map((path) => path.split('/')[1]) // policy violation.
.filter((connectionId): connectionId is string => Boolean(connectionId)), await traceTimed(
]), runTrace,
].sort(); 'finalization',
'semantic_layer_target_policy',
{
sourceKey: job.sourceKey,
allowedTargetConnectionIds: slConnectionIds,
touchedPaths: [...new Set(finalizationTouchedPaths)].sort(),
},
async () => {
assertSemanticLayerTargetPathsAllowed({
paths: finalizationTouchedPaths,
allowedConnectionIds: new Set(slConnectionIds),
});
},
);
const postFinalizationSourcesByConnection = await this.loadSourcesByConnection( const postFinalizationSourcesByConnection = await this.loadSourcesByConnection(
sessionWorktree.workdir, sessionWorktree.workdir,
changedConnectionIds, slConnectionIds,
); );
const scope = await deriveFinalizationTouchedSources({ const scope = deriveFinalizationTouchedSources({
changedPaths: finalizationTouchedPaths, changedPaths: finalizationTouchedPaths,
beforeSourcesByConnection: preFinalizationSourcesByConnection, beforeSourcesByConnection: preFinalizationSourcesByConnection,
afterSourcesByConnection: postFinalizationSourcesByConnection, afterSourcesByConnection: postFinalizationSourcesByConnection,
@ -2437,7 +2494,7 @@ export class IngestBundleRunner {
...(isolatedDiffEnabled ? projectionTouchedSources : []), ...(isolatedDiffEnabled ? projectionTouchedSources : []),
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources), ...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId), ...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
...this.touchedSlSourcesFromPaths(postReconciliationPaths), ...(await this.touchedSlSourcesFromPaths(sessionWorktree, postReconciliationPaths, preReconciliationSha)),
...finalizationTouchedSources, ...finalizationTouchedSources,
]); ]);
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({ const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
@ -2528,7 +2585,7 @@ export class IngestBundleRunner {
const gateError = this.errorMessage(error); const gateError = this.errorMessage(error);
const repairPaths = finalGateRepairPaths({ const repairPaths = finalGateRepairPaths({
changedWikiPageKeys: finalChangedWikiPageKeys, changedWikiPageKeys: finalChangedWikiPageKeys,
touchedSlSources: finalTouchedSlSources, touchedSlSourcePaths: await this.touchedSlSourcePaths(sessionWorktree.workdir, finalTouchedSlSources),
}); });
emitStageProgress('final_gates', 89, 'Repairing final artifact gates'); emitStageProgress('final_gates', 89, 'Repairing final artifact gates');
const gateRepair = await repairFinalGateFailure({ const gateRepair = await repairFinalGateFailure({

View file

@ -4,6 +4,7 @@ import { fileURLToPath } from 'node:url';
import YAML from 'yaml'; import YAML from 'yaml';
import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js'; import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js';
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js'; import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
import type { KtxEmbeddingPort } from '../../context/core/embedding.js'; import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
import type { KtxLogger } from '../../context/core/config.js'; import type { KtxLogger } from '../../context/core/config.js';
import { noopLogger } from '../../context/core/config.js'; import { noopLogger } from '../../context/core/config.js';
@ -95,6 +96,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
memoryModel?: string; memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort; semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort; queryExecutor?: KtxSqlQueryExecutorPort;
sqlAnalysis?: SqlAnalysisPort;
jobIdFactory?: () => string; jobIdFactory?: () => string;
logger?: KtxLogger; logger?: KtxLogger;
embeddingProvider?: KtxEmbeddingProvider | null; embeddingProvider?: KtxEmbeddingProvider | null;
@ -271,16 +273,13 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
} }
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) { async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
let content: string;
try {
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName); const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
content = file.content; if (!file) {
} catch (error) { return this.validateComposedSource(deps, connectionId, sourceName, 'no standalone or overlay file found');
return this.validateComposedSource(deps, connectionId, sourceName, error);
} }
try { try {
const parsed = YAML.parse(content) as unknown as Record<string, unknown>; const parsed = YAML.parse(file.content) as unknown as Record<string, unknown>;
return this.validateParsedSource(sourceName, parsed); return this.validateParsedSource(sourceName, parsed);
} catch (error) { } catch (error) {
return { return {
@ -519,6 +518,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
authorResolver: GitAuthorResolverPort; authorResolver: GitAuthorResolverPort;
slSourcesRepository: SlSourcesIndexPort; slSourcesRepository: SlSourcesIndexPort;
connections: SlConnectionCatalogPort; connections: SlConnectionCatalogPort;
sqlAnalysis?: SqlAnalysisPort;
contextStore: SqliteContextEvidenceStore; contextStore: SqliteContextEvidenceStore;
embedding: KtxEmbeddingPort; embedding: KtxEmbeddingPort;
}) { }) {
@ -551,6 +551,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }); const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
const warehouseVerificationTools = createWarehouseVerificationTools({ const warehouseVerificationTools = createWarehouseVerificationTools({
connections: deps.connections, connections: deps.connections,
...(deps.sqlAnalysis ? { sqlAnalysis: deps.sqlAnalysis } : {}),
fallbackFileStore: deps.project.fileStore, fallbackFileStore: deps.project.fileStore,
wikiSearchTool, wikiSearchTool,
slDiscoverTool, slDiscoverTool,
@ -699,6 +700,7 @@ export function createLocalBundleIngestRuntime(
authorResolver: new LocalAuthorResolver(), authorResolver: new LocalAuthorResolver(),
slSourcesRepository, slSourcesRepository,
connections, connections,
...(options.sqlAnalysis ? { sqlAnalysis: options.sqlAnalysis } : {}),
contextStore, contextStore,
embedding, embedding,
}); });

View file

@ -2,6 +2,7 @@ import { randomUUID } from 'node:crypto';
import { cp, mkdir, rm } from 'node:fs/promises'; import { cp, mkdir, rm } from 'node:fs/promises';
import { isAbsolute, resolve } from 'node:path'; import { isAbsolute, resolve } from 'node:path';
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js'; import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
import type { KtxLogger } from '../../context/core/config.js'; import type { KtxLogger } from '../../context/core/config.js';
import { createAbortError, isAbortError } from '../../context/core/abort.js'; import { createAbortError, isAbortError } from '../../context/core/abort.js';
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js'; import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
@ -35,6 +36,7 @@ export interface RunLocalIngestOptions {
memoryModel?: string; memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort; semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort; queryExecutor?: KtxSqlQueryExecutorPort;
sqlAnalysis?: SqlAnalysisPort;
logger?: KtxLogger; logger?: KtxLogger;
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null; embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
abortSignal?: AbortSignal; abortSignal?: AbortSignal;
@ -159,6 +161,7 @@ async function runScheduledPullJob(options: {
memoryModel?: string; memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort; semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort; queryExecutor?: KtxSqlQueryExecutorPort;
sqlAnalysis?: SqlAnalysisPort;
logger?: KtxLogger; logger?: KtxLogger;
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null; embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
abortSignal?: AbortSignal; abortSignal?: AbortSignal;
@ -214,6 +217,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
memoryModel: options.memoryModel, memoryModel: options.memoryModel,
semanticLayerCompute: options.semanticLayerCompute, semanticLayerCompute: options.semanticLayerCompute,
queryExecutor: options.queryExecutor, queryExecutor: options.queryExecutor,
sqlAnalysis: options.sqlAnalysis,
logger: options.logger, logger: options.logger,
embeddingProvider: options.embeddingProvider, embeddingProvider: options.embeddingProvider,
abortSignal: options.abortSignal, abortSignal: options.abortSignal,
@ -397,6 +401,7 @@ export async function runLocalMetabaseIngest(
memoryModel: options.memoryModel, memoryModel: options.memoryModel,
semanticLayerCompute: options.semanticLayerCompute, semanticLayerCompute: options.semanticLayerCompute,
queryExecutor: options.queryExecutor, queryExecutor: options.queryExecutor,
sqlAnalysis: options.sqlAnalysis,
logger: options.logger, logger: options.logger,
embeddingProvider: options.embeddingProvider, embeddingProvider: options.embeddingProvider,
abortSignal: options.abortSignal, abortSignal: options.abortSignal,

View file

@ -1,5 +1,6 @@
import type { KtxFileStorePort } from '../../../core/file-store.js'; import type { KtxFileStorePort } from '../../../core/file-store.js';
import type { SlConnectionCatalogPort } from '../../../sl/ports.js'; import type { SlConnectionCatalogPort } from '../../../sl/ports.js';
import type { SqlAnalysisPort } from '../../../sql-analysis/ports.js';
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js'; import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
import type { BaseTool, ToolContext } from '../../../tools/base-tool.js'; import type { BaseTool, ToolContext } from '../../../tools/base-tool.js';
import { DiscoverDataTool } from './discover-data.tool.js'; import { DiscoverDataTool } from './discover-data.tool.js';
@ -8,6 +9,7 @@ import { SqlExecutionTool } from './sql-execution.tool.js';
export function createWarehouseVerificationTools(deps: { export function createWarehouseVerificationTools(deps: {
connections: SlConnectionCatalogPort; connections: SlConnectionCatalogPort;
sqlAnalysis?: SqlAnalysisPort;
fallbackFileStore: KtxFileStorePort; fallbackFileStore: KtxFileStorePort;
wikiSearchTool: BaseTool; wikiSearchTool: BaseTool;
slDiscoverTool: BaseTool; slDiscoverTool: BaseTool;
@ -18,7 +20,7 @@ export function createWarehouseVerificationTools(deps: {
}); });
return [ return [
new EntityDetailsTool(catalogFactory), new EntityDetailsTool(catalogFactory),
new SqlExecutionTool(deps.connections), new SqlExecutionTool(deps.connections, deps.sqlAnalysis),
new DiscoverDataTool({ new DiscoverDataTool({
wikiSearchTool: deps.wikiSearchTool, wikiSearchTool: deps.wikiSearchTool,
slDiscoverTool: deps.slDiscoverTool, slDiscoverTool: deps.slDiscoverTool,

View file

@ -1,6 +1,8 @@
import { z } from 'zod'; import { z } from 'zod';
import { assertReadOnlySql, limitSqlForExecution } from '../../../../context/connections/read-only-sql.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../../../context/connections/read-only-sql.js';
import type { SlConnectionCatalogPort } from '../../../../context/sl/ports.js'; import type { SlConnectionCatalogPort } from '../../../../context/sl/ports.js';
import { sqlAnalysisDialectForDriver } from '../../../../context/sql-analysis/dialect.js';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { BaseTool, type ToolContext, type ToolOutput } from '../../../../context/tools/base-tool.js'; import { BaseTool, type ToolContext, type ToolOutput } from '../../../../context/tools/base-tool.js';
const sqlExecutionInputSchema = z.object({ const sqlExecutionInputSchema = z.object({
@ -40,7 +42,10 @@ function markdownTable(headers: string[], rows: unknown[][], totalRows: number):
export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> { export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
readonly name = 'sql_execution'; readonly name = 'sql_execution';
constructor(private readonly connections: SlConnectionCatalogPort) { constructor(
private readonly connections: SlConnectionCatalogPort,
private readonly sqlAnalysis?: SqlAnalysisPort,
) {
super(); super();
} }
@ -69,9 +74,24 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
}; };
} }
if (!this.sqlAnalysis) {
throw new Error('sql_execution requires parser-backed SQL validation.');
}
let sql: string; let sql: string;
let wrappedSql: string; let wrappedSql: string;
try { try {
const connection = await this.connections.getConnectionById(input.connectionId);
if (!connection) {
throw new Error(`Connection not found: ${input.connectionId}`);
}
const validation = await this.sqlAnalysis.validateReadOnly(
input.sql,
sqlAnalysisDialectForDriver(connection.connectionType),
);
if (!validation.ok) {
throw new Error(validation.error ?? 'SQL is not read-only.');
}
sql = assertReadOnlySql(input.sql); sql = assertReadOnlySql(input.sql);
wrappedSql = limitSqlForExecution(sql, input.rowLimit); wrappedSql = limitSqlForExecution(sql, input.rowLimit);
} catch (error) { } catch (error) {

View file

@ -8,9 +8,12 @@ import { createKtxEntityDetailsService } from '../../context/scan/entity-details
import type { KtxScanConnector } from '../../context/scan/types.js'; import type { KtxScanConnector } from '../../context/scan/types.js';
import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js'; import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js';
import { createKtxDiscoverDataService } from '../../context/search/discover.js'; import { createKtxDiscoverDataService } from '../../context/search/discover.js';
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../../context/sql-analysis/ports.js'; import { sqlAnalysisDialectForDriver } from '../../context/sql-analysis/dialect.js';
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
import { compileLocalSlQuery } from '../../context/sl/local-query.js'; import { compileLocalSlQuery } from '../../context/sl/local-query.js';
import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js'; import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js';
import { readLocalSlSource } from '../../context/sl/local-sl.js';
import { assertSafeConnectionId } from '../../context/sl/source-files.js';
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js'; import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js'; import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js';
@ -22,64 +25,12 @@ interface CreateLocalProjectMcpContextPortsOptions {
embeddingService: KtxEmbeddingPort | null; embeddingService: KtxEmbeddingPort | null;
} }
function dialectForDriver(driver: string | undefined): string {
const normalized = (driver ?? 'postgres').toUpperCase();
const map: Record<string, string> = {
POSTGRES: 'postgres',
BIGQUERY: 'bigquery',
SNOWFLAKE: 'snowflake',
MYSQL: 'mysql',
SQLSERVER: 'tsql',
SQLITE: 'sqlite',
DUCKDB: 'duckdb',
CLICKHOUSE: 'clickhouse',
DATABRICKS: 'databricks',
};
return map[normalized] ?? 'postgres';
}
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
return dialectForDriver(driver) as SqlAnalysisDialect;
}
function assertSafePathToken(kind: string, value: string): string {
if (
value.trim().length === 0 ||
value.includes('..') ||
value.includes('\\') ||
value.startsWith('/') ||
value.startsWith('.') ||
value.includes('//')
) {
throw new Error(`Unsafe ${kind}: ${value}`);
}
return value;
}
function assertSafeConnectionId(connectionId: string): string {
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
throw new Error(`Unsafe connection id: ${connectionId}`);
}
return assertSafePathToken('connection id', connectionId);
}
function assertSafeSourceName(sourceName: string): string {
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
return assertSafePathToken('semantic-layer source name', sourceName);
}
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> { async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
if (connector?.cleanup) { if (connector?.cleanup) {
await connector.cleanup(); await connector.cleanup();
} }
} }
function slPath(connectionId: string, sourceName: string): string {
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
}
async function executeValidatedReadOnlySql( async function executeValidatedReadOnlySql(
project: KtxLocalProject, project: KtxLocalProject,
options: CreateLocalProjectMcpContextPortsOptions, options: CreateLocalProjectMcpContextPortsOptions,
@ -201,13 +152,11 @@ export function createLocalProjectMcpContextPorts(
}, },
semanticLayer: { semanticLayer: {
async readSource(input) { async readSource(input) {
const path = slPath(input.connectionId, input.sourceName); const source = await readLocalSlSource(project, {
try { connectionId: input.connectionId,
const result = await project.fileStore.readFile(path); sourceName: input.sourceName,
return { sourceName: input.sourceName, yaml: result.content }; });
} catch { return source ? { sourceName: source.name, yaml: source.yaml } : null;
return null;
}
}, },
async query(input, executionOptions) { async query(input, executionOptions) {
if (!options.semanticLayerCompute) { if (!options.semanticLayerCompute) {

View file

@ -375,6 +375,9 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) { async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
try { try {
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName); const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
if (!file) {
return { errors: [`${sourceName}: no standalone or overlay file found`], warnings: [] };
}
const parsed = YAML.parse(file.content) as SemanticLayerSource; const parsed = YAML.parse(file.content) as SemanticLayerSource;
const isOverlay = parsed.table == null && parsed.sql == null; const isOverlay = parsed.table == null && parsed.sql == null;
const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed); const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);

View file

@ -483,7 +483,7 @@ export class MemoryAgentService {
if (session.connectionId) { if (session.connectionId) {
for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) { for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) {
try { try {
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName).catch(() => null); const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName);
if (file?.content) { if (file?.content) {
const parsed = this.parseYamlOrNull(file.content); const parsed = this.parseYamlOrNull(file.content);
if (parsed) { if (parsed) {

View file

@ -3,6 +3,7 @@ import { buildLiveDatabaseManifestShards, type LiveDatabaseManifestExistingDescr
import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sql/skill-schemas.js'; import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sql/skill-schemas.js';
import type { KtxScanRelationshipConfig } from '../project/config.js'; import type { KtxScanRelationshipConfig } from '../project/config.js';
import type { KtxLocalProject } from '../../context/project/project.js'; import type { KtxLocalProject } from '../../context/project/project.js';
import { isSlYamlPath } from '../../context/sl/source-files.js';
import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js'; import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js';
import { import {
buildKtxRelationshipArtifacts, buildKtxRelationshipArtifacts,
@ -205,7 +206,7 @@ async function loadExistingManifestState(
let files: string[]; let files: string[];
try { try {
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml')); files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter(isSlYamlPath);
} catch { } catch {
return { descriptions, preservedJoins, usage }; return { descriptions, preservedJoins, usage };
} }

View file

@ -2,8 +2,10 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-ex
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js'; import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
import type { KtxMcpProgressCallback } from '../mcp/types.js'; import type { KtxMcpProgressCallback } from '../mcp/types.js';
import type { KtxLocalProject } from '../../context/project/project.js'; import type { KtxLocalProject } from '../../context/project/project.js';
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
import { loadLocalSlSourceRecords } from './local-sl.js'; import { loadLocalSlSourceRecords } from './local-sl.js';
import { toResolvedWire } from './semantic-layer.service.js'; import { toResolvedWire } from './semantic-layer.service.js';
import { assertSafeConnectionId } from './source-files.js';
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js'; import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
const COMPILE_ONLY_REASON = const COMPILE_ONLY_REASON =
@ -24,43 +26,6 @@ export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionRe
dialect: string; dialect: string;
} }
function assertSafePathToken(kind: string, value: string): string {
if (
value.trim().length === 0 ||
value.includes('..') ||
value.includes('\\') ||
value.startsWith('/') ||
value.startsWith('.') ||
value.includes('//')
) {
throw new Error(`Unsafe ${kind}: ${value}`);
}
return value;
}
function assertSafeConnectionId(connectionId: string): string {
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
throw new Error(`Unsafe connection id: ${connectionId}`);
}
return assertSafePathToken('connection id', connectionId);
}
function dialectForDriver(driver: string | undefined): string {
const normalized = (driver ?? 'postgres').toUpperCase();
const map: Record<string, string> = {
POSTGRES: 'postgres',
BIGQUERY: 'bigquery',
SNOWFLAKE: 'snowflake',
MYSQL: 'mysql',
SQLSERVER: 'tsql',
SQLITE: 'sqlite',
DUCKDB: 'duckdb',
CLICKHOUSE: 'clickhouse',
DATABRICKS: 'databricks',
};
return map[normalized] ?? 'postgres';
}
function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string { function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string {
if (requested) { if (requested) {
return assertSafeConnectionId(requested); return assertSafeConnectionId(requested);
@ -93,7 +58,7 @@ export async function compileLocalSlQuery(
): Promise<CompileLocalSlQueryResult> { ): Promise<CompileLocalSlQueryResult> {
await options.onProgress?.({ progress: 0, message: 'Compiling query' }); await options.onProgress?.({ progress: 0, message: 'Compiling query' });
const connectionId = resolveLocalConnectionId(project, options.connectionId); const connectionId = resolveLocalConnectionId(project, options.connectionId);
const dialect = dialectForDriver(project.config.connections[connectionId]?.driver); const dialect = sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver);
const sources = await loadComputableSources(project, connectionId); const sources = await loadComputableSources(project, connectionId);
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' }); await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });

View file

@ -2,7 +2,6 @@ import { join } from 'node:path';
import YAML from 'yaml'; import YAML from 'yaml';
import { z } from 'zod'; import { z } from 'zod';
import type { KtxEmbeddingPort } from '../../context/core/embedding.js'; import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
import type { KtxFileWriteResult } from '../../context/core/file-store.js';
import type { KtxLocalProject } from '../../context/project/project.js'; import type { KtxLocalProject } from '../../context/project/project.js';
import { HybridSearchCore } from '../../context/search/hybrid-search-core.js'; import { HybridSearchCore } from '../../context/search/hybrid-search-core.js';
import type { SearchCandidateGenerator } from '../../context/search/types.js'; import type { SearchCandidateGenerator } from '../../context/search/types.js';
@ -18,6 +17,13 @@ import {
} from './semantic-layer.service.js'; } from './semantic-layer.service.js';
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js'; import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js'; import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
import {
assertSafeConnectionId,
isSafeConnectionId,
isSlYamlPath,
slSourceNameForFile,
sourceNameFromPath,
} from './source-files.js';
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js'; import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js'; import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js'; import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
@ -69,58 +75,10 @@ export type ResolvedSlSource =
| { kind: 'not-found' } | { kind: 'not-found' }
| { kind: 'ambiguous'; connectionIds: string[] }; | { kind: 'ambiguous'; connectionIds: string[] };
const LOCAL_AUTHOR = 'ktx';
const LOCAL_AUTHOR_EMAIL = 'ktx@example.com';
function assertSafePathToken(kind: string, value: string): string {
if (
value.trim().length === 0 ||
value.includes('..') ||
value.includes('\\') ||
value.startsWith('/') ||
value.startsWith('.') ||
value.includes('//')
) {
throw new Error(`Unsafe ${kind}: ${value}`);
}
return value;
}
function assertSafeConnectionId(connectionId: string): string {
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
throw new Error(`Unsafe connection id: ${connectionId}`);
}
return assertSafePathToken('connection id', connectionId);
}
function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
}
function assertSafeSourceName(sourceName: string): string {
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
return assertSafePathToken('semantic-layer source name', sourceName);
}
function isRecord(value: unknown): value is Record<string, unknown> { function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === 'object' && value !== null && !Array.isArray(value); return typeof value === 'object' && value !== null && !Array.isArray(value);
} }
function slPath(connectionId: string, sourceName: string): string {
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
}
function sourceNameFromPath(path: string): string {
return (
path
.split('/')
.at(-1)
?.replace(/\.ya?ml$/, '') ?? path
);
}
function parseYamlRecord(raw: string): Record<string, unknown> { function parseYamlRecord(raw: string): Record<string, unknown> {
const parsed = YAML.parse(raw) as unknown; const parsed = YAML.parse(raw) as unknown;
if (!isRecord(parsed)) { if (!isRecord(parsed)) {
@ -215,12 +173,17 @@ export async function loadLocalSlSourceRecords(
const dir = `semantic-layer/${connectionId}`; const dir = `semantic-layer/${connectionId}`;
const schemaDir = `${dir}/_schema`; const schemaDir = `${dir}/_schema`;
const listed = await project.fileStore.listFiles(dir); const listed = await project.fileStore.listFiles(dir);
const paths = listed.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort(); const paths = listed.files.filter(isSlYamlPath).sort();
const sources = new Map<string, LocalSlSourceRecord>(); const sources = new Map<string, LocalSlSourceRecord>();
for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) { for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
const raw = await project.fileStore.readFile(path); const raw = await project.fileStore.readFile(path);
const tables = manifestTables(parseYamlRecord(raw.content)); let tables: Record<string, ManifestTableEntry> | null;
try {
tables = manifestTables(parseYamlRecord(raw.content));
} catch (error) {
throw new Error(`${path}: ${error instanceof Error ? error.message : String(error)}`);
}
if (!tables) { if (!tables) {
continue; continue;
} }
@ -237,7 +200,29 @@ export async function loadLocalSlSourceRecords(
for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) { for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
const raw = await project.fileStore.readFile(path); const raw = await project.fileStore.readFile(path);
const parsed = parseYamlRecord(raw.content); let parsed: Record<string, unknown>;
try {
parsed = parseYamlRecord(raw.content);
} catch {
// A source mid-edit (e.g. an agent saved half-written YAML) must not take
// down reads, listings, or search for its siblings. Key it by the same
// name the writer side uses (the intact top-level `name:`, recovered even
// when the YAML is broken below it; filename only as a last resort) so a
// broken uppercase/hashed/human-renamed source stays reachable under its
// real name, and surface the raw content for repair.
const brokenName = slSourceNameForFile(path, raw.content);
sources.set(brokenName, {
connectionId,
name: brokenName,
path,
columnCount: 0,
measureCount: 0,
joinCount: 0,
yaml: raw.content,
source: { name: brokenName, grain: [], columns: [], joins: [], measures: [] },
});
continue;
}
const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path); const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
if (parsed.table || parsed.sql) { if (parsed.table || parsed.sql) {
const source = parsedStandaloneSource(parsed, name); const source = parsedStandaloneSource(parsed, name);
@ -292,50 +277,21 @@ export async function validateLocalSlSource(
} }
} }
/** @internal */
export async function writeLocalSlSource(
project: KtxLocalProject,
input: { connectionId: string; sourceName: string; yaml: string },
): Promise<KtxFileWriteResult> {
const validation = await validateLocalSlSource(input.yaml, { project, connectionId: input.connectionId });
if (!validation.valid) {
throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
}
const parsed = parseYamlRecord(input.yaml);
if (typeof parsed.name === 'string' && parsed.name !== input.sourceName) {
throw new Error(`Semantic-layer source name "${parsed.name}" does not match requested path "${input.sourceName}"`);
}
const path = slPath(input.connectionId, input.sourceName);
return project.fileStore.writeFile(
path,
input.yaml.endsWith('\n') ? input.yaml : `${input.yaml}\n`,
LOCAL_AUTHOR,
LOCAL_AUTHOR_EMAIL,
`Write semantic-layer source: ${input.connectionId}/${input.sourceName}`,
);
}
/** @internal */
export async function readLocalSlSource( export async function readLocalSlSource(
project: KtxLocalProject, project: KtxLocalProject,
input: { connectionId: string; sourceName: string }, input: { connectionId: string; sourceName: string },
): Promise<LocalSlSource | null> { ): Promise<LocalSlSource | null> {
const path = slPath(input.connectionId, input.sourceName); // Source identity is the in-file `name:` (mirroring the warehouse identifier
try { // verbatim, e.g. Snowflake's uppercase `WIDGET_SALES`), never the filename. The
const result = await project.fileStore.readFile(path); // record loader resolves standalone files, overlays, manifest-backed sources,
return { // and mid-edit files whose YAML no longer parses — so readers — `ktx sl read`,
...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }), // `ktx sl validate`, and the `sl_read_source` MCP tool — can surface broken
yaml: result.content, // content for repair instead of failing on it.
};
} catch {
const records = await loadLocalSlSourceRecords(project, { const records = await loadLocalSlSourceRecords(project, {
connectionId: input.connectionId, connectionId: input.connectionId,
}); });
const record = records.find((source) => source.name === input.sourceName); const record = records.find((source) => source.name === input.sourceName);
return record ? { ...record } : null; return record ? { ...record } : null;
}
} }
export async function resolveLocalSlSource( export async function resolveLocalSlSource(

View file

@ -6,6 +6,7 @@ import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-sch
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js'; import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
import { normalizeSemanticLayerDescriptions } from './description-normalization.js'; import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js'; import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
import { isSlYamlPath, resolveSlSourceFile, slDeclaredSourceName, slSourceFilePath } from './source-files.js';
import type { import type {
ResolvedSemanticLayerSource, ResolvedSemanticLayerSource,
SemanticLayerColumnOverride, SemanticLayerColumnOverride,
@ -135,8 +136,30 @@ export class SemanticLayerService {
// ── YAML File Operations ──────────────────────────────── // ── YAML File Operations ────────────────────────────────
private sourcePath(connectionId: string, sourceName: string): string { // The in-file `name:` is the source's identity; the filename is only a derived
return `${SL_DIR_PREFIX}/${connectionId}/${sourceName}.yaml`; // label. Rewrites land on the file that already declares the name (humans may
// rename files freely); new sources get a derived filename. A file already
// sitting at the derived path that declares a name declares a *different* one
// (the resolver would have matched it otherwise) — fail instead of clobbering
// it. A nameless/unparseable file there is the broken remains of this very
// source (the derived path is a function of the name), so overwriting it is
// the repair path, not data loss.
private async resolveWritePath(connectionId: string, sourceName: string): Promise<string> {
const existing = await resolveSlSourceFile(this.configService, connectionId, sourceName);
if (existing) {
return existing.path;
}
const path = slSourceFilePath(connectionId, sourceName);
let occupant: string | null = null;
try {
occupant = slDeclaredSourceName((await this.configService.readFile(path)).content);
} catch {
return path;
}
if (occupant !== null) {
throw new Error(`Cannot write source '${sourceName}': ${path} already defines source '${occupant}'`);
}
return path;
} }
async writeSource( async writeSource(
@ -185,39 +208,42 @@ export class SemanticLayerService {
} }
} }
const path = this.sourcePath(connectionId, source.name); const path = await this.resolveWritePath(connectionId, source.name);
const normalizedSource = normalizeSemanticLayerDescriptions(source); const normalizedSource = normalizeSemanticLayerDescriptions(source);
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' }); const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
const message = commitMessage ?? `Update semantic layer source: ${source.name}`; const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
const result = await this.configService.writeFile(path, content, author, authorEmail, message, { const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
skipLock: options?.skipLock, skipLock: options?.skipLock,
}); });
return { ...result, warnings }; // The filename is derived from (or resolved by) the source name — surface
// the actual path so callers don't have to re-resolve it.
return { ...result, path, warnings };
} }
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string }> { /**
const path = this.sourcePath(connectionId, sourceName); * Raw standalone/overlay file for a source, resolved by its in-file `name:`.
const result = await this.configService.readFile(path); * Returns null when no file declares the name (the source may still exist as
return { content: result.content, path }; * a manifest entry under `_schema/`).
*/
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string } | null> {
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
return file ? { content: file.content, path: file.path } : null;
} }
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> { async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
let content: string; const file = await this.readSourceFile(connectionId, sourceName);
try { if (!file) {
const result = await this.readSourceFile(connectionId, sourceName);
content = result.content;
} catch {
return null; return null;
} }
try { try {
return YAML.parse(content) as SemanticLayerSource; return YAML.parse(file.content) as SemanticLayerSource;
} catch (error) { } catch (error) {
// Distinguish a YAML parse failure from a missing file. The file exists but // Distinguish a YAML parse failure from a missing file. The file exists but
// its contents are unparseable — callers that treat null as "does not exist" // its contents are unparseable — callers that treat null as "does not exist"
// could otherwise overwrite the broken file. Surface the parse failure via // could otherwise overwrite the broken file. Surface the parse failure via
// the service logger so the broken source is at least visible. // the service logger so the broken source is at least visible.
this.logger.warn( this.logger.warn(
`[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`, `[loadSource] ${file.path}: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
); );
return null; return null;
} }
@ -231,7 +257,7 @@ export class SemanticLayerService {
let allFiles: string[]; let allFiles: string[];
try { try {
const result = await this.configService.listFiles(dir); const result = await this.configService.listFiles(dir);
allFiles = result.files.filter((f) => f.endsWith('.yaml')); allFiles = result.files.filter((f) => isSlYamlPath(f));
} catch (e) { } catch (e) {
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`; const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
loadErrors.push(message); loadErrors.push(message);
@ -338,7 +364,7 @@ export class SemanticLayerService {
let allFiles: string[]; let allFiles: string[];
try { try {
const listing = await this.configService.listFiles(dir); const listing = await this.configService.listFiles(dir);
allFiles = listing.files.filter((f) => f.endsWith('.yaml')); allFiles = listing.files.filter((f) => isSlYamlPath(f));
} catch { } catch {
return result; return result;
} }
@ -408,7 +434,7 @@ export class SemanticLayerService {
const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`; const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`;
try { try {
const result = await this.configService.listFiles(schemaDir); const result = await this.configService.listFiles(schemaDir);
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml')); const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
for (const filePath of yamlFiles) { for (const filePath of yamlFiles) {
try { try {
const { content } = await this.configService.readFile(filePath); const { content } = await this.configService.readFile(filePath);
@ -449,7 +475,7 @@ export class SemanticLayerService {
let yamlFiles: string[]; let yamlFiles: string[];
try { try {
const result = await this.configService.listFiles(schemaDir); const result = await this.configService.listFiles(schemaDir);
yamlFiles = result.files.filter((f) => f.endsWith('.yaml')); yamlFiles = result.files.filter((f) => isSlYamlPath(f));
} catch { } catch {
return null; return null;
} }
@ -533,7 +559,7 @@ export class SemanticLayerService {
.map((c) => c.name); .map((c) => c.name);
if (absentDeclaredColumns.length > 0) { if (absentDeclaredColumns.length > 0) {
errors.push( errors.push(
`${source.name}.yaml: table "${source.table}" matched manifest ${manifestLabel}, ` + `${source.name}: table "${source.table}" matched manifest ${manifestLabel}, ` +
`but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` + `but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
`Available columns: ${[...manifestColumns.values()].join(', ')}`, `Available columns: ${[...manifestColumns.values()].join(', ')}`,
); );
@ -545,7 +571,7 @@ export class SemanticLayerService {
}); });
if (missingGrainColumns.length > 0) { if (missingGrainColumns.length > 0) {
errors.push( errors.push(
`${source.name}.yaml: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`, `${source.name}: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
); );
} }
@ -562,7 +588,7 @@ export class SemanticLayerService {
}); });
if (missing.length > 0) { if (missing.length > 0) {
errors.push( errors.push(
`${source.name}.yaml: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`, `${source.name}: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
); );
} }
} }
@ -577,7 +603,7 @@ export class SemanticLayerService {
}); });
if (missing.length > 0) { if (missing.length > 0) {
errors.push( errors.push(
`${source.name}.yaml: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`, `${source.name}: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
); );
} }
} }
@ -592,7 +618,7 @@ export class SemanticLayerService {
}); });
if (exprMissing.length > 0) { if (exprMissing.length > 0) {
errors.push( errors.push(
`${source.name}.yaml: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`, `${source.name}: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
); );
} }
@ -606,7 +632,7 @@ export class SemanticLayerService {
}); });
if (filterMissing.length > 0) { if (filterMissing.length > 0) {
errors.push( errors.push(
`${source.name}.yaml: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`, `${source.name}: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
); );
} }
} }
@ -619,7 +645,7 @@ export class SemanticLayerService {
} }
if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) { if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
errors.push( errors.push(
`${source.name}.yaml: join to "${join.to}" references local column ` + `${source.name}: join to "${join.to}" references local column ` +
`"${parsed.localColumn}" that is not a valid output column`, `"${parsed.localColumn}" that is not a valid output column`,
); );
} }
@ -631,7 +657,7 @@ export class SemanticLayerService {
const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase())); const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
if (!targetColumns.has(parsed.targetColumn.toLowerCase())) { if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
errors.push( errors.push(
`${source.name}.yaml: join to "${join.to}" references target column ` + `${source.name}: join to "${join.to}" references target column ` +
`"${parsed.targetColumn}" that does not exist on the target source`, `"${parsed.targetColumn}" that does not exist on the target source`,
); );
} }
@ -650,43 +676,30 @@ export class SemanticLayerService {
return SemanticLayerService.mapDialect(connection.connectionType); return SemanticLayerService.mapDialect(connection.connectionType);
} }
async listSourceNames(connectionId: string): Promise<string[]> {
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
try {
const result = await this.configService.listFiles(dir);
return result.files.filter((f) => f.endsWith('.yaml')).map((f) => f.replace(`${dir}/`, '').replace('.yaml', ''));
} catch {
return [];
}
}
async listFilesForConnection(connectionId: string): Promise<string[]> { async listFilesForConnection(connectionId: string): Promise<string[]> {
const dir = `${SL_DIR_PREFIX}/${connectionId}`; const dir = `${SL_DIR_PREFIX}/${connectionId}`;
try { try {
const result = await this.configService.listFiles(dir, true); const result = await this.configService.listFiles(dir, true);
return result.files.filter((f) => f.endsWith('.yaml')); return result.files.filter((f) => isSlYamlPath(f));
} catch { } catch {
return []; return [];
} }
} }
async readFileByPath(connectionId: string, relativePath: string): Promise<{ content: string; readOnly: boolean }> {
const fullPath = `${SL_DIR_PREFIX}/${connectionId}/${relativePath}`;
const result = await this.configService.readFile(fullPath);
return {
content: result.content,
readOnly: relativePath.startsWith('_schema/'),
};
}
async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) { async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) {
const path = this.sourcePath(connectionId, sourceName); const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
return this.configService.deleteFile(path, author, authorEmail, `Delete semantic layer source: ${sourceName}`); if (!file) {
// `deleteFile` returns null for a missing path, which would let a no-op
// delete read as success. Distinguish the two real cases instead.
if (await this.isManifestBacked(connectionId, sourceName)) {
throw new Error(
`Source '${sourceName}' is defined by the scan manifest (_schema/) and has no overlay file to delete. ` +
`Rescan the connection to remove it from the manifest.`,
);
} }
throw new Error(`Semantic-layer source not found: ${connectionId}/${sourceName}`);
async getSourceHistory(connectionId: string, sourceName: string) { }
const path = this.sourcePath(connectionId, sourceName); return this.configService.deleteFile(file.path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
return this.configService.getFileHistory(path);
} }
/** /**
@ -815,7 +828,7 @@ export class SemanticLayerService {
return []; return [];
} }
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\/.+\.ya?ml$/.test(file)); const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\//.test(file) && isSlYamlPath(file));
const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = []; const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = [];
for (const filePath of schemaFiles) { for (const filePath of schemaFiles) {
const connectionId = filePath.split('/')[1]; const connectionId = filePath.split('/')[1];
@ -844,7 +857,7 @@ export class SemanticLayerService {
let allFiles: string[]; let allFiles: string[];
try { try {
const result = await this.configService.listFiles(dir); const result = await this.configService.listFiles(dir);
allFiles = result.files.filter((f) => f.endsWith('.yaml')); allFiles = result.files.filter((f) => isSlYamlPath(f));
} catch { } catch {
return warnings; return warnings;
} }
@ -1030,7 +1043,7 @@ export class SemanticLayerService {
try { try {
const result = await this.configService.listFiles(dir); const result = await this.configService.listFiles(dir);
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml')); const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
for (const filePath of yamlFiles) { for (const filePath of yamlFiles) {
try { try {

View file

@ -0,0 +1,160 @@
import { createHash } from 'node:crypto';
import YAML from 'yaml';
import type { KtxFileStorePort } from '../../context/core/file-store.js';
// Semantic-layer source identity lives in the file's `name:` field, which mirrors
// the warehouse identifier verbatim (Snowflake's uppercase `SIGNED_UP`, `EVENT$LOG`).
// The filename is a derived label and never participates in identity: reads resolve
// a source by scanning the connection directory and matching `name:`, and writes
// reuse the resolved file's path, so files can be freely renamed by humans without
// changing which source they define.
/**
 * Guard a value that is about to be interpolated into a path as one segment.
 *
 * @param kind - Human-readable label for the value, used only in the error message.
 * @param value - The candidate path token.
 * @returns `value` unchanged when it passes every check.
 * @throws Error `Unsafe <kind>: <value>` when the value is blank, contains
 *   `..`, a backslash or `//`, or starts with `/` or `.`.
 */
function assertSafePathToken(kind: string, value: string): string {
  const isBlank = value.trim() === '';
  // Sequences that are rejected anywhere in the token.
  const hasForbiddenSequence = ['..', '\\', '//'].some((sequence) => value.includes(sequence));
  // Prefixes that would make the token absolute or dot-leading.
  const hasForbiddenPrefix = ['/', '.'].some((prefix) => value.startsWith(prefix));

  if (isBlank || hasForbiddenSequence || hasForbiddenPrefix) {
    throw new Error(`Unsafe ${kind}: ${value}`);
  }
  return value;
}
/**
 * Validate a connection id before it is used as a directory name.
 *
 * The id must first satisfy `isSafeConnectionId`; it is then passed through
 * `assertSafePathToken` as a second, independent check.
 *
 * @returns `connectionId` unchanged when it is safe.
 * @throws Error `Unsafe connection id: <id>` when either check rejects it.
 */
export function assertSafeConnectionId(connectionId: string): string {
  if (isSafeConnectionId(connectionId)) {
    return assertSafePathToken('connection id', connectionId);
  }
  throw new Error(`Unsafe connection id: ${connectionId}`);
}
/**
 * Type guard for connection ids: a non-empty string of ASCII letters, digits,
 * `_` and `-` whose first character is a letter or digit. `undefined` is
 * never safe.
 */
export function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
  if (typeof connectionId !== 'string') {
    return false;
  }
  return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
}
/**
 * Filename-derived label for a path: the text after the last `/`, with one
 * trailing `.yaml` or `.yml` extension removed. Other extensions are kept.
 */
export function sourceNameFromPath(path: string): string {
  // lastIndexOf yields -1 when there is no slash, so the slice starts at 0.
  const basename = path.slice(path.lastIndexOf('/') + 1);
  return basename.replace(/\.ya?ml$/, '');
}
// Single source of truth for "is this path a semantic-layer YAML file?".
// ktx only ever writes `.yaml` (see `slSourceFileName`), but people rename
// files and the dbt ecosystem favours `.yml`, so both extensions count. Every
// reader should go through this predicate: if listings disagree about which
// extension they accept, the same file is visible to some entry points and
// hidden from others. The match is case-sensitive.
export function isSlYamlPath(path: string): boolean {
  return /\.ya?ml$/.test(path);
}
// Basenames Windows refuses whatever the extension. This is a property of the
// filesystem rather than of any warehouse, so a static list is acceptable.
const WINDOWS_RESERVED_BASENAME = /^(?:con|prn|aux|nul|com[0-9]|lpt[0-9])$/;
// A name that can be used verbatim as a basename: lowercase letters, digits
// and `_`, starting with a letter or digit, at most 64 characters.
const SAFE_FILE_BASENAME = /^[a-z0-9][a-z0-9_]{0,63}$/;

/**
 * Derive the filename for a semantic-layer source. Defined for every possible
 * source name — it never throws.
 *
 * - A name that is already safe lowercase snake_case (and not a Windows
 *   reserved basename) becomes `<name>.yaml`.
 * - Any other name becomes `<slug>-<first 8 hex chars of sha256(name)>.yaml`,
 *   where the slug is the lowercased name with every run of other characters
 *   collapsed to a single `_`, leading/trailing `_` removed, and the result cut
 *   to 64 characters (`src` when nothing is left).
 *
 * The two forms cannot collide with each other: verbatim filenames contain no
 * `-`, hashed ones always end in `-<8 hex>`. Because the slug is lowercased and
 * the hash is taken over the original name, names differing only by case get
 * different hash suffixes rather than paths that collide on case-insensitive
 * filesystems (macOS APFS, Windows). Two different names share a filename only
 * if they also share the slug and the truncated hash.
 *
 * @internal
 */
export function slSourceFileName(sourceName: string): string {
  const usableVerbatim = SAFE_FILE_BASENAME.test(sourceName) && !WINDOWS_RESERVED_BASENAME.test(sourceName);
  if (usableVerbatim) {
    return `${sourceName}.yaml`;
  }

  const digest = createHash('sha256').update(sourceName, 'utf-8').digest('hex');

  const underscored = sourceName.toLowerCase().replace(/[^a-z0-9_]+/g, '_');
  const collapsed = underscored.replace(/_+/g, '_');
  const trimmed = collapsed.replace(/^_+|_+$/g, '');
  const slug = trimmed.slice(0, 64) || 'src';

  return `${slug}-${digest.slice(0, 8)}.yaml`;
}
/**
 * Derived path for a source: `semantic-layer/<connectionId>/<derived filename>`.
 *
 * @throws Error when `connectionId` fails `assertSafeConnectionId`. The source
 *   name itself is never rejected; `slSourceFileName` maps any name to a filename.
 */
export function slSourceFilePath(connectionId: string, sourceName: string): string {
  const connectionDir = assertSafeConnectionId(connectionId);
  const fileName = slSourceFileName(sourceName);
  return `semantic-layer/${connectionDir}/${fileName}`;
}
/** A semantic-layer YAML file as returned by `resolveSlSourceFile`. */
export interface SlSourceFile {
/** Path of the file, exactly as the file store listed it. */
path: string;
/** Raw text of the file as read from the file store (not parsed). */
content: string;
}
// Keyed the same way as `loadLocalSlSourceRecords`: the in-file `name:` is the
// identity. The filename-derived label is only a fallback, used when the file
// declares no name or is so broken that the name cannot be recovered. A file
// left mid-edit with a syntax error below its `name:` line still reports its
// declared name (see `slDeclaredSourceName`), so a human-renamed source stays
// addressable by name while broken instead of silently reverting to its filename.
export function slSourceNameForFile(path: string, content: string): string {
  const declared = slDeclaredSourceName(content);
  return declared !== null ? declared : sourceNameFromPath(path);
}
/**
 * The `name:` a semantic-layer YAML file declares, or null when the file has
 * no usable name: the top-level `name` is missing, empty, or not a string, or
 * the document could not be parsed at all.
 *
 * Null is how `writeSource` tells a genuine name conflict at a derived path
 * apart from the broken remains of the source being written, which a rewrite
 * must repair rather than refuse.
 *
 * Uses `parseDocument`, not `parse`: a file with a syntax error below the
 * `name:` line still parses into a partial tree whose top-level `name:` is
 * intact. `parse` would throw on the same input and drop the source to its
 * filename — wrong for human-renamed files, whose filename is not the name.
 */
export function slDeclaredSourceName(content: string): string | null {
  let doc: ReturnType<typeof YAML.parseDocument>;
  try {
    doc = YAML.parseDocument(content);
  } catch {
    // Nothing recoverable, so treat the file as nameless.
    return null;
  }

  const declared = doc.get('name');
  if (typeof declared !== 'string' || declared === '') {
    return null;
  }
  return declared;
}
/**
 * Find the standalone/overlay file that defines `sourceName` for a connection.
 *
 * Lists `semantic-layer/<connectionId>`, ignores anything under `_schema/` and
 * anything that is not a YAML file, then reads each remaining file in sorted
 * path order and compares its name (see `slSourceNameForFile`) to `sourceName`.
 *
 * @returns The single matching file, or null when no file declares the name
 *   (the source may still exist as a manifest entry under `_schema/`).
 * @throws Error when `connectionId` is unsafe.
 * @throws Error when more than one file declares the same name — that breaks
 *   the one-file-per-name invariant and must be repaired by hand rather than
 *   silently picking one. The message lists every offending path.
 */
export async function resolveSlSourceFile(
  fileStore: Pick<KtxFileStorePort, 'listFiles' | 'readFile'>,
  connectionId: string,
  sourceName: string,
): Promise<SlSourceFile | null> {
  const connectionDir = `semantic-layer/${assertSafeConnectionId(connectionId)}`;
  const manifestPrefix = `${connectionDir}/_schema/`;

  const { files } = await fileStore.listFiles(connectionDir);
  const candidates = files
    .filter((file) => !file.startsWith(manifestPrefix))
    .filter((file) => isSlYamlPath(file))
    .sort();

  // Every candidate is read, one at a time, so duplicates are always detected.
  const declaring: SlSourceFile[] = [];
  for (const path of candidates) {
    const { content } = await fileStore.readFile(path);
    if (slSourceNameForFile(path, content) !== sourceName) {
      continue;
    }
    declaring.push({ path, content });
  }

  if (declaring.length > 1) {
    const paths = declaring.map((match) => match.path).join(', ');
    throw new Error(`Multiple semantic-layer files declare source "${sourceName}": ${paths}`);
  }

  const [only] = declaring;
  return only ?? null;
}

View file

@ -46,12 +46,8 @@ export abstract class BaseSemanticLayerTool<TInput extends ZodType = ZodType> ex
): Promise<string | null> { ): Promise<string | null> {
const semanticLayerService = context?.session?.semanticLayerService ?? this.semanticLayerService; const semanticLayerService = context?.session?.semanticLayerService ?? this.semanticLayerService;
try { const file = await semanticLayerService.readSourceFile(connectionId, sourceName);
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName); return file?.content ?? null;
return content;
} catch {
return null;
}
} }
protected buildMarkdown( protected buildMarkdown(

View file

@ -113,13 +113,8 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
} }
// Read existing source // Read existing source
let currentYaml: string | null = null; const currentFile = await semanticLayerService.readSourceFile(connectionId, sourceName);
try { const currentYaml = currentFile?.content ?? null;
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
currentYaml = content;
} catch {
currentYaml = null;
}
if (!currentYaml) { if (!currentYaml) {
const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName); const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
if (manifestBacked) { if (manifestBacked) {
@ -165,6 +160,20 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
} catch (e) { } catch (e) {
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName); return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
} }
// The in-file `name:` is the source's identity — an edited name would make
// writeSource create a second source instead of updating this one.
if (source.name !== sourceName) {
return this.buildOutput(
false,
[
`Edits change "name:" from "${sourceName}" to "${source.name ?? '<missing>'}" — renaming is not supported. ` +
`Delete the source and recreate it under the new name.`,
],
sourceName,
);
}
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest }); source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
// Re-serialize and write // Re-serialize and write

View file

@ -1,10 +1,11 @@
import YAML from 'yaml'; import YAML from 'yaml';
import type { GitService } from '../../../context/core/git.service.js'; import type { GitService } from '../../../context/core/git.service.js';
import type { KtxFileStorePort } from '../../../context/core/file-store.js'; import type { KtxFileListResult, KtxFileReadResult, KtxFileStorePort } from '../../../context/core/file-store.js';
import { SYSTEM_GIT_AUTHOR } from '../../../context/tools/authors.js'; import { SYSTEM_GIT_AUTHOR } from '../../../context/tools/authors.js';
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js'; import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
import { sourceOverlaySchema } from '../schemas.js'; import { sourceOverlaySchema } from '../schemas.js';
import { SemanticLayerService } from '../semantic-layer.service.js'; import { SemanticLayerService } from '../semantic-layer.service.js';
import { resolveSlSourceFile, slSourceFilePath } from '../source-files.js';
import type { SemanticLayerSource } from '../types.js'; import type { SemanticLayerSource } from '../types.js';
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js'; import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
@ -23,9 +24,6 @@ export interface SourceValidationResult {
warnings: string[]; warnings: string[];
} }
const slSourcePath = (connectionId: string, sourceName: string): string =>
`semantic-layer/${connectionId}/${sourceName}.yaml`;
function resolveDialect(warehouse: string | null): string | null { function resolveDialect(warehouse: string | null): string | null {
if (!warehouse) { if (!warehouse) {
return null; return null;
@ -63,24 +61,21 @@ export async function validateSingleSource(
const errors: string[] = []; const errors: string[] = [];
const warnings: string[] = []; const warnings: string[] = [];
let content: string; const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
try { if (!file) {
const result = await deps.semanticLayerService.readSourceFile(connectionId, sourceName); errors.push(`${sourceName}: no standalone or overlay file found`);
content = result.content;
} catch {
errors.push(`${sourceName}.yaml: file not found`);
return { errors, warnings }; return { errors, warnings };
} }
let parsed: Record<string, unknown>; let parsed: Record<string, unknown>;
try { try {
parsed = YAML.parse(content); parsed = YAML.parse(file.content);
} catch (e) { } catch (e) {
errors.push(`${sourceName}.yaml: invalid YAML — ${e instanceof Error ? e.message : String(e)}`); errors.push(`${sourceName}: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
return { errors, warnings }; return { errors, warnings };
} }
if (!parsed || typeof parsed !== 'object') { if (!parsed || typeof parsed !== 'object') {
errors.push(`${sourceName}.yaml: top-level content is not an object`); errors.push(`${sourceName}: top-level content is not an object`);
return { errors, warnings }; return { errors, warnings };
} }
@ -89,7 +84,7 @@ export async function validateSingleSource(
const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName); const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
if (isManifestBacked) { if (isManifestBacked) {
errors.push( errors.push(
`${sourceName}.yaml: standalone source shadows an existing manifest entry — ` + `${sourceName}: standalone source shadows an existing manifest entry — ` +
`writing it as-is drops the manifest's columns and joins. ` + `writing it as-is drops the manifest's columns and joins. ` +
`Remove "sql:", "table:", "grain:", and base-table "columns:" and keep only ` + `Remove "sql:", "table:", "grain:", and base-table "columns:" and keep only ` +
`"name:" plus overlay fields such as "measures:", "segments:", "descriptions:", ` + `"name:" plus overlay fields such as "measures:", "segments:", "descriptions:", ` +
@ -103,21 +98,21 @@ export async function validateSingleSource(
const result = schema.safeParse(parsed); const result = schema.safeParse(parsed);
if (!result.success) { if (!result.success) {
const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; '); const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
errors.push(`${sourceName}.yaml: schema — ${issues}`); errors.push(`${sourceName}: schema — ${issues}`);
const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0]))); const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
if (errorPaths.has('joins')) { if (errorPaths.has('joins')) {
warnings.push( warnings.push(
`${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`, `${sourceName}: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
); );
} }
if (errorPaths.has('columns')) { if (errorPaths.has('columns')) {
warnings.push( warnings.push(
`${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`, `${sourceName}: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
); );
} }
if (errorPaths.has('measures')) { if (errorPaths.has('measures')) {
warnings.push( warnings.push(
`${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`, `${sourceName}: hint — measure format: {name, expr, description (optional), filter (optional)}`,
); );
} }
return { errors, warnings }; return { errors, warnings };
@ -135,7 +130,7 @@ export async function validateSingleSource(
const seenMeasures = new Set<string>(); const seenMeasures = new Set<string>();
for (const m of measures) { for (const m of measures) {
if (seenMeasures.has(m.name)) { if (seenMeasures.has(m.name)) {
errors.push(`${sourceName}.yaml: duplicate measure name "${m.name}"`); errors.push(`${sourceName}: duplicate measure name "${m.name}"`);
} }
seenMeasures.add(m.name); seenMeasures.add(m.name);
} }
@ -168,7 +163,7 @@ export async function validateSingleSource(
const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase())); const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
if (missing.length > 0) { if (missing.length > 0) {
errors.push( errors.push(
`${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`, `${sourceName}: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
); );
} }
} catch (e) { } catch (e) {
@ -205,7 +200,7 @@ function formatProbeError(args: {
const errMsg = error instanceof Error ? error.message : String(error); const errMsg = error instanceof Error ? error.message : String(error);
const refColumns = sourceColumns.filter((c) => referencesColumn(probeSql, c.name)); const refColumns = sourceColumns.filter((c) => referencesColumn(probeSql, c.name));
const lines: string[] = [ const lines: string[] = [
measureName ? `${sourceName}.yaml: measure "${measureName}" ${headline}.` : `${sourceName}.yaml: ${headline}.`, measureName ? `${sourceName}: measure "${measureName}" ${headline}.` : `${sourceName}: ${headline}.`,
]; ];
if (warehouse) { if (warehouse) {
lines.push(` Warehouse: ${warehouse}`); lines.push(` Warehouse: ${warehouse}`);
@ -249,7 +244,7 @@ async function probeOverlayMeasures(
composed = all.find((s) => s.name === sourceName); composed = all.find((s) => s.name === sourceName);
} catch (e) { } catch (e) {
errors.push( errors.push(
`${sourceName}.yaml: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`, `${sourceName}: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
); );
return errors; return errors;
} }
@ -289,6 +284,26 @@ async function probeOverlayMeasures(
return errors; return errors;
} }
/**
 * A read-only view of the config repo at one commit, shaped for
 * `resolveSlSourceFile` so name-to-file resolution runs against history exactly
 * as it does against the working tree — one resolver, two backing stores. Used
 * to recover the path a source occupied at `preHead` after the live file is gone.
 *
 * @param git - Git service used to list and read files at the commit.
 * @param commitHash - The commit every listing and read is pinned to.
 */
function gitCommitFileStore(
  git: GitService,
  commitHash: string,
): Pick<KtxFileStorePort, 'listFiles' | 'readFile'> {
  const listFiles = async (path: string): Promise<KtxFileListResult> => {
    const files = await git.listFilesAtCommit(path, commitHash);
    return { files };
  };

  const readFile = async (path: string): Promise<KtxFileReadResult> => {
    const content = await git.getFileAtCommit(path, commitHash);
    return { content };
  };

  return { listFiles, readFile };
}
/** /**
* Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't * Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
* exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate * exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
@ -300,8 +315,16 @@ export async function revertSourceToPreHead(
preHead: string | null, preHead: string | null,
sourceName: string, sourceName: string,
): Promise<string> { ): Promise<string> {
const relPath = slSourcePath(connectionId, sourceName); // Find the file that defines this source. While it is still on disk
// (invalid-but-present) the live resolver finds it by its in-file `name:`.
// Once the session deleted it, the path is gone too — and humans rename files
// freely, so it is NOT the writer-derived filename. Recover it from history by
// resolving the name against the preHead commit instead of guessing.
const live = await resolveSlSourceFile(deps.configService, connectionId, sourceName);
let relPath: string;
let preContent: string | null = null; let preContent: string | null = null;
if (live) {
relPath = live.path;
if (preHead) { if (preHead) {
try { try {
preContent = await deps.gitService.getFileAtCommit(relPath, preHead); preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
@ -309,6 +332,13 @@ export async function revertSourceToPreHead(
preContent = null; preContent = null;
} }
} }
} else {
const atPreHead = preHead
? await resolveSlSourceFile(gitCommitFileStore(deps.gitService, preHead), connectionId, sourceName)
: null;
relPath = atPreHead?.path ?? slSourceFilePath(connectionId, sourceName);
preContent = atPreHead?.content ?? null;
}
if (preContent !== null) { if (preContent !== null) {
await deps.configService.writeFile( await deps.configService.writeFile(

View file

@ -22,8 +22,12 @@ const slWriteSourceInputSchema = z.object({
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'), connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
sourceName: z sourceName: z
.string() .string()
.regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case (lowercase alphanumeric and underscores)') .min(1)
.describe('Name of the source to create, edit, or delete'), .describe(
"Name of the source to create, edit, or delete. Must equal the source's `name:`. Use the verbatim " +
'warehouse identifier when overlaying a manifest source (e.g. SIGNED_UP); snake_case is recommended ' +
'for new standalone sources.',
),
source: sourceInputSchema source: sourceInputSchema
.optional() .optional()
.describe( .describe(
@ -152,6 +156,17 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
); );
} }
// The in-file `name:` is the source's identity; the file is written under
// source.name while the orphan/shadow checks key on sourceName — a mismatch
// would validate one source and save another.
if (input.source.name !== sourceName) {
return this.buildOutput(
false,
[`source.name "${input.source.name}" does not match sourceName "${sourceName}" — they must be identical.`],
sourceName,
);
}
return this.writeFullSource( return this.writeFullSource(
connectionId, connectionId,
input.source, input.source,
@ -253,12 +268,8 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
connectionId: string, connectionId: string,
sourceName: string, sourceName: string,
): Promise<string | null> { ): Promise<string | null> {
try { const file = await service.readSourceFile(connectionId, sourceName);
const { content } = await service.readSourceFile(connectionId, sourceName); return file?.content ?? null;
return content;
} catch {
return null;
}
} }
private async rejectOrphanOverlay( private async rejectOrphanOverlay(

View file

@ -0,0 +1,23 @@
import type { SqlAnalysisDialect } from './ports.js';
// One mapping from ktx connection identity to the sqlglot dialect name used by
// the Python daemon (SQL analysis, read-only validation) and semantic-layer
// compute. Keys cover both vocabularies that name a connection's engine:
// ktx.yaml driver names ("postgres", "sqlserver") and the local connection-type
// spellings exposed by KtxConnectionInfo.connectionType ("POSTGRESQL").
// Keys are stored lowercase; lookups lowercase the input before matching.
const SQLGLOT_DIALECTS: Record<string, SqlAnalysisDialect> = {
  postgres: 'postgres',
  postgresql: 'postgres',
  bigquery: 'bigquery',
  snowflake: 'snowflake',
  mysql: 'mysql',
  sqlserver: 'tsql',
  sqlite: 'sqlite',
  duckdb: 'duckdb',
  clickhouse: 'clickhouse',
  databricks: 'databricks',
};

/**
 * Resolve the sqlglot dialect for a connection's driver or connection type.
 *
 * Matching is case-insensitive. A missing or unrecognised driver falls back
 * to `'postgres'`.
 *
 * @param driver - Driver name or connection-type spelling; may be undefined.
 * @returns The mapped dialect, or `'postgres'` when there is no mapping.
 */
export function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
  const key = (driver ?? '').toLowerCase();
  // Only own keys count: a bare index into the object literal would also
  // resolve inherited members such as "constructor" or "__proto__", which are
  // non-nullish and would therefore skip the fallback below.
  const dialect = Object.prototype.hasOwnProperty.call(SQLGLOT_DIALECTS, key)
    ? SQLGLOT_DIALECTS[key]
    : undefined;
  return dialect ?? 'postgres';
}

View file

@ -13,7 +13,7 @@ import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
import { readIngestReportSnapshotFile } from './ingest-report-file.js'; import { readIngestReportSnapshotFile } from './ingest-report-file.js';
import { createCliOperationalLogger } from './io/logger.js'; import { createCliOperationalLogger } from './io/logger.js';
import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import { createKtxCliLocalIngestAdapters, resolveKtxCliSqlAnalysis } from './local-adapters.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js'; import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js';
import { import {
@ -87,6 +87,7 @@ export interface KtxIngestDeps {
| 'memoryModel' | 'memoryModel'
| 'semanticLayerCompute' | 'semanticLayerCompute'
| 'queryExecutor' | 'queryExecutor'
| 'sqlAnalysis'
| 'logger' | 'logger'
| 'pullConfigOptions' | 'pullConfigOptions'
>; >;
@ -724,7 +725,7 @@ export async function runKtxIngest(
const localIngestOptions = deps.localIngestOptions ?? {}; const localIngestOptions = deps.localIngestOptions ?? {};
const managedDaemon = managedDaemonOptionsForIngestRun(args, deps.runtimeIo ?? io); const managedDaemon = managedDaemonOptionsForIngestRun(args, deps.runtimeIo ?? io);
const operationalLogger = createCliOperationalLogger(io, args.outputMode); const operationalLogger = createCliOperationalLogger(io, args.outputMode);
const adapterOptions = { const baseAdapterOptions = {
...(localIngestOptions.pullConfigOptions ?? {}), ...(localIngestOptions.pullConfigOptions ?? {}),
...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
...(managedDaemon ? { managedDaemon } : {}), ...(managedDaemon ? { managedDaemon } : {}),
@ -734,6 +735,10 @@ export async function runKtxIngest(
: {}), : {}),
logger: operationalLogger, logger: operationalLogger,
}; };
// One parser-backed SQL analysis port per run: the historic-sql adapter and
// the ingest sql_execution tool share the same daemon-backed validator.
const sqlAnalysis = localIngestOptions.sqlAnalysis ?? resolveKtxCliSqlAnalysis(baseAdapterOptions);
const adapterOptions = { ...baseAdapterOptions, sqlAnalysis };
const queryExecutor = const queryExecutor =
localIngestOptions.queryExecutor ?? localIngestOptions.queryExecutor ??
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(ingestProject); (deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(ingestProject);
@ -783,6 +788,7 @@ export async function runKtxIngest(
metabaseConnectionId: args.connectionId, metabaseConnectionId: args.connectionId,
...localIngestOptions, ...localIngestOptions,
queryExecutor, queryExecutor,
sqlAnalysis,
trigger: 'manual_resync', trigger: 'manual_resync',
jobIdFactory: deps.jobIdFactory, jobIdFactory: deps.jobIdFactory,
embeddingProvider, embeddingProvider,
@ -861,6 +867,7 @@ export async function runKtxIngest(
jobId, jobId,
...localIngestOptions, ...localIngestOptions,
queryExecutor, queryExecutor,
sqlAnalysis,
pullConfigOptions: adapterOptions, pullConfigOptions: adapterOptions,
embeddingProvider, embeddingProvider,
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}), ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),

View file

@ -71,7 +71,7 @@ function ktxCliLookerOptions(
}; };
} }
function ktxCliHistoricSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions) { export function resolveKtxCliSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions): SqlAnalysisPort {
if (options.sqlAnalysis) { if (options.sqlAnalysis) {
return options.sqlAnalysis; return options.sqlAnalysis;
} }
@ -289,7 +289,7 @@ function historicSqlOptionsForLocalRun(
} }
const base = { const base = {
sqlAnalysis: ktxCliHistoricSqlAnalysis(options), sqlAnalysis: resolveKtxCliSqlAnalysis(options),
}; };
if (dialect === 'postgres') { if (dialect === 'postgres') {

View file

@ -12,7 +12,7 @@ LookML views map to SL sources, `measure:` to measures, `explore: { join: }` to
| LookML | KTX form | Notes | | LookML | KTX form | Notes |
|---|---|---| |---|---|---|
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns | | `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
| `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists | | `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists |
| `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in | | `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in |
| `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone | | `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone |

View file

@ -12,8 +12,8 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to
| MetricFlow | KTX form | Notes | | MetricFlow | KTX form | Notes |
|---|---|---| |---|---|---|
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. | | `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
| `semantic_model: X { model: source('s','t') }` | **Overlay** at `<connId>/X.yaml` over table `t`. | Same shape; `source()` still resolves to a physical table. | | `semantic_model: X { model: source('s','t') }` | **Overlay** named `X` over table `t`. | Same shape; `source()` still resolves to a physical table. |
| `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. | | `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. |
| `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. | | `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. |
| `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "<agg>(<expr>)" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. | | `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "<agg>(<expr>)" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. |

View file

@ -21,7 +21,7 @@ skills must verify warehouse identifiers with `discover_data`,
## Part 1 - Schema reference ## Part 1 - Schema reference
An SL source is a YAML file at `semantic-layer/<connectionId>/<source_name>.yaml`. There are three flavors: An SL source is a YAML file under `semantic-layer/<connectionId>/`. The file's `name:` field is the source's identity — it mirrors the warehouse identifier verbatim (e.g. Snowflake's uppercase `SIGNED_UP`); the filename is only a derived label. Always address sources by name through the `sl_*` tools, never by file path. There are three flavors:
### Overlay sources ### Overlay sources

View file

@ -1,5 +1,9 @@
import { describe, expect, it } from 'vitest'; import { describe, expect, it } from 'vitest';
import { assertReadOnlySql, limitSqlForExecution } from '../../../src/context/connections/read-only-sql.js'; import {
assertReadOnlySql,
limitSqlForExecution,
stripTrailingSqlNoise,
} from '../../../src/context/connections/read-only-sql.js';
describe('assertReadOnlySql', () => { describe('assertReadOnlySql', () => {
it('allows select and with queries', () => { it('allows select and with queries', () => {
@ -15,6 +19,51 @@ describe('assertReadOnlySql', () => {
'Only read-only SELECT/WITH queries can be executed locally', 'Only read-only SELECT/WITH queries can be executed locally',
); );
}); });
it('accepts read-only queries that begin with leading comments', () => {
expect(assertReadOnlySql('-- daily widget sales\nselect count(*) from public.widget_sales')).toBe(
'select count(*) from public.widget_sales',
);
expect(assertReadOnlySql('/* block */\n with paid as (select 1) select * from paid')).toContain('with paid');
});
it('still rejects mutating statements hidden behind leading comments', () => {
expect(() => assertReadOnlySql('-- harmless\n delete from orders')).toThrow(
'Only read-only SELECT/WITH queries can be executed locally',
);
});
it('rejects a second statement smuggled after a semicolon', () => {
expect(() => assertReadOnlySql('select 1; drop table orders')).toThrow(
'Only one SQL statement can be executed.',
);
expect(() => assertReadOnlySql('select 1;\n-- pad\ndelete from orders')).toThrow(
'Only one SQL statement can be executed.',
);
expect(() => assertReadOnlySql('select 1; /* pad */ truncate orders;')).toThrow(
'Only one SQL statement can be executed.',
);
});
it('accepts trailing semicolons, including repeated ones followed by comments', () => {
expect(assertReadOnlySql('select 1;')).toBe('select 1;');
expect(assertReadOnlySql('select 1 ;; \n')).toBe('select 1 ;;');
expect(assertReadOnlySql('select 1; -- done')).toBe('select 1; -- done');
});
it('ignores semicolons inside string literals, quoted identifiers, and comments', () => {
expect(assertReadOnlySql("select string_agg(name, '; ') from t")).toBe("select string_agg(name, '; ') from t");
expect(assertReadOnlySql("select 'it''s; quoted' from t")).toBe("select 'it''s; quoted' from t");
expect(assertReadOnlySql('select ";" from "t;u"')).toBe('select ";" from "t;u"');
expect(assertReadOnlySql('select 1 -- tail; comment')).toBe('select 1 -- tail; comment');
expect(assertReadOnlySql('select 1 /* a;b */ + 2')).toBe('select 1 /* a;b */ + 2');
});
it('rejects statements smuggled after a string literal that closes a semicolon early', () => {
expect(() => assertReadOnlySql("select 'a'; delete from orders")).toThrow(
'Only one SQL statement can be executed.',
);
});
}); });
describe('limitSqlForExecution', () => { describe('limitSqlForExecution', () => {
@ -27,4 +76,42 @@ describe('limitSqlForExecution', () => {
it('returns the trimmed SQL when no maxRows value is provided', () => { it('returns the trimmed SQL when no maxRows value is provided', () => {
expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders'); expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders');
}); });
it('strips leading comments before wrapping with a row limit', () => {
expect(limitSqlForExecution('-- top customers\nselect * from public.orders', 25)).toBe(
'select * from (select * from public.orders) as ktx_query_result limit 25',
);
});
it('drops a trailing semicolon followed by a comment so the subquery stays valid', () => {
// The single-statement gate accepts `select 1; -- done`; without stripping
// the terminator the wrapper would embed `select 1; -- done` and comment out
// the closing paren and limit clause.
expect(limitSqlForExecution('select 1; -- done', 5)).toBe(
'select * from (select 1) as ktx_query_result limit 5',
);
expect(limitSqlForExecution('select 1; /* note */', 5)).toBe(
'select * from (select 1) as ktx_query_result limit 5',
);
});
it('drops a trailing line comment with no semicolon before wrapping', () => {
expect(limitSqlForExecution('select 1 -- done', 5)).toBe('select * from (select 1) as ktx_query_result limit 5');
});
});
describe('stripTrailingSqlNoise', () => {
it('removes trailing semicolons, comments, and whitespace', () => {
expect(stripTrailingSqlNoise('select 1;')).toBe('select 1');
expect(stripTrailingSqlNoise('select 1 ;; ')).toBe('select 1');
expect(stripTrailingSqlNoise('select 1; -- done')).toBe('select 1');
expect(stripTrailingSqlNoise('select 1 -- done')).toBe('select 1');
expect(stripTrailingSqlNoise('select 1; /* trailing */')).toBe('select 1');
});
it('preserves semicolons and comment markers inside literals and mid-statement', () => {
expect(stripTrailingSqlNoise("select 'a; -- b'")).toBe("select 'a; -- b'");
expect(stripTrailingSqlNoise('select 1 /* a;b */ + 2')).toBe('select 1 /* a;b */ + 2');
expect(stripTrailingSqlNoise('select ";" from "t;u"')).toBe('select ";" from "t;u"');
});
}); });

View file

@ -1,6 +1,6 @@
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises'; import { mkdir, mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os'; import { tmpdir } from 'node:os';
import { join } from 'node:path'; import { dirname, join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxCoreConfig } from '../../../src/context/core/config.js'; import type { KtxCoreConfig } from '../../../src/context/core/config.js';
import { GitService } from '../../../src/context/core/git.service.js'; import { GitService } from '../../../src/context/core/git.service.js';
@ -35,10 +35,29 @@ describe('GitService', () => {
}); });
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => { const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
await mkdir(dirname(join(tempDir, filePath)), { recursive: true });
await writeFile(join(tempDir, filePath), content, 'utf-8'); await writeFile(join(tempDir, filePath), content, 'utf-8');
return service.commitFile(filePath, message, 'Test', 'test@example.com'); return service.commitFile(filePath, message, 'Test', 'test@example.com');
}; };
describe('listFilesAtCommit', () => {
it('lists matching paths at a commit and recovers files deleted since', async () => {
await writeAndCommit('semantic-layer/warehouse/custom.yaml', 'name: orders\n');
const atSeed = await service.revParseHead();
await service.deleteFile('semantic-layer/warehouse/custom.yaml', 'drop', 'Test', 'test@example.com');
// HEAD no longer has the file; the seed commit still does.
await expect(service.listFilesAtCommit('semantic-layer/warehouse', 'HEAD')).resolves.toEqual([]);
await expect(service.listFilesAtCommit('semantic-layer/warehouse', atSeed)).resolves.toEqual([
'semantic-layer/warehouse/custom.yaml',
]);
});
it('returns [] for a pathspec that matches nothing', async () => {
await expect(service.listFilesAtCommit('does/not/exist', 'HEAD')).resolves.toEqual([]);
});
});
describe('cold-start bootstrap commit', () => { describe('cold-start bootstrap commit', () => {
it('writes an empty commit on init so HEAD always resolves', async () => { it('writes an empty commit on init so HEAD always resolves', async () => {
// beforeEach already ran onModuleInit() against an empty temp dir. // beforeEach already ran onModuleInit() against an empty temp dir.

View file

@ -159,13 +159,16 @@ describe('reindexLocalIndexes', () => {
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n', '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
'utf-8', 'utf-8',
); );
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true }); // A broken standalone source is surfaced for repair rather than failing the
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8'); // scope, so use a corrupt machine-generated manifest shard, which is the
// remaining fatal read failure that the per-scope catch must isolate.
await mkdir(join(project.projectDir, 'semantic-layer/warehouse/_schema'), { recursive: true });
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/broken.yaml'), 'not: [valid', 'utf-8');
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null }); const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined(); expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML'); expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('_schema/broken.yaml');
}); });
it('marks a scope errored when configured embeddings fail', async () => { it('marks a scope errored when configured embeddings fail', async () => {

View file

@ -33,14 +33,14 @@ async function makeHarness() {
} }
describe('finalGateRepairPaths', () => { describe('finalGateRepairPaths', () => {
it('derives sorted wiki and semantic-layer file paths', () => { it('derives sorted, deduplicated wiki and semantic-layer file paths', () => {
expect( expect(
finalGateRepairPaths({ finalGateRepairPaths({
changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'], changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
touchedSlSources: [ touchedSlSourcePaths: [
{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }, 'semantic-layer/warehouse/mart_account_segments.yaml',
{ connectionId: 'warehouse', sourceName: 'orders' }, 'semantic-layer/warehouse/orders.yaml',
{ connectionId: 'warehouse', sourceName: 'orders' }, 'semantic-layer/warehouse/orders.yaml',
], ],
}), }),
).toEqual([ ).toEqual([

View file

@ -18,19 +18,49 @@ describe('deriveFinalizationWikiPageKeys', () => {
}); });
describe('deriveFinalizationTouchedSources', () => { describe('deriveFinalizationTouchedSources', () => {
it('maps standalone semantic-layer files directly', async () => { it('resolves standalone files by the source diff, not the filename', () => {
const result = await deriveFinalizationTouchedSources({ // The file carries a derived label (`signed_up-<hash>.yaml`); the source it
// defines is the in-file `name:` (`SIGNED_UP`), visible only via the diff.
const result = deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/signed_up-1a2b3c4d.yaml'],
beforeSourcesByConnection: new Map([['warehouse', []]]),
afterSourcesByConnection: new Map([
['warehouse', [{ name: 'SIGNED_UP', grain: [], columns: [], joins: [], measures: [] }]],
]),
});
expect(result).toEqual({
touchedSources: [{ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }],
unresolvedPaths: [],
});
});
it('resolves deleted standalone files by the name that disappeared', () => {
const result = deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/signed_up-1a2b3c4d.yaml'],
beforeSourcesByConnection: new Map([
['warehouse', [{ name: 'SIGNED_UP', grain: [], columns: [], joins: [], measures: [] }]],
]),
afterSourcesByConnection: new Map([['warehouse', []]]),
});
expect(result).toEqual({
touchedSources: [{ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }],
unresolvedPaths: [],
});
});
it('flags standalone changes that produce no source diff', () => {
const result = deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/orders.yaml'], changedPaths: ['semantic-layer/warehouse/orders.yaml'],
beforeSourcesByConnection: new Map(), beforeSourcesByConnection: new Map(),
afterSourcesByConnection: new Map(), afterSourcesByConnection: new Map(),
}); });
expect(result).toEqual({ expect(result).toEqual({
touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }], touchedSources: [],
unresolvedPaths: [], unresolvedPaths: ['semantic-layer/warehouse/orders.yaml'],
}); });
}); });
it('resolves aggregate _schema changes by comparing loaded source snapshots', async () => { it('resolves aggregate _schema changes by comparing loaded source snapshots', () => {
const beforeSourcesByConnection = new Map([ const beforeSourcesByConnection = new Map([
[ [
'warehouse', 'warehouse',
@ -72,7 +102,7 @@ describe('deriveFinalizationTouchedSources', () => {
], ],
]); ]);
const result = await deriveFinalizationTouchedSources({ const result = deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'], changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
beforeSourcesByConnection, beforeSourcesByConnection,
afterSourcesByConnection, afterSourcesByConnection,
@ -84,11 +114,11 @@ describe('deriveFinalizationTouchedSources', () => {
}); });
}); });
it('flags aggregate _schema changes that cannot be resolved to logical sources', async () => { it('flags aggregate _schema changes that cannot be resolved to logical sources', () => {
const beforeSourcesByConnection = new Map([['warehouse', []]]); const beforeSourcesByConnection = new Map([['warehouse', []]]);
const afterSourcesByConnection = new Map([['warehouse', []]]); const afterSourcesByConnection = new Map([['warehouse', []]]);
const result = await deriveFinalizationTouchedSources({ const result = deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'], changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
beforeSourcesByConnection, beforeSourcesByConnection,
afterSourcesByConnection, afterSourcesByConnection,

View file

@ -70,6 +70,15 @@ async function loadSourcesFromRoot(root: string) {
}; };
} }
// Test stand-in for the production source-file lookup: returns the file that
// defines `sourceName` for a connection, or null when it cannot be read.
// Fixtures always name the file after the source, so reading
// `<name>.yaml` directly is a faithful shortcut for by-name resolution.
async function readSourceFileFromRoot(root: string, connectionId: string, sourceName: string) {
  const relPath = ['semantic-layer', connectionId, `${sourceName}.yaml`].join('/');
  try {
    const content = await readFile(join(root, relPath), 'utf-8');
    return { content, path: relPath };
  } catch {
    // Any read failure (typically a missing file) is reported as "no such source".
    return null;
  }
}
async function listGlobalWikiPageKeys(root: string): Promise<string[]> { async function listGlobalWikiPageKeys(root: string): Promise<string[]> {
const dir = join(root, 'wiki/global'); const dir = join(root, 'wiki/global');
const entries = await readdir(dir).catch(() => []); const entries = await readdir(dir).catch(() => []);
@ -172,11 +181,17 @@ function makeDeps(
const semanticLayerService: any = { const semanticLayerService: any = {
loadAllSources: vi.fn(async () => loadSourcesFromRoot(runtime.configDir)), loadAllSources: vi.fn(async () => loadSourcesFromRoot(runtime.configDir)),
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']), listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
readSourceFile: vi.fn((connectionId: string, sourceName: string) =>
readSourceFileFromRoot(runtime.configDir, connectionId, sourceName),
),
}; };
semanticLayerService.forWorktree = vi.fn((workdir: string) => ({ semanticLayerService.forWorktree = vi.fn((workdir: string) => ({
...semanticLayerService, ...semanticLayerService,
loadAllSources: vi.fn(async () => loadSourcesFromRoot(workdir)), loadAllSources: vi.fn(async () => loadSourcesFromRoot(workdir)),
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']), listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
readSourceFile: vi.fn((connectionId: string, sourceName: string) =>
readSourceFileFromRoot(workdir, connectionId, sourceName),
),
})); }));
const deps: IngestBundleRunnerDeps = { const deps: IngestBundleRunnerDeps = {
@ -2366,8 +2381,11 @@ describe('IngestBundleRunner isolated diff path', () => {
join(runtime.configDir, '.ktx/ingest-traces/job-finalization-target-policy/trace.jsonl'), join(runtime.configDir, '.ktx/ingest-traces/job-finalization-target-policy/trace.jsonl'),
'utf-8', 'utf-8',
); );
expect(trace).toContain('finalization_committed'); // The policy check runs inside finalization, before touched-source
expect(trace).toContain('semantic_layer_target_policy'); // derivation — an out-of-scope write fails the finalization stage
// instead of reading as committed.
expect(trace).not.toContain('finalization_committed');
expect(trace).toContain('semantic_layer_target_policy_failed');
expect(trace).toContain('ingest_failed'); expect(trace).toContain('ingest_failed');
} finally { } finally {
await rm(runtime.homeDir, { recursive: true, force: true }); await rm(runtime.homeDir, { recursive: true, force: true });

View file

@ -1776,12 +1776,24 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
}, },
], ],
}); });
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
);
let head = 'pre-finalization'; let head = 'pre-finalization';
// Touched-source derivation diffs composed sources before/after finalization
// (the filename never carries identity), so the mock must reflect the write:
// `orders` exists only once the finalization commit lands.
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
Promise.resolve({
sources:
connectionId === 'warehouse-2' && head === 'post-finalization'
? [{ name: `${connectionId}_source` }, { name: 'orders' }]
: [{ name: `${connectionId}_source` }],
loadErrors: [],
}),
);
const git = { const git = {
revParseHead: vi.fn(async () => head), revParseHead: vi.fn(async () => head),
// Touched-source derivation reads each changed file's `name:`; the worktree
// is mocked (no files on disk), so serve the source content from history.
getFileAtCommit: vi.fn(async () => 'name: orders\n'),
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => { commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) { if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
head = 'post-finalization'; head = 'post-finalization';
@ -1854,10 +1866,41 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
}), }),
); );
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2'); expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]); expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [
{ name: 'warehouse-2_source' },
{ name: 'orders' },
]);
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success'); expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
}); });
it('recovers a deleted hash-named SL source by its in-file name, not its filename', async () => {
  const runner = buildRunner();

  // The file sits under a hash-suffixed filename, so its basename
  // (`widget_sales-1a2b3c4d`) is not the source's name. The mocked history
  // lookup serves content that declares the real name, `WIDGET_SALES`.
  const removedFile = 'semantic-layer/warehouse/widget_sales-1a2b3c4d.yaml';
  const baseSha = 'pre-change-sha';
  const getFileAtCommit = vi.fn(async () => 'name: WIDGET_SALES\ntable: WIDGET_SALES\n');

  // The workdir is a path under the OS temp dir that this test does not create.
  const absentWorktree = {
    workdir: join(tmpdir(), 'ktx-absent-worktree-recover'),
    git: { getFileAtCommit },
  };

  const touched = await (runner as any).touchedSlSourcesFromPaths(absentWorktree, [removedFile], baseSha);

  // Identity comes from the recovered `name:`, and history was asked for the
  // deleted path at the pre-change commit.
  expect(touched).toEqual([{ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }]);
  expect(getFileAtCommit).toHaveBeenCalledWith(removedFile, baseSha);
});
it('falls back to the filename only when a deleted SL file is unrecoverable from history', async () => {
  const runner = buildRunner();
  const deletedPath = 'semantic-layer/warehouse/orders.yaml';

  // The history lookup rejects, so no file content is available to supply a `name:`.
  const getFileAtCommit = vi.fn(async () => {
    throw new Error('path not present at commit');
  });

  // The workdir is a path under the OS temp dir that this test does not create.
  const worktree = { workdir: join(tmpdir(), 'ktx-absent-worktree-fallback'), git: { getFileAtCommit } };

  const touched = await (runner as any).touchedSlSourcesFromPaths(worktree, [deletedPath], 'pre-change-sha');

  // With nothing recoverable, the source name is taken from the filename.
  expect(touched).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
  // "Only when unrecoverable" means history must have been tried first; without
  // this check the test would still pass if the lookup were skipped entirely.
  expect(getFileAtCommit).toHaveBeenCalledWith(deletedPath, 'pre-change-sha');
});
it('includes finalization actions in memory-flow saved counts', async () => { it('includes finalization actions in memory-flow saved counts', async () => {
const deps = makeDeps(); const deps = makeDeps();
deps.adapter.source = 'historic-sql'; deps.adapter.source = 'historic-sql';

View file

@ -1,13 +1,21 @@
import { describe, expect, it, vi } from 'vitest'; import { beforeEach, describe, expect, it, vi } from 'vitest';
import type { SlConnectionCatalogPort } from '../../../../../src/context/sl/ports.js'; import type { SlConnectionCatalogPort } from '../../../../../src/context/sl/ports.js';
import type { SqlAnalysisPort } from '../../../../../src/context/sql-analysis/ports.js';
import type { ToolContext } from '../../../../../src/context/tools/base-tool.js'; import type { ToolContext } from '../../../../../src/context/tools/base-tool.js';
import { SqlExecutionTool } from '../../../../../src/context/ingest/tools/warehouse-verification/sql-execution.tool.js'; import { SqlExecutionTool } from '../../../../../src/context/ingest/tools/warehouse-verification/sql-execution.tool.js';
describe('SqlExecutionTool', () => { describe('SqlExecutionTool', () => {
const connections = { const connections = {
executeQuery: vi.fn(), executeQuery: vi.fn(),
} as unknown as SlConnectionCatalogPort & { executeQuery: ReturnType<typeof vi.fn> }; getConnectionById: vi.fn(async () => ({ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' })),
const tool = new SqlExecutionTool(connections); } as unknown as SlConnectionCatalogPort & {
executeQuery: ReturnType<typeof vi.fn>;
getConnectionById: ReturnType<typeof vi.fn>;
};
const sqlAnalysis = {
validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
} as unknown as SqlAnalysisPort & { validateReadOnly: ReturnType<typeof vi.fn> };
const tool = new SqlExecutionTool(connections, sqlAnalysis);
const context: ToolContext = { const context: ToolContext = {
sourceId: 'ingest', sourceId: 'ingest',
messageId: 'm1', messageId: 'm1',
@ -15,7 +23,15 @@ describe('SqlExecutionTool', () => {
session: { allowedConnectionNames: new Set(['warehouse']) } as any, session: { allowedConnectionNames: new Set(['warehouse']) } as any,
}; };
it('wraps read-only SQL with a capped row limit', async () => { beforeEach(() => {
connections.executeQuery.mockReset();
connections.getConnectionById.mockReset();
connections.getConnectionById.mockResolvedValue({ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' });
sqlAnalysis.validateReadOnly.mockReset();
sqlAnalysis.validateReadOnly.mockResolvedValue({ ok: true, error: null });
});
it('validates with the parser-backed validator in the connection dialect, then wraps with a capped row limit', async () => {
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 }); connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
const result = await tool.call( const result = await tool.call(
@ -23,6 +39,7 @@ describe('SqlExecutionTool', () => {
context, context,
); );
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select status from public.orders', 'postgres');
expect(connections.executeQuery).toHaveBeenCalledWith( expect(connections.executeQuery).toHaveBeenCalledWith(
'warehouse', 'warehouse',
'select * from (select status from public.orders) as ktx_query_result limit 5', 'select * from (select status from public.orders) as ktx_query_result limit 5',
@ -31,14 +48,46 @@ describe('SqlExecutionTool', () => {
expect(result.structured.wrappedSql).toContain('limit 5'); expect(result.structured.wrappedSql).toContain('limit 5');
}); });
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => { it('maps connection types to sqlglot dialects', async () => {
connections.executeQuery.mockClear(); connections.getConnectionById.mockResolvedValue({ id: 'warehouse', name: 'warehouse', connectionType: 'SNOWFLAKE' });
connections.executeQuery.mockResolvedValue({ headers: [], rows: [], totalRows: 0 });
await tool.call({ connectionId: 'warehouse', sql: 'select 1' }, context);
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select 1', 'snowflake');
});
it('returns the validator error without executing when validation fails', async () => {
  // Make the mocked validator reject the statement with a specific message.
  const validatorError = 'SQL contains read/write operation: Insert';
  sqlAnalysis.validateReadOnly.mockResolvedValue({ ok: false, error: validatorError });

  const result = await tool.call(
    { connectionId: 'warehouse', sql: 'with x as (insert into t values (1) returning *) select * from x' },
    context,
  );

  // The validator's message appears in both output channels ...
  expect(result.markdown).toContain(validatorError);
  expect(result.structured.error).toContain(validatorError);
  // ... and the query never reaches the connection.
  expect(connections.executeQuery).not.toHaveBeenCalled();
});
it('throws when no parser-backed validator is configured', async () => {
  // Built with the connection catalog only; no SQL analysis port is passed.
  const toolWithoutValidator = new SqlExecutionTool(connections);

  const attempt = toolWithoutValidator.call({ connectionId: 'warehouse', sql: 'select 1' }, context);

  await expect(attempt).rejects.toThrow('sql_execution requires parser-backed SQL validation.');
  // The call is rejected and nothing is executed against the connection.
  expect(connections.executeQuery).not.toHaveBeenCalled();
});
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])(
'keeps the local backstop even when the validator approves: %s',
async (sql) => {
const result = await tool.call({ connectionId: 'warehouse', sql }, context); const result = await tool.call({ connectionId: 'warehouse', sql }, context);
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.'); expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
expect(connections.executeQuery).not.toHaveBeenCalled(); expect(connections.executeQuery).not.toHaveBeenCalled();
}); },
);
it('surfaces connector errors verbatim', async () => { it('surfaces connector errors verbatim', async () => {
connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist')); connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));

View file

@ -5,7 +5,9 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKtxProject } from '../../../src/context/project/project.js'; import { initKtxProject } from '../../../src/context/project/project.js';
import { KtxQueryError } from '../../../src/errors.js'; import { KtxQueryError } from '../../../src/errors.js';
import { createKtxConnectorCapabilities, type KtxQueryResult, type KtxScanConnector, type KtxSchemaSnapshot } from '../../../src/context/scan/types.js'; import { createKtxConnectorCapabilities, type KtxQueryResult, type KtxScanConnector, type KtxSchemaSnapshot } from '../../../src/context/scan/types.js';
import { writeLocalSlSource } from '../../../src/context/sl/local-sl.js'; import { SemanticLayerService } from '../../../src/context/sl/semantic-layer.service.js';
import type { SemanticLayerSource } from '../../../src/context/sl/types.js';
import { seedSlSourceFile } from '../sl/sl-source-seeding.test-utils.js';
import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js'; import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js';
describe('createLocalProjectMcpContextPorts', () => { describe('createLocalProjectMcpContextPorts', () => {
@ -739,7 +741,7 @@ describe('createLocalProjectMcpContextPorts', () => {
it('reads seeded semantic-layer sources', async () => { it('reads seeded semantic-layer sources', async () => {
const project = await initKtxProject({ projectDir: tempDir }); const project = await initKtxProject({ projectDir: tempDir });
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: [ yaml: [
@ -763,7 +765,92 @@ describe('createLocalProjectMcpContextPorts', () => {
}); });
}); });
it('rejects path traversal keys before touching the project directory', async () => { it('reads manifest-backed sources with uppercase warehouse identifiers', async () => {
const project = await initKtxProject({ projectDir: tempDir });
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
[
'tables:',
' WIDGET_SALES:',
' table: PUBLIC.WIDGET_SALES',
' columns:',
' - name: ID',
' type: number',
' pk: true',
'',
].join('\n'),
'ktx',
'ktx@example.com',
'seed uppercase manifest shard',
);
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
await expect(
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }),
).resolves.toMatchObject({
sourceName: 'WIDGET_SALES',
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
});
});
it('composes an overlay written for an uppercase manifest source at a derived filename', async () => {
  const project = await initKtxProject({ projectDir: tempDir });

  // Seed a manifest shard that declares the uppercase table `WIDGET_SALES`.
  const manifestShard = [
    'tables:',
    ' WIDGET_SALES:',
    ' table: PUBLIC.WIDGET_SALES',
    ' columns:',
    ' - name: ID',
    ' type: number',
    ' pk: true',
    '',
  ].join('\n');
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/PUBLIC.yaml',
    manifestShard,
    'ktx',
    'ktx@example.com',
    'seed uppercase manifest shard',
  );

  // Write the overlay through SemanticLayerService.writeSource (the production
  // write path agents use), passing the warehouse name verbatim.
  const service = new SemanticLayerService(project.fileStore as never, {} as never, {} as never);
  const overlay = {
    name: 'WIDGET_SALES',
    measures: [{ name: 'widget_sales_count', expr: 'count(*)' }],
  } as SemanticLayerSource;
  const write = await service.writeSource('warehouse', overlay, 'ktx', 'ktx@example.com');

  // The written file is a lowercase slug plus an 8-hex-digit suffix.
  const derivedOverlayPath = /^semantic-layer\/warehouse\/widget_sales-[0-9a-f]{8}\.yaml$/;
  expect(write.path).toMatch(derivedOverlayPath);

  // Reading by the verbatim name returns YAML that includes the overlay measure.
  const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
  await expect(
    ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }),
  ).resolves.toMatchObject({
    sourceName: 'WIDGET_SALES',
    yaml: expect.stringContaining('widget_sales_count'),
  });
});
it('returns a standalone source verbatim even when its YAML is currently broken', async () => {
  const project = await initKtxProject({ projectDir: tempDir });

  // Seed a source whose `name:` is intact but whose `expr` opens a `[` that is never closed.
  const brokenYaml = 'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n';
  await project.fileStore.writeFile(
    'semantic-layer/warehouse/orders.yaml',
    brokenYaml,
    'ktx',
    'ktx@example.com',
    'seed broken source mid-edit',
  );

  const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
  const read = ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'orders' });

  // The read resolves and the returned YAML still contains the broken fragment.
  await expect(read).resolves.toMatchObject({
    sourceName: 'orders',
    yaml: expect.stringContaining('[unterminated'),
  });
});
it('keeps path-traversal keys away from the project directory', async () => {
const project = await initKtxProject({ projectDir: tempDir }); const project = await initKtxProject({ projectDir: tempDir });
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null }); const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
@ -774,12 +861,14 @@ describe('createLocalProjectMcpContextPorts', () => {
}), }),
).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".'); ).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');
// Source reads never derive a file path from the name; a traversal-style
// name simply matches no record.
await expect( await expect(
ports.semanticLayer?.readSource({ ports.semanticLayer?.readSource({
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: '../orders', sourceName: '../orders',
}), }),
).rejects.toThrow('Unsafe semantic-layer source name'); ).resolves.toBeNull();
}); });
it('uses semantic compute for compile-only sl_query when supplied', async () => { it('uses semantic compute for compile-only sl_query when supplied', async () => {
@ -788,7 +877,7 @@ describe('createLocalProjectMcpContextPorts', () => {
driver: 'postgres', driver: 'postgres',
url: 'env:DATABASE_URL', url: 'env:DATABASE_URL',
}; };
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: [ yaml: [
@ -850,7 +939,7 @@ describe('createLocalProjectMcpContextPorts', () => {
driver: 'postgres', driver: 'postgres',
url: 'env:DATABASE_URL', url: 'env:DATABASE_URL',
}; };
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: [ yaml: [

View file

@ -357,6 +357,10 @@ describe('MemoryAgentService.gateRevertInvalidSources (J3)', () => {
const configService = { const configService = {
writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}), writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}),
deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}), deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}),
// Revert resolves the live file by name; with no listing it falls back
// to the writer-derived filename.
listFiles: vi.fn().mockResolvedValue({ files: [] }),
readFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
}; };
const gitService = { const gitService = {
getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')), getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')),

View file

@ -5,7 +5,8 @@ import { afterEach, beforeEach, describe, it } from 'vitest';
import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js'; import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js';
import type { JsonValue } from '../../../src/context/ingest/ports.js'; import type { JsonValue } from '../../../src/context/ingest/ports.js';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js'; import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../../../src/context/sl/local-sl.js'; import { type LocalSlSourceSearchResult, searchLocalSlSources } from '../../../src/context/sl/local-sl.js';
import { seedSlSourceFile } from '../sl/sl-source-seeding.test-utils.js';
import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js'; import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js';
import { import {
type LocalKnowledgeSearchResult, type LocalKnowledgeSearchResult,
@ -99,12 +100,12 @@ function toContextConformanceResult(result: ContextEvidenceSearchResult): Search
} }
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> { async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
}); });
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'finance', connectionId: 'finance',
sourceName: 'orders', sourceName: 'orders',
yaml: FINANCE_ORDERS_YAML, yaml: FINANCE_ORDERS_YAML,

View file

@ -9,8 +9,8 @@ import {
resolveLocalSlSource, resolveLocalSlSource,
searchLocalSlSources, searchLocalSlSources,
validateLocalSlSource, validateLocalSlSource,
writeLocalSlSource,
} from '../../../src/context/sl/local-sl.js'; } from '../../../src/context/sl/local-sl.js';
import { seedSlSourceFile } from './sl-source-seeding.test-utils.js';
const ORDERS_YAML = [ const ORDERS_YAML = [
'name: orders', 'name: orders',
@ -60,7 +60,7 @@ describe('local semantic-layer helpers', () => {
}); });
it('writes, reads, lists, and validates semantic-layer sources', async () => { it('writes, reads, lists, and validates semantic-layer sources', async () => {
const write = await writeLocalSlSource(project, { const write = await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -92,7 +92,7 @@ describe('local semantic-layer helpers', () => {
}); });
it('resolves a scoped source by connection id', async () => { it('resolves a scoped source by connection id', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -115,7 +115,7 @@ describe('local semantic-layer helpers', () => {
}); });
it('returns not-found for a missing scoped source', async () => { it('returns not-found for a missing scoped source', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -130,12 +130,12 @@ describe('local semantic-layer helpers', () => {
}); });
it('resolves a unique source name across all connections', async () => { it('resolves a unique source name across all connections', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
}); });
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'analytics', connectionId: 'analytics',
sourceName: 'tickets', sourceName: 'tickets',
yaml: SUPPORT_YAML, yaml: SUPPORT_YAML,
@ -157,7 +157,7 @@ describe('local semantic-layer helpers', () => {
}); });
it('returns not-found for a missing unscoped source', async () => { it('returns not-found for a missing unscoped source', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -169,12 +169,12 @@ describe('local semantic-layer helpers', () => {
}); });
it('reports sorted ambiguous connection ids for duplicate source names', async () => { it('reports sorted ambiguous connection ids for duplicate source names', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
}); });
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'analytics', connectionId: 'analytics',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -261,6 +261,153 @@ describe('local semantic-layer helpers', () => {
); );
}); });
it('reads manifest-backed scan sources whose warehouse identifiers are uppercase', async () => {
// Seed a manifest shard under `_schema/` whose table key is the uppercase
// identifier `WIDGET_SALES`; this test writes no standalone source file.
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
`tables:
WIDGET_SALES:
table: PUBLIC.WIDGET_SALES
columns:
- name: ID
type: number
pk: true
- name: EMAIL
type: string
`,
'ktx',
'ktx@example.com',
'Add uppercase manifest shard',
);
// The source is looked up by the manifest key verbatim. The reported path is
// the shard path followed by `#` and the key.
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'WIDGET_SALES' })).resolves.toEqual(
expect.objectContaining({
connectionId: 'warehouse',
name: 'WIDGET_SALES',
path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#WIDGET_SALES',
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
}),
);
});
it('reads manifest-backed sources whose names are not filename-safe', async () => {
// Snowflake and Postgres unquoted identifiers allow `$`; manifest keys
// carry the warehouse name verbatim, so the lookup must accept it.
// The shard seeded here uses `EVENT$LOG` as its table key.
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
`tables:
EVENT$LOG:
table: PUBLIC.EVENT$LOG
columns:
- name: ID
type: number
pk: true
`,
'ktx',
'ktx@example.com',
'Add manifest shard with dollar-sign table name',
);
// The `$` is kept as-is in both the returned name and the `#<key>` suffix
// of the reported path.
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'EVENT$LOG' })).resolves.toEqual(
expect.objectContaining({
connectionId: 'warehouse',
name: 'EVENT$LOG',
path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#EVENT$LOG',
yaml: expect.stringContaining('table: PUBLIC.EVENT$LOG'),
}),
);
});
it('reads a manifest-backed source while a sibling standalone file has broken YAML', async () => {
// Two files are seeded in the same connection: a manifest shard declaring
// `WIDGET_SALES`, and a standalone `orders.yaml` with broken YAML.
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
`tables:
WIDGET_SALES:
table: PUBLIC.WIDGET_SALES
columns:
- name: ID
type: number
pk: true
`,
'ktx',
'ktx@example.com',
'Add manifest shard',
);
// The `expr` value opens a `[` that is never closed.
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
'ktx',
'ktx@example.com',
'seed a sibling source mid-edit with broken YAML',
);
// Reading the manifest-backed source still resolves with its YAML.
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'WIDGET_SALES' })).resolves.toEqual(
expect.objectContaining({
name: 'WIDGET_SALES',
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
}),
);
// The broken sibling stays visible in listings instead of hiding or
// failing the whole connection. It is listed with a column count of 0,
// next to the manifest source with its single column.
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
expect.objectContaining({ name: 'orders', columnCount: 0 }),
expect.objectContaining({ name: 'WIDGET_SALES', columnCount: 1 }),
]);
});
it('returns the raw YAML of a standalone source whose content no longer parses', async () => {
  // Seed a source whose `name:` is intact but whose `expr` opens a `[` that is never closed.
  const sourcePath = 'semantic-layer/warehouse/orders.yaml';
  const brokenYaml = 'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n';
  await project.fileStore.writeFile(
    sourcePath,
    brokenYaml,
    'ktx',
    'ktx@example.com',
    'seed a source mid-edit with broken YAML',
  );

  const read = readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' });

  // The read resolves with the record for that file, and its YAML still
  // contains the broken fragment.
  await expect(read).resolves.toEqual(
    expect.objectContaining({
      connectionId: 'warehouse',
      name: 'orders',
      path: sourcePath,
      yaml: expect.stringContaining('[unterminated'),
    }),
  );
});
it('reads a broken source by its declared name even when the filename differs', async () => {
  // A source's identity is its top-level `name:`, never the filename. Here the
  // `name:` line is intact while the YAML below it is broken, and the file has
  // a hand-picked name unrelated to the declared one. Such a file (human-renamed
  // or hash-named, e.g. for an uppercase warehouse identifier) must stay
  // reachable under the name it declares; keying it by filename would hide it
  // under its real name.
  const handNamedPath = 'semantic-layer/warehouse/renamed-by-hand.yaml';
  const brokenYaml = 'name: SIGNED_UP\nmeasures:\n - name: signups\n expr: [unterminated\n';
  await project.fileStore.writeFile(
    handNamedPath,
    brokenYaml,
    'ktx',
    'ktx@example.com',
    'seed a human-renamed source mid-edit with broken YAML',
  );

  // Lookup by the declared name finds the hand-named file.
  const byDeclaredName = readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'SIGNED_UP' });
  await expect(byDeclaredName).resolves.toEqual(
    expect.objectContaining({
      connectionId: 'warehouse',
      name: 'SIGNED_UP',
      path: handNamedPath,
      yaml: expect.stringContaining('[unterminated'),
    }),
  );

  // The filename is not the identity, so it does not resolve a source.
  const byFilename = readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'renamed-by-hand' });
  await expect(byFilename).resolves.toBeNull();

  // The listing reports the source once, under its declared name.
  await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
    expect.objectContaining({ name: 'SIGNED_UP', columnCount: 0 }),
  ]);
});
it('expands manifest-backed scan sources when listing all connections', async () => { it('expands manifest-backed scan sources when listing all connections', async () => {
await project.fileStore.writeFile( await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml', 'semantic-layer/warehouse/_schema/public.yaml',
@ -292,12 +439,12 @@ describe('local semantic-layer helpers', () => {
}); });
it('searches local semantic-layer source text through SQLite FTS', async () => { it('searches local semantic-layer source text through SQLite FTS', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
}); });
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'tickets', sourceName: 'tickets',
yaml: SUPPORT_YAML, yaml: SUPPORT_YAML,
@ -365,12 +512,12 @@ describe('local semantic-layer helpers', () => {
}); });
it('searches all connections with one global hybrid ranking pass', async () => { it('searches all connections with one global hybrid ranking pass', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
}); });
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'finance', connectionId: 'finance',
sourceName: 'orders', sourceName: 'orders',
yaml: [ yaml: [
@ -403,7 +550,7 @@ describe('local semantic-layer helpers', () => {
}); });
it('returns dictionary evidence when collected sample values explain a match', async () => { it('returns dictionary evidence when collected sample values explain a match', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -456,7 +603,7 @@ describe('local semantic-layer helpers', () => {
}); });
it('adds the token lane alongside lexical matches for normalized query terms', async () => { it('adds the token lane alongside lexical matches for normalized query terms', async () => {
await writeLocalSlSource(project, { await seedSlSourceFile(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: 'orders', sourceName: 'orders',
yaml: ORDERS_YAML, yaml: ORDERS_YAML,
@ -471,21 +618,13 @@ describe('local semantic-layer helpers', () => {
}); });
}); });
it('reports schema validation errors without writing invalid YAML', async () => { it('reports schema validation errors for invalid YAML', async () => {
const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n'); const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n');
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({ await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
valid: false, valid: false,
errors: expect.arrayContaining([expect.stringContaining('grain')]), errors: expect.arrayContaining([expect.stringContaining('grain')]),
}); });
await expect(
writeLocalSlSource(project, {
connectionId: 'warehouse',
sourceName: 'broken',
yaml: invalidYaml,
}),
).rejects.toThrow('Invalid semantic-layer source');
}); });
it('reports overlay columns that are not computed columns', async () => { it('reports overlay columns that are not computed columns', async () => {
@ -506,12 +645,40 @@ describe('local semantic-layer helpers', () => {
}); });
}); });
it('rejects unsafe source paths', async () => { it('never derives a file path from a traversal-style source name', async () => {
// Reads match names against loaded records, so a traversal-style name is
// simply not found; the writer-side guarantee (derived filenames contain
// no separators) is covered by the source-files tests.
await expect( await expect(
readLocalSlSource(project, { readLocalSlSource(project, {
connectionId: 'warehouse', connectionId: 'warehouse',
sourceName: '../orders', sourceName: '../orders',
}), }),
).rejects.toThrow('Unsafe semantic-layer source name'); ).resolves.toBeNull();
});
it('reads a source from a human-renamed file by its in-file name', async () => {
// The filename is a derived label, not identity: a file renamed by a human
// still resolves under the `name:` it declares.
await project.fileStore.writeFile(
'semantic-layer/warehouse/custom-file-name.yaml',
ORDERS_YAML,
'ktx',
'ktx@example.com',
'Seed source at a human-chosen filename',
);
await expect(
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' }),
).resolves.toMatchObject({
connectionId: 'warehouse',
name: 'orders',
path: 'semantic-layer/warehouse/custom-file-name.yaml',
yaml: ORDERS_YAML,
});
await expect(
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'custom-file-name' }),
).resolves.toBeNull();
}); });
}); });

View file

@ -5,8 +5,9 @@ import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js'; import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { assertSearchBackendConformanceCase } from '../search/backend-conformance.test-utils.js'; import { assertSearchBackendConformanceCase } from '../search/backend-conformance.test-utils.js';
import { searchLocalSlSources, writeLocalSlSource, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js'; import { searchLocalSlSources, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js';
import { searchLocalSlSourcesWithPglitePrototype } from '../../../src/context/sl/pglite-sl-search-prototype.js'; import { searchLocalSlSourcesWithPglitePrototype } from '../../../src/context/sl/pglite-sl-search-prototype.js';
import { seedSlSourceFile } from './sl-source-seeding.test-utils.js';
const ORDERS_YAML = [ const ORDERS_YAML = [
'name: orders', 'name: orders',
@ -107,9 +108,9 @@ function toConformanceResult(result: LocalSlSourceSearchResult) {
} }
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> { async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML }); await seedSlSourceFile(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML });
await writeLocalSlSource(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML }); await seedSlSourceFile(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML });
await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML }); await seedSlSourceFile(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML });
await project.fileStore.writeFile( await project.fileStore.writeFile(
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',

View file

@ -1,5 +1,9 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { Mock } from 'vitest'; import type { Mock } from 'vitest';
import { beforeEach, describe, expect, it, vi } from 'vitest'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { import {
ColumnNameCollisionError, ColumnNameCollisionError,
@ -71,6 +75,7 @@ describe('loadSource', () => {
it('warns and returns null when an existing source file has invalid YAML', async () => { it('warns and returns null when an existing source file has invalid YAML', async () => {
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() }; const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
const configService = { const configService = {
listFiles: vi.fn().mockResolvedValue({ files: ['semantic-layer/warehouse/orders.yaml'] }),
readFile: vi.fn().mockResolvedValue({ content: 'name: [' }), readFile: vi.fn().mockResolvedValue({ content: 'name: [' }),
}; };
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never); const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);
@ -79,9 +84,33 @@ describe('loadSource', () => {
expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml'); expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml');
expect(logger.warn).toHaveBeenCalledWith( expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining('[loadSource] warehouse/orders.yaml: YAML parse failed:'), expect.stringContaining('[loadSource] semantic-layer/warehouse/orders.yaml: YAML parse failed:'),
); );
}); });
it('returns null when no file declares the source name', async () => {
  // The listing is empty, so loadSource has to resolve to null and must not
  // attempt to read any file.
  const listFiles = vi.fn().mockResolvedValue({ files: [] });
  const readFile = vi.fn();
  const fileStoreStub = { listFiles, readFile };
  const sut = new SemanticLayerService(fileStoreStub as never, connectionCatalog(), pythonPort);

  const loaded = await sut.loadSource('warehouse', 'orders');

  expect(loaded).toBeNull();
  expect(readFile).not.toHaveBeenCalled();
});
it('resolves a source by its in-file name when the filename differs', async () => {
  // The only listed file is `renamed.yaml`, yet its content declares
  // `name: SIGNED_UP`; the lookup is made with the declared name.
  const renamedPath = 'semantic-layer/warehouse/renamed.yaml';
  const renamedContent = 'name: SIGNED_UP\nmeasures: []\n';
  const fileStoreStub = {
    listFiles: vi.fn().mockResolvedValue({ files: [renamedPath] }),
    readFile: vi.fn().mockResolvedValue({ content: renamedContent }),
  };
  const sut = new SemanticLayerService(fileStoreStub as never, connectionCatalog(), pythonPort);

  const loaded = await sut.loadSource('warehouse', 'SIGNED_UP');

  expect(loaded).toEqual({ name: 'SIGNED_UP', measures: [] });
});
}); });
describe('composeOverlay', () => { describe('composeOverlay', () => {
@ -1242,3 +1271,177 @@ describe('findDanglingSegmentRefs', () => {
expect(findDanglingSegmentRefs({})).toEqual([]); expect(findDanglingSegmentRefs({})).toEqual([]);
}); });
}); });
// File-naming behaviour of SemanticLayerService.writeSource / deleteSource, exercised
// against the file store of a real project initialised in a fresh temp directory per test.
// Every test seeds files under `semantic-layer/warehouse/` and then checks which path the
// service reads, writes, or deletes.
describe('writeSource / deleteSource file naming', () => {
let tempDir: string;
let project: KtxLocalProject;
let service: SemanticLayerService;
// Author identity passed to every write/delete call in this suite.
const author = 'T U';
const authorEmail = 't@u.com';
// Each test gets its own project directory and a service bound to that project's file store.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-service-files-'));
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
service = new SemanticLayerService(project.fileStore as never, connectionCatalog() as never, pythonPort as never);
});
// Remove the temp directory; `force` keeps cleanup from failing if it is already gone.
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Shared fixture: a source whose name is uppercase, so its filename is expected to be
// derived (slug + 8 hex digits) rather than the name verbatim.
const signedUp: SemanticLayerSource = {
name: 'SIGNED_UP',
table: 'PUBLIC.SIGNED_UP',
grain: ['ID'],
columns: [{ name: 'ID', type: 'number' }],
joins: [],
measures: [],
};
// First write creates the derived file; reading by name finds it; a second write reuses it.
it('writes a new uppercase source at a derived filename and reads it back by name', async () => {
const result = await service.writeSource('warehouse', signedUp, author, authorEmail);
expect(result.path).toMatch(/^semantic-layer\/warehouse\/signed_up-[0-9a-f]{8}\.yaml$/);
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
expect(file?.path).toBe(result.path);
expect(file?.content).toContain('name: SIGNED_UP');
// Rewriting lands on the same file instead of deriving a second one.
const rewrite = await service.writeSource('warehouse', signedUp, author, authorEmail);
expect(rewrite.path).toBe(result.path);
});
// Corrupt the file the service just wrote (unterminated flow sequence), then write again:
// the write must succeed and land on the same path.
it('repairs a broken file occupying the derived path instead of refusing the write', async () => {
const written = await service.writeSource('warehouse', signedUp, author, authorEmail);
await project.fileStore.writeFile(
written.path,
'name: SIGNED_UP\nmeasures: [unterminated\n',
author,
authorEmail,
'break the file',
);
const repaired = await service.writeSource('warehouse', signedUp, author, authorEmail);
expect(repaired.path).toBe(written.path);
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
expect(file?.path).toBe(written.path);
expect(file?.content).toContain('name: SIGNED_UP');
});
// A file named `custom.yaml` that declares `name: orders` is the write target for `orders`;
// the directory listing must still contain only that one file afterwards.
it('rewrites a human-renamed file in place', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/custom.yaml',
'name: orders\nmeasures: []\n',
author,
authorEmail,
'seed renamed file',
);
const result = await service.writeSource(
'warehouse',
{ name: 'orders', grain: [], columns: [], joins: [], measures: [] },
author,
authorEmail,
);
expect(result.path).toBe('semantic-layer/warehouse/custom.yaml');
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
expect(listed.files).toEqual(['semantic-layer/warehouse/custom.yaml']);
});
it('repairs a human-renamed broken file in place instead of deriving a second one', async () => {
// Renamed (filename ≠ name) AND mid-edit broken: identity must survive the
// syntax error so the rewrite lands on the original file rather than creating
// a duplicate at the derived path that later trips the by-name resolver.
await project.fileStore.writeFile(
'semantic-layer/warehouse/custom.yaml',
'name: SIGNED_UP\nmeasures: [unterminated\n',
author,
authorEmail,
'seed broken renamed file',
);
const repaired = await service.writeSource('warehouse', signedUp, author, authorEmail);
expect(repaired.path).toBe('semantic-layer/warehouse/custom.yaml');
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
expect(listed.files).toEqual(['semantic-layer/warehouse/custom.yaml']);
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
expect(file?.path).toBe('semantic-layer/warehouse/custom.yaml');
expect(file?.content).toContain('name: SIGNED_UP');
});
// A `.yml` (not `.yaml`) file must be returned both by loadAllSources and by readSourceFile.
it('keeps a .yml-renamed file visible to the loader and the by-name resolver alike', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/custom.yml',
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\nmeasures: []\n',
author,
authorEmail,
'seed .yml file',
);
const { sources, loadErrors } = await service.loadAllSources('warehouse');
expect(loadErrors).toEqual([]);
expect(sources.map((source) => source.name)).toEqual(['orders']);
const file = await service.readSourceFile('warehouse', 'orders');
expect(file?.path).toBe('semantic-layer/warehouse/custom.yml');
});
// `orders.yaml` already exists but declares `other_source`; writing a new `orders` source
// must reject with a message naming the occupying source.
it('refuses to clobber a derived path occupied by a different source', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
'name: other_source\nmeasures: []\n',
author,
authorEmail,
'seed conflicting file',
);
await expect(
service.writeSource(
'warehouse',
{ name: 'orders', grain: [], columns: [], joins: [], measures: [] },
author,
authorEmail,
),
).rejects.toThrow("already defines source 'other_source'");
});
// Deleting `orders` removes `custom.yaml`, the file that declares that name.
it('deletes the file resolved by name, wherever it lives', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/custom.yaml',
'name: orders\nmeasures: []\n',
author,
authorEmail,
'seed renamed file',
);
await service.deleteSource('warehouse', 'orders', author, authorEmail);
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
expect(listed.files).toEqual([]);
});
// `payments` exists only as an entry in a `_schema/` shard, with no standalone file;
// the delete must reject with an error mentioning the scan manifest.
it('explains manifest-backed deletes instead of silently succeeding', async () => {
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
'tables:\n payments:\n table: public.payments\n columns:\n - name: id\n type: number\n',
author,
authorEmail,
'seed manifest shard',
);
await expect(service.deleteSource('warehouse', 'payments', author, authorEmail)).rejects.toThrow(
/scan manifest/,
);
});
// Nothing is seeded here, so the name is unknown everywhere.
it('throws a plain not-found error for unknown sources', async () => {
await expect(service.deleteSource('warehouse', 'missing', author, authorEmail)).rejects.toThrow(
'Semantic-layer source not found: warehouse/missing',
);
});
});

View file

@ -0,0 +1,22 @@
import type { KtxFileWriteResult } from '../../../src/context/core/file-store.js';
import type { KtxLocalProject } from '../../../src/context/project/project.js';
import { slSourceFilePath } from '../../../src/context/sl/source-files.js';
/**
 * Test helper that drops a standalone/overlay semantic-layer YAML file straight
 * into the project's file store, at the path `slSourceFilePath` derives for the
 * given connection and source name.
 *
 * No tool-level validation runs here. Production writes go through
 * `SemanticLayerService.writeSource`; use this only when a test merely needs
 * the file to exist on disk.
 *
 * @param project - Project whose `fileStore` receives the file.
 * @param input - Connection id, source name (used to derive the path) and the
 *   YAML text written verbatim.
 * @returns Whatever the file store's `writeFile` resolves to.
 */
export async function seedSlSourceFile(
  project: KtxLocalProject,
  input: { connectionId: string; sourceName: string; yaml: string },
): Promise<KtxFileWriteResult> {
  const { connectionId, sourceName, yaml } = input;
  const targetPath = slSourceFilePath(connectionId, sourceName);
  const note = `Seed semantic-layer source: ${connectionId}/${sourceName}`;
  // Fixed author identity: seeded files are attributed to the `ktx` test user.
  return project.fileStore.writeFile(targetPath, yaml, 'ktx', 'ktx@example.com', note);
}

View file

@ -0,0 +1,154 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import {
resolveSlSourceFile,
slSourceFileName,
slSourceFilePath,
slSourceNameForFile,
} from '../../../src/context/sl/source-files.js';
// Filename derivation: names that are already safe map to `<name>.yaml`; every
// other name maps to `<slug>-<8 hex digits>.yaml`.
describe('slSourceFileName', () => {
  it('keeps safe lowercase snake_case names verbatim', () => {
    for (const safeName of ['orders', 'mart_account_segments', 'orders2']) {
      expect(slSourceFileName(safeName)).toBe(`${safeName}.yaml`);
    }
  });

  it('derives a slug-hash filename for any other name and never throws', () => {
    const expectations: ReadonlyArray<readonly [string, RegExp]> = [
      ['SIGNED_UP', /^signed_up-[0-9a-f]{8}\.yaml$/],
      ['EVENT$LOG', /^event_log-[0-9a-f]{8}\.yaml$/],
      ['my.dotted.name', /^my_dotted_name-[0-9a-f]{8}\.yaml$/],
      // Names with nothing sluggable left are expected to use the `src` slug.
      ['汉字', /^src-[0-9a-f]{8}\.yaml$/],
      [' ', /^src-[0-9a-f]{8}\.yaml$/],
    ];
    for (const [rawName, pattern] of expectations) {
      expect(slSourceFileName(rawName)).toMatch(pattern);
    }
  });

  it('is deterministic', () => {
    const first = slSourceFileName('EVENT$LOG');
    const second = slSourceFileName('EVENT$LOG');
    expect(first).toBe(second);
  });

  it('never emits path separators or traversal segments', () => {
    const hostileNames = ['../orders', 'a/b', 'a\\b', '..', './x'];
    const forbiddenFragments = ['/', '\\', '..'];
    hostileNames.forEach((hostileName) => {
      const derived = slSourceFileName(hostileName);
      forbiddenFragments.forEach((fragment) => {
        expect(derived).not.toContain(fragment);
      });
    });
  });

  it('keeps case-differing names disjoint on case-insensitive filesystems', () => {
    // The verbatim branch never produces a `-`, while the hashed branch always
    // ends in `-<8 hex>` computed from the raw name. `events` and `EVENTS`
    // therefore stay distinct even after case folding (macOS, Windows).
    const fromLower = slSourceFileName('events');
    const fromUpper = slSourceFileName('EVENTS');
    expect(fromLower).toBe('events.yaml');
    expect(fromUpper).toMatch(/^events-[0-9a-f]{8}\.yaml$/);
    expect(fromUpper.toLowerCase()).not.toBe(fromLower.toLowerCase());
    expect(fromLower).not.toContain('-');
  });

  it('routes Windows reserved device basenames through the hash branch', () => {
    const reserved: ReadonlyArray<readonly [string, RegExp]> = [
      ['con', /^con-[0-9a-f]{8}\.yaml$/],
      ['lpt1', /^lpt1-[0-9a-f]{8}\.yaml$/],
    ];
    for (const [deviceName, pattern] of reserved) {
      expect(slSourceFileName(deviceName)).toMatch(pattern);
    }
  });

  it('caps overlong names', () => {
    // 301 characters in: one `a` followed by 300 `b`s.
    const oversized = 'a' + 'b'.repeat(300);
    const derived = slSourceFileName(oversized);
    const maxLength = 64 + '-12345678.yaml'.length;
    expect(derived.length).toBeLessThanOrEqual(maxLength);
    expect(derived).toMatch(/^ab+-[0-9a-f]{8}\.yaml$/);
  });
});
// Full-path derivation: the connection id is validated, the source name is not.
describe('slSourceFilePath', () => {
  it('rejects unsafe connection ids but accepts any source name', () => {
    const derived = slSourceFilePath('warehouse', 'EVENT$LOG');
    expect(derived).toMatch(/^semantic-layer\/warehouse\/event_log-[0-9a-f]{8}\.yaml$/);

    // Wrapped in a thunk so the matcher can observe the throw.
    const deriveWithTraversalId = () => slSourceFilePath('../warehouse', 'orders');
    expect(deriveWithTraversalId).toThrow('Unsafe connection id');
  });
});
// Name recovery from a (path, content) pair.
describe('slSourceNameForFile', () => {
  it('prefers the in-file name and falls back to the filename', () => {
    const cases: ReadonlyArray<readonly [string, string, string]> = [
      // [file path, file content, expected source name]
      ['semantic-layer/warehouse/custom.yaml', 'name: SIGNED_UP\n', 'SIGNED_UP'],
      ['semantic-layer/warehouse/orders.yaml', 'measures: []\n', 'orders'],
      ['semantic-layer/warehouse/orders.yaml', 'measures: [unterminated\n', 'orders'],
    ];
    for (const [filePath, content, expectedName] of cases) {
      expect(slSourceNameForFile(filePath, content)).toBe(expectedName);
    }
  });

  it('recovers the declared name when the file is broken below the name: line', () => {
    // The syntax error sits under `measures:`; the top-level `name:` above it is
    // intact, so a hand-renamed, half-edited file must still report its declared
    // name rather than the unrelated filename.
    const filePath = 'semantic-layer/warehouse/renamed-by-hand.yaml';
    const halfEdited = 'name: SIGNED_UP\nmeasures: [oops\n';
    const recovered = slSourceNameForFile(filePath, halfEdited);
    expect(recovered).toBe('SIGNED_UP');
  });
});
// By-name file resolution, run against the file store of a real project created in a
// fresh temp directory for every test.
describe('resolveSlSourceFile', () => {
let tempDir: string;
let project: KtxLocalProject;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-source-files-'));
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Writes `content` at `path` through the project file store under a fixed test identity.
async function seed(path: string, content: string): Promise<void> {
await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', `seed ${path}`);
}
// The resolved value is expected to be exactly `{ path, content }` of the declaring file.
it('matches by in-file name regardless of the filename', async () => {
await seed('semantic-layer/warehouse/renamed-by-hand.yaml', 'name: SIGNED_UP\nmeasures: []\n');
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'SIGNED_UP')).resolves.toEqual({
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
content: 'name: SIGNED_UP\nmeasures: []\n',
});
});
// `orders` appears only as a table entry inside a `_schema/` shard, which must not count
// as a file declaring that source.
it('returns null when no file declares the name and ignores manifest shards', async () => {
await seed('semantic-layer/warehouse/_schema/public.yaml', 'tables:\n orders:\n table: public.orders\n');
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).resolves.toBeNull();
});
// NOTE(review): the seeded content keeps an intact `name: orders` line and the file is
// also called `orders.yaml`, so this case passes whether the match comes from the
// recovered name or from the filename. If the intent is to pin the filename fallback
// specifically, content without a `name:` line would isolate it — confirm against the
// resolver implementation before changing.
it('falls back to the filename for broken YAML', async () => {
const broken = 'name: orders\nmeasures: [unterminated\n';
await seed('semantic-layer/warehouse/orders.yaml', broken);
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).resolves.toEqual({
path: 'semantic-layer/warehouse/orders.yaml',
content: broken,
});
});
it('matches a human-renamed broken file by its still-recoverable name', async () => {
// Filename ≠ name, so the filename fallback cannot find it; resolution must
// come from the intact top-level `name:` even though the YAML is broken.
const broken = 'name: SIGNED_UP\nmeasures: [unterminated\n';
await seed('semantic-layer/warehouse/renamed-by-hand.yaml', broken);
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'SIGNED_UP')).resolves.toEqual({
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
content: broken,
});
});
// Two files declaring the same name are ambiguous: resolution rejects instead of picking one.
it('throws when two files declare the same source name', async () => {
await seed('semantic-layer/warehouse/orders.yaml', 'name: orders\nmeasures: []\n');
await seed('semantic-layer/warehouse/orders_copy.yaml', 'name: orders\nmeasures: []\n');
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).rejects.toThrow(
'Multiple semantic-layer files declare source "orders"',
);
});
});

View file

@ -188,7 +188,7 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
it('returns a directed hint pointing at sl_write_source + overlay shape', async () => { it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
const { tool, semanticLayerService } = makeTool({ const { tool, semanticLayerService } = makeTool({
semanticLayerService: { semanticLayerService: {
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')), readSourceFile: vi.fn().mockResolvedValue(null),
isManifestBacked: vi.fn().mockResolvedValue(true), isManifestBacked: vi.fn().mockResolvedValue(true),
}, },
}); });
@ -222,7 +222,7 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
it('still returns the plain "Source not found" error for truly-missing names', async () => { it('still returns the plain "Source not found" error for truly-missing names', async () => {
const { tool, semanticLayerService } = makeTool({ const { tool, semanticLayerService } = makeTool({
semanticLayerService: { semanticLayerService: {
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')), readSourceFile: vi.fn().mockResolvedValue(null),
isManifestBacked: vi.fn().mockResolvedValue(false), isManifestBacked: vi.fn().mockResolvedValue(false),
}, },
}); });
@ -241,3 +241,20 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
expect(semanticLayerService.writeSource).not.toHaveBeenCalled(); expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
}); });
}); });
// Edits whose result would change the source's `name:` line must be refused
// before anything is written.
describe('SlEditSourceTool — name edits', () => {
  it('rejects edits that change the in-file name', async () => {
    const { tool, semanticLayerService } = makeTool();
    // A single text replacement that rewrites the `name:` line.
    const renameAttempt = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'orders',
      yaml_edits: [{ oldText: 'name: orders', newText: 'name: renamed_orders' }],
    } as any;

    const outcome = await tool.call(renameAttempt, baseContext);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/renaming is not supported/i);
    expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
  });
});

View file

@ -16,8 +16,15 @@ function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
configService: { configService: {
writeFile: vi.fn().mockResolvedValue(undefined), writeFile: vi.fn().mockResolvedValue(undefined),
deleteFile: vi.fn().mockResolvedValue(undefined), deleteFile: vi.fn().mockResolvedValue(undefined),
// No live file for `orders` — revert recovers the preHead path from history.
listFiles: vi.fn().mockResolvedValue({ files: [] }),
readFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
} as any,
gitService: {
// The source lived at its derived filename at preHead.
listFilesAtCommit: vi.fn().mockResolvedValue(['semantic-layer/conn-1/orders.yaml']),
getFileAtCommit: vi.fn().mockResolvedValue('name: orders\nmeasures: []\n'),
} as any, } as any,
gitService: { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') } as any,
...overrides, ...overrides,
}; };
} }
@ -65,4 +72,33 @@ describe('SlRollbackTool', () => {
expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false); expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
expect(session.actions).toEqual([]); expect(session.actions).toEqual([]);
}); });
it('restores a deleted human-renamed source at the path it occupied at preHead', async () => {
  // Scenario: at preHead the source sat in `custom.yaml` (not the writer-derived
  // `orders.yaml`) and the session has since deleted it. The rollback has to
  // find that custom path in the preHead commit and write the old content back
  // there, rather than writing to, or no-op'ing against, the derived path.
  const preHeadPath = 'semantic-layer/conn-1/custom.yaml';
  const preHeadContent = 'name: orders\ntable: public.orders\nmeasures: []\n';
  const repositoryStub = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };
  const rollbackTool = new SlRollbackTool(repositoryStub as never, connections as never, 1);
  const session = makeSession({
    gitService: {
      listFilesAtCommit: vi.fn().mockResolvedValue([preHeadPath]),
      getFileAtCommit: vi.fn().mockResolvedValue(preHeadContent),
    } as any,
  });
  const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };

  const outcome = await rollbackTool.call({ sourceName: 'orders' } as any, context);

  const configService = session.configService as any;
  const wildcard = expect.anything();
  expect(outcome.structured.success).toBe(true);
  // Only the path and the content are pinned; the four trailing arguments may be anything.
  expect(configService.writeFile).toHaveBeenCalledWith(
    preHeadPath,
    preHeadContent,
    wildcard,
    wildcard,
    wildcard,
    wildcard,
  );
  expect(configService.deleteFile).not.toHaveBeenCalled();
});
}); });

View file

@ -13,7 +13,7 @@ function makeTool(overrides: Partial<Record<string, any>> = {}) {
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
deleteSource: vi.fn().mockResolvedValue(undefined), deleteSource: vi.fn().mockResolvedValue(undefined),
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), readSourceFile: vi.fn().mockResolvedValue(null),
...overrides.semanticLayerService, ...overrides.semanticLayerService,
}; };
const slSearchService = { const slSearchService = {
@ -66,7 +66,7 @@ describe('SlWriteSourceTool — session gating', () => {
deleteSource: vi.fn().mockResolvedValue(undefined), deleteSource: vi.fn().mockResolvedValue(undefined),
listManifestSourceNames: vi.fn().mockResolvedValue([]), listManifestSourceNames: vi.fn().mockResolvedValue([]),
isManifestBacked: vi.fn().mockResolvedValue(false), isManifestBacked: vi.fn().mockResolvedValue(false),
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), readSourceFile: vi.fn().mockResolvedValue(null),
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null), findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
} as any, } as any,
wikiService: {} as any, wikiService: {} as any,
@ -248,7 +248,7 @@ describe('SlWriteSourceTool — session gating', () => {
deleteSource: vi.fn().mockResolvedValue(undefined), deleteSource: vi.fn().mockResolvedValue(undefined),
listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']), listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
isManifestBacked: vi.fn().mockResolvedValue(false), isManifestBacked: vi.fn().mockResolvedValue(false),
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), readSourceFile: vi.fn().mockResolvedValue(null),
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null), findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
} as any, } as any,
}); });
@ -377,3 +377,36 @@ describe('SlWriteSourceTool — standalone shadow guard', () => {
expect(result.markdown).toMatch(/shadows an existing manifest entry|already exists/i); expect(result.markdown).toMatch(/shadows an existing manifest entry|already exists/i);
}); });
}); });
// `sourceName` is the source's identity: the input schema takes warehouse
// identifiers as-is, and the payload's own `name` has to agree with it.
describe('SlWriteSourceTool — source name identity', () => {
  it('accepts verbatim warehouse identifiers as sourceName', () => {
    const { tool } = makeTool();
    const connectionId = '11111111-1111-1111-1111-111111111111';
    const cases: ReadonlyArray<readonly [string, boolean]> = [
      // [sourceName, should the schema accept it?]
      ['SIGNED_UP', true],
      ['EVENT$LOG', true],
      ['orders', true],
      ['', false],
    ];
    for (const [sourceName, accepted] of cases) {
      const parsed = tool.inputSchema.safeParse({ connectionId, sourceName });
      expect(parsed.success).toBe(accepted);
    }
  });

  it('rejects a source whose name does not match sourceName', async () => {
    const { tool, semanticLayerService } = makeTool();
    // `sourceName` says `orders`, while the embedded source claims `other_orders`.
    const mismatchedSource = {
      name: 'other_orders',
      sql: 'select 1 as id',
      grain: ['id'],
      columns: [{ name: 'id', type: 'string' }],
      measures: [],
      joins: [],
    } as any;
    const request = {
      connectionId: '11111111-1111-1111-1111-111111111111',
      sourceName: 'orders',
      source: mismatchedSource,
    } as any;

    const outcome = await tool.call(request, baseContext);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/does not match sourceName/);
    expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
  });
});

View file

@ -0,0 +1,29 @@
import { describe, expect, it } from 'vitest';
import { sqlAnalysisDialectForDriver } from '../../../src/context/sql-analysis/dialect.js';
// Driver-name → sqlglot-dialect mapping, checked as input/output tables.
describe('sqlAnalysisDialectForDriver', () => {
  /** Asserts every `[driver, dialect]` pair, in order. */
  const expectDialects = (pairs: ReadonlyArray<readonly [string | undefined, string]>): void => {
    for (const [driver, dialect] of pairs) {
      expect(sqlAnalysisDialectForDriver(driver)).toBe(dialect);
    }
  };

  it('maps ktx.yaml driver names to sqlglot dialects', () => {
    expectDialects([
      ['postgres', 'postgres'],
      ['bigquery', 'bigquery'],
      ['snowflake', 'snowflake'],
      ['mysql', 'mysql'],
      // The one driver whose dialect name differs from its own.
      ['sqlserver', 'tsql'],
      ['sqlite', 'sqlite'],
      ['duckdb', 'duckdb'],
      ['clickhouse', 'clickhouse'],
      ['databricks', 'databricks'],
    ]);
  });

  it('maps local connection-type spellings to sqlglot dialects', () => {
    expectDialects([
      ['POSTGRESQL', 'postgres'],
      ['SQLSERVER', 'tsql'],
      ['BIGQUERY', 'bigquery'],
      ['SQLITE', 'sqlite'],
    ]);
  });

  it('defaults to postgres for unknown or missing drivers', () => {
    expectDialects([
      [undefined, 'postgres'],
      ['', 'postgres'],
      ['unknown', 'postgres'],
    ]);
  });
});