mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
fix: read semantic sources safely (#284)
* fix: read semantic sources safely
* test: retarget reindex per-scope error case to a broken manifest
Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.
* fix(sl): decouple semantic-layer file names from warehouse naming rules
The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.
- New `source-files.ts`: one total filename derivation (safe lowercase
names verbatim; otherwise slug + sha256-hash suffix, immune to
case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
`assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
name (human renames survive); new sources get a derived filename; a
derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
caller to launder IO errors; `deleteSource` distinguishes manifest-backed
sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
now a recommendation for new sources) and rejects sourceName/source.name
mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
derivation use resolved paths / in-file names instead of interpolating
`<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
helpers onto the shared module; dropped dead service methods.
* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator
- Key broken/renamed semantic-layer files by their recoverable in-file
name (slSourceNameForFile) so mid-edit sources stay reachable under
their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
recover deleted files' declared names from the pre-change commit
instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
validateReadOnly in the connection's dialect, sharing one
driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
reject smuggled second statements, and strip trailing
semicolons/comments before row-limit wrapping
This commit is contained in:
parent
853f39a7c3
commit
f3f893bf01
51 changed files with 1797 additions and 476 deletions
|
|
@ -44,12 +44,17 @@ Use this order for most context changes:
|
|||
Semantic sources are YAML files for queryable tables or custom SQL. They define
|
||||
agent-facing measures, dimensions, segments, joins, and grain.
|
||||
|
||||
Semantic source files live at:
|
||||
Semantic source files live under:
|
||||
|
||||
```text
|
||||
semantic-layer/<connection-id>/<source-name>.yaml
|
||||
semantic-layer/<connection-id>/
|
||||
```
|
||||
|
||||
The file's `name:` field is the source's identity — it carries the warehouse
|
||||
identifier verbatim, including case. The filename is a derived label: simple
|
||||
lowercase names get `<source-name>.yaml`, anything else gets a slugged
|
||||
filename. Renaming a file does not rename the source.
|
||||
|
||||
### Minimal source
|
||||
|
||||
```yaml
|
||||
|
|
@ -152,7 +157,7 @@ joins:
|
|||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `name` | Yes | Source identifier. Use lowercase words and underscores. |
|
||||
| `name` | Yes | Source identity (not the filename). When overlaying an ingested table, match the manifest identifier verbatim, including case (e.g. `SIGNED_UP`); for a new standalone source, lowercase words and underscores are recommended. |
|
||||
| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
|
||||
| `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. |
|
||||
| `grain` | Yes | Columns that uniquely identify a row at the source grain. |
|
||||
|
|
|
|||
|
|
@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string {
|
|||
return url;
|
||||
}
|
||||
|
||||
function stripLeadingSqlComments(sql: string): string {
|
||||
let index = 0;
|
||||
while (index < sql.length) {
|
||||
while (/\s/.test(sql[index] ?? '')) {
|
||||
index += 1;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
index = end === -1 ? sql.length : end + 1;
|
||||
continue;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
if (end === -1) {
|
||||
return sql.slice(index);
|
||||
}
|
||||
index = end + 2;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return sql.slice(index);
|
||||
}
|
||||
|
||||
export function isKtxSqliteConnectionConfig(
|
||||
connection: KtxSqliteConnectionConfig | undefined,
|
||||
): connection is KtxSqliteConnectionConfig {
|
||||
|
|
@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
|||
|
||||
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
|
||||
const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params);
|
||||
return { ...result, rowCount: result.rows.length };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
|
||||
import { assertReadOnlySql, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
|
|
@ -284,7 +284,7 @@ function isDeniedError(error: unknown): boolean {
|
|||
}
|
||||
|
||||
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
|
||||
const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
|
||||
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sqlText));
|
||||
if (!maxRows) {
|
||||
return trimmed;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,16 +2,133 @@ const MUTATING_SQL =
|
|||
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
|
||||
const READ_SQL = /^\s*(select|with)\b/i;
|
||||
|
||||
// Agents (and the daemon's sqlglot validator, which ignores comments) routinely
|
||||
// emit read-only queries prefixed with `-- ...` or `/* ... */`. Strip leading
|
||||
// comments so the prefix check sees the real statement; otherwise valid SELECT/WITH
|
||||
// SQL is rejected here while the parser-backed validator accepts it.
|
||||
function stripLeadingSqlComments(sql: string): string {
|
||||
let index = 0;
|
||||
while (index < sql.length) {
|
||||
while (/\s/.test(sql[index] ?? '')) {
|
||||
index += 1;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
index = end === -1 ? sql.length : end + 1;
|
||||
continue;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
if (end === -1) {
|
||||
return sql.slice(index);
|
||||
}
|
||||
index = end + 2;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return sql.slice(index);
|
||||
}
|
||||
|
||||
// Lexes past one string literal, quoted identifier, or comment starting at
|
||||
// `index`, using standard-SQL rules ('' and "" escapes; no dialect extensions
|
||||
// such as backslash escapes or dollar quoting). Returns the index after the
|
||||
// token, or `index` unchanged when no quoted/comment token starts there.
|
||||
function skipQuotedOrComment(sql: string, index: number): number {
|
||||
const quote = sql[index];
|
||||
if (quote === "'" || quote === '"') {
|
||||
let i = index + 1;
|
||||
while (i < sql.length) {
|
||||
if (sql[i] === quote) {
|
||||
if (sql[i + 1] === quote) {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
return i + 1;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
return sql.length;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
return end === -1 ? sql.length : end + 1;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
return end === -1 ? sql.length : end + 2;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
// Backstop against statement smuggling (`select 1; drop table x`): reject any
|
||||
// semicolon that is followed by real content. Semicolons inside string
|
||||
// literals, quoted identifiers, and comments are fine, as are trailing
|
||||
// semicolons (optionally followed by whitespace and comments). This deliberately
|
||||
// lexes standard SQL only, so dialect-specific escapes can cause a false
|
||||
// reject — never a false accept; the canonical gate is the daemon's
|
||||
// sqlglot-backed validateReadOnly.
|
||||
function assertSingleSqlStatement(sql: string): void {
|
||||
let index = 0;
|
||||
let sawSemicolon = false;
|
||||
while (index < sql.length) {
|
||||
const skipped = skipQuotedOrComment(sql, index);
|
||||
if (skipped > index) {
|
||||
index = skipped;
|
||||
continue;
|
||||
}
|
||||
if (sql[index] === ';') {
|
||||
sawSemicolon = true;
|
||||
} else if (sawSemicolon && !/\s/.test(sql[index])) {
|
||||
throw new Error('Only one SQL statement can be executed.');
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
export function assertReadOnlySql(sql: string): string {
|
||||
const trimmed = sql.trim();
|
||||
const trimmed = stripLeadingSqlComments(sql).trim();
|
||||
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
|
||||
throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
|
||||
}
|
||||
assertSingleSqlStatement(trimmed);
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
// `assertReadOnlySql` deliberately keeps trailing semicolons, comments, and
|
||||
// whitespace (e.g. `select 1; -- done`) — harmless for direct single-statement
|
||||
// execution. A row-limit subquery wrapper needs a bare expression instead: a
|
||||
// trailing `;` would sit illegally inside the subquery, and a trailing line
|
||||
// comment would comment out the closing paren and limit clause. Lex forward with
|
||||
// the same standard-SQL rules as the single-statement gate and truncate at the
|
||||
// end of the last meaningful token, dropping trailing semicolons, comments, and
|
||||
// whitespace. Characters inside string literals and quoted identifiers stay
|
||||
// meaningful, so a `;` or `--` within a literal is never mistaken for a
|
||||
// terminator (a plain regex cannot make that distinction).
|
||||
export function stripTrailingSqlNoise(sql: string): string {
|
||||
let index = 0;
|
||||
let meaningfulEnd = 0;
|
||||
while (index < sql.length) {
|
||||
if (sql.startsWith('--', index) || sql.startsWith('/*', index)) {
|
||||
index = skipQuotedOrComment(sql, index);
|
||||
continue;
|
||||
}
|
||||
const afterQuoted = skipQuotedOrComment(sql, index);
|
||||
if (afterQuoted > index) {
|
||||
meaningfulEnd = afterQuoted;
|
||||
index = afterQuoted;
|
||||
continue;
|
||||
}
|
||||
if (sql[index] !== ';' && !/\s/.test(sql[index] ?? '')) {
|
||||
meaningfulEnd = index + 1;
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
return sql.slice(0, meaningfulEnd);
|
||||
}
|
||||
|
||||
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
|
||||
const trimmed = assertReadOnlySql(sql).replace(/;+\s*$/, '');
|
||||
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sql));
|
||||
if (!maxRows) {
|
||||
return trimmed;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -557,12 +557,13 @@ export class GitService {
|
|||
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
* List all paths matching `pathSpec` as they exist at `commitHash`. Reads from
|
||||
* git object storage, so it's safe against concurrent working-tree mutations
|
||||
* and can recover paths (e.g. a human-renamed file) that no longer exist on disk.
|
||||
*/
|
||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||
async listFilesAtCommit(pathSpec: string, commitHash: string): Promise<string[]> {
|
||||
try {
|
||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', 'HEAD', '--', pathSpec]);
|
||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', commitHash, '--', pathSpec]);
|
||||
if (!raw) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -572,6 +573,14 @@ export class GitService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
*/
|
||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||
return this.listFilesAtCommit(pathSpec, 'HEAD');
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapse all commits between `preHead` and current HEAD into a single commit with the given
|
||||
* message. Used by the memory agent to squash N per-tool-call commits into one ingest commit.
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { dirname, join, relative } from 'node:path';
|
|||
import YAML from 'yaml';
|
||||
import type { MemoryAction } from '../../../../context/memory/types.js';
|
||||
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
|
||||
import { isSlYamlPath } from '../../../sl/source-files.js';
|
||||
import type { FinalizationOverrideReplay } from '../../types.js';
|
||||
import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
|
||||
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
|
||||
|
|
@ -251,7 +252,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
|
||||
|
||||
const schemaRoot = join(input.workdir, 'semantic-layer', input.connectionId, '_schema');
|
||||
for (const file of (await walkFiles(schemaRoot)).filter((candidate) => candidate.endsWith('.yaml') || candidate.endsWith('.yml'))) {
|
||||
for (const file of (await walkFiles(schemaRoot)).filter(isSlYamlPath)) {
|
||||
const path = join(schemaRoot, file);
|
||||
const before = await readFile(path, 'utf-8');
|
||||
const shard = (YAML.parse(before) ?? {}) as ManifestShard;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
||||
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||
import type { IngestTraceWriter } from './ingest-trace.js';
|
||||
import { traceTimed } from './ingest-trace.js';
|
||||
|
||||
|
|
@ -149,11 +148,13 @@ function buildToolSet(input: {
|
|||
|
||||
export function finalGateRepairPaths(input: {
|
||||
changedWikiPageKeys: string[];
|
||||
touchedSlSources: TouchedSlSource[];
|
||||
// Resolved by the caller: SL filenames are derived labels, so the repair
|
||||
// allowlist must carry the real on-disk paths, not name-interpolated ones.
|
||||
touchedSlSourcePaths: string[];
|
||||
}): string[] {
|
||||
return [
|
||||
...new Set([
|
||||
...input.touchedSlSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
||||
...input.touchedSlSourcePaths,
|
||||
...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
||||
]),
|
||||
].sort();
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
||||
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
||||
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||
import type { IngestReportFinalizationMismatch } from './reports.js';
|
||||
|
|
@ -64,39 +65,36 @@ export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
|
|||
);
|
||||
}
|
||||
|
||||
export async function deriveFinalizationTouchedSources(
|
||||
input: DeriveTouchedSourcesInput,
|
||||
): Promise<DeriveTouchedSourcesResult> {
|
||||
// Source identity is the in-file `name:`; filenames are derived labels (see
|
||||
// source-files.ts), so a changed path — manifest shard or standalone file —
|
||||
// cannot be mapped to a source by parsing its filename. Instead, every changed
|
||||
// semantic-layer file is attributed through the before/after diff of its
|
||||
// connection's composed sources. A changed file whose connection diff is empty
|
||||
// cannot be attributed to any source and is surfaced as unresolved.
|
||||
export function deriveFinalizationTouchedSources(input: DeriveTouchedSourcesInput): DeriveTouchedSourcesResult {
|
||||
const touched = new Map<string, TouchedSlSource>();
|
||||
const unresolvedPaths: string[] = [];
|
||||
|
||||
const pathsByConnection = new Map<string, string[]>();
|
||||
for (const path of input.changedPaths) {
|
||||
if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
|
||||
if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path)) {
|
||||
continue;
|
||||
}
|
||||
const parts = path.split('/');
|
||||
const connectionId = parts[1] ?? '';
|
||||
const connectionId = path.split('/')[1] ?? '';
|
||||
if (!connectionId) {
|
||||
unresolvedPaths.push(path);
|
||||
continue;
|
||||
}
|
||||
if (parts[2] !== '_schema') {
|
||||
const fileName = parts.at(-1) ?? '';
|
||||
const sourceName = fileName.replace(/\.ya?ml$/, '');
|
||||
if (!sourceName) {
|
||||
unresolvedPaths.push(path);
|
||||
continue;
|
||||
}
|
||||
touched.set(`${connectionId}:${sourceName}`, { connectionId, sourceName });
|
||||
continue;
|
||||
}
|
||||
pathsByConnection.set(connectionId, [...(pathsByConnection.get(connectionId) ?? []), path]);
|
||||
}
|
||||
|
||||
for (const [connectionId, paths] of pathsByConnection) {
|
||||
const changedNames = changedSourceNames(
|
||||
input.beforeSourcesByConnection.get(connectionId) ?? [],
|
||||
input.afterSourcesByConnection.get(connectionId) ?? [],
|
||||
);
|
||||
if (changedNames.length === 0) {
|
||||
unresolvedPaths.push(path);
|
||||
unresolvedPaths.push(...paths);
|
||||
continue;
|
||||
}
|
||||
for (const sourceName of changedNames) {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime
|
|||
import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
||||
import type { CaptureSession, MemoryAction } from '../../context/memory/types.js';
|
||||
import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js';
|
||||
import { isSlYamlPath, slSourceFilePath, slSourceNameForFile, sourceNameFromPath } from '../../context/sl/source-files.js';
|
||||
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
||||
import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js';
|
||||
import { createTouchedSlSources, type TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||
|
|
@ -498,7 +499,7 @@ export class IngestBundleRunner {
|
|||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||
const names = files
|
||||
.filter((f) => !f.startsWith('_schema/'))
|
||||
.map((f) => f.replace(/\.yaml$/, ''))
|
||||
.map((f) => sourceNameFromPath(f))
|
||||
.sort((left, right) => left.localeCompare(right));
|
||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||
return `## ${connectionId}\n${body}`;
|
||||
|
|
@ -791,14 +792,52 @@ export class IngestBundleRunner {
|
|||
].sort();
|
||||
}
|
||||
|
||||
private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] {
|
||||
return paths
|
||||
.filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/'))
|
||||
.map((path) => {
|
||||
const [, connectionId, fileName] = path.split('/');
|
||||
return { connectionId: connectionId ?? '', sourceName: (fileName ?? '').replace(/\.yaml$/, '') };
|
||||
})
|
||||
.filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0);
|
||||
private async touchedSlSourcesFromPaths(
|
||||
worktree: IngestSessionWorktree,
|
||||
paths: string[],
|
||||
deletedFileSha: string,
|
||||
): Promise<TouchedSlSource[]> {
|
||||
const sources: TouchedSlSource[] = [];
|
||||
for (const path of paths) {
|
||||
if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path) || path.includes('/_schema/')) {
|
||||
continue;
|
||||
}
|
||||
const [, connectionId] = path.split('/');
|
||||
if (!connectionId) {
|
||||
continue;
|
||||
}
|
||||
// Source identity is the in-file `name:`, never the filename — an uppercase
|
||||
// warehouse source like `WIDGET_SALES` lives in a hash-derived
|
||||
// `widget_sales-<hash>.yaml`, so parsing the basename yields a phantom name.
|
||||
// Read the live file; when it was deleted this run, recover its declared
|
||||
// name from the pre-change commit the way `revertSourceToPreHead` resolves a
|
||||
// gone file from history. The filename is a last resort only when the content
|
||||
// is unrecoverable from both.
|
||||
let content: string | null;
|
||||
try {
|
||||
content = await readFile(join(worktree.workdir, path), 'utf-8');
|
||||
} catch {
|
||||
content = await worktree.git.getFileAtCommit(path, deletedFileSha).catch(() => null);
|
||||
}
|
||||
const sourceName = content === null ? sourceNameFromPath(path) : slSourceNameForFile(path, content);
|
||||
if (sourceName.length > 0) {
|
||||
sources.push({ connectionId, sourceName });
|
||||
}
|
||||
}
|
||||
return sources;
|
||||
}
|
||||
|
||||
// Inverse direction for commits and repair allowlists: resolve each touched
|
||||
// source to its real on-disk path, falling back to the writer's derived
|
||||
// filename when the file was deleted in this run.
|
||||
private async touchedSlSourcePaths(workdir: string, touched: TouchedSlSource[]): Promise<string[]> {
|
||||
const service = this.deps.semanticLayerService.forWorktree(workdir);
|
||||
const paths: string[] = [];
|
||||
for (const source of touched) {
|
||||
const file = await service.readSourceFile(source.connectionId, source.sourceName);
|
||||
paths.push(file?.path ?? slSourceFilePath(source.connectionId, source.sourceName));
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
|
||||
private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] {
|
||||
|
|
@ -1558,7 +1597,7 @@ export class IngestBundleRunner {
|
|||
projectionTouchedSources = projection.touchedSources;
|
||||
projectionChangedWikiPageKeys = projection.changedWikiPageKeys;
|
||||
const projectionPaths = [
|
||||
...projection.touchedSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
||||
...(await this.touchedSlSourcePaths(sessionWorktree.workdir, projection.touchedSources)),
|
||||
...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
||||
];
|
||||
projectionTouchedPaths = projectionPaths;
|
||||
|
|
@ -1740,7 +1779,11 @@ export class IngestBundleRunner {
|
|||
await validateFinalIngestArtifacts({
|
||||
connectionIds: slConnectionIds,
|
||||
changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths),
|
||||
touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths),
|
||||
touchedSlSources: await this.touchedSlSourcesFromPaths(
|
||||
sessionWorktree,
|
||||
touchedPaths,
|
||||
await sessionWorktree.git.revParseHead(),
|
||||
),
|
||||
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
||||
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
||||
validateTouchedSources: (touched) =>
|
||||
|
|
@ -2289,20 +2332,34 @@ export class IngestBundleRunner {
|
|||
)
|
||||
: [];
|
||||
|
||||
const changedConnectionIds = [
|
||||
...new Set([
|
||||
...slConnectionIds,
|
||||
...finalizationTouchedPaths
|
||||
.filter((path) => path.startsWith('semantic-layer/'))
|
||||
.map((path) => path.split('/')[1])
|
||||
.filter((connectionId): connectionId is string => Boolean(connectionId)),
|
||||
]),
|
||||
].sort();
|
||||
// Validate the write scope before deriving touched sources: attribution
|
||||
// by before/after diff is only defined for connections whose
|
||||
// pre-finalization snapshot was loaded (slConnectionIds), and an
|
||||
// out-of-scope write would otherwise surface downstream as a bogus
|
||||
// unresolved-path or declaration-mismatch failure instead of the real
|
||||
// policy violation.
|
||||
await traceTimed(
|
||||
runTrace,
|
||||
'finalization',
|
||||
'semantic_layer_target_policy',
|
||||
{
|
||||
sourceKey: job.sourceKey,
|
||||
allowedTargetConnectionIds: slConnectionIds,
|
||||
touchedPaths: [...new Set(finalizationTouchedPaths)].sort(),
|
||||
},
|
||||
async () => {
|
||||
assertSemanticLayerTargetPathsAllowed({
|
||||
paths: finalizationTouchedPaths,
|
||||
allowedConnectionIds: new Set(slConnectionIds),
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
const postFinalizationSourcesByConnection = await this.loadSourcesByConnection(
|
||||
sessionWorktree.workdir,
|
||||
changedConnectionIds,
|
||||
slConnectionIds,
|
||||
);
|
||||
const scope = await deriveFinalizationTouchedSources({
|
||||
const scope = deriveFinalizationTouchedSources({
|
||||
changedPaths: finalizationTouchedPaths,
|
||||
beforeSourcesByConnection: preFinalizationSourcesByConnection,
|
||||
afterSourcesByConnection: postFinalizationSourcesByConnection,
|
||||
|
|
@ -2437,7 +2494,7 @@ export class IngestBundleRunner {
|
|||
...(isolatedDiffEnabled ? projectionTouchedSources : []),
|
||||
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
|
||||
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
|
||||
...this.touchedSlSourcesFromPaths(postReconciliationPaths),
|
||||
...(await this.touchedSlSourcesFromPaths(sessionWorktree, postReconciliationPaths, preReconciliationSha)),
|
||||
...finalizationTouchedSources,
|
||||
]);
|
||||
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
|
||||
|
|
@ -2528,7 +2585,7 @@ export class IngestBundleRunner {
|
|||
const gateError = this.errorMessage(error);
|
||||
const repairPaths = finalGateRepairPaths({
|
||||
changedWikiPageKeys: finalChangedWikiPageKeys,
|
||||
touchedSlSources: finalTouchedSlSources,
|
||||
touchedSlSourcePaths: await this.touchedSlSourcePaths(sessionWorktree.workdir, finalTouchedSlSources),
|
||||
});
|
||||
emitStageProgress('final_gates', 89, 'Repairing final artifact gates');
|
||||
const gateRepair = await repairFinalGateFailure({
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { fileURLToPath } from 'node:url';
|
|||
import YAML from 'yaml';
|
||||
import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js';
|
||||
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
||||
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||
import type { KtxLogger } from '../../context/core/config.js';
|
||||
import { noopLogger } from '../../context/core/config.js';
|
||||
|
|
@ -95,6 +96,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
|
|||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
jobIdFactory?: () => string;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: KtxEmbeddingProvider | null;
|
||||
|
|
@ -271,16 +273,13 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
|||
}
|
||||
|
||||
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
||||
let content: string;
|
||||
try {
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
content = file.content;
|
||||
} catch (error) {
|
||||
return this.validateComposedSource(deps, connectionId, sourceName, error);
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
return this.validateComposedSource(deps, connectionId, sourceName, 'no standalone or overlay file found');
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = YAML.parse(content) as unknown as Record<string, unknown>;
|
||||
const parsed = YAML.parse(file.content) as unknown as Record<string, unknown>;
|
||||
return this.validateParsedSource(sourceName, parsed);
|
||||
} catch (error) {
|
||||
return {
|
||||
|
|
@ -519,6 +518,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
authorResolver: GitAuthorResolverPort;
|
||||
slSourcesRepository: SlSourcesIndexPort;
|
||||
connections: SlConnectionCatalogPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
contextStore: SqliteContextEvidenceStore;
|
||||
embedding: KtxEmbeddingPort;
|
||||
}) {
|
||||
|
|
@ -551,6 +551,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
|
||||
const warehouseVerificationTools = createWarehouseVerificationTools({
|
||||
connections: deps.connections,
|
||||
...(deps.sqlAnalysis ? { sqlAnalysis: deps.sqlAnalysis } : {}),
|
||||
fallbackFileStore: deps.project.fileStore,
|
||||
wikiSearchTool,
|
||||
slDiscoverTool,
|
||||
|
|
@ -699,6 +700,7 @@ export function createLocalBundleIngestRuntime(
|
|||
authorResolver: new LocalAuthorResolver(),
|
||||
slSourcesRepository,
|
||||
connections,
|
||||
...(options.sqlAnalysis ? { sqlAnalysis: options.sqlAnalysis } : {}),
|
||||
contextStore,
|
||||
embedding,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { randomUUID } from 'node:crypto';
|
|||
import { cp, mkdir, rm } from 'node:fs/promises';
|
||||
import { isAbsolute, resolve } from 'node:path';
|
||||
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
||||
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import type { KtxLogger } from '../../context/core/config.js';
|
||||
import { createAbortError, isAbortError } from '../../context/core/abort.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
||||
|
|
@ -35,6 +36,7 @@ export interface RunLocalIngestOptions {
|
|||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
||||
abortSignal?: AbortSignal;
|
||||
|
|
@ -159,6 +161,7 @@ async function runScheduledPullJob(options: {
|
|||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
||||
abortSignal?: AbortSignal;
|
||||
|
|
@ -214,6 +217,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
|
|||
memoryModel: options.memoryModel,
|
||||
semanticLayerCompute: options.semanticLayerCompute,
|
||||
queryExecutor: options.queryExecutor,
|
||||
sqlAnalysis: options.sqlAnalysis,
|
||||
logger: options.logger,
|
||||
embeddingProvider: options.embeddingProvider,
|
||||
abortSignal: options.abortSignal,
|
||||
|
|
@ -397,6 +401,7 @@ export async function runLocalMetabaseIngest(
|
|||
memoryModel: options.memoryModel,
|
||||
semanticLayerCompute: options.semanticLayerCompute,
|
||||
queryExecutor: options.queryExecutor,
|
||||
sqlAnalysis: options.sqlAnalysis,
|
||||
logger: options.logger,
|
||||
embeddingProvider: options.embeddingProvider,
|
||||
abortSignal: options.abortSignal,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import type { KtxFileStorePort } from '../../../core/file-store.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/ports.js';
|
||||
import type { SqlAnalysisPort } from '../../../sql-analysis/ports.js';
|
||||
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/base-tool.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
|
|
@ -8,6 +9,7 @@ import { SqlExecutionTool } from './sql-execution.tool.js';
|
|||
|
||||
export function createWarehouseVerificationTools(deps: {
|
||||
connections: SlConnectionCatalogPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
fallbackFileStore: KtxFileStorePort;
|
||||
wikiSearchTool: BaseTool;
|
||||
slDiscoverTool: BaseTool;
|
||||
|
|
@ -18,7 +20,7 @@ export function createWarehouseVerificationTools(deps: {
|
|||
});
|
||||
return [
|
||||
new EntityDetailsTool(catalogFactory),
|
||||
new SqlExecutionTool(deps.connections),
|
||||
new SqlExecutionTool(deps.connections, deps.sqlAnalysis),
|
||||
new DiscoverDataTool({
|
||||
wikiSearchTool: deps.wikiSearchTool,
|
||||
slDiscoverTool: deps.slDiscoverTool,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { z } from 'zod';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../../../context/connections/read-only-sql.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../../context/sl/ports.js';
|
||||
import { sqlAnalysisDialectForDriver } from '../../../../context/sql-analysis/dialect.js';
|
||||
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../../context/tools/base-tool.js';
|
||||
|
||||
const sqlExecutionInputSchema = z.object({
|
||||
|
|
@ -40,7 +42,10 @@ function markdownTable(headers: string[], rows: unknown[][], totalRows: number):
|
|||
export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
||||
readonly name = 'sql_execution';
|
||||
|
||||
constructor(private readonly connections: SlConnectionCatalogPort) {
|
||||
constructor(
|
||||
private readonly connections: SlConnectionCatalogPort,
|
||||
private readonly sqlAnalysis?: SqlAnalysisPort,
|
||||
) {
|
||||
super();
|
||||
}
|
||||
|
||||
|
|
@ -69,9 +74,24 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
|||
};
|
||||
}
|
||||
|
||||
if (!this.sqlAnalysis) {
|
||||
throw new Error('sql_execution requires parser-backed SQL validation.');
|
||||
}
|
||||
|
||||
let sql: string;
|
||||
let wrappedSql: string;
|
||||
try {
|
||||
const connection = await this.connections.getConnectionById(input.connectionId);
|
||||
if (!connection) {
|
||||
throw new Error(`Connection not found: ${input.connectionId}`);
|
||||
}
|
||||
const validation = await this.sqlAnalysis.validateReadOnly(
|
||||
input.sql,
|
||||
sqlAnalysisDialectForDriver(connection.connectionType),
|
||||
);
|
||||
if (!validation.ok) {
|
||||
throw new Error(validation.error ?? 'SQL is not read-only.');
|
||||
}
|
||||
sql = assertReadOnlySql(input.sql);
|
||||
wrappedSql = limitSqlForExecution(sql, input.rowLimit);
|
||||
} catch (error) {
|
||||
|
|
|
|||
|
|
@ -8,9 +8,12 @@ import { createKtxEntityDetailsService } from '../../context/scan/entity-details
|
|||
import type { KtxScanConnector } from '../../context/scan/types.js';
|
||||
import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js';
|
||||
import { createKtxDiscoverDataService } from '../../context/search/discover.js';
|
||||
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import { sqlAnalysisDialectForDriver } from '../../context/sql-analysis/dialect.js';
|
||||
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import { compileLocalSlQuery } from '../../context/sl/local-query.js';
|
||||
import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js';
|
||||
import { readLocalSlSource } from '../../context/sl/local-sl.js';
|
||||
import { assertSafeConnectionId } from '../../context/sl/source-files.js';
|
||||
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
|
||||
import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js';
|
||||
|
||||
|
|
@ -22,64 +25,12 @@ interface CreateLocalProjectMcpContextPortsOptions {
|
|||
embeddingService: KtxEmbeddingPort | null;
|
||||
}
|
||||
|
||||
function dialectForDriver(driver: string | undefined): string {
|
||||
const normalized = (driver ?? 'postgres').toUpperCase();
|
||||
const map: Record<string, string> = {
|
||||
POSTGRES: 'postgres',
|
||||
BIGQUERY: 'bigquery',
|
||||
SNOWFLAKE: 'snowflake',
|
||||
MYSQL: 'mysql',
|
||||
SQLSERVER: 'tsql',
|
||||
SQLITE: 'sqlite',
|
||||
DUCKDB: 'duckdb',
|
||||
CLICKHOUSE: 'clickhouse',
|
||||
DATABRICKS: 'databricks',
|
||||
};
|
||||
return map[normalized] ?? 'postgres';
|
||||
}
|
||||
|
||||
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
||||
return dialectForDriver(driver) as SqlAnalysisDialect;
|
||||
}
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
function assertSafeSourceName(sourceName: string): string {
|
||||
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
|
||||
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
|
||||
}
|
||||
return assertSafePathToken('semantic-layer source name', sourceName);
|
||||
}
|
||||
|
||||
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
|
||||
if (connector?.cleanup) {
|
||||
await connector.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
function slPath(connectionId: string, sourceName: string): string {
|
||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
|
||||
}
|
||||
|
||||
async function executeValidatedReadOnlySql(
|
||||
project: KtxLocalProject,
|
||||
options: CreateLocalProjectMcpContextPortsOptions,
|
||||
|
|
@ -201,13 +152,11 @@ export function createLocalProjectMcpContextPorts(
|
|||
},
|
||||
semanticLayer: {
|
||||
async readSource(input) {
|
||||
const path = slPath(input.connectionId, input.sourceName);
|
||||
try {
|
||||
const result = await project.fileStore.readFile(path);
|
||||
return { sourceName: input.sourceName, yaml: result.content };
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const source = await readLocalSlSource(project, {
|
||||
connectionId: input.connectionId,
|
||||
sourceName: input.sourceName,
|
||||
});
|
||||
return source ? { sourceName: source.name, yaml: source.yaml } : null;
|
||||
},
|
||||
async query(input, executionOptions) {
|
||||
if (!options.semanticLayerCompute) {
|
||||
|
|
|
|||
|
|
@ -375,6 +375,9 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
|||
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
||||
try {
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
return { errors: [`${sourceName}: no standalone or overlay file found`], warnings: [] };
|
||||
}
|
||||
const parsed = YAML.parse(file.content) as SemanticLayerSource;
|
||||
const isOverlay = parsed.table == null && parsed.sql == null;
|
||||
const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
|
||||
|
|
|
|||
|
|
@ -483,7 +483,7 @@ export class MemoryAgentService {
|
|||
if (session.connectionId) {
|
||||
for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) {
|
||||
try {
|
||||
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName).catch(() => null);
|
||||
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (file?.content) {
|
||||
const parsed = this.parseYamlOrNull(file.content);
|
||||
if (parsed) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { buildLiveDatabaseManifestShards, type LiveDatabaseManifestExistingDescr
|
|||
import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sql/skill-schemas.js';
|
||||
import type { KtxScanRelationshipConfig } from '../project/config.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
||||
import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js';
|
||||
import {
|
||||
buildKtxRelationshipArtifacts,
|
||||
|
|
@ -205,7 +206,7 @@ async function loadExistingManifestState(
|
|||
|
||||
let files: string[];
|
||||
try {
|
||||
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml'));
|
||||
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter(isSlYamlPath);
|
||||
} catch {
|
||||
return { descriptions, preservedJoins, usage };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,10 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-ex
|
|||
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
||||
import type { KtxMcpProgressCallback } from '../mcp/types.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
|
||||
import { loadLocalSlSourceRecords } from './local-sl.js';
|
||||
import { toResolvedWire } from './semantic-layer.service.js';
|
||||
import { assertSafeConnectionId } from './source-files.js';
|
||||
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
|
||||
|
||||
const COMPILE_ONLY_REASON =
|
||||
|
|
@ -24,43 +26,6 @@ export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionRe
|
|||
dialect: string;
|
||||
}
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
function dialectForDriver(driver: string | undefined): string {
|
||||
const normalized = (driver ?? 'postgres').toUpperCase();
|
||||
const map: Record<string, string> = {
|
||||
POSTGRES: 'postgres',
|
||||
BIGQUERY: 'bigquery',
|
||||
SNOWFLAKE: 'snowflake',
|
||||
MYSQL: 'mysql',
|
||||
SQLSERVER: 'tsql',
|
||||
SQLITE: 'sqlite',
|
||||
DUCKDB: 'duckdb',
|
||||
CLICKHOUSE: 'clickhouse',
|
||||
DATABRICKS: 'databricks',
|
||||
};
|
||||
return map[normalized] ?? 'postgres';
|
||||
}
|
||||
|
||||
function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string {
|
||||
if (requested) {
|
||||
return assertSafeConnectionId(requested);
|
||||
|
|
@ -93,7 +58,7 @@ export async function compileLocalSlQuery(
|
|||
): Promise<CompileLocalSlQueryResult> {
|
||||
await options.onProgress?.({ progress: 0, message: 'Compiling query' });
|
||||
const connectionId = resolveLocalConnectionId(project, options.connectionId);
|
||||
const dialect = dialectForDriver(project.config.connections[connectionId]?.driver);
|
||||
const dialect = sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver);
|
||||
const sources = await loadComputableSources(project, connectionId);
|
||||
|
||||
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import { join } from 'node:path';
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||
import type { KtxFileWriteResult } from '../../context/core/file-store.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { HybridSearchCore } from '../../context/search/hybrid-search-core.js';
|
||||
import type { SearchCandidateGenerator } from '../../context/search/types.js';
|
||||
|
|
@ -18,6 +17,13 @@ import {
|
|||
} from './semantic-layer.service.js';
|
||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
||||
import {
|
||||
assertSafeConnectionId,
|
||||
isSafeConnectionId,
|
||||
isSlYamlPath,
|
||||
slSourceNameForFile,
|
||||
sourceNameFromPath,
|
||||
} from './source-files.js';
|
||||
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
||||
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
|
||||
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
|
||||
|
|
@ -69,58 +75,10 @@ export type ResolvedSlSource =
|
|||
| { kind: 'not-found' }
|
||||
| { kind: 'ambiguous'; connectionIds: string[] };
|
||||
|
||||
const LOCAL_AUTHOR = 'ktx';
|
||||
const LOCAL_AUTHOR_EMAIL = 'ktx@example.com';
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
|
||||
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
|
||||
}
|
||||
|
||||
function assertSafeSourceName(sourceName: string): string {
|
||||
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
|
||||
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
|
||||
}
|
||||
return assertSafePathToken('semantic-layer source name', sourceName);
|
||||
}
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
function slPath(connectionId: string, sourceName: string): string {
|
||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
|
||||
}
|
||||
|
||||
function sourceNameFromPath(path: string): string {
|
||||
return (
|
||||
path
|
||||
.split('/')
|
||||
.at(-1)
|
||||
?.replace(/\.ya?ml$/, '') ?? path
|
||||
);
|
||||
}
|
||||
|
||||
function parseYamlRecord(raw: string): Record<string, unknown> {
|
||||
const parsed = YAML.parse(raw) as unknown;
|
||||
if (!isRecord(parsed)) {
|
||||
|
|
@ -215,12 +173,17 @@ export async function loadLocalSlSourceRecords(
|
|||
const dir = `semantic-layer/${connectionId}`;
|
||||
const schemaDir = `${dir}/_schema`;
|
||||
const listed = await project.fileStore.listFiles(dir);
|
||||
const paths = listed.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort();
|
||||
const paths = listed.files.filter(isSlYamlPath).sort();
|
||||
const sources = new Map<string, LocalSlSourceRecord>();
|
||||
|
||||
for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
|
||||
const raw = await project.fileStore.readFile(path);
|
||||
const tables = manifestTables(parseYamlRecord(raw.content));
|
||||
let tables: Record<string, ManifestTableEntry> | null;
|
||||
try {
|
||||
tables = manifestTables(parseYamlRecord(raw.content));
|
||||
} catch (error) {
|
||||
throw new Error(`${path}: ${error instanceof Error ? error.message : String(error)}`);
|
||||
}
|
||||
if (!tables) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -237,7 +200,29 @@ export async function loadLocalSlSourceRecords(
|
|||
|
||||
for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
|
||||
const raw = await project.fileStore.readFile(path);
|
||||
const parsed = parseYamlRecord(raw.content);
|
||||
let parsed: Record<string, unknown>;
|
||||
try {
|
||||
parsed = parseYamlRecord(raw.content);
|
||||
} catch {
|
||||
// A source mid-edit (e.g. an agent saved half-written YAML) must not take
|
||||
// down reads, listings, or search for its siblings. Key it by the same
|
||||
// name the writer side uses (the intact top-level `name:`, recovered even
|
||||
// when the YAML is broken below it; filename only as a last resort) so a
|
||||
// broken uppercase/hashed/human-renamed source stays reachable under its
|
||||
// real name, and surface the raw content for repair.
|
||||
const brokenName = slSourceNameForFile(path, raw.content);
|
||||
sources.set(brokenName, {
|
||||
connectionId,
|
||||
name: brokenName,
|
||||
path,
|
||||
columnCount: 0,
|
||||
measureCount: 0,
|
||||
joinCount: 0,
|
||||
yaml: raw.content,
|
||||
source: { name: brokenName, grain: [], columns: [], joins: [], measures: [] },
|
||||
});
|
||||
continue;
|
||||
}
|
||||
const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
|
||||
if (parsed.table || parsed.sql) {
|
||||
const source = parsedStandaloneSource(parsed, name);
|
||||
|
|
@ -292,50 +277,21 @@ export async function validateLocalSlSource(
|
|||
}
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export async function writeLocalSlSource(
|
||||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string; yaml: string },
|
||||
): Promise<KtxFileWriteResult> {
|
||||
const validation = await validateLocalSlSource(input.yaml, { project, connectionId: input.connectionId });
|
||||
if (!validation.valid) {
|
||||
throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
|
||||
}
|
||||
|
||||
const parsed = parseYamlRecord(input.yaml);
|
||||
if (typeof parsed.name === 'string' && parsed.name !== input.sourceName) {
|
||||
throw new Error(`Semantic-layer source name "${parsed.name}" does not match requested path "${input.sourceName}"`);
|
||||
}
|
||||
|
||||
const path = slPath(input.connectionId, input.sourceName);
|
||||
return project.fileStore.writeFile(
|
||||
path,
|
||||
input.yaml.endsWith('\n') ? input.yaml : `${input.yaml}\n`,
|
||||
LOCAL_AUTHOR,
|
||||
LOCAL_AUTHOR_EMAIL,
|
||||
`Write semantic-layer source: ${input.connectionId}/${input.sourceName}`,
|
||||
);
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export async function readLocalSlSource(
|
||||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string },
|
||||
): Promise<LocalSlSource | null> {
|
||||
const path = slPath(input.connectionId, input.sourceName);
|
||||
try {
|
||||
const result = await project.fileStore.readFile(path);
|
||||
return {
|
||||
...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }),
|
||||
yaml: result.content,
|
||||
};
|
||||
} catch {
|
||||
const records = await loadLocalSlSourceRecords(project, {
|
||||
connectionId: input.connectionId,
|
||||
});
|
||||
const record = records.find((source) => source.name === input.sourceName);
|
||||
return record ? { ...record } : null;
|
||||
}
|
||||
// Source identity is the in-file `name:` (mirroring the warehouse identifier
|
||||
// verbatim, e.g. Snowflake's uppercase `WIDGET_SALES`), never the filename. The
|
||||
// record loader resolves standalone files, overlays, manifest-backed sources,
|
||||
// and mid-edit files whose YAML no longer parses — so readers — `ktx sl read`,
|
||||
// `ktx sl validate`, and the `sl_read_source` MCP tool — can surface broken
|
||||
// content for repair instead of failing on it.
|
||||
const records = await loadLocalSlSourceRecords(project, {
|
||||
connectionId: input.connectionId,
|
||||
});
|
||||
const record = records.find((source) => source.name === input.sourceName);
|
||||
return record ? { ...record } : null;
|
||||
}
|
||||
|
||||
export async function resolveLocalSlSource(
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-sch
|
|||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import { isSlYamlPath, resolveSlSourceFile, slDeclaredSourceName, slSourceFilePath } from './source-files.js';
|
||||
import type {
|
||||
ResolvedSemanticLayerSource,
|
||||
SemanticLayerColumnOverride,
|
||||
|
|
@ -135,8 +136,30 @@ export class SemanticLayerService {
|
|||
|
||||
// ── YAML File Operations ────────────────────────────────
|
||||
|
||||
private sourcePath(connectionId: string, sourceName: string): string {
|
||||
return `${SL_DIR_PREFIX}/${connectionId}/${sourceName}.yaml`;
|
||||
// The in-file `name:` is the source's identity; the filename is only a derived
|
||||
// label. Rewrites land on the file that already declares the name (humans may
|
||||
// rename files freely); new sources get a derived filename. A file already
|
||||
// sitting at the derived path that declares a name declares a *different* one
|
||||
// (the resolver would have matched it otherwise) — fail instead of clobbering
|
||||
// it. A nameless/unparseable file there is the broken remains of this very
|
||||
// source (the derived path is a function of the name), so overwriting it is
|
||||
// the repair path, not data loss.
|
||||
private async resolveWritePath(connectionId: string, sourceName: string): Promise<string> {
|
||||
const existing = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||
if (existing) {
|
||||
return existing.path;
|
||||
}
|
||||
const path = slSourceFilePath(connectionId, sourceName);
|
||||
let occupant: string | null = null;
|
||||
try {
|
||||
occupant = slDeclaredSourceName((await this.configService.readFile(path)).content);
|
||||
} catch {
|
||||
return path;
|
||||
}
|
||||
if (occupant !== null) {
|
||||
throw new Error(`Cannot write source '${sourceName}': ${path} already defines source '${occupant}'`);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
async writeSource(
|
||||
|
|
@ -185,39 +208,42 @@ export class SemanticLayerService {
|
|||
}
|
||||
}
|
||||
|
||||
const path = this.sourcePath(connectionId, source.name);
|
||||
const path = await this.resolveWritePath(connectionId, source.name);
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||
skipLock: options?.skipLock,
|
||||
});
|
||||
return { ...result, warnings };
|
||||
// The filename is derived from (or resolved by) the source name — surface
|
||||
// the actual path so callers don't have to re-resolve it.
|
||||
return { ...result, path, warnings };
|
||||
}
|
||||
|
||||
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string }> {
|
||||
const path = this.sourcePath(connectionId, sourceName);
|
||||
const result = await this.configService.readFile(path);
|
||||
return { content: result.content, path };
|
||||
/**
|
||||
* Raw standalone/overlay file for a source, resolved by its in-file `name:`.
|
||||
* Returns null when no file declares the name (the source may still exist as
|
||||
* a manifest entry under `_schema/`).
|
||||
*/
|
||||
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string } | null> {
|
||||
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||
return file ? { content: file.content, path: file.path } : null;
|
||||
}
|
||||
|
||||
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
||||
let content: string;
|
||||
try {
|
||||
const result = await this.readSourceFile(connectionId, sourceName);
|
||||
content = result.content;
|
||||
} catch {
|
||||
const file = await this.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return YAML.parse(content) as SemanticLayerSource;
|
||||
return YAML.parse(file.content) as SemanticLayerSource;
|
||||
} catch (error) {
|
||||
// Distinguish a YAML parse failure from a missing file. The file exists but
|
||||
// its contents are unparseable — callers that treat null as "does not exist"
|
||||
// could otherwise overwrite the broken file. Surface the parse failure via
|
||||
// the service logger so the broken source is at least visible.
|
||||
this.logger.warn(
|
||||
`[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
`[loadSource] ${file.path}: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
|
@ -231,7 +257,7 @@ export class SemanticLayerService {
|
|||
let allFiles: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
allFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch (e) {
|
||||
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
|
||||
loadErrors.push(message);
|
||||
|
|
@ -338,7 +364,7 @@ export class SemanticLayerService {
|
|||
let allFiles: string[];
|
||||
try {
|
||||
const listing = await this.configService.listFiles(dir);
|
||||
allFiles = listing.files.filter((f) => f.endsWith('.yaml'));
|
||||
allFiles = listing.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return result;
|
||||
}
|
||||
|
|
@ -408,7 +434,7 @@ export class SemanticLayerService {
|
|||
const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`;
|
||||
try {
|
||||
const result = await this.configService.listFiles(schemaDir);
|
||||
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
for (const filePath of yamlFiles) {
|
||||
try {
|
||||
const { content } = await this.configService.readFile(filePath);
|
||||
|
|
@ -449,7 +475,7 @@ export class SemanticLayerService {
|
|||
let yamlFiles: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(schemaDir);
|
||||
yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -533,7 +559,7 @@ export class SemanticLayerService {
|
|||
.map((c) => c.name);
|
||||
if (absentDeclaredColumns.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
||||
`${source.name}: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
||||
`but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
|
||||
`Available columns: ${[...manifestColumns.values()].join(', ')}`,
|
||||
);
|
||||
|
|
@ -545,7 +571,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (missingGrainColumns.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
||||
`${source.name}: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -562,7 +588,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
`${source.name}: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -577,7 +603,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
`${source.name}: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -592,7 +618,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (exprMissing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
||||
`${source.name}: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -606,7 +632,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (filterMissing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
||||
`${source.name}: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -619,7 +645,7 @@ export class SemanticLayerService {
|
|||
}
|
||||
if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: join to "${join.to}" references local column ` +
|
||||
`${source.name}: join to "${join.to}" references local column ` +
|
||||
`"${parsed.localColumn}" that is not a valid output column`,
|
||||
);
|
||||
}
|
||||
|
|
@ -631,7 +657,7 @@ export class SemanticLayerService {
|
|||
const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
|
||||
if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: join to "${join.to}" references target column ` +
|
||||
`${source.name}: join to "${join.to}" references target column ` +
|
||||
`"${parsed.targetColumn}" that does not exist on the target source`,
|
||||
);
|
||||
}
|
||||
|
|
@ -650,43 +676,30 @@ export class SemanticLayerService {
|
|||
return SemanticLayerService.mapDialect(connection.connectionType);
|
||||
}
|
||||
|
||||
async listSourceNames(connectionId: string): Promise<string[]> {
|
||||
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
return result.files.filter((f) => f.endsWith('.yaml')).map((f) => f.replace(`${dir}/`, '').replace('.yaml', ''));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async listFilesForConnection(connectionId: string): Promise<string[]> {
|
||||
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir, true);
|
||||
return result.files.filter((f) => f.endsWith('.yaml'));
|
||||
return result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async readFileByPath(connectionId: string, relativePath: string): Promise<{ content: string; readOnly: boolean }> {
|
||||
const fullPath = `${SL_DIR_PREFIX}/${connectionId}/${relativePath}`;
|
||||
const result = await this.configService.readFile(fullPath);
|
||||
return {
|
||||
content: result.content,
|
||||
readOnly: relativePath.startsWith('_schema/'),
|
||||
};
|
||||
}
|
||||
|
||||
async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) {
|
||||
const path = this.sourcePath(connectionId, sourceName);
|
||||
return this.configService.deleteFile(path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
|
||||
}
|
||||
|
||||
async getSourceHistory(connectionId: string, sourceName: string) {
|
||||
const path = this.sourcePath(connectionId, sourceName);
|
||||
return this.configService.getFileHistory(path);
|
||||
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||
if (!file) {
|
||||
// `deleteFile` returns null for a missing path, which would let a no-op
|
||||
// delete read as success. Distinguish the two real cases instead.
|
||||
if (await this.isManifestBacked(connectionId, sourceName)) {
|
||||
throw new Error(
|
||||
`Source '${sourceName}' is defined by the scan manifest (_schema/) and has no overlay file to delete. ` +
|
||||
`Rescan the connection to remove it from the manifest.`,
|
||||
);
|
||||
}
|
||||
throw new Error(`Semantic-layer source not found: ${connectionId}/${sourceName}`);
|
||||
}
|
||||
return this.configService.deleteFile(file.path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -815,7 +828,7 @@ export class SemanticLayerService {
|
|||
return [];
|
||||
}
|
||||
|
||||
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\/.+\.ya?ml$/.test(file));
|
||||
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\//.test(file) && isSlYamlPath(file));
|
||||
const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = [];
|
||||
for (const filePath of schemaFiles) {
|
||||
const connectionId = filePath.split('/')[1];
|
||||
|
|
@ -844,7 +857,7 @@ export class SemanticLayerService {
|
|||
let allFiles: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
allFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return warnings;
|
||||
}
|
||||
|
|
@ -1030,7 +1043,7 @@ export class SemanticLayerService {
|
|||
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
|
||||
for (const filePath of yamlFiles) {
|
||||
try {
|
||||
|
|
|
|||
160
packages/cli/src/context/sl/source-files.ts
Normal file
160
packages/cli/src/context/sl/source-files.ts
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import YAML from 'yaml';
|
||||
import type { KtxFileStorePort } from '../../context/core/file-store.js';
|
||||
|
||||
// Semantic-layer source identity lives in the file's `name:` field, which mirrors
|
||||
// the warehouse identifier verbatim (Snowflake's uppercase `SIGNED_UP`, `EVENT$LOG`).
|
||||
// The filename is a derived label and never participates in identity: reads resolve
|
||||
// a source by scanning the connection directory and matching `name:`, and writes
|
||||
// reuse the resolved file's path, so files can be freely renamed by humans without
|
||||
// changing which source they define.
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
export function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!isSafeConnectionId(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
export function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
|
||||
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
|
||||
}
|
||||
|
||||
export function sourceNameFromPath(path: string): string {
|
||||
return (
|
||||
path
|
||||
.split('/')
|
||||
.at(-1)
|
||||
?.replace(/\.ya?ml$/, '') ?? path
|
||||
);
|
||||
}
|
||||
|
||||
// The one predicate for "this path is a semantic-layer YAML file". ktx itself
|
||||
// always writes `.yaml` (see `slSourceFileName`), but humans rename freely and
|
||||
// the dbt ecosystem's habit is `.yml`, so every reader must accept both — a
|
||||
// listing that recognizes only one extension makes the same file visible to
|
||||
// some entry points and invisible to others.
|
||||
export function isSlYamlPath(path: string): boolean {
|
||||
return path.endsWith('.yaml') || path.endsWith('.yml');
|
||||
}
|
||||
|
||||
// Windows refuses these basenames regardless of extension — a genuinely universal
|
||||
// filesystem invariant, so the static list is acceptable.
|
||||
const WINDOWS_RESERVED_BASENAME = /^(?:con|prn|aux|nul|com[0-9]|lpt[0-9])$/;
|
||||
|
||||
const SAFE_FILE_BASENAME = /^[a-z0-9][a-z0-9_]{0,63}$/;
|
||||
|
||||
/**
|
||||
* Derive the filename for a semantic-layer source. Total over all possible
|
||||
* source names — never throws.
|
||||
*
|
||||
* Names that are already safe lowercase snake_case become `<name>.yaml`;
|
||||
* anything else becomes `<slug>-<8 hex of sha256(name)>.yaml`. The two ranges
|
||||
* are disjoint and the mapping is injective: safe filenames contain no `-`,
|
||||
* hashed filenames always end in `-<8 hex>`, and slugs are lowercased so names
|
||||
* differing only by case get distinct hashes instead of colliding paths on
|
||||
* case-insensitive filesystems (macOS APFS, Windows).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export function slSourceFileName(sourceName: string): string {
|
||||
if (SAFE_FILE_BASENAME.test(sourceName) && !WINDOWS_RESERVED_BASENAME.test(sourceName)) {
|
||||
return `${sourceName}.yaml`;
|
||||
}
|
||||
const slug = sourceName
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9_]+/g, '_')
|
||||
.replace(/_+/g, '_')
|
||||
.replace(/^_+|_+$/g, '')
|
||||
.slice(0, 64);
|
||||
const hash = createHash('sha256').update(sourceName, 'utf-8').digest('hex').slice(0, 8);
|
||||
return `${slug || 'src'}-${hash}.yaml`;
|
||||
}
|
||||
|
||||
export function slSourceFilePath(connectionId: string, sourceName: string): string {
|
||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${slSourceFileName(sourceName)}`;
|
||||
}
|
||||
|
||||
export interface SlSourceFile {
|
||||
path: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
// Same keying as `loadLocalSlSourceRecords`: the in-file `name:` is the identity;
|
||||
// the filename is only a fallback for files so broken that even the `name:` is
|
||||
// unrecoverable, or genuinely nameless ones. A file left mid-edit with a syntax
|
||||
// error below its `name:` line keeps its declared identity (see
|
||||
// `slDeclaredSourceName`), so a human-renamed source is still addressed by name
|
||||
// while broken instead of silently reverting to its filename.
|
||||
export function slSourceNameForFile(path: string, content: string): string {
|
||||
return slDeclaredSourceName(content) ?? sourceNameFromPath(path);
|
||||
}
|
||||
|
||||
/**
|
||||
* The `name:` a semantic-layer YAML file declares, or null when the file is
|
||||
* nameless or so broken even the name is unrecoverable. Null is how
|
||||
* `writeSource` tells a genuine name conflict at a derived path apart from the
|
||||
* broken remains of the source being written, which a rewrite must repair
|
||||
* rather than refuse.
|
||||
*
|
||||
* Uses `parseDocument`, not `parse`: a file with a syntax error below the
|
||||
* `name:` line still parses into a partial tree whose top-level `name:` is
|
||||
* intact. `parse` would throw on the same input and drop the source to its
|
||||
* filename — wrong for human-renamed files, whose filename is not the name.
|
||||
*/
|
||||
export function slDeclaredSourceName(content: string): string | null {
|
||||
let doc: ReturnType<typeof YAML.parseDocument>;
|
||||
try {
|
||||
doc = YAML.parseDocument(content);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const name = doc.get('name');
|
||||
return typeof name === 'string' && name.length > 0 ? name : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the standalone/overlay file that defines `sourceName` for a connection.
|
||||
* Returns null when no file declares the name (the source may still exist as a
|
||||
* manifest entry under `_schema/`). Throws when more than one file declares the
|
||||
* same name — that breaks the one-file-per-name invariant and must be repaired
|
||||
* by hand rather than silently picking one.
|
||||
*/
|
||||
export async function resolveSlSourceFile(
|
||||
fileStore: Pick<KtxFileStorePort, 'listFiles' | 'readFile'>,
|
||||
connectionId: string,
|
||||
sourceName: string,
|
||||
): Promise<SlSourceFile | null> {
|
||||
const dir = `semantic-layer/${assertSafeConnectionId(connectionId)}`;
|
||||
const schemaDir = `${dir}/_schema`;
|
||||
const listed = await fileStore.listFiles(dir);
|
||||
const paths = listed.files.filter((file) => isSlYamlPath(file) && !file.startsWith(`${schemaDir}/`)).sort();
|
||||
|
||||
const matches: SlSourceFile[] = [];
|
||||
for (const path of paths) {
|
||||
const raw = await fileStore.readFile(path);
|
||||
if (slSourceNameForFile(path, raw.content) === sourceName) {
|
||||
matches.push({ path, content: raw.content });
|
||||
}
|
||||
}
|
||||
if (matches.length > 1) {
|
||||
throw new Error(
|
||||
`Multiple semantic-layer files declare source "${sourceName}": ${matches.map((match) => match.path).join(', ')}`,
|
||||
);
|
||||
}
|
||||
return matches[0] ?? null;
|
||||
}
|
||||
|
|
@ -46,12 +46,8 @@ export abstract class BaseSemanticLayerTool<TInput extends ZodType = ZodType> ex
|
|||
): Promise<string | null> {
|
||||
const semanticLayerService = context?.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
|
||||
try {
|
||||
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
return content;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const file = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
return file?.content ?? null;
|
||||
}
|
||||
|
||||
protected buildMarkdown(
|
||||
|
|
|
|||
|
|
@ -113,13 +113,8 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
}
|
||||
|
||||
// Read existing source
|
||||
let currentYaml: string | null = null;
|
||||
try {
|
||||
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
currentYaml = content;
|
||||
} catch {
|
||||
currentYaml = null;
|
||||
}
|
||||
const currentFile = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
const currentYaml = currentFile?.content ?? null;
|
||||
if (!currentYaml) {
|
||||
const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
|
||||
if (manifestBacked) {
|
||||
|
|
@ -165,6 +160,20 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
} catch (e) {
|
||||
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
|
||||
}
|
||||
|
||||
// The in-file `name:` is the source's identity — an edited name would make
|
||||
// writeSource create a second source instead of updating this one.
|
||||
if (source.name !== sourceName) {
|
||||
return this.buildOutput(
|
||||
false,
|
||||
[
|
||||
`Edits change "name:" from "${sourceName}" to "${source.name ?? '<missing>'}" — renaming is not supported. ` +
|
||||
`Delete the source and recreate it under the new name.`,
|
||||
],
|
||||
sourceName,
|
||||
);
|
||||
}
|
||||
|
||||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
|
||||
// Re-serialize and write
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import YAML from 'yaml';
|
||||
import type { GitService } from '../../../context/core/git.service.js';
|
||||
import type { KtxFileStorePort } from '../../../context/core/file-store.js';
|
||||
import type { KtxFileListResult, KtxFileReadResult, KtxFileStorePort } from '../../../context/core/file-store.js';
|
||||
import { SYSTEM_GIT_AUTHOR } from '../../../context/tools/authors.js';
|
||||
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import { resolveSlSourceFile, slSourceFilePath } from '../source-files.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
|
||||
|
||||
|
|
@ -23,9 +24,6 @@ export interface SourceValidationResult {
|
|||
warnings: string[];
|
||||
}
|
||||
|
||||
const slSourcePath = (connectionId: string, sourceName: string): string =>
|
||||
`semantic-layer/${connectionId}/${sourceName}.yaml`;
|
||||
|
||||
function resolveDialect(warehouse: string | null): string | null {
|
||||
if (!warehouse) {
|
||||
return null;
|
||||
|
|
@ -63,24 +61,21 @@ export async function validateSingleSource(
|
|||
const errors: string[] = [];
|
||||
const warnings: string[] = [];
|
||||
|
||||
let content: string;
|
||||
try {
|
||||
const result = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
content = result.content;
|
||||
} catch {
|
||||
errors.push(`${sourceName}.yaml: file not found`);
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
errors.push(`${sourceName}: no standalone or overlay file found`);
|
||||
return { errors, warnings };
|
||||
}
|
||||
|
||||
let parsed: Record<string, unknown>;
|
||||
try {
|
||||
parsed = YAML.parse(content);
|
||||
parsed = YAML.parse(file.content);
|
||||
} catch (e) {
|
||||
errors.push(`${sourceName}.yaml: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
|
||||
errors.push(`${sourceName}: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
|
||||
return { errors, warnings };
|
||||
}
|
||||
if (!parsed || typeof parsed !== 'object') {
|
||||
errors.push(`${sourceName}.yaml: top-level content is not an object`);
|
||||
errors.push(`${sourceName}: top-level content is not an object`);
|
||||
return { errors, warnings };
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +84,7 @@ export async function validateSingleSource(
|
|||
const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
|
||||
if (isManifestBacked) {
|
||||
errors.push(
|
||||
`${sourceName}.yaml: standalone source shadows an existing manifest entry — ` +
|
||||
`${sourceName}: standalone source shadows an existing manifest entry — ` +
|
||||
`writing it as-is drops the manifest's columns and joins. ` +
|
||||
`Remove "sql:", "table:", "grain:", and base-table "columns:" and keep only ` +
|
||||
`"name:" plus overlay fields such as "measures:", "segments:", "descriptions:", ` +
|
||||
|
|
@ -103,21 +98,21 @@ export async function validateSingleSource(
|
|||
const result = schema.safeParse(parsed);
|
||||
if (!result.success) {
|
||||
const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
|
||||
errors.push(`${sourceName}.yaml: schema — ${issues}`);
|
||||
errors.push(`${sourceName}: schema — ${issues}`);
|
||||
const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
|
||||
if (errorPaths.has('joins')) {
|
||||
warnings.push(
|
||||
`${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
|
||||
`${sourceName}: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
|
||||
);
|
||||
}
|
||||
if (errorPaths.has('columns')) {
|
||||
warnings.push(
|
||||
`${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
|
||||
`${sourceName}: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
|
||||
);
|
||||
}
|
||||
if (errorPaths.has('measures')) {
|
||||
warnings.push(
|
||||
`${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`,
|
||||
`${sourceName}: hint — measure format: {name, expr, description (optional), filter (optional)}`,
|
||||
);
|
||||
}
|
||||
return { errors, warnings };
|
||||
|
|
@ -135,7 +130,7 @@ export async function validateSingleSource(
|
|||
const seenMeasures = new Set<string>();
|
||||
for (const m of measures) {
|
||||
if (seenMeasures.has(m.name)) {
|
||||
errors.push(`${sourceName}.yaml: duplicate measure name "${m.name}"`);
|
||||
errors.push(`${sourceName}: duplicate measure name "${m.name}"`);
|
||||
}
|
||||
seenMeasures.add(m.name);
|
||||
}
|
||||
|
|
@ -168,7 +163,7 @@ export async function validateSingleSource(
|
|||
const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
|
||||
`${sourceName}: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
|
|
@ -205,7 +200,7 @@ function formatProbeError(args: {
|
|||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
const refColumns = sourceColumns.filter((c) => referencesColumn(probeSql, c.name));
|
||||
const lines: string[] = [
|
||||
measureName ? `${sourceName}.yaml: measure "${measureName}" ${headline}.` : `${sourceName}.yaml: ${headline}.`,
|
||||
measureName ? `${sourceName}: measure "${measureName}" ${headline}.` : `${sourceName}: ${headline}.`,
|
||||
];
|
||||
if (warehouse) {
|
||||
lines.push(` Warehouse: ${warehouse}`);
|
||||
|
|
@ -249,7 +244,7 @@ async function probeOverlayMeasures(
|
|||
composed = all.find((s) => s.name === sourceName);
|
||||
} catch (e) {
|
||||
errors.push(
|
||||
`${sourceName}.yaml: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
|
||||
`${sourceName}: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
|
||||
);
|
||||
return errors;
|
||||
}
|
||||
|
|
@ -289,6 +284,26 @@ async function probeOverlayMeasures(
|
|||
return errors;
|
||||
}
|
||||
|
||||
/**
|
||||
* A read-only view of the config repo at one commit, shaped for
|
||||
* `resolveSlSourceFile` so name→file resolution runs against history exactly as
|
||||
* it does against the working tree — one resolver, two backing stores. Used to
|
||||
* recover the path a source occupied at `preHead` after the live file is gone.
|
||||
*/
|
||||
function gitCommitFileStore(
|
||||
git: GitService,
|
||||
commitHash: string,
|
||||
): Pick<KtxFileStorePort, 'listFiles' | 'readFile'> {
|
||||
return {
|
||||
async listFiles(path: string): Promise<KtxFileListResult> {
|
||||
return { files: await git.listFilesAtCommit(path, commitHash) };
|
||||
},
|
||||
async readFile(path: string): Promise<KtxFileReadResult> {
|
||||
return { content: await git.getFileAtCommit(path, commitHash) };
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
|
||||
* exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
|
||||
|
|
@ -300,14 +315,29 @@ export async function revertSourceToPreHead(
|
|||
preHead: string | null,
|
||||
sourceName: string,
|
||||
): Promise<string> {
|
||||
const relPath = slSourcePath(connectionId, sourceName);
|
||||
// Find the file that defines this source. While it is still on disk
|
||||
// (invalid-but-present) the live resolver finds it by its in-file `name:`.
|
||||
// Once the session deleted it, the path is gone too — and humans rename files
|
||||
// freely, so it is NOT the writer-derived filename. Recover it from history by
|
||||
// resolving the name against the preHead commit instead of guessing.
|
||||
const live = await resolveSlSourceFile(deps.configService, connectionId, sourceName);
|
||||
let relPath: string;
|
||||
let preContent: string | null = null;
|
||||
if (preHead) {
|
||||
try {
|
||||
preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
|
||||
} catch {
|
||||
preContent = null;
|
||||
if (live) {
|
||||
relPath = live.path;
|
||||
if (preHead) {
|
||||
try {
|
||||
preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
|
||||
} catch {
|
||||
preContent = null;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const atPreHead = preHead
|
||||
? await resolveSlSourceFile(gitCommitFileStore(deps.gitService, preHead), connectionId, sourceName)
|
||||
: null;
|
||||
relPath = atPreHead?.path ?? slSourceFilePath(connectionId, sourceName);
|
||||
preContent = atPreHead?.content ?? null;
|
||||
}
|
||||
|
||||
if (preContent !== null) {
|
||||
|
|
|
|||
|
|
@ -22,8 +22,12 @@ const slWriteSourceInputSchema = z.object({
|
|||
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
|
||||
sourceName: z
|
||||
.string()
|
||||
.regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case (lowercase alphanumeric and underscores)')
|
||||
.describe('Name of the source to create, edit, or delete'),
|
||||
.min(1)
|
||||
.describe(
|
||||
"Name of the source to create, edit, or delete. Must equal the source's `name:`. Use the verbatim " +
|
||||
'warehouse identifier when overlaying a manifest source (e.g. SIGNED_UP); snake_case is recommended ' +
|
||||
'for new standalone sources.',
|
||||
),
|
||||
source: sourceInputSchema
|
||||
.optional()
|
||||
.describe(
|
||||
|
|
@ -152,6 +156,17 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
);
|
||||
}
|
||||
|
||||
// The in-file `name:` is the source's identity; the file is written under
|
||||
// source.name while the orphan/shadow checks key on sourceName — a mismatch
|
||||
// would validate one source and save another.
|
||||
if (input.source.name !== sourceName) {
|
||||
return this.buildOutput(
|
||||
false,
|
||||
[`source.name "${input.source.name}" does not match sourceName "${sourceName}" — they must be identical.`],
|
||||
sourceName,
|
||||
);
|
||||
}
|
||||
|
||||
return this.writeFullSource(
|
||||
connectionId,
|
||||
input.source,
|
||||
|
|
@ -253,12 +268,8 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
connectionId: string,
|
||||
sourceName: string,
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
const { content } = await service.readSourceFile(connectionId, sourceName);
|
||||
return content;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const file = await service.readSourceFile(connectionId, sourceName);
|
||||
return file?.content ?? null;
|
||||
}
|
||||
|
||||
private async rejectOrphanOverlay(
|
||||
|
|
|
|||
23
packages/cli/src/context/sql-analysis/dialect.ts
Normal file
23
packages/cli/src/context/sql-analysis/dialect.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import type { SqlAnalysisDialect } from './ports.js';
|
||||
|
||||
// One mapping from ktx connection identity to the sqlglot dialect name used by
|
||||
// the Python daemon (SQL analysis, read-only validation) and semantic-layer
|
||||
// compute. Keys cover both vocabularies that name a connection's engine:
|
||||
// ktx.yaml driver names ("postgres", "sqlserver") and the local connection-type
|
||||
// spellings exposed by KtxConnectionInfo.connectionType ("POSTGRESQL").
|
||||
const SQLGLOT_DIALECTS: Record<string, SqlAnalysisDialect> = {
|
||||
postgres: 'postgres',
|
||||
postgresql: 'postgres',
|
||||
bigquery: 'bigquery',
|
||||
snowflake: 'snowflake',
|
||||
mysql: 'mysql',
|
||||
sqlserver: 'tsql',
|
||||
sqlite: 'sqlite',
|
||||
duckdb: 'duckdb',
|
||||
clickhouse: 'clickhouse',
|
||||
databricks: 'databricks',
|
||||
};
|
||||
|
||||
export function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
||||
return SQLGLOT_DIALECTS[(driver ?? '').toLowerCase()] ?? 'postgres';
|
||||
}
|
||||
|
|
@ -13,7 +13,7 @@ import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
|
|||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
|
||||
import { createCliOperationalLogger } from './io/logger.js';
|
||||
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
|
||||
import { createKtxCliLocalIngestAdapters, resolveKtxCliSqlAnalysis } from './local-adapters.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js';
|
||||
import {
|
||||
|
|
@ -87,6 +87,7 @@ export interface KtxIngestDeps {
|
|||
| 'memoryModel'
|
||||
| 'semanticLayerCompute'
|
||||
| 'queryExecutor'
|
||||
| 'sqlAnalysis'
|
||||
| 'logger'
|
||||
| 'pullConfigOptions'
|
||||
>;
|
||||
|
|
@ -724,7 +725,7 @@ export async function runKtxIngest(
|
|||
const localIngestOptions = deps.localIngestOptions ?? {};
|
||||
const managedDaemon = managedDaemonOptionsForIngestRun(args, deps.runtimeIo ?? io);
|
||||
const operationalLogger = createCliOperationalLogger(io, args.outputMode);
|
||||
const adapterOptions = {
|
||||
const baseAdapterOptions = {
|
||||
...(localIngestOptions.pullConfigOptions ?? {}),
|
||||
...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
|
||||
...(managedDaemon ? { managedDaemon } : {}),
|
||||
|
|
@ -734,6 +735,10 @@ export async function runKtxIngest(
|
|||
: {}),
|
||||
logger: operationalLogger,
|
||||
};
|
||||
// One parser-backed SQL analysis port per run: the historic-sql adapter and
|
||||
// the ingest sql_execution tool share the same daemon-backed validator.
|
||||
const sqlAnalysis = localIngestOptions.sqlAnalysis ?? resolveKtxCliSqlAnalysis(baseAdapterOptions);
|
||||
const adapterOptions = { ...baseAdapterOptions, sqlAnalysis };
|
||||
const queryExecutor =
|
||||
localIngestOptions.queryExecutor ??
|
||||
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(ingestProject);
|
||||
|
|
@ -783,6 +788,7 @@ export async function runKtxIngest(
|
|||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
sqlAnalysis,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
embeddingProvider,
|
||||
|
|
@ -861,6 +867,7 @@ export async function runKtxIngest(
|
|||
jobId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
sqlAnalysis,
|
||||
pullConfigOptions: adapterOptions,
|
||||
embeddingProvider,
|
||||
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ function ktxCliLookerOptions(
|
|||
};
|
||||
}
|
||||
|
||||
function ktxCliHistoricSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions) {
|
||||
export function resolveKtxCliSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions): SqlAnalysisPort {
|
||||
if (options.sqlAnalysis) {
|
||||
return options.sqlAnalysis;
|
||||
}
|
||||
|
|
@ -289,7 +289,7 @@ function historicSqlOptionsForLocalRun(
|
|||
}
|
||||
|
||||
const base = {
|
||||
sqlAnalysis: ktxCliHistoricSqlAnalysis(options),
|
||||
sqlAnalysis: resolveKtxCliSqlAnalysis(options),
|
||||
};
|
||||
|
||||
if (dialect === 'postgres') {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ LookML views map to SL sources, `measure:` to measures, `explore: { join: }` to
|
|||
|
||||
| LookML | KTX form | Notes |
|
||||
|---|---|---|
|
||||
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
|
||||
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
|
||||
| `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists |
|
||||
| `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in |
|
||||
| `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone |
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to
|
|||
|
||||
| MetricFlow | KTX form | Notes |
|
||||
|---|---|---|
|
||||
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
|
||||
| `semantic_model: X { model: source('s','t') }` | **Overlay** at `<connId>/X.yaml` over table `t`. | Same shape; `source()` still resolves to a physical table. |
|
||||
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
|
||||
| `semantic_model: X { model: source('s','t') }` | **Overlay** named `X` over table `t`. | Same shape; `source()` still resolves to a physical table. |
|
||||
| `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. |
|
||||
| `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. |
|
||||
| `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "<agg>(<expr>)" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. |
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ skills must verify warehouse identifiers with `discover_data`,
|
|||
|
||||
## Part 1 - Schema reference
|
||||
|
||||
An SL source is a YAML file at `semantic-layer/<connectionId>/<source_name>.yaml`. There are three flavors:
|
||||
An SL source is a YAML file under `semantic-layer/<connectionId>/`. The file's `name:` field is the source's identity — it mirrors the warehouse identifier verbatim (e.g. Snowflake's uppercase `SIGNED_UP`); the filename is only a derived label. Always address sources by name through the `sl_*` tools, never by file path. There are three flavors:
|
||||
|
||||
### Overlay sources
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../../src/context/connections/read-only-sql.js';
|
||||
import {
|
||||
assertReadOnlySql,
|
||||
limitSqlForExecution,
|
||||
stripTrailingSqlNoise,
|
||||
} from '../../../src/context/connections/read-only-sql.js';
|
||||
|
||||
describe('assertReadOnlySql', () => {
|
||||
it('allows select and with queries', () => {
|
||||
|
|
@ -15,6 +19,51 @@ describe('assertReadOnlySql', () => {
|
|||
'Only read-only SELECT/WITH queries can be executed locally',
|
||||
);
|
||||
});
|
||||
|
||||
it('accepts read-only queries that begin with leading comments', () => {
|
||||
expect(assertReadOnlySql('-- daily widget sales\nselect count(*) from public.widget_sales')).toBe(
|
||||
'select count(*) from public.widget_sales',
|
||||
);
|
||||
expect(assertReadOnlySql('/* block */\n with paid as (select 1) select * from paid')).toContain('with paid');
|
||||
});
|
||||
|
||||
it('still rejects mutating statements hidden behind leading comments', () => {
|
||||
expect(() => assertReadOnlySql('-- harmless\n delete from orders')).toThrow(
|
||||
'Only read-only SELECT/WITH queries can be executed locally',
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects a second statement smuggled after a semicolon', () => {
|
||||
expect(() => assertReadOnlySql('select 1; drop table orders')).toThrow(
|
||||
'Only one SQL statement can be executed.',
|
||||
);
|
||||
expect(() => assertReadOnlySql('select 1;\n-- pad\ndelete from orders')).toThrow(
|
||||
'Only one SQL statement can be executed.',
|
||||
);
|
||||
expect(() => assertReadOnlySql('select 1; /* pad */ truncate orders;')).toThrow(
|
||||
'Only one SQL statement can be executed.',
|
||||
);
|
||||
});
|
||||
|
||||
it('accepts trailing semicolons, including repeated ones followed by comments', () => {
|
||||
expect(assertReadOnlySql('select 1;')).toBe('select 1;');
|
||||
expect(assertReadOnlySql('select 1 ;; \n')).toBe('select 1 ;;');
|
||||
expect(assertReadOnlySql('select 1; -- done')).toBe('select 1; -- done');
|
||||
});
|
||||
|
||||
it('ignores semicolons inside string literals, quoted identifiers, and comments', () => {
|
||||
expect(assertReadOnlySql("select string_agg(name, '; ') from t")).toBe("select string_agg(name, '; ') from t");
|
||||
expect(assertReadOnlySql("select 'it''s; quoted' from t")).toBe("select 'it''s; quoted' from t");
|
||||
expect(assertReadOnlySql('select ";" from "t;u"')).toBe('select ";" from "t;u"');
|
||||
expect(assertReadOnlySql('select 1 -- tail; comment')).toBe('select 1 -- tail; comment');
|
||||
expect(assertReadOnlySql('select 1 /* a;b */ + 2')).toBe('select 1 /* a;b */ + 2');
|
||||
});
|
||||
|
||||
it('rejects statements smuggled after a string literal that closes a semicolon early', () => {
|
||||
expect(() => assertReadOnlySql("select 'a'; delete from orders")).toThrow(
|
||||
'Only one SQL statement can be executed.',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('limitSqlForExecution', () => {
|
||||
|
|
@ -27,4 +76,42 @@ describe('limitSqlForExecution', () => {
|
|||
it('returns the trimmed SQL when no maxRows value is provided', () => {
|
||||
expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders');
|
||||
});
|
||||
|
||||
it('strips leading comments before wrapping with a row limit', () => {
|
||||
expect(limitSqlForExecution('-- top customers\nselect * from public.orders', 25)).toBe(
|
||||
'select * from (select * from public.orders) as ktx_query_result limit 25',
|
||||
);
|
||||
});
|
||||
|
||||
it('drops a trailing semicolon followed by a comment so the subquery stays valid', () => {
|
||||
// The single-statement gate accepts `select 1; -- done`; without stripping
|
||||
// the terminator the wrapper would embed `select 1; -- done` and comment out
|
||||
// the closing paren and limit clause.
|
||||
expect(limitSqlForExecution('select 1; -- done', 5)).toBe(
|
||||
'select * from (select 1) as ktx_query_result limit 5',
|
||||
);
|
||||
expect(limitSqlForExecution('select 1; /* note */', 5)).toBe(
|
||||
'select * from (select 1) as ktx_query_result limit 5',
|
||||
);
|
||||
});
|
||||
|
||||
it('drops a trailing line comment with no semicolon before wrapping', () => {
|
||||
expect(limitSqlForExecution('select 1 -- done', 5)).toBe('select * from (select 1) as ktx_query_result limit 5');
|
||||
});
|
||||
});
|
||||
|
||||
describe('stripTrailingSqlNoise', () => {
|
||||
it('removes trailing semicolons, comments, and whitespace', () => {
|
||||
expect(stripTrailingSqlNoise('select 1;')).toBe('select 1');
|
||||
expect(stripTrailingSqlNoise('select 1 ;; ')).toBe('select 1');
|
||||
expect(stripTrailingSqlNoise('select 1; -- done')).toBe('select 1');
|
||||
expect(stripTrailingSqlNoise('select 1 -- done')).toBe('select 1');
|
||||
expect(stripTrailingSqlNoise('select 1; /* trailing */')).toBe('select 1');
|
||||
});
|
||||
|
||||
it('preserves semicolons and comment markers inside literals and mid-statement', () => {
|
||||
expect(stripTrailingSqlNoise("select 'a; -- b'")).toBe("select 'a; -- b'");
|
||||
expect(stripTrailingSqlNoise('select 1 /* a;b */ + 2')).toBe('select 1 /* a;b */ + 2');
|
||||
expect(stripTrailingSqlNoise('select ";" from "t;u"')).toBe('select ";" from "t;u"');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
||||
import { mkdir, mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxCoreConfig } from '../../../src/context/core/config.js';
|
||||
import { GitService } from '../../../src/context/core/git.service.js';
|
||||
|
|
@ -35,10 +35,29 @@ describe('GitService', () => {
|
|||
});
|
||||
|
||||
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
|
||||
await mkdir(dirname(join(tempDir, filePath)), { recursive: true });
|
||||
await writeFile(join(tempDir, filePath), content, 'utf-8');
|
||||
return service.commitFile(filePath, message, 'Test', 'test@example.com');
|
||||
};
|
||||
|
||||
describe('listFilesAtCommit', () => {
|
||||
it('lists matching paths at a commit and recovers files deleted since', async () => {
|
||||
await writeAndCommit('semantic-layer/warehouse/custom.yaml', 'name: orders\n');
|
||||
const atSeed = await service.revParseHead();
|
||||
await service.deleteFile('semantic-layer/warehouse/custom.yaml', 'drop', 'Test', 'test@example.com');
|
||||
|
||||
// HEAD no longer has the file; the seed commit still does.
|
||||
await expect(service.listFilesAtCommit('semantic-layer/warehouse', 'HEAD')).resolves.toEqual([]);
|
||||
await expect(service.listFilesAtCommit('semantic-layer/warehouse', atSeed)).resolves.toEqual([
|
||||
'semantic-layer/warehouse/custom.yaml',
|
||||
]);
|
||||
});
|
||||
|
||||
it('returns [] for a pathspec that matches nothing', async () => {
|
||||
await expect(service.listFilesAtCommit('does/not/exist', 'HEAD')).resolves.toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cold-start bootstrap commit', () => {
|
||||
it('writes an empty commit on init so HEAD always resolves', async () => {
|
||||
// beforeEach already ran onModuleInit() against an empty temp dir.
|
||||
|
|
|
|||
|
|
@ -159,13 +159,16 @@ describe('reindexLocalIndexes', () => {
|
|||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
|
||||
// A broken standalone source is surfaced for repair rather than failing the
|
||||
// scope, so use a corrupt machine-generated manifest shard, which is the
|
||||
// remaining fatal read failure that the per-scope catch must isolate.
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse/_schema'), { recursive: true });
|
||||
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/broken.yaml'), 'not: [valid', 'utf-8');
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
|
||||
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
|
||||
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('_schema/broken.yaml');
|
||||
});
|
||||
|
||||
it('marks a scope errored when configured embeddings fail', async () => {
|
||||
|
|
|
|||
|
|
@ -33,14 +33,14 @@ async function makeHarness() {
|
|||
}
|
||||
|
||||
describe('finalGateRepairPaths', () => {
|
||||
it('derives sorted wiki and semantic-layer file paths', () => {
|
||||
it('derives sorted, deduplicated wiki and semantic-layer file paths', () => {
|
||||
expect(
|
||||
finalGateRepairPaths({
|
||||
changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
|
||||
touchedSlSources: [
|
||||
{ connectionId: 'warehouse', sourceName: 'mart_account_segments' },
|
||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
||||
touchedSlSourcePaths: [
|
||||
'semantic-layer/warehouse/mart_account_segments.yaml',
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
],
|
||||
}),
|
||||
).toEqual([
|
||||
|
|
|
|||
|
|
@ -18,19 +18,49 @@ describe('deriveFinalizationWikiPageKeys', () => {
|
|||
});
|
||||
|
||||
describe('deriveFinalizationTouchedSources', () => {
|
||||
it('maps standalone semantic-layer files directly', async () => {
|
||||
const result = await deriveFinalizationTouchedSources({
|
||||
it('resolves standalone files by the source diff, not the filename', () => {
|
||||
// The file carries a derived label (`signed_up-<hash>.yaml`); the source it
|
||||
// defines is the in-file `name:` (`SIGNED_UP`), visible only via the diff.
|
||||
const result = deriveFinalizationTouchedSources({
|
||||
changedPaths: ['semantic-layer/warehouse/signed_up-1a2b3c4d.yaml'],
|
||||
beforeSourcesByConnection: new Map([['warehouse', []]]),
|
||||
afterSourcesByConnection: new Map([
|
||||
['warehouse', [{ name: 'SIGNED_UP', grain: [], columns: [], joins: [], measures: [] }]],
|
||||
]),
|
||||
});
|
||||
expect(result).toEqual({
|
||||
touchedSources: [{ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }],
|
||||
unresolvedPaths: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('resolves deleted standalone files by the name that disappeared', () => {
|
||||
const result = deriveFinalizationTouchedSources({
|
||||
changedPaths: ['semantic-layer/warehouse/signed_up-1a2b3c4d.yaml'],
|
||||
beforeSourcesByConnection: new Map([
|
||||
['warehouse', [{ name: 'SIGNED_UP', grain: [], columns: [], joins: [], measures: [] }]],
|
||||
]),
|
||||
afterSourcesByConnection: new Map([['warehouse', []]]),
|
||||
});
|
||||
expect(result).toEqual({
|
||||
touchedSources: [{ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }],
|
||||
unresolvedPaths: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('flags standalone changes that produce no source diff', () => {
|
||||
const result = deriveFinalizationTouchedSources({
|
||||
changedPaths: ['semantic-layer/warehouse/orders.yaml'],
|
||||
beforeSourcesByConnection: new Map(),
|
||||
afterSourcesByConnection: new Map(),
|
||||
});
|
||||
expect(result).toEqual({
|
||||
touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
|
||||
unresolvedPaths: [],
|
||||
touchedSources: [],
|
||||
unresolvedPaths: ['semantic-layer/warehouse/orders.yaml'],
|
||||
});
|
||||
});
|
||||
|
||||
it('resolves aggregate _schema changes by comparing loaded source snapshots', async () => {
|
||||
it('resolves aggregate _schema changes by comparing loaded source snapshots', () => {
|
||||
const beforeSourcesByConnection = new Map([
|
||||
[
|
||||
'warehouse',
|
||||
|
|
@ -72,7 +102,7 @@ describe('deriveFinalizationTouchedSources', () => {
|
|||
],
|
||||
]);
|
||||
|
||||
const result = await deriveFinalizationTouchedSources({
|
||||
const result = deriveFinalizationTouchedSources({
|
||||
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
|
||||
beforeSourcesByConnection,
|
||||
afterSourcesByConnection,
|
||||
|
|
@ -84,11 +114,11 @@ describe('deriveFinalizationTouchedSources', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('flags aggregate _schema changes that cannot be resolved to logical sources', async () => {
|
||||
it('flags aggregate _schema changes that cannot be resolved to logical sources', () => {
|
||||
const beforeSourcesByConnection = new Map([['warehouse', []]]);
|
||||
const afterSourcesByConnection = new Map([['warehouse', []]]);
|
||||
|
||||
const result = await deriveFinalizationTouchedSources({
|
||||
const result = deriveFinalizationTouchedSources({
|
||||
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
|
||||
beforeSourcesByConnection,
|
||||
afterSourcesByConnection,
|
||||
|
|
|
|||
|
|
@ -70,6 +70,15 @@ async function loadSourcesFromRoot(root: string) {
|
|||
};
|
||||
}
|
||||
|
||||
// Mirrors the production contract: resolve the standalone/overlay file for a
|
||||
// source, null when absent. Fixtures keep filename == name, so a direct read
|
||||
// is a faithful shortcut.
|
||||
async function readSourceFileFromRoot(root: string, connectionId: string, sourceName: string) {
|
||||
const relPath = `semantic-layer/${connectionId}/${sourceName}.yaml`;
|
||||
const content = await readFile(join(root, relPath), 'utf-8').catch(() => null);
|
||||
return content === null ? null : { content, path: relPath };
|
||||
}
|
||||
|
||||
async function listGlobalWikiPageKeys(root: string): Promise<string[]> {
|
||||
const dir = join(root, 'wiki/global');
|
||||
const entries = await readdir(dir).catch(() => []);
|
||||
|
|
@ -172,11 +181,17 @@ function makeDeps(
|
|||
const semanticLayerService: any = {
|
||||
loadAllSources: vi.fn(async () => loadSourcesFromRoot(runtime.configDir)),
|
||||
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
|
||||
readSourceFile: vi.fn((connectionId: string, sourceName: string) =>
|
||||
readSourceFileFromRoot(runtime.configDir, connectionId, sourceName),
|
||||
),
|
||||
};
|
||||
semanticLayerService.forWorktree = vi.fn((workdir: string) => ({
|
||||
...semanticLayerService,
|
||||
loadAllSources: vi.fn(async () => loadSourcesFromRoot(workdir)),
|
||||
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
|
||||
readSourceFile: vi.fn((connectionId: string, sourceName: string) =>
|
||||
readSourceFileFromRoot(workdir, connectionId, sourceName),
|
||||
),
|
||||
}));
|
||||
|
||||
const deps: IngestBundleRunnerDeps = {
|
||||
|
|
@ -2366,8 +2381,11 @@ describe('IngestBundleRunner isolated diff path', () => {
|
|||
join(runtime.configDir, '.ktx/ingest-traces/job-finalization-target-policy/trace.jsonl'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(trace).toContain('finalization_committed');
|
||||
expect(trace).toContain('semantic_layer_target_policy');
|
||||
// The policy check runs inside finalization, before touched-source
|
||||
// derivation — an out-of-scope write fails the finalization stage
|
||||
// instead of reading as committed.
|
||||
expect(trace).not.toContain('finalization_committed');
|
||||
expect(trace).toContain('semantic_layer_target_policy_failed');
|
||||
expect(trace).toContain('ingest_failed');
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
|
|
|
|||
|
|
@ -1776,12 +1776,24 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
},
|
||||
],
|
||||
});
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
|
||||
);
|
||||
let head = 'pre-finalization';
|
||||
// Touched-source derivation diffs composed sources before/after finalization
|
||||
// (the filename never carries identity), so the mock must reflect the write:
|
||||
// `orders` exists only once the finalization commit lands.
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve({
|
||||
sources:
|
||||
connectionId === 'warehouse-2' && head === 'post-finalization'
|
||||
? [{ name: `${connectionId}_source` }, { name: 'orders' }]
|
||||
: [{ name: `${connectionId}_source` }],
|
||||
loadErrors: [],
|
||||
}),
|
||||
);
|
||||
const git = {
|
||||
revParseHead: vi.fn(async () => head),
|
||||
// Touched-source derivation reads each changed file's `name:`; the worktree
|
||||
// is mocked (no files on disk), so serve the source content from history.
|
||||
getFileAtCommit: vi.fn(async () => 'name: orders\n'),
|
||||
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
|
||||
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
|
||||
head = 'post-finalization';
|
||||
|
|
@ -1854,10 +1866,41 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
}),
|
||||
);
|
||||
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
|
||||
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]);
|
||||
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [
|
||||
{ name: 'warehouse-2_source' },
|
||||
{ name: 'orders' },
|
||||
]);
|
||||
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
|
||||
});
|
||||
|
||||
it('recovers a deleted hash-named SL source by its in-file name, not its filename', async () => {
|
||||
const runner = buildRunner();
|
||||
// An uppercase warehouse source lives in a hash-derived filename, so parsing
|
||||
// the basename yields the phantom `widget_sales-1a2b3c4d`. The real name must
|
||||
// come from the file's `name:`, recovered from history once it was deleted.
|
||||
const deletedPath = 'semantic-layer/warehouse/widget_sales-1a2b3c4d.yaml';
|
||||
const getFileAtCommit = vi.fn(async () => 'name: WIDGET_SALES\ntable: WIDGET_SALES\n');
|
||||
const worktree = { workdir: join(tmpdir(), 'ktx-absent-worktree-recover'), git: { getFileAtCommit } };
|
||||
|
||||
const touched = await (runner as any).touchedSlSourcesFromPaths(worktree, [deletedPath], 'pre-change-sha');
|
||||
|
||||
expect(touched).toEqual([{ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }]);
|
||||
expect(getFileAtCommit).toHaveBeenCalledWith(deletedPath, 'pre-change-sha');
|
||||
});
|
||||
|
||||
it('falls back to the filename only when a deleted SL file is unrecoverable from history', async () => {
|
||||
const runner = buildRunner();
|
||||
const deletedPath = 'semantic-layer/warehouse/orders.yaml';
|
||||
const getFileAtCommit = vi.fn(async () => {
|
||||
throw new Error('path not present at commit');
|
||||
});
|
||||
const worktree = { workdir: join(tmpdir(), 'ktx-absent-worktree-fallback'), git: { getFileAtCommit } };
|
||||
|
||||
const touched = await (runner as any).touchedSlSourcesFromPaths(worktree, [deletedPath], 'pre-change-sha');
|
||||
|
||||
expect(touched).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
|
||||
});
|
||||
|
||||
it('includes finalization actions in memory-flow saved counts', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.source = 'historic-sql';
|
||||
|
|
|
|||
|
|
@ -1,13 +1,21 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { SlConnectionCatalogPort } from '../../../../../src/context/sl/ports.js';
|
||||
import type { SqlAnalysisPort } from '../../../../../src/context/sql-analysis/ports.js';
|
||||
import type { ToolContext } from '../../../../../src/context/tools/base-tool.js';
|
||||
import { SqlExecutionTool } from '../../../../../src/context/ingest/tools/warehouse-verification/sql-execution.tool.js';
|
||||
|
||||
describe('SqlExecutionTool', () => {
|
||||
const connections = {
|
||||
executeQuery: vi.fn(),
|
||||
} as unknown as SlConnectionCatalogPort & { executeQuery: ReturnType<typeof vi.fn> };
|
||||
const tool = new SqlExecutionTool(connections);
|
||||
getConnectionById: vi.fn(async () => ({ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' })),
|
||||
} as unknown as SlConnectionCatalogPort & {
|
||||
executeQuery: ReturnType<typeof vi.fn>;
|
||||
getConnectionById: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
const sqlAnalysis = {
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
|
||||
} as unknown as SqlAnalysisPort & { validateReadOnly: ReturnType<typeof vi.fn> };
|
||||
const tool = new SqlExecutionTool(connections, sqlAnalysis);
|
||||
const context: ToolContext = {
|
||||
sourceId: 'ingest',
|
||||
messageId: 'm1',
|
||||
|
|
@ -15,7 +23,15 @@ describe('SqlExecutionTool', () => {
|
|||
session: { allowedConnectionNames: new Set(['warehouse']) } as any,
|
||||
};
|
||||
|
||||
it('wraps read-only SQL with a capped row limit', async () => {
|
||||
beforeEach(() => {
|
||||
connections.executeQuery.mockReset();
|
||||
connections.getConnectionById.mockReset();
|
||||
connections.getConnectionById.mockResolvedValue({ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' });
|
||||
sqlAnalysis.validateReadOnly.mockReset();
|
||||
sqlAnalysis.validateReadOnly.mockResolvedValue({ ok: true, error: null });
|
||||
});
|
||||
|
||||
it('validates with the parser-backed validator in the connection dialect, then wraps with a capped row limit', async () => {
|
||||
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
|
||||
|
||||
const result = await tool.call(
|
||||
|
|
@ -23,6 +39,7 @@ describe('SqlExecutionTool', () => {
|
|||
context,
|
||||
);
|
||||
|
||||
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select status from public.orders', 'postgres');
|
||||
expect(connections.executeQuery).toHaveBeenCalledWith(
|
||||
'warehouse',
|
||||
'select * from (select status from public.orders) as ktx_query_result limit 5',
|
||||
|
|
@ -31,15 +48,47 @@ describe('SqlExecutionTool', () => {
|
|||
expect(result.structured.wrappedSql).toContain('limit 5');
|
||||
});
|
||||
|
||||
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => {
|
||||
connections.executeQuery.mockClear();
|
||||
it('maps connection types to sqlglot dialects', async () => {
|
||||
connections.getConnectionById.mockResolvedValue({ id: 'warehouse', name: 'warehouse', connectionType: 'SNOWFLAKE' });
|
||||
connections.executeQuery.mockResolvedValue({ headers: [], rows: [], totalRows: 0 });
|
||||
|
||||
const result = await tool.call({ connectionId: 'warehouse', sql }, context);
|
||||
await tool.call({ connectionId: 'warehouse', sql: 'select 1' }, context);
|
||||
|
||||
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
|
||||
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select 1', 'snowflake');
|
||||
});
|
||||
|
||||
it('returns the validator error without executing when validation fails', async () => {
|
||||
sqlAnalysis.validateReadOnly.mockResolvedValue({ ok: false, error: 'SQL contains read/write operation: Insert' });
|
||||
|
||||
const result = await tool.call(
|
||||
{ connectionId: 'warehouse', sql: 'with x as (insert into t values (1) returning *) select * from x' },
|
||||
context,
|
||||
);
|
||||
|
||||
expect(result.markdown).toContain('SQL contains read/write operation: Insert');
|
||||
expect(result.structured.error).toContain('SQL contains read/write operation: Insert');
|
||||
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('throws when no parser-backed validator is configured', async () => {
|
||||
const unvalidated = new SqlExecutionTool(connections);
|
||||
|
||||
await expect(unvalidated.call({ connectionId: 'warehouse', sql: 'select 1' }, context)).rejects.toThrow(
|
||||
'sql_execution requires parser-backed SQL validation.',
|
||||
);
|
||||
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])(
|
||||
'keeps the local backstop even when the validator approves: %s',
|
||||
async (sql) => {
|
||||
const result = await tool.call({ connectionId: 'warehouse', sql }, context);
|
||||
|
||||
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
|
||||
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||
},
|
||||
);
|
||||
|
||||
it('surfaces connector errors verbatim', async () => {
|
||||
connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|||
import { initKtxProject } from '../../../src/context/project/project.js';
|
||||
import { KtxQueryError } from '../../../src/errors.js';
|
||||
import { createKtxConnectorCapabilities, type KtxQueryResult, type KtxScanConnector, type KtxSchemaSnapshot } from '../../../src/context/scan/types.js';
|
||||
import { writeLocalSlSource } from '../../../src/context/sl/local-sl.js';
|
||||
import { SemanticLayerService } from '../../../src/context/sl/semantic-layer.service.js';
|
||||
import type { SemanticLayerSource } from '../../../src/context/sl/types.js';
|
||||
import { seedSlSourceFile } from '../sl/sl-source-seeding.test-utils.js';
|
||||
import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js';
|
||||
|
||||
describe('createLocalProjectMcpContextPorts', () => {
|
||||
|
|
@ -739,7 +741,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
|
||||
it('reads seeded semantic-layer sources', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: [
|
||||
|
|
@ -763,7 +765,92 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('rejects path traversal keys before touching the project directory', async () => {
|
||||
it('reads manifest-backed sources with uppercase warehouse identifiers', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||
[
|
||||
'tables:',
|
||||
' WIDGET_SALES:',
|
||||
' table: PUBLIC.WIDGET_SALES',
|
||||
' columns:',
|
||||
' - name: ID',
|
||||
' type: number',
|
||||
' pk: true',
|
||||
'',
|
||||
].join('\n'),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed uppercase manifest shard',
|
||||
);
|
||||
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||
|
||||
await expect(
|
||||
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }),
|
||||
).resolves.toMatchObject({
|
||||
sourceName: 'WIDGET_SALES',
|
||||
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
|
||||
});
|
||||
});
|
||||
|
||||
it('composes an overlay written for an uppercase manifest source at a derived filename', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||
[
|
||||
'tables:',
|
||||
' WIDGET_SALES:',
|
||||
' table: PUBLIC.WIDGET_SALES',
|
||||
' columns:',
|
||||
' - name: ID',
|
||||
' type: number',
|
||||
' pk: true',
|
||||
'',
|
||||
].join('\n'),
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed uppercase manifest shard',
|
||||
);
|
||||
|
||||
// The production write path: agents overlay manifest sources via
|
||||
// SemanticLayerService.writeSource using the verbatim warehouse name.
|
||||
const service = new SemanticLayerService(project.fileStore as never, {} as never, {} as never);
|
||||
const overlay = {
|
||||
name: 'WIDGET_SALES',
|
||||
measures: [{ name: 'widget_sales_count', expr: 'count(*)' }],
|
||||
} as SemanticLayerSource;
|
||||
const write = await service.writeSource('warehouse', overlay, 'ktx', 'ktx@example.com');
|
||||
expect(write.path).toMatch(/^semantic-layer\/warehouse\/widget_sales-[0-9a-f]{8}\.yaml$/);
|
||||
|
||||
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||
await expect(
|
||||
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }),
|
||||
).resolves.toMatchObject({
|
||||
sourceName: 'WIDGET_SALES',
|
||||
yaml: expect.stringContaining('widget_sales_count'),
|
||||
});
|
||||
});
|
||||
|
||||
it('returns a standalone source verbatim even when its YAML is currently broken', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed broken source mid-edit',
|
||||
);
|
||||
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||
|
||||
await expect(
|
||||
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'orders' }),
|
||||
).resolves.toMatchObject({
|
||||
sourceName: 'orders',
|
||||
yaml: expect.stringContaining('[unterminated'),
|
||||
});
|
||||
});
|
||||
|
||||
it('keeps path-traversal keys away from the project directory', async () => {
|
||||
const project = await initKtxProject({ projectDir: tempDir });
|
||||
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||
|
||||
|
|
@ -774,12 +861,14 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
}),
|
||||
).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');
|
||||
|
||||
// Source reads never derive a file path from the name; a traversal-style
|
||||
// name simply matches no record.
|
||||
await expect(
|
||||
ports.semanticLayer?.readSource({
|
||||
connectionId: 'warehouse',
|
||||
sourceName: '../orders',
|
||||
}),
|
||||
).rejects.toThrow('Unsafe semantic-layer source name');
|
||||
).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('uses semantic compute for compile-only sl_query when supplied', async () => {
|
||||
|
|
@ -788,7 +877,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: [
|
||||
|
|
@ -850,7 +939,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
|||
driver: 'postgres',
|
||||
url: 'env:DATABASE_URL',
|
||||
};
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: [
|
||||
|
|
|
|||
|
|
@ -357,6 +357,10 @@ describe('MemoryAgentService.gateRevertInvalidSources (J3)', () => {
|
|||
const configService = {
|
||||
writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}),
|
||||
deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}),
|
||||
// Revert resolves the live file by name; with no listing it falls back
|
||||
// to the writer-derived filename.
|
||||
listFiles: vi.fn().mockResolvedValue({ files: [] }),
|
||||
readFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
||||
};
|
||||
const gitService = {
|
||||
getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')),
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ import { afterEach, beforeEach, describe, it } from 'vitest';
|
|||
import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js';
|
||||
import type { JsonValue } from '../../../src/context/ingest/ports.js';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||
import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../../../src/context/sl/local-sl.js';
|
||||
import { type LocalSlSourceSearchResult, searchLocalSlSources } from '../../../src/context/sl/local-sl.js';
|
||||
import { seedSlSourceFile } from '../sl/sl-source-seeding.test-utils.js';
|
||||
import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js';
|
||||
import {
|
||||
type LocalKnowledgeSearchResult,
|
||||
|
|
@ -99,12 +100,12 @@ function toContextConformanceResult(result: ContextEvidenceSearchResult): Search
|
|||
}
|
||||
|
||||
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
});
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'finance',
|
||||
sourceName: 'orders',
|
||||
yaml: FINANCE_ORDERS_YAML,
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ import {
|
|||
resolveLocalSlSource,
|
||||
searchLocalSlSources,
|
||||
validateLocalSlSource,
|
||||
writeLocalSlSource,
|
||||
} from '../../../src/context/sl/local-sl.js';
|
||||
import { seedSlSourceFile } from './sl-source-seeding.test-utils.js';
|
||||
|
||||
const ORDERS_YAML = [
|
||||
'name: orders',
|
||||
|
|
@ -60,7 +60,7 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('writes, reads, lists, and validates semantic-layer sources', async () => {
|
||||
const write = await writeLocalSlSource(project, {
|
||||
const write = await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -92,7 +92,7 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('resolves a scoped source by connection id', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -115,7 +115,7 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('returns not-found for a missing scoped source', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -130,12 +130,12 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('resolves a unique source name across all connections', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
});
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'analytics',
|
||||
sourceName: 'tickets',
|
||||
yaml: SUPPORT_YAML,
|
||||
|
|
@ -157,7 +157,7 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('returns not-found for a missing unscoped source', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -169,12 +169,12 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('reports sorted ambiguous connection ids for duplicate source names', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
});
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'analytics',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -261,6 +261,153 @@ describe('local semantic-layer helpers', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('reads manifest-backed scan sources whose warehouse identifiers are uppercase', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||
`tables:
|
||||
WIDGET_SALES:
|
||||
table: PUBLIC.WIDGET_SALES
|
||||
columns:
|
||||
- name: ID
|
||||
type: number
|
||||
pk: true
|
||||
- name: EMAIL
|
||||
type: string
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add uppercase manifest shard',
|
||||
);
|
||||
|
||||
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'WIDGET_SALES' })).resolves.toEqual(
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
name: 'WIDGET_SALES',
|
||||
path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#WIDGET_SALES',
|
||||
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('reads manifest-backed sources whose names are not filename-safe', async () => {
|
||||
// Snowflake and Postgres unquoted identifiers allow `$`; manifest keys
|
||||
// carry the warehouse name verbatim, so the lookup must accept it.
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||
`tables:
|
||||
EVENT$LOG:
|
||||
table: PUBLIC.EVENT$LOG
|
||||
columns:
|
||||
- name: ID
|
||||
type: number
|
||||
pk: true
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add manifest shard with dollar-sign table name',
|
||||
);
|
||||
|
||||
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'EVENT$LOG' })).resolves.toEqual(
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
name: 'EVENT$LOG',
|
||||
path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#EVENT$LOG',
|
||||
yaml: expect.stringContaining('table: PUBLIC.EVENT$LOG'),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('reads a manifest-backed source while a sibling standalone file has broken YAML', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||
`tables:
|
||||
WIDGET_SALES:
|
||||
table: PUBLIC.WIDGET_SALES
|
||||
columns:
|
||||
- name: ID
|
||||
type: number
|
||||
pk: true
|
||||
`,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Add manifest shard',
|
||||
);
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed a sibling source mid-edit with broken YAML',
|
||||
);
|
||||
|
||||
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'WIDGET_SALES' })).resolves.toEqual(
|
||||
expect.objectContaining({
|
||||
name: 'WIDGET_SALES',
|
||||
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
|
||||
}),
|
||||
);
|
||||
|
||||
// The broken sibling stays visible in listings instead of hiding or
|
||||
// failing the whole connection.
|
||||
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
|
||||
expect.objectContaining({ name: 'orders', columnCount: 0 }),
|
||||
expect.objectContaining({ name: 'WIDGET_SALES', columnCount: 1 }),
|
||||
]);
|
||||
});
|
||||
|
||||
it('returns the raw YAML of a standalone source whose content no longer parses', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed a source mid-edit with broken YAML',
|
||||
);
|
||||
|
||||
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' })).resolves.toEqual(
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
name: 'orders',
|
||||
path: 'semantic-layer/warehouse/orders.yaml',
|
||||
yaml: expect.stringContaining('[unterminated'),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('reads a broken source by its declared name even when the filename differs', async () => {
|
||||
// Identity is the intact top-level `name:`, recovered via parseDocument even
|
||||
// when the YAML is broken below it — never the filename. A human-renamed or
|
||||
// hashed-filename source (e.g. an uppercase warehouse name) saved mid-edit
|
||||
// must stay reachable under the name it declares, matching the writer side
|
||||
// (resolveSlSourceFile). Keying it by the filename would make it invisible
|
||||
// under its real name.
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||
'name: SIGNED_UP\nmeasures:\n - name: signups\n expr: [unterminated\n',
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'seed a human-renamed source mid-edit with broken YAML',
|
||||
);
|
||||
|
||||
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'SIGNED_UP' })).resolves.toEqual(
|
||||
expect.objectContaining({
|
||||
connectionId: 'warehouse',
|
||||
name: 'SIGNED_UP',
|
||||
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||
yaml: expect.stringContaining('[unterminated'),
|
||||
}),
|
||||
);
|
||||
|
||||
// The filename is not the identity, so it does not resolve a source.
|
||||
await expect(
|
||||
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'renamed-by-hand' }),
|
||||
).resolves.toBeNull();
|
||||
|
||||
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
|
||||
expect.objectContaining({ name: 'SIGNED_UP', columnCount: 0 }),
|
||||
]);
|
||||
});
|
||||
|
||||
it('expands manifest-backed scan sources when listing all connections', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/public.yaml',
|
||||
|
|
@ -292,12 +439,12 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('searches local semantic-layer source text through SQLite FTS', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
});
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'tickets',
|
||||
yaml: SUPPORT_YAML,
|
||||
|
|
@ -365,12 +512,12 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('searches all connections with one global hybrid ranking pass', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
});
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'finance',
|
||||
sourceName: 'orders',
|
||||
yaml: [
|
||||
|
|
@ -403,7 +550,7 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('returns dictionary evidence when collected sample values explain a match', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -456,7 +603,7 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
|
||||
it('adds the token lane alongside lexical matches for normalized query terms', async () => {
|
||||
await writeLocalSlSource(project, {
|
||||
await seedSlSourceFile(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
yaml: ORDERS_YAML,
|
||||
|
|
@ -471,21 +618,13 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('reports schema validation errors without writing invalid YAML', async () => {
|
||||
it('reports schema validation errors for invalid YAML', async () => {
|
||||
const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n');
|
||||
|
||||
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
|
||||
valid: false,
|
||||
errors: expect.arrayContaining([expect.stringContaining('grain')]),
|
||||
});
|
||||
|
||||
await expect(
|
||||
writeLocalSlSource(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'broken',
|
||||
yaml: invalidYaml,
|
||||
}),
|
||||
).rejects.toThrow('Invalid semantic-layer source');
|
||||
});
|
||||
|
||||
it('reports overlay columns that are not computed columns', async () => {
|
||||
|
|
@ -506,12 +645,40 @@ describe('local semantic-layer helpers', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('rejects unsafe source paths', async () => {
|
||||
it('never derives a file path from a traversal-style source name', async () => {
|
||||
// Reads match names against loaded records, so a traversal-style name is
|
||||
// simply not found; the writer-side guarantee (derived filenames contain
|
||||
// no separators) is covered by the source-files tests.
|
||||
await expect(
|
||||
readLocalSlSource(project, {
|
||||
connectionId: 'warehouse',
|
||||
sourceName: '../orders',
|
||||
}),
|
||||
).rejects.toThrow('Unsafe semantic-layer source name');
|
||||
).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('reads a source from a human-renamed file by its in-file name', async () => {
|
||||
// The filename is a derived label, not identity: a file renamed by a human
|
||||
// still resolves under the `name:` it declares.
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/custom-file-name.yaml',
|
||||
ORDERS_YAML,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
'Seed source at a human-chosen filename',
|
||||
);
|
||||
|
||||
await expect(
|
||||
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' }),
|
||||
).resolves.toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
name: 'orders',
|
||||
path: 'semantic-layer/warehouse/custom-file-name.yaml',
|
||||
yaml: ORDERS_YAML,
|
||||
});
|
||||
|
||||
await expect(
|
||||
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'custom-file-name' }),
|
||||
).resolves.toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ import { join } from 'node:path';
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||
import { assertSearchBackendConformanceCase } from '../search/backend-conformance.test-utils.js';
|
||||
import { searchLocalSlSources, writeLocalSlSource, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js';
|
||||
import { searchLocalSlSources, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js';
|
||||
import { searchLocalSlSourcesWithPglitePrototype } from '../../../src/context/sl/pglite-sl-search-prototype.js';
|
||||
import { seedSlSourceFile } from './sl-source-seeding.test-utils.js';
|
||||
|
||||
const ORDERS_YAML = [
|
||||
'name: orders',
|
||||
|
|
@ -107,9 +108,9 @@ function toConformanceResult(result: LocalSlSourceSearchResult) {
|
|||
}
|
||||
|
||||
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
|
||||
await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML });
|
||||
await writeLocalSlSource(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML });
|
||||
await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML });
|
||||
await seedSlSourceFile(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML });
|
||||
await seedSlSourceFile(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML });
|
||||
await seedSlSourceFile(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML });
|
||||
|
||||
await project.fileStore.writeFile(
|
||||
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { Mock } from 'vitest';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||
|
||||
import {
|
||||
ColumnNameCollisionError,
|
||||
|
|
@ -71,6 +75,7 @@ describe('loadSource', () => {
|
|||
it('warns and returns null when an existing source file has invalid YAML', async () => {
|
||||
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
||||
const configService = {
|
||||
listFiles: vi.fn().mockResolvedValue({ files: ['semantic-layer/warehouse/orders.yaml'] }),
|
||||
readFile: vi.fn().mockResolvedValue({ content: 'name: [' }),
|
||||
};
|
||||
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);
|
||||
|
|
@ -79,9 +84,33 @@ describe('loadSource', () => {
|
|||
|
||||
expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml');
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining('[loadSource] warehouse/orders.yaml: YAML parse failed:'),
|
||||
expect.stringContaining('[loadSource] semantic-layer/warehouse/orders.yaml: YAML parse failed:'),
|
||||
);
|
||||
});
|
||||
|
||||
it('returns null when no file declares the source name', async () => {
|
||||
const configService = {
|
||||
listFiles: vi.fn().mockResolvedValue({ files: [] }),
|
||||
readFile: vi.fn(),
|
||||
};
|
||||
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
|
||||
|
||||
await expect(service.loadSource('warehouse', 'orders')).resolves.toBeNull();
|
||||
expect(configService.readFile).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('resolves a source by its in-file name when the filename differs', async () => {
|
||||
const configService = {
|
||||
listFiles: vi.fn().mockResolvedValue({ files: ['semantic-layer/warehouse/renamed.yaml'] }),
|
||||
readFile: vi.fn().mockResolvedValue({ content: 'name: SIGNED_UP\nmeasures: []\n' }),
|
||||
};
|
||||
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
|
||||
|
||||
await expect(service.loadSource('warehouse', 'SIGNED_UP')).resolves.toEqual({
|
||||
name: 'SIGNED_UP',
|
||||
measures: [],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('composeOverlay', () => {
|
||||
|
|
@ -1242,3 +1271,177 @@ describe('findDanglingSegmentRefs', () => {
|
|||
expect(findDanglingSegmentRefs({})).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('writeSource / deleteSource file naming', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
let service: SemanticLayerService;
|
||||
|
||||
const author = 'T U';
|
||||
const authorEmail = 't@u.com';
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-service-files-'));
|
||||
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||
service = new SemanticLayerService(project.fileStore as never, connectionCatalog() as never, pythonPort as never);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const signedUp: SemanticLayerSource = {
|
||||
name: 'SIGNED_UP',
|
||||
table: 'PUBLIC.SIGNED_UP',
|
||||
grain: ['ID'],
|
||||
columns: [{ name: 'ID', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [],
|
||||
};
|
||||
|
||||
it('writes a new uppercase source at a derived filename and reads it back by name', async () => {
|
||||
const result = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||
|
||||
expect(result.path).toMatch(/^semantic-layer\/warehouse\/signed_up-[0-9a-f]{8}\.yaml$/);
|
||||
|
||||
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
|
||||
expect(file?.path).toBe(result.path);
|
||||
expect(file?.content).toContain('name: SIGNED_UP');
|
||||
|
||||
// Rewriting lands on the same file instead of deriving a second one.
|
||||
const rewrite = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||
expect(rewrite.path).toBe(result.path);
|
||||
});
|
||||
|
||||
it('repairs a broken file occupying the derived path instead of refusing the write', async () => {
|
||||
const written = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||
await project.fileStore.writeFile(
|
||||
written.path,
|
||||
'name: SIGNED_UP\nmeasures: [unterminated\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'break the file',
|
||||
);
|
||||
|
||||
const repaired = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||
|
||||
expect(repaired.path).toBe(written.path);
|
||||
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
|
||||
expect(file?.path).toBe(written.path);
|
||||
expect(file?.content).toContain('name: SIGNED_UP');
|
||||
});
|
||||
|
||||
it('rewrites a human-renamed file in place', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/custom.yaml',
|
||||
'name: orders\nmeasures: []\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'seed renamed file',
|
||||
);
|
||||
|
||||
const result = await service.writeSource(
|
||||
'warehouse',
|
||||
{ name: 'orders', grain: [], columns: [], joins: [], measures: [] },
|
||||
author,
|
||||
authorEmail,
|
||||
);
|
||||
|
||||
expect(result.path).toBe('semantic-layer/warehouse/custom.yaml');
|
||||
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
|
||||
expect(listed.files).toEqual(['semantic-layer/warehouse/custom.yaml']);
|
||||
});
|
||||
|
||||
it('repairs a human-renamed broken file in place instead of deriving a second one', async () => {
|
||||
// Renamed (filename ≠ name) AND mid-edit broken: identity must survive the
|
||||
// syntax error so the rewrite lands on the original file rather than creating
|
||||
// a duplicate at the derived path that later trips the by-name resolver.
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/custom.yaml',
|
||||
'name: SIGNED_UP\nmeasures: [unterminated\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'seed broken renamed file',
|
||||
);
|
||||
|
||||
const repaired = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||
|
||||
expect(repaired.path).toBe('semantic-layer/warehouse/custom.yaml');
|
||||
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
|
||||
expect(listed.files).toEqual(['semantic-layer/warehouse/custom.yaml']);
|
||||
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
|
||||
expect(file?.path).toBe('semantic-layer/warehouse/custom.yaml');
|
||||
expect(file?.content).toContain('name: SIGNED_UP');
|
||||
});
|
||||
|
||||
it('keeps a .yml-renamed file visible to the loader and the by-name resolver alike', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/custom.yml',
|
||||
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\nmeasures: []\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'seed .yml file',
|
||||
);
|
||||
|
||||
const { sources, loadErrors } = await service.loadAllSources('warehouse');
|
||||
expect(loadErrors).toEqual([]);
|
||||
expect(sources.map((source) => source.name)).toEqual(['orders']);
|
||||
|
||||
const file = await service.readSourceFile('warehouse', 'orders');
|
||||
expect(file?.path).toBe('semantic-layer/warehouse/custom.yml');
|
||||
});
|
||||
|
||||
it('refuses to clobber a derived path occupied by a different source', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/orders.yaml',
|
||||
'name: other_source\nmeasures: []\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'seed conflicting file',
|
||||
);
|
||||
|
||||
await expect(
|
||||
service.writeSource(
|
||||
'warehouse',
|
||||
{ name: 'orders', grain: [], columns: [], joins: [], measures: [] },
|
||||
author,
|
||||
authorEmail,
|
||||
),
|
||||
).rejects.toThrow("already defines source 'other_source'");
|
||||
});
|
||||
|
||||
it('deletes the file resolved by name, wherever it lives', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/custom.yaml',
|
||||
'name: orders\nmeasures: []\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'seed renamed file',
|
||||
);
|
||||
|
||||
await service.deleteSource('warehouse', 'orders', author, authorEmail);
|
||||
|
||||
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
|
||||
expect(listed.files).toEqual([]);
|
||||
});
|
||||
|
||||
it('explains manifest-backed deletes instead of silently succeeding', async () => {
|
||||
await project.fileStore.writeFile(
|
||||
'semantic-layer/warehouse/_schema/public.yaml',
|
||||
'tables:\n payments:\n table: public.payments\n columns:\n - name: id\n type: number\n',
|
||||
author,
|
||||
authorEmail,
|
||||
'seed manifest shard',
|
||||
);
|
||||
|
||||
await expect(service.deleteSource('warehouse', 'payments', author, authorEmail)).rejects.toThrow(
|
||||
/scan manifest/,
|
||||
);
|
||||
});
|
||||
|
||||
it('throws a plain not-found error for unknown sources', async () => {
|
||||
await expect(service.deleteSource('warehouse', 'missing', author, authorEmail)).rejects.toThrow(
|
||||
'Semantic-layer source not found: warehouse/missing',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
22
packages/cli/test/context/sl/sl-source-seeding.test-utils.ts
Normal file
22
packages/cli/test/context/sl/sl-source-seeding.test-utils.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import type { KtxFileWriteResult } from '../../../src/context/core/file-store.js';
|
||||
import type { KtxLocalProject } from '../../../src/context/project/project.js';
|
||||
import { slSourceFilePath } from '../../../src/context/sl/source-files.js';
|
||||
|
||||
/**
|
||||
* Seed a standalone/overlay semantic-layer file at the writer-derived path,
|
||||
* bypassing tool-level validation. Production writes go through
|
||||
* `SemanticLayerService.writeSource`; tests that only need a file on disk use
|
||||
* this instead.
|
||||
*/
|
||||
export async function seedSlSourceFile(
|
||||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string; yaml: string },
|
||||
): Promise<KtxFileWriteResult> {
|
||||
return project.fileStore.writeFile(
|
||||
slSourceFilePath(input.connectionId, input.sourceName),
|
||||
input.yaml,
|
||||
'ktx',
|
||||
'ktx@example.com',
|
||||
`Seed semantic-layer source: ${input.connectionId}/${input.sourceName}`,
|
||||
);
|
||||
}
|
||||
154
packages/cli/test/context/sl/source-files.test.ts
Normal file
154
packages/cli/test/context/sl/source-files.test.ts
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||
import {
|
||||
resolveSlSourceFile,
|
||||
slSourceFileName,
|
||||
slSourceFilePath,
|
||||
slSourceNameForFile,
|
||||
} from '../../../src/context/sl/source-files.js';
|
||||
|
||||
describe('slSourceFileName', () => {
|
||||
it('keeps safe lowercase snake_case names verbatim', () => {
|
||||
expect(slSourceFileName('orders')).toBe('orders.yaml');
|
||||
expect(slSourceFileName('mart_account_segments')).toBe('mart_account_segments.yaml');
|
||||
expect(slSourceFileName('orders2')).toBe('orders2.yaml');
|
||||
});
|
||||
|
||||
it('derives a slug-hash filename for any other name and never throws', () => {
|
||||
expect(slSourceFileName('SIGNED_UP')).toMatch(/^signed_up-[0-9a-f]{8}\.yaml$/);
|
||||
expect(slSourceFileName('EVENT$LOG')).toMatch(/^event_log-[0-9a-f]{8}\.yaml$/);
|
||||
expect(slSourceFileName('my.dotted.name')).toMatch(/^my_dotted_name-[0-9a-f]{8}\.yaml$/);
|
||||
expect(slSourceFileName('汉字')).toMatch(/^src-[0-9a-f]{8}\.yaml$/);
|
||||
expect(slSourceFileName(' ')).toMatch(/^src-[0-9a-f]{8}\.yaml$/);
|
||||
});
|
||||
|
||||
it('is deterministic', () => {
|
||||
expect(slSourceFileName('EVENT$LOG')).toBe(slSourceFileName('EVENT$LOG'));
|
||||
});
|
||||
|
||||
it('never emits path separators or traversal segments', () => {
|
||||
for (const name of ['../orders', 'a/b', 'a\\b', '..', './x']) {
|
||||
const fileName = slSourceFileName(name);
|
||||
expect(fileName).not.toContain('/');
|
||||
expect(fileName).not.toContain('\\');
|
||||
expect(fileName).not.toContain('..');
|
||||
}
|
||||
});
|
||||
|
||||
it('keeps case-differing names disjoint on case-insensitive filesystems', () => {
|
||||
// Safe-branch filenames contain no `-`; hash-branch filenames always end
|
||||
// in `-<8 hex>` with a hash of the raw name, so `events` vs `EVENTS`
|
||||
// cannot collide even when the filesystem folds case (macOS, Windows).
|
||||
const lower = slSourceFileName('events');
|
||||
const upper = slSourceFileName('EVENTS');
|
||||
expect(lower).toBe('events.yaml');
|
||||
expect(upper).toMatch(/^events-[0-9a-f]{8}\.yaml$/);
|
||||
expect(upper.toLowerCase()).not.toBe(lower.toLowerCase());
|
||||
expect(lower).not.toContain('-');
|
||||
});
|
||||
|
||||
it('routes Windows reserved device basenames through the hash branch', () => {
|
||||
expect(slSourceFileName('con')).toMatch(/^con-[0-9a-f]{8}\.yaml$/);
|
||||
expect(slSourceFileName('lpt1')).toMatch(/^lpt1-[0-9a-f]{8}\.yaml$/);
|
||||
});
|
||||
|
||||
it('caps overlong names', () => {
|
||||
const longName = `a${'b'.repeat(300)}`;
|
||||
const fileName = slSourceFileName(longName);
|
||||
expect(fileName.length).toBeLessThanOrEqual(64 + '-12345678.yaml'.length);
|
||||
expect(fileName).toMatch(/^ab+-[0-9a-f]{8}\.yaml$/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('slSourceFilePath', () => {
|
||||
it('rejects unsafe connection ids but accepts any source name', () => {
|
||||
expect(slSourceFilePath('warehouse', 'EVENT$LOG')).toMatch(
|
||||
/^semantic-layer\/warehouse\/event_log-[0-9a-f]{8}\.yaml$/,
|
||||
);
|
||||
expect(() => slSourceFilePath('../warehouse', 'orders')).toThrow('Unsafe connection id');
|
||||
});
|
||||
});
|
||||
|
||||
describe('slSourceNameForFile', () => {
|
||||
it('prefers the in-file name and falls back to the filename', () => {
|
||||
expect(slSourceNameForFile('semantic-layer/warehouse/custom.yaml', 'name: SIGNED_UP\n')).toBe('SIGNED_UP');
|
||||
expect(slSourceNameForFile('semantic-layer/warehouse/orders.yaml', 'measures: []\n')).toBe('orders');
|
||||
expect(slSourceNameForFile('semantic-layer/warehouse/orders.yaml', 'measures: [unterminated\n')).toBe('orders');
|
||||
});
|
||||
|
||||
it('recovers the declared name when the file is broken below the name: line', () => {
|
||||
// A human-renamed file left mid-edit keeps its identity: the syntax error is
|
||||
// under `measures:`, so the top-level `name:` is still recoverable and must
|
||||
// win over the (unrelated) filename.
|
||||
expect(slSourceNameForFile('semantic-layer/warehouse/renamed-by-hand.yaml', 'name: SIGNED_UP\nmeasures: [oops\n')).toBe(
|
||||
'SIGNED_UP',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('resolveSlSourceFile', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-source-files-'));
|
||||
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
async function seed(path: string, content: string): Promise<void> {
|
||||
await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', `seed ${path}`);
|
||||
}
|
||||
|
||||
it('matches by in-file name regardless of the filename', async () => {
|
||||
await seed('semantic-layer/warehouse/renamed-by-hand.yaml', 'name: SIGNED_UP\nmeasures: []\n');
|
||||
|
||||
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'SIGNED_UP')).resolves.toEqual({
|
||||
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||
content: 'name: SIGNED_UP\nmeasures: []\n',
|
||||
});
|
||||
});
|
||||
|
||||
it('returns null when no file declares the name and ignores manifest shards', async () => {
|
||||
await seed('semantic-layer/warehouse/_schema/public.yaml', 'tables:\n orders:\n table: public.orders\n');
|
||||
|
||||
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('falls back to the filename for broken YAML', async () => {
|
||||
const broken = 'name: orders\nmeasures: [unterminated\n';
|
||||
await seed('semantic-layer/warehouse/orders.yaml', broken);
|
||||
|
||||
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).resolves.toEqual({
|
||||
path: 'semantic-layer/warehouse/orders.yaml',
|
||||
content: broken,
|
||||
});
|
||||
});
|
||||
|
||||
it('matches a human-renamed broken file by its still-recoverable name', async () => {
|
||||
// Filename ≠ name, so the filename fallback cannot find it; resolution must
|
||||
// come from the intact top-level `name:` even though the YAML is broken.
|
||||
const broken = 'name: SIGNED_UP\nmeasures: [unterminated\n';
|
||||
await seed('semantic-layer/warehouse/renamed-by-hand.yaml', broken);
|
||||
|
||||
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'SIGNED_UP')).resolves.toEqual({
|
||||
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||
content: broken,
|
||||
});
|
||||
});
|
||||
|
||||
it('throws when two files declare the same source name', async () => {
|
||||
await seed('semantic-layer/warehouse/orders.yaml', 'name: orders\nmeasures: []\n');
|
||||
await seed('semantic-layer/warehouse/orders_copy.yaml', 'name: orders\nmeasures: []\n');
|
||||
|
||||
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).rejects.toThrow(
|
||||
'Multiple semantic-layer files declare source "orders"',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
@ -188,7 +188,7 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
|||
it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
|
||||
const { tool, semanticLayerService } = makeTool({
|
||||
semanticLayerService: {
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
||||
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||
isManifestBacked: vi.fn().mockResolvedValue(true),
|
||||
},
|
||||
});
|
||||
|
|
@ -222,7 +222,7 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
|||
it('still returns the plain "Source not found" error for truly-missing names', async () => {
|
||||
const { tool, semanticLayerService } = makeTool({
|
||||
semanticLayerService: {
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
||||
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||
},
|
||||
});
|
||||
|
|
@ -241,3 +241,20 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
|||
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('SlEditSourceTool — name edits', () => {
|
||||
it('rejects edits that change the in-file name', async () => {
|
||||
const { tool, semanticLayerService } = makeTool();
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: '11111111-1111-1111-1111-111111111111',
|
||||
sourceName: 'orders',
|
||||
yaml_edits: [{ oldText: 'name: orders', newText: 'name: renamed_orders' }],
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
expect(result.structured.success).toBe(false);
|
||||
expect(result.markdown).toMatch(/renaming is not supported/i);
|
||||
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -16,8 +16,15 @@ function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
|
|||
configService: {
|
||||
writeFile: vi.fn().mockResolvedValue(undefined),
|
||||
deleteFile: vi.fn().mockResolvedValue(undefined),
|
||||
// No live file for `orders` — revert recovers the preHead path from history.
|
||||
listFiles: vi.fn().mockResolvedValue({ files: [] }),
|
||||
readFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
||||
} as any,
|
||||
gitService: {
|
||||
// The source lived at its derived filename at preHead.
|
||||
listFilesAtCommit: vi.fn().mockResolvedValue(['semantic-layer/conn-1/orders.yaml']),
|
||||
getFileAtCommit: vi.fn().mockResolvedValue('name: orders\nmeasures: []\n'),
|
||||
} as any,
|
||||
gitService: { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') } as any,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
|
@ -65,4 +72,33 @@ describe('SlRollbackTool', () => {
|
|||
expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
|
||||
expect(session.actions).toEqual([]);
|
||||
});
|
||||
|
||||
it('restores a deleted human-renamed source at the path it occupied at preHead', async () => {
|
||||
// The source lived at a custom filename (≠ the writer-derived `orders.yaml`)
|
||||
// and the session deleted it. Revert must recover the custom path from the
|
||||
// preHead commit and restore there, not write/no-op against the derived path.
|
||||
const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };
|
||||
const tool = new SlRollbackTool(slSourcesRepository as never, connections as never, 1);
|
||||
const renamedContent = 'name: orders\ntable: public.orders\nmeasures: []\n';
|
||||
const session = makeSession({
|
||||
gitService: {
|
||||
listFilesAtCommit: vi.fn().mockResolvedValue(['semantic-layer/conn-1/custom.yaml']),
|
||||
getFileAtCommit: vi.fn().mockResolvedValue(renamedContent),
|
||||
} as any,
|
||||
});
|
||||
const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };
|
||||
|
||||
const result = await tool.call({ sourceName: 'orders' } as any, context);
|
||||
|
||||
expect(result.structured.success).toBe(true);
|
||||
expect((session.configService as any).writeFile).toHaveBeenCalledWith(
|
||||
'semantic-layer/conn-1/custom.yaml',
|
||||
renamedContent,
|
||||
expect.anything(),
|
||||
expect.anything(),
|
||||
expect.anything(),
|
||||
expect.anything(),
|
||||
);
|
||||
expect((session.configService as any).deleteFile).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ function makeTool(overrides: Partial<Record<string, any>> = {}) {
|
|||
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
|
||||
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
|
||||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
||||
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||
...overrides.semanticLayerService,
|
||||
};
|
||||
const slSearchService = {
|
||||
|
|
@ -66,7 +66,7 @@ describe('SlWriteSourceTool — session gating', () => {
|
|||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||
listManifestSourceNames: vi.fn().mockResolvedValue([]),
|
||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
||||
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
||||
} as any,
|
||||
wikiService: {} as any,
|
||||
|
|
@ -248,7 +248,7 @@ describe('SlWriteSourceTool — session gating', () => {
|
|||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||
listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
|
||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
||||
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
||||
} as any,
|
||||
});
|
||||
|
|
@ -377,3 +377,36 @@ describe('SlWriteSourceTool — standalone shadow guard', () => {
|
|||
expect(result.markdown).toMatch(/shadows an existing manifest entry|already exists/i);
|
||||
});
|
||||
});
|
||||
|
||||
describe('SlWriteSourceTool — source name identity', () => {
|
||||
it('accepts verbatim warehouse identifiers as sourceName', () => {
|
||||
const { tool } = makeTool();
|
||||
const base = { connectionId: '11111111-1111-1111-1111-111111111111' };
|
||||
expect(tool.inputSchema.safeParse({ ...base, sourceName: 'SIGNED_UP' }).success).toBe(true);
|
||||
expect(tool.inputSchema.safeParse({ ...base, sourceName: 'EVENT$LOG' }).success).toBe(true);
|
||||
expect(tool.inputSchema.safeParse({ ...base, sourceName: 'orders' }).success).toBe(true);
|
||||
expect(tool.inputSchema.safeParse({ ...base, sourceName: '' }).success).toBe(false);
|
||||
});
|
||||
|
||||
it('rejects a source whose name does not match sourceName', async () => {
|
||||
const { tool, semanticLayerService } = makeTool();
|
||||
const result = await tool.call(
|
||||
{
|
||||
connectionId: '11111111-1111-1111-1111-111111111111',
|
||||
sourceName: 'orders',
|
||||
source: {
|
||||
name: 'other_orders',
|
||||
sql: 'select 1 as id',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'string' }],
|
||||
measures: [],
|
||||
joins: [],
|
||||
} as any,
|
||||
} as any,
|
||||
baseContext,
|
||||
);
|
||||
expect(result.structured.success).toBe(false);
|
||||
expect(result.markdown).toMatch(/does not match sourceName/);
|
||||
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
29
packages/cli/test/context/sql-analysis/dialect.test.ts
Normal file
29
packages/cli/test/context/sql-analysis/dialect.test.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { sqlAnalysisDialectForDriver } from '../../../src/context/sql-analysis/dialect.js';
|
||||
|
||||
describe('sqlAnalysisDialectForDriver', () => {
|
||||
it('maps ktx.yaml driver names to sqlglot dialects', () => {
|
||||
expect(sqlAnalysisDialectForDriver('postgres')).toBe('postgres');
|
||||
expect(sqlAnalysisDialectForDriver('bigquery')).toBe('bigquery');
|
||||
expect(sqlAnalysisDialectForDriver('snowflake')).toBe('snowflake');
|
||||
expect(sqlAnalysisDialectForDriver('mysql')).toBe('mysql');
|
||||
expect(sqlAnalysisDialectForDriver('sqlserver')).toBe('tsql');
|
||||
expect(sqlAnalysisDialectForDriver('sqlite')).toBe('sqlite');
|
||||
expect(sqlAnalysisDialectForDriver('duckdb')).toBe('duckdb');
|
||||
expect(sqlAnalysisDialectForDriver('clickhouse')).toBe('clickhouse');
|
||||
expect(sqlAnalysisDialectForDriver('databricks')).toBe('databricks');
|
||||
});
|
||||
|
||||
it('maps local connection-type spellings to sqlglot dialects', () => {
|
||||
expect(sqlAnalysisDialectForDriver('POSTGRESQL')).toBe('postgres');
|
||||
expect(sqlAnalysisDialectForDriver('SQLSERVER')).toBe('tsql');
|
||||
expect(sqlAnalysisDialectForDriver('BIGQUERY')).toBe('bigquery');
|
||||
expect(sqlAnalysisDialectForDriver('SQLITE')).toBe('sqlite');
|
||||
});
|
||||
|
||||
it('defaults to postgres for unknown or missing drivers', () => {
|
||||
expect(sqlAnalysisDialectForDriver(undefined)).toBe('postgres');
|
||||
expect(sqlAnalysisDialectForDriver('')).toBe('postgres');
|
||||
expect(sqlAnalysisDialectForDriver('unknown')).toBe('postgres');
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue