mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
fix: read semantic sources safely (#284)
* fix: read semantic sources safely
* test: retarget reindex per-scope error case to a broken manifest
Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.
* fix(sl): decouple semantic-layer file names from warehouse naming rules
The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.
- New `source-files.ts`: one total filename derivation (safe lowercase
names verbatim; otherwise slug + sha256-hash suffix, immune to
case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
`assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
name (human renames survive); new sources get a derived filename; a
derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
caller to launder IO errors; `deleteSource` distinguishes manifest-backed
sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
now a recommendation for new sources) and rejects sourceName/source.name
mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
derivation use resolved paths / in-file names instead of interpolating
`<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
helpers onto the shared module; dropped dead service methods.
* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator
- Key broken/renamed semantic-layer files by their recoverable in-file
name (slSourceNameForFile) so mid-edit sources stay reachable under
their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
recover deleted files' declared names from the pre-change commit
instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
validateReadOnly in the connection's dialect, sharing one
driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
reject smuggled second statements, and strip trailing
semicolons/comments before row-limit wrapping
This commit is contained in:
parent
853f39a7c3
commit
f3f893bf01
51 changed files with 1797 additions and 476 deletions
|
|
@ -44,12 +44,17 @@ Use this order for most context changes:
|
||||||
Semantic sources are YAML files for queryable tables or custom SQL. They define
|
Semantic sources are YAML files for queryable tables or custom SQL. They define
|
||||||
agent-facing measures, dimensions, segments, joins, and grain.
|
agent-facing measures, dimensions, segments, joins, and grain.
|
||||||
|
|
||||||
Semantic source files live at:
|
Semantic source files live under:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
semantic-layer/<connection-id>/<source-name>.yaml
|
semantic-layer/<connection-id>/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The file's `name:` field is the source's identity — it carries the warehouse
|
||||||
|
identifier verbatim, including case. The filename is a derived label: simple
|
||||||
|
lowercase names get `<source-name>.yaml`, anything else gets a slugged
|
||||||
|
filename. Renaming a file does not rename the source.
|
||||||
|
|
||||||
### Minimal source
|
### Minimal source
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
|
@ -152,7 +157,7 @@ joins:
|
||||||
|
|
||||||
| Field | Required | Description |
|
| Field | Required | Description |
|
||||||
|-------|----------|-------------|
|
|-------|----------|-------------|
|
||||||
| `name` | Yes | Source identifier. Use lowercase words and underscores. |
|
| `name` | Yes | Source identity (not the filename). When overlaying an ingested table, match the manifest identifier verbatim, including case (e.g. `SIGNED_UP`); for a new standalone source, lowercase words and underscores are recommended. |
|
||||||
| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
|
| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
|
||||||
| `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. |
|
| `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. |
|
||||||
| `grain` | Yes | Columns that uniquely identify a row at the source grain. |
|
| `grain` | Yes | Columns that uniquely identify a row at the source grain. |
|
||||||
|
|
|
||||||
|
|
@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
function stripLeadingSqlComments(sql: string): string {
|
|
||||||
let index = 0;
|
|
||||||
while (index < sql.length) {
|
|
||||||
while (/\s/.test(sql[index] ?? '')) {
|
|
||||||
index += 1;
|
|
||||||
}
|
|
||||||
if (sql.startsWith('--', index)) {
|
|
||||||
const end = sql.indexOf('\n', index + 2);
|
|
||||||
index = end === -1 ? sql.length : end + 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (sql.startsWith('/*', index)) {
|
|
||||||
const end = sql.indexOf('*/', index + 2);
|
|
||||||
if (end === -1) {
|
|
||||||
return sql.slice(index);
|
|
||||||
}
|
|
||||||
index = end + 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return sql.slice(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isKtxSqliteConnectionConfig(
|
export function isKtxSqliteConnectionConfig(
|
||||||
connection: KtxSqliteConnectionConfig | undefined,
|
connection: KtxSqliteConnectionConfig | undefined,
|
||||||
): connection is KtxSqliteConnectionConfig {
|
): connection is KtxSqliteConnectionConfig {
|
||||||
|
|
@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
||||||
|
|
||||||
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||||
this.assertConnection(input.connectionId);
|
this.assertConnection(input.connectionId);
|
||||||
const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
|
const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params);
|
||||||
return { ...result, rowCount: result.rows.length };
|
return { ...result, rowCount: result.rows.length };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
|
import { assertReadOnlySql, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
|
||||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||||
|
|
@ -284,7 +284,7 @@ function isDeniedError(error: unknown): boolean {
|
||||||
}
|
}
|
||||||
|
|
||||||
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
|
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
|
||||||
const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
|
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sqlText));
|
||||||
if (!maxRows) {
|
if (!maxRows) {
|
||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,133 @@ const MUTATING_SQL =
|
||||||
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
|
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
|
||||||
const READ_SQL = /^\s*(select|with)\b/i;
|
const READ_SQL = /^\s*(select|with)\b/i;
|
||||||
|
|
||||||
|
// Agents (and the daemon's sqlglot validator, which ignores comments) routinely
|
||||||
|
// emit read-only queries prefixed with `-- ...` or `/* ... */`. Strip leading
|
||||||
|
// comments so the prefix check sees the real statement; otherwise valid SELECT/WITH
|
||||||
|
// SQL is rejected here while the parser-backed validator accepts it.
|
||||||
|
function stripLeadingSqlComments(sql: string): string {
|
||||||
|
let index = 0;
|
||||||
|
while (index < sql.length) {
|
||||||
|
while (/\s/.test(sql[index] ?? '')) {
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
if (sql.startsWith('--', index)) {
|
||||||
|
const end = sql.indexOf('\n', index + 2);
|
||||||
|
index = end === -1 ? sql.length : end + 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (sql.startsWith('/*', index)) {
|
||||||
|
const end = sql.indexOf('*/', index + 2);
|
||||||
|
if (end === -1) {
|
||||||
|
return sql.slice(index);
|
||||||
|
}
|
||||||
|
index = end + 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return sql.slice(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lexes past one string literal, quoted identifier, or comment starting at
|
||||||
|
// `index`, using standard-SQL rules ('' and "" escapes; no dialect extensions
|
||||||
|
// such as backslash escapes or dollar quoting). Returns the index after the
|
||||||
|
// token, or `index` unchanged when no quoted/comment token starts there.
|
||||||
|
function skipQuotedOrComment(sql: string, index: number): number {
|
||||||
|
const quote = sql[index];
|
||||||
|
if (quote === "'" || quote === '"') {
|
||||||
|
let i = index + 1;
|
||||||
|
while (i < sql.length) {
|
||||||
|
if (sql[i] === quote) {
|
||||||
|
if (sql[i + 1] === quote) {
|
||||||
|
i += 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return i + 1;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
return sql.length;
|
||||||
|
}
|
||||||
|
if (sql.startsWith('--', index)) {
|
||||||
|
const end = sql.indexOf('\n', index + 2);
|
||||||
|
return end === -1 ? sql.length : end + 1;
|
||||||
|
}
|
||||||
|
if (sql.startsWith('/*', index)) {
|
||||||
|
const end = sql.indexOf('*/', index + 2);
|
||||||
|
return end === -1 ? sql.length : end + 2;
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backstop against statement smuggling (`select 1; drop table x`): reject any
|
||||||
|
// semicolon that is followed by real content. Semicolons inside string
|
||||||
|
// literals, quoted identifiers, and comments are fine, as are trailing
|
||||||
|
// semicolons (optionally followed by whitespace and comments). This deliberately
|
||||||
|
// lexes standard SQL only, so dialect-specific escapes can cause a false
|
||||||
|
// reject — never a false accept; the canonical gate is the daemon's
|
||||||
|
// sqlglot-backed validateReadOnly.
|
||||||
|
function assertSingleSqlStatement(sql: string): void {
|
||||||
|
let index = 0;
|
||||||
|
let sawSemicolon = false;
|
||||||
|
while (index < sql.length) {
|
||||||
|
const skipped = skipQuotedOrComment(sql, index);
|
||||||
|
if (skipped > index) {
|
||||||
|
index = skipped;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (sql[index] === ';') {
|
||||||
|
sawSemicolon = true;
|
||||||
|
} else if (sawSemicolon && !/\s/.test(sql[index])) {
|
||||||
|
throw new Error('Only one SQL statement can be executed.');
|
||||||
|
}
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export function assertReadOnlySql(sql: string): string {
|
export function assertReadOnlySql(sql: string): string {
|
||||||
const trimmed = sql.trim();
|
const trimmed = stripLeadingSqlComments(sql).trim();
|
||||||
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
|
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
|
||||||
throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
|
throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
|
||||||
}
|
}
|
||||||
|
assertSingleSqlStatement(trimmed);
|
||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// `assertReadOnlySql` deliberately keeps trailing semicolons, comments, and
|
||||||
|
// whitespace (e.g. `select 1; -- done`) — harmless for direct single-statement
|
||||||
|
// execution. A row-limit subquery wrapper needs a bare expression instead: a
|
||||||
|
// trailing `;` would sit illegally inside the subquery, and a trailing line
|
||||||
|
// comment would comment out the closing paren and limit clause. Lex forward with
|
||||||
|
// the same standard-SQL rules as the single-statement gate and truncate at the
|
||||||
|
// end of the last meaningful token, dropping trailing semicolons, comments, and
|
||||||
|
// whitespace. Characters inside string literals and quoted identifiers stay
|
||||||
|
// meaningful, so a `;` or `--` within a literal is never mistaken for a
|
||||||
|
// terminator (a plain regex cannot make that distinction).
|
||||||
|
export function stripTrailingSqlNoise(sql: string): string {
|
||||||
|
let index = 0;
|
||||||
|
let meaningfulEnd = 0;
|
||||||
|
while (index < sql.length) {
|
||||||
|
if (sql.startsWith('--', index) || sql.startsWith('/*', index)) {
|
||||||
|
index = skipQuotedOrComment(sql, index);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const afterQuoted = skipQuotedOrComment(sql, index);
|
||||||
|
if (afterQuoted > index) {
|
||||||
|
meaningfulEnd = afterQuoted;
|
||||||
|
index = afterQuoted;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (sql[index] !== ';' && !/\s/.test(sql[index] ?? '')) {
|
||||||
|
meaningfulEnd = index + 1;
|
||||||
|
}
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
return sql.slice(0, meaningfulEnd);
|
||||||
|
}
|
||||||
|
|
||||||
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
|
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
|
||||||
const trimmed = assertReadOnlySql(sql).replace(/;+\s*$/, '');
|
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sql));
|
||||||
if (!maxRows) {
|
if (!maxRows) {
|
||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -557,12 +557,13 @@ export class GitService {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
* List all paths matching `pathSpec` as they exist at `commitHash`. Reads from
|
||||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
* git object storage, so it's safe against concurrent working-tree mutations
|
||||||
|
* and can recover paths (e.g. a human-renamed file) that no longer exist on disk.
|
||||||
*/
|
*/
|
||||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
async listFilesAtCommit(pathSpec: string, commitHash: string): Promise<string[]> {
|
||||||
try {
|
try {
|
||||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', 'HEAD', '--', pathSpec]);
|
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', commitHash, '--', pathSpec]);
|
||||||
if (!raw) {
|
if (!raw) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
@ -572,6 +573,14 @@ export class GitService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||||
|
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||||
|
*/
|
||||||
|
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||||
|
return this.listFilesAtCommit(pathSpec, 'HEAD');
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collapse all commits between `preHead` and current HEAD into a single commit with the given
|
* Collapse all commits between `preHead` and current HEAD into a single commit with the given
|
||||||
* message. Used by the memory agent to squash N per-tool-call commits into one ingest commit.
|
* message. Used by the memory agent to squash N per-tool-call commits into one ingest commit.
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import { dirname, join, relative } from 'node:path';
|
||||||
import YAML from 'yaml';
|
import YAML from 'yaml';
|
||||||
import type { MemoryAction } from '../../../../context/memory/types.js';
|
import type { MemoryAction } from '../../../../context/memory/types.js';
|
||||||
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
|
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
|
||||||
|
import { isSlYamlPath } from '../../../sl/source-files.js';
|
||||||
import type { FinalizationOverrideReplay } from '../../types.js';
|
import type { FinalizationOverrideReplay } from '../../types.js';
|
||||||
import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
|
import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
|
||||||
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
|
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
|
||||||
|
|
@ -251,7 +252,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
||||||
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
|
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
|
||||||
|
|
||||||
const schemaRoot = join(input.workdir, 'semantic-layer', input.connectionId, '_schema');
|
const schemaRoot = join(input.workdir, 'semantic-layer', input.connectionId, '_schema');
|
||||||
for (const file of (await walkFiles(schemaRoot)).filter((candidate) => candidate.endsWith('.yaml') || candidate.endsWith('.yml'))) {
|
for (const file of (await walkFiles(schemaRoot)).filter(isSlYamlPath)) {
|
||||||
const path = join(schemaRoot, file);
|
const path = join(schemaRoot, file);
|
||||||
const before = await readFile(path, 'utf-8');
|
const before = await readFile(path, 'utf-8');
|
||||||
const shard = (YAML.parse(before) ?? {}) as ManifestShard;
|
const shard = (YAML.parse(before) ?? {}) as ManifestShard;
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||||
import { dirname, join } from 'node:path';
|
import { dirname, join } from 'node:path';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
||||||
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
|
||||||
import type { IngestTraceWriter } from './ingest-trace.js';
|
import type { IngestTraceWriter } from './ingest-trace.js';
|
||||||
import { traceTimed } from './ingest-trace.js';
|
import { traceTimed } from './ingest-trace.js';
|
||||||
|
|
||||||
|
|
@ -149,11 +148,13 @@ function buildToolSet(input: {
|
||||||
|
|
||||||
export function finalGateRepairPaths(input: {
|
export function finalGateRepairPaths(input: {
|
||||||
changedWikiPageKeys: string[];
|
changedWikiPageKeys: string[];
|
||||||
touchedSlSources: TouchedSlSource[];
|
// Resolved by the caller: SL filenames are derived labels, so the repair
|
||||||
|
// allowlist must carry the real on-disk paths, not name-interpolated ones.
|
||||||
|
touchedSlSourcePaths: string[];
|
||||||
}): string[] {
|
}): string[] {
|
||||||
return [
|
return [
|
||||||
...new Set([
|
...new Set([
|
||||||
...input.touchedSlSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
...input.touchedSlSourcePaths,
|
||||||
...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
||||||
]),
|
]),
|
||||||
].sort();
|
].sort();
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
||||||
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
||||||
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||||
import type { IngestReportFinalizationMismatch } from './reports.js';
|
import type { IngestReportFinalizationMismatch } from './reports.js';
|
||||||
|
|
@ -64,39 +65,36 @@ export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function deriveFinalizationTouchedSources(
|
// Source identity is the in-file `name:`; filenames are derived labels (see
|
||||||
input: DeriveTouchedSourcesInput,
|
// source-files.ts), so a changed path — manifest shard or standalone file —
|
||||||
): Promise<DeriveTouchedSourcesResult> {
|
// cannot be mapped to a source by parsing its filename. Instead, every changed
|
||||||
|
// semantic-layer file is attributed through the before/after diff of its
|
||||||
|
// connection's composed sources. A changed file whose connection diff is empty
|
||||||
|
// cannot be attributed to any source and is surfaced as unresolved.
|
||||||
|
export function deriveFinalizationTouchedSources(input: DeriveTouchedSourcesInput): DeriveTouchedSourcesResult {
|
||||||
const touched = new Map<string, TouchedSlSource>();
|
const touched = new Map<string, TouchedSlSource>();
|
||||||
const unresolvedPaths: string[] = [];
|
const unresolvedPaths: string[] = [];
|
||||||
|
|
||||||
|
const pathsByConnection = new Map<string, string[]>();
|
||||||
for (const path of input.changedPaths) {
|
for (const path of input.changedPaths) {
|
||||||
if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
|
if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const parts = path.split('/');
|
const connectionId = path.split('/')[1] ?? '';
|
||||||
const connectionId = parts[1] ?? '';
|
|
||||||
if (!connectionId) {
|
if (!connectionId) {
|
||||||
unresolvedPaths.push(path);
|
unresolvedPaths.push(path);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (parts[2] !== '_schema') {
|
pathsByConnection.set(connectionId, [...(pathsByConnection.get(connectionId) ?? []), path]);
|
||||||
const fileName = parts.at(-1) ?? '';
|
|
||||||
const sourceName = fileName.replace(/\.ya?ml$/, '');
|
|
||||||
if (!sourceName) {
|
|
||||||
unresolvedPaths.push(path);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
touched.set(`${connectionId}:${sourceName}`, { connectionId, sourceName });
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const [connectionId, paths] of pathsByConnection) {
|
||||||
const changedNames = changedSourceNames(
|
const changedNames = changedSourceNames(
|
||||||
input.beforeSourcesByConnection.get(connectionId) ?? [],
|
input.beforeSourcesByConnection.get(connectionId) ?? [],
|
||||||
input.afterSourcesByConnection.get(connectionId) ?? [],
|
input.afterSourcesByConnection.get(connectionId) ?? [],
|
||||||
);
|
);
|
||||||
if (changedNames.length === 0) {
|
if (changedNames.length === 0) {
|
||||||
unresolvedPaths.push(path);
|
unresolvedPaths.push(...paths);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (const sourceName of changedNames) {
|
for (const sourceName of changedNames) {
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime
|
||||||
import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
||||||
import type { CaptureSession, MemoryAction } from '../../context/memory/types.js';
|
import type { CaptureSession, MemoryAction } from '../../context/memory/types.js';
|
||||||
import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js';
|
import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js';
|
||||||
|
import { isSlYamlPath, slSourceFilePath, slSourceNameForFile, sourceNameFromPath } from '../../context/sl/source-files.js';
|
||||||
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
||||||
import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js';
|
import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js';
|
||||||
import { createTouchedSlSources, type TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
import { createTouchedSlSources, type TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||||
|
|
@ -498,7 +499,7 @@ export class IngestBundleRunner {
|
||||||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||||
const names = files
|
const names = files
|
||||||
.filter((f) => !f.startsWith('_schema/'))
|
.filter((f) => !f.startsWith('_schema/'))
|
||||||
.map((f) => f.replace(/\.yaml$/, ''))
|
.map((f) => sourceNameFromPath(f))
|
||||||
.sort((left, right) => left.localeCompare(right));
|
.sort((left, right) => left.localeCompare(right));
|
||||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||||
return `## ${connectionId}\n${body}`;
|
return `## ${connectionId}\n${body}`;
|
||||||
|
|
@ -791,14 +792,52 @@ export class IngestBundleRunner {
|
||||||
].sort();
|
].sort();
|
||||||
}
|
}
|
||||||
|
|
||||||
private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] {
|
private async touchedSlSourcesFromPaths(
|
||||||
return paths
|
worktree: IngestSessionWorktree,
|
||||||
.filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/'))
|
paths: string[],
|
||||||
.map((path) => {
|
deletedFileSha: string,
|
||||||
const [, connectionId, fileName] = path.split('/');
|
): Promise<TouchedSlSource[]> {
|
||||||
return { connectionId: connectionId ?? '', sourceName: (fileName ?? '').replace(/\.yaml$/, '') };
|
const sources: TouchedSlSource[] = [];
|
||||||
})
|
for (const path of paths) {
|
||||||
.filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0);
|
if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path) || path.includes('/_schema/')) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const [, connectionId] = path.split('/');
|
||||||
|
if (!connectionId) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Source identity is the in-file `name:`, never the filename — an uppercase
|
||||||
|
// warehouse source like `WIDGET_SALES` lives in a hash-derived
|
||||||
|
// `widget_sales-<hash>.yaml`, so parsing the basename yields a phantom name.
|
||||||
|
// Read the live file; when it was deleted this run, recover its declared
|
||||||
|
// name from the pre-change commit the way `revertSourceToPreHead` resolves a
|
||||||
|
// gone file from history. The filename is a last resort only when the content
|
||||||
|
// is unrecoverable from both.
|
||||||
|
let content: string | null;
|
||||||
|
try {
|
||||||
|
content = await readFile(join(worktree.workdir, path), 'utf-8');
|
||||||
|
} catch {
|
||||||
|
content = await worktree.git.getFileAtCommit(path, deletedFileSha).catch(() => null);
|
||||||
|
}
|
||||||
|
const sourceName = content === null ? sourceNameFromPath(path) : slSourceNameForFile(path, content);
|
||||||
|
if (sourceName.length > 0) {
|
||||||
|
sources.push({ connectionId, sourceName });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sources;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inverse direction for commits and repair allowlists: resolve each touched
|
||||||
|
// source to its real on-disk path, falling back to the writer's derived
|
||||||
|
// filename when the file was deleted in this run.
|
||||||
|
private async touchedSlSourcePaths(workdir: string, touched: TouchedSlSource[]): Promise<string[]> {
|
||||||
|
const service = this.deps.semanticLayerService.forWorktree(workdir);
|
||||||
|
const paths: string[] = [];
|
||||||
|
for (const source of touched) {
|
||||||
|
const file = await service.readSourceFile(source.connectionId, source.sourceName);
|
||||||
|
paths.push(file?.path ?? slSourceFilePath(source.connectionId, source.sourceName));
|
||||||
|
}
|
||||||
|
return paths;
|
||||||
}
|
}
|
||||||
|
|
||||||
private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] {
|
private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] {
|
||||||
|
|
@ -1558,7 +1597,7 @@ export class IngestBundleRunner {
|
||||||
projectionTouchedSources = projection.touchedSources;
|
projectionTouchedSources = projection.touchedSources;
|
||||||
projectionChangedWikiPageKeys = projection.changedWikiPageKeys;
|
projectionChangedWikiPageKeys = projection.changedWikiPageKeys;
|
||||||
const projectionPaths = [
|
const projectionPaths = [
|
||||||
...projection.touchedSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
...(await this.touchedSlSourcePaths(sessionWorktree.workdir, projection.touchedSources)),
|
||||||
...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
||||||
];
|
];
|
||||||
projectionTouchedPaths = projectionPaths;
|
projectionTouchedPaths = projectionPaths;
|
||||||
|
|
@ -1740,7 +1779,11 @@ export class IngestBundleRunner {
|
||||||
await validateFinalIngestArtifacts({
|
await validateFinalIngestArtifacts({
|
||||||
connectionIds: slConnectionIds,
|
connectionIds: slConnectionIds,
|
||||||
changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths),
|
changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths),
|
||||||
touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths),
|
touchedSlSources: await this.touchedSlSourcesFromPaths(
|
||||||
|
sessionWorktree,
|
||||||
|
touchedPaths,
|
||||||
|
await sessionWorktree.git.revParseHead(),
|
||||||
|
),
|
||||||
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
||||||
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
||||||
validateTouchedSources: (touched) =>
|
validateTouchedSources: (touched) =>
|
||||||
|
|
@ -2289,20 +2332,34 @@ export class IngestBundleRunner {
|
||||||
)
|
)
|
||||||
: [];
|
: [];
|
||||||
|
|
||||||
const changedConnectionIds = [
|
// Validate the write scope before deriving touched sources: attribution
|
||||||
...new Set([
|
// by before/after diff is only defined for connections whose
|
||||||
...slConnectionIds,
|
// pre-finalization snapshot was loaded (slConnectionIds), and an
|
||||||
...finalizationTouchedPaths
|
// out-of-scope write would otherwise surface downstream as a bogus
|
||||||
.filter((path) => path.startsWith('semantic-layer/'))
|
// unresolved-path or declaration-mismatch failure instead of the real
|
||||||
.map((path) => path.split('/')[1])
|
// policy violation.
|
||||||
.filter((connectionId): connectionId is string => Boolean(connectionId)),
|
await traceTimed(
|
||||||
]),
|
runTrace,
|
||||||
].sort();
|
'finalization',
|
||||||
|
'semantic_layer_target_policy',
|
||||||
|
{
|
||||||
|
sourceKey: job.sourceKey,
|
||||||
|
allowedTargetConnectionIds: slConnectionIds,
|
||||||
|
touchedPaths: [...new Set(finalizationTouchedPaths)].sort(),
|
||||||
|
},
|
||||||
|
async () => {
|
||||||
|
assertSemanticLayerTargetPathsAllowed({
|
||||||
|
paths: finalizationTouchedPaths,
|
||||||
|
allowedConnectionIds: new Set(slConnectionIds),
|
||||||
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
const postFinalizationSourcesByConnection = await this.loadSourcesByConnection(
|
const postFinalizationSourcesByConnection = await this.loadSourcesByConnection(
|
||||||
sessionWorktree.workdir,
|
sessionWorktree.workdir,
|
||||||
changedConnectionIds,
|
slConnectionIds,
|
||||||
);
|
);
|
||||||
const scope = await deriveFinalizationTouchedSources({
|
const scope = deriveFinalizationTouchedSources({
|
||||||
changedPaths: finalizationTouchedPaths,
|
changedPaths: finalizationTouchedPaths,
|
||||||
beforeSourcesByConnection: preFinalizationSourcesByConnection,
|
beforeSourcesByConnection: preFinalizationSourcesByConnection,
|
||||||
afterSourcesByConnection: postFinalizationSourcesByConnection,
|
afterSourcesByConnection: postFinalizationSourcesByConnection,
|
||||||
|
|
@ -2437,7 +2494,7 @@ export class IngestBundleRunner {
|
||||||
...(isolatedDiffEnabled ? projectionTouchedSources : []),
|
...(isolatedDiffEnabled ? projectionTouchedSources : []),
|
||||||
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
|
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
|
||||||
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
|
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
|
||||||
...this.touchedSlSourcesFromPaths(postReconciliationPaths),
|
...(await this.touchedSlSourcesFromPaths(sessionWorktree, postReconciliationPaths, preReconciliationSha)),
|
||||||
...finalizationTouchedSources,
|
...finalizationTouchedSources,
|
||||||
]);
|
]);
|
||||||
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
|
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
|
||||||
|
|
@ -2528,7 +2585,7 @@ export class IngestBundleRunner {
|
||||||
const gateError = this.errorMessage(error);
|
const gateError = this.errorMessage(error);
|
||||||
const repairPaths = finalGateRepairPaths({
|
const repairPaths = finalGateRepairPaths({
|
||||||
changedWikiPageKeys: finalChangedWikiPageKeys,
|
changedWikiPageKeys: finalChangedWikiPageKeys,
|
||||||
touchedSlSources: finalTouchedSlSources,
|
touchedSlSourcePaths: await this.touchedSlSourcePaths(sessionWorktree.workdir, finalTouchedSlSources),
|
||||||
});
|
});
|
||||||
emitStageProgress('final_gates', 89, 'Repairing final artifact gates');
|
emitStageProgress('final_gates', 89, 'Repairing final artifact gates');
|
||||||
const gateRepair = await repairFinalGateFailure({
|
const gateRepair = await repairFinalGateFailure({
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import { fileURLToPath } from 'node:url';
|
||||||
import YAML from 'yaml';
|
import YAML from 'yaml';
|
||||||
import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js';
|
import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js';
|
||||||
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||||
import type { KtxLogger } from '../../context/core/config.js';
|
import type { KtxLogger } from '../../context/core/config.js';
|
||||||
import { noopLogger } from '../../context/core/config.js';
|
import { noopLogger } from '../../context/core/config.js';
|
||||||
|
|
@ -95,6 +96,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
|
||||||
memoryModel?: string;
|
memoryModel?: string;
|
||||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||||
|
sqlAnalysis?: SqlAnalysisPort;
|
||||||
jobIdFactory?: () => string;
|
jobIdFactory?: () => string;
|
||||||
logger?: KtxLogger;
|
logger?: KtxLogger;
|
||||||
embeddingProvider?: KtxEmbeddingProvider | null;
|
embeddingProvider?: KtxEmbeddingProvider | null;
|
||||||
|
|
@ -271,16 +273,13 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
||||||
let content: string;
|
|
||||||
try {
|
|
||||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||||
content = file.content;
|
if (!file) {
|
||||||
} catch (error) {
|
return this.validateComposedSource(deps, connectionId, sourceName, 'no standalone or overlay file found');
|
||||||
return this.validateComposedSource(deps, connectionId, sourceName, error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const parsed = YAML.parse(content) as unknown as Record<string, unknown>;
|
const parsed = YAML.parse(file.content) as unknown as Record<string, unknown>;
|
||||||
return this.validateParsedSource(sourceName, parsed);
|
return this.validateParsedSource(sourceName, parsed);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return {
|
return {
|
||||||
|
|
@ -519,6 +518,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
||||||
authorResolver: GitAuthorResolverPort;
|
authorResolver: GitAuthorResolverPort;
|
||||||
slSourcesRepository: SlSourcesIndexPort;
|
slSourcesRepository: SlSourcesIndexPort;
|
||||||
connections: SlConnectionCatalogPort;
|
connections: SlConnectionCatalogPort;
|
||||||
|
sqlAnalysis?: SqlAnalysisPort;
|
||||||
contextStore: SqliteContextEvidenceStore;
|
contextStore: SqliteContextEvidenceStore;
|
||||||
embedding: KtxEmbeddingPort;
|
embedding: KtxEmbeddingPort;
|
||||||
}) {
|
}) {
|
||||||
|
|
@ -551,6 +551,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
||||||
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
|
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
|
||||||
const warehouseVerificationTools = createWarehouseVerificationTools({
|
const warehouseVerificationTools = createWarehouseVerificationTools({
|
||||||
connections: deps.connections,
|
connections: deps.connections,
|
||||||
|
...(deps.sqlAnalysis ? { sqlAnalysis: deps.sqlAnalysis } : {}),
|
||||||
fallbackFileStore: deps.project.fileStore,
|
fallbackFileStore: deps.project.fileStore,
|
||||||
wikiSearchTool,
|
wikiSearchTool,
|
||||||
slDiscoverTool,
|
slDiscoverTool,
|
||||||
|
|
@ -699,6 +700,7 @@ export function createLocalBundleIngestRuntime(
|
||||||
authorResolver: new LocalAuthorResolver(),
|
authorResolver: new LocalAuthorResolver(),
|
||||||
slSourcesRepository,
|
slSourcesRepository,
|
||||||
connections,
|
connections,
|
||||||
|
...(options.sqlAnalysis ? { sqlAnalysis: options.sqlAnalysis } : {}),
|
||||||
contextStore,
|
contextStore,
|
||||||
embedding,
|
embedding,
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { randomUUID } from 'node:crypto';
|
||||||
import { cp, mkdir, rm } from 'node:fs/promises';
|
import { cp, mkdir, rm } from 'node:fs/promises';
|
||||||
import { isAbsolute, resolve } from 'node:path';
|
import { isAbsolute, resolve } from 'node:path';
|
||||||
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||||
import type { KtxLogger } from '../../context/core/config.js';
|
import type { KtxLogger } from '../../context/core/config.js';
|
||||||
import { createAbortError, isAbortError } from '../../context/core/abort.js';
|
import { createAbortError, isAbortError } from '../../context/core/abort.js';
|
||||||
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
||||||
|
|
@ -35,6 +36,7 @@ export interface RunLocalIngestOptions {
|
||||||
memoryModel?: string;
|
memoryModel?: string;
|
||||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||||
|
sqlAnalysis?: SqlAnalysisPort;
|
||||||
logger?: KtxLogger;
|
logger?: KtxLogger;
|
||||||
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
||||||
abortSignal?: AbortSignal;
|
abortSignal?: AbortSignal;
|
||||||
|
|
@ -159,6 +161,7 @@ async function runScheduledPullJob(options: {
|
||||||
memoryModel?: string;
|
memoryModel?: string;
|
||||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||||
|
sqlAnalysis?: SqlAnalysisPort;
|
||||||
logger?: KtxLogger;
|
logger?: KtxLogger;
|
||||||
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
||||||
abortSignal?: AbortSignal;
|
abortSignal?: AbortSignal;
|
||||||
|
|
@ -214,6 +217,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
|
||||||
memoryModel: options.memoryModel,
|
memoryModel: options.memoryModel,
|
||||||
semanticLayerCompute: options.semanticLayerCompute,
|
semanticLayerCompute: options.semanticLayerCompute,
|
||||||
queryExecutor: options.queryExecutor,
|
queryExecutor: options.queryExecutor,
|
||||||
|
sqlAnalysis: options.sqlAnalysis,
|
||||||
logger: options.logger,
|
logger: options.logger,
|
||||||
embeddingProvider: options.embeddingProvider,
|
embeddingProvider: options.embeddingProvider,
|
||||||
abortSignal: options.abortSignal,
|
abortSignal: options.abortSignal,
|
||||||
|
|
@ -397,6 +401,7 @@ export async function runLocalMetabaseIngest(
|
||||||
memoryModel: options.memoryModel,
|
memoryModel: options.memoryModel,
|
||||||
semanticLayerCompute: options.semanticLayerCompute,
|
semanticLayerCompute: options.semanticLayerCompute,
|
||||||
queryExecutor: options.queryExecutor,
|
queryExecutor: options.queryExecutor,
|
||||||
|
sqlAnalysis: options.sqlAnalysis,
|
||||||
logger: options.logger,
|
logger: options.logger,
|
||||||
embeddingProvider: options.embeddingProvider,
|
embeddingProvider: options.embeddingProvider,
|
||||||
abortSignal: options.abortSignal,
|
abortSignal: options.abortSignal,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import type { KtxFileStorePort } from '../../../core/file-store.js';
|
import type { KtxFileStorePort } from '../../../core/file-store.js';
|
||||||
import type { SlConnectionCatalogPort } from '../../../sl/ports.js';
|
import type { SlConnectionCatalogPort } from '../../../sl/ports.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../../sql-analysis/ports.js';
|
||||||
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
||||||
import type { BaseTool, ToolContext } from '../../../tools/base-tool.js';
|
import type { BaseTool, ToolContext } from '../../../tools/base-tool.js';
|
||||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||||
|
|
@ -8,6 +9,7 @@ import { SqlExecutionTool } from './sql-execution.tool.js';
|
||||||
|
|
||||||
export function createWarehouseVerificationTools(deps: {
|
export function createWarehouseVerificationTools(deps: {
|
||||||
connections: SlConnectionCatalogPort;
|
connections: SlConnectionCatalogPort;
|
||||||
|
sqlAnalysis?: SqlAnalysisPort;
|
||||||
fallbackFileStore: KtxFileStorePort;
|
fallbackFileStore: KtxFileStorePort;
|
||||||
wikiSearchTool: BaseTool;
|
wikiSearchTool: BaseTool;
|
||||||
slDiscoverTool: BaseTool;
|
slDiscoverTool: BaseTool;
|
||||||
|
|
@ -18,7 +20,7 @@ export function createWarehouseVerificationTools(deps: {
|
||||||
});
|
});
|
||||||
return [
|
return [
|
||||||
new EntityDetailsTool(catalogFactory),
|
new EntityDetailsTool(catalogFactory),
|
||||||
new SqlExecutionTool(deps.connections),
|
new SqlExecutionTool(deps.connections, deps.sqlAnalysis),
|
||||||
new DiscoverDataTool({
|
new DiscoverDataTool({
|
||||||
wikiSearchTool: deps.wikiSearchTool,
|
wikiSearchTool: deps.wikiSearchTool,
|
||||||
slDiscoverTool: deps.slDiscoverTool,
|
slDiscoverTool: deps.slDiscoverTool,
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import { assertReadOnlySql, limitSqlForExecution } from '../../../../context/connections/read-only-sql.js';
|
import { assertReadOnlySql, limitSqlForExecution } from '../../../../context/connections/read-only-sql.js';
|
||||||
import type { SlConnectionCatalogPort } from '../../../../context/sl/ports.js';
|
import type { SlConnectionCatalogPort } from '../../../../context/sl/ports.js';
|
||||||
|
import { sqlAnalysisDialectForDriver } from '../../../../context/sql-analysis/dialect.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../../context/tools/base-tool.js';
|
import { BaseTool, type ToolContext, type ToolOutput } from '../../../../context/tools/base-tool.js';
|
||||||
|
|
||||||
const sqlExecutionInputSchema = z.object({
|
const sqlExecutionInputSchema = z.object({
|
||||||
|
|
@ -40,7 +42,10 @@ function markdownTable(headers: string[], rows: unknown[][], totalRows: number):
|
||||||
export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
||||||
readonly name = 'sql_execution';
|
readonly name = 'sql_execution';
|
||||||
|
|
||||||
constructor(private readonly connections: SlConnectionCatalogPort) {
|
constructor(
|
||||||
|
private readonly connections: SlConnectionCatalogPort,
|
||||||
|
private readonly sqlAnalysis?: SqlAnalysisPort,
|
||||||
|
) {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,9 +74,24 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!this.sqlAnalysis) {
|
||||||
|
throw new Error('sql_execution requires parser-backed SQL validation.');
|
||||||
|
}
|
||||||
|
|
||||||
let sql: string;
|
let sql: string;
|
||||||
let wrappedSql: string;
|
let wrappedSql: string;
|
||||||
try {
|
try {
|
||||||
|
const connection = await this.connections.getConnectionById(input.connectionId);
|
||||||
|
if (!connection) {
|
||||||
|
throw new Error(`Connection not found: ${input.connectionId}`);
|
||||||
|
}
|
||||||
|
const validation = await this.sqlAnalysis.validateReadOnly(
|
||||||
|
input.sql,
|
||||||
|
sqlAnalysisDialectForDriver(connection.connectionType),
|
||||||
|
);
|
||||||
|
if (!validation.ok) {
|
||||||
|
throw new Error(validation.error ?? 'SQL is not read-only.');
|
||||||
|
}
|
||||||
sql = assertReadOnlySql(input.sql);
|
sql = assertReadOnlySql(input.sql);
|
||||||
wrappedSql = limitSqlForExecution(sql, input.rowLimit);
|
wrappedSql = limitSqlForExecution(sql, input.rowLimit);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,12 @@ import { createKtxEntityDetailsService } from '../../context/scan/entity-details
|
||||||
import type { KtxScanConnector } from '../../context/scan/types.js';
|
import type { KtxScanConnector } from '../../context/scan/types.js';
|
||||||
import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js';
|
import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js';
|
||||||
import { createKtxDiscoverDataService } from '../../context/search/discover.js';
|
import { createKtxDiscoverDataService } from '../../context/search/discover.js';
|
||||||
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
import { sqlAnalysisDialectForDriver } from '../../context/sql-analysis/dialect.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||||
import { compileLocalSlQuery } from '../../context/sl/local-query.js';
|
import { compileLocalSlQuery } from '../../context/sl/local-query.js';
|
||||||
import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js';
|
import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js';
|
||||||
|
import { readLocalSlSource } from '../../context/sl/local-sl.js';
|
||||||
|
import { assertSafeConnectionId } from '../../context/sl/source-files.js';
|
||||||
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
|
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
|
||||||
import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js';
|
import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js';
|
||||||
|
|
||||||
|
|
@ -22,64 +25,12 @@ interface CreateLocalProjectMcpContextPortsOptions {
|
||||||
embeddingService: KtxEmbeddingPort | null;
|
embeddingService: KtxEmbeddingPort | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function dialectForDriver(driver: string | undefined): string {
|
|
||||||
const normalized = (driver ?? 'postgres').toUpperCase();
|
|
||||||
const map: Record<string, string> = {
|
|
||||||
POSTGRES: 'postgres',
|
|
||||||
BIGQUERY: 'bigquery',
|
|
||||||
SNOWFLAKE: 'snowflake',
|
|
||||||
MYSQL: 'mysql',
|
|
||||||
SQLSERVER: 'tsql',
|
|
||||||
SQLITE: 'sqlite',
|
|
||||||
DUCKDB: 'duckdb',
|
|
||||||
CLICKHOUSE: 'clickhouse',
|
|
||||||
DATABRICKS: 'databricks',
|
|
||||||
};
|
|
||||||
return map[normalized] ?? 'postgres';
|
|
||||||
}
|
|
||||||
|
|
||||||
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
|
||||||
return dialectForDriver(driver) as SqlAnalysisDialect;
|
|
||||||
}
|
|
||||||
|
|
||||||
function assertSafePathToken(kind: string, value: string): string {
|
|
||||||
if (
|
|
||||||
value.trim().length === 0 ||
|
|
||||||
value.includes('..') ||
|
|
||||||
value.includes('\\') ||
|
|
||||||
value.startsWith('/') ||
|
|
||||||
value.startsWith('.') ||
|
|
||||||
value.includes('//')
|
|
||||||
) {
|
|
||||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
function assertSafeConnectionId(connectionId: string): string {
|
|
||||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
|
||||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
|
||||||
}
|
|
||||||
return assertSafePathToken('connection id', connectionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
function assertSafeSourceName(sourceName: string): string {
|
|
||||||
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
|
|
||||||
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
|
|
||||||
}
|
|
||||||
return assertSafePathToken('semantic-layer source name', sourceName);
|
|
||||||
}
|
|
||||||
|
|
||||||
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
|
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
|
||||||
if (connector?.cleanup) {
|
if (connector?.cleanup) {
|
||||||
await connector.cleanup();
|
await connector.cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function slPath(connectionId: string, sourceName: string): string {
|
|
||||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function executeValidatedReadOnlySql(
|
async function executeValidatedReadOnlySql(
|
||||||
project: KtxLocalProject,
|
project: KtxLocalProject,
|
||||||
options: CreateLocalProjectMcpContextPortsOptions,
|
options: CreateLocalProjectMcpContextPortsOptions,
|
||||||
|
|
@ -201,13 +152,11 @@ export function createLocalProjectMcpContextPorts(
|
||||||
},
|
},
|
||||||
semanticLayer: {
|
semanticLayer: {
|
||||||
async readSource(input) {
|
async readSource(input) {
|
||||||
const path = slPath(input.connectionId, input.sourceName);
|
const source = await readLocalSlSource(project, {
|
||||||
try {
|
connectionId: input.connectionId,
|
||||||
const result = await project.fileStore.readFile(path);
|
sourceName: input.sourceName,
|
||||||
return { sourceName: input.sourceName, yaml: result.content };
|
});
|
||||||
} catch {
|
return source ? { sourceName: source.name, yaml: source.yaml } : null;
|
||||||
return null;
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
async query(input, executionOptions) {
|
async query(input, executionOptions) {
|
||||||
if (!options.semanticLayerCompute) {
|
if (!options.semanticLayerCompute) {
|
||||||
|
|
|
||||||
|
|
@ -375,6 +375,9 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
||||||
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
||||||
try {
|
try {
|
||||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||||
|
if (!file) {
|
||||||
|
return { errors: [`${sourceName}: no standalone or overlay file found`], warnings: [] };
|
||||||
|
}
|
||||||
const parsed = YAML.parse(file.content) as SemanticLayerSource;
|
const parsed = YAML.parse(file.content) as SemanticLayerSource;
|
||||||
const isOverlay = parsed.table == null && parsed.sql == null;
|
const isOverlay = parsed.table == null && parsed.sql == null;
|
||||||
const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
|
const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
|
||||||
|
|
|
||||||
|
|
@ -483,7 +483,7 @@ export class MemoryAgentService {
|
||||||
if (session.connectionId) {
|
if (session.connectionId) {
|
||||||
for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) {
|
for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) {
|
||||||
try {
|
try {
|
||||||
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName).catch(() => null);
|
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||||
if (file?.content) {
|
if (file?.content) {
|
||||||
const parsed = this.parseYamlOrNull(file.content);
|
const parsed = this.parseYamlOrNull(file.content);
|
||||||
if (parsed) {
|
if (parsed) {
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import { buildLiveDatabaseManifestShards, type LiveDatabaseManifestExistingDescr
|
||||||
import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sql/skill-schemas.js';
|
import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sql/skill-schemas.js';
|
||||||
import type { KtxScanRelationshipConfig } from '../project/config.js';
|
import type { KtxScanRelationshipConfig } from '../project/config.js';
|
||||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||||
|
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
||||||
import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js';
|
import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js';
|
||||||
import {
|
import {
|
||||||
buildKtxRelationshipArtifacts,
|
buildKtxRelationshipArtifacts,
|
||||||
|
|
@ -205,7 +206,7 @@ async function loadExistingManifestState(
|
||||||
|
|
||||||
let files: string[];
|
let files: string[];
|
||||||
try {
|
try {
|
||||||
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml'));
|
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter(isSlYamlPath);
|
||||||
} catch {
|
} catch {
|
||||||
return { descriptions, preservedJoins, usage };
|
return { descriptions, preservedJoins, usage };
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,10 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-ex
|
||||||
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
||||||
import type { KtxMcpProgressCallback } from '../mcp/types.js';
|
import type { KtxMcpProgressCallback } from '../mcp/types.js';
|
||||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||||
|
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
|
||||||
import { loadLocalSlSourceRecords } from './local-sl.js';
|
import { loadLocalSlSourceRecords } from './local-sl.js';
|
||||||
import { toResolvedWire } from './semantic-layer.service.js';
|
import { toResolvedWire } from './semantic-layer.service.js';
|
||||||
|
import { assertSafeConnectionId } from './source-files.js';
|
||||||
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
|
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
|
||||||
|
|
||||||
const COMPILE_ONLY_REASON =
|
const COMPILE_ONLY_REASON =
|
||||||
|
|
@ -24,43 +26,6 @@ export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionRe
|
||||||
dialect: string;
|
dialect: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
function assertSafePathToken(kind: string, value: string): string {
|
|
||||||
if (
|
|
||||||
value.trim().length === 0 ||
|
|
||||||
value.includes('..') ||
|
|
||||||
value.includes('\\') ||
|
|
||||||
value.startsWith('/') ||
|
|
||||||
value.startsWith('.') ||
|
|
||||||
value.includes('//')
|
|
||||||
) {
|
|
||||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
function assertSafeConnectionId(connectionId: string): string {
|
|
||||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
|
||||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
|
||||||
}
|
|
||||||
return assertSafePathToken('connection id', connectionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
function dialectForDriver(driver: string | undefined): string {
|
|
||||||
const normalized = (driver ?? 'postgres').toUpperCase();
|
|
||||||
const map: Record<string, string> = {
|
|
||||||
POSTGRES: 'postgres',
|
|
||||||
BIGQUERY: 'bigquery',
|
|
||||||
SNOWFLAKE: 'snowflake',
|
|
||||||
MYSQL: 'mysql',
|
|
||||||
SQLSERVER: 'tsql',
|
|
||||||
SQLITE: 'sqlite',
|
|
||||||
DUCKDB: 'duckdb',
|
|
||||||
CLICKHOUSE: 'clickhouse',
|
|
||||||
DATABRICKS: 'databricks',
|
|
||||||
};
|
|
||||||
return map[normalized] ?? 'postgres';
|
|
||||||
}
|
|
||||||
|
|
||||||
function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string {
|
function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string {
|
||||||
if (requested) {
|
if (requested) {
|
||||||
return assertSafeConnectionId(requested);
|
return assertSafeConnectionId(requested);
|
||||||
|
|
@ -93,7 +58,7 @@ export async function compileLocalSlQuery(
|
||||||
): Promise<CompileLocalSlQueryResult> {
|
): Promise<CompileLocalSlQueryResult> {
|
||||||
await options.onProgress?.({ progress: 0, message: 'Compiling query' });
|
await options.onProgress?.({ progress: 0, message: 'Compiling query' });
|
||||||
const connectionId = resolveLocalConnectionId(project, options.connectionId);
|
const connectionId = resolveLocalConnectionId(project, options.connectionId);
|
||||||
const dialect = dialectForDriver(project.config.connections[connectionId]?.driver);
|
const dialect = sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver);
|
||||||
const sources = await loadComputableSources(project, connectionId);
|
const sources = await loadComputableSources(project, connectionId);
|
||||||
|
|
||||||
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });
|
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ import { join } from 'node:path';
|
||||||
import YAML from 'yaml';
|
import YAML from 'yaml';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||||
import type { KtxFileWriteResult } from '../../context/core/file-store.js';
|
|
||||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||||
import { HybridSearchCore } from '../../context/search/hybrid-search-core.js';
|
import { HybridSearchCore } from '../../context/search/hybrid-search-core.js';
|
||||||
import type { SearchCandidateGenerator } from '../../context/search/types.js';
|
import type { SearchCandidateGenerator } from '../../context/search/types.js';
|
||||||
|
|
@ -18,6 +17,13 @@ import {
|
||||||
} from './semantic-layer.service.js';
|
} from './semantic-layer.service.js';
|
||||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||||
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
||||||
|
import {
|
||||||
|
assertSafeConnectionId,
|
||||||
|
isSafeConnectionId,
|
||||||
|
isSlYamlPath,
|
||||||
|
slSourceNameForFile,
|
||||||
|
sourceNameFromPath,
|
||||||
|
} from './source-files.js';
|
||||||
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
||||||
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
|
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
|
||||||
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
|
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
|
||||||
|
|
@ -69,58 +75,10 @@ export type ResolvedSlSource =
|
||||||
| { kind: 'not-found' }
|
| { kind: 'not-found' }
|
||||||
| { kind: 'ambiguous'; connectionIds: string[] };
|
| { kind: 'ambiguous'; connectionIds: string[] };
|
||||||
|
|
||||||
const LOCAL_AUTHOR = 'ktx';
|
|
||||||
const LOCAL_AUTHOR_EMAIL = 'ktx@example.com';
|
|
||||||
|
|
||||||
function assertSafePathToken(kind: string, value: string): string {
|
|
||||||
if (
|
|
||||||
value.trim().length === 0 ||
|
|
||||||
value.includes('..') ||
|
|
||||||
value.includes('\\') ||
|
|
||||||
value.startsWith('/') ||
|
|
||||||
value.startsWith('.') ||
|
|
||||||
value.includes('//')
|
|
||||||
) {
|
|
||||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
function assertSafeConnectionId(connectionId: string): string {
|
|
||||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
|
||||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
|
||||||
}
|
|
||||||
return assertSafePathToken('connection id', connectionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
|
|
||||||
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
function assertSafeSourceName(sourceName: string): string {
|
|
||||||
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
|
|
||||||
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
|
|
||||||
}
|
|
||||||
return assertSafePathToken('semantic-layer source name', sourceName);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
function slPath(connectionId: string, sourceName: string): string {
|
|
||||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sourceNameFromPath(path: string): string {
|
|
||||||
return (
|
|
||||||
path
|
|
||||||
.split('/')
|
|
||||||
.at(-1)
|
|
||||||
?.replace(/\.ya?ml$/, '') ?? path
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseYamlRecord(raw: string): Record<string, unknown> {
|
function parseYamlRecord(raw: string): Record<string, unknown> {
|
||||||
const parsed = YAML.parse(raw) as unknown;
|
const parsed = YAML.parse(raw) as unknown;
|
||||||
if (!isRecord(parsed)) {
|
if (!isRecord(parsed)) {
|
||||||
|
|
@ -215,12 +173,17 @@ export async function loadLocalSlSourceRecords(
|
||||||
const dir = `semantic-layer/${connectionId}`;
|
const dir = `semantic-layer/${connectionId}`;
|
||||||
const schemaDir = `${dir}/_schema`;
|
const schemaDir = `${dir}/_schema`;
|
||||||
const listed = await project.fileStore.listFiles(dir);
|
const listed = await project.fileStore.listFiles(dir);
|
||||||
const paths = listed.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort();
|
const paths = listed.files.filter(isSlYamlPath).sort();
|
||||||
const sources = new Map<string, LocalSlSourceRecord>();
|
const sources = new Map<string, LocalSlSourceRecord>();
|
||||||
|
|
||||||
for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
|
for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
|
||||||
const raw = await project.fileStore.readFile(path);
|
const raw = await project.fileStore.readFile(path);
|
||||||
const tables = manifestTables(parseYamlRecord(raw.content));
|
let tables: Record<string, ManifestTableEntry> | null;
|
||||||
|
try {
|
||||||
|
tables = manifestTables(parseYamlRecord(raw.content));
|
||||||
|
} catch (error) {
|
||||||
|
throw new Error(`${path}: ${error instanceof Error ? error.message : String(error)}`);
|
||||||
|
}
|
||||||
if (!tables) {
|
if (!tables) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -237,7 +200,29 @@ export async function loadLocalSlSourceRecords(
|
||||||
|
|
||||||
for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
|
for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
|
||||||
const raw = await project.fileStore.readFile(path);
|
const raw = await project.fileStore.readFile(path);
|
||||||
const parsed = parseYamlRecord(raw.content);
|
let parsed: Record<string, unknown>;
|
||||||
|
try {
|
||||||
|
parsed = parseYamlRecord(raw.content);
|
||||||
|
} catch {
|
||||||
|
// A source mid-edit (e.g. an agent saved half-written YAML) must not take
|
||||||
|
// down reads, listings, or search for its siblings. Key it by the same
|
||||||
|
// name the writer side uses (the intact top-level `name:`, recovered even
|
||||||
|
// when the YAML is broken below it; filename only as a last resort) so a
|
||||||
|
// broken uppercase/hashed/human-renamed source stays reachable under its
|
||||||
|
// real name, and surface the raw content for repair.
|
||||||
|
const brokenName = slSourceNameForFile(path, raw.content);
|
||||||
|
sources.set(brokenName, {
|
||||||
|
connectionId,
|
||||||
|
name: brokenName,
|
||||||
|
path,
|
||||||
|
columnCount: 0,
|
||||||
|
measureCount: 0,
|
||||||
|
joinCount: 0,
|
||||||
|
yaml: raw.content,
|
||||||
|
source: { name: brokenName, grain: [], columns: [], joins: [], measures: [] },
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
|
const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
|
||||||
if (parsed.table || parsed.sql) {
|
if (parsed.table || parsed.sql) {
|
||||||
const source = parsedStandaloneSource(parsed, name);
|
const source = parsedStandaloneSource(parsed, name);
|
||||||
|
|
@ -292,50 +277,21 @@ export async function validateLocalSlSource(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @internal */
|
|
||||||
export async function writeLocalSlSource(
|
|
||||||
project: KtxLocalProject,
|
|
||||||
input: { connectionId: string; sourceName: string; yaml: string },
|
|
||||||
): Promise<KtxFileWriteResult> {
|
|
||||||
const validation = await validateLocalSlSource(input.yaml, { project, connectionId: input.connectionId });
|
|
||||||
if (!validation.valid) {
|
|
||||||
throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const parsed = parseYamlRecord(input.yaml);
|
|
||||||
if (typeof parsed.name === 'string' && parsed.name !== input.sourceName) {
|
|
||||||
throw new Error(`Semantic-layer source name "${parsed.name}" does not match requested path "${input.sourceName}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const path = slPath(input.connectionId, input.sourceName);
|
|
||||||
return project.fileStore.writeFile(
|
|
||||||
path,
|
|
||||||
input.yaml.endsWith('\n') ? input.yaml : `${input.yaml}\n`,
|
|
||||||
LOCAL_AUTHOR,
|
|
||||||
LOCAL_AUTHOR_EMAIL,
|
|
||||||
`Write semantic-layer source: ${input.connectionId}/${input.sourceName}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @internal */
|
|
||||||
export async function readLocalSlSource(
|
export async function readLocalSlSource(
|
||||||
project: KtxLocalProject,
|
project: KtxLocalProject,
|
||||||
input: { connectionId: string; sourceName: string },
|
input: { connectionId: string; sourceName: string },
|
||||||
): Promise<LocalSlSource | null> {
|
): Promise<LocalSlSource | null> {
|
||||||
const path = slPath(input.connectionId, input.sourceName);
|
// Source identity is the in-file `name:` (mirroring the warehouse identifier
|
||||||
try {
|
// verbatim, e.g. Snowflake's uppercase `WIDGET_SALES`), never the filename. The
|
||||||
const result = await project.fileStore.readFile(path);
|
// record loader resolves standalone files, overlays, manifest-backed sources,
|
||||||
return {
|
// and mid-edit files whose YAML no longer parses — so readers — `ktx sl read`,
|
||||||
...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }),
|
// `ktx sl validate`, and the `sl_read_source` MCP tool — can surface broken
|
||||||
yaml: result.content,
|
// content for repair instead of failing on it.
|
||||||
};
|
|
||||||
} catch {
|
|
||||||
const records = await loadLocalSlSourceRecords(project, {
|
const records = await loadLocalSlSourceRecords(project, {
|
||||||
connectionId: input.connectionId,
|
connectionId: input.connectionId,
|
||||||
});
|
});
|
||||||
const record = records.find((source) => source.name === input.sourceName);
|
const record = records.find((source) => source.name === input.sourceName);
|
||||||
return record ? { ...record } : null;
|
return record ? { ...record } : null;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function resolveLocalSlSource(
|
export async function resolveLocalSlSource(
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-sch
|
||||||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||||
import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||||
|
import { isSlYamlPath, resolveSlSourceFile, slDeclaredSourceName, slSourceFilePath } from './source-files.js';
|
||||||
import type {
|
import type {
|
||||||
ResolvedSemanticLayerSource,
|
ResolvedSemanticLayerSource,
|
||||||
SemanticLayerColumnOverride,
|
SemanticLayerColumnOverride,
|
||||||
|
|
@ -135,8 +136,30 @@ export class SemanticLayerService {
|
||||||
|
|
||||||
// ── YAML File Operations ────────────────────────────────
|
// ── YAML File Operations ────────────────────────────────
|
||||||
|
|
||||||
private sourcePath(connectionId: string, sourceName: string): string {
|
// The in-file `name:` is the source's identity; the filename is only a derived
|
||||||
return `${SL_DIR_PREFIX}/${connectionId}/${sourceName}.yaml`;
|
// label. Rewrites land on the file that already declares the name (humans may
|
||||||
|
// rename files freely); new sources get a derived filename. A file already
|
||||||
|
// sitting at the derived path that declares a name declares a *different* one
|
||||||
|
// (the resolver would have matched it otherwise) — fail instead of clobbering
|
||||||
|
// it. A nameless/unparseable file there is the broken remains of this very
|
||||||
|
// source (the derived path is a function of the name), so overwriting it is
|
||||||
|
// the repair path, not data loss.
|
||||||
|
private async resolveWritePath(connectionId: string, sourceName: string): Promise<string> {
|
||||||
|
const existing = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||||
|
if (existing) {
|
||||||
|
return existing.path;
|
||||||
|
}
|
||||||
|
const path = slSourceFilePath(connectionId, sourceName);
|
||||||
|
let occupant: string | null = null;
|
||||||
|
try {
|
||||||
|
occupant = slDeclaredSourceName((await this.configService.readFile(path)).content);
|
||||||
|
} catch {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
if (occupant !== null) {
|
||||||
|
throw new Error(`Cannot write source '${sourceName}': ${path} already defines source '${occupant}'`);
|
||||||
|
}
|
||||||
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
async writeSource(
|
async writeSource(
|
||||||
|
|
@ -185,39 +208,42 @@ export class SemanticLayerService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const path = this.sourcePath(connectionId, source.name);
|
const path = await this.resolveWritePath(connectionId, source.name);
|
||||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||||
skipLock: options?.skipLock,
|
skipLock: options?.skipLock,
|
||||||
});
|
});
|
||||||
return { ...result, warnings };
|
// The filename is derived from (or resolved by) the source name — surface
|
||||||
|
// the actual path so callers don't have to re-resolve it.
|
||||||
|
return { ...result, path, warnings };
|
||||||
}
|
}
|
||||||
|
|
||||||
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string }> {
|
/**
|
||||||
const path = this.sourcePath(connectionId, sourceName);
|
* Raw standalone/overlay file for a source, resolved by its in-file `name:`.
|
||||||
const result = await this.configService.readFile(path);
|
* Returns null when no file declares the name (the source may still exist as
|
||||||
return { content: result.content, path };
|
* a manifest entry under `_schema/`).
|
||||||
|
*/
|
||||||
|
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string } | null> {
|
||||||
|
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||||
|
return file ? { content: file.content, path: file.path } : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
||||||
let content: string;
|
const file = await this.readSourceFile(connectionId, sourceName);
|
||||||
try {
|
if (!file) {
|
||||||
const result = await this.readSourceFile(connectionId, sourceName);
|
|
||||||
content = result.content;
|
|
||||||
} catch {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return YAML.parse(content) as SemanticLayerSource;
|
return YAML.parse(file.content) as SemanticLayerSource;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Distinguish a YAML parse failure from a missing file. The file exists but
|
// Distinguish a YAML parse failure from a missing file. The file exists but
|
||||||
// its contents are unparseable — callers that treat null as "does not exist"
|
// its contents are unparseable — callers that treat null as "does not exist"
|
||||||
// could otherwise overwrite the broken file. Surface the parse failure via
|
// could otherwise overwrite the broken file. Surface the parse failure via
|
||||||
// the service logger so the broken source is at least visible.
|
// the service logger so the broken source is at least visible.
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
`[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
`[loadSource] ${file.path}: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
);
|
);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -231,7 +257,7 @@ export class SemanticLayerService {
|
||||||
let allFiles: string[];
|
let allFiles: string[];
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(dir);
|
const result = await this.configService.listFiles(dir);
|
||||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
allFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
|
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
|
||||||
loadErrors.push(message);
|
loadErrors.push(message);
|
||||||
|
|
@ -338,7 +364,7 @@ export class SemanticLayerService {
|
||||||
let allFiles: string[];
|
let allFiles: string[];
|
||||||
try {
|
try {
|
||||||
const listing = await this.configService.listFiles(dir);
|
const listing = await this.configService.listFiles(dir);
|
||||||
allFiles = listing.files.filter((f) => f.endsWith('.yaml'));
|
allFiles = listing.files.filter((f) => isSlYamlPath(f));
|
||||||
} catch {
|
} catch {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
@ -408,7 +434,7 @@ export class SemanticLayerService {
|
||||||
const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`;
|
const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`;
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(schemaDir);
|
const result = await this.configService.listFiles(schemaDir);
|
||||||
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||||
for (const filePath of yamlFiles) {
|
for (const filePath of yamlFiles) {
|
||||||
try {
|
try {
|
||||||
const { content } = await this.configService.readFile(filePath);
|
const { content } = await this.configService.readFile(filePath);
|
||||||
|
|
@ -449,7 +475,7 @@ export class SemanticLayerService {
|
||||||
let yamlFiles: string[];
|
let yamlFiles: string[];
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(schemaDir);
|
const result = await this.configService.listFiles(schemaDir);
|
||||||
yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||||
} catch {
|
} catch {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -533,7 +559,7 @@ export class SemanticLayerService {
|
||||||
.map((c) => c.name);
|
.map((c) => c.name);
|
||||||
if (absentDeclaredColumns.length > 0) {
|
if (absentDeclaredColumns.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
`${source.name}: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
||||||
`but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
|
`but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
|
||||||
`Available columns: ${[...manifestColumns.values()].join(', ')}`,
|
`Available columns: ${[...manifestColumns.values()].join(', ')}`,
|
||||||
);
|
);
|
||||||
|
|
@ -545,7 +571,7 @@ export class SemanticLayerService {
|
||||||
});
|
});
|
||||||
if (missingGrainColumns.length > 0) {
|
if (missingGrainColumns.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
`${source.name}: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -562,7 +588,7 @@ export class SemanticLayerService {
|
||||||
});
|
});
|
||||||
if (missing.length > 0) {
|
if (missing.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
`${source.name}: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -577,7 +603,7 @@ export class SemanticLayerService {
|
||||||
});
|
});
|
||||||
if (missing.length > 0) {
|
if (missing.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
`${source.name}: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -592,7 +618,7 @@ export class SemanticLayerService {
|
||||||
});
|
});
|
||||||
if (exprMissing.length > 0) {
|
if (exprMissing.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
`${source.name}: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -606,7 +632,7 @@ export class SemanticLayerService {
|
||||||
});
|
});
|
||||||
if (filterMissing.length > 0) {
|
if (filterMissing.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
`${source.name}: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -619,7 +645,7 @@ export class SemanticLayerService {
|
||||||
}
|
}
|
||||||
if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
|
if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: join to "${join.to}" references local column ` +
|
`${source.name}: join to "${join.to}" references local column ` +
|
||||||
`"${parsed.localColumn}" that is not a valid output column`,
|
`"${parsed.localColumn}" that is not a valid output column`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -631,7 +657,7 @@ export class SemanticLayerService {
|
||||||
const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
|
const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
|
||||||
if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
|
if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${source.name}.yaml: join to "${join.to}" references target column ` +
|
`${source.name}: join to "${join.to}" references target column ` +
|
||||||
`"${parsed.targetColumn}" that does not exist on the target source`,
|
`"${parsed.targetColumn}" that does not exist on the target source`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -650,43 +676,30 @@ export class SemanticLayerService {
|
||||||
return SemanticLayerService.mapDialect(connection.connectionType);
|
return SemanticLayerService.mapDialect(connection.connectionType);
|
||||||
}
|
}
|
||||||
|
|
||||||
async listSourceNames(connectionId: string): Promise<string[]> {
|
|
||||||
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
|
||||||
try {
|
|
||||||
const result = await this.configService.listFiles(dir);
|
|
||||||
return result.files.filter((f) => f.endsWith('.yaml')).map((f) => f.replace(`${dir}/`, '').replace('.yaml', ''));
|
|
||||||
} catch {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async listFilesForConnection(connectionId: string): Promise<string[]> {
|
async listFilesForConnection(connectionId: string): Promise<string[]> {
|
||||||
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(dir, true);
|
const result = await this.configService.listFiles(dir, true);
|
||||||
return result.files.filter((f) => f.endsWith('.yaml'));
|
return result.files.filter((f) => isSlYamlPath(f));
|
||||||
} catch {
|
} catch {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async readFileByPath(connectionId: string, relativePath: string): Promise<{ content: string; readOnly: boolean }> {
|
|
||||||
const fullPath = `${SL_DIR_PREFIX}/${connectionId}/${relativePath}`;
|
|
||||||
const result = await this.configService.readFile(fullPath);
|
|
||||||
return {
|
|
||||||
content: result.content,
|
|
||||||
readOnly: relativePath.startsWith('_schema/'),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) {
|
async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) {
|
||||||
const path = this.sourcePath(connectionId, sourceName);
|
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||||
return this.configService.deleteFile(path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
|
if (!file) {
|
||||||
|
// `deleteFile` returns null for a missing path, which would let a no-op
|
||||||
|
// delete read as success. Distinguish the two real cases instead.
|
||||||
|
if (await this.isManifestBacked(connectionId, sourceName)) {
|
||||||
|
throw new Error(
|
||||||
|
`Source '${sourceName}' is defined by the scan manifest (_schema/) and has no overlay file to delete. ` +
|
||||||
|
`Rescan the connection to remove it from the manifest.`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
throw new Error(`Semantic-layer source not found: ${connectionId}/${sourceName}`);
|
||||||
async getSourceHistory(connectionId: string, sourceName: string) {
|
}
|
||||||
const path = this.sourcePath(connectionId, sourceName);
|
return this.configService.deleteFile(file.path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
|
||||||
return this.configService.getFileHistory(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -815,7 +828,7 @@ export class SemanticLayerService {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\/.+\.ya?ml$/.test(file));
|
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\//.test(file) && isSlYamlPath(file));
|
||||||
const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = [];
|
const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = [];
|
||||||
for (const filePath of schemaFiles) {
|
for (const filePath of schemaFiles) {
|
||||||
const connectionId = filePath.split('/')[1];
|
const connectionId = filePath.split('/')[1];
|
||||||
|
|
@ -844,7 +857,7 @@ export class SemanticLayerService {
|
||||||
let allFiles: string[];
|
let allFiles: string[];
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(dir);
|
const result = await this.configService.listFiles(dir);
|
||||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
allFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||||
} catch {
|
} catch {
|
||||||
return warnings;
|
return warnings;
|
||||||
}
|
}
|
||||||
|
|
@ -1030,7 +1043,7 @@ export class SemanticLayerService {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(dir);
|
const result = await this.configService.listFiles(dir);
|
||||||
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||||
|
|
||||||
for (const filePath of yamlFiles) {
|
for (const filePath of yamlFiles) {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
160
packages/cli/src/context/sl/source-files.ts
Normal file
160
packages/cli/src/context/sl/source-files.ts
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
import { createHash } from 'node:crypto';
|
||||||
|
import YAML from 'yaml';
|
||||||
|
import type { KtxFileStorePort } from '../../context/core/file-store.js';
|
||||||
|
|
||||||
|
// Semantic-layer source identity lives in the file's `name:` field, which mirrors
|
||||||
|
// the warehouse identifier verbatim (Snowflake's uppercase `SIGNED_UP`, `EVENT$LOG`).
|
||||||
|
// The filename is a derived label and never participates in identity: reads resolve
|
||||||
|
// a source by scanning the connection directory and matching `name:`, and writes
|
||||||
|
// reuse the resolved file's path, so files can be freely renamed by humans without
|
||||||
|
// changing which source they define.
|
||||||
|
|
||||||
|
function assertSafePathToken(kind: string, value: string): string {
|
||||||
|
if (
|
||||||
|
value.trim().length === 0 ||
|
||||||
|
value.includes('..') ||
|
||||||
|
value.includes('\\') ||
|
||||||
|
value.startsWith('/') ||
|
||||||
|
value.startsWith('.') ||
|
||||||
|
value.includes('//')
|
||||||
|
) {
|
||||||
|
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function assertSafeConnectionId(connectionId: string): string {
|
||||||
|
if (!isSafeConnectionId(connectionId)) {
|
||||||
|
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||||
|
}
|
||||||
|
return assertSafePathToken('connection id', connectionId);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
|
||||||
|
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function sourceNameFromPath(path: string): string {
|
||||||
|
return (
|
||||||
|
path
|
||||||
|
.split('/')
|
||||||
|
.at(-1)
|
||||||
|
?.replace(/\.ya?ml$/, '') ?? path
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The one predicate for "this path is a semantic-layer YAML file". ktx itself
|
||||||
|
// always writes `.yaml` (see `slSourceFileName`), but humans rename freely and
|
||||||
|
// the dbt ecosystem's habit is `.yml`, so every reader must accept both — a
|
||||||
|
// listing that recognizes only one extension makes the same file visible to
|
||||||
|
// some entry points and invisible to others.
|
||||||
|
export function isSlYamlPath(path: string): boolean {
|
||||||
|
return path.endsWith('.yaml') || path.endsWith('.yml');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Windows refuses these basenames regardless of extension — a genuinely universal
|
||||||
|
// filesystem invariant, so the static list is acceptable.
|
||||||
|
const WINDOWS_RESERVED_BASENAME = /^(?:con|prn|aux|nul|com[0-9]|lpt[0-9])$/;
|
||||||
|
|
||||||
|
const SAFE_FILE_BASENAME = /^[a-z0-9][a-z0-9_]{0,63}$/;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Derive the filename for a semantic-layer source. Total over all possible
|
||||||
|
* source names — never throws.
|
||||||
|
*
|
||||||
|
* Names that are already safe lowercase snake_case become `<name>.yaml`;
|
||||||
|
* anything else becomes `<slug>-<8 hex of sha256(name)>.yaml`. The two ranges
|
||||||
|
* are disjoint and the mapping is injective: safe filenames contain no `-`,
|
||||||
|
* hashed filenames always end in `-<8 hex>`, and slugs are lowercased so names
|
||||||
|
* differing only by case get distinct hashes instead of colliding paths on
|
||||||
|
* case-insensitive filesystems (macOS APFS, Windows).
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export function slSourceFileName(sourceName: string): string {
|
||||||
|
if (SAFE_FILE_BASENAME.test(sourceName) && !WINDOWS_RESERVED_BASENAME.test(sourceName)) {
|
||||||
|
return `${sourceName}.yaml`;
|
||||||
|
}
|
||||||
|
const slug = sourceName
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/[^a-z0-9_]+/g, '_')
|
||||||
|
.replace(/_+/g, '_')
|
||||||
|
.replace(/^_+|_+$/g, '')
|
||||||
|
.slice(0, 64);
|
||||||
|
const hash = createHash('sha256').update(sourceName, 'utf-8').digest('hex').slice(0, 8);
|
||||||
|
return `${slug || 'src'}-${hash}.yaml`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function slSourceFilePath(connectionId: string, sourceName: string): string {
|
||||||
|
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${slSourceFileName(sourceName)}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SlSourceFile {
|
||||||
|
path: string;
|
||||||
|
content: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same keying as `loadLocalSlSourceRecords`: the in-file `name:` is the identity;
|
||||||
|
// the filename is only a fallback for files so broken that even the `name:` is
|
||||||
|
// unrecoverable, or genuinely nameless ones. A file left mid-edit with a syntax
|
||||||
|
// error below its `name:` line keeps its declared identity (see
|
||||||
|
// `slDeclaredSourceName`), so a human-renamed source is still addressed by name
|
||||||
|
// while broken instead of silently reverting to its filename.
|
||||||
|
export function slSourceNameForFile(path: string, content: string): string {
|
||||||
|
return slDeclaredSourceName(content) ?? sourceNameFromPath(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The `name:` a semantic-layer YAML file declares, or null when the file is
|
||||||
|
* nameless or so broken even the name is unrecoverable. Null is how
|
||||||
|
* `writeSource` tells a genuine name conflict at a derived path apart from the
|
||||||
|
* broken remains of the source being written, which a rewrite must repair
|
||||||
|
* rather than refuse.
|
||||||
|
*
|
||||||
|
* Uses `parseDocument`, not `parse`: a file with a syntax error below the
|
||||||
|
* `name:` line still parses into a partial tree whose top-level `name:` is
|
||||||
|
* intact. `parse` would throw on the same input and drop the source to its
|
||||||
|
* filename — wrong for human-renamed files, whose filename is not the name.
|
||||||
|
*/
|
||||||
|
export function slDeclaredSourceName(content: string): string | null {
|
||||||
|
let doc: ReturnType<typeof YAML.parseDocument>;
|
||||||
|
try {
|
||||||
|
doc = YAML.parseDocument(content);
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const name = doc.get('name');
|
||||||
|
return typeof name === 'string' && name.length > 0 ? name : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the standalone/overlay file that defines `sourceName` for a connection.
|
||||||
|
* Returns null when no file declares the name (the source may still exist as a
|
||||||
|
* manifest entry under `_schema/`). Throws when more than one file declares the
|
||||||
|
* same name — that breaks the one-file-per-name invariant and must be repaired
|
||||||
|
* by hand rather than silently picking one.
|
||||||
|
*/
|
||||||
|
export async function resolveSlSourceFile(
|
||||||
|
fileStore: Pick<KtxFileStorePort, 'listFiles' | 'readFile'>,
|
||||||
|
connectionId: string,
|
||||||
|
sourceName: string,
|
||||||
|
): Promise<SlSourceFile | null> {
|
||||||
|
const dir = `semantic-layer/${assertSafeConnectionId(connectionId)}`;
|
||||||
|
const schemaDir = `${dir}/_schema`;
|
||||||
|
const listed = await fileStore.listFiles(dir);
|
||||||
|
const paths = listed.files.filter((file) => isSlYamlPath(file) && !file.startsWith(`${schemaDir}/`)).sort();
|
||||||
|
|
||||||
|
const matches: SlSourceFile[] = [];
|
||||||
|
for (const path of paths) {
|
||||||
|
const raw = await fileStore.readFile(path);
|
||||||
|
if (slSourceNameForFile(path, raw.content) === sourceName) {
|
||||||
|
matches.push({ path, content: raw.content });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (matches.length > 1) {
|
||||||
|
throw new Error(
|
||||||
|
`Multiple semantic-layer files declare source "${sourceName}": ${matches.map((match) => match.path).join(', ')}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return matches[0] ?? null;
|
||||||
|
}
|
||||||
|
|
@ -46,12 +46,8 @@ export abstract class BaseSemanticLayerTool<TInput extends ZodType = ZodType> ex
|
||||||
): Promise<string | null> {
|
): Promise<string | null> {
|
||||||
const semanticLayerService = context?.session?.semanticLayerService ?? this.semanticLayerService;
|
const semanticLayerService = context?.session?.semanticLayerService ?? this.semanticLayerService;
|
||||||
|
|
||||||
try {
|
const file = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||||
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
return file?.content ?? null;
|
||||||
return content;
|
|
||||||
} catch {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected buildMarkdown(
|
protected buildMarkdown(
|
||||||
|
|
|
||||||
|
|
@ -113,13 +113,8 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read existing source
|
// Read existing source
|
||||||
let currentYaml: string | null = null;
|
const currentFile = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||||
try {
|
const currentYaml = currentFile?.content ?? null;
|
||||||
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
|
||||||
currentYaml = content;
|
|
||||||
} catch {
|
|
||||||
currentYaml = null;
|
|
||||||
}
|
|
||||||
if (!currentYaml) {
|
if (!currentYaml) {
|
||||||
const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
|
const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
|
||||||
if (manifestBacked) {
|
if (manifestBacked) {
|
||||||
|
|
@ -165,6 +160,20 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
|
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The in-file `name:` is the source's identity — an edited name would make
|
||||||
|
// writeSource create a second source instead of updating this one.
|
||||||
|
if (source.name !== sourceName) {
|
||||||
|
return this.buildOutput(
|
||||||
|
false,
|
||||||
|
[
|
||||||
|
`Edits change "name:" from "${sourceName}" to "${source.name ?? '<missing>'}" — renaming is not supported. ` +
|
||||||
|
`Delete the source and recreate it under the new name.`,
|
||||||
|
],
|
||||||
|
sourceName,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||||
|
|
||||||
// Re-serialize and write
|
// Re-serialize and write
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
import YAML from 'yaml';
|
import YAML from 'yaml';
|
||||||
import type { GitService } from '../../../context/core/git.service.js';
|
import type { GitService } from '../../../context/core/git.service.js';
|
||||||
import type { KtxFileStorePort } from '../../../context/core/file-store.js';
|
import type { KtxFileListResult, KtxFileReadResult, KtxFileStorePort } from '../../../context/core/file-store.js';
|
||||||
import { SYSTEM_GIT_AUTHOR } from '../../../context/tools/authors.js';
|
import { SYSTEM_GIT_AUTHOR } from '../../../context/tools/authors.js';
|
||||||
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
|
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
|
||||||
import { sourceOverlaySchema } from '../schemas.js';
|
import { sourceOverlaySchema } from '../schemas.js';
|
||||||
import { SemanticLayerService } from '../semantic-layer.service.js';
|
import { SemanticLayerService } from '../semantic-layer.service.js';
|
||||||
|
import { resolveSlSourceFile, slSourceFilePath } from '../source-files.js';
|
||||||
import type { SemanticLayerSource } from '../types.js';
|
import type { SemanticLayerSource } from '../types.js';
|
||||||
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
|
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
|
||||||
|
|
||||||
|
|
@ -23,9 +24,6 @@ export interface SourceValidationResult {
|
||||||
warnings: string[];
|
warnings: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
const slSourcePath = (connectionId: string, sourceName: string): string =>
|
|
||||||
`semantic-layer/${connectionId}/${sourceName}.yaml`;
|
|
||||||
|
|
||||||
function resolveDialect(warehouse: string | null): string | null {
|
function resolveDialect(warehouse: string | null): string | null {
|
||||||
if (!warehouse) {
|
if (!warehouse) {
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -63,24 +61,21 @@ export async function validateSingleSource(
|
||||||
const errors: string[] = [];
|
const errors: string[] = [];
|
||||||
const warnings: string[] = [];
|
const warnings: string[] = [];
|
||||||
|
|
||||||
let content: string;
|
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||||
try {
|
if (!file) {
|
||||||
const result = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
errors.push(`${sourceName}: no standalone or overlay file found`);
|
||||||
content = result.content;
|
|
||||||
} catch {
|
|
||||||
errors.push(`${sourceName}.yaml: file not found`);
|
|
||||||
return { errors, warnings };
|
return { errors, warnings };
|
||||||
}
|
}
|
||||||
|
|
||||||
let parsed: Record<string, unknown>;
|
let parsed: Record<string, unknown>;
|
||||||
try {
|
try {
|
||||||
parsed = YAML.parse(content);
|
parsed = YAML.parse(file.content);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
errors.push(`${sourceName}.yaml: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
|
errors.push(`${sourceName}: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
|
||||||
return { errors, warnings };
|
return { errors, warnings };
|
||||||
}
|
}
|
||||||
if (!parsed || typeof parsed !== 'object') {
|
if (!parsed || typeof parsed !== 'object') {
|
||||||
errors.push(`${sourceName}.yaml: top-level content is not an object`);
|
errors.push(`${sourceName}: top-level content is not an object`);
|
||||||
return { errors, warnings };
|
return { errors, warnings };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,7 +84,7 @@ export async function validateSingleSource(
|
||||||
const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
|
const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
|
||||||
if (isManifestBacked) {
|
if (isManifestBacked) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${sourceName}.yaml: standalone source shadows an existing manifest entry — ` +
|
`${sourceName}: standalone source shadows an existing manifest entry — ` +
|
||||||
`writing it as-is drops the manifest's columns and joins. ` +
|
`writing it as-is drops the manifest's columns and joins. ` +
|
||||||
`Remove "sql:", "table:", "grain:", and base-table "columns:" and keep only ` +
|
`Remove "sql:", "table:", "grain:", and base-table "columns:" and keep only ` +
|
||||||
`"name:" plus overlay fields such as "measures:", "segments:", "descriptions:", ` +
|
`"name:" plus overlay fields such as "measures:", "segments:", "descriptions:", ` +
|
||||||
|
|
@ -103,21 +98,21 @@ export async function validateSingleSource(
|
||||||
const result = schema.safeParse(parsed);
|
const result = schema.safeParse(parsed);
|
||||||
if (!result.success) {
|
if (!result.success) {
|
||||||
const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
|
const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
|
||||||
errors.push(`${sourceName}.yaml: schema — ${issues}`);
|
errors.push(`${sourceName}: schema — ${issues}`);
|
||||||
const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
|
const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
|
||||||
if (errorPaths.has('joins')) {
|
if (errorPaths.has('joins')) {
|
||||||
warnings.push(
|
warnings.push(
|
||||||
`${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
|
`${sourceName}: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (errorPaths.has('columns')) {
|
if (errorPaths.has('columns')) {
|
||||||
warnings.push(
|
warnings.push(
|
||||||
`${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
|
`${sourceName}: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (errorPaths.has('measures')) {
|
if (errorPaths.has('measures')) {
|
||||||
warnings.push(
|
warnings.push(
|
||||||
`${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`,
|
`${sourceName}: hint — measure format: {name, expr, description (optional), filter (optional)}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return { errors, warnings };
|
return { errors, warnings };
|
||||||
|
|
@ -135,7 +130,7 @@ export async function validateSingleSource(
|
||||||
const seenMeasures = new Set<string>();
|
const seenMeasures = new Set<string>();
|
||||||
for (const m of measures) {
|
for (const m of measures) {
|
||||||
if (seenMeasures.has(m.name)) {
|
if (seenMeasures.has(m.name)) {
|
||||||
errors.push(`${sourceName}.yaml: duplicate measure name "${m.name}"`);
|
errors.push(`${sourceName}: duplicate measure name "${m.name}"`);
|
||||||
}
|
}
|
||||||
seenMeasures.add(m.name);
|
seenMeasures.add(m.name);
|
||||||
}
|
}
|
||||||
|
|
@ -168,7 +163,7 @@ export async function validateSingleSource(
|
||||||
const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
|
const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
|
||||||
if (missing.length > 0) {
|
if (missing.length > 0) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
|
`${sourceName}: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|
@ -205,7 +200,7 @@ function formatProbeError(args: {
|
||||||
const errMsg = error instanceof Error ? error.message : String(error);
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
const refColumns = sourceColumns.filter((c) => referencesColumn(probeSql, c.name));
|
const refColumns = sourceColumns.filter((c) => referencesColumn(probeSql, c.name));
|
||||||
const lines: string[] = [
|
const lines: string[] = [
|
||||||
measureName ? `${sourceName}.yaml: measure "${measureName}" ${headline}.` : `${sourceName}.yaml: ${headline}.`,
|
measureName ? `${sourceName}: measure "${measureName}" ${headline}.` : `${sourceName}: ${headline}.`,
|
||||||
];
|
];
|
||||||
if (warehouse) {
|
if (warehouse) {
|
||||||
lines.push(` Warehouse: ${warehouse}`);
|
lines.push(` Warehouse: ${warehouse}`);
|
||||||
|
|
@ -249,7 +244,7 @@ async function probeOverlayMeasures(
|
||||||
composed = all.find((s) => s.name === sourceName);
|
composed = all.find((s) => s.name === sourceName);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
errors.push(
|
errors.push(
|
||||||
`${sourceName}.yaml: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
|
`${sourceName}: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
|
||||||
);
|
);
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
@ -289,6 +284,26 @@ async function probeOverlayMeasures(
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A read-only view of the config repo at one commit, shaped for
|
||||||
|
* `resolveSlSourceFile` so name→file resolution runs against history exactly as
|
||||||
|
* it does against the working tree — one resolver, two backing stores. Used to
|
||||||
|
* recover the path a source occupied at `preHead` after the live file is gone.
|
||||||
|
*/
|
||||||
|
function gitCommitFileStore(
|
||||||
|
git: GitService,
|
||||||
|
commitHash: string,
|
||||||
|
): Pick<KtxFileStorePort, 'listFiles' | 'readFile'> {
|
||||||
|
return {
|
||||||
|
async listFiles(path: string): Promise<KtxFileListResult> {
|
||||||
|
return { files: await git.listFilesAtCommit(path, commitHash) };
|
||||||
|
},
|
||||||
|
async readFile(path: string): Promise<KtxFileReadResult> {
|
||||||
|
return { content: await git.getFileAtCommit(path, commitHash) };
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
|
* Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
|
||||||
* exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
|
* exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
|
||||||
|
|
@ -300,8 +315,16 @@ export async function revertSourceToPreHead(
|
||||||
preHead: string | null,
|
preHead: string | null,
|
||||||
sourceName: string,
|
sourceName: string,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const relPath = slSourcePath(connectionId, sourceName);
|
// Find the file that defines this source. While it is still on disk
|
||||||
|
// (invalid-but-present) the live resolver finds it by its in-file `name:`.
|
||||||
|
// Once the session deleted it, the path is gone too — and humans rename files
|
||||||
|
// freely, so it is NOT the writer-derived filename. Recover it from history by
|
||||||
|
// resolving the name against the preHead commit instead of guessing.
|
||||||
|
const live = await resolveSlSourceFile(deps.configService, connectionId, sourceName);
|
||||||
|
let relPath: string;
|
||||||
let preContent: string | null = null;
|
let preContent: string | null = null;
|
||||||
|
if (live) {
|
||||||
|
relPath = live.path;
|
||||||
if (preHead) {
|
if (preHead) {
|
||||||
try {
|
try {
|
||||||
preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
|
preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
|
||||||
|
|
@ -309,6 +332,13 @@ export async function revertSourceToPreHead(
|
||||||
preContent = null;
|
preContent = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const atPreHead = preHead
|
||||||
|
? await resolveSlSourceFile(gitCommitFileStore(deps.gitService, preHead), connectionId, sourceName)
|
||||||
|
: null;
|
||||||
|
relPath = atPreHead?.path ?? slSourceFilePath(connectionId, sourceName);
|
||||||
|
preContent = atPreHead?.content ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
if (preContent !== null) {
|
if (preContent !== null) {
|
||||||
await deps.configService.writeFile(
|
await deps.configService.writeFile(
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,12 @@ const slWriteSourceInputSchema = z.object({
|
||||||
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
|
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
|
||||||
sourceName: z
|
sourceName: z
|
||||||
.string()
|
.string()
|
||||||
.regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case (lowercase alphanumeric and underscores)')
|
.min(1)
|
||||||
.describe('Name of the source to create, edit, or delete'),
|
.describe(
|
||||||
|
"Name of the source to create, edit, or delete. Must equal the source's `name:`. Use the verbatim " +
|
||||||
|
'warehouse identifier when overlaying a manifest source (e.g. SIGNED_UP); snake_case is recommended ' +
|
||||||
|
'for new standalone sources.',
|
||||||
|
),
|
||||||
source: sourceInputSchema
|
source: sourceInputSchema
|
||||||
.optional()
|
.optional()
|
||||||
.describe(
|
.describe(
|
||||||
|
|
@ -152,6 +156,17 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The in-file `name:` is the source's identity; the file is written under
|
||||||
|
// source.name while the orphan/shadow checks key on sourceName — a mismatch
|
||||||
|
// would validate one source and save another.
|
||||||
|
if (input.source.name !== sourceName) {
|
||||||
|
return this.buildOutput(
|
||||||
|
false,
|
||||||
|
[`source.name "${input.source.name}" does not match sourceName "${sourceName}" — they must be identical.`],
|
||||||
|
sourceName,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return this.writeFullSource(
|
return this.writeFullSource(
|
||||||
connectionId,
|
connectionId,
|
||||||
input.source,
|
input.source,
|
||||||
|
|
@ -253,12 +268,8 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
||||||
connectionId: string,
|
connectionId: string,
|
||||||
sourceName: string,
|
sourceName: string,
|
||||||
): Promise<string | null> {
|
): Promise<string | null> {
|
||||||
try {
|
const file = await service.readSourceFile(connectionId, sourceName);
|
||||||
const { content } = await service.readSourceFile(connectionId, sourceName);
|
return file?.content ?? null;
|
||||||
return content;
|
|
||||||
} catch {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private async rejectOrphanOverlay(
|
private async rejectOrphanOverlay(
|
||||||
|
|
|
||||||
23
packages/cli/src/context/sql-analysis/dialect.ts
Normal file
23
packages/cli/src/context/sql-analysis/dialect.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
import type { SqlAnalysisDialect } from './ports.js';
|
||||||
|
|
||||||
|
// One mapping from ktx connection identity to the sqlglot dialect name used by
|
||||||
|
// the Python daemon (SQL analysis, read-only validation) and semantic-layer
|
||||||
|
// compute. Keys cover both vocabularies that name a connection's engine:
|
||||||
|
// ktx.yaml driver names ("postgres", "sqlserver") and the local connection-type
|
||||||
|
// spellings exposed by KtxConnectionInfo.connectionType ("POSTGRESQL").
|
||||||
|
const SQLGLOT_DIALECTS: Record<string, SqlAnalysisDialect> = {
|
||||||
|
postgres: 'postgres',
|
||||||
|
postgresql: 'postgres',
|
||||||
|
bigquery: 'bigquery',
|
||||||
|
snowflake: 'snowflake',
|
||||||
|
mysql: 'mysql',
|
||||||
|
sqlserver: 'tsql',
|
||||||
|
sqlite: 'sqlite',
|
||||||
|
duckdb: 'duckdb',
|
||||||
|
clickhouse: 'clickhouse',
|
||||||
|
databricks: 'databricks',
|
||||||
|
};
|
||||||
|
|
||||||
|
export function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
||||||
|
return SQLGLOT_DIALECTS[(driver ?? '').toLowerCase()] ?? 'postgres';
|
||||||
|
}
|
||||||
|
|
@ -13,7 +13,7 @@ import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
|
||||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||||
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
|
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
|
||||||
import { createCliOperationalLogger } from './io/logger.js';
|
import { createCliOperationalLogger } from './io/logger.js';
|
||||||
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
|
import { createKtxCliLocalIngestAdapters, resolveKtxCliSqlAnalysis } from './local-adapters.js';
|
||||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||||
import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js';
|
import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js';
|
||||||
import {
|
import {
|
||||||
|
|
@ -87,6 +87,7 @@ export interface KtxIngestDeps {
|
||||||
| 'memoryModel'
|
| 'memoryModel'
|
||||||
| 'semanticLayerCompute'
|
| 'semanticLayerCompute'
|
||||||
| 'queryExecutor'
|
| 'queryExecutor'
|
||||||
|
| 'sqlAnalysis'
|
||||||
| 'logger'
|
| 'logger'
|
||||||
| 'pullConfigOptions'
|
| 'pullConfigOptions'
|
||||||
>;
|
>;
|
||||||
|
|
@ -724,7 +725,7 @@ export async function runKtxIngest(
|
||||||
const localIngestOptions = deps.localIngestOptions ?? {};
|
const localIngestOptions = deps.localIngestOptions ?? {};
|
||||||
const managedDaemon = managedDaemonOptionsForIngestRun(args, deps.runtimeIo ?? io);
|
const managedDaemon = managedDaemonOptionsForIngestRun(args, deps.runtimeIo ?? io);
|
||||||
const operationalLogger = createCliOperationalLogger(io, args.outputMode);
|
const operationalLogger = createCliOperationalLogger(io, args.outputMode);
|
||||||
const adapterOptions = {
|
const baseAdapterOptions = {
|
||||||
...(localIngestOptions.pullConfigOptions ?? {}),
|
...(localIngestOptions.pullConfigOptions ?? {}),
|
||||||
...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
|
...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
|
||||||
...(managedDaemon ? { managedDaemon } : {}),
|
...(managedDaemon ? { managedDaemon } : {}),
|
||||||
|
|
@ -734,6 +735,10 @@ export async function runKtxIngest(
|
||||||
: {}),
|
: {}),
|
||||||
logger: operationalLogger,
|
logger: operationalLogger,
|
||||||
};
|
};
|
||||||
|
// One parser-backed SQL analysis port per run: the historic-sql adapter and
|
||||||
|
// the ingest sql_execution tool share the same daemon-backed validator.
|
||||||
|
const sqlAnalysis = localIngestOptions.sqlAnalysis ?? resolveKtxCliSqlAnalysis(baseAdapterOptions);
|
||||||
|
const adapterOptions = { ...baseAdapterOptions, sqlAnalysis };
|
||||||
const queryExecutor =
|
const queryExecutor =
|
||||||
localIngestOptions.queryExecutor ??
|
localIngestOptions.queryExecutor ??
|
||||||
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(ingestProject);
|
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(ingestProject);
|
||||||
|
|
@ -783,6 +788,7 @@ export async function runKtxIngest(
|
||||||
metabaseConnectionId: args.connectionId,
|
metabaseConnectionId: args.connectionId,
|
||||||
...localIngestOptions,
|
...localIngestOptions,
|
||||||
queryExecutor,
|
queryExecutor,
|
||||||
|
sqlAnalysis,
|
||||||
trigger: 'manual_resync',
|
trigger: 'manual_resync',
|
||||||
jobIdFactory: deps.jobIdFactory,
|
jobIdFactory: deps.jobIdFactory,
|
||||||
embeddingProvider,
|
embeddingProvider,
|
||||||
|
|
@ -861,6 +867,7 @@ export async function runKtxIngest(
|
||||||
jobId,
|
jobId,
|
||||||
...localIngestOptions,
|
...localIngestOptions,
|
||||||
queryExecutor,
|
queryExecutor,
|
||||||
|
sqlAnalysis,
|
||||||
pullConfigOptions: adapterOptions,
|
pullConfigOptions: adapterOptions,
|
||||||
embeddingProvider,
|
embeddingProvider,
|
||||||
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,7 @@ function ktxCliLookerOptions(
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function ktxCliHistoricSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions) {
|
export function resolveKtxCliSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions): SqlAnalysisPort {
|
||||||
if (options.sqlAnalysis) {
|
if (options.sqlAnalysis) {
|
||||||
return options.sqlAnalysis;
|
return options.sqlAnalysis;
|
||||||
}
|
}
|
||||||
|
|
@ -289,7 +289,7 @@ function historicSqlOptionsForLocalRun(
|
||||||
}
|
}
|
||||||
|
|
||||||
const base = {
|
const base = {
|
||||||
sqlAnalysis: ktxCliHistoricSqlAnalysis(options),
|
sqlAnalysis: resolveKtxCliSqlAnalysis(options),
|
||||||
};
|
};
|
||||||
|
|
||||||
if (dialect === 'postgres') {
|
if (dialect === 'postgres') {
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ LookML views map to SL sources, `measure:` to measures, `explore: { join: }` to
|
||||||
|
|
||||||
| LookML | KTX form | Notes |
|
| LookML | KTX form | Notes |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
|
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
|
||||||
| `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists |
|
| `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists |
|
||||||
| `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in |
|
| `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in |
|
||||||
| `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone |
|
| `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone |
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,8 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to
|
||||||
|
|
||||||
| MetricFlow | KTX form | Notes |
|
| MetricFlow | KTX form | Notes |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
|
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
|
||||||
| `semantic_model: X { model: source('s','t') }` | **Overlay** at `<connId>/X.yaml` over table `t`. | Same shape; `source()` still resolves to a physical table. |
|
| `semantic_model: X { model: source('s','t') }` | **Overlay** named `X` over table `t`. | Same shape; `source()` still resolves to a physical table. |
|
||||||
| `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. |
|
| `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. |
|
||||||
| `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. |
|
| `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. |
|
||||||
| `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "<agg>(<expr>)" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. |
|
| `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "<agg>(<expr>)" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. |
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ skills must verify warehouse identifiers with `discover_data`,
|
||||||
|
|
||||||
## Part 1 - Schema reference
|
## Part 1 - Schema reference
|
||||||
|
|
||||||
An SL source is a YAML file at `semantic-layer/<connectionId>/<source_name>.yaml`. There are three flavors:
|
An SL source is a YAML file under `semantic-layer/<connectionId>/`. The file's `name:` field is the source's identity — it mirrors the warehouse identifier verbatim (e.g. Snowflake's uppercase `SIGNED_UP`); the filename is only a derived label. Always address sources by name through the `sl_*` tools, never by file path. There are three flavors:
|
||||||
|
|
||||||
### Overlay sources
|
### Overlay sources
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,9 @@
|
||||||
import { describe, expect, it } from 'vitest';
|
import { describe, expect, it } from 'vitest';
|
||||||
import { assertReadOnlySql, limitSqlForExecution } from '../../../src/context/connections/read-only-sql.js';
|
import {
|
||||||
|
assertReadOnlySql,
|
||||||
|
limitSqlForExecution,
|
||||||
|
stripTrailingSqlNoise,
|
||||||
|
} from '../../../src/context/connections/read-only-sql.js';
|
||||||
|
|
||||||
describe('assertReadOnlySql', () => {
|
describe('assertReadOnlySql', () => {
|
||||||
it('allows select and with queries', () => {
|
it('allows select and with queries', () => {
|
||||||
|
|
@ -15,6 +19,51 @@ describe('assertReadOnlySql', () => {
|
||||||
'Only read-only SELECT/WITH queries can be executed locally',
|
'Only read-only SELECT/WITH queries can be executed locally',
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('accepts read-only queries that begin with leading comments', () => {
|
||||||
|
expect(assertReadOnlySql('-- daily widget sales\nselect count(*) from public.widget_sales')).toBe(
|
||||||
|
'select count(*) from public.widget_sales',
|
||||||
|
);
|
||||||
|
expect(assertReadOnlySql('/* block */\n with paid as (select 1) select * from paid')).toContain('with paid');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('still rejects mutating statements hidden behind leading comments', () => {
|
||||||
|
expect(() => assertReadOnlySql('-- harmless\n delete from orders')).toThrow(
|
||||||
|
'Only read-only SELECT/WITH queries can be executed locally',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects a second statement smuggled after a semicolon', () => {
|
||||||
|
expect(() => assertReadOnlySql('select 1; drop table orders')).toThrow(
|
||||||
|
'Only one SQL statement can be executed.',
|
||||||
|
);
|
||||||
|
expect(() => assertReadOnlySql('select 1;\n-- pad\ndelete from orders')).toThrow(
|
||||||
|
'Only one SQL statement can be executed.',
|
||||||
|
);
|
||||||
|
expect(() => assertReadOnlySql('select 1; /* pad */ truncate orders;')).toThrow(
|
||||||
|
'Only one SQL statement can be executed.',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts trailing semicolons, including repeated ones followed by comments', () => {
|
||||||
|
expect(assertReadOnlySql('select 1;')).toBe('select 1;');
|
||||||
|
expect(assertReadOnlySql('select 1 ;; \n')).toBe('select 1 ;;');
|
||||||
|
expect(assertReadOnlySql('select 1; -- done')).toBe('select 1; -- done');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('ignores semicolons inside string literals, quoted identifiers, and comments', () => {
|
||||||
|
expect(assertReadOnlySql("select string_agg(name, '; ') from t")).toBe("select string_agg(name, '; ') from t");
|
||||||
|
expect(assertReadOnlySql("select 'it''s; quoted' from t")).toBe("select 'it''s; quoted' from t");
|
||||||
|
expect(assertReadOnlySql('select ";" from "t;u"')).toBe('select ";" from "t;u"');
|
||||||
|
expect(assertReadOnlySql('select 1 -- tail; comment')).toBe('select 1 -- tail; comment');
|
||||||
|
expect(assertReadOnlySql('select 1 /* a;b */ + 2')).toBe('select 1 /* a;b */ + 2');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects statements smuggled after a string literal that closes a semicolon early', () => {
|
||||||
|
expect(() => assertReadOnlySql("select 'a'; delete from orders")).toThrow(
|
||||||
|
'Only one SQL statement can be executed.',
|
||||||
|
);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('limitSqlForExecution', () => {
|
describe('limitSqlForExecution', () => {
|
||||||
|
|
@ -27,4 +76,42 @@ describe('limitSqlForExecution', () => {
|
||||||
it('returns the trimmed SQL when no maxRows value is provided', () => {
|
it('returns the trimmed SQL when no maxRows value is provided', () => {
|
||||||
expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders');
|
expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('strips leading comments before wrapping with a row limit', () => {
|
||||||
|
expect(limitSqlForExecution('-- top customers\nselect * from public.orders', 25)).toBe(
|
||||||
|
'select * from (select * from public.orders) as ktx_query_result limit 25',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('drops a trailing semicolon followed by a comment so the subquery stays valid', () => {
|
||||||
|
// The single-statement gate accepts `select 1; -- done`; without stripping
|
||||||
|
// the terminator the wrapper would embed `select 1; -- done` and comment out
|
||||||
|
// the closing paren and limit clause.
|
||||||
|
expect(limitSqlForExecution('select 1; -- done', 5)).toBe(
|
||||||
|
'select * from (select 1) as ktx_query_result limit 5',
|
||||||
|
);
|
||||||
|
expect(limitSqlForExecution('select 1; /* note */', 5)).toBe(
|
||||||
|
'select * from (select 1) as ktx_query_result limit 5',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('drops a trailing line comment with no semicolon before wrapping', () => {
|
||||||
|
expect(limitSqlForExecution('select 1 -- done', 5)).toBe('select * from (select 1) as ktx_query_result limit 5');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('stripTrailingSqlNoise', () => {
|
||||||
|
it('removes trailing semicolons, comments, and whitespace', () => {
|
||||||
|
expect(stripTrailingSqlNoise('select 1;')).toBe('select 1');
|
||||||
|
expect(stripTrailingSqlNoise('select 1 ;; ')).toBe('select 1');
|
||||||
|
expect(stripTrailingSqlNoise('select 1; -- done')).toBe('select 1');
|
||||||
|
expect(stripTrailingSqlNoise('select 1 -- done')).toBe('select 1');
|
||||||
|
expect(stripTrailingSqlNoise('select 1; /* trailing */')).toBe('select 1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves semicolons and comment markers inside literals and mid-statement', () => {
|
||||||
|
expect(stripTrailingSqlNoise("select 'a; -- b'")).toBe("select 'a; -- b'");
|
||||||
|
expect(stripTrailingSqlNoise('select 1 /* a;b */ + 2')).toBe('select 1 /* a;b */ + 2');
|
||||||
|
expect(stripTrailingSqlNoise('select ";" from "t;u"')).toBe('select ";" from "t;u"');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
import { mkdir, mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
||||||
import { tmpdir } from 'node:os';
|
import { tmpdir } from 'node:os';
|
||||||
import { join } from 'node:path';
|
import { dirname, join } from 'node:path';
|
||||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||||
import type { KtxCoreConfig } from '../../../src/context/core/config.js';
|
import type { KtxCoreConfig } from '../../../src/context/core/config.js';
|
||||||
import { GitService } from '../../../src/context/core/git.service.js';
|
import { GitService } from '../../../src/context/core/git.service.js';
|
||||||
|
|
@ -35,10 +35,29 @@ describe('GitService', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
|
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
|
||||||
|
await mkdir(dirname(join(tempDir, filePath)), { recursive: true });
|
||||||
await writeFile(join(tempDir, filePath), content, 'utf-8');
|
await writeFile(join(tempDir, filePath), content, 'utf-8');
|
||||||
return service.commitFile(filePath, message, 'Test', 'test@example.com');
|
return service.commitFile(filePath, message, 'Test', 'test@example.com');
|
||||||
};
|
};
|
||||||
|
|
||||||
|
describe('listFilesAtCommit', () => {
|
||||||
|
it('lists matching paths at a commit and recovers files deleted since', async () => {
|
||||||
|
await writeAndCommit('semantic-layer/warehouse/custom.yaml', 'name: orders\n');
|
||||||
|
const atSeed = await service.revParseHead();
|
||||||
|
await service.deleteFile('semantic-layer/warehouse/custom.yaml', 'drop', 'Test', 'test@example.com');
|
||||||
|
|
||||||
|
// HEAD no longer has the file; the seed commit still does.
|
||||||
|
await expect(service.listFilesAtCommit('semantic-layer/warehouse', 'HEAD')).resolves.toEqual([]);
|
||||||
|
await expect(service.listFilesAtCommit('semantic-layer/warehouse', atSeed)).resolves.toEqual([
|
||||||
|
'semantic-layer/warehouse/custom.yaml',
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns [] for a pathspec that matches nothing', async () => {
|
||||||
|
await expect(service.listFilesAtCommit('does/not/exist', 'HEAD')).resolves.toEqual([]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('cold-start bootstrap commit', () => {
|
describe('cold-start bootstrap commit', () => {
|
||||||
it('writes an empty commit on init so HEAD always resolves', async () => {
|
it('writes an empty commit on init so HEAD always resolves', async () => {
|
||||||
// beforeEach already ran onModuleInit() against an empty temp dir.
|
// beforeEach already ran onModuleInit() against an empty temp dir.
|
||||||
|
|
|
||||||
|
|
@ -159,13 +159,16 @@ describe('reindexLocalIndexes', () => {
|
||||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||||
'utf-8',
|
'utf-8',
|
||||||
);
|
);
|
||||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
// A broken standalone source is surfaced for repair rather than failing the
|
||||||
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
|
// scope, so use a corrupt machine-generated manifest shard, which is the
|
||||||
|
// remaining fatal read failure that the per-scope catch must isolate.
|
||||||
|
await mkdir(join(project.projectDir, 'semantic-layer/warehouse/_schema'), { recursive: true });
|
||||||
|
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/broken.yaml'), 'not: [valid', 'utf-8');
|
||||||
|
|
||||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||||
|
|
||||||
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
|
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
|
||||||
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
|
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('_schema/broken.yaml');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('marks a scope errored when configured embeddings fail', async () => {
|
it('marks a scope errored when configured embeddings fail', async () => {
|
||||||
|
|
|
||||||
|
|
@ -33,14 +33,14 @@ async function makeHarness() {
|
||||||
}
|
}
|
||||||
|
|
||||||
describe('finalGateRepairPaths', () => {
|
describe('finalGateRepairPaths', () => {
|
||||||
it('derives sorted wiki and semantic-layer file paths', () => {
|
it('derives sorted, deduplicated wiki and semantic-layer file paths', () => {
|
||||||
expect(
|
expect(
|
||||||
finalGateRepairPaths({
|
finalGateRepairPaths({
|
||||||
changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
|
changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
|
||||||
touchedSlSources: [
|
touchedSlSourcePaths: [
|
||||||
{ connectionId: 'warehouse', sourceName: 'mart_account_segments' },
|
'semantic-layer/warehouse/mart_account_segments.yaml',
|
||||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
'semantic-layer/warehouse/orders.yaml',
|
||||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
'semantic-layer/warehouse/orders.yaml',
|
||||||
],
|
],
|
||||||
}),
|
}),
|
||||||
).toEqual([
|
).toEqual([
|
||||||
|
|
|
||||||
|
|
@ -18,19 +18,49 @@ describe('deriveFinalizationWikiPageKeys', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('deriveFinalizationTouchedSources', () => {
|
describe('deriveFinalizationTouchedSources', () => {
|
||||||
it('maps standalone semantic-layer files directly', async () => {
|
it('resolves standalone files by the source diff, not the filename', () => {
|
||||||
const result = await deriveFinalizationTouchedSources({
|
// The file carries a derived label (`signed_up-<hash>.yaml`); the source it
|
||||||
|
// defines is the in-file `name:` (`SIGNED_UP`), visible only via the diff.
|
||||||
|
const result = deriveFinalizationTouchedSources({
|
||||||
|
changedPaths: ['semantic-layer/warehouse/signed_up-1a2b3c4d.yaml'],
|
||||||
|
beforeSourcesByConnection: new Map([['warehouse', []]]),
|
||||||
|
afterSourcesByConnection: new Map([
|
||||||
|
['warehouse', [{ name: 'SIGNED_UP', grain: [], columns: [], joins: [], measures: [] }]],
|
||||||
|
]),
|
||||||
|
});
|
||||||
|
expect(result).toEqual({
|
||||||
|
touchedSources: [{ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }],
|
||||||
|
unresolvedPaths: [],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('resolves deleted standalone files by the name that disappeared', () => {
|
||||||
|
const result = deriveFinalizationTouchedSources({
|
||||||
|
changedPaths: ['semantic-layer/warehouse/signed_up-1a2b3c4d.yaml'],
|
||||||
|
beforeSourcesByConnection: new Map([
|
||||||
|
['warehouse', [{ name: 'SIGNED_UP', grain: [], columns: [], joins: [], measures: [] }]],
|
||||||
|
]),
|
||||||
|
afterSourcesByConnection: new Map([['warehouse', []]]),
|
||||||
|
});
|
||||||
|
expect(result).toEqual({
|
||||||
|
touchedSources: [{ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }],
|
||||||
|
unresolvedPaths: [],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('flags standalone changes that produce no source diff', () => {
|
||||||
|
const result = deriveFinalizationTouchedSources({
|
||||||
changedPaths: ['semantic-layer/warehouse/orders.yaml'],
|
changedPaths: ['semantic-layer/warehouse/orders.yaml'],
|
||||||
beforeSourcesByConnection: new Map(),
|
beforeSourcesByConnection: new Map(),
|
||||||
afterSourcesByConnection: new Map(),
|
afterSourcesByConnection: new Map(),
|
||||||
});
|
});
|
||||||
expect(result).toEqual({
|
expect(result).toEqual({
|
||||||
touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
|
touchedSources: [],
|
||||||
unresolvedPaths: [],
|
unresolvedPaths: ['semantic-layer/warehouse/orders.yaml'],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('resolves aggregate _schema changes by comparing loaded source snapshots', async () => {
|
it('resolves aggregate _schema changes by comparing loaded source snapshots', () => {
|
||||||
const beforeSourcesByConnection = new Map([
|
const beforeSourcesByConnection = new Map([
|
||||||
[
|
[
|
||||||
'warehouse',
|
'warehouse',
|
||||||
|
|
@ -72,7 +102,7 @@ describe('deriveFinalizationTouchedSources', () => {
|
||||||
],
|
],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const result = await deriveFinalizationTouchedSources({
|
const result = deriveFinalizationTouchedSources({
|
||||||
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
|
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
|
||||||
beforeSourcesByConnection,
|
beforeSourcesByConnection,
|
||||||
afterSourcesByConnection,
|
afterSourcesByConnection,
|
||||||
|
|
@ -84,11 +114,11 @@ describe('deriveFinalizationTouchedSources', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('flags aggregate _schema changes that cannot be resolved to logical sources', async () => {
|
it('flags aggregate _schema changes that cannot be resolved to logical sources', () => {
|
||||||
const beforeSourcesByConnection = new Map([['warehouse', []]]);
|
const beforeSourcesByConnection = new Map([['warehouse', []]]);
|
||||||
const afterSourcesByConnection = new Map([['warehouse', []]]);
|
const afterSourcesByConnection = new Map([['warehouse', []]]);
|
||||||
|
|
||||||
const result = await deriveFinalizationTouchedSources({
|
const result = deriveFinalizationTouchedSources({
|
||||||
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
|
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
|
||||||
beforeSourcesByConnection,
|
beforeSourcesByConnection,
|
||||||
afterSourcesByConnection,
|
afterSourcesByConnection,
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,15 @@ async function loadSourcesFromRoot(root: string) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mirrors the production contract: resolve the standalone/overlay file for a
|
||||||
|
// source, null when absent. Fixtures keep filename == name, so a direct read
|
||||||
|
// is a faithful shortcut.
|
||||||
|
async function readSourceFileFromRoot(root: string, connectionId: string, sourceName: string) {
|
||||||
|
const relPath = `semantic-layer/${connectionId}/${sourceName}.yaml`;
|
||||||
|
const content = await readFile(join(root, relPath), 'utf-8').catch(() => null);
|
||||||
|
return content === null ? null : { content, path: relPath };
|
||||||
|
}
|
||||||
|
|
||||||
async function listGlobalWikiPageKeys(root: string): Promise<string[]> {
|
async function listGlobalWikiPageKeys(root: string): Promise<string[]> {
|
||||||
const dir = join(root, 'wiki/global');
|
const dir = join(root, 'wiki/global');
|
||||||
const entries = await readdir(dir).catch(() => []);
|
const entries = await readdir(dir).catch(() => []);
|
||||||
|
|
@ -172,11 +181,17 @@ function makeDeps(
|
||||||
const semanticLayerService: any = {
|
const semanticLayerService: any = {
|
||||||
loadAllSources: vi.fn(async () => loadSourcesFromRoot(runtime.configDir)),
|
loadAllSources: vi.fn(async () => loadSourcesFromRoot(runtime.configDir)),
|
||||||
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
|
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
|
||||||
|
readSourceFile: vi.fn((connectionId: string, sourceName: string) =>
|
||||||
|
readSourceFileFromRoot(runtime.configDir, connectionId, sourceName),
|
||||||
|
),
|
||||||
};
|
};
|
||||||
semanticLayerService.forWorktree = vi.fn((workdir: string) => ({
|
semanticLayerService.forWorktree = vi.fn((workdir: string) => ({
|
||||||
...semanticLayerService,
|
...semanticLayerService,
|
||||||
loadAllSources: vi.fn(async () => loadSourcesFromRoot(workdir)),
|
loadAllSources: vi.fn(async () => loadSourcesFromRoot(workdir)),
|
||||||
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
|
listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']),
|
||||||
|
readSourceFile: vi.fn((connectionId: string, sourceName: string) =>
|
||||||
|
readSourceFileFromRoot(workdir, connectionId, sourceName),
|
||||||
|
),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
const deps: IngestBundleRunnerDeps = {
|
const deps: IngestBundleRunnerDeps = {
|
||||||
|
|
@ -2366,8 +2381,11 @@ describe('IngestBundleRunner isolated diff path', () => {
|
||||||
join(runtime.configDir, '.ktx/ingest-traces/job-finalization-target-policy/trace.jsonl'),
|
join(runtime.configDir, '.ktx/ingest-traces/job-finalization-target-policy/trace.jsonl'),
|
||||||
'utf-8',
|
'utf-8',
|
||||||
);
|
);
|
||||||
expect(trace).toContain('finalization_committed');
|
// The policy check runs inside finalization, before touched-source
|
||||||
expect(trace).toContain('semantic_layer_target_policy');
|
// derivation — an out-of-scope write fails the finalization stage
|
||||||
|
// instead of reading as committed.
|
||||||
|
expect(trace).not.toContain('finalization_committed');
|
||||||
|
expect(trace).toContain('semantic_layer_target_policy_failed');
|
||||||
expect(trace).toContain('ingest_failed');
|
expect(trace).toContain('ingest_failed');
|
||||||
} finally {
|
} finally {
|
||||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||||
|
|
|
||||||
|
|
@ -1776,12 +1776,24 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
|
||||||
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
|
|
||||||
);
|
|
||||||
let head = 'pre-finalization';
|
let head = 'pre-finalization';
|
||||||
|
// Touched-source derivation diffs composed sources before/after finalization
|
||||||
|
// (the filename never carries identity), so the mock must reflect the write:
|
||||||
|
// `orders` exists only once the finalization commit lands.
|
||||||
|
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||||
|
Promise.resolve({
|
||||||
|
sources:
|
||||||
|
connectionId === 'warehouse-2' && head === 'post-finalization'
|
||||||
|
? [{ name: `${connectionId}_source` }, { name: 'orders' }]
|
||||||
|
: [{ name: `${connectionId}_source` }],
|
||||||
|
loadErrors: [],
|
||||||
|
}),
|
||||||
|
);
|
||||||
const git = {
|
const git = {
|
||||||
revParseHead: vi.fn(async () => head),
|
revParseHead: vi.fn(async () => head),
|
||||||
|
// Touched-source derivation reads each changed file's `name:`; the worktree
|
||||||
|
// is mocked (no files on disk), so serve the source content from history.
|
||||||
|
getFileAtCommit: vi.fn(async () => 'name: orders\n'),
|
||||||
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
|
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
|
||||||
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
|
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
|
||||||
head = 'post-finalization';
|
head = 'post-finalization';
|
||||||
|
|
@ -1854,10 +1866,41 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
|
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
|
||||||
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]);
|
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [
|
||||||
|
{ name: 'warehouse-2_source' },
|
||||||
|
{ name: 'orders' },
|
||||||
|
]);
|
||||||
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
|
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('recovers a deleted hash-named SL source by its in-file name, not its filename', async () => {
|
||||||
|
const runner = buildRunner();
|
||||||
|
// An uppercase warehouse source lives in a hash-derived filename, so parsing
|
||||||
|
// the basename yields the phantom `widget_sales-1a2b3c4d`. The real name must
|
||||||
|
// come from the file's `name:`, recovered from history once it was deleted.
|
||||||
|
const deletedPath = 'semantic-layer/warehouse/widget_sales-1a2b3c4d.yaml';
|
||||||
|
const getFileAtCommit = vi.fn(async () => 'name: WIDGET_SALES\ntable: WIDGET_SALES\n');
|
||||||
|
const worktree = { workdir: join(tmpdir(), 'ktx-absent-worktree-recover'), git: { getFileAtCommit } };
|
||||||
|
|
||||||
|
const touched = await (runner as any).touchedSlSourcesFromPaths(worktree, [deletedPath], 'pre-change-sha');
|
||||||
|
|
||||||
|
expect(touched).toEqual([{ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }]);
|
||||||
|
expect(getFileAtCommit).toHaveBeenCalledWith(deletedPath, 'pre-change-sha');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to the filename only when a deleted SL file is unrecoverable from history', async () => {
|
||||||
|
const runner = buildRunner();
|
||||||
|
const deletedPath = 'semantic-layer/warehouse/orders.yaml';
|
||||||
|
const getFileAtCommit = vi.fn(async () => {
|
||||||
|
throw new Error('path not present at commit');
|
||||||
|
});
|
||||||
|
const worktree = { workdir: join(tmpdir(), 'ktx-absent-worktree-fallback'), git: { getFileAtCommit } };
|
||||||
|
|
||||||
|
const touched = await (runner as any).touchedSlSourcesFromPaths(worktree, [deletedPath], 'pre-change-sha');
|
||||||
|
|
||||||
|
expect(touched).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
|
||||||
|
});
|
||||||
|
|
||||||
it('includes finalization actions in memory-flow saved counts', async () => {
|
it('includes finalization actions in memory-flow saved counts', async () => {
|
||||||
const deps = makeDeps();
|
const deps = makeDeps();
|
||||||
deps.adapter.source = 'historic-sql';
|
deps.adapter.source = 'historic-sql';
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,21 @@
|
||||||
import { describe, expect, it, vi } from 'vitest';
|
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
import type { SlConnectionCatalogPort } from '../../../../../src/context/sl/ports.js';
|
import type { SlConnectionCatalogPort } from '../../../../../src/context/sl/ports.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../../../../src/context/sql-analysis/ports.js';
|
||||||
import type { ToolContext } from '../../../../../src/context/tools/base-tool.js';
|
import type { ToolContext } from '../../../../../src/context/tools/base-tool.js';
|
||||||
import { SqlExecutionTool } from '../../../../../src/context/ingest/tools/warehouse-verification/sql-execution.tool.js';
|
import { SqlExecutionTool } from '../../../../../src/context/ingest/tools/warehouse-verification/sql-execution.tool.js';
|
||||||
|
|
||||||
describe('SqlExecutionTool', () => {
|
describe('SqlExecutionTool', () => {
|
||||||
const connections = {
|
const connections = {
|
||||||
executeQuery: vi.fn(),
|
executeQuery: vi.fn(),
|
||||||
} as unknown as SlConnectionCatalogPort & { executeQuery: ReturnType<typeof vi.fn> };
|
getConnectionById: vi.fn(async () => ({ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' })),
|
||||||
const tool = new SqlExecutionTool(connections);
|
} as unknown as SlConnectionCatalogPort & {
|
||||||
|
executeQuery: ReturnType<typeof vi.fn>;
|
||||||
|
getConnectionById: ReturnType<typeof vi.fn>;
|
||||||
|
};
|
||||||
|
const sqlAnalysis = {
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
|
||||||
|
} as unknown as SqlAnalysisPort & { validateReadOnly: ReturnType<typeof vi.fn> };
|
||||||
|
const tool = new SqlExecutionTool(connections, sqlAnalysis);
|
||||||
const context: ToolContext = {
|
const context: ToolContext = {
|
||||||
sourceId: 'ingest',
|
sourceId: 'ingest',
|
||||||
messageId: 'm1',
|
messageId: 'm1',
|
||||||
|
|
@ -15,7 +23,15 @@ describe('SqlExecutionTool', () => {
|
||||||
session: { allowedConnectionNames: new Set(['warehouse']) } as any,
|
session: { allowedConnectionNames: new Set(['warehouse']) } as any,
|
||||||
};
|
};
|
||||||
|
|
||||||
it('wraps read-only SQL with a capped row limit', async () => {
|
beforeEach(() => {
|
||||||
|
connections.executeQuery.mockReset();
|
||||||
|
connections.getConnectionById.mockReset();
|
||||||
|
connections.getConnectionById.mockResolvedValue({ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' });
|
||||||
|
sqlAnalysis.validateReadOnly.mockReset();
|
||||||
|
sqlAnalysis.validateReadOnly.mockResolvedValue({ ok: true, error: null });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('validates with the parser-backed validator in the connection dialect, then wraps with a capped row limit', async () => {
|
||||||
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
|
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
|
||||||
|
|
||||||
const result = await tool.call(
|
const result = await tool.call(
|
||||||
|
|
@ -23,6 +39,7 @@ describe('SqlExecutionTool', () => {
|
||||||
context,
|
context,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select status from public.orders', 'postgres');
|
||||||
expect(connections.executeQuery).toHaveBeenCalledWith(
|
expect(connections.executeQuery).toHaveBeenCalledWith(
|
||||||
'warehouse',
|
'warehouse',
|
||||||
'select * from (select status from public.orders) as ktx_query_result limit 5',
|
'select * from (select status from public.orders) as ktx_query_result limit 5',
|
||||||
|
|
@ -31,14 +48,46 @@ describe('SqlExecutionTool', () => {
|
||||||
expect(result.structured.wrappedSql).toContain('limit 5');
|
expect(result.structured.wrappedSql).toContain('limit 5');
|
||||||
});
|
});
|
||||||
|
|
||||||
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => {
|
it('maps connection types to sqlglot dialects', async () => {
|
||||||
connections.executeQuery.mockClear();
|
connections.getConnectionById.mockResolvedValue({ id: 'warehouse', name: 'warehouse', connectionType: 'SNOWFLAKE' });
|
||||||
|
connections.executeQuery.mockResolvedValue({ headers: [], rows: [], totalRows: 0 });
|
||||||
|
|
||||||
|
await tool.call({ connectionId: 'warehouse', sql: 'select 1' }, context);
|
||||||
|
|
||||||
|
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select 1', 'snowflake');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns the validator error without executing when validation fails', async () => {
|
||||||
|
sqlAnalysis.validateReadOnly.mockResolvedValue({ ok: false, error: 'SQL contains read/write operation: Insert' });
|
||||||
|
|
||||||
|
const result = await tool.call(
|
||||||
|
{ connectionId: 'warehouse', sql: 'with x as (insert into t values (1) returning *) select * from x' },
|
||||||
|
context,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.markdown).toContain('SQL contains read/write operation: Insert');
|
||||||
|
expect(result.structured.error).toContain('SQL contains read/write operation: Insert');
|
||||||
|
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('throws when no parser-backed validator is configured', async () => {
|
||||||
|
const unvalidated = new SqlExecutionTool(connections);
|
||||||
|
|
||||||
|
await expect(unvalidated.call({ connectionId: 'warehouse', sql: 'select 1' }, context)).rejects.toThrow(
|
||||||
|
'sql_execution requires parser-backed SQL validation.',
|
||||||
|
);
|
||||||
|
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it.each(['insert into x values (1)', 'drop table x', 'vacuum'])(
|
||||||
|
'keeps the local backstop even when the validator approves: %s',
|
||||||
|
async (sql) => {
|
||||||
const result = await tool.call({ connectionId: 'warehouse', sql }, context);
|
const result = await tool.call({ connectionId: 'warehouse', sql }, context);
|
||||||
|
|
||||||
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
|
expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.');
|
||||||
expect(connections.executeQuery).not.toHaveBeenCalled();
|
expect(connections.executeQuery).not.toHaveBeenCalled();
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
it('surfaces connector errors verbatim', async () => {
|
it('surfaces connector errors verbatim', async () => {
|
||||||
connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));
|
connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist'));
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,9 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
import { initKtxProject } from '../../../src/context/project/project.js';
|
import { initKtxProject } from '../../../src/context/project/project.js';
|
||||||
import { KtxQueryError } from '../../../src/errors.js';
|
import { KtxQueryError } from '../../../src/errors.js';
|
||||||
import { createKtxConnectorCapabilities, type KtxQueryResult, type KtxScanConnector, type KtxSchemaSnapshot } from '../../../src/context/scan/types.js';
|
import { createKtxConnectorCapabilities, type KtxQueryResult, type KtxScanConnector, type KtxSchemaSnapshot } from '../../../src/context/scan/types.js';
|
||||||
import { writeLocalSlSource } from '../../../src/context/sl/local-sl.js';
|
import { SemanticLayerService } from '../../../src/context/sl/semantic-layer.service.js';
|
||||||
|
import type { SemanticLayerSource } from '../../../src/context/sl/types.js';
|
||||||
|
import { seedSlSourceFile } from '../sl/sl-source-seeding.test-utils.js';
|
||||||
import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js';
|
import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js';
|
||||||
|
|
||||||
describe('createLocalProjectMcpContextPorts', () => {
|
describe('createLocalProjectMcpContextPorts', () => {
|
||||||
|
|
@ -739,7 +741,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
||||||
|
|
||||||
it('reads seeded semantic-layer sources', async () => {
|
it('reads seeded semantic-layer sources', async () => {
|
||||||
const project = await initKtxProject({ projectDir: tempDir });
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: [
|
yaml: [
|
||||||
|
|
@ -763,7 +765,92 @@ describe('createLocalProjectMcpContextPorts', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('rejects path traversal keys before touching the project directory', async () => {
|
it('reads manifest-backed sources with uppercase warehouse identifiers', async () => {
|
||||||
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||||
|
[
|
||||||
|
'tables:',
|
||||||
|
' WIDGET_SALES:',
|
||||||
|
' table: PUBLIC.WIDGET_SALES',
|
||||||
|
' columns:',
|
||||||
|
' - name: ID',
|
||||||
|
' type: number',
|
||||||
|
' pk: true',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'seed uppercase manifest shard',
|
||||||
|
);
|
||||||
|
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }),
|
||||||
|
).resolves.toMatchObject({
|
||||||
|
sourceName: 'WIDGET_SALES',
|
||||||
|
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('composes an overlay written for an uppercase manifest source at a derived filename', async () => {
|
||||||
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||||
|
[
|
||||||
|
'tables:',
|
||||||
|
' WIDGET_SALES:',
|
||||||
|
' table: PUBLIC.WIDGET_SALES',
|
||||||
|
' columns:',
|
||||||
|
' - name: ID',
|
||||||
|
' type: number',
|
||||||
|
' pk: true',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'seed uppercase manifest shard',
|
||||||
|
);
|
||||||
|
|
||||||
|
// The production write path: agents overlay manifest sources via
|
||||||
|
// SemanticLayerService.writeSource using the verbatim warehouse name.
|
||||||
|
const service = new SemanticLayerService(project.fileStore as never, {} as never, {} as never);
|
||||||
|
const overlay = {
|
||||||
|
name: 'WIDGET_SALES',
|
||||||
|
measures: [{ name: 'widget_sales_count', expr: 'count(*)' }],
|
||||||
|
} as SemanticLayerSource;
|
||||||
|
const write = await service.writeSource('warehouse', overlay, 'ktx', 'ktx@example.com');
|
||||||
|
expect(write.path).toMatch(/^semantic-layer\/warehouse\/widget_sales-[0-9a-f]{8}\.yaml$/);
|
||||||
|
|
||||||
|
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||||
|
await expect(
|
||||||
|
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'WIDGET_SALES' }),
|
||||||
|
).resolves.toMatchObject({
|
||||||
|
sourceName: 'WIDGET_SALES',
|
||||||
|
yaml: expect.stringContaining('widget_sales_count'),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns a standalone source verbatim even when its YAML is currently broken', async () => {
|
||||||
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/orders.yaml',
|
||||||
|
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'seed broken source mid-edit',
|
||||||
|
);
|
||||||
|
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'orders' }),
|
||||||
|
).resolves.toMatchObject({
|
||||||
|
sourceName: 'orders',
|
||||||
|
yaml: expect.stringContaining('[unterminated'),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps path-traversal keys away from the project directory', async () => {
|
||||||
const project = await initKtxProject({ projectDir: tempDir });
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null });
|
||||||
|
|
||||||
|
|
@ -774,12 +861,14 @@ describe('createLocalProjectMcpContextPorts', () => {
|
||||||
}),
|
}),
|
||||||
).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');
|
).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".');
|
||||||
|
|
||||||
|
// Source reads never derive a file path from the name; a traversal-style
|
||||||
|
// name simply matches no record.
|
||||||
await expect(
|
await expect(
|
||||||
ports.semanticLayer?.readSource({
|
ports.semanticLayer?.readSource({
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: '../orders',
|
sourceName: '../orders',
|
||||||
}),
|
}),
|
||||||
).rejects.toThrow('Unsafe semantic-layer source name');
|
).resolves.toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('uses semantic compute for compile-only sl_query when supplied', async () => {
|
it('uses semantic compute for compile-only sl_query when supplied', async () => {
|
||||||
|
|
@ -788,7 +877,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
||||||
driver: 'postgres',
|
driver: 'postgres',
|
||||||
url: 'env:DATABASE_URL',
|
url: 'env:DATABASE_URL',
|
||||||
};
|
};
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: [
|
yaml: [
|
||||||
|
|
@ -850,7 +939,7 @@ describe('createLocalProjectMcpContextPorts', () => {
|
||||||
driver: 'postgres',
|
driver: 'postgres',
|
||||||
url: 'env:DATABASE_URL',
|
url: 'env:DATABASE_URL',
|
||||||
};
|
};
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: [
|
yaml: [
|
||||||
|
|
|
||||||
|
|
@ -357,6 +357,10 @@ describe('MemoryAgentService.gateRevertInvalidSources (J3)', () => {
|
||||||
const configService = {
|
const configService = {
|
||||||
writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}),
|
writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}),
|
||||||
deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}),
|
deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}),
|
||||||
|
// Revert resolves the live file by name; with no listing it falls back
|
||||||
|
// to the writer-derived filename.
|
||||||
|
listFiles: vi.fn().mockResolvedValue({ files: [] }),
|
||||||
|
readFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
||||||
};
|
};
|
||||||
const gitService = {
|
const gitService = {
|
||||||
getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')),
|
getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')),
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ import { afterEach, beforeEach, describe, it } from 'vitest';
|
||||||
import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js';
|
import { SqliteContextEvidenceStore } from '../../../src/context/ingest/context-evidence/sqlite-context-evidence-store.js';
|
||||||
import type { JsonValue } from '../../../src/context/ingest/ports.js';
|
import type { JsonValue } from '../../../src/context/ingest/ports.js';
|
||||||
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||||
import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../../../src/context/sl/local-sl.js';
|
import { type LocalSlSourceSearchResult, searchLocalSlSources } from '../../../src/context/sl/local-sl.js';
|
||||||
|
import { seedSlSourceFile } from '../sl/sl-source-seeding.test-utils.js';
|
||||||
import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js';
|
import type { ContextEvidenceSearchResult } from '../../../src/context/tools/context-evidence-tool-store.js';
|
||||||
import {
|
import {
|
||||||
type LocalKnowledgeSearchResult,
|
type LocalKnowledgeSearchResult,
|
||||||
|
|
@ -99,12 +100,12 @@ function toContextConformanceResult(result: ContextEvidenceSearchResult): Search
|
||||||
}
|
}
|
||||||
|
|
||||||
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
|
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
});
|
});
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'finance',
|
connectionId: 'finance',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: FINANCE_ORDERS_YAML,
|
yaml: FINANCE_ORDERS_YAML,
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,8 @@ import {
|
||||||
resolveLocalSlSource,
|
resolveLocalSlSource,
|
||||||
searchLocalSlSources,
|
searchLocalSlSources,
|
||||||
validateLocalSlSource,
|
validateLocalSlSource,
|
||||||
writeLocalSlSource,
|
|
||||||
} from '../../../src/context/sl/local-sl.js';
|
} from '../../../src/context/sl/local-sl.js';
|
||||||
|
import { seedSlSourceFile } from './sl-source-seeding.test-utils.js';
|
||||||
|
|
||||||
const ORDERS_YAML = [
|
const ORDERS_YAML = [
|
||||||
'name: orders',
|
'name: orders',
|
||||||
|
|
@ -60,7 +60,7 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('writes, reads, lists, and validates semantic-layer sources', async () => {
|
it('writes, reads, lists, and validates semantic-layer sources', async () => {
|
||||||
const write = await writeLocalSlSource(project, {
|
const write = await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -92,7 +92,7 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('resolves a scoped source by connection id', async () => {
|
it('resolves a scoped source by connection id', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -115,7 +115,7 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns not-found for a missing scoped source', async () => {
|
it('returns not-found for a missing scoped source', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -130,12 +130,12 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('resolves a unique source name across all connections', async () => {
|
it('resolves a unique source name across all connections', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
});
|
});
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'analytics',
|
connectionId: 'analytics',
|
||||||
sourceName: 'tickets',
|
sourceName: 'tickets',
|
||||||
yaml: SUPPORT_YAML,
|
yaml: SUPPORT_YAML,
|
||||||
|
|
@ -157,7 +157,7 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns not-found for a missing unscoped source', async () => {
|
it('returns not-found for a missing unscoped source', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -169,12 +169,12 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports sorted ambiguous connection ids for duplicate source names', async () => {
|
it('reports sorted ambiguous connection ids for duplicate source names', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
});
|
});
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'analytics',
|
connectionId: 'analytics',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -261,6 +261,153 @@ describe('local semantic-layer helpers', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('reads manifest-backed scan sources whose warehouse identifiers are uppercase', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||||
|
`tables:
|
||||||
|
WIDGET_SALES:
|
||||||
|
table: PUBLIC.WIDGET_SALES
|
||||||
|
columns:
|
||||||
|
- name: ID
|
||||||
|
type: number
|
||||||
|
pk: true
|
||||||
|
- name: EMAIL
|
||||||
|
type: string
|
||||||
|
`,
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'Add uppercase manifest shard',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'WIDGET_SALES' })).resolves.toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
name: 'WIDGET_SALES',
|
||||||
|
path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#WIDGET_SALES',
|
||||||
|
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reads manifest-backed sources whose names are not filename-safe', async () => {
|
||||||
|
// Snowflake and Postgres unquoted identifiers allow `$`; manifest keys
|
||||||
|
// carry the warehouse name verbatim, so the lookup must accept it.
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||||
|
`tables:
|
||||||
|
EVENT$LOG:
|
||||||
|
table: PUBLIC.EVENT$LOG
|
||||||
|
columns:
|
||||||
|
- name: ID
|
||||||
|
type: number
|
||||||
|
pk: true
|
||||||
|
`,
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'Add manifest shard with dollar-sign table name',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'EVENT$LOG' })).resolves.toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
name: 'EVENT$LOG',
|
||||||
|
path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#EVENT$LOG',
|
||||||
|
yaml: expect.stringContaining('table: PUBLIC.EVENT$LOG'),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reads a manifest-backed source while a sibling standalone file has broken YAML', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/_schema/PUBLIC.yaml',
|
||||||
|
`tables:
|
||||||
|
WIDGET_SALES:
|
||||||
|
table: PUBLIC.WIDGET_SALES
|
||||||
|
columns:
|
||||||
|
- name: ID
|
||||||
|
type: number
|
||||||
|
pk: true
|
||||||
|
`,
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'Add manifest shard',
|
||||||
|
);
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/orders.yaml',
|
||||||
|
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'seed a sibling source mid-edit with broken YAML',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'WIDGET_SALES' })).resolves.toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
name: 'WIDGET_SALES',
|
||||||
|
yaml: expect.stringContaining('table: PUBLIC.WIDGET_SALES'),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
// The broken sibling stays visible in listings instead of hiding or
|
||||||
|
// failing the whole connection.
|
||||||
|
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
|
||||||
|
expect.objectContaining({ name: 'orders', columnCount: 0 }),
|
||||||
|
expect.objectContaining({ name: 'WIDGET_SALES', columnCount: 1 }),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns the raw YAML of a standalone source whose content no longer parses', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/orders.yaml',
|
||||||
|
'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n',
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'seed a source mid-edit with broken YAML',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' })).resolves.toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
name: 'orders',
|
||||||
|
path: 'semantic-layer/warehouse/orders.yaml',
|
||||||
|
yaml: expect.stringContaining('[unterminated'),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reads a broken source by its declared name even when the filename differs', async () => {
|
||||||
|
// Identity is the intact top-level `name:`, recovered via parseDocument even
|
||||||
|
// when the YAML is broken below it — never the filename. A human-renamed or
|
||||||
|
// hashed-filename source (e.g. an uppercase warehouse name) saved mid-edit
|
||||||
|
// must stay reachable under the name it declares, matching the writer side
|
||||||
|
// (resolveSlSourceFile). Keying it by the filename would make it invisible
|
||||||
|
// under its real name.
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||||
|
'name: SIGNED_UP\nmeasures:\n - name: signups\n expr: [unterminated\n',
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'seed a human-renamed source mid-edit with broken YAML',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'SIGNED_UP' })).resolves.toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
name: 'SIGNED_UP',
|
||||||
|
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||||
|
yaml: expect.stringContaining('[unterminated'),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
// The filename is not the identity, so it does not resolve a source.
|
||||||
|
await expect(
|
||||||
|
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'renamed-by-hand' }),
|
||||||
|
).resolves.toBeNull();
|
||||||
|
|
||||||
|
await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([
|
||||||
|
expect.objectContaining({ name: 'SIGNED_UP', columnCount: 0 }),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
it('expands manifest-backed scan sources when listing all connections', async () => {
|
it('expands manifest-backed scan sources when listing all connections', async () => {
|
||||||
await project.fileStore.writeFile(
|
await project.fileStore.writeFile(
|
||||||
'semantic-layer/warehouse/_schema/public.yaml',
|
'semantic-layer/warehouse/_schema/public.yaml',
|
||||||
|
|
@ -292,12 +439,12 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('searches local semantic-layer source text through SQLite FTS', async () => {
|
it('searches local semantic-layer source text through SQLite FTS', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
});
|
});
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'tickets',
|
sourceName: 'tickets',
|
||||||
yaml: SUPPORT_YAML,
|
yaml: SUPPORT_YAML,
|
||||||
|
|
@ -365,12 +512,12 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('searches all connections with one global hybrid ranking pass', async () => {
|
it('searches all connections with one global hybrid ranking pass', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
});
|
});
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'finance',
|
connectionId: 'finance',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: [
|
yaml: [
|
||||||
|
|
@ -403,7 +550,7 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns dictionary evidence when collected sample values explain a match', async () => {
|
it('returns dictionary evidence when collected sample values explain a match', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -456,7 +603,7 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('adds the token lane alongside lexical matches for normalized query terms', async () => {
|
it('adds the token lane alongside lexical matches for normalized query terms', async () => {
|
||||||
await writeLocalSlSource(project, {
|
await seedSlSourceFile(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: 'orders',
|
sourceName: 'orders',
|
||||||
yaml: ORDERS_YAML,
|
yaml: ORDERS_YAML,
|
||||||
|
|
@ -471,21 +618,13 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports schema validation errors without writing invalid YAML', async () => {
|
it('reports schema validation errors for invalid YAML', async () => {
|
||||||
const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n');
|
const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n');
|
||||||
|
|
||||||
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
|
await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({
|
||||||
valid: false,
|
valid: false,
|
||||||
errors: expect.arrayContaining([expect.stringContaining('grain')]),
|
errors: expect.arrayContaining([expect.stringContaining('grain')]),
|
||||||
});
|
});
|
||||||
|
|
||||||
await expect(
|
|
||||||
writeLocalSlSource(project, {
|
|
||||||
connectionId: 'warehouse',
|
|
||||||
sourceName: 'broken',
|
|
||||||
yaml: invalidYaml,
|
|
||||||
}),
|
|
||||||
).rejects.toThrow('Invalid semantic-layer source');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports overlay columns that are not computed columns', async () => {
|
it('reports overlay columns that are not computed columns', async () => {
|
||||||
|
|
@ -506,12 +645,40 @@ describe('local semantic-layer helpers', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('rejects unsafe source paths', async () => {
|
it('never derives a file path from a traversal-style source name', async () => {
|
||||||
|
// Reads match names against loaded records, so a traversal-style name is
|
||||||
|
// simply not found; the writer-side guarantee (derived filenames contain
|
||||||
|
// no separators) is covered by the source-files tests.
|
||||||
await expect(
|
await expect(
|
||||||
readLocalSlSource(project, {
|
readLocalSlSource(project, {
|
||||||
connectionId: 'warehouse',
|
connectionId: 'warehouse',
|
||||||
sourceName: '../orders',
|
sourceName: '../orders',
|
||||||
}),
|
}),
|
||||||
).rejects.toThrow('Unsafe semantic-layer source name');
|
).resolves.toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reads a source from a human-renamed file by its in-file name', async () => {
|
||||||
|
// The filename is a derived label, not identity: a file renamed by a human
|
||||||
|
// still resolves under the `name:` it declares.
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/custom-file-name.yaml',
|
||||||
|
ORDERS_YAML,
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
'Seed source at a human-chosen filename',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' }),
|
||||||
|
).resolves.toMatchObject({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
name: 'orders',
|
||||||
|
path: 'semantic-layer/warehouse/custom-file-name.yaml',
|
||||||
|
yaml: ORDERS_YAML,
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'custom-file-name' }),
|
||||||
|
).resolves.toBeNull();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,9 @@ import { join } from 'node:path';
|
||||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||||
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||||
import { assertSearchBackendConformanceCase } from '../search/backend-conformance.test-utils.js';
|
import { assertSearchBackendConformanceCase } from '../search/backend-conformance.test-utils.js';
|
||||||
import { searchLocalSlSources, writeLocalSlSource, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js';
|
import { searchLocalSlSources, type LocalSlSourceSearchResult } from '../../../src/context/sl/local-sl.js';
|
||||||
import { searchLocalSlSourcesWithPglitePrototype } from '../../../src/context/sl/pglite-sl-search-prototype.js';
|
import { searchLocalSlSourcesWithPglitePrototype } from '../../../src/context/sl/pglite-sl-search-prototype.js';
|
||||||
|
import { seedSlSourceFile } from './sl-source-seeding.test-utils.js';
|
||||||
|
|
||||||
const ORDERS_YAML = [
|
const ORDERS_YAML = [
|
||||||
'name: orders',
|
'name: orders',
|
||||||
|
|
@ -107,9 +108,9 @@ function toConformanceResult(result: LocalSlSourceSearchResult) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
|
async function seedSemanticLayerProject(project: KtxLocalProject): Promise<void> {
|
||||||
await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML });
|
await seedSlSourceFile(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML });
|
||||||
await writeLocalSlSource(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML });
|
await seedSlSourceFile(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML });
|
||||||
await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML });
|
await seedSlSourceFile(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML });
|
||||||
|
|
||||||
await project.fileStore.writeFile(
|
await project.fileStore.writeFile(
|
||||||
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
|
'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json',
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,9 @@
|
||||||
|
import { mkdtemp, rm } from 'node:fs/promises';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
import type { Mock } from 'vitest';
|
import type { Mock } from 'vitest';
|
||||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
ColumnNameCollisionError,
|
ColumnNameCollisionError,
|
||||||
|
|
@ -71,6 +75,7 @@ describe('loadSource', () => {
|
||||||
it('warns and returns null when an existing source file has invalid YAML', async () => {
|
it('warns and returns null when an existing source file has invalid YAML', async () => {
|
||||||
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
||||||
const configService = {
|
const configService = {
|
||||||
|
listFiles: vi.fn().mockResolvedValue({ files: ['semantic-layer/warehouse/orders.yaml'] }),
|
||||||
readFile: vi.fn().mockResolvedValue({ content: 'name: [' }),
|
readFile: vi.fn().mockResolvedValue({ content: 'name: [' }),
|
||||||
};
|
};
|
||||||
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);
|
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);
|
||||||
|
|
@ -79,9 +84,33 @@ describe('loadSource', () => {
|
||||||
|
|
||||||
expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml');
|
expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml');
|
||||||
expect(logger.warn).toHaveBeenCalledWith(
|
expect(logger.warn).toHaveBeenCalledWith(
|
||||||
expect.stringContaining('[loadSource] warehouse/orders.yaml: YAML parse failed:'),
|
expect.stringContaining('[loadSource] semantic-layer/warehouse/orders.yaml: YAML parse failed:'),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('returns null when no file declares the source name', async () => {
|
||||||
|
const configService = {
|
||||||
|
listFiles: vi.fn().mockResolvedValue({ files: [] }),
|
||||||
|
readFile: vi.fn(),
|
||||||
|
};
|
||||||
|
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
|
||||||
|
|
||||||
|
await expect(service.loadSource('warehouse', 'orders')).resolves.toBeNull();
|
||||||
|
expect(configService.readFile).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('resolves a source by its in-file name when the filename differs', async () => {
|
||||||
|
const configService = {
|
||||||
|
listFiles: vi.fn().mockResolvedValue({ files: ['semantic-layer/warehouse/renamed.yaml'] }),
|
||||||
|
readFile: vi.fn().mockResolvedValue({ content: 'name: SIGNED_UP\nmeasures: []\n' }),
|
||||||
|
};
|
||||||
|
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort);
|
||||||
|
|
||||||
|
await expect(service.loadSource('warehouse', 'SIGNED_UP')).resolves.toEqual({
|
||||||
|
name: 'SIGNED_UP',
|
||||||
|
measures: [],
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('composeOverlay', () => {
|
describe('composeOverlay', () => {
|
||||||
|
|
@ -1242,3 +1271,177 @@ describe('findDanglingSegmentRefs', () => {
|
||||||
expect(findDanglingSegmentRefs({})).toEqual([]);
|
expect(findDanglingSegmentRefs({})).toEqual([]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('writeSource / deleteSource file naming', () => {
|
||||||
|
let tempDir: string;
|
||||||
|
let project: KtxLocalProject;
|
||||||
|
let service: SemanticLayerService;
|
||||||
|
|
||||||
|
const author = 'T U';
|
||||||
|
const authorEmail = 't@u.com';
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-service-files-'));
|
||||||
|
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||||
|
service = new SemanticLayerService(project.fileStore as never, connectionCatalog() as never, pythonPort as never);
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
await rm(tempDir, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
const signedUp: SemanticLayerSource = {
|
||||||
|
name: 'SIGNED_UP',
|
||||||
|
table: 'PUBLIC.SIGNED_UP',
|
||||||
|
grain: ['ID'],
|
||||||
|
columns: [{ name: 'ID', type: 'number' }],
|
||||||
|
joins: [],
|
||||||
|
measures: [],
|
||||||
|
};
|
||||||
|
|
||||||
|
it('writes a new uppercase source at a derived filename and reads it back by name', async () => {
|
||||||
|
const result = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||||
|
|
||||||
|
expect(result.path).toMatch(/^semantic-layer\/warehouse\/signed_up-[0-9a-f]{8}\.yaml$/);
|
||||||
|
|
||||||
|
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
|
||||||
|
expect(file?.path).toBe(result.path);
|
||||||
|
expect(file?.content).toContain('name: SIGNED_UP');
|
||||||
|
|
||||||
|
// Rewriting lands on the same file instead of deriving a second one.
|
||||||
|
const rewrite = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||||
|
expect(rewrite.path).toBe(result.path);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('repairs a broken file occupying the derived path instead of refusing the write', async () => {
|
||||||
|
const written = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
written.path,
|
||||||
|
'name: SIGNED_UP\nmeasures: [unterminated\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'break the file',
|
||||||
|
);
|
||||||
|
|
||||||
|
const repaired = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||||
|
|
||||||
|
expect(repaired.path).toBe(written.path);
|
||||||
|
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
|
||||||
|
expect(file?.path).toBe(written.path);
|
||||||
|
expect(file?.content).toContain('name: SIGNED_UP');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rewrites a human-renamed file in place', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/custom.yaml',
|
||||||
|
'name: orders\nmeasures: []\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'seed renamed file',
|
||||||
|
);
|
||||||
|
|
||||||
|
const result = await service.writeSource(
|
||||||
|
'warehouse',
|
||||||
|
{ name: 'orders', grain: [], columns: [], joins: [], measures: [] },
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.path).toBe('semantic-layer/warehouse/custom.yaml');
|
||||||
|
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
|
||||||
|
expect(listed.files).toEqual(['semantic-layer/warehouse/custom.yaml']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('repairs a human-renamed broken file in place instead of deriving a second one', async () => {
|
||||||
|
// Renamed (filename ≠ name) AND mid-edit broken: identity must survive the
|
||||||
|
// syntax error so the rewrite lands on the original file rather than creating
|
||||||
|
// a duplicate at the derived path that later trips the by-name resolver.
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/custom.yaml',
|
||||||
|
'name: SIGNED_UP\nmeasures: [unterminated\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'seed broken renamed file',
|
||||||
|
);
|
||||||
|
|
||||||
|
const repaired = await service.writeSource('warehouse', signedUp, author, authorEmail);
|
||||||
|
|
||||||
|
expect(repaired.path).toBe('semantic-layer/warehouse/custom.yaml');
|
||||||
|
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
|
||||||
|
expect(listed.files).toEqual(['semantic-layer/warehouse/custom.yaml']);
|
||||||
|
const file = await service.readSourceFile('warehouse', 'SIGNED_UP');
|
||||||
|
expect(file?.path).toBe('semantic-layer/warehouse/custom.yaml');
|
||||||
|
expect(file?.content).toContain('name: SIGNED_UP');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps a .yml-renamed file visible to the loader and the by-name resolver alike', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/custom.yml',
|
||||||
|
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\nmeasures: []\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'seed .yml file',
|
||||||
|
);
|
||||||
|
|
||||||
|
const { sources, loadErrors } = await service.loadAllSources('warehouse');
|
||||||
|
expect(loadErrors).toEqual([]);
|
||||||
|
expect(sources.map((source) => source.name)).toEqual(['orders']);
|
||||||
|
|
||||||
|
const file = await service.readSourceFile('warehouse', 'orders');
|
||||||
|
expect(file?.path).toBe('semantic-layer/warehouse/custom.yml');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('refuses to clobber a derived path occupied by a different source', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/orders.yaml',
|
||||||
|
'name: other_source\nmeasures: []\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'seed conflicting file',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
service.writeSource(
|
||||||
|
'warehouse',
|
||||||
|
{ name: 'orders', grain: [], columns: [], joins: [], measures: [] },
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
),
|
||||||
|
).rejects.toThrow("already defines source 'other_source'");
|
||||||
|
});
|
||||||
|
|
||||||
|
it('deletes the file resolved by name, wherever it lives', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/custom.yaml',
|
||||||
|
'name: orders\nmeasures: []\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'seed renamed file',
|
||||||
|
);
|
||||||
|
|
||||||
|
await service.deleteSource('warehouse', 'orders', author, authorEmail);
|
||||||
|
|
||||||
|
const listed = await project.fileStore.listFiles('semantic-layer/warehouse');
|
||||||
|
expect(listed.files).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('explains manifest-backed deletes instead of silently succeeding', async () => {
|
||||||
|
await project.fileStore.writeFile(
|
||||||
|
'semantic-layer/warehouse/_schema/public.yaml',
|
||||||
|
'tables:\n payments:\n table: public.payments\n columns:\n - name: id\n type: number\n',
|
||||||
|
author,
|
||||||
|
authorEmail,
|
||||||
|
'seed manifest shard',
|
||||||
|
);
|
||||||
|
|
||||||
|
await expect(service.deleteSource('warehouse', 'payments', author, authorEmail)).rejects.toThrow(
|
||||||
|
/scan manifest/,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('throws a plain not-found error for unknown sources', async () => {
|
||||||
|
await expect(service.deleteSource('warehouse', 'missing', author, authorEmail)).rejects.toThrow(
|
||||||
|
'Semantic-layer source not found: warehouse/missing',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
||||||
22
packages/cli/test/context/sl/sl-source-seeding.test-utils.ts
Normal file
22
packages/cli/test/context/sl/sl-source-seeding.test-utils.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
import type { KtxFileWriteResult } from '../../../src/context/core/file-store.js';
|
||||||
|
import type { KtxLocalProject } from '../../../src/context/project/project.js';
|
||||||
|
import { slSourceFilePath } from '../../../src/context/sl/source-files.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Seed a standalone/overlay semantic-layer file at the writer-derived path,
|
||||||
|
* bypassing tool-level validation. Production writes go through
|
||||||
|
* `SemanticLayerService.writeSource`; tests that only need a file on disk use
|
||||||
|
* this instead.
|
||||||
|
*/
|
||||||
|
export async function seedSlSourceFile(
|
||||||
|
project: KtxLocalProject,
|
||||||
|
input: { connectionId: string; sourceName: string; yaml: string },
|
||||||
|
): Promise<KtxFileWriteResult> {
|
||||||
|
return project.fileStore.writeFile(
|
||||||
|
slSourceFilePath(input.connectionId, input.sourceName),
|
||||||
|
input.yaml,
|
||||||
|
'ktx',
|
||||||
|
'ktx@example.com',
|
||||||
|
`Seed semantic-layer source: ${input.connectionId}/${input.sourceName}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
154
packages/cli/test/context/sl/source-files.test.ts
Normal file
154
packages/cli/test/context/sl/source-files.test.ts
Normal file
|
|
@ -0,0 +1,154 @@
|
||||||
|
import { mkdtemp, rm } from 'node:fs/promises';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||||
|
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
|
||||||
|
import {
|
||||||
|
resolveSlSourceFile,
|
||||||
|
slSourceFileName,
|
||||||
|
slSourceFilePath,
|
||||||
|
slSourceNameForFile,
|
||||||
|
} from '../../../src/context/sl/source-files.js';
|
||||||
|
|
||||||
|
describe('slSourceFileName', () => {
|
||||||
|
it('keeps safe lowercase snake_case names verbatim', () => {
|
||||||
|
expect(slSourceFileName('orders')).toBe('orders.yaml');
|
||||||
|
expect(slSourceFileName('mart_account_segments')).toBe('mart_account_segments.yaml');
|
||||||
|
expect(slSourceFileName('orders2')).toBe('orders2.yaml');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('derives a slug-hash filename for any other name and never throws', () => {
|
||||||
|
expect(slSourceFileName('SIGNED_UP')).toMatch(/^signed_up-[0-9a-f]{8}\.yaml$/);
|
||||||
|
expect(slSourceFileName('EVENT$LOG')).toMatch(/^event_log-[0-9a-f]{8}\.yaml$/);
|
||||||
|
expect(slSourceFileName('my.dotted.name')).toMatch(/^my_dotted_name-[0-9a-f]{8}\.yaml$/);
|
||||||
|
expect(slSourceFileName('汉字')).toMatch(/^src-[0-9a-f]{8}\.yaml$/);
|
||||||
|
expect(slSourceFileName(' ')).toMatch(/^src-[0-9a-f]{8}\.yaml$/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('is deterministic', () => {
|
||||||
|
expect(slSourceFileName('EVENT$LOG')).toBe(slSourceFileName('EVENT$LOG'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('never emits path separators or traversal segments', () => {
|
||||||
|
for (const name of ['../orders', 'a/b', 'a\\b', '..', './x']) {
|
||||||
|
const fileName = slSourceFileName(name);
|
||||||
|
expect(fileName).not.toContain('/');
|
||||||
|
expect(fileName).not.toContain('\\');
|
||||||
|
expect(fileName).not.toContain('..');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps case-differing names disjoint on case-insensitive filesystems', () => {
|
||||||
|
// Safe-branch filenames contain no `-`; hash-branch filenames always end
|
||||||
|
// in `-<8 hex>` with a hash of the raw name, so `events` vs `EVENTS`
|
||||||
|
// cannot collide even when the filesystem folds case (macOS, Windows).
|
||||||
|
const lower = slSourceFileName('events');
|
||||||
|
const upper = slSourceFileName('EVENTS');
|
||||||
|
expect(lower).toBe('events.yaml');
|
||||||
|
expect(upper).toMatch(/^events-[0-9a-f]{8}\.yaml$/);
|
||||||
|
expect(upper.toLowerCase()).not.toBe(lower.toLowerCase());
|
||||||
|
expect(lower).not.toContain('-');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('routes Windows reserved device basenames through the hash branch', () => {
|
||||||
|
expect(slSourceFileName('con')).toMatch(/^con-[0-9a-f]{8}\.yaml$/);
|
||||||
|
expect(slSourceFileName('lpt1')).toMatch(/^lpt1-[0-9a-f]{8}\.yaml$/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('caps overlong names', () => {
|
||||||
|
const longName = `a${'b'.repeat(300)}`;
|
||||||
|
const fileName = slSourceFileName(longName);
|
||||||
|
expect(fileName.length).toBeLessThanOrEqual(64 + '-12345678.yaml'.length);
|
||||||
|
expect(fileName).toMatch(/^ab+-[0-9a-f]{8}\.yaml$/);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('slSourceFilePath', () => {
|
||||||
|
it('rejects unsafe connection ids but accepts any source name', () => {
|
||||||
|
expect(slSourceFilePath('warehouse', 'EVENT$LOG')).toMatch(
|
||||||
|
/^semantic-layer\/warehouse\/event_log-[0-9a-f]{8}\.yaml$/,
|
||||||
|
);
|
||||||
|
expect(() => slSourceFilePath('../warehouse', 'orders')).toThrow('Unsafe connection id');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('slSourceNameForFile', () => {
|
||||||
|
it('prefers the in-file name and falls back to the filename', () => {
|
||||||
|
expect(slSourceNameForFile('semantic-layer/warehouse/custom.yaml', 'name: SIGNED_UP\n')).toBe('SIGNED_UP');
|
||||||
|
expect(slSourceNameForFile('semantic-layer/warehouse/orders.yaml', 'measures: []\n')).toBe('orders');
|
||||||
|
expect(slSourceNameForFile('semantic-layer/warehouse/orders.yaml', 'measures: [unterminated\n')).toBe('orders');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('recovers the declared name when the file is broken below the name: line', () => {
|
||||||
|
// A human-renamed file left mid-edit keeps its identity: the syntax error is
|
||||||
|
// under `measures:`, so the top-level `name:` is still recoverable and must
|
||||||
|
// win over the (unrelated) filename.
|
||||||
|
expect(slSourceNameForFile('semantic-layer/warehouse/renamed-by-hand.yaml', 'name: SIGNED_UP\nmeasures: [oops\n')).toBe(
|
||||||
|
'SIGNED_UP',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('resolveSlSourceFile', () => {
|
||||||
|
let tempDir: string;
|
||||||
|
let project: KtxLocalProject;
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
tempDir = await mkdtemp(join(tmpdir(), 'ktx-sl-source-files-'));
|
||||||
|
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
await rm(tempDir, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
async function seed(path: string, content: string): Promise<void> {
|
||||||
|
await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', `seed ${path}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
it('matches by in-file name regardless of the filename', async () => {
|
||||||
|
await seed('semantic-layer/warehouse/renamed-by-hand.yaml', 'name: SIGNED_UP\nmeasures: []\n');
|
||||||
|
|
||||||
|
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'SIGNED_UP')).resolves.toEqual({
|
||||||
|
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||||
|
content: 'name: SIGNED_UP\nmeasures: []\n',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns null when no file declares the name and ignores manifest shards', async () => {
|
||||||
|
await seed('semantic-layer/warehouse/_schema/public.yaml', 'tables:\n orders:\n table: public.orders\n');
|
||||||
|
|
||||||
|
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).resolves.toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to the filename for broken YAML', async () => {
|
||||||
|
const broken = 'name: orders\nmeasures: [unterminated\n';
|
||||||
|
await seed('semantic-layer/warehouse/orders.yaml', broken);
|
||||||
|
|
||||||
|
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).resolves.toEqual({
|
||||||
|
path: 'semantic-layer/warehouse/orders.yaml',
|
||||||
|
content: broken,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('matches a human-renamed broken file by its still-recoverable name', async () => {
|
||||||
|
// Filename ≠ name, so the filename fallback cannot find it; resolution must
|
||||||
|
// come from the intact top-level `name:` even though the YAML is broken.
|
||||||
|
const broken = 'name: SIGNED_UP\nmeasures: [unterminated\n';
|
||||||
|
await seed('semantic-layer/warehouse/renamed-by-hand.yaml', broken);
|
||||||
|
|
||||||
|
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'SIGNED_UP')).resolves.toEqual({
|
||||||
|
path: 'semantic-layer/warehouse/renamed-by-hand.yaml',
|
||||||
|
content: broken,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('throws when two files declare the same source name', async () => {
|
||||||
|
await seed('semantic-layer/warehouse/orders.yaml', 'name: orders\nmeasures: []\n');
|
||||||
|
await seed('semantic-layer/warehouse/orders_copy.yaml', 'name: orders\nmeasures: []\n');
|
||||||
|
|
||||||
|
await expect(resolveSlSourceFile(project.fileStore, 'warehouse', 'orders')).rejects.toThrow(
|
||||||
|
'Multiple semantic-layer files declare source "orders"',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -188,7 +188,7 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
||||||
it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
|
it('returns a directed hint pointing at sl_write_source + overlay shape', async () => {
|
||||||
const { tool, semanticLayerService } = makeTool({
|
const { tool, semanticLayerService } = makeTool({
|
||||||
semanticLayerService: {
|
semanticLayerService: {
|
||||||
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||||
isManifestBacked: vi.fn().mockResolvedValue(true),
|
isManifestBacked: vi.fn().mockResolvedValue(true),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
@ -222,7 +222,7 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
||||||
it('still returns the plain "Source not found" error for truly-missing names', async () => {
|
it('still returns the plain "Source not found" error for truly-missing names', async () => {
|
||||||
const { tool, semanticLayerService } = makeTool({
|
const { tool, semanticLayerService } = makeTool({
|
||||||
semanticLayerService: {
|
semanticLayerService: {
|
||||||
readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
@ -241,3 +241,20 @@ describe('SlEditSourceTool — manifest-backed source without overlay', () => {
|
||||||
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('SlEditSourceTool — name edits', () => {
|
||||||
|
it('rejects edits that change the in-file name', async () => {
|
||||||
|
const { tool, semanticLayerService } = makeTool();
|
||||||
|
const result = await tool.call(
|
||||||
|
{
|
||||||
|
connectionId: '11111111-1111-1111-1111-111111111111',
|
||||||
|
sourceName: 'orders',
|
||||||
|
yaml_edits: [{ oldText: 'name: orders', newText: 'name: renamed_orders' }],
|
||||||
|
} as any,
|
||||||
|
baseContext,
|
||||||
|
);
|
||||||
|
expect(result.structured.success).toBe(false);
|
||||||
|
expect(result.markdown).toMatch(/renaming is not supported/i);
|
||||||
|
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,15 @@ function makeSession(overrides: Partial<ToolSession> = {}): ToolSession {
|
||||||
configService: {
|
configService: {
|
||||||
writeFile: vi.fn().mockResolvedValue(undefined),
|
writeFile: vi.fn().mockResolvedValue(undefined),
|
||||||
deleteFile: vi.fn().mockResolvedValue(undefined),
|
deleteFile: vi.fn().mockResolvedValue(undefined),
|
||||||
|
// No live file for `orders` — revert recovers the preHead path from history.
|
||||||
|
listFiles: vi.fn().mockResolvedValue({ files: [] }),
|
||||||
|
readFile: vi.fn().mockRejectedValue(new Error('ENOENT')),
|
||||||
|
} as any,
|
||||||
|
gitService: {
|
||||||
|
// The source lived at its derived filename at preHead.
|
||||||
|
listFilesAtCommit: vi.fn().mockResolvedValue(['semantic-layer/conn-1/orders.yaml']),
|
||||||
|
getFileAtCommit: vi.fn().mockResolvedValue('name: orders\nmeasures: []\n'),
|
||||||
} as any,
|
} as any,
|
||||||
gitService: { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') } as any,
|
|
||||||
...overrides,
|
...overrides,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -65,4 +72,33 @@ describe('SlRollbackTool', () => {
|
||||||
expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
|
expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(false);
|
||||||
expect(session.actions).toEqual([]);
|
expect(session.actions).toEqual([]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('restores a deleted human-renamed source at the path it occupied at preHead', async () => {
|
||||||
|
// The source lived at a custom filename (≠ the writer-derived `orders.yaml`)
|
||||||
|
// and the session deleted it. Revert must recover the custom path from the
|
||||||
|
// preHead commit and restore there, not write/no-op against the derived path.
|
||||||
|
const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) };
|
||||||
|
const tool = new SlRollbackTool(slSourcesRepository as never, connections as never, 1);
|
||||||
|
const renamedContent = 'name: orders\ntable: public.orders\nmeasures: []\n';
|
||||||
|
const session = makeSession({
|
||||||
|
gitService: {
|
||||||
|
listFilesAtCommit: vi.fn().mockResolvedValue(['semantic-layer/conn-1/custom.yaml']),
|
||||||
|
getFileAtCommit: vi.fn().mockResolvedValue(renamedContent),
|
||||||
|
} as any,
|
||||||
|
});
|
||||||
|
const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session };
|
||||||
|
|
||||||
|
const result = await tool.call({ sourceName: 'orders' } as any, context);
|
||||||
|
|
||||||
|
expect(result.structured.success).toBe(true);
|
||||||
|
expect((session.configService as any).writeFile).toHaveBeenCalledWith(
|
||||||
|
'semantic-layer/conn-1/custom.yaml',
|
||||||
|
renamedContent,
|
||||||
|
expect.anything(),
|
||||||
|
expect.anything(),
|
||||||
|
expect.anything(),
|
||||||
|
expect.anything(),
|
||||||
|
);
|
||||||
|
expect((session.configService as any).deleteFile).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ function makeTool(overrides: Partial<Record<string, any>> = {}) {
|
||||||
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
|
validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }),
|
||||||
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
|
writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }),
|
||||||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||||
...overrides.semanticLayerService,
|
...overrides.semanticLayerService,
|
||||||
};
|
};
|
||||||
const slSearchService = {
|
const slSearchService = {
|
||||||
|
|
@ -66,7 +66,7 @@ describe('SlWriteSourceTool — session gating', () => {
|
||||||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||||
listManifestSourceNames: vi.fn().mockResolvedValue([]),
|
listManifestSourceNames: vi.fn().mockResolvedValue([]),
|
||||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||||
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
||||||
} as any,
|
} as any,
|
||||||
wikiService: {} as any,
|
wikiService: {} as any,
|
||||||
|
|
@ -248,7 +248,7 @@ describe('SlWriteSourceTool — session gating', () => {
|
||||||
deleteSource: vi.fn().mockResolvedValue(undefined),
|
deleteSource: vi.fn().mockResolvedValue(undefined),
|
||||||
listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
|
listManifestSourceNames: vi.fn().mockResolvedValue(['mart_account_segments']),
|
||||||
isManifestBacked: vi.fn().mockResolvedValue(false),
|
isManifestBacked: vi.fn().mockResolvedValue(false),
|
||||||
readSourceFile: vi.fn().mockRejectedValue(new Error('not found')),
|
readSourceFile: vi.fn().mockResolvedValue(null),
|
||||||
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
findManifestEntryByTableRef: vi.fn().mockResolvedValue(null),
|
||||||
} as any,
|
} as any,
|
||||||
});
|
});
|
||||||
|
|
@ -377,3 +377,36 @@ describe('SlWriteSourceTool — standalone shadow guard', () => {
|
||||||
expect(result.markdown).toMatch(/shadows an existing manifest entry|already exists/i);
|
expect(result.markdown).toMatch(/shadows an existing manifest entry|already exists/i);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('SlWriteSourceTool — source name identity', () => {
|
||||||
|
it('accepts verbatim warehouse identifiers as sourceName', () => {
|
||||||
|
const { tool } = makeTool();
|
||||||
|
const base = { connectionId: '11111111-1111-1111-1111-111111111111' };
|
||||||
|
expect(tool.inputSchema.safeParse({ ...base, sourceName: 'SIGNED_UP' }).success).toBe(true);
|
||||||
|
expect(tool.inputSchema.safeParse({ ...base, sourceName: 'EVENT$LOG' }).success).toBe(true);
|
||||||
|
expect(tool.inputSchema.safeParse({ ...base, sourceName: 'orders' }).success).toBe(true);
|
||||||
|
expect(tool.inputSchema.safeParse({ ...base, sourceName: '' }).success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects a source whose name does not match sourceName', async () => {
|
||||||
|
const { tool, semanticLayerService } = makeTool();
|
||||||
|
const result = await tool.call(
|
||||||
|
{
|
||||||
|
connectionId: '11111111-1111-1111-1111-111111111111',
|
||||||
|
sourceName: 'orders',
|
||||||
|
source: {
|
||||||
|
name: 'other_orders',
|
||||||
|
sql: 'select 1 as id',
|
||||||
|
grain: ['id'],
|
||||||
|
columns: [{ name: 'id', type: 'string' }],
|
||||||
|
measures: [],
|
||||||
|
joins: [],
|
||||||
|
} as any,
|
||||||
|
} as any,
|
||||||
|
baseContext,
|
||||||
|
);
|
||||||
|
expect(result.structured.success).toBe(false);
|
||||||
|
expect(result.markdown).toMatch(/does not match sourceName/);
|
||||||
|
expect(semanticLayerService.writeSource).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
||||||
29
packages/cli/test/context/sql-analysis/dialect.test.ts
Normal file
29
packages/cli/test/context/sql-analysis/dialect.test.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { sqlAnalysisDialectForDriver } from '../../../src/context/sql-analysis/dialect.js';
|
||||||
|
|
||||||
|
describe('sqlAnalysisDialectForDriver', () => {
|
||||||
|
it('maps ktx.yaml driver names to sqlglot dialects', () => {
|
||||||
|
expect(sqlAnalysisDialectForDriver('postgres')).toBe('postgres');
|
||||||
|
expect(sqlAnalysisDialectForDriver('bigquery')).toBe('bigquery');
|
||||||
|
expect(sqlAnalysisDialectForDriver('snowflake')).toBe('snowflake');
|
||||||
|
expect(sqlAnalysisDialectForDriver('mysql')).toBe('mysql');
|
||||||
|
expect(sqlAnalysisDialectForDriver('sqlserver')).toBe('tsql');
|
||||||
|
expect(sqlAnalysisDialectForDriver('sqlite')).toBe('sqlite');
|
||||||
|
expect(sqlAnalysisDialectForDriver('duckdb')).toBe('duckdb');
|
||||||
|
expect(sqlAnalysisDialectForDriver('clickhouse')).toBe('clickhouse');
|
||||||
|
expect(sqlAnalysisDialectForDriver('databricks')).toBe('databricks');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('maps local connection-type spellings to sqlglot dialects', () => {
|
||||||
|
expect(sqlAnalysisDialectForDriver('POSTGRESQL')).toBe('postgres');
|
||||||
|
expect(sqlAnalysisDialectForDriver('SQLSERVER')).toBe('tsql');
|
||||||
|
expect(sqlAnalysisDialectForDriver('BIGQUERY')).toBe('bigquery');
|
||||||
|
expect(sqlAnalysisDialectForDriver('SQLITE')).toBe('sqlite');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('defaults to postgres for unknown or missing drivers', () => {
|
||||||
|
expect(sqlAnalysisDialectForDriver(undefined)).toBe('postgres');
|
||||||
|
expect(sqlAnalysisDialectForDriver('')).toBe('postgres');
|
||||||
|
expect(sqlAnalysisDialectForDriver('unknown')).toBe('postgres');
|
||||||
|
});
|
||||||
|
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue