mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
fix: read semantic sources safely (#284)
* fix: read semantic sources safely
* test: retarget reindex per-scope error case to a broken manifest
Reading a broken standalone source was made non-fatal in de1f1a8d (it is
surfaced for repair instead of throwing), so the reindex per-scope error
test no longer captured an error. Point it at a corrupt manifest shard,
which is the remaining fatal read failure the per-scope catch must
isolate, and assert the captured error names the offending file.
* fix(sl): decouple semantic-layer file names from warehouse naming rules
The in-file `name:` field is now the sole source identity; the filename is
a derived label that never participates in identity. This removes the
"Unsafe semantic-layer source name" failure class entirely: any warehouse
identifier (Snowflake's uppercase SIGNED_UP, EVENT$LOG, dotted names) can
be read, overlaid, edited, and deleted.
- New `source-files.ts`: one total filename derivation (safe lowercase
names verbatim; otherwise slug + sha256-hash suffix, immune to
case-insensitive-filesystem collisions) and one by-name file resolver.
- Reads resolve by name everywhere; the path-from-name fast path and
`assertSafeSourceName` are gone.
- Writes resolve-then-write: rewrites land on the file that declares the
name (human renames survive); new sources get a derived filename; a
derived path occupied by a different source fails instead of clobbering.
- `readSourceFile` returns null for missing files instead of forcing every
caller to launder IO errors; `deleteSource` distinguishes manifest-backed
sources from not-found instead of silently succeeding.
- `sl_write_source` accepts verbatim warehouse identifiers (snake_case is
now a recommendation for new sources) and rejects sourceName/source.name
mismatches; `sl_edit_source` rejects name-changing edits.
- Ingest projection commits, gate-repair allowlists, and touched-source
derivation use resolved paths / in-file names instead of interpolating
`<connId>/<name>.yaml`.
- Collapsed the five parallel path derivations and duplicated path-token
helpers onto the shared module; dropped dead service methods.
* fix(sl): resolve sources by declared name end-to-end and gate warehouse SQL with the parser-backed validator
- Key broken/renamed semantic-layer files by their recoverable in-file
name (slSourceNameForFile) so mid-edit sources stay reachable under
their real identity in reads, listings, and search
- Derive finalization touched sources from composed-source diffs and
recover deleted files' declared names from the pre-change commit
instead of parsing hash-derived filenames
- Resolve revert/rollback paths against history (listFilesAtCommit) so
human-renamed files are restored where they lived at preHead
- Validate ingest sql_execution through the daemon's sqlglot
validateReadOnly in the connection's dialect, sharing one
driver-to-dialect map (sql-analysis/dialect.ts) across MCP and ingest
- Harden the local read-only SQL backstop: accept leading comments,
reject smuggled second statements, and strip trailing
semicolons/comments before row-limit wrapping
This commit is contained in:
parent
853f39a7c3
commit
f3f893bf01
51 changed files with 1797 additions and 476 deletions
|
|
@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string {
|
|||
return url;
|
||||
}
|
||||
|
||||
function stripLeadingSqlComments(sql: string): string {
|
||||
let index = 0;
|
||||
while (index < sql.length) {
|
||||
while (/\s/.test(sql[index] ?? '')) {
|
||||
index += 1;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
index = end === -1 ? sql.length : end + 1;
|
||||
continue;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
if (end === -1) {
|
||||
return sql.slice(index);
|
||||
}
|
||||
index = end + 2;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return sql.slice(index);
|
||||
}
|
||||
|
||||
export function isKtxSqliteConnectionConfig(
|
||||
connection: KtxSqliteConnectionConfig | undefined,
|
||||
): connection is KtxSqliteConnectionConfig {
|
||||
|
|
@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
|||
|
||||
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
|
||||
const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params);
|
||||
return { ...result, rowCount: result.rows.length };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
|
||||
import { assertReadOnlySql, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
|
|
@ -284,7 +284,7 @@ function isDeniedError(error: unknown): boolean {
|
|||
}
|
||||
|
||||
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
|
||||
const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
|
||||
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sqlText));
|
||||
if (!maxRows) {
|
||||
return trimmed;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,16 +2,133 @@ const MUTATING_SQL =
|
|||
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
|
||||
const READ_SQL = /^\s*(select|with)\b/i;
|
||||
|
||||
// Agents (and the daemon's sqlglot validator, which ignores comments) routinely
|
||||
// emit read-only queries prefixed with `-- ...` or `/* ... */`. Strip leading
|
||||
// comments so the prefix check sees the real statement; otherwise valid SELECT/WITH
|
||||
// SQL is rejected here while the parser-backed validator accepts it.
|
||||
function stripLeadingSqlComments(sql: string): string {
|
||||
let index = 0;
|
||||
while (index < sql.length) {
|
||||
while (/\s/.test(sql[index] ?? '')) {
|
||||
index += 1;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
index = end === -1 ? sql.length : end + 1;
|
||||
continue;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
if (end === -1) {
|
||||
return sql.slice(index);
|
||||
}
|
||||
index = end + 2;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return sql.slice(index);
|
||||
}
|
||||
|
||||
// Lexes past one string literal, quoted identifier, or comment starting at
|
||||
// `index`, using standard-SQL rules ('' and "" escapes; no dialect extensions
|
||||
// such as backslash escapes or dollar quoting). Returns the index after the
|
||||
// token, or `index` unchanged when no quoted/comment token starts there.
|
||||
function skipQuotedOrComment(sql: string, index: number): number {
|
||||
const quote = sql[index];
|
||||
if (quote === "'" || quote === '"') {
|
||||
let i = index + 1;
|
||||
while (i < sql.length) {
|
||||
if (sql[i] === quote) {
|
||||
if (sql[i + 1] === quote) {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
return i + 1;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
return sql.length;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
return end === -1 ? sql.length : end + 1;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
return end === -1 ? sql.length : end + 2;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
// Backstop against statement smuggling (`select 1; drop table x`): reject any
|
||||
// semicolon that is followed by real content. Semicolons inside string
|
||||
// literals, quoted identifiers, and comments are fine, as are trailing
|
||||
// semicolons (optionally followed by whitespace and comments). This deliberately
|
||||
// lexes standard SQL only, so dialect-specific escapes can cause a false
|
||||
// reject — never a false accept; the canonical gate is the daemon's
|
||||
// sqlglot-backed validateReadOnly.
|
||||
function assertSingleSqlStatement(sql: string): void {
|
||||
let index = 0;
|
||||
let sawSemicolon = false;
|
||||
while (index < sql.length) {
|
||||
const skipped = skipQuotedOrComment(sql, index);
|
||||
if (skipped > index) {
|
||||
index = skipped;
|
||||
continue;
|
||||
}
|
||||
if (sql[index] === ';') {
|
||||
sawSemicolon = true;
|
||||
} else if (sawSemicolon && !/\s/.test(sql[index])) {
|
||||
throw new Error('Only one SQL statement can be executed.');
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
export function assertReadOnlySql(sql: string): string {
|
||||
const trimmed = sql.trim();
|
||||
const trimmed = stripLeadingSqlComments(sql).trim();
|
||||
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
|
||||
throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
|
||||
}
|
||||
assertSingleSqlStatement(trimmed);
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
// `assertReadOnlySql` deliberately keeps trailing semicolons, comments, and
|
||||
// whitespace (e.g. `select 1; -- done`) — harmless for direct single-statement
|
||||
// execution. A row-limit subquery wrapper needs a bare expression instead: a
|
||||
// trailing `;` would sit illegally inside the subquery, and a trailing line
|
||||
// comment would comment out the closing paren and limit clause. Lex forward with
|
||||
// the same standard-SQL rules as the single-statement gate and truncate at the
|
||||
// end of the last meaningful token, dropping trailing semicolons, comments, and
|
||||
// whitespace. Characters inside string literals and quoted identifiers stay
|
||||
// meaningful, so a `;` or `--` within a literal is never mistaken for a
|
||||
// terminator (a plain regex cannot make that distinction).
|
||||
export function stripTrailingSqlNoise(sql: string): string {
|
||||
let index = 0;
|
||||
let meaningfulEnd = 0;
|
||||
while (index < sql.length) {
|
||||
if (sql.startsWith('--', index) || sql.startsWith('/*', index)) {
|
||||
index = skipQuotedOrComment(sql, index);
|
||||
continue;
|
||||
}
|
||||
const afterQuoted = skipQuotedOrComment(sql, index);
|
||||
if (afterQuoted > index) {
|
||||
meaningfulEnd = afterQuoted;
|
||||
index = afterQuoted;
|
||||
continue;
|
||||
}
|
||||
if (sql[index] !== ';' && !/\s/.test(sql[index] ?? '')) {
|
||||
meaningfulEnd = index + 1;
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
return sql.slice(0, meaningfulEnd);
|
||||
}
|
||||
|
||||
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
|
||||
const trimmed = assertReadOnlySql(sql).replace(/;+\s*$/, '');
|
||||
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sql));
|
||||
if (!maxRows) {
|
||||
return trimmed;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -557,12 +557,13 @@ export class GitService {
|
|||
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
* List all paths matching `pathSpec` as they exist at `commitHash`. Reads from
|
||||
* git object storage, so it's safe against concurrent working-tree mutations
|
||||
* and can recover paths (e.g. a human-renamed file) that no longer exist on disk.
|
||||
*/
|
||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||
async listFilesAtCommit(pathSpec: string, commitHash: string): Promise<string[]> {
|
||||
try {
|
||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', 'HEAD', '--', pathSpec]);
|
||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', commitHash, '--', pathSpec]);
|
||||
if (!raw) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -572,6 +573,14 @@ export class GitService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
*/
|
||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||
return this.listFilesAtCommit(pathSpec, 'HEAD');
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapse all commits between `preHead` and current HEAD into a single commit with the given
|
||||
* message. Used by the memory agent to squash N per-tool-call commits into one ingest commit.
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { dirname, join, relative } from 'node:path';
|
|||
import YAML from 'yaml';
|
||||
import type { MemoryAction } from '../../../../context/memory/types.js';
|
||||
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
|
||||
import { isSlYamlPath } from '../../../sl/source-files.js';
|
||||
import type { FinalizationOverrideReplay } from '../../types.js';
|
||||
import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
|
||||
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
|
||||
|
|
@ -251,7 +252,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
|
||||
|
||||
const schemaRoot = join(input.workdir, 'semantic-layer', input.connectionId, '_schema');
|
||||
for (const file of (await walkFiles(schemaRoot)).filter((candidate) => candidate.endsWith('.yaml') || candidate.endsWith('.yml'))) {
|
||||
for (const file of (await walkFiles(schemaRoot)).filter(isSlYamlPath)) {
|
||||
const path = join(schemaRoot, file);
|
||||
const before = await readFile(path, 'utf-8');
|
||||
const shard = (YAML.parse(before) ?? {}) as ManifestShard;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
||||
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||
import type { IngestTraceWriter } from './ingest-trace.js';
|
||||
import { traceTimed } from './ingest-trace.js';
|
||||
|
||||
|
|
@ -149,11 +148,13 @@ function buildToolSet(input: {
|
|||
|
||||
export function finalGateRepairPaths(input: {
|
||||
changedWikiPageKeys: string[];
|
||||
touchedSlSources: TouchedSlSource[];
|
||||
// Resolved by the caller: SL filenames are derived labels, so the repair
|
||||
// allowlist must carry the real on-disk paths, not name-interpolated ones.
|
||||
touchedSlSourcePaths: string[];
|
||||
}): string[] {
|
||||
return [
|
||||
...new Set([
|
||||
...input.touchedSlSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
||||
...input.touchedSlSourcePaths,
|
||||
...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
||||
]),
|
||||
].sort();
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
||||
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
||||
import type { TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||
import type { IngestReportFinalizationMismatch } from './reports.js';
|
||||
|
|
@ -64,39 +65,36 @@ export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
|
|||
);
|
||||
}
|
||||
|
||||
export async function deriveFinalizationTouchedSources(
|
||||
input: DeriveTouchedSourcesInput,
|
||||
): Promise<DeriveTouchedSourcesResult> {
|
||||
// Source identity is the in-file `name:`; filenames are derived labels (see
|
||||
// source-files.ts), so a changed path — manifest shard or standalone file —
|
||||
// cannot be mapped to a source by parsing its filename. Instead, every changed
|
||||
// semantic-layer file is attributed through the before/after diff of its
|
||||
// connection's composed sources. A changed file whose connection diff is empty
|
||||
// cannot be attributed to any source and is surfaced as unresolved.
|
||||
export function deriveFinalizationTouchedSources(input: DeriveTouchedSourcesInput): DeriveTouchedSourcesResult {
|
||||
const touched = new Map<string, TouchedSlSource>();
|
||||
const unresolvedPaths: string[] = [];
|
||||
|
||||
const pathsByConnection = new Map<string, string[]>();
|
||||
for (const path of input.changedPaths) {
|
||||
if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
|
||||
if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path)) {
|
||||
continue;
|
||||
}
|
||||
const parts = path.split('/');
|
||||
const connectionId = parts[1] ?? '';
|
||||
const connectionId = path.split('/')[1] ?? '';
|
||||
if (!connectionId) {
|
||||
unresolvedPaths.push(path);
|
||||
continue;
|
||||
}
|
||||
if (parts[2] !== '_schema') {
|
||||
const fileName = parts.at(-1) ?? '';
|
||||
const sourceName = fileName.replace(/\.ya?ml$/, '');
|
||||
if (!sourceName) {
|
||||
unresolvedPaths.push(path);
|
||||
continue;
|
||||
}
|
||||
touched.set(`${connectionId}:${sourceName}`, { connectionId, sourceName });
|
||||
continue;
|
||||
}
|
||||
pathsByConnection.set(connectionId, [...(pathsByConnection.get(connectionId) ?? []), path]);
|
||||
}
|
||||
|
||||
for (const [connectionId, paths] of pathsByConnection) {
|
||||
const changedNames = changedSourceNames(
|
||||
input.beforeSourcesByConnection.get(connectionId) ?? [],
|
||||
input.afterSourcesByConnection.get(connectionId) ?? [],
|
||||
);
|
||||
if (changedNames.length === 0) {
|
||||
unresolvedPaths.push(path);
|
||||
unresolvedPaths.push(...paths);
|
||||
continue;
|
||||
}
|
||||
for (const sourceName of changedNames) {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime
|
|||
import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js';
|
||||
import type { CaptureSession, MemoryAction } from '../../context/memory/types.js';
|
||||
import type { SemanticLayerService } from '../../context/sl/semantic-layer.service.js';
|
||||
import { isSlYamlPath, slSourceFilePath, slSourceNameForFile, sourceNameFromPath } from '../../context/sl/source-files.js';
|
||||
import type { SemanticLayerSource } from '../../context/sl/types.js';
|
||||
import type { SlValidationDeps } from '../../context/sl/tools/sl-warehouse-validation.js';
|
||||
import { createTouchedSlSources, type TouchedSlSource } from '../../context/tools/touched-sl-sources.js';
|
||||
|
|
@ -498,7 +499,7 @@ export class IngestBundleRunner {
|
|||
const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId);
|
||||
const names = files
|
||||
.filter((f) => !f.startsWith('_schema/'))
|
||||
.map((f) => f.replace(/\.yaml$/, ''))
|
||||
.map((f) => sourceNameFromPath(f))
|
||||
.sort((left, right) => left.localeCompare(right));
|
||||
const body = names.length > 0 ? names.join('\n') : '(no sources yet)';
|
||||
return `## ${connectionId}\n${body}`;
|
||||
|
|
@ -791,14 +792,52 @@ export class IngestBundleRunner {
|
|||
].sort();
|
||||
}
|
||||
|
||||
private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] {
|
||||
return paths
|
||||
.filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/'))
|
||||
.map((path) => {
|
||||
const [, connectionId, fileName] = path.split('/');
|
||||
return { connectionId: connectionId ?? '', sourceName: (fileName ?? '').replace(/\.yaml$/, '') };
|
||||
})
|
||||
.filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0);
|
||||
private async touchedSlSourcesFromPaths(
|
||||
worktree: IngestSessionWorktree,
|
||||
paths: string[],
|
||||
deletedFileSha: string,
|
||||
): Promise<TouchedSlSource[]> {
|
||||
const sources: TouchedSlSource[] = [];
|
||||
for (const path of paths) {
|
||||
if (!path.startsWith('semantic-layer/') || !isSlYamlPath(path) || path.includes('/_schema/')) {
|
||||
continue;
|
||||
}
|
||||
const [, connectionId] = path.split('/');
|
||||
if (!connectionId) {
|
||||
continue;
|
||||
}
|
||||
// Source identity is the in-file `name:`, never the filename — an uppercase
|
||||
// warehouse source like `WIDGET_SALES` lives in a hash-derived
|
||||
// `widget_sales-<hash>.yaml`, so parsing the basename yields a phantom name.
|
||||
// Read the live file; when it was deleted this run, recover its declared
|
||||
// name from the pre-change commit the way `revertSourceToPreHead` resolves a
|
||||
// gone file from history. The filename is a last resort only when the content
|
||||
// is unrecoverable from both.
|
||||
let content: string | null;
|
||||
try {
|
||||
content = await readFile(join(worktree.workdir, path), 'utf-8');
|
||||
} catch {
|
||||
content = await worktree.git.getFileAtCommit(path, deletedFileSha).catch(() => null);
|
||||
}
|
||||
const sourceName = content === null ? sourceNameFromPath(path) : slSourceNameForFile(path, content);
|
||||
if (sourceName.length > 0) {
|
||||
sources.push({ connectionId, sourceName });
|
||||
}
|
||||
}
|
||||
return sources;
|
||||
}
|
||||
|
||||
// Inverse direction for commits and repair allowlists: resolve each touched
|
||||
// source to its real on-disk path, falling back to the writer's derived
|
||||
// filename when the file was deleted in this run.
|
||||
private async touchedSlSourcePaths(workdir: string, touched: TouchedSlSource[]): Promise<string[]> {
|
||||
const service = this.deps.semanticLayerService.forWorktree(workdir);
|
||||
const paths: string[] = [];
|
||||
for (const source of touched) {
|
||||
const file = await service.readSourceFile(source.connectionId, source.sourceName);
|
||||
paths.push(file?.path ?? slSourceFilePath(source.connectionId, source.sourceName));
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
|
||||
private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] {
|
||||
|
|
@ -1558,7 +1597,7 @@ export class IngestBundleRunner {
|
|||
projectionTouchedSources = projection.touchedSources;
|
||||
projectionChangedWikiPageKeys = projection.changedWikiPageKeys;
|
||||
const projectionPaths = [
|
||||
...projection.touchedSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`),
|
||||
...(await this.touchedSlSourcePaths(sessionWorktree.workdir, projection.touchedSources)),
|
||||
...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`),
|
||||
];
|
||||
projectionTouchedPaths = projectionPaths;
|
||||
|
|
@ -1740,7 +1779,11 @@ export class IngestBundleRunner {
|
|||
await validateFinalIngestArtifacts({
|
||||
connectionIds: slConnectionIds,
|
||||
changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths),
|
||||
touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths),
|
||||
touchedSlSources: await this.touchedSlSourcesFromPaths(
|
||||
sessionWorktree,
|
||||
touchedPaths,
|
||||
await sessionWorktree.git.revParseHead(),
|
||||
),
|
||||
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
||||
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
||||
validateTouchedSources: (touched) =>
|
||||
|
|
@ -2289,20 +2332,34 @@ export class IngestBundleRunner {
|
|||
)
|
||||
: [];
|
||||
|
||||
const changedConnectionIds = [
|
||||
...new Set([
|
||||
...slConnectionIds,
|
||||
...finalizationTouchedPaths
|
||||
.filter((path) => path.startsWith('semantic-layer/'))
|
||||
.map((path) => path.split('/')[1])
|
||||
.filter((connectionId): connectionId is string => Boolean(connectionId)),
|
||||
]),
|
||||
].sort();
|
||||
// Validate the write scope before deriving touched sources: attribution
|
||||
// by before/after diff is only defined for connections whose
|
||||
// pre-finalization snapshot was loaded (slConnectionIds), and an
|
||||
// out-of-scope write would otherwise surface downstream as a bogus
|
||||
// unresolved-path or declaration-mismatch failure instead of the real
|
||||
// policy violation.
|
||||
await traceTimed(
|
||||
runTrace,
|
||||
'finalization',
|
||||
'semantic_layer_target_policy',
|
||||
{
|
||||
sourceKey: job.sourceKey,
|
||||
allowedTargetConnectionIds: slConnectionIds,
|
||||
touchedPaths: [...new Set(finalizationTouchedPaths)].sort(),
|
||||
},
|
||||
async () => {
|
||||
assertSemanticLayerTargetPathsAllowed({
|
||||
paths: finalizationTouchedPaths,
|
||||
allowedConnectionIds: new Set(slConnectionIds),
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
const postFinalizationSourcesByConnection = await this.loadSourcesByConnection(
|
||||
sessionWorktree.workdir,
|
||||
changedConnectionIds,
|
||||
slConnectionIds,
|
||||
);
|
||||
const scope = await deriveFinalizationTouchedSources({
|
||||
const scope = deriveFinalizationTouchedSources({
|
||||
changedPaths: finalizationTouchedPaths,
|
||||
beforeSourcesByConnection: preFinalizationSourcesByConnection,
|
||||
afterSourcesByConnection: postFinalizationSourcesByConnection,
|
||||
|
|
@ -2437,7 +2494,7 @@ export class IngestBundleRunner {
|
|||
...(isolatedDiffEnabled ? projectionTouchedSources : []),
|
||||
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
|
||||
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
|
||||
...this.touchedSlSourcesFromPaths(postReconciliationPaths),
|
||||
...(await this.touchedSlSourcesFromPaths(sessionWorktree, postReconciliationPaths, preReconciliationSha)),
|
||||
...finalizationTouchedSources,
|
||||
]);
|
||||
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
|
||||
|
|
@ -2528,7 +2585,7 @@ export class IngestBundleRunner {
|
|||
const gateError = this.errorMessage(error);
|
||||
const repairPaths = finalGateRepairPaths({
|
||||
changedWikiPageKeys: finalChangedWikiPageKeys,
|
||||
touchedSlSources: finalTouchedSlSources,
|
||||
touchedSlSourcePaths: await this.touchedSlSourcePaths(sessionWorktree.workdir, finalTouchedSlSources),
|
||||
});
|
||||
emitStageProgress('final_gates', 89, 'Repairing final artifact gates');
|
||||
const gateRepair = await repairFinalGateFailure({
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { fileURLToPath } from 'node:url';
|
|||
import YAML from 'yaml';
|
||||
import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js';
|
||||
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
||||
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||
import type { KtxLogger } from '../../context/core/config.js';
|
||||
import { noopLogger } from '../../context/core/config.js';
|
||||
|
|
@ -95,6 +96,7 @@ export interface CreateLocalBundleIngestRuntimeOptions {
|
|||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
jobIdFactory?: () => string;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: KtxEmbeddingProvider | null;
|
||||
|
|
@ -271,16 +273,13 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
|||
}
|
||||
|
||||
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
||||
let content: string;
|
||||
try {
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
content = file.content;
|
||||
} catch (error) {
|
||||
return this.validateComposedSource(deps, connectionId, sourceName, error);
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
return this.validateComposedSource(deps, connectionId, sourceName, 'no standalone or overlay file found');
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = YAML.parse(content) as unknown as Record<string, unknown>;
|
||||
const parsed = YAML.parse(file.content) as unknown as Record<string, unknown>;
|
||||
return this.validateParsedSource(sourceName, parsed);
|
||||
} catch (error) {
|
||||
return {
|
||||
|
|
@ -519,6 +518,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
authorResolver: GitAuthorResolverPort;
|
||||
slSourcesRepository: SlSourcesIndexPort;
|
||||
connections: SlConnectionCatalogPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
contextStore: SqliteContextEvidenceStore;
|
||||
embedding: KtxEmbeddingPort;
|
||||
}) {
|
||||
|
|
@ -551,6 +551,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 });
|
||||
const warehouseVerificationTools = createWarehouseVerificationTools({
|
||||
connections: deps.connections,
|
||||
...(deps.sqlAnalysis ? { sqlAnalysis: deps.sqlAnalysis } : {}),
|
||||
fallbackFileStore: deps.project.fileStore,
|
||||
wikiSearchTool,
|
||||
slDiscoverTool,
|
||||
|
|
@ -699,6 +700,7 @@ export function createLocalBundleIngestRuntime(
|
|||
authorResolver: new LocalAuthorResolver(),
|
||||
slSourcesRepository,
|
||||
connections,
|
||||
...(options.sqlAnalysis ? { sqlAnalysis: options.sqlAnalysis } : {}),
|
||||
contextStore,
|
||||
embedding,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { randomUUID } from 'node:crypto';
|
|||
import { cp, mkdir, rm } from 'node:fs/promises';
|
||||
import { isAbsolute, resolve } from 'node:path';
|
||||
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
|
||||
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import type { KtxLogger } from '../../context/core/config.js';
|
||||
import { createAbortError, isAbortError } from '../../context/core/abort.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
||||
|
|
@ -35,6 +36,7 @@ export interface RunLocalIngestOptions {
|
|||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
||||
abortSignal?: AbortSignal;
|
||||
|
|
@ -159,6 +161,7 @@ async function runScheduledPullJob(options: {
|
|||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null;
|
||||
abortSignal?: AbortSignal;
|
||||
|
|
@ -214,6 +217,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
|
|||
memoryModel: options.memoryModel,
|
||||
semanticLayerCompute: options.semanticLayerCompute,
|
||||
queryExecutor: options.queryExecutor,
|
||||
sqlAnalysis: options.sqlAnalysis,
|
||||
logger: options.logger,
|
||||
embeddingProvider: options.embeddingProvider,
|
||||
abortSignal: options.abortSignal,
|
||||
|
|
@ -397,6 +401,7 @@ export async function runLocalMetabaseIngest(
|
|||
memoryModel: options.memoryModel,
|
||||
semanticLayerCompute: options.semanticLayerCompute,
|
||||
queryExecutor: options.queryExecutor,
|
||||
sqlAnalysis: options.sqlAnalysis,
|
||||
logger: options.logger,
|
||||
embeddingProvider: options.embeddingProvider,
|
||||
abortSignal: options.abortSignal,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import type { KtxFileStorePort } from '../../../core/file-store.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../sl/ports.js';
|
||||
import type { SqlAnalysisPort } from '../../../sql-analysis/ports.js';
|
||||
import { WarehouseCatalogService } from '../../../scan/warehouse-catalog.js';
|
||||
import type { BaseTool, ToolContext } from '../../../tools/base-tool.js';
|
||||
import { DiscoverDataTool } from './discover-data.tool.js';
|
||||
|
|
@ -8,6 +9,7 @@ import { SqlExecutionTool } from './sql-execution.tool.js';
|
|||
|
||||
export function createWarehouseVerificationTools(deps: {
|
||||
connections: SlConnectionCatalogPort;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
fallbackFileStore: KtxFileStorePort;
|
||||
wikiSearchTool: BaseTool;
|
||||
slDiscoverTool: BaseTool;
|
||||
|
|
@ -18,7 +20,7 @@ export function createWarehouseVerificationTools(deps: {
|
|||
});
|
||||
return [
|
||||
new EntityDetailsTool(catalogFactory),
|
||||
new SqlExecutionTool(deps.connections),
|
||||
new SqlExecutionTool(deps.connections, deps.sqlAnalysis),
|
||||
new DiscoverDataTool({
|
||||
wikiSearchTool: deps.wikiSearchTool,
|
||||
slDiscoverTool: deps.slDiscoverTool,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { z } from 'zod';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../../../context/connections/read-only-sql.js';
|
||||
import type { SlConnectionCatalogPort } from '../../../../context/sl/ports.js';
|
||||
import { sqlAnalysisDialectForDriver } from '../../../../context/sql-analysis/dialect.js';
|
||||
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../../../context/tools/base-tool.js';
|
||||
|
||||
const sqlExecutionInputSchema = z.object({
|
||||
|
|
@ -40,7 +42,10 @@ function markdownTable(headers: string[], rows: unknown[][], totalRows: number):
|
|||
export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
||||
readonly name = 'sql_execution';
|
||||
|
||||
constructor(private readonly connections: SlConnectionCatalogPort) {
|
||||
constructor(
|
||||
private readonly connections: SlConnectionCatalogPort,
|
||||
private readonly sqlAnalysis?: SqlAnalysisPort,
|
||||
) {
|
||||
super();
|
||||
}
|
||||
|
||||
|
|
@ -69,9 +74,24 @@ export class SqlExecutionTool extends BaseTool<typeof sqlExecutionInputSchema> {
|
|||
};
|
||||
}
|
||||
|
||||
if (!this.sqlAnalysis) {
|
||||
throw new Error('sql_execution requires parser-backed SQL validation.');
|
||||
}
|
||||
|
||||
let sql: string;
|
||||
let wrappedSql: string;
|
||||
try {
|
||||
const connection = await this.connections.getConnectionById(input.connectionId);
|
||||
if (!connection) {
|
||||
throw new Error(`Connection not found: ${input.connectionId}`);
|
||||
}
|
||||
const validation = await this.sqlAnalysis.validateReadOnly(
|
||||
input.sql,
|
||||
sqlAnalysisDialectForDriver(connection.connectionType),
|
||||
);
|
||||
if (!validation.ok) {
|
||||
throw new Error(validation.error ?? 'SQL is not read-only.');
|
||||
}
|
||||
sql = assertReadOnlySql(input.sql);
|
||||
wrappedSql = limitSqlForExecution(sql, input.rowLimit);
|
||||
} catch (error) {
|
||||
|
|
|
|||
|
|
@ -8,9 +8,12 @@ import { createKtxEntityDetailsService } from '../../context/scan/entity-details
|
|||
import type { KtxScanConnector } from '../../context/scan/types.js';
|
||||
import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js';
|
||||
import { createKtxDiscoverDataService } from '../../context/search/discover.js';
|
||||
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import { sqlAnalysisDialectForDriver } from '../../context/sql-analysis/dialect.js';
|
||||
import type { SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
|
||||
import { compileLocalSlQuery } from '../../context/sl/local-query.js';
|
||||
import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js';
|
||||
import { readLocalSlSource } from '../../context/sl/local-sl.js';
|
||||
import { assertSafeConnectionId } from '../../context/sl/source-files.js';
|
||||
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
|
||||
import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js';
|
||||
|
||||
|
|
@ -22,64 +25,12 @@ interface CreateLocalProjectMcpContextPortsOptions {
|
|||
embeddingService: KtxEmbeddingPort | null;
|
||||
}
|
||||
|
||||
function dialectForDriver(driver: string | undefined): string {
|
||||
const normalized = (driver ?? 'postgres').toUpperCase();
|
||||
const map: Record<string, string> = {
|
||||
POSTGRES: 'postgres',
|
||||
BIGQUERY: 'bigquery',
|
||||
SNOWFLAKE: 'snowflake',
|
||||
MYSQL: 'mysql',
|
||||
SQLSERVER: 'tsql',
|
||||
SQLITE: 'sqlite',
|
||||
DUCKDB: 'duckdb',
|
||||
CLICKHOUSE: 'clickhouse',
|
||||
DATABRICKS: 'databricks',
|
||||
};
|
||||
return map[normalized] ?? 'postgres';
|
||||
}
|
||||
|
||||
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
||||
return dialectForDriver(driver) as SqlAnalysisDialect;
|
||||
}
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
function assertSafeSourceName(sourceName: string): string {
|
||||
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
|
||||
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
|
||||
}
|
||||
return assertSafePathToken('semantic-layer source name', sourceName);
|
||||
}
|
||||
|
||||
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
|
||||
if (connector?.cleanup) {
|
||||
await connector.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
function slPath(connectionId: string, sourceName: string): string {
|
||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
|
||||
}
|
||||
|
||||
async function executeValidatedReadOnlySql(
|
||||
project: KtxLocalProject,
|
||||
options: CreateLocalProjectMcpContextPortsOptions,
|
||||
|
|
@ -201,13 +152,11 @@ export function createLocalProjectMcpContextPorts(
|
|||
},
|
||||
semanticLayer: {
|
||||
async readSource(input) {
|
||||
const path = slPath(input.connectionId, input.sourceName);
|
||||
try {
|
||||
const result = await project.fileStore.readFile(path);
|
||||
return { sourceName: input.sourceName, yaml: result.content };
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const source = await readLocalSlSource(project, {
|
||||
connectionId: input.connectionId,
|
||||
sourceName: input.sourceName,
|
||||
});
|
||||
return source ? { sourceName: source.name, yaml: source.yaml } : null;
|
||||
},
|
||||
async query(input, executionOptions) {
|
||||
if (!options.semanticLayerCompute) {
|
||||
|
|
|
|||
|
|
@ -375,6 +375,9 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
|||
async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
|
||||
try {
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
return { errors: [`${sourceName}: no standalone or overlay file found`], warnings: [] };
|
||||
}
|
||||
const parsed = YAML.parse(file.content) as SemanticLayerSource;
|
||||
const isOverlay = parsed.table == null && parsed.sql == null;
|
||||
const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
|
||||
|
|
|
|||
|
|
@ -483,7 +483,7 @@ export class MemoryAgentService {
|
|||
if (session.connectionId) {
|
||||
for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) {
|
||||
try {
|
||||
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName).catch(() => null);
|
||||
const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (file?.content) {
|
||||
const parsed = this.parseYamlOrNull(file.content);
|
||||
if (parsed) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { buildLiveDatabaseManifestShards, type LiveDatabaseManifestExistingDescr
|
|||
import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sql/skill-schemas.js';
|
||||
import type { KtxScanRelationshipConfig } from '../project/config.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
||||
import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js';
|
||||
import {
|
||||
buildKtxRelationshipArtifacts,
|
||||
|
|
@ -205,7 +206,7 @@ async function loadExistingManifestState(
|
|||
|
||||
let files: string[];
|
||||
try {
|
||||
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml'));
|
||||
files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter(isSlYamlPath);
|
||||
} catch {
|
||||
return { descriptions, preservedJoins, usage };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,10 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-ex
|
|||
import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js';
|
||||
import type { KtxMcpProgressCallback } from '../mcp/types.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
|
||||
import { loadLocalSlSourceRecords } from './local-sl.js';
|
||||
import { toResolvedWire } from './semantic-layer.service.js';
|
||||
import { assertSafeConnectionId } from './source-files.js';
|
||||
import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
|
||||
|
||||
const COMPILE_ONLY_REASON =
|
||||
|
|
@ -24,43 +26,6 @@ export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionRe
|
|||
dialect: string;
|
||||
}
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
function dialectForDriver(driver: string | undefined): string {
|
||||
const normalized = (driver ?? 'postgres').toUpperCase();
|
||||
const map: Record<string, string> = {
|
||||
POSTGRES: 'postgres',
|
||||
BIGQUERY: 'bigquery',
|
||||
SNOWFLAKE: 'snowflake',
|
||||
MYSQL: 'mysql',
|
||||
SQLSERVER: 'tsql',
|
||||
SQLITE: 'sqlite',
|
||||
DUCKDB: 'duckdb',
|
||||
CLICKHOUSE: 'clickhouse',
|
||||
DATABRICKS: 'databricks',
|
||||
};
|
||||
return map[normalized] ?? 'postgres';
|
||||
}
|
||||
|
||||
function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string {
|
||||
if (requested) {
|
||||
return assertSafeConnectionId(requested);
|
||||
|
|
@ -93,7 +58,7 @@ export async function compileLocalSlQuery(
|
|||
): Promise<CompileLocalSlQueryResult> {
|
||||
await options.onProgress?.({ progress: 0, message: 'Compiling query' });
|
||||
const connectionId = resolveLocalConnectionId(project, options.connectionId);
|
||||
const dialect = dialectForDriver(project.config.connections[connectionId]?.driver);
|
||||
const dialect = sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver);
|
||||
const sources = await loadComputableSources(project, connectionId);
|
||||
|
||||
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import { join } from 'node:path';
|
|||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||
import type { KtxFileWriteResult } from '../../context/core/file-store.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { HybridSearchCore } from '../../context/search/hybrid-search-core.js';
|
||||
import type { SearchCandidateGenerator } from '../../context/search/types.js';
|
||||
|
|
@ -18,6 +17,13 @@ import {
|
|||
} from './semantic-layer.service.js';
|
||||
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
|
||||
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
|
||||
import {
|
||||
assertSafeConnectionId,
|
||||
isSafeConnectionId,
|
||||
isSlYamlPath,
|
||||
slSourceNameForFile,
|
||||
sourceNameFromPath,
|
||||
} from './source-files.js';
|
||||
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
|
||||
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
|
||||
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
|
||||
|
|
@ -69,58 +75,10 @@ export type ResolvedSlSource =
|
|||
| { kind: 'not-found' }
|
||||
| { kind: 'ambiguous'; connectionIds: string[] };
|
||||
|
||||
const LOCAL_AUTHOR = 'ktx';
|
||||
const LOCAL_AUTHOR_EMAIL = 'ktx@example.com';
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
|
||||
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
|
||||
}
|
||||
|
||||
function assertSafeSourceName(sourceName: string): string {
|
||||
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
|
||||
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
|
||||
}
|
||||
return assertSafePathToken('semantic-layer source name', sourceName);
|
||||
}
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
function slPath(connectionId: string, sourceName: string): string {
|
||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
|
||||
}
|
||||
|
||||
function sourceNameFromPath(path: string): string {
|
||||
return (
|
||||
path
|
||||
.split('/')
|
||||
.at(-1)
|
||||
?.replace(/\.ya?ml$/, '') ?? path
|
||||
);
|
||||
}
|
||||
|
||||
function parseYamlRecord(raw: string): Record<string, unknown> {
|
||||
const parsed = YAML.parse(raw) as unknown;
|
||||
if (!isRecord(parsed)) {
|
||||
|
|
@ -215,12 +173,17 @@ export async function loadLocalSlSourceRecords(
|
|||
const dir = `semantic-layer/${connectionId}`;
|
||||
const schemaDir = `${dir}/_schema`;
|
||||
const listed = await project.fileStore.listFiles(dir);
|
||||
const paths = listed.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort();
|
||||
const paths = listed.files.filter(isSlYamlPath).sort();
|
||||
const sources = new Map<string, LocalSlSourceRecord>();
|
||||
|
||||
for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
|
||||
const raw = await project.fileStore.readFile(path);
|
||||
const tables = manifestTables(parseYamlRecord(raw.content));
|
||||
let tables: Record<string, ManifestTableEntry> | null;
|
||||
try {
|
||||
tables = manifestTables(parseYamlRecord(raw.content));
|
||||
} catch (error) {
|
||||
throw new Error(`${path}: ${error instanceof Error ? error.message : String(error)}`);
|
||||
}
|
||||
if (!tables) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -237,7 +200,29 @@ export async function loadLocalSlSourceRecords(
|
|||
|
||||
for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
|
||||
const raw = await project.fileStore.readFile(path);
|
||||
const parsed = parseYamlRecord(raw.content);
|
||||
let parsed: Record<string, unknown>;
|
||||
try {
|
||||
parsed = parseYamlRecord(raw.content);
|
||||
} catch {
|
||||
// A source mid-edit (e.g. an agent saved half-written YAML) must not take
|
||||
// down reads, listings, or search for its siblings. Key it by the same
|
||||
// name the writer side uses (the intact top-level `name:`, recovered even
|
||||
// when the YAML is broken below it; filename only as a last resort) so a
|
||||
// broken uppercase/hashed/human-renamed source stays reachable under its
|
||||
// real name, and surface the raw content for repair.
|
||||
const brokenName = slSourceNameForFile(path, raw.content);
|
||||
sources.set(brokenName, {
|
||||
connectionId,
|
||||
name: brokenName,
|
||||
path,
|
||||
columnCount: 0,
|
||||
measureCount: 0,
|
||||
joinCount: 0,
|
||||
yaml: raw.content,
|
||||
source: { name: brokenName, grain: [], columns: [], joins: [], measures: [] },
|
||||
});
|
||||
continue;
|
||||
}
|
||||
const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
|
||||
if (parsed.table || parsed.sql) {
|
||||
const source = parsedStandaloneSource(parsed, name);
|
||||
|
|
@ -292,50 +277,21 @@ export async function validateLocalSlSource(
|
|||
}
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export async function writeLocalSlSource(
|
||||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string; yaml: string },
|
||||
): Promise<KtxFileWriteResult> {
|
||||
const validation = await validateLocalSlSource(input.yaml, { project, connectionId: input.connectionId });
|
||||
if (!validation.valid) {
|
||||
throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
|
||||
}
|
||||
|
||||
const parsed = parseYamlRecord(input.yaml);
|
||||
if (typeof parsed.name === 'string' && parsed.name !== input.sourceName) {
|
||||
throw new Error(`Semantic-layer source name "${parsed.name}" does not match requested path "${input.sourceName}"`);
|
||||
}
|
||||
|
||||
const path = slPath(input.connectionId, input.sourceName);
|
||||
return project.fileStore.writeFile(
|
||||
path,
|
||||
input.yaml.endsWith('\n') ? input.yaml : `${input.yaml}\n`,
|
||||
LOCAL_AUTHOR,
|
||||
LOCAL_AUTHOR_EMAIL,
|
||||
`Write semantic-layer source: ${input.connectionId}/${input.sourceName}`,
|
||||
);
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export async function readLocalSlSource(
|
||||
project: KtxLocalProject,
|
||||
input: { connectionId: string; sourceName: string },
|
||||
): Promise<LocalSlSource | null> {
|
||||
const path = slPath(input.connectionId, input.sourceName);
|
||||
try {
|
||||
const result = await project.fileStore.readFile(path);
|
||||
return {
|
||||
...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }),
|
||||
yaml: result.content,
|
||||
};
|
||||
} catch {
|
||||
const records = await loadLocalSlSourceRecords(project, {
|
||||
connectionId: input.connectionId,
|
||||
});
|
||||
const record = records.find((source) => source.name === input.sourceName);
|
||||
return record ? { ...record } : null;
|
||||
}
|
||||
// Source identity is the in-file `name:` (mirroring the warehouse identifier
|
||||
// verbatim, e.g. Snowflake's uppercase `WIDGET_SALES`), never the filename. The
|
||||
// record loader resolves standalone files, overlays, manifest-backed sources,
|
||||
// and mid-edit files whose YAML no longer parses — so readers — `ktx sl read`,
|
||||
// `ktx sl validate`, and the `sl_read_source` MCP tool — can surface broken
|
||||
// content for repair instead of failing on it.
|
||||
const records = await loadLocalSlSourceRecords(project, {
|
||||
connectionId: input.connectionId,
|
||||
});
|
||||
const record = records.find((source) => source.name === input.sourceName);
|
||||
return record ? { ...record } : null;
|
||||
}
|
||||
|
||||
export async function resolveLocalSlSource(
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import type { TableUsageOutput } from '../ingest/adapters/historic-sql/skill-sch
|
|||
import type { SlConnectionCatalogPort, SlPythonPort } from './ports.js';
|
||||
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
||||
import { isOverlaySource, resolvedSourceSchema, sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
|
||||
import { isSlYamlPath, resolveSlSourceFile, slDeclaredSourceName, slSourceFilePath } from './source-files.js';
|
||||
import type {
|
||||
ResolvedSemanticLayerSource,
|
||||
SemanticLayerColumnOverride,
|
||||
|
|
@ -135,8 +136,30 @@ export class SemanticLayerService {
|
|||
|
||||
// ── YAML File Operations ────────────────────────────────
|
||||
|
||||
private sourcePath(connectionId: string, sourceName: string): string {
|
||||
return `${SL_DIR_PREFIX}/${connectionId}/${sourceName}.yaml`;
|
||||
// The in-file `name:` is the source's identity; the filename is only a derived
|
||||
// label. Rewrites land on the file that already declares the name (humans may
|
||||
// rename files freely); new sources get a derived filename. A file already
|
||||
// sitting at the derived path that declares a name declares a *different* one
|
||||
// (the resolver would have matched it otherwise) — fail instead of clobbering
|
||||
// it. A nameless/unparseable file there is the broken remains of this very
|
||||
// source (the derived path is a function of the name), so overwriting it is
|
||||
// the repair path, not data loss.
|
||||
private async resolveWritePath(connectionId: string, sourceName: string): Promise<string> {
|
||||
const existing = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||
if (existing) {
|
||||
return existing.path;
|
||||
}
|
||||
const path = slSourceFilePath(connectionId, sourceName);
|
||||
let occupant: string | null = null;
|
||||
try {
|
||||
occupant = slDeclaredSourceName((await this.configService.readFile(path)).content);
|
||||
} catch {
|
||||
return path;
|
||||
}
|
||||
if (occupant !== null) {
|
||||
throw new Error(`Cannot write source '${sourceName}': ${path} already defines source '${occupant}'`);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
async writeSource(
|
||||
|
|
@ -185,39 +208,42 @@ export class SemanticLayerService {
|
|||
}
|
||||
}
|
||||
|
||||
const path = this.sourcePath(connectionId, source.name);
|
||||
const path = await this.resolveWritePath(connectionId, source.name);
|
||||
const normalizedSource = normalizeSemanticLayerDescriptions(source);
|
||||
const content = YAML.stringify(normalizedSource, { indent: 2, lineWidth: 0, version: '1.1' });
|
||||
const message = commitMessage ?? `Update semantic layer source: ${source.name}`;
|
||||
const result = await this.configService.writeFile(path, content, author, authorEmail, message, {
|
||||
skipLock: options?.skipLock,
|
||||
});
|
||||
return { ...result, warnings };
|
||||
// The filename is derived from (or resolved by) the source name — surface
|
||||
// the actual path so callers don't have to re-resolve it.
|
||||
return { ...result, path, warnings };
|
||||
}
|
||||
|
||||
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string }> {
|
||||
const path = this.sourcePath(connectionId, sourceName);
|
||||
const result = await this.configService.readFile(path);
|
||||
return { content: result.content, path };
|
||||
/**
|
||||
* Raw standalone/overlay file for a source, resolved by its in-file `name:`.
|
||||
* Returns null when no file declares the name (the source may still exist as
|
||||
* a manifest entry under `_schema/`).
|
||||
*/
|
||||
async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string } | null> {
|
||||
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||
return file ? { content: file.content, path: file.path } : null;
|
||||
}
|
||||
|
||||
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
||||
let content: string;
|
||||
try {
|
||||
const result = await this.readSourceFile(connectionId, sourceName);
|
||||
content = result.content;
|
||||
} catch {
|
||||
const file = await this.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return YAML.parse(content) as SemanticLayerSource;
|
||||
return YAML.parse(file.content) as SemanticLayerSource;
|
||||
} catch (error) {
|
||||
// Distinguish a YAML parse failure from a missing file. The file exists but
|
||||
// its contents are unparseable — callers that treat null as "does not exist"
|
||||
// could otherwise overwrite the broken file. Surface the parse failure via
|
||||
// the service logger so the broken source is at least visible.
|
||||
this.logger.warn(
|
||||
`[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
`[loadSource] ${file.path}: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
|
@ -231,7 +257,7 @@ export class SemanticLayerService {
|
|||
let allFiles: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
allFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch (e) {
|
||||
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
|
||||
loadErrors.push(message);
|
||||
|
|
@ -338,7 +364,7 @@ export class SemanticLayerService {
|
|||
let allFiles: string[];
|
||||
try {
|
||||
const listing = await this.configService.listFiles(dir);
|
||||
allFiles = listing.files.filter((f) => f.endsWith('.yaml'));
|
||||
allFiles = listing.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return result;
|
||||
}
|
||||
|
|
@ -408,7 +434,7 @@ export class SemanticLayerService {
|
|||
const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`;
|
||||
try {
|
||||
const result = await this.configService.listFiles(schemaDir);
|
||||
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
for (const filePath of yamlFiles) {
|
||||
try {
|
||||
const { content } = await this.configService.readFile(filePath);
|
||||
|
|
@ -449,7 +475,7 @@ export class SemanticLayerService {
|
|||
let yamlFiles: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(schemaDir);
|
||||
yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -533,7 +559,7 @@ export class SemanticLayerService {
|
|||
.map((c) => c.name);
|
||||
if (absentDeclaredColumns.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
||||
`${source.name}: table "${source.table}" matched manifest ${manifestLabel}, ` +
|
||||
`but declared column(s) absent from physical table: ${absentDeclaredColumns.join(', ')}. ` +
|
||||
`Available columns: ${[...manifestColumns.values()].join(', ')}`,
|
||||
);
|
||||
|
|
@ -545,7 +571,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (missingGrainColumns.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
||||
`${source.name}: grain column(s) absent from physical table "${source.table}": ${missingGrainColumns.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -562,7 +588,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
`${source.name}: computed column "${column.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -577,7 +603,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
`${source.name}: segment "${segment.name}" references unknown column(s): ${missing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -592,7 +618,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (exprMissing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
||||
`${source.name}: measure "${measure.name}" references unknown column(s): ${exprMissing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -606,7 +632,7 @@ export class SemanticLayerService {
|
|||
});
|
||||
if (filterMissing.length > 0) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
||||
`${source.name}: measure "${measure.name}" filter references unknown column(s): ${filterMissing.join(', ')}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -619,7 +645,7 @@ export class SemanticLayerService {
|
|||
}
|
||||
if (!validOutputColumns.has(parsed.localColumn.toLowerCase())) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: join to "${join.to}" references local column ` +
|
||||
`${source.name}: join to "${join.to}" references local column ` +
|
||||
`"${parsed.localColumn}" that is not a valid output column`,
|
||||
);
|
||||
}
|
||||
|
|
@ -631,7 +657,7 @@ export class SemanticLayerService {
|
|||
const targetColumns = new Set(targetSource.columns.map((c) => c.name.toLowerCase()));
|
||||
if (!targetColumns.has(parsed.targetColumn.toLowerCase())) {
|
||||
errors.push(
|
||||
`${source.name}.yaml: join to "${join.to}" references target column ` +
|
||||
`${source.name}: join to "${join.to}" references target column ` +
|
||||
`"${parsed.targetColumn}" that does not exist on the target source`,
|
||||
);
|
||||
}
|
||||
|
|
@ -650,43 +676,30 @@ export class SemanticLayerService {
|
|||
return SemanticLayerService.mapDialect(connection.connectionType);
|
||||
}
|
||||
|
||||
async listSourceNames(connectionId: string): Promise<string[]> {
|
||||
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
return result.files.filter((f) => f.endsWith('.yaml')).map((f) => f.replace(`${dir}/`, '').replace('.yaml', ''));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async listFilesForConnection(connectionId: string): Promise<string[]> {
|
||||
const dir = `${SL_DIR_PREFIX}/${connectionId}`;
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir, true);
|
||||
return result.files.filter((f) => f.endsWith('.yaml'));
|
||||
return result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async readFileByPath(connectionId: string, relativePath: string): Promise<{ content: string; readOnly: boolean }> {
|
||||
const fullPath = `${SL_DIR_PREFIX}/${connectionId}/${relativePath}`;
|
||||
const result = await this.configService.readFile(fullPath);
|
||||
return {
|
||||
content: result.content,
|
||||
readOnly: relativePath.startsWith('_schema/'),
|
||||
};
|
||||
}
|
||||
|
||||
async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) {
|
||||
const path = this.sourcePath(connectionId, sourceName);
|
||||
return this.configService.deleteFile(path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
|
||||
}
|
||||
|
||||
async getSourceHistory(connectionId: string, sourceName: string) {
|
||||
const path = this.sourcePath(connectionId, sourceName);
|
||||
return this.configService.getFileHistory(path);
|
||||
const file = await resolveSlSourceFile(this.configService, connectionId, sourceName);
|
||||
if (!file) {
|
||||
// `deleteFile` returns null for a missing path, which would let a no-op
|
||||
// delete read as success. Distinguish the two real cases instead.
|
||||
if (await this.isManifestBacked(connectionId, sourceName)) {
|
||||
throw new Error(
|
||||
`Source '${sourceName}' is defined by the scan manifest (_schema/) and has no overlay file to delete. ` +
|
||||
`Rescan the connection to remove it from the manifest.`,
|
||||
);
|
||||
}
|
||||
throw new Error(`Semantic-layer source not found: ${connectionId}/${sourceName}`);
|
||||
}
|
||||
return this.configService.deleteFile(file.path, author, authorEmail, `Delete semantic layer source: ${sourceName}`);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -815,7 +828,7 @@ export class SemanticLayerService {
|
|||
return [];
|
||||
}
|
||||
|
||||
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\/.+\.ya?ml$/.test(file));
|
||||
const schemaFiles = files.filter((file) => /^semantic-layer\/[^/]+\/_schema\//.test(file) && isSlYamlPath(file));
|
||||
const entries: Array<{ connectionId: string; source: SemanticLayerSource }> = [];
|
||||
for (const filePath of schemaFiles) {
|
||||
const connectionId = filePath.split('/')[1];
|
||||
|
|
@ -844,7 +857,7 @@ export class SemanticLayerService {
|
|||
let allFiles: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
allFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
} catch {
|
||||
return warnings;
|
||||
}
|
||||
|
|
@ -1030,7 +1043,7 @@ export class SemanticLayerService {
|
|||
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
const yamlFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
const yamlFiles = result.files.filter((f) => isSlYamlPath(f));
|
||||
|
||||
for (const filePath of yamlFiles) {
|
||||
try {
|
||||
|
|
|
|||
160
packages/cli/src/context/sl/source-files.ts
Normal file
160
packages/cli/src/context/sl/source-files.ts
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import YAML from 'yaml';
|
||||
import type { KtxFileStorePort } from '../../context/core/file-store.js';
|
||||
|
||||
// Semantic-layer source identity lives in the file's `name:` field, which mirrors
|
||||
// the warehouse identifier verbatim (Snowflake's uppercase `SIGNED_UP`, `EVENT$LOG`).
|
||||
// The filename is a derived label and never participates in identity: reads resolve
|
||||
// a source by scanning the connection directory and matching `name:`, and writes
|
||||
// reuse the resolved file's path, so files can be freely renamed by humans without
|
||||
// changing which source they define.
|
||||
|
||||
function assertSafePathToken(kind: string, value: string): string {
|
||||
if (
|
||||
value.trim().length === 0 ||
|
||||
value.includes('..') ||
|
||||
value.includes('\\') ||
|
||||
value.startsWith('/') ||
|
||||
value.startsWith('.') ||
|
||||
value.includes('//')
|
||||
) {
|
||||
throw new Error(`Unsafe ${kind}: ${value}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
export function assertSafeConnectionId(connectionId: string): string {
|
||||
if (!isSafeConnectionId(connectionId)) {
|
||||
throw new Error(`Unsafe connection id: ${connectionId}`);
|
||||
}
|
||||
return assertSafePathToken('connection id', connectionId);
|
||||
}
|
||||
|
||||
export function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
|
||||
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
|
||||
}
|
||||
|
||||
export function sourceNameFromPath(path: string): string {
|
||||
return (
|
||||
path
|
||||
.split('/')
|
||||
.at(-1)
|
||||
?.replace(/\.ya?ml$/, '') ?? path
|
||||
);
|
||||
}
|
||||
|
||||
// The one predicate for "this path is a semantic-layer YAML file". ktx itself
|
||||
// always writes `.yaml` (see `slSourceFileName`), but humans rename freely and
|
||||
// the dbt ecosystem's habit is `.yml`, so every reader must accept both — a
|
||||
// listing that recognizes only one extension makes the same file visible to
|
||||
// some entry points and invisible to others.
|
||||
export function isSlYamlPath(path: string): boolean {
|
||||
return path.endsWith('.yaml') || path.endsWith('.yml');
|
||||
}
|
||||
|
||||
// Windows refuses these basenames regardless of extension — a genuinely universal
|
||||
// filesystem invariant, so the static list is acceptable.
|
||||
const WINDOWS_RESERVED_BASENAME = /^(?:con|prn|aux|nul|com[0-9]|lpt[0-9])$/;
|
||||
|
||||
const SAFE_FILE_BASENAME = /^[a-z0-9][a-z0-9_]{0,63}$/;
|
||||
|
||||
/**
|
||||
* Derive the filename for a semantic-layer source. Total over all possible
|
||||
* source names — never throws.
|
||||
*
|
||||
* Names that are already safe lowercase snake_case become `<name>.yaml`;
|
||||
* anything else becomes `<slug>-<8 hex of sha256(name)>.yaml`. The two ranges
|
||||
* are disjoint and the mapping is injective: safe filenames contain no `-`,
|
||||
* hashed filenames always end in `-<8 hex>`, and slugs are lowercased so names
|
||||
* differing only by case get distinct hashes instead of colliding paths on
|
||||
* case-insensitive filesystems (macOS APFS, Windows).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export function slSourceFileName(sourceName: string): string {
|
||||
if (SAFE_FILE_BASENAME.test(sourceName) && !WINDOWS_RESERVED_BASENAME.test(sourceName)) {
|
||||
return `${sourceName}.yaml`;
|
||||
}
|
||||
const slug = sourceName
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9_]+/g, '_')
|
||||
.replace(/_+/g, '_')
|
||||
.replace(/^_+|_+$/g, '')
|
||||
.slice(0, 64);
|
||||
const hash = createHash('sha256').update(sourceName, 'utf-8').digest('hex').slice(0, 8);
|
||||
return `${slug || 'src'}-${hash}.yaml`;
|
||||
}
|
||||
|
||||
export function slSourceFilePath(connectionId: string, sourceName: string): string {
|
||||
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${slSourceFileName(sourceName)}`;
|
||||
}
|
||||
|
||||
export interface SlSourceFile {
|
||||
path: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
// Same keying as `loadLocalSlSourceRecords`: the in-file `name:` is the identity;
|
||||
// the filename is only a fallback for files so broken that even the `name:` is
|
||||
// unrecoverable, or genuinely nameless ones. A file left mid-edit with a syntax
|
||||
// error below its `name:` line keeps its declared identity (see
|
||||
// `slDeclaredSourceName`), so a human-renamed source is still addressed by name
|
||||
// while broken instead of silently reverting to its filename.
|
||||
export function slSourceNameForFile(path: string, content: string): string {
|
||||
return slDeclaredSourceName(content) ?? sourceNameFromPath(path);
|
||||
}
|
||||
|
||||
/**
|
||||
* The `name:` a semantic-layer YAML file declares, or null when the file is
|
||||
* nameless or so broken even the name is unrecoverable. Null is how
|
||||
* `writeSource` tells a genuine name conflict at a derived path apart from the
|
||||
* broken remains of the source being written, which a rewrite must repair
|
||||
* rather than refuse.
|
||||
*
|
||||
* Uses `parseDocument`, not `parse`: a file with a syntax error below the
|
||||
* `name:` line still parses into a partial tree whose top-level `name:` is
|
||||
* intact. `parse` would throw on the same input and drop the source to its
|
||||
* filename — wrong for human-renamed files, whose filename is not the name.
|
||||
*/
|
||||
export function slDeclaredSourceName(content: string): string | null {
|
||||
let doc: ReturnType<typeof YAML.parseDocument>;
|
||||
try {
|
||||
doc = YAML.parseDocument(content);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const name = doc.get('name');
|
||||
return typeof name === 'string' && name.length > 0 ? name : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the standalone/overlay file that defines `sourceName` for a connection.
|
||||
* Returns null when no file declares the name (the source may still exist as a
|
||||
* manifest entry under `_schema/`). Throws when more than one file declares the
|
||||
* same name — that breaks the one-file-per-name invariant and must be repaired
|
||||
* by hand rather than silently picking one.
|
||||
*/
|
||||
export async function resolveSlSourceFile(
|
||||
fileStore: Pick<KtxFileStorePort, 'listFiles' | 'readFile'>,
|
||||
connectionId: string,
|
||||
sourceName: string,
|
||||
): Promise<SlSourceFile | null> {
|
||||
const dir = `semantic-layer/${assertSafeConnectionId(connectionId)}`;
|
||||
const schemaDir = `${dir}/_schema`;
|
||||
const listed = await fileStore.listFiles(dir);
|
||||
const paths = listed.files.filter((file) => isSlYamlPath(file) && !file.startsWith(`${schemaDir}/`)).sort();
|
||||
|
||||
const matches: SlSourceFile[] = [];
|
||||
for (const path of paths) {
|
||||
const raw = await fileStore.readFile(path);
|
||||
if (slSourceNameForFile(path, raw.content) === sourceName) {
|
||||
matches.push({ path, content: raw.content });
|
||||
}
|
||||
}
|
||||
if (matches.length > 1) {
|
||||
throw new Error(
|
||||
`Multiple semantic-layer files declare source "${sourceName}": ${matches.map((match) => match.path).join(', ')}`,
|
||||
);
|
||||
}
|
||||
return matches[0] ?? null;
|
||||
}
|
||||
|
|
@ -46,12 +46,8 @@ export abstract class BaseSemanticLayerTool<TInput extends ZodType = ZodType> ex
|
|||
): Promise<string | null> {
|
||||
const semanticLayerService = context?.session?.semanticLayerService ?? this.semanticLayerService;
|
||||
|
||||
try {
|
||||
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
return content;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const file = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
return file?.content ?? null;
|
||||
}
|
||||
|
||||
protected buildMarkdown(
|
||||
|
|
|
|||
|
|
@ -113,13 +113,8 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
}
|
||||
|
||||
// Read existing source
|
||||
let currentYaml: string | null = null;
|
||||
try {
|
||||
const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
currentYaml = content;
|
||||
} catch {
|
||||
currentYaml = null;
|
||||
}
|
||||
const currentFile = await semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
const currentYaml = currentFile?.content ?? null;
|
||||
if (!currentYaml) {
|
||||
const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
|
||||
if (manifestBacked) {
|
||||
|
|
@ -165,6 +160,20 @@ If no source exists yet, use sl_write_source instead — this tool will reject t
|
|||
} catch (e) {
|
||||
return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
|
||||
}
|
||||
|
||||
// The in-file `name:` is the source's identity — an edited name would make
|
||||
// writeSource create a second source instead of updating this one.
|
||||
if (source.name !== sourceName) {
|
||||
return this.buildOutput(
|
||||
false,
|
||||
[
|
||||
`Edits change "name:" from "${sourceName}" to "${source.name ?? '<missing>'}" — renaming is not supported. ` +
|
||||
`Delete the source and recreate it under the new name.`,
|
||||
],
|
||||
sourceName,
|
||||
);
|
||||
}
|
||||
|
||||
source = normalizeSemanticLayerDescriptions(source, { fillMissing: !!context.session?.ingest });
|
||||
|
||||
// Re-serialize and write
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import YAML from 'yaml';
|
||||
import type { GitService } from '../../../context/core/git.service.js';
|
||||
import type { KtxFileStorePort } from '../../../context/core/file-store.js';
|
||||
import type { KtxFileListResult, KtxFileReadResult, KtxFileStorePort } from '../../../context/core/file-store.js';
|
||||
import { SYSTEM_GIT_AUTHOR } from '../../../context/tools/authors.js';
|
||||
import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js';
|
||||
import { sourceOverlaySchema } from '../schemas.js';
|
||||
import { SemanticLayerService } from '../semantic-layer.service.js';
|
||||
import { resolveSlSourceFile, slSourceFilePath } from '../source-files.js';
|
||||
import type { SemanticLayerSource } from '../types.js';
|
||||
import { sourceDefinitionSchema } from './base-semantic-layer.tool.js';
|
||||
|
||||
|
|
@ -23,9 +24,6 @@ export interface SourceValidationResult {
|
|||
warnings: string[];
|
||||
}
|
||||
|
||||
const slSourcePath = (connectionId: string, sourceName: string): string =>
|
||||
`semantic-layer/${connectionId}/${sourceName}.yaml`;
|
||||
|
||||
function resolveDialect(warehouse: string | null): string | null {
|
||||
if (!warehouse) {
|
||||
return null;
|
||||
|
|
@ -63,24 +61,21 @@ export async function validateSingleSource(
|
|||
const errors: string[] = [];
|
||||
const warnings: string[] = [];
|
||||
|
||||
let content: string;
|
||||
try {
|
||||
const result = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
content = result.content;
|
||||
} catch {
|
||||
errors.push(`${sourceName}.yaml: file not found`);
|
||||
const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
|
||||
if (!file) {
|
||||
errors.push(`${sourceName}: no standalone or overlay file found`);
|
||||
return { errors, warnings };
|
||||
}
|
||||
|
||||
let parsed: Record<string, unknown>;
|
||||
try {
|
||||
parsed = YAML.parse(content);
|
||||
parsed = YAML.parse(file.content);
|
||||
} catch (e) {
|
||||
errors.push(`${sourceName}.yaml: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
|
||||
errors.push(`${sourceName}: invalid YAML — ${e instanceof Error ? e.message : String(e)}`);
|
||||
return { errors, warnings };
|
||||
}
|
||||
if (!parsed || typeof parsed !== 'object') {
|
||||
errors.push(`${sourceName}.yaml: top-level content is not an object`);
|
||||
errors.push(`${sourceName}: top-level content is not an object`);
|
||||
return { errors, warnings };
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +84,7 @@ export async function validateSingleSource(
|
|||
const isManifestBacked = await deps.semanticLayerService.isManifestBacked(connectionId, sourceName);
|
||||
if (isManifestBacked) {
|
||||
errors.push(
|
||||
`${sourceName}.yaml: standalone source shadows an existing manifest entry — ` +
|
||||
`${sourceName}: standalone source shadows an existing manifest entry — ` +
|
||||
`writing it as-is drops the manifest's columns and joins. ` +
|
||||
`Remove "sql:", "table:", "grain:", and base-table "columns:" and keep only ` +
|
||||
`"name:" plus overlay fields such as "measures:", "segments:", "descriptions:", ` +
|
||||
|
|
@ -103,21 +98,21 @@ export async function validateSingleSource(
|
|||
const result = schema.safeParse(parsed);
|
||||
if (!result.success) {
|
||||
const issues = result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ');
|
||||
errors.push(`${sourceName}.yaml: schema — ${issues}`);
|
||||
errors.push(`${sourceName}: schema — ${issues}`);
|
||||
const errorPaths = new Set(result.error.issues.map((i) => String(i.path[0])));
|
||||
if (errorPaths.has('joins')) {
|
||||
warnings.push(
|
||||
`${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
|
||||
`${sourceName}: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
|
||||
);
|
||||
}
|
||||
if (errorPaths.has('columns')) {
|
||||
warnings.push(
|
||||
`${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
|
||||
`${sourceName}: hint — overlay columns must be computed: {name, expr, type}. Use column_overrides for manifest column descriptions or metadata.`,
|
||||
);
|
||||
}
|
||||
if (errorPaths.has('measures')) {
|
||||
warnings.push(
|
||||
`${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`,
|
||||
`${sourceName}: hint — measure format: {name, expr, description (optional), filter (optional)}`,
|
||||
);
|
||||
}
|
||||
return { errors, warnings };
|
||||
|
|
@ -135,7 +130,7 @@ export async function validateSingleSource(
|
|||
const seenMeasures = new Set<string>();
|
||||
for (const m of measures) {
|
||||
if (seenMeasures.has(m.name)) {
|
||||
errors.push(`${sourceName}.yaml: duplicate measure name "${m.name}"`);
|
||||
errors.push(`${sourceName}: duplicate measure name "${m.name}"`);
|
||||
}
|
||||
seenMeasures.add(m.name);
|
||||
}
|
||||
|
|
@ -168,7 +163,7 @@ export async function validateSingleSource(
|
|||
const missing = sourceColumns.map((c) => c.name).filter((n) => !actual.has(n.toLowerCase()));
|
||||
if (missing.length > 0) {
|
||||
errors.push(
|
||||
`${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
|
||||
`${sourceName}: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${[...actual].slice(0, 10).join(', ')}${actual.size > 10 ? ', …' : ''})`,
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
|
|
@ -205,7 +200,7 @@ function formatProbeError(args: {
|
|||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
const refColumns = sourceColumns.filter((c) => referencesColumn(probeSql, c.name));
|
||||
const lines: string[] = [
|
||||
measureName ? `${sourceName}.yaml: measure "${measureName}" ${headline}.` : `${sourceName}.yaml: ${headline}.`,
|
||||
measureName ? `${sourceName}: measure "${measureName}" ${headline}.` : `${sourceName}: ${headline}.`,
|
||||
];
|
||||
if (warehouse) {
|
||||
lines.push(` Warehouse: ${warehouse}`);
|
||||
|
|
@ -249,7 +244,7 @@ async function probeOverlayMeasures(
|
|||
composed = all.find((s) => s.name === sourceName);
|
||||
} catch (e) {
|
||||
errors.push(
|
||||
`${sourceName}.yaml: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
|
||||
`${sourceName}: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`,
|
||||
);
|
||||
return errors;
|
||||
}
|
||||
|
|
@ -289,6 +284,26 @@ async function probeOverlayMeasures(
|
|||
return errors;
|
||||
}
|
||||
|
||||
/**
|
||||
* A read-only view of the config repo at one commit, shaped for
|
||||
* `resolveSlSourceFile` so name→file resolution runs against history exactly as
|
||||
* it does against the working tree — one resolver, two backing stores. Used to
|
||||
* recover the path a source occupied at `preHead` after the live file is gone.
|
||||
*/
|
||||
function gitCommitFileStore(
|
||||
git: GitService,
|
||||
commitHash: string,
|
||||
): Pick<KtxFileStorePort, 'listFiles' | 'readFile'> {
|
||||
return {
|
||||
async listFiles(path: string): Promise<KtxFileListResult> {
|
||||
return { files: await git.listFilesAtCommit(path, commitHash) };
|
||||
},
|
||||
async readFile(path: string): Promise<KtxFileReadResult> {
|
||||
return { content: await git.getFileAtCommit(path, commitHash) };
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't
|
||||
* exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate
|
||||
|
|
@ -300,14 +315,29 @@ export async function revertSourceToPreHead(
|
|||
preHead: string | null,
|
||||
sourceName: string,
|
||||
): Promise<string> {
|
||||
const relPath = slSourcePath(connectionId, sourceName);
|
||||
// Find the file that defines this source. While it is still on disk
|
||||
// (invalid-but-present) the live resolver finds it by its in-file `name:`.
|
||||
// Once the session deleted it, the path is gone too — and humans rename files
|
||||
// freely, so it is NOT the writer-derived filename. Recover it from history by
|
||||
// resolving the name against the preHead commit instead of guessing.
|
||||
const live = await resolveSlSourceFile(deps.configService, connectionId, sourceName);
|
||||
let relPath: string;
|
||||
let preContent: string | null = null;
|
||||
if (preHead) {
|
||||
try {
|
||||
preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
|
||||
} catch {
|
||||
preContent = null;
|
||||
if (live) {
|
||||
relPath = live.path;
|
||||
if (preHead) {
|
||||
try {
|
||||
preContent = await deps.gitService.getFileAtCommit(relPath, preHead);
|
||||
} catch {
|
||||
preContent = null;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const atPreHead = preHead
|
||||
? await resolveSlSourceFile(gitCommitFileStore(deps.gitService, preHead), connectionId, sourceName)
|
||||
: null;
|
||||
relPath = atPreHead?.path ?? slSourceFilePath(connectionId, sourceName);
|
||||
preContent = atPreHead?.content ?? null;
|
||||
}
|
||||
|
||||
if (preContent !== null) {
|
||||
|
|
|
|||
|
|
@ -22,8 +22,12 @@ const slWriteSourceInputSchema = z.object({
|
|||
connectionId: slToolConnectionIdSchema.describe('Data source connection ID'),
|
||||
sourceName: z
|
||||
.string()
|
||||
.regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case (lowercase alphanumeric and underscores)')
|
||||
.describe('Name of the source to create, edit, or delete'),
|
||||
.min(1)
|
||||
.describe(
|
||||
"Name of the source to create, edit, or delete. Must equal the source's `name:`. Use the verbatim " +
|
||||
'warehouse identifier when overlaying a manifest source (e.g. SIGNED_UP); snake_case is recommended ' +
|
||||
'for new standalone sources.',
|
||||
),
|
||||
source: sourceInputSchema
|
||||
.optional()
|
||||
.describe(
|
||||
|
|
@ -152,6 +156,17 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
);
|
||||
}
|
||||
|
||||
// The in-file `name:` is the source's identity; the file is written under
|
||||
// source.name while the orphan/shadow checks key on sourceName — a mismatch
|
||||
// would validate one source and save another.
|
||||
if (input.source.name !== sourceName) {
|
||||
return this.buildOutput(
|
||||
false,
|
||||
[`source.name "${input.source.name}" does not match sourceName "${sourceName}" — they must be identical.`],
|
||||
sourceName,
|
||||
);
|
||||
}
|
||||
|
||||
return this.writeFullSource(
|
||||
connectionId,
|
||||
input.source,
|
||||
|
|
@ -253,12 +268,8 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co
|
|||
connectionId: string,
|
||||
sourceName: string,
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
const { content } = await service.readSourceFile(connectionId, sourceName);
|
||||
return content;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const file = await service.readSourceFile(connectionId, sourceName);
|
||||
return file?.content ?? null;
|
||||
}
|
||||
|
||||
private async rejectOrphanOverlay(
|
||||
|
|
|
|||
23
packages/cli/src/context/sql-analysis/dialect.ts
Normal file
23
packages/cli/src/context/sql-analysis/dialect.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import type { SqlAnalysisDialect } from './ports.js';
|
||||
|
||||
// One mapping from ktx connection identity to the sqlglot dialect name used by
|
||||
// the Python daemon (SQL analysis, read-only validation) and semantic-layer
|
||||
// compute. Keys cover both vocabularies that name a connection's engine:
|
||||
// ktx.yaml driver names ("postgres", "sqlserver") and the local connection-type
|
||||
// spellings exposed by KtxConnectionInfo.connectionType ("POSTGRESQL").
|
||||
const SQLGLOT_DIALECTS: Record<string, SqlAnalysisDialect> = {
|
||||
postgres: 'postgres',
|
||||
postgresql: 'postgres',
|
||||
bigquery: 'bigquery',
|
||||
snowflake: 'snowflake',
|
||||
mysql: 'mysql',
|
||||
sqlserver: 'tsql',
|
||||
sqlite: 'sqlite',
|
||||
duckdb: 'duckdb',
|
||||
clickhouse: 'clickhouse',
|
||||
databricks: 'databricks',
|
||||
};
|
||||
|
||||
export function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
|
||||
return SQLGLOT_DIALECTS[(driver ?? '').toLowerCase()] ?? 'postgres';
|
||||
}
|
||||
|
|
@ -13,7 +13,7 @@ import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
|
|||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
|
||||
import { createCliOperationalLogger } from './io/logger.js';
|
||||
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
|
||||
import { createKtxCliLocalIngestAdapters, resolveKtxCliSqlAnalysis } from './local-adapters.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js';
|
||||
import {
|
||||
|
|
@ -87,6 +87,7 @@ export interface KtxIngestDeps {
|
|||
| 'memoryModel'
|
||||
| 'semanticLayerCompute'
|
||||
| 'queryExecutor'
|
||||
| 'sqlAnalysis'
|
||||
| 'logger'
|
||||
| 'pullConfigOptions'
|
||||
>;
|
||||
|
|
@ -724,7 +725,7 @@ export async function runKtxIngest(
|
|||
const localIngestOptions = deps.localIngestOptions ?? {};
|
||||
const managedDaemon = managedDaemonOptionsForIngestRun(args, deps.runtimeIo ?? io);
|
||||
const operationalLogger = createCliOperationalLogger(io, args.outputMode);
|
||||
const adapterOptions = {
|
||||
const baseAdapterOptions = {
|
||||
...(localIngestOptions.pullConfigOptions ?? {}),
|
||||
...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
|
||||
...(managedDaemon ? { managedDaemon } : {}),
|
||||
|
|
@ -734,6 +735,10 @@ export async function runKtxIngest(
|
|||
: {}),
|
||||
logger: operationalLogger,
|
||||
};
|
||||
// One parser-backed SQL analysis port per run: the historic-sql adapter and
|
||||
// the ingest sql_execution tool share the same daemon-backed validator.
|
||||
const sqlAnalysis = localIngestOptions.sqlAnalysis ?? resolveKtxCliSqlAnalysis(baseAdapterOptions);
|
||||
const adapterOptions = { ...baseAdapterOptions, sqlAnalysis };
|
||||
const queryExecutor =
|
||||
localIngestOptions.queryExecutor ??
|
||||
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(ingestProject);
|
||||
|
|
@ -783,6 +788,7 @@ export async function runKtxIngest(
|
|||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
sqlAnalysis,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
embeddingProvider,
|
||||
|
|
@ -861,6 +867,7 @@ export async function runKtxIngest(
|
|||
jobId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
sqlAnalysis,
|
||||
pullConfigOptions: adapterOptions,
|
||||
embeddingProvider,
|
||||
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ function ktxCliLookerOptions(
|
|||
};
|
||||
}
|
||||
|
||||
function ktxCliHistoricSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions) {
|
||||
export function resolveKtxCliSqlAnalysis(options: KtxCliLocalIngestAdaptersOptions): SqlAnalysisPort {
|
||||
if (options.sqlAnalysis) {
|
||||
return options.sqlAnalysis;
|
||||
}
|
||||
|
|
@ -289,7 +289,7 @@ function historicSqlOptionsForLocalRun(
|
|||
}
|
||||
|
||||
const base = {
|
||||
sqlAnalysis: ktxCliHistoricSqlAnalysis(options),
|
||||
sqlAnalysis: resolveKtxCliSqlAnalysis(options),
|
||||
};
|
||||
|
||||
if (dialect === 'postgres') {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ LookML views map to SL sources, `measure:` to measures, `explore: { join: }` to
|
|||
|
||||
| LookML | KTX form | Notes |
|
||||
|---|---|---|
|
||||
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
|
||||
| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins`, `segments` | Manifest-backed; inherit grain/columns |
|
||||
| `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists |
|
||||
| `view: X { sql_always_where: <p> }` | **Standalone** with `sql: SELECT * FROM <base> WHERE <p>` | Enforcement, not opt-in |
|
||||
| `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: "<local> = Y.<col>", relationship: … }` | On the overlay or standalone |
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to
|
|||
|
||||
| MetricFlow | KTX form | Notes |
|
||||
|---|---|---|
|
||||
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `<connId>/X.yaml` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
|
||||
| `semantic_model: X { model: source('s','t') }` | **Overlay** at `<connId>/X.yaml` over table `t`. | Same shape; `source()` still resolves to a physical table. |
|
||||
| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** named `X` with `measures`, computed-only `columns`, `column_overrides`, `joins` | The `model:` ref resolves to a manifest table. |
|
||||
| `semantic_model: X { model: source('s','t') }` | **Overlay** named `X` over table `t`. | Same shape; `source()` still resolves to a physical table. |
|
||||
| `semantic_model: X { model: <literal> }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. |
|
||||
| `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X - flatten. |
|
||||
| `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "<agg>(<expr>)" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. |
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ skills must verify warehouse identifiers with `discover_data`,
|
|||
|
||||
## Part 1 - Schema reference
|
||||
|
||||
An SL source is a YAML file at `semantic-layer/<connectionId>/<source_name>.yaml`. There are three flavors:
|
||||
An SL source is a YAML file under `semantic-layer/<connectionId>/`. The file's `name:` field is the source's identity — it mirrors the warehouse identifier verbatim (e.g. Snowflake's uppercase `SIGNED_UP`); the filename is only a derived label. Always address sources by name through the `sl_*` tools, never by file path. There are three flavors:
|
||||
|
||||
### Overlay sources
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue