fix: read semantic sources safely

This commit is contained in:
Andrey Avtomonov 2026-06-10 01:06:52 +02:00
parent 65de75ebd7
commit de1f1a8d5e
8 changed files with 292 additions and 66 deletions

View file

@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string {
return url;
}
function stripLeadingSqlComments(sql: string): string {
let index = 0;
while (index < sql.length) {
while (/\s/.test(sql[index] ?? '')) {
index += 1;
}
if (sql.startsWith('--', index)) {
const end = sql.indexOf('\n', index + 2);
index = end === -1 ? sql.length : end + 1;
continue;
}
if (sql.startsWith('/*', index)) {
const end = sql.indexOf('*/', index + 2);
if (end === -1) {
return sql.slice(index);
}
index = end + 2;
continue;
}
break;
}
return sql.slice(index);
}
export function isKtxSqliteConnectionConfig(
connection: KtxSqliteConnectionConfig | undefined,
): connection is KtxSqliteConnectionConfig {
@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params);
return { ...result, rowCount: result.rows.length };
}

View file

@ -2,8 +2,36 @@ const MUTATING_SQL =
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
const READ_SQL = /^\s*(select|with)\b/i;
// Agents (and the daemon's sqlglot validator, which ignores comments) routinely
// emit read-only queries prefixed with `-- ...` or `/* ... */`. Strip leading
// comments so the prefix check sees the real statement; otherwise valid SELECT/WITH
// SQL is rejected here while the parser-backed validator accepts it.
function stripLeadingSqlComments(sql: string): string {
let index = 0;
while (index < sql.length) {
while (/\s/.test(sql[index] ?? '')) {
index += 1;
}
if (sql.startsWith('--', index)) {
const end = sql.indexOf('\n', index + 2);
index = end === -1 ? sql.length : end + 1;
continue;
}
if (sql.startsWith('/*', index)) {
const end = sql.indexOf('*/', index + 2);
if (end === -1) {
return sql.slice(index);
}
index = end + 2;
continue;
}
break;
}
return sql.slice(index);
}
export function assertReadOnlySql(sql: string): string {
const trimmed = sql.trim();
const trimmed = stripLeadingSqlComments(sql).trim();
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
}

View file

@ -10,6 +10,7 @@ import { createKtxDiscoverDataService } from '../../context/search/discover.js';
import type { SqlAnalysisDialect, SqlAnalysisPort } from '../../context/sql-analysis/ports.js';
import { compileLocalSlQuery } from '../../context/sl/local-query.js';
import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js';
import { readLocalSlSource } from '../../context/sl/local-sl.js';
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js';
@ -62,23 +63,12 @@ function assertSafeConnectionId(connectionId: string): string {
return assertSafePathToken('connection id', connectionId);
}
function assertSafeSourceName(sourceName: string): string {
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
return assertSafePathToken('semantic-layer source name', sourceName);
}
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
if (connector?.cleanup) {
await connector.cleanup();
}
}
function slPath(connectionId: string, sourceName: string): string {
return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
}
async function executeValidatedReadOnlySql(
project: KtxLocalProject,
options: CreateLocalProjectMcpContextPortsOptions,
@ -188,13 +178,11 @@ export function createLocalProjectMcpContextPorts(
},
semanticLayer: {
async readSource(input) {
const path = slPath(input.connectionId, input.sourceName);
try {
const result = await project.fileStore.readFile(path);
return { sourceName: input.sourceName, yaml: result.content };
} catch {
return null;
}
const source = await readLocalSlSource(project, {
connectionId: input.connectionId,
sourceName: input.sourceName,
});
return source ? { sourceName: source.name, yaml: source.yaml } : null;
},
async query(input, executionOptions) {
if (!options.semanticLayerCompute) {

View file

@ -97,8 +97,17 @@ function isSafeConnectionId(connectionId: string | undefined): connectionId is s
return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
}
// Standalone files live at a path derived from the source name, so only
// conservative filename-safe names may reach `slPath`. Source names themselves
// mirror the warehouse identifier verbatim (Snowflake's uppercase `SIGNED_UP`
// or `EVENT$LOG`); manifest-backed sources are matched by name in memory and
// must never be gated on this filename rule.
function isSafeSourceName(sourceName: string): boolean {
return /^[A-Za-z0-9][A-Za-z0-9_]*$/.test(sourceName);
}
function assertSafeSourceName(sourceName: string): string {
if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
if (!isSafeSourceName(sourceName)) {
throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
return assertSafePathToken('semantic-layer source name', sourceName);
@ -220,7 +229,12 @@ export async function loadLocalSlSourceRecords(
for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
const raw = await project.fileStore.readFile(path);
const tables = manifestTables(parseYamlRecord(raw.content));
let tables: Record<string, ManifestTableEntry> | null;
try {
tables = manifestTables(parseYamlRecord(raw.content));
} catch (error) {
throw new Error(`${path}: ${error instanceof Error ? error.message : String(error)}`);
}
if (!tables) {
continue;
}
@ -237,7 +251,26 @@ export async function loadLocalSlSourceRecords(
for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
const raw = await project.fileStore.readFile(path);
const parsed = parseYamlRecord(raw.content);
let parsed: Record<string, unknown>;
try {
parsed = parseYamlRecord(raw.content);
} catch {
// A source mid-edit (e.g. an agent saved half-written YAML) must not take
// down reads, listings, or search for its siblings. Keep the file visible
// under its filename and surface the raw content for repair.
const brokenName = sourceNameFromPath(path);
sources.set(brokenName, {
connectionId,
name: brokenName,
path,
columnCount: 0,
measureCount: 0,
joinCount: 0,
yaml: raw.content,
source: { name: brokenName, grain: [], columns: [], joins: [], measures: [] },
});
continue;
}
const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
if (parsed.table || parsed.sql) {
const source = parsedStandaloneSource(parsed, name);
@ -317,25 +350,35 @@ export async function writeLocalSlSource(
);
}
/** @internal */
export async function readLocalSlSource(
project: KtxLocalProject,
input: { connectionId: string; sourceName: string },
): Promise<LocalSlSource | null> {
const path = slPath(input.connectionId, input.sourceName);
try {
const result = await project.fileStore.readFile(path);
return {
...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }),
yaml: result.content,
};
} catch {
const records = await loadLocalSlSourceRecords(project, {
connectionId: input.connectionId,
});
const record = records.find((source) => source.name === input.sourceName);
return record ? { ...record } : null;
// Only filename-safe names can have a standalone file; manifest-backed
// sources mirror raw warehouse identifiers (e.g. `EVENT$LOG`) and are found
// by the record lookup below, which never derives a path from the name.
if (isSafeSourceName(input.sourceName)) {
const path = slPath(input.connectionId, input.sourceName);
try {
const result = await project.fileStore.readFile(path);
return {
...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }),
yaml: result.content,
};
} catch {
// Missing or mid-edit standalone file — the record loader covers both:
// it skips absent files and represents a file whose YAML no longer
// parses verbatim, so readers — `ktx sl read`, `ktx sl validate`, and
// the `sl_read_source` MCP tool — can surface the broken content for
// repair instead of failing on it.
}
}
const records = await loadLocalSlSourceRecords(project, {
connectionId: input.connectionId,
});
const record = records.find((source) => source.name === input.sourceName);
return record ? { ...record } : null;
}
export async function resolveLocalSlSource(