diff --git a/packages/cli/src/connectors/sqlite/connector.ts b/packages/cli/src/connectors/sqlite/connector.ts index f5ba2a55..4cbeeada 100644 --- a/packages/cli/src/connectors/sqlite/connector.ts +++ b/packages/cli/src/connectors/sqlite/connector.ts @@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string { return url; } -function stripLeadingSqlComments(sql: string): string { - let index = 0; - while (index < sql.length) { - while (/\s/.test(sql[index] ?? '')) { - index += 1; - } - if (sql.startsWith('--', index)) { - const end = sql.indexOf('\n', index + 2); - index = end === -1 ? sql.length : end + 1; - continue; - } - if (sql.startsWith('/*', index)) { - const end = sql.indexOf('*/', index + 2); - if (end === -1) { - return sql.slice(index); - } - index = end + 2; - continue; - } - break; - } - return sql.slice(index); -} - export function isKtxSqliteConnectionConfig( connection: KtxSqliteConnectionConfig | undefined, ): connection is KtxSqliteConnectionConfig { @@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector { async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); - const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params); + const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params); return { ...result, rowCount: result.rows.length }; } diff --git a/packages/cli/src/context/connections/read-only-sql.ts b/packages/cli/src/context/connections/read-only-sql.ts index fe71a0c3..9f6d4f22 100644 --- a/packages/cli/src/context/connections/read-only-sql.ts +++ b/packages/cli/src/context/connections/read-only-sql.ts @@ -2,8 +2,36 @@ const MUTATING_SQL = /^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i; const READ_SQL = /^\s*(select|with)\b/i; +// Agents (and the daemon's sqlglot validator, which ignores comments) routinely +// emit read-only queries prefixed with `-- ...` or `/* ... */`. Strip leading +// comments so the prefix check sees the real statement; otherwise valid SELECT/WITH +// SQL is rejected here while the parser-backed validator accepts it. +function stripLeadingSqlComments(sql: string): string { + let index = 0; + while (index < sql.length) { + while (/\s/.test(sql[index] ?? '')) { + index += 1; + } + if (sql.startsWith('--', index)) { + const end = sql.indexOf('\n', index + 2); + index = end === -1 ? sql.length : end + 1; + continue; + } + if (sql.startsWith('/*', index)) { + const end = sql.indexOf('*/', index + 2); + if (end === -1) { + return sql.slice(index); + } + index = end + 2; + continue; + } + break; + } + return sql.slice(index); +} + export function assertReadOnlySql(sql: string): string { - const trimmed = sql.trim(); + const trimmed = stripLeadingSqlComments(sql).trim(); if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) { throw new Error('Only read-only SELECT/WITH queries can be executed locally.'); } diff --git a/packages/cli/src/context/mcp/local-project-ports.ts b/packages/cli/src/context/mcp/local-project-ports.ts index 4c820b14..ece16ddb 100644 --- a/packages/cli/src/context/mcp/local-project-ports.ts +++ b/packages/cli/src/context/mcp/local-project-ports.ts @@ -10,6 +10,7 @@ import { createKtxDiscoverDataService } from '../../context/search/discover.js'; import type { SqlAnalysisDialect, SqlAnalysisPort } from '../../context/sql-analysis/ports.js'; import { compileLocalSlQuery } from '../../context/sl/local-query.js'; import { createKtxDictionarySearchService } from '../../context/sl/dictionary-search.js'; +import { readLocalSlSource } from '../../context/sl/local-sl.js'; import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js'; import type { KtxMcpContextPorts, KtxMcpProgressCallback, KtxSqlExecutionResponse } from './types.js'; @@ -62,23 +63,12 @@ function assertSafeConnectionId(connectionId: string): string { return assertSafePathToken('connection id', connectionId); } -function assertSafeSourceName(sourceName: string): string { - if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) { - throw new Error(`Unsafe semantic-layer source name: ${sourceName}`); - } - return assertSafePathToken('semantic-layer source name', sourceName); -} - async function cleanupConnector(connector: KtxScanConnector | null): Promise { if (connector?.cleanup) { await connector.cleanup(); } } -function slPath(connectionId: string, sourceName: string): string { - return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`; -} - async function executeValidatedReadOnlySql( project: KtxLocalProject, options: CreateLocalProjectMcpContextPortsOptions, @@ -188,13 +178,11 @@ export function createLocalProjectMcpContextPorts( }, semanticLayer: { async readSource(input) { - const path = slPath(input.connectionId, input.sourceName); - try { - const result = await project.fileStore.readFile(path); - return { sourceName: input.sourceName, yaml: result.content }; - } catch { - return null; - } + const source = await readLocalSlSource(project, { + connectionId: input.connectionId, + sourceName: input.sourceName, + }); + return source ? { sourceName: source.name, yaml: source.yaml } : null; }, async query(input, executionOptions) { if (!options.semanticLayerCompute) { diff --git a/packages/cli/src/context/sl/local-sl.ts b/packages/cli/src/context/sl/local-sl.ts index fb573392..c5adac19 100644 --- a/packages/cli/src/context/sl/local-sl.ts +++ b/packages/cli/src/context/sl/local-sl.ts @@ -97,8 +97,17 @@ function isSafeConnectionId(connectionId: string | undefined): connectionId is s return typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId); } +// Standalone files live at a path derived from the source name, so only +// conservative filename-safe names may reach `slPath`. Source names themselves +// mirror the warehouse identifier verbatim (Snowflake's uppercase `SIGNED_UP` +// or `EVENT$LOG`); manifest-backed sources are matched by name in memory and +// must never be gated on this filename rule. +function isSafeSourceName(sourceName: string): boolean { + return /^[A-Za-z0-9][A-Za-z0-9_]*$/.test(sourceName); +} + function assertSafeSourceName(sourceName: string): string { - if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) { + if (!isSafeSourceName(sourceName)) { throw new Error(`Unsafe semantic-layer source name: ${sourceName}`); } return assertSafePathToken('semantic-layer source name', sourceName); @@ -220,7 +229,12 @@ export async function loadLocalSlSourceRecords( for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) { const raw = await project.fileStore.readFile(path); - const tables = manifestTables(parseYamlRecord(raw.content)); + let tables: Record | null; + try { + tables = manifestTables(parseYamlRecord(raw.content)); + } catch (error) { + throw new Error(`${path}: ${error instanceof Error ? error.message : String(error)}`); + } if (!tables) { continue; } @@ -237,7 +251,26 @@ export async function loadLocalSlSourceRecords( for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) { const raw = await project.fileStore.readFile(path); - const parsed = parseYamlRecord(raw.content); + let parsed: Record; + try { + parsed = parseYamlRecord(raw.content); + } catch { + // A source mid-edit (e.g. an agent saved half-written YAML) must not take + // down reads, listings, or search for its siblings. Keep the file visible + // under its filename and surface the raw content for repair. + const brokenName = sourceNameFromPath(path); + sources.set(brokenName, { + connectionId, + name: brokenName, + path, + columnCount: 0, + measureCount: 0, + joinCount: 0, + yaml: raw.content, + source: { name: brokenName, grain: [], columns: [], joins: [], measures: [] }, + }); + continue; + } const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path); if (parsed.table || parsed.sql) { const source = parsedStandaloneSource(parsed, name); @@ -317,25 +350,35 @@ export async function writeLocalSlSource( ); } -/** @internal */ export async function readLocalSlSource( project: KtxLocalProject, input: { connectionId: string; sourceName: string }, ): Promise { - const path = slPath(input.connectionId, input.sourceName); - try { - const result = await project.fileStore.readFile(path); - return { - ...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }), - yaml: result.content, - }; - } catch { - const records = await loadLocalSlSourceRecords(project, { - connectionId: input.connectionId, - }); - const record = records.find((source) => source.name === input.sourceName); - return record ? { ...record } : null; + // Only filename-safe names can have a standalone file; manifest-backed + // sources mirror raw warehouse identifiers (e.g. `EVENT$LOG`) and are found + // by the record lookup below, which never derives a path from the name. + if (isSafeSourceName(input.sourceName)) { + const path = slPath(input.connectionId, input.sourceName); + try { + const result = await project.fileStore.readFile(path); + return { + ...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }), + yaml: result.content, + }; + } catch { + // Missing or mid-edit standalone file — the record loader covers both: + // it skips absent files and represents a file whose YAML no longer + // parses verbatim, so readers — `ktx sl read`, `ktx sl validate`, and + // the `sl_read_source` MCP tool — can surface the broken content for + // repair instead of failing on it. + } } + + const records = await loadLocalSlSourceRecords(project, { + connectionId: input.connectionId, + }); + const record = records.find((source) => source.name === input.sourceName); + return record ? { ...record } : null; } export async function resolveLocalSlSource( diff --git a/packages/cli/test/context/connections/read-only-sql.test.ts b/packages/cli/test/context/connections/read-only-sql.test.ts index 90affa04..ef8c1595 100644 --- a/packages/cli/test/context/connections/read-only-sql.test.ts +++ b/packages/cli/test/context/connections/read-only-sql.test.ts @@ -15,6 +15,19 @@ describe('assertReadOnlySql', () => { 'Only read-only SELECT/WITH queries can be executed locally', ); }); + + it('accepts read-only queries that begin with leading comments', () => { + expect(assertReadOnlySql('-- signups per day\nselect count(*) from public.signed_up')).toBe( + 'select count(*) from public.signed_up', + ); + expect(assertReadOnlySql('/* block */\n with paid as (select 1) select * from paid')).toContain('with paid'); + }); + + it('still rejects mutating statements hidden behind leading comments', () => { + expect(() => assertReadOnlySql('-- harmless\n delete from orders')).toThrow( + 'Only read-only SELECT/WITH queries can be executed locally', + ); + }); }); describe('limitSqlForExecution', () => { @@ -27,4 +40,10 @@ describe('limitSqlForExecution', () => { it('returns the trimmed SQL when no maxRows value is provided', () => { expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders'); }); + + it('strips leading comments before wrapping with a row limit', () => { + expect(limitSqlForExecution('-- top customers\nselect * from public.orders', 25)).toBe( + 'select * from (select * from public.orders) as ktx_query_result limit 25', + ); + }); }); diff --git a/packages/cli/test/context/mcp/local-project-ports.test.ts b/packages/cli/test/context/mcp/local-project-ports.test.ts index afe85b4c..1ca9933e 100644 --- a/packages/cli/test/context/mcp/local-project-ports.test.ts +++ b/packages/cli/test/context/mcp/local-project-ports.test.ts @@ -690,7 +690,54 @@ describe('createLocalProjectMcpContextPorts', () => { }); }); - it('rejects path traversal keys before touching the project directory', async () => { + it('reads manifest-backed sources with uppercase warehouse identifiers', async () => { + const project = await initKtxProject({ projectDir: tempDir }); + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/PUBLIC.yaml', + [ + 'tables:', + ' SIGNED_UP:', + ' table: PUBLIC.SIGNED_UP', + ' columns:', + ' - name: ID', + ' type: number', + ' pk: true', + '', + ].join('\n'), + 'ktx', + 'ktx@example.com', + 'seed uppercase manifest shard', + ); + const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null }); + + await expect( + ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'SIGNED_UP' }), + ).resolves.toMatchObject({ + sourceName: 'SIGNED_UP', + yaml: expect.stringContaining('table: PUBLIC.SIGNED_UP'), + }); + }); + + it('returns a standalone source verbatim even when its YAML is currently broken', async () => { + const project = await initKtxProject({ projectDir: tempDir }); + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + 'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n', + 'ktx', + 'ktx@example.com', + 'seed broken source mid-edit', + ); + const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null }); + + await expect( + ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'orders' }), + ).resolves.toMatchObject({ + sourceName: 'orders', + yaml: expect.stringContaining('[unterminated'), + }); + }); + + it('keeps path-traversal keys away from the project directory', async () => { const project = await initKtxProject({ projectDir: tempDir }); const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null }); @@ -701,12 +748,14 @@ describe('createLocalProjectMcpContextPorts', () => { }), ).rejects.toThrow('Invalid wiki key "../outside". Wiki keys must be flat; use "outside".'); + // Source reads never derive a file path from the name; a traversal-style + // name simply matches no record. await expect( ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: '../orders', }), - ).rejects.toThrow('Unsafe semantic-layer source name'); + ).resolves.toBeNull(); }); it('uses semantic compute for compile-only sl_query when supplied', async () => { diff --git a/packages/cli/test/context/sl/local-sl.test.ts b/packages/cli/test/context/sl/local-sl.test.ts index b3a9b7d6..3cc2f238 100644 --- a/packages/cli/test/context/sl/local-sl.test.ts +++ b/packages/cli/test/context/sl/local-sl.test.ts @@ -261,6 +261,119 @@ describe('local semantic-layer helpers', () => { ); }); + it('reads manifest-backed scan sources whose warehouse identifiers are uppercase', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/PUBLIC.yaml', + `tables: + SIGNED_UP: + table: PUBLIC.SIGNED_UP + columns: + - name: ID + type: number + pk: true + - name: EMAIL + type: string +`, + 'ktx', + 'ktx@example.com', + 'Add uppercase manifest shard', + ); + + await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'SIGNED_UP' })).resolves.toEqual( + expect.objectContaining({ + connectionId: 'warehouse', + name: 'SIGNED_UP', + path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#SIGNED_UP', + yaml: expect.stringContaining('table: PUBLIC.SIGNED_UP'), + }), + ); + }); + + it('reads manifest-backed sources whose names are not filename-safe', async () => { + // Snowflake and Postgres unquoted identifiers allow `$`; manifest keys + // carry the warehouse name verbatim, so the lookup must accept it. + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/PUBLIC.yaml', + `tables: + EVENT$LOG: + table: PUBLIC.EVENT$LOG + columns: + - name: ID + type: number + pk: true +`, + 'ktx', + 'ktx@example.com', + 'Add manifest shard with dollar-sign table name', + ); + + await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'EVENT$LOG' })).resolves.toEqual( + expect.objectContaining({ + connectionId: 'warehouse', + name: 'EVENT$LOG', + path: 'semantic-layer/warehouse/_schema/PUBLIC.yaml#EVENT$LOG', + yaml: expect.stringContaining('table: PUBLIC.EVENT$LOG'), + }), + ); + }); + + it('reads a manifest-backed source while a sibling standalone file has broken YAML', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/PUBLIC.yaml', + `tables: + SIGNED_UP: + table: PUBLIC.SIGNED_UP + columns: + - name: ID + type: number + pk: true +`, + 'ktx', + 'ktx@example.com', + 'Add manifest shard', + ); + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + 'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n', + 'ktx', + 'ktx@example.com', + 'seed a sibling source mid-edit with broken YAML', + ); + + await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'SIGNED_UP' })).resolves.toEqual( + expect.objectContaining({ + name: 'SIGNED_UP', + yaml: expect.stringContaining('table: PUBLIC.SIGNED_UP'), + }), + ); + + // The broken sibling stays visible in listings instead of hiding or + // failing the whole connection. + await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([ + expect.objectContaining({ name: 'orders', columnCount: 0 }), + expect.objectContaining({ name: 'SIGNED_UP', columnCount: 1 }), + ]); + }); + + it('returns the raw YAML of a standalone source whose content no longer parses', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + 'name: orders\nmeasures:\n - name: revenue\n expr: [unterminated\n', + 'ktx', + 'ktx@example.com', + 'seed a source mid-edit with broken YAML', + ); + + await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' })).resolves.toEqual( + expect.objectContaining({ + connectionId: 'warehouse', + name: 'orders', + path: 'semantic-layer/warehouse/orders.yaml', + yaml: expect.stringContaining('[unterminated'), + }), + ); + }); + it('expands manifest-backed scan sources when listing all connections', async () => { await project.fileStore.writeFile( 'semantic-layer/warehouse/_schema/public.yaml', @@ -506,12 +619,22 @@ describe('local semantic-layer helpers', () => { }); }); - it('rejects unsafe source paths', async () => { + it('never derives a file path from a traversal-style source name', async () => { + // Reads match names against loaded records, so a traversal-style name is + // simply not found; writes build a file path from the name and must throw. await expect( readLocalSlSource(project, { connectionId: 'warehouse', sourceName: '../orders', }), + ).resolves.toBeNull(); + + await expect( + writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: '../orders', + yaml: ORDERS_YAML.replace('name: orders', 'name: ../orders'), + }), ).rejects.toThrow('Unsafe semantic-layer source name'); }); }); diff --git a/uv.lock b/uv.lock index 1f35cc3e..924d9159 100644 --- a/uv.lock +++ b/uv.lock @@ -466,7 +466,7 @@ wheels = [ [[package]] name = "ktx-daemon" -version = "0.10.0" +version = "0.11.0" source = { editable = "python/ktx-daemon" } dependencies = [ { name = "fastapi" }, @@ -523,7 +523,7 @@ dev = [ [[package]] name = "ktx-sl" -version = "0.10.0" +version = "0.11.0" source = { editable = "python/ktx-sl" } dependencies = [ { name = "pydantic" },