feat: query_policy semantic-layer-only restricts agents to predefined semantic-layer measures (#334)

* feat(sl): add predefined_measures_only guard to semantic query planning

SemanticQuery gains a predefined_measures_only flag; the planner rejects
any measure resolved with Provenance.COMPOSED (runtime aggregate
expressions and query-time derivations) while predefined measures,
predefined derived chains, dimensions, filters, and segments pass.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* feat(config): add per-connection query_policy to warehouse connections

query_policy: semantic-layer-only | read-only-sql (default) on the
warehouse connection schema, plus a policy module with the raw-SQL
guard, federated member restriction lookup, and the project-level
predicate used to gate sql_execution registration.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* feat(cli): enforce query_policy on raw SQL through one shared executor

ktx sql and the MCP sql_execution tool now share executeProjectRawSql
(resolve, policy check, read-only validation, execute), collapsing
their duplicated validate-then-execute paths. Restricted connections
are rejected before validation; federated raw SQL is rejected when any
member is restricted. sql_execution is not registered when every SQL
connection is restricted, and connection_list marks restricted
connections so agents route to sl_query. executeProjectReadOnlySql
stays generic for ktx-internal SQL (scan, ingest, SL-generated).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* feat(sl): compile queries with predefined_measures_only from query_policy

compileLocalSlQuery injects the flag from the connection's query_policy,
never from caller input, covering both ktx sl query and the MCP
sl_query tool through the daemon compile path.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* docs: document query_policy semantic-layer-only

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* fix(sl): close semantic-layer-only bypasses via filters and federated hint

The predefined_measures_only guard only inspected query.measures, so a
composed aggregate written into `filters` slipped through _classify_filters
into a HAVING clause untouched — letting a restricted agent evaluate
arbitrary aggregates (e.g. threshold-probing `sum(x) BETWEEN a AND b`).
Reject filter clauses that compose an aggregate function; a HAVING that
compares a predefined measure by name (`orders.revenue > 100`) still works.

Also make the federated sl_query error policy-aware: when a member is
restricted, raw federated SQL is disabled too, so stop directing the agent
to `ktx sql -c _ktx_federated` / sql_execution (a guaranteed failure) and
point to per-connection semantic-layer queries instead.

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
Luca Martial 2026-07-03 01:54:17 -07:00 committed by GitHub
parent 66768fe009
commit a651b82e2f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 887 additions and 68 deletions

View file

@ -1,5 +1,6 @@
import type { KtxProjectConnectionConfig } from '../project/config.js';
import type { ConnectionType } from './connection-type.js';
import { connectionQueryPolicy } from './query-policy.js';
export interface LocalWarehouseDescriptor {
id: string;
@ -18,6 +19,7 @@ export interface LocalConnectionInfo {
connectionType: string;
members?: string[];
hint?: string;
queryPolicy?: 'semantic-layer-only';
}
const DRIVER_TO_CONNECTION_TYPE: Record<string, ConnectionType> = {
@ -92,10 +94,17 @@ export function localConnectionInfoFromConfig(
if (!connection) {
return null;
}
const restricted = connectionQueryPolicy(connection) === 'semantic-layer-only';
return {
id,
name: id,
connectionType: localConnectionTypeForConfig(id, connection),
...(restricted
? {
queryPolicy: 'semantic-layer-only' as const,
hint: 'Raw SQL is disabled on this connection; query it with sl_query using predefined measures.',
}
: {}),
};
}

View file

@ -1,8 +1,15 @@
import { executeFederatedQuery } from '../../connectors/duckdb/federated-executor.js';
import { KtxExpectedError, KtxQueryError, isNativeProgrammingFault } from '../../errors.js';
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
import type { SqlAnalysisPort } from '../sql-analysis/ports.js';
import { assertSafeConnectionId } from '../sl/source-files.js';
import type { KtxLocalProject } from '../project/project.js';
import type { KtxScanConnector, KtxScanContext } from '../scan/types.js';
import { assertSqlQueryableConnection } from './dialects.js';
import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from './federation.js';
import { assertRawSqlAllowed } from './query-policy.js';
import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutionResult } from './query-executor.js';
import { resolveConfiguredConnection } from './resolve-connection.js';
export interface ExecuteProjectReadOnlySqlDeps {
project: KtxLocalProject;
@ -56,3 +63,71 @@ export async function executeProjectReadOnlySql(
await connector?.cleanup?.();
}
}
/** Progress listener for raw SQL execution; receives a progress value and a message, and may be sync or async. */
type RawSqlProgressCallback = (event: { progress: number; message: string }) => void | Promise<void>;
/** Inputs and collaborators consumed by executeProjectRawSql. */
export interface ExecuteProjectRawSqlDeps {
/** Project whose config and projectDir drive connection resolution and the query_policy check. */
project: KtxLocalProject;
/** Configured connection id, or FEDERATED_CONNECTION_ID to target the federated connection. */
connectionId: string;
/** Caller-authored SQL text; validated as read-only before it is executed. */
sql: string;
/** Row cap passed through to executeProjectReadOnlySql. */
maxRows: number;
/** SQL analysis port; its validateReadOnly verdict gates execution. */
sqlAnalysis: SqlAnalysisPort;
/** Connector factory, passed through unchanged to executeProjectReadOnlySql. */
createConnector: (connectionId: string) => Promise<KtxScanConnector> | KtxScanConnector;
/** Optional federated executor, passed through unchanged to executeProjectReadOnlySql. */
executeFederated?: typeof executeFederatedQuery;
/** Run identifier, passed through unchanged to executeProjectReadOnlySql. */
runId: string;
/** Optional progress listener; awaited at progress 0 (validating), 0.3 (executing) and 1 (rows fetched). */
onProgress?: RawSqlProgressCallback;
}
/**
 * Runs caller-authored (raw) SQL through one guarded pipeline shared by
 * `ktx sql` and the MCP sql_execution tool: resolve the connection, enforce
 * its query_policy, require a read-only verdict from the parser, then execute.
 *
 * ktx-internal SQL (semantic-layer, ingest) does not go through this function;
 * it calls executeProjectReadOnlySql directly and is not subject to query_policy.
 *
 * @param deps - Project, target connection id, SQL text, row cap and collaborators.
 * @returns The result produced by executeProjectReadOnlySql.
 * @throws KtxQueryError when assertRawSqlAllowed rejects the connection, when
 *   the SQL fails read-only validation, or when execution fails with an error
 *   that is neither a native programming fault nor already a KtxExpectedError.
 */
export async function executeProjectRawSql(deps: ExecuteProjectRawSqlDeps): Promise<KtxSqlQueryExecutionResult> {
  const { project } = deps;
  await deps.onProgress?.({ progress: 0, message: 'Validating SQL' });

  // The federated id is used as-is and has no configured connection behind it;
  // any other id is sanitized, resolved and checked for SQL support.
  const isFederated = deps.connectionId === FEDERATED_CONNECTION_ID;
  let connectionId = deps.connectionId;
  let connection: ReturnType<typeof resolveConfiguredConnection> | undefined;
  if (!isFederated) {
    connectionId = assertSafeConnectionId(deps.connectionId);
    connection = resolveConfiguredConnection(project.config, connectionId);
    assertSqlQueryableConnection(connectionId, connection!.driver);
  }

  // Policy is enforced before the parser runs, so a restricted connection is
  // rejected without validating or executing anything.
  assertRawSqlAllowed(project.config, project.projectDir, connectionId);

  const dialect = sqlAnalysisDialectForDriver(isFederated ? 'duckdb' : connection!.driver);
  const verdict = await deps.sqlAnalysis.validateReadOnly(deps.sql, dialect);
  if (!verdict.ok) {
    // The read-only guard turning down the caller's SQL is an expected outcome
    // rather than a ktx fault; classifying it keeps it out of Error Tracking.
    throw new KtxQueryError(verdict.error ?? 'SQL is not read-only.');
  }

  await deps.onProgress?.({ progress: 0.3, message: 'Executing' });
  const request = {
    project,
    input: {
      connectionId,
      projectDir: project.projectDir,
      connection,
      sql: deps.sql,
      maxRows: deps.maxRows,
    },
    createConnector: deps.createConnector,
    executeFederated: deps.executeFederated,
    runId: deps.runId,
  };

  let result: Awaited<ReturnType<typeof executeProjectReadOnlySql>>;
  try {
    result = await executeProjectReadOnlySql(request);
  } catch (error: unknown) {
    // Native JS errors (TypeError, etc.) point at a bug in connector code and
    // already-expected errors are classified: both propagate untouched.
    if (isNativeProgrammingFault(error) || error instanceof KtxExpectedError) {
      throw error;
    }
    // Anything else is a warehouse/driver rejection (e.g. the caller's SQL did
    // not compile): mark it expected while keeping the warehouse diagnostics.
    throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error });
  }

  await deps.onProgress?.({ progress: 1, message: `Fetched ${result.rowCount ?? result.rows.length} rows` });
  return result;
}

View file

@ -0,0 +1,61 @@
import { KtxQueryError } from '../../errors.js';
import type { KtxProjectConfig, KtxProjectConnectionConfig } from '../project/config.js';
import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from './federation.js';
import { isSqlQueryableDriver } from './dialects.js';
/** The two query policies a connection can resolve to. */
export type KtxConnectionQueryPolicy = 'read-only-sql' | 'semantic-layer-only';
/**
 * Resolves the effective query policy of a connection.
 *
 * @param connection - Connection config, or `undefined` for an unknown connection.
 * @returns 'semantic-layer-only' only when the config sets exactly that value;
 *   'read-only-sql' for an undefined connection or any other value.
 */
export function connectionQueryPolicy(
  connection: KtxProjectConnectionConfig | undefined,
): KtxConnectionQueryPolicy {
  if (connection === undefined) {
    return 'read-only-sql';
  }
  if (connection.query_policy === 'semantic-layer-only') {
    return 'semantic-layer-only';
  }
  return 'read-only-sql';
}
/**
 * Lists the federated member connections whose policy blocks raw SQL through
 * the federated connection.
 *
 * @param config - Project config whose connections are federated.
 * @param projectDir - Project directory handed to deriveFederatedConnection.
 * @returns Connection ids of members resolved as 'semantic-layer-only', in
 *   member order; empty when no federated connection is derived.
 */
export function restrictedFederatedMemberIds(config: KtxProjectConfig, projectDir: string): string[] {
  const federated = deriveFederatedConnection(config.connections, projectDir);
  if (!federated) {
    return [];
  }
  const restrictedIds: string[] = [];
  for (const member of federated.members) {
    if (connectionQueryPolicy(member.connection) === 'semantic-layer-only') {
      restrictedIds.push(member.connectionId);
    }
  }
  return restrictedIds;
}
/**
 * Throws when the connection's query policy forbids raw SQL.
 *
 * A regular connection is rejected when it resolves to 'semantic-layer-only'.
 * The federated connection is rejected when at least one of its members does.
 *
 * @param config - Project config holding the connections.
 * @param projectDir - Project directory, used to derive the federated members.
 * @param connectionId - Configured connection id or FEDERATED_CONNECTION_ID.
 * @throws KtxQueryError naming the restricted connection(s) and the policy.
 */
export function assertRawSqlAllowed(config: KtxProjectConfig, projectDir: string, connectionId: string): void {
  if (connectionId !== FEDERATED_CONNECTION_ID) {
    const policy = connectionQueryPolicy(config.connections[connectionId]);
    if (policy === 'semantic-layer-only') {
      throw new KtxQueryError(
        `Connection "${connectionId}" is restricted to semantic-layer queries (query_policy: semantic-layer-only in ktx.yaml); ` +
          'raw SQL execution is disabled. Query it through the semantic layer with predefined measures instead ' +
          '(the sl_query tool or `ktx sl query`).',
      );
    }
    return;
  }

  // Federated: a single restricted member is enough to disable the whole path.
  const restrictedIds = restrictedFederatedMemberIds(config, projectDir);
  if (restrictedIds.length === 0) {
    return;
  }
  const quotedIds = restrictedIds.map((id) => `"${id}"`).join(', ');
  throw new KtxQueryError(
    `Federated SQL execution is disabled: member connection(s) ${quotedIds} are restricted to semantic-layer queries (query_policy: semantic-layer-only in ktx.yaml).`,
  );
}
/**
 * Tells whether any raw-SQL surface can succeed for this project.
 *
 * Returns false only when the project has SQL-queryable connections and every
 * one of them is restricted. In that case no raw-SQL call can succeed, so the
 * sql_execution tool should not be offered at all. A project with no
 * SQL-queryable connections returns true.
 *
 * @param config - Project config whose connections are inspected.
 */
export function projectAllowsRawSql(config: KtxProjectConfig): boolean {
  const sqlPolicies = Object.values(config.connections)
    .filter((connection) => isSqlQueryableDriver(connection.driver))
    .map((connection) => connectionQueryPolicy(connection));
  return sqlPolicies.length === 0 || sqlPolicies.includes('read-only-sql');
}

View file

@ -243,6 +243,7 @@ const connectionListOutputSchema = z.object({
connectionType: z.string(),
members: z.array(z.string()).optional(),
hint: z.string().optional(),
queryPolicy: z.literal('semantic-layer-only').optional(),
}),
),
});

View file

@ -1,12 +1,11 @@
import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js';
import { KtxExpectedError, KtxQueryError, isNativeProgrammingFault } from '../../errors.js';
import { KtxExpectedError } from '../../errors.js';
import { isDatabaseDriver, normalizeConnectionDriver } from '../../connection-drivers.js';
import { sqlDialectNotes } from '../../context/sql-analysis/dialect-notes.js';
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
import { executeProjectReadOnlySql } from '../../context/connections/project-sql-executor.js';
import { FEDERATED_CONNECTION_ID, federatedConnectionListing } from '../../context/connections/federation.js';
import { assertSqlQueryableConnection } from '../../context/connections/dialects.js';
import { resolveConfiguredConnection } from '../../context/connections/resolve-connection.js';
import { executeProjectRawSql } from '../../context/connections/project-sql-executor.js';
import { federatedConnectionListing } from '../../context/connections/federation.js';
import { projectAllowsRawSql, restrictedFederatedMemberIds } from '../../context/connections/query-policy.js';
import {
type LocalConnectionInfo,
localConnectionInfoFromConfig,
@ -41,7 +40,6 @@ async function executeValidatedReadOnlySql(
input: { connectionId: string; sql: string; maxRows: number },
onProgress?: KtxMcpProgressCallback,
): Promise<KtxSqlExecutionResponse> {
await onProgress?.({ progress: 0, message: 'Validating SQL' });
if (!options.sqlAnalysis) {
throw new Error('sql_execution requires parser-backed SQL validation.');
}
@ -50,52 +48,22 @@ async function executeValidatedReadOnlySql(
throw new Error('sql_execution requires a local scan connector factory.');
}
const isFederated = input.connectionId === FEDERATED_CONNECTION_ID;
const connectionId = isFederated ? input.connectionId : assertSafeConnectionId(input.connectionId);
const connection = isFederated ? undefined : resolveConfiguredConnection(project.config, connectionId);
if (!isFederated) {
assertSqlQueryableConnection(connectionId, connection!.driver);
}
const dialect = sqlAnalysisDialectForDriver(isFederated ? 'duckdb' : connection!.driver);
const validation = await options.sqlAnalysis.validateReadOnly(input.sql, dialect);
if (!validation.ok) {
// A read-only guard rejecting the agent's SQL is an expected outcome, not a
// ktx fault: classify it so reportException keeps it out of Error Tracking.
throw new KtxQueryError(validation.error ?? 'SQL is not read-only.');
}
await onProgress?.({ progress: 0.3, message: 'Executing' });
const result = await executeProjectReadOnlySql({
const result = await executeProjectRawSql({
project,
input: {
connectionId,
projectDir: project.projectDir,
connection,
sql: input.sql,
maxRows: input.maxRows,
},
connectionId: input.connectionId,
sql: input.sql,
maxRows: input.maxRows,
sqlAnalysis: options.sqlAnalysis,
createConnector,
runId: 'mcp-sql-execution',
}).catch((error: unknown) => {
// A warehouse/driver rejection (e.g. the agent's SQL failed to compile) is a
// surfaced operational outcome, not a ktx fault: mark it expected while
// preserving the warehouse's own diagnostics. A native JS error (TypeError,
// etc.) signals a bug in connector code — let it propagate unchanged so Error
// Tracking still sees it.
if (isNativeProgrammingFault(error) || error instanceof KtxExpectedError) {
throw error;
}
throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error });
onProgress,
});
const response = {
return {
headers: result.headers,
...(result.headerTypes ? { headerTypes: result.headerTypes } : {}),
rows: result.rows,
rowCount: result.rowCount ?? result.rows.length,
};
await onProgress?.({ progress: 1, message: `Fetched ${response.rowCount} rows` });
return response;
}
/** @internal Resolves a connection's dialect SQL notes; throws KtxExpectedError for an unknown or non-SQL-warehouse connection. */
@ -130,12 +98,17 @@ export function createLocalProjectMcpContextPorts(
.sort((a, b) => a.id.localeCompare(b.id));
const federated = federatedConnectionListing(project.config.connections, project.projectDir);
if (federated) {
const restricted = restrictedFederatedMemberIds(project.config, project.projectDir);
configured.push({
id: federated.id,
name: federated.id,
connectionType: 'DUCKDB',
members: federated.members,
hint: federated.hint,
hint:
restricted.length > 0
? `Federated SQL is disabled: member connection(s) ${restricted.join(', ')} have query_policy: semantic-layer-only.`
: federated.hint,
...(restricted.length > 0 ? { queryPolicy: 'semantic-layer-only' as const } : {}),
});
}
return configured;
@ -231,7 +204,10 @@ export function createLocalProjectMcpContextPorts(
},
};
if (options.sqlAnalysis && options.localScan?.createConnector) {
// Register sql_execution only when some connection can accept raw SQL; in
// mixed projects the tool stays and executeProjectRawSql rejects restricted
// connection ids at request time.
if (options.sqlAnalysis && options.localScan?.createConnector && projectAllowsRawSql(project.config)) {
ports.sqlExecution = {
async execute(input, executionOptions) {
return executeValidatedReadOnlySql(project, options, input, executionOptions?.onProgress);

View file

@ -42,6 +42,12 @@ function warehouseConnectionSchema<const Driver extends WarehouseDriver>(driver:
.describe(
'Maximum execution time for a single read-only query, in milliseconds (default 30000). Enforced as a server-side statement timeout for remote engines and by SIGKILL-ing a forked query subprocess for in-process SQLite. A query exceeding it is cancelled and returns a "query exceeded Ns" error so the agent can revise.',
),
query_policy: z
.enum(['read-only-sql', 'semantic-layer-only'])
.optional()
.describe(
'Agent-facing query authorship policy (default "read-only-sql"). "read-only-sql" allows parser-validated read-only SQL plus semantic-layer queries. "semantic-layer-only" rejects raw SQL on this connection (`ktx sql`, the sql_execution tool, and federated queries that include it) and restricts semantic-layer queries to measures predefined in the semantic-layer sources. ktx-internal scan and ingest queries are unaffected.',
),
})
.describe(
`${driver} warehouse connection. Additional driver-tunable fields (e.g. context.queryHistory) are accepted and passed through.`,

View file

@ -4,6 +4,7 @@ import type { KtxMcpProgressCallback } from '../mcp/types.js';
import type { KtxLocalProject } from '../../context/project/project.js';
import { isSqlQueryableDriver } from '../connections/dialects.js';
import { FEDERATED_CONNECTION_ID } from '../connections/federation.js';
import { connectionQueryPolicy, restrictedFederatedMemberIds } from '../connections/query-policy.js';
import { resolveRequiredConnectionId } from '../connections/resolve-connection.js';
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
import { loadLocalSlSourceRecords } from './local-sl.js';
@ -14,11 +15,31 @@ import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput, Semant
const COMPILE_ONLY_REASON =
'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.';
const FEDERATED_SL_QUERY_UNSUPPORTED =
`Semantic-layer queries are per-connection and cannot target the federated connection '${FEDERATED_CONNECTION_ID}'. ` +
`Run a cross-database query as read-only SQL instead — ktx sql -c ${FEDERATED_CONNECTION_ID} "SELECT ..." or the sql_execution tool — ` +
'using catalog-qualified table names (connectionId.schema.table, or connectionId.table for sqlite; ' +
'double-quote ids that are not bare identifiers, e.g. "books-db".public.books).';
// Opening sentence of the error raised when a semantic-layer query targets the
// federated connection; the guidance that follows it is appended by the caller.
const FEDERATED_SL_QUERY_PREFIX =
`Semantic-layer queries are per-connection and cannot target the federated connection '${FEDERATED_CONNECTION_ID}'. `;
/**
 * Builds the error text for a semantic-layer query aimed at the federated
 * connection.
 *
 * Pointing the agent at federated raw SQL is only useful when that path is
 * open. If any member has query_policy: semantic-layer-only, assertRawSqlAllowed
 * rejects federated raw SQL as well, so the message instead sends the agent to
 * per-connection semantic-layer queries.
 *
 * @param project - Project whose config and projectDir determine the restricted members.
 * @returns The shared prefix followed by guidance that matches the policy state.
 */
function federatedSlQueryUnsupportedMessage(project: KtxLocalProject): string {
  const restrictedIds = restrictedFederatedMemberIds(project.config, project.projectDir);

  if (restrictedIds.length === 0) {
    // No restricted member: federated read-only SQL is a valid fallback.
    return (
      FEDERATED_SL_QUERY_PREFIX +
      `Run a cross-database query as read-only SQL instead — ktx sql -c ${FEDERATED_CONNECTION_ID} "SELECT ..." or the sql_execution tool — ` +
      'using catalog-qualified table names (connectionId.schema.table, or connectionId.table for sqlite; ' +
      'double-quote ids that are not bare identifiers, e.g. "books-db".public.books).'
    );
  }

  const quotedIds = restrictedIds.map((id) => `'${id}'`).join(', ');
  return (
    FEDERATED_SL_QUERY_PREFIX +
    `Cross-database SQL through '${FEDERATED_CONNECTION_ID}' is also disabled because member connection(s) ` +
    `${quotedIds} are restricted to semantic-layer queries ` +
    '(query_policy: semantic-layer-only). Query each connection on its own through the semantic layer ' +
    '(the sl_query tool or `ktx sl query` with its connection id).'
  );
}
export interface CompileLocalSlQueryOptions {
connectionId?: string;
@ -79,7 +100,7 @@ export async function compileLocalSlQuery(
options: CompileLocalSlQueryOptions,
): Promise<CompileLocalSlQueryResult> {
if (options.connectionId === FEDERATED_CONNECTION_ID) {
throw new Error(FEDERATED_SL_QUERY_UNSUPPORTED);
throw new Error(federatedSlQueryUnsupportedMessage(project));
}
await options.onProgress?.({ progress: 0, message: 'Compiling query' });
const connectionId = resolveLocalConnectionId(project, options.connectionId);
@ -93,11 +114,13 @@ export async function compileLocalSlQuery(
const dialect = sqlAnalysisDialectForDriver(driver);
const sources = await loadComputableSources(project, connectionId);
const predefinedMeasuresOnly =
connectionQueryPolicy(project.config.connections[connectionId]) === 'semantic-layer-only';
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });
const response = await options.compute.query({
sources,
dialect,
query: options.query,
query: { ...options.query, predefined_measures_only: predefinedMeasuresOnly },
});
if (!options.execute) {

View file

@ -81,6 +81,9 @@ export interface SemanticLayerQueryInput {
order_by?: Array<string | { field: string; direction?: string }>;
limit?: number;
include_empty?: boolean;
// Set by compileLocalSlQuery from the connection's query_policy, never from
// caller input: the planner rejects runtime-composed measures when true.
predefined_measures_only?: boolean;
}
export interface SemanticLayerQueryExecutionResult {

View file

@ -1,6 +1,6 @@
import { executeFederatedQuery } from './connectors/duckdb/federated-executor.js';
import { FEDERATED_CONNECTION_ID } from './context/connections/federation.js';
import { executeProjectReadOnlySql } from './context/connections/project-sql-executor.js';
import { executeProjectRawSql } from './context/connections/project-sql-executor.js';
import type { KtxSqlQueryExecutionResult } from './context/connections/query-executor.js';
import { assertSqlQueryableConnection } from './context/connections/dialects.js';
import { resolveConfiguredConnection } from './context/connections/resolve-connection.js';
@ -137,6 +137,8 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps:
const connection = isFederated ? undefined : resolveConfiguredConnection(project.config, args.connectionId);
driver = isFederated ? 'duckdb' : String(connection?.driver ?? 'unknown').toLowerCase();
demoConnection = isFederated ? false : isDemoConnection(args.connectionId, connection);
// Fail fast before creating the SQL-analysis daemon port; executeProjectRawSql
// re-asserts this for every caller.
if (!isFederated) {
assertSqlQueryableConnection(args.connectionId, connection?.driver);
}
@ -152,26 +154,19 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps:
}));
const analysisPort = createSqlAnalysis();
const dialect: SqlAnalysisDialect = isFederated ? 'duckdb' : sqlAnalysisDialectForDriver(connection?.driver);
const validation = await analysisPort.validateReadOnly(args.sql, dialect);
if (!validation.ok) {
throw new Error(validation.error ?? 'SQL is not read-only.');
}
const referencedTableCount = await safeReferencedTableCount(analysisPort, args.sql, dialect);
const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector;
const result = await executeProjectReadOnlySql({
const result = await executeProjectRawSql({
project,
input: {
connectionId: args.connectionId,
projectDir: args.projectDir,
connection,
sql: args.sql,
maxRows: args.maxRows,
},
connectionId: args.connectionId,
sql: args.sql,
maxRows: args.maxRows,
sqlAnalysis: analysisPort,
createConnector: (connectionId) => createScanConnector(project!, connectionId),
executeFederated: deps.executeFederated,
runId: 'cli-sql',
});
const referencedTableCount = await safeReferencedTableCount(analysisPort, args.sql, dialect);
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
printSqlResult(resultOutput(args.connectionId, result), mode, io);
await emitTelemetryEvent({

View file

@ -1,10 +1,14 @@
import { describe, expect, it, vi } from 'vitest';
import type { executeFederatedQuery } from '../../../src/connectors/duckdb/federated-executor.js';
import { executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js';
import { executeProjectRawSql, executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js';
import type { KtxLocalProject } from '../../../src/context/project/project.js';
import type { KtxScanConnector } from '../../../src/context/scan/types.js';
import type { SqlAnalysisPort } from '../../../src/context/sql-analysis/ports.js';
import { KtxQueryError } from '../../../src/errors.js';
function fakeProject(connections: Record<string, { driver: string }>): KtxLocalProject {
function fakeProject(
connections: Record<string, { driver: string; query_policy?: 'semantic-layer-only' }>,
): KtxLocalProject {
return {
projectDir: '/tmp/proj',
configPath: '/tmp/proj/ktx.yaml',
@ -114,3 +118,97 @@ describe('executeProjectReadOnlySql headerTypes', () => {
expect(result.headerTypes).toEqual(['INTEGER']);
});
});
/**
 * Builds a stub SqlAnalysisPort whose validateReadOnly always resolves to the
 * given verdict; the other two methods are bare mocks.
 */
function fakeSqlAnalysis(validation: { ok: boolean; error: string | null }): SqlAnalysisPort {
  const validateReadOnly = vi.fn(async () => validation);
  const stub = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly,
  };
  return stub as unknown as SqlAnalysisPort;
}
// Suite for the shared raw-SQL entry point: the happy path, policy rejection
// (single connection and federated), and classification of validation failures.
describe('executeProjectRawSql', () => {
it('validates then executes raw SQL on an unrestricted connection', async () => {
const project = fakeProject({ pg: { driver: 'postgres' } });
const sqlAnalysis = fakeSqlAnalysis({ ok: true, error: null });
const connector = connectorReturning({
headers: ['id'],
rows: [[1]],
totalRows: 1,
rowCount: 1,
});
const result = await executeProjectRawSql({
project,
connectionId: 'pg',
sql: 'SELECT id FROM orders',
maxRows: 25,
sqlAnalysis,
createConnector: () => connector,
runId: 'test-raw-sql',
});
expect(result.rows).toEqual([[1]]);
// The validator must be called with the SQL and the connection's dialect.
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('SELECT id FROM orders', 'postgres');
});
it('rejects a restricted connection before validation or execution', async () => {
const project = fakeProject({ pg: { driver: 'postgres', query_policy: 'semantic-layer-only' } });
const sqlAnalysis = fakeSqlAnalysis({ ok: true, error: null });
const createConnector = vi.fn();
const execution = executeProjectRawSql({
project,
connectionId: 'pg',
sql: 'SELECT 1',
maxRows: 25,
sqlAnalysis,
createConnector: createConnector as never,
runId: 'test-raw-sql',
});
await expect(execution).rejects.toBeInstanceOf(KtxQueryError);
await expect(execution).rejects.toThrow(/query_policy: semantic-layer-only/);
// The policy check runs first: neither the parser nor a connector is touched.
expect(sqlAnalysis.validateReadOnly).not.toHaveBeenCalled();
expect(createConnector).not.toHaveBeenCalled();
});
it('rejects federated raw SQL when a member connection is restricted', async () => {
const project = fakeProject({
pg: { driver: 'postgres', query_policy: 'semantic-layer-only' },
lite: { driver: 'sqlite' },
});
const executeFederated = vi.fn();
await expect(
executeProjectRawSql({
project,
connectionId: '_ktx_federated',
sql: 'SELECT 1',
maxRows: 25,
sqlAnalysis: fakeSqlAnalysis({ ok: true, error: null }),
createConnector: vi.fn() as never,
executeFederated: executeFederated as never,
runId: 'test-raw-sql',
}),
).rejects.toThrow(/"pg"/);
expect(executeFederated).not.toHaveBeenCalled();
});
it('classifies a read-only validation failure as an expected query error', async () => {
const project = fakeProject({ pg: { driver: 'postgres' } });
const createConnector = vi.fn();
const execution = executeProjectRawSql({
project,
connectionId: 'pg',
sql: 'DROP TABLE orders',
maxRows: 25,
sqlAnalysis: fakeSqlAnalysis({ ok: false, error: 'SQL is not read-only: DROP.' }),
createConnector: createConnector as never,
runId: 'test-raw-sql',
});
await expect(execution).rejects.toBeInstanceOf(KtxQueryError);
await expect(execution).rejects.toThrow('SQL is not read-only: DROP.');
// A failed validation must stop before any connector is created.
expect(createConnector).not.toHaveBeenCalled();
});
});

View file

@ -0,0 +1,144 @@
import { describe, expect, it } from 'vitest';
import {
assertRawSqlAllowed,
connectionQueryPolicy,
projectAllowsRawSql,
restrictedFederatedMemberIds,
} from '../../../src/context/connections/query-policy.js';
import { parseKtxProjectConfig } from '../../../src/context/project/config.js';
import { KtxQueryError } from '../../../src/errors.js';
const PROJECT_DIR = '/tmp/proj';
/** Parses inline ktx.yaml text into a project config via parseKtxProjectConfig. */
function config(yaml: string) {
return parseKtxProjectConfig(yaml);
}
// Policy resolution from parsed ktx.yaml: the default, the explicit
// restriction, and rejection of unknown values at parse time.
describe('connectionQueryPolicy', () => {
it('defaults to read-only-sql when the field is absent or the connection is unknown', () => {
const parsed = config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
`);
expect(connectionQueryPolicy(parsed.connections.warehouse)).toBe('read-only-sql');
expect(connectionQueryPolicy(undefined)).toBe('read-only-sql');
});
it('reads semantic-layer-only from ktx.yaml', () => {
const parsed = config(`
connections:
warehouse:
driver: snowflake
url: env:SNOWFLAKE_URL
query_policy: semantic-layer-only
`);
expect(connectionQueryPolicy(parsed.connections.warehouse)).toBe('semantic-layer-only');
});
it('rejects unknown query_policy values at config parse time', () => {
expect(() =>
config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
query_policy: everything-goes
`),
).toThrow();
});
});
// Raw-SQL guard: single connections and the federated connection, each in the
// allowed and the rejected state.
describe('assertRawSqlAllowed', () => {
it('allows raw SQL on an unrestricted connection', () => {
const parsed = config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
`);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, 'warehouse')).not.toThrow();
});
it('rejects raw SQL on a restricted connection with an expected error naming the policy', () => {
const parsed = config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
query_policy: semantic-layer-only
`);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, 'warehouse')).toThrow(KtxQueryError);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, 'warehouse')).toThrow(
/query_policy: semantic-layer-only/,
);
});
it('rejects federated raw SQL when any member connection is restricted', () => {
const parsed = config(`
connections:
sales:
driver: sqlite
url: file:sales.db
query_policy: semantic-layer-only
events:
driver: sqlite
url: file:events.db
`);
// Only the restricted member is reported, and the error quotes its id.
expect(restrictedFederatedMemberIds(parsed, PROJECT_DIR)).toEqual(['sales']);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, '_ktx_federated')).toThrow(/"sales"/);
});
it('allows federated raw SQL when no member is restricted', () => {
const parsed = config(`
connections:
sales:
driver: sqlite
url: file:sales.db
events:
driver: sqlite
url: file:events.db
`);
expect(restrictedFederatedMemberIds(parsed, PROJECT_DIR)).toEqual([]);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, '_ktx_federated')).not.toThrow();
});
});
// Project-level predicate used to decide whether sql_execution is offered:
// mixed, fully restricted, and no-SQL-connection projects.
describe('projectAllowsRawSql', () => {
it('is true when at least one SQL connection is unrestricted', () => {
const parsed = config(`
connections:
finance:
driver: postgres
url: env:FINANCE_URL
query_policy: semantic-layer-only
warehouse:
driver: sqlite
url: file:warehouse.db
`);
expect(projectAllowsRawSql(parsed)).toBe(true);
});
it('is false when every SQL connection is restricted', () => {
const parsed = config(`
connections:
finance:
driver: postgres
url: env:FINANCE_URL
query_policy: semantic-layer-only
`);
expect(projectAllowsRawSql(parsed)).toBe(false);
});
it('is true for projects with no SQL-queryable connections', () => {
const parsed = config(`
connections:
docs:
driver: mongodb
url: mongodb://localhost:27017/app
`);
expect(projectAllowsRawSql(parsed)).toBe(true);
// An empty connection map is treated the same way.
expect(projectAllowsRawSql(config('connections: {}'))).toBe(true);
});
});

View file

@ -33,6 +33,10 @@
},
"hint": {
"type": "string"
},
"queryPolicy": {
"type": "string",
"const": "semantic-layer-only"
}
},
"required": [

View file

@ -247,6 +247,80 @@ describe('createLocalProjectMcpContextPorts', () => {
expect(connector.cleanup).toHaveBeenCalled();
});
it('omits sql_execution when every SQL connection is semantic-layer-only', async () => {
  // Configure `warehouse` as a semantic-layer-only postgres connection.
  const restrictedProject = await initKtxProject({ projectDir: tempDir });
  restrictedProject.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
    query_policy: 'semantic-layer-only',
  };

  // Stub collaborators; the read-only validator would accept any SQL if it were consulted.
  const sqlAnalysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
  };
  const localScan = { createConnector: vi.fn(async () => testConnector()) };

  const { sqlExecution } = createLocalProjectMcpContextPorts(restrictedProject, {
    sqlAnalysis,
    localScan,
    embeddingService: null,
  });

  // The raw-SQL port must be absent from the returned ports.
  expect(sqlExecution).toBeUndefined();
});
it('keeps sql_execution in mixed projects but rejects restricted connections and flags them in connection_list', async () => {
  // Mixed project: `warehouse` sets no policy, `finance` is semantic-layer-only.
  const mixedProject = await initKtxProject({ projectDir: tempDir });
  mixedProject.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };
  mixedProject.config.connections.finance = {
    driver: 'postgres',
    url: 'env:FINANCE_URL',
    query_policy: 'semantic-layer-only',
  };

  const connectorFactory = vi.fn(async () => testConnector());
  const analysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
  };
  const ports = createLocalProjectMcpContextPorts(mixedProject, {
    sqlAnalysis: analysis,
    localScan: { createConnector: connectorFactory },
    embeddingService: null,
  });
  expect(ports.sqlExecution).toBeDefined();

  // Raw SQL against the restricted connection is rejected before validation or connector creation.
  const restrictedRun = ports.sqlExecution?.execute({
    connectionId: 'finance',
    sql: 'select 1',
    maxRows: 5,
  });
  await expect(restrictedRun).rejects.toBeInstanceOf(KtxQueryError);
  await expect(restrictedRun).rejects.toThrow(/query_policy: semantic-layer-only/);
  expect(analysis.validateReadOnly).not.toHaveBeenCalled();
  expect(connectorFactory).not.toHaveBeenCalled();

  // Both postgres members federate, so the restricted member also blocks federated raw SQL.
  const federatedRun = ports.sqlExecution?.execute({
    connectionId: '_ktx_federated',
    sql: 'select 1',
    maxRows: 5,
  });
  await expect(federatedRun).rejects.toThrow(/"finance"/);

  // connection_list flags the restricted connection and the federated entry; the unrestricted one is only matched by id.
  const financeEntry = expect.objectContaining({
    id: 'finance',
    queryPolicy: 'semantic-layer-only',
    hint: expect.stringContaining('sl_query'),
  });
  const warehouseEntry = expect.objectContaining({ id: 'warehouse' });
  const federatedEntry = expect.objectContaining({
    id: '_ktx_federated',
    queryPolicy: 'semantic-layer-only',
    hint: expect.stringContaining('finance'),
  });
  await expect(ports.connections?.list()).resolves.toEqual([financeEntry, warehouseEntry, federatedEntry]);
});
it('rejects sql_execution against an unconfigured connection with an actionable expected error', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {

View file

@ -3,6 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxSemanticLayerComputePort } from '../../../src/context/daemon/semantic-layer-compute.js';
import { FEDERATED_CONNECTION_ID } from '../../../src/context/connections/federation.js';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { compileLocalSlQuery } from '../../../src/context/sl/local-query.js';
@ -67,6 +68,59 @@ grain: []
await rm(tempDir, { recursive: true, force: true });
});
it('injects predefined_measures_only when the connection query_policy is semantic-layer-only', async () => {
  // Restrict the connection; the caller's query below does not set the flag itself.
  project.config.connections.warehouse = { driver: 'postgres', query_policy: 'semantic-layer-only' };

  await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: { measures: ['orders.order_count'], dimensions: [], limit: 10 },
    compute,
  });

  // The query handed to the compute port must carry the flag.
  const flaggedQuery = expect.objectContaining({ predefined_measures_only: true });
  const expectedCall = expect.objectContaining({ query: flaggedQuery });
  expect(compute.query).toHaveBeenCalledWith(expectedCall);
});
it('rejects a federated sl_query, pointing to per-connection SL when a member is restricted', async () => {
  // `warehouse` is semantic-layer-only; `analytics` sets no policy.
  project.config.connections.warehouse = { driver: 'postgres', query_policy: 'semantic-layer-only' };
  project.config.connections.analytics = { driver: 'postgres' };

  // Capture the rejection as `unknown` instead of throwing a sentinel inside the `try`
  // (which the same `catch` would swallow) and instead of force-casting the caught value.
  let rejection: unknown;
  try {
    await compileLocalSlQuery(project, {
      connectionId: FEDERATED_CONNECTION_ID,
      query: { measures: ['orders.order_count'], dimensions: [] },
      compute,
    });
  } catch (error: unknown) {
    rejection = error;
  }
  // Fails with an explicit message when the call resolved or rejected with a non-Error value.
  expect(rejection, 'expected compileLocalSlQuery to reject').toBeInstanceOf(Error);
  const message = rejection instanceof Error ? rejection.message : '';

  expect(message).toContain("member connection(s) 'warehouse'");
  expect(message).toContain('query_policy: semantic-layer-only');
  // Must not send the agent down the raw-SQL path that assertRawSqlAllowed rejects.
  expect(message).not.toContain(`ktx sql -c ${FEDERATED_CONNECTION_ID}`);
  expect(message).not.toContain('sql_execution');
  // The query must be refused before it reaches the compute port.
  expect(compute.query).not.toHaveBeenCalled();
});
it('rejects a federated sl_query, pointing to raw federated SQL when no member is restricted', async () => {
  // This test adds `analytics` with no query_policy.
  project.config.connections.analytics = { driver: 'postgres' };

  const compilation = compileLocalSlQuery(project, {
    connectionId: FEDERATED_CONNECTION_ID,
    query: { measures: ['orders.order_count'], dimensions: [] },
    compute,
  });

  // The rejection message must mention the raw federated SQL command.
  await expect(compilation).rejects.toThrow(`ktx sql -c ${FEDERATED_CONNECTION_ID}`);
  expect(compute.query).not.toHaveBeenCalled();
});
it('refuses a non-SQL (context-only) connection instead of compiling it as Postgres', async () => {
project.config.connections['mongo-prod'] = { driver: 'mongodb', url: 'mongodb://localhost:27017/app' };
await expect(
@ -109,6 +163,7 @@ grain: []
measures: ['orders.order_count'],
dimensions: ['orders.status'],
limit: 25,
predefined_measures_only: false,
},
});
expect(result).toEqual({
@ -190,6 +245,7 @@ grain: []
query: {
measures: ['sum(payments.amount)'],
dimensions: [],
predefined_measures_only: false,
},
});
});

View file

@ -141,6 +141,41 @@ describe('runKtxSql', () => {
expect(io.stderr()).toBe('');
});
it('refuses raw SQL when the connection query_policy is semantic-layer-only', async () => {
  // Project whose `warehouse` connection is written as semantic-layer-only.
  const projectDir = join(tempDir, 'project');
  await initKtxProject({ projectDir });
  await writeConnections(projectDir, {
    warehouse: { driver: 'sqlite', path: 'warehouse.db', query_policy: 'semantic-layer-only' },
  });

  // The analysis stub would accept the SQL as read-only, so a refusal cannot come from validation.
  const analysis = makeSqlAnalysis({ ok: true, error: null });
  const connectorFactory = vi.fn(async () => makeConnector());
  const capture = makeIo();

  const exitCode = await runKtxSql(
    {
      command: 'execute',
      projectDir,
      connectionId: 'warehouse',
      sql: 'select id from orders',
      maxRows: 1000,
      output: 'pretty',
      json: false,
      cliVersion: '0.0.0-test',
    },
    capture.io,
    {
      createSqlAnalysis: () => analysis,
      createScanConnector: connectorFactory,
    },
  );

  // Exit code 1, the policy named on stderr, and neither validation nor a connector was touched.
  expect(exitCode).toBe(1);
  expect(capture.stderr()).toContain('query_policy: semantic-layer-only');
  expect(analysis.validateReadOnly).not.toHaveBeenCalled();
  expect(connectorFactory).not.toHaveBeenCalled();
});
it('emits debug telemetry for SQL without raw query text', async () => {
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');