feat: query_policy semantic-layer-only restricts agents to predefined semantic-layer measures (#334)

* feat(sl): add predefined_measures_only guard to semantic query planning

SemanticQuery gains a predefined_measures_only flag; the planner rejects
any measure resolved with Provenance.COMPOSED (runtime aggregate
expressions and query-time derivations) while predefined measures,
predefined derived chains, dimensions, filters, and segments pass.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* feat(config): add per-connection query_policy to warehouse connections

query_policy: semantic-layer-only | read-only-sql (default) on the
warehouse connection schema, plus a policy module with the raw-SQL
guard, federated member restriction lookup, and the project-level
predicate used to gate sql_execution registration.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* feat(cli): enforce query_policy on raw SQL through one shared executor

ktx sql and the MCP sql_execution tool now share executeProjectRawSql
(resolve, policy check, read-only validation, execute), collapsing
their duplicated validate-then-execute paths. Restricted connections
are rejected before validation; federated raw SQL is rejected when any
member is restricted. sql_execution is not registered when every SQL
connection is restricted, and connection_list marks restricted
connections so agents route to sl_query. executeProjectReadOnlySql
stays generic for ktx-internal SQL (scan, ingest, SL-generated).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* feat(sl): compile queries with predefined_measures_only from query_policy

compileLocalSlQuery injects the flag from the connection's query_policy,
never from caller input, covering both ktx sl query and the MCP
sl_query tool through the daemon compile path.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* docs: document query_policy semantic-layer-only

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* fix(sl): close semantic-layer-only bypasses via filters and federated hint

The predefined_measures_only guard only inspected query.measures, so a
composed aggregate written into `filters` slipped through _classify_filters
into a HAVING clause untouched — letting a restricted agent evaluate
arbitrary aggregates (e.g. threshold-probing `sum(x) BETWEEN a AND b`).
Reject filter clauses that compose an aggregate function; a HAVING that
compares a predefined measure by name (`orders.revenue > 100`) still works.

Also make the federated sl_query error policy-aware: when a member is
restricted, raw federated SQL is disabled too, so stop directing the agent
to `ktx sql -c _ktx_federated` / sql_execution (a guaranteed failure) and
point to per-connection semantic-layer queries instead.

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
Luca Martial 2026-07-03 01:54:17 -07:00 committed by GitHub
parent 66768fe009
commit a651b82e2f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 887 additions and 68 deletions

View file

@ -1,10 +1,14 @@
import { describe, expect, it, vi } from 'vitest';
import type { executeFederatedQuery } from '../../../src/connectors/duckdb/federated-executor.js';
import { executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js';
import { executeProjectRawSql, executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js';
import type { KtxLocalProject } from '../../../src/context/project/project.js';
import type { KtxScanConnector } from '../../../src/context/scan/types.js';
import type { SqlAnalysisPort } from '../../../src/context/sql-analysis/ports.js';
import { KtxQueryError } from '../../../src/errors.js';
function fakeProject(connections: Record<string, { driver: string }>): KtxLocalProject {
function fakeProject(
connections: Record<string, { driver: string; query_policy?: 'semantic-layer-only' }>,
): KtxLocalProject {
return {
projectDir: '/tmp/proj',
configPath: '/tmp/proj/ktx.yaml',
@ -114,3 +118,97 @@ describe('executeProjectReadOnlySql headerTypes', () => {
expect(result.headerTypes).toEqual(['INTEGER']);
});
});
/**
 * Builds a stub SqlAnalysisPort for these tests.
 *
 * `validateReadOnly` is a mock that always resolves to the supplied verdict;
 * the remaining port methods are bare mocks with no behavior.
 */
function fakeSqlAnalysis(validation: { ok: boolean; error: string | null }): SqlAnalysisPort {
  const validateReadOnly = vi.fn(async () => validation);
  const stub = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly,
  };
  return stub as unknown as SqlAnalysisPort;
}
// Exercises executeProjectRawSql: the happy path on an unrestricted connection,
// query_policy rejections (single connection and federated), and the
// classification of read-only validation failures.
describe('executeProjectRawSql', () => {
  it('validates then executes raw SQL on an unrestricted connection', async () => {
    const unrestricted = fakeProject({ pg: { driver: 'postgres' } });
    const analysis = fakeSqlAnalysis({ ok: true, error: null });
    const stubConnector = connectorReturning({
      headers: ['id'],
      rows: [[1]],
      totalRows: 1,
      rowCount: 1,
    });

    const outcome = await executeProjectRawSql({
      project: unrestricted,
      connectionId: 'pg',
      sql: 'SELECT id FROM orders',
      maxRows: 25,
      sqlAnalysis: analysis,
      createConnector: () => stubConnector,
      runId: 'test-raw-sql',
    });

    expect(outcome.rows).toEqual([[1]]);
    expect(analysis.validateReadOnly).toHaveBeenCalledWith('SELECT id FROM orders', 'postgres');
  });

  it('rejects a restricted connection before validation or execution', async () => {
    const restricted = fakeProject({ pg: { driver: 'postgres', query_policy: 'semantic-layer-only' } });
    const analysis = fakeSqlAnalysis({ ok: true, error: null });
    const connectorFactory = vi.fn();

    const pending = executeProjectRawSql({
      project: restricted,
      connectionId: 'pg',
      sql: 'SELECT 1',
      maxRows: 25,
      sqlAnalysis: analysis,
      createConnector: connectorFactory as never,
      runId: 'test-raw-sql',
    });

    await expect(pending).rejects.toBeInstanceOf(KtxQueryError);
    await expect(pending).rejects.toThrow(/query_policy: semantic-layer-only/);
    // Neither the validator nor the connector factory may be reached.
    expect(analysis.validateReadOnly).not.toHaveBeenCalled();
    expect(connectorFactory).not.toHaveBeenCalled();
  });

  it('rejects federated raw SQL when a member connection is restricted', async () => {
    const mixed = fakeProject({
      pg: { driver: 'postgres', query_policy: 'semantic-layer-only' },
      lite: { driver: 'sqlite' },
    });
    const federatedRunner = vi.fn();

    const pending = executeProjectRawSql({
      project: mixed,
      connectionId: '_ktx_federated',
      sql: 'SELECT 1',
      maxRows: 25,
      sqlAnalysis: fakeSqlAnalysis({ ok: true, error: null }),
      createConnector: vi.fn() as never,
      executeFederated: federatedRunner as never,
      runId: 'test-raw-sql',
    });

    // The error is expected to quote the restricted member id.
    await expect(pending).rejects.toThrow(/"pg"/);
    expect(federatedRunner).not.toHaveBeenCalled();
  });

  it('classifies a read-only validation failure as an expected query error', async () => {
    const unrestricted = fakeProject({ pg: { driver: 'postgres' } });
    const connectorFactory = vi.fn();
    const failingAnalysis = fakeSqlAnalysis({ ok: false, error: 'SQL is not read-only: DROP.' });

    const pending = executeProjectRawSql({
      project: unrestricted,
      connectionId: 'pg',
      sql: 'DROP TABLE orders',
      maxRows: 25,
      sqlAnalysis: failingAnalysis,
      createConnector: connectorFactory as never,
      runId: 'test-raw-sql',
    });

    await expect(pending).rejects.toBeInstanceOf(KtxQueryError);
    await expect(pending).rejects.toThrow('SQL is not read-only: DROP.');
    expect(connectorFactory).not.toHaveBeenCalled();
  });
});

View file

@ -0,0 +1,144 @@
import { describe, expect, it } from 'vitest';
import {
assertRawSqlAllowed,
connectionQueryPolicy,
projectAllowsRawSql,
restrictedFederatedMemberIds,
} from '../../../src/context/connections/query-policy.js';
import { parseKtxProjectConfig } from '../../../src/context/project/config.js';
import { KtxQueryError } from '../../../src/errors.js';
const PROJECT_DIR = '/tmp/proj';
/** Parses an inline ktx.yaml snippet into a project config via parseKtxProjectConfig. */
function config(yaml: string) {
  const parsed = parseKtxProjectConfig(yaml);
  return parsed;
}
// Tests for connectionQueryPolicy, which maps a parsed connection entry to its query policy.
describe('connectionQueryPolicy', () => {
// Both a connection without a query_policy field and an undefined connection
// are expected to resolve to 'read-only-sql'.
it('defaults to read-only-sql when the field is absent or the connection is unknown', () => {
const parsed = config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
`);
expect(connectionQueryPolicy(parsed.connections.warehouse)).toBe('read-only-sql');
expect(connectionQueryPolicy(undefined)).toBe('read-only-sql');
});
// An explicit query_policy in the YAML is expected to be returned as-is.
it('reads semantic-layer-only from ktx.yaml', () => {
const parsed = config(`
connections:
warehouse:
driver: snowflake
url: env:SNOWFLAKE_URL
query_policy: semantic-layer-only
`);
expect(connectionQueryPolicy(parsed.connections.warehouse)).toBe('semantic-layer-only');
});
// An unrecognized policy value must fail in parseKtxProjectConfig (called by config()).
it('rejects unknown query_policy values at config parse time', () => {
expect(() =>
config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
query_policy: everything-goes
`),
).toThrow();
});
});
// Tests for assertRawSqlAllowed (the raw-SQL guard) and restrictedFederatedMemberIds.
describe('assertRawSqlAllowed', () => {
// No query_policy on the connection: the guard must not throw.
it('allows raw SQL on an unrestricted connection', () => {
const parsed = config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
`);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, 'warehouse')).not.toThrow();
});
// A restricted connection must throw a KtxQueryError whose message names the policy.
it('rejects raw SQL on a restricted connection with an expected error naming the policy', () => {
const parsed = config(`
connections:
warehouse:
driver: sqlite
url: file:warehouse.db
query_policy: semantic-layer-only
`);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, 'warehouse')).toThrow(KtxQueryError);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, 'warehouse')).toThrow(
/query_policy: semantic-layer-only/,
);
});
// With one restricted member, the '_ktx_federated' connection id is rejected and
// the error is expected to quote the restricted member's id.
it('rejects federated raw SQL when any member connection is restricted', () => {
const parsed = config(`
connections:
sales:
driver: sqlite
url: file:sales.db
query_policy: semantic-layer-only
events:
driver: sqlite
url: file:events.db
`);
expect(restrictedFederatedMemberIds(parsed, PROJECT_DIR)).toEqual(['sales']);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, '_ktx_federated')).toThrow(/"sales"/);
});
// With no restricted members, the restricted-id list is empty and federated raw SQL passes.
it('allows federated raw SQL when no member is restricted', () => {
const parsed = config(`
connections:
sales:
driver: sqlite
url: file:sales.db
events:
driver: sqlite
url: file:events.db
`);
expect(restrictedFederatedMemberIds(parsed, PROJECT_DIR)).toEqual([]);
expect(() => assertRawSqlAllowed(parsed, PROJECT_DIR, '_ktx_federated')).not.toThrow();
});
});
// Tests for projectAllowsRawSql, the project-level predicate over all configured connections.
describe('projectAllowsRawSql', () => {
// One restricted and one unrestricted connection: the predicate is expected to be true.
it('is true when at least one SQL connection is unrestricted', () => {
const parsed = config(`
connections:
finance:
driver: postgres
url: env:FINANCE_URL
query_policy: semantic-layer-only
warehouse:
driver: sqlite
url: file:warehouse.db
`);
expect(projectAllowsRawSql(parsed)).toBe(true);
});
// The only connection is restricted: the predicate is expected to be false.
it('is false when every SQL connection is restricted', () => {
const parsed = config(`
connections:
finance:
driver: postgres
url: env:FINANCE_URL
query_policy: semantic-layer-only
`);
expect(projectAllowsRawSql(parsed)).toBe(false);
});
// A mongodb-only project and an empty connections map are both expected to yield true.
it('is true for projects with no SQL-queryable connections', () => {
const parsed = config(`
connections:
docs:
driver: mongodb
url: mongodb://localhost:27017/app
`);
expect(projectAllowsRawSql(parsed)).toBe(true);
expect(projectAllowsRawSql(config('connections: {}'))).toBe(true);
});
});

View file

@ -33,6 +33,10 @@
},
"hint": {
"type": "string"
},
"queryPolicy": {
"type": "string",
"const": "semantic-layer-only"
}
},
"required": [

View file

@ -247,6 +247,80 @@ describe('createLocalProjectMcpContextPorts', () => {
expect(connector.cleanup).toHaveBeenCalled();
});
// When the only SQL connection is semantic-layer-only, the ports object must not
// expose a sqlExecution port at all.
it('omits sql_execution when every SQL connection is semantic-layer-only', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
    query_policy: 'semantic-layer-only',
  };

  const { sqlExecution } = createLocalProjectMcpContextPorts(project, {
    sqlAnalysis: {
      analyzeForFingerprint: vi.fn(),
      analyzeBatch: vi.fn(),
      validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
    },
    localScan: { createConnector: vi.fn(async () => testConnector()) },
    embeddingService: null,
  });

  expect(sqlExecution).toBeUndefined();
});
// Mixed project: one unrestricted and one restricted postgres connection.
// sqlExecution stays available, but the restricted connection and the federated
// connection are rejected, and connection_list flags both.
it('keeps sql_execution in mixed projects but rejects restricted connections and flags them in connection_list', async () => {
  const project = await initKtxProject({ projectDir: tempDir });
  project.config.connections.warehouse = {
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  };
  project.config.connections.finance = {
    driver: 'postgres',
    url: 'env:FINANCE_URL',
    query_policy: 'semantic-layer-only',
  };

  const connectorFactory = vi.fn(async () => testConnector());
  const analysis = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly: vi.fn(async () => ({ ok: true, error: null })),
  };
  const ports = createLocalProjectMcpContextPorts(project, {
    sqlAnalysis: analysis,
    localScan: { createConnector: connectorFactory },
    embeddingService: null,
  });
  expect(ports.sqlExecution).toBeDefined();

  const restrictedAttempt = ports.sqlExecution?.execute({
    connectionId: 'finance',
    sql: 'select 1',
    maxRows: 5,
  });
  await expect(restrictedAttempt).rejects.toBeInstanceOf(KtxQueryError);
  await expect(restrictedAttempt).rejects.toThrow(/query_policy: semantic-layer-only/);
  expect(analysis.validateReadOnly).not.toHaveBeenCalled();
  expect(connectorFactory).not.toHaveBeenCalled();

  // Both postgres members federate, so the restricted member also blocks federated raw SQL.
  const federatedAttempt = ports.sqlExecution?.execute({
    connectionId: '_ktx_federated',
    sql: 'select 1',
    maxRows: 5,
  });
  await expect(federatedAttempt).rejects.toThrow(/"finance"/);

  const financeEntry = expect.objectContaining({
    id: 'finance',
    queryPolicy: 'semantic-layer-only',
    hint: expect.stringContaining('sl_query'),
  });
  const warehouseEntry = expect.objectContaining({ id: 'warehouse' });
  const federatedEntry = expect.objectContaining({
    id: '_ktx_federated',
    queryPolicy: 'semantic-layer-only',
    hint: expect.stringContaining('finance'),
  });
  await expect(ports.connections?.list()).resolves.toEqual([financeEntry, warehouseEntry, federatedEntry]);
});
it('rejects sql_execution against an unconfigured connection with an actionable expected error', async () => {
const project = await initKtxProject({ projectDir: tempDir });
project.config.connections.warehouse = {

View file

@ -3,6 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxSemanticLayerComputePort } from '../../../src/context/daemon/semantic-layer-compute.js';
import { FEDERATED_CONNECTION_ID } from '../../../src/context/connections/federation.js';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { compileLocalSlQuery } from '../../../src/context/sl/local-query.js';
@ -67,6 +68,59 @@ grain: []
await rm(tempDir, { recursive: true, force: true });
});
// A semantic-layer-only connection must cause the compute port to receive a query
// carrying predefined_measures_only: true, even though the caller did not set it.
it('injects predefined_measures_only when the connection query_policy is semantic-layer-only', async () => {
  project.config.connections.warehouse = { driver: 'postgres', query_policy: 'semantic-layer-only' };

  await compileLocalSlQuery(project, {
    connectionId: 'warehouse',
    query: { measures: ['orders.order_count'], dimensions: [], limit: 10 },
    compute,
  });

  const flaggedQuery = expect.objectContaining({ predefined_measures_only: true });
  const expectedRequest = expect.objectContaining({ query: flaggedQuery });
  expect(compute.query).toHaveBeenCalledWith(expectedRequest);
});
// A federated sl_query must be rejected. When a member connection is restricted,
// the error has to steer the agent to per-connection semantic-layer queries and
// must not mention the raw federated SQL routes, which are disabled as well.
it('rejects a federated sl_query, pointing to per-connection SL when a member is restricted', async () => {
  project.config.connections.warehouse = { driver: 'postgres', query_policy: 'semantic-layer-only' };
  project.config.connections.analytics = { driver: 'postgres' };

  // Capture the rejection outside the try so an unexpected success is reported
  // as a missing Error rather than being swallowed by this test's own catch.
  let rejection: unknown;
  try {
    await compileLocalSlQuery(project, {
      connectionId: FEDERATED_CONNECTION_ID,
      query: { measures: ['orders.order_count'], dimensions: [] },
      compute,
    });
  } catch (error: unknown) {
    rejection = error;
  }

  // Fails clearly if nothing was thrown or the thrown value is not an Error.
  expect(rejection).toBeInstanceOf(Error);
  const message = rejection instanceof Error ? rejection.message : '';

  expect(message).toContain("member connection(s) 'warehouse'");
  expect(message).toContain('query_policy: semantic-layer-only');
  // Must not send the agent down the raw-SQL path that assertRawSqlAllowed rejects.
  expect(message).not.toContain(`ktx sql -c ${FEDERATED_CONNECTION_ID}`);
  expect(message).not.toContain('sql_execution');
  expect(compute.query).not.toHaveBeenCalled();
});
// With no restricted member, the federated sl_query rejection is expected to
// point the caller at the raw federated SQL command instead.
it('rejects a federated sl_query, pointing to raw federated SQL when no member is restricted', async () => {
  project.config.connections.analytics = { driver: 'postgres' };

  const pending = compileLocalSlQuery(project, {
    connectionId: FEDERATED_CONNECTION_ID,
    query: { measures: ['orders.order_count'], dimensions: [] },
    compute,
  });

  await expect(pending).rejects.toThrow(`ktx sql -c ${FEDERATED_CONNECTION_ID}`);
  expect(compute.query).not.toHaveBeenCalled();
});
it('refuses a non-SQL (context-only) connection instead of compiling it as Postgres', async () => {
project.config.connections['mongo-prod'] = { driver: 'mongodb', url: 'mongodb://localhost:27017/app' };
await expect(
@ -109,6 +163,7 @@ grain: []
measures: ['orders.order_count'],
dimensions: ['orders.status'],
limit: 25,
predefined_measures_only: false,
},
});
expect(result).toEqual({
@ -190,6 +245,7 @@ grain: []
query: {
measures: ['sum(payments.amount)'],
dimensions: [],
predefined_measures_only: false,
},
});
});