From 7a86aa9ddcfb37faf5c0a591ac44a5ad4aeda45c Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Mon, 11 May 2026 23:31:04 -0700 Subject: [PATCH] fix(sl): tighten source validation guards --- .../src/sl/semantic-layer.service.test.ts | 54 +++++++++++++++++++ .../context/src/sl/semantic-layer.service.ts | 21 +++++++- python/ktx-sl/semantic_layer/engine.py | 5 +- python/ktx-sl/tests/test_validator.py | 28 ++++++++++ 4 files changed, 105 insertions(+), 3 deletions(-) diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 7c70950b..8121ed53 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -891,6 +891,60 @@ describe('validateWithProposedSource', () => { expect(result.errors).toEqual([]); }); + it('allows SQL syntax tokens and cast types in physical expression validation', async () => { + const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml'; + configService.listFiles.mockImplementation((dir: string) => { + if (dir === 'semantic-layer/dbt-main') { + return Promise.resolve({ files: [] }); + } + if (dir === 'semantic-layer') { + return Promise.resolve({ files: [schemaPath] }); + } + if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') { + return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] }); + } + return Promise.resolve({ files: [] }); + }); + configService.readFile.mockResolvedValue({ + content: [ + 'tables:', + ' mart_revenue_daily:', + ' table: orbit_analytics.mart_revenue_daily', + ' columns:', + ' - { name: order_id, type: string }', + ' - { name: revenue_date, type: time }', + ' - { name: amount, type: number }', + ' - { name: status, type: string }', + ' - { name: created_at, type: time }', + ].join('\n'), + }); + pythonPort.validateSources.mockResolvedValue({ + data: { errors: [], warnings: [] }, + }); + + const result = await service.validateWithProposedSource('dbt-main', { + name: 'mart_revenue_daily', + table: 'orbit_analytics.mart_revenue_daily', + grain: ['order_id'], + columns: [ + { name: 'order_id', type: 'string' }, + { name: 'revenue_date', type: 'time' }, + { name: 'amount', type: 'number' }, + { name: 'status', type: 'string' }, + { name: 'created_at', type: 'time' }, + { name: 'status_text', type: 'string', expr: 'status::text' }, + ], + segments: [{ name: 'current_or_paid', expr: "created_at <= current_date OR status = 'paid'" }], + joins: [], + measures: [ + { name: 'paid_amount', expr: "sum(amount) FILTER (WHERE status = 'paid')" }, + { name: 'cast_amount_count', expr: 'count(cast(amount as text))' }, + ], + }); + + expect(result.errors).toEqual([]); + }); + it('rejects join keys that are absent from matched physical sources', async () => { const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml'; configService.listFiles.mockImplementation((dir: string) => { diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index 6ec21a58..a7be91e9 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -1160,6 +1160,8 @@ const SQL_KEYWORDS = new Set([ 'in', 'between', 'like', + 'where', + 'filter', 'cast', 'coalesce', 'nullif', @@ -1183,15 +1185,31 @@ const SQL_KEYWORDS = new Set([ 'rows', 'range', 'current', + 'current_date', + 'current_time', + 'current_timestamp', + 'localtime', + 'localtimestamp', 'row', 'numeric', 'decimal', 'int', 'integer', + 'bigint', + 'smallint', 'float', 'double', + 'real', 'string', + 'text', + 'char', + 'character', + 'varchar', 'timestamp', + 'time', + 'uuid', + 'json', + 'jsonb', 'bool', 'boolean', ]); @@ -1218,7 +1236,8 @@ function normalizeSqlExpressionForIdentifierScan(expr: string): string { .replace(/'([^']|'')*'/g, ' ') .replace(/"([^"]+)"/g, '$1') .replace(/`([^`]+)`/g, '$1') - .replace(/\[([^\]]+)\]/g, '$1'); + .replace(/\[([^\]]+)\]/g, '$1') + .replace(/::\s*[A-Za-z_][\w$]*(?:\s*\([^)]*\))?/g, ' '); } function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; name: string }> { diff --git a/python/ktx-sl/semantic_layer/engine.py b/python/ktx-sl/semantic_layer/engine.py index b45d89ac..01e03805 100644 --- a/python/ktx-sl/semantic_layer/engine.py +++ b/python/ktx-sl/semantic_layer/engine.py @@ -210,8 +210,6 @@ class SemanticEngine: grain = grain_col.lower() if grain in source_columns: return True - if any(col.endswith(f"_{grain}") for col in source_columns): - return True if grain == "id": candidates = { f"{target_name}_id", @@ -219,6 +217,9 @@ class SemanticEngine: } if source_columns.intersection(candidates): return True + continue + if any(col.endswith(f"_{grain}") for col in source_columns): + return True return False def _check_sql_join_coverage( diff --git a/python/ktx-sl/tests/test_validator.py b/python/ktx-sl/tests/test_validator.py index 909d5a6c..065c6331 100644 --- a/python/ktx-sl/tests/test_validator.py +++ b/python/ktx-sl/tests/test_validator.py @@ -323,6 +323,34 @@ class TestJoinValidation: assert report.errors == [] + def test_sql_join_coverage_does_not_treat_unrelated_id_suffix_as_id_key(self): + requesters = SourceDefinition( + name="large_contract_requesters", + sql=""" + select accounts.account_name, requests.user_id + from orbit_raw.requests requests + join public.accounts accounts + on requests.account_id = accounts.id + """, + grain=["user_id"], + columns=[ + SourceColumn(name="account_name", type="string"), + SourceColumn(name="user_id", type="string"), + ], + joins=[], + ) + accounts = _src("accounts", columns=["id", "account_name"], grain=["id"]) + engine = SemanticEngine.from_sources( + { + "large_contract_requesters": requesters, + "accounts": accounts, + } + ) + + report = engine.validate(recently_touched={"large_contract_requesters"}) + + assert report.errors == [] + def test_sql_join_coverage_requires_join_when_projected_key_exists(self): requesters = SourceDefinition( name="large_contract_requesters",