mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
fix(sl): tighten source validation guards
This commit is contained in:
parent
aaa928e768
commit
7a86aa9ddc
4 changed files with 105 additions and 3 deletions
|
|
@ -891,6 +891,60 @@ describe('validateWithProposedSource', () => {
|
|||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
// Regression test: a proposed source whose expressions use SQL syntax tokens
// (FILTER, WHERE, current_date, cast ... as) and cast type names (text, ::text)
// must validate with an empty error list. None of those tokens is a column in
// the mocked schema below, so presumably the validator would otherwise report
// them as unknown column references (confirm against the validator).
it('allows SQL syntax tokens and cast types in physical expression validation', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
// Stub directory listings: only the root 'semantic-layer' directory and the
// postgres-warehouse '_schema' directory report the schema file; every other
// directory (including both dbt-main paths) is empty.
configService.listFiles.mockImplementation((dir: string) => {
|
||||
if (dir === 'semantic-layer/dbt-main') {
|
||||
return Promise.resolve({ files: [] });
|
||||
}
|
||||
if (dir === 'semantic-layer') {
|
||||
return Promise.resolve({ files: [schemaPath] });
|
||||
}
|
||||
if (dir === 'semantic-layer/dbt-main/_schema' || dir === 'semantic-layer/postgres-warehouse/_schema') {
|
||||
return Promise.resolve({ files: dir.endsWith('postgres-warehouse/_schema') ? [schemaPath] : [] });
|
||||
}
|
||||
return Promise.resolve({ files: [] });
|
||||
});
|
||||
// Every file read resolves to the same YAML document: one table,
// orbit_analytics.mart_revenue_daily, with five declared columns.
configService.readFile.mockResolvedValue({
|
||||
content: [
|
||||
'tables:',
|
||||
' mart_revenue_daily:',
|
||||
' table: orbit_analytics.mart_revenue_daily',
|
||||
' columns:',
|
||||
' - { name: order_id, type: string }',
|
||||
' - { name: revenue_date, type: time }',
|
||||
' - { name: amount, type: number }',
|
||||
' - { name: status, type: string }',
|
||||
' - { name: created_at, type: time }',
|
||||
].join('\n'),
|
||||
});
|
||||
// The Python validation port is stubbed to report no errors or warnings, so
// any error in the result has to come from the service under test.
pythonPort.validateSources.mockResolvedValue({
|
||||
data: { errors: [], warnings: [] },
|
||||
});
|
||||
|
||||
// The proposed source references only the five declared columns, combined with:
// a '::text' cast, the current_date keyword, an aggregate FILTER (WHERE ...)
// clause, and a cast(... as text) call.
const result = await service.validateWithProposedSource('dbt-main', {
|
||||
name: 'mart_revenue_daily',
|
||||
table: 'orbit_analytics.mart_revenue_daily',
|
||||
grain: ['order_id'],
|
||||
columns: [
|
||||
{ name: 'order_id', type: 'string' },
|
||||
{ name: 'revenue_date', type: 'time' },
|
||||
{ name: 'amount', type: 'number' },
|
||||
{ name: 'status', type: 'string' },
|
||||
{ name: 'created_at', type: 'time' },
|
||||
{ name: 'status_text', type: 'string', expr: 'status::text' },
|
||||
],
|
||||
segments: [{ name: 'current_or_paid', expr: "created_at <= current_date OR status = 'paid'" }],
|
||||
joins: [],
|
||||
measures: [
|
||||
{ name: 'paid_amount', expr: "sum(amount) FILTER (WHERE status = 'paid')" },
|
||||
{ name: 'cast_amount_count', expr: 'count(cast(amount as text))' },
|
||||
],
|
||||
});
|
||||
|
||||
// No validation errors are expected for any of the expressions above.
expect(result.errors).toEqual([]);
|
||||
});
|
||||
|
||||
it('rejects join keys that are absent from matched physical sources', async () => {
|
||||
const schemaPath = 'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml';
|
||||
configService.listFiles.mockImplementation((dir: string) => {
|
||||
|
|
|
|||
|
|
@ -1160,6 +1160,8 @@ const SQL_KEYWORDS = new Set([
|
|||
'in',
|
||||
'between',
|
||||
'like',
|
||||
'where',
|
||||
'filter',
|
||||
'cast',
|
||||
'coalesce',
|
||||
'nullif',
|
||||
|
|
@ -1183,15 +1185,31 @@ const SQL_KEYWORDS = new Set([
|
|||
'rows',
|
||||
'range',
|
||||
'current',
|
||||
'current_date',
|
||||
'current_time',
|
||||
'current_timestamp',
|
||||
'localtime',
|
||||
'localtimestamp',
|
||||
'row',
|
||||
'numeric',
|
||||
'decimal',
|
||||
'int',
|
||||
'integer',
|
||||
'bigint',
|
||||
'smallint',
|
||||
'float',
|
||||
'double',
|
||||
'real',
|
||||
'string',
|
||||
'text',
|
||||
'char',
|
||||
'character',
|
||||
'varchar',
|
||||
'timestamp',
|
||||
'time',
|
||||
'uuid',
|
||||
'json',
|
||||
'jsonb',
|
||||
'bool',
|
||||
'boolean',
|
||||
]);
|
||||
|
|
@ -1218,7 +1236,8 @@ function normalizeSqlExpressionForIdentifierScan(expr: string): string {
|
|||
.replace(/'([^']|'')*'/g, ' ')
|
||||
.replace(/"([^"]+)"/g, '$1')
|
||||
.replace(/`([^`]+)`/g, '$1')
|
||||
.replace(/\[([^\]]+)\]/g, '$1');
|
||||
.replace(/\[([^\]]+)\]/g, '$1')
|
||||
.replace(/::\s*[A-Za-z_][\w$]*(?:\s*\([^)]*\))?/g, ' ');
|
||||
}
|
||||
|
||||
function extractSqlIdentifierRefs(expr: string): Array<{ qualifier?: string; name: string }> {
|
||||
|
|
|
|||
|
|
@ -210,8 +210,6 @@ class SemanticEngine:
|
|||
grain = grain_col.lower()
|
||||
if grain in source_columns:
|
||||
return True
|
||||
if any(col.endswith(f"_{grain}") for col in source_columns):
|
||||
return True
|
||||
if grain == "id":
|
||||
candidates = {
|
||||
f"{target_name}_id",
|
||||
|
|
@ -219,6 +217,9 @@ class SemanticEngine:
|
|||
}
|
||||
if source_columns.intersection(candidates):
|
||||
return True
|
||||
continue
|
||||
if any(col.endswith(f"_{grain}") for col in source_columns):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _check_sql_join_coverage(
|
||||
|
|
|
|||
|
|
@ -323,6 +323,34 @@ class TestJoinValidation:
|
|||
|
||||
assert report.errors == []
|
||||
|
||||
# Checks that validation reports no errors for a SQL-backed source that
# projects `user_id` while joining `accounts`, whose grain is the bare `id`.
# Going by the test name, a column that merely ends in `_id` must not count as
# a key of the `id`-grained target (NOTE(review): confirm against the engine's
# join-coverage check).
def test_sql_join_coverage_does_not_treat_unrelated_id_suffix_as_id_key(self):
|
||||
# SQL source: joins requests to accounts in SQL, declares no semantic joins,
# and exposes only `account_name` and `user_id` (grain: `user_id`).
requesters = SourceDefinition(
|
||||
name="large_contract_requesters",
|
||||
sql="""
|
||||
select accounts.account_name, requests.user_id
|
||||
from orbit_raw.requests requests
|
||||
join public.accounts accounts
|
||||
on requests.account_id = accounts.id
|
||||
""",
|
||||
grain=["user_id"],
|
||||
columns=[
|
||||
SourceColumn(name="account_name", type="string"),
|
||||
SourceColumn(name="user_id", type="string"),
|
||||
],
|
||||
joins=[],
|
||||
)
|
||||
# Join target built with the `_src` helper: columns `id` and `account_name`, grain `id`.
accounts = _src("accounts", columns=["id", "account_name"], grain=["id"])
|
||||
engine = SemanticEngine.from_sources(
|
||||
{
|
||||
"large_contract_requesters": requesters,
|
||||
"accounts": accounts,
|
||||
}
|
||||
)
|
||||
|
||||
# Validate with only the SQL source marked as recently touched.
report = engine.validate(recently_touched={"large_contract_requesters"})
|
||||
|
||||
# No errors expected, even though no join to `accounts` is declared.
assert report.errors == []
|
||||
|
||||
def test_sql_join_coverage_requires_join_when_projected_key_exists(self):
|
||||
requesters = SourceDefinition(
|
||||
name="large_contract_requesters",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue