mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
fix(sl): parse user filter expressions as predicates, not projections (#307)
* fix(sl): parse user filter expressions as predicates, not projections
User-authored filters and segments were parsed in a projection context
(`SELECT {expr}`). On T-SQL a top-level `col = 'value'` projection is the
`alias = expression` aliasing syntax, so an equality filter parsed this way
became `'value' AS col` — dropping the comparison entirely and silently
skipping computed-column expansion (the column hid behind the alias).
Parse user fragments as predicates (`SELECT * WHERE {expr}`) at every parse
site — the parser cache, measure-filter CASE WHEN generation, computed-column
expansion, and measure-filter/segment column qualification. For plain
non-condition expressions the column set is identical, so this is a no-op
everywhere except the T-SQL alias case it fixes.
Add cross-dialect regression tests (tsql, postgres, snowflake, bigquery)
locking equality filters/segments to comparison shape and confirming `= 'x'`
now matches `IN ('x')` on T-SQL.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
* Shorten T-SQL predicate comments
* docs(sl): tighten T-SQL predicate docstrings and AGENTS docstring rule
Trim the parser and regression-test docstrings to the 1-3 line bar and
extend the AGENTS.md comment guidance to cover docstrings explicitly.
* refactor(sl): route all filter parsing through parse_predicate
Consolidate the predicate-context parse into a single parse_predicate
helper and route every filter-parsing call site through it: measure
CASE-WHEN filters, segments, computed-column-in-filter, the
aggregate-locality HAVING rewrite, and the planner OR-mixing /
top-level-AND split. The locality and split paths still parsed user
filters in projection context, so a named-measure equality filter
compiled to `0 AS measure` on T-SQL. Add a locality regression test
covering the HAVING rewrite path.
---------
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
parent
4dae8c34dd
commit
fb50c11d16
5 changed files with 238 additions and 63 deletions
|
|
@ -196,6 +196,15 @@ def quote_reserved_identifiers(expr: str) -> str:
|
|||
return result
|
||||
|
||||
|
||||
# Builds the SQL text that places a user fragment in predicate (WHERE) position.
# Takes the raw fragment string and returns a string; nothing is parsed here.
def _predicate_select(expr: str) -> str:
|
||||
"""Wrap a user expression as `SELECT * WHERE …`, quoting reserved identifiers.
|
||||
|
||||
Predicate, not projection: T-SQL reads a top-level `col = 'value'` projection
|
||||
as the `alias = expression` form and would compile the filter to `'value' AS col`.
|
||||
"""
|
||||
# The fragment goes through quote_reserved_identifiers before it is
# interpolated after the WHERE keyword.
return f"SELECT * WHERE {quote_reserved_identifiers(expr)}"
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=256)
|
||||
def _cached_parse_select(sql: str, dialect: str) -> exp.Expression:
|
||||
"""Cache parsed SELECT wrapper trees keyed by (sql, dialect).
|
||||
|
|
@ -206,6 +215,14 @@ def _cached_parse_select(sql: str, dialect: str) -> exp.Expression:
|
|||
return sqlglot.parse_one(sql, read=dialect)
|
||||
|
||||
|
||||
# Parses `expr` in predicate position for the given sqlglot dialect and returns
# the condition node held by the WHERE clause. Calls sqlglot.parse_one directly
# on every invocation rather than going through _cached_parse_select.
def parse_predicate(expr: str, dialect: str) -> exp.Expression:
|
||||
"""Parse a user filter into a fresh, mutable WHERE-condition node.
|
||||
|
||||
Uncached, so the result is safe to `.transform()`; raises on unparseable input.
|
||||
"""
|
||||
# NOTE(review): if .find(exp.Where) ever returned None, `.this` would raise
# AttributeError rather than a parse error — presumably parse_one rejects
# such input first; confirm against sqlglot's behavior for empty fragments.
return sqlglot.parse_one(_predicate_select(expr), read=dialect).find(exp.Where).this
|
||||
|
||||
|
||||
class ExpressionParser:
|
||||
"""Parses user-authored SQL expressions for AST walks.
|
||||
|
||||
|
|
@ -218,12 +235,9 @@ class ExpressionParser:
|
|||
# Records the SQL dialect name this parser hands to the cached parse helper.
# Defaults to "postgres" when the caller does not specify one.
def __init__(self, dialect: str = "postgres") -> None:
|
||||
self.dialect = dialect
|
||||
|
||||
# Instance-level pass-through to the module-level quote_reserved_identifiers.
# NOTE(review): the hunk header (-218,12 +235,9) suggests this method is on the
# removed side of the diff; the +/- markers are not visible here — confirm.
def _quote_reserved_identifiers(self, expr: str) -> str:
|
||||
return quote_reserved_identifiers(expr)
|
||||
|
||||
# Projection-context form of _parse_as_select: takes an already-quoted fragment
# and wraps it as `SELECT <expr>` before the cached parse.
# NOTE(review): a second definition with the same name follows in this view;
# presumably this one is the removed side of the diff — confirm.
def _parse_as_select(self, quoted_expr: str) -> exp.Expression:
|
||||
"""Parse expression wrapped in SELECT, using cache for repeated expressions."""
|
||||
# Projection position: per the commit message, T-SQL reads a top-level
# `col = 'value'` here as the `alias = expression` form.
return _cached_parse_select(f"SELECT {quoted_expr}", self.dialect)
|
||||
# Predicate-context form of _parse_as_select: takes the raw (unquoted) fragment,
# lets _predicate_select quote and wrap it as `SELECT * WHERE <expr>`, then
# parses it through _cached_parse_select with this parser's dialect.
def _parse_as_select(self, expr: str) -> exp.Expression:
|
||||
"""Parse a user fragment for read-only AST walks, via the parse cache."""
|
||||
# _cached_parse_select is wrapped in functools.lru_cache, so the same tree
# object can be returned to several callers — hence read-only use.
return _cached_parse_select(_predicate_select(expr), self.dialect)
|
||||
|
||||
def parse(
|
||||
self,
|
||||
|
|
@ -236,8 +250,7 @@ class ExpressionParser:
|
|||
if not expr or not expr.strip():
|
||||
return result
|
||||
|
||||
quoted_expr = self._quote_reserved_identifiers(expr)
|
||||
tree = self._parse_as_select(quoted_expr)
|
||||
tree = self._parse_as_select(expr)
|
||||
|
||||
# Extract source.column references
|
||||
for col in tree.find_all(exp.Column):
|
||||
|
|
@ -296,8 +309,7 @@ class ExpressionParser:
|
|||
"""Quick extraction of source names from an expression."""
|
||||
if not expr or not expr.strip():
|
||||
return set()
|
||||
quoted_expr = self._quote_reserved_identifiers(expr)
|
||||
tree = self._parse_as_select(quoted_expr)
|
||||
tree = self._parse_as_select(expr)
|
||||
return {
|
||||
_strip_quotes(col.table) for col in tree.find_all(exp.Column) if col.table
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue