mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat(query-history): scope mining to modeled schemas by default (#258)
* feat(query-history): structure SQL analysis table refs
* feat(query-history): qualify SQL analysis table refs
* feat(query-history): wire modeled scope floor through ingest
* chore(query-history): verify scope floor
* test(query-history): align daemon SQL batch endpoint contract
* feat(query-history): build scope from same-run scan catalog
* feat(query-history): fail open on scope-floor catalog failures
* chore(query-history): verify scope-floor v1 closure
* refactor(query-history): share scope membership
* feat(setup): apply derived query history filters
* docs: document derived query history filters
* fix(query-history): redact filter picker LLM prompt SQL
* fix(setup): run filter picker SQL analysis through managed daemon
* chore(query-history): verify filter picker v1 closure
* fix(query-history): fail open on partial service-account attribution
* fix(query-history): aggregate BigQuery users by execution count
* fix(query-history): aggregate Snowflake users by execution count
* fix(query-history): use BigQuery query info hash
This commit is contained in:
parent
ce1516b357
commit
e70ae1e63b
42 changed files with 3090 additions and 274 deletions
|
|
@@ -32,7 +32,10 @@ def test_analyze_sql_batch_extracts_tables_and_clause_columns() -> None:
|
|||
|
||||
result = response.results["orders_by_customer"]
|
||||
assert result.error is None
|
||||
assert result.tables_touched == ["public.orders", "public.customers"]
|
||||
assert [item.model_dump() for item in result.tables_touched] == [
|
||||
{"catalog": None, "db": "public", "name": "orders"},
|
||||
{"catalog": None, "db": "public", "name": "customers"},
|
||||
]
|
||||
assert result.columns_by_clause == {
|
||||
"select": ["status"],
|
||||
"where": ["created_at"],
|
||||
|
|
@@ -56,6 +59,114 @@ def test_analyze_sql_batch_returns_per_item_parse_errors() -> None:
|
|||
assert result.error is not None
|
||||
|
||||
|
||||
def test_analyze_sql_batch_qualifies_bare_table_from_catalog() -> None:
    """A bare table name listed once in the catalog is reported with that entry's db."""
    modeled_tables = [
        {
            "catalog": None,
            "db": "orbit_raw",
            "name": "accounts",
            "columns": ["id"],
        },
        {
            "catalog": None,
            "db": "orbit_analytics",
            "name": "orders",
            "columns": ["id"],
        },
    ]
    request = AnalyzeSqlBatchRequest(
        dialect="postgres",
        catalog={"tables": modeled_tables},
        items=[AnalyzeSqlBatchItem(id="bare", sql="select id from accounts")],
        max_workers=1,
    )

    touched = analyze_sql_batch_response(request).results["bare"].tables_touched

    # The query names only "accounts"; the expected ref carries the catalog's db.
    assert [ref.model_dump() for ref in touched] == [
        {"catalog": None, "db": "orbit_raw", "name": "accounts"}
    ]
|
||||
|
||||
|
||||
def test_analyze_sql_batch_returns_all_ambiguous_modeled_matches() -> None:
    """A bare name present under two catalog dbs yields one ref per matching entry."""
    modeled_tables = [
        {
            "catalog": None,
            "db": "orbit_raw",
            "name": "events",
            "columns": ["id"],
        },
        {
            "catalog": None,
            "db": "orbit_analytics",
            "name": "events",
            "columns": ["id"],
        },
    ]
    request = AnalyzeSqlBatchRequest(
        dialect="postgres",
        catalog={"tables": modeled_tables},
        items=[AnalyzeSqlBatchItem(id="ambiguous", sql="select id from events")],
        max_workers=1,
    )

    touched = analyze_sql_batch_response(request).results["ambiguous"].tables_touched

    # Both candidates are expected, listed in the same order as the catalog.
    assert [ref.model_dump() for ref in touched] == [
        {"catalog": None, "db": "orbit_raw", "name": "events"},
        {"catalog": None, "db": "orbit_analytics", "name": "events"},
    ]
|
||||
|
||||
|
||||
def test_analyze_sql_batch_leaves_unresolved_bare_refs_unqualified() -> None:
    """A bare table name absent from the catalog is reported with no db or catalog."""
    request = AnalyzeSqlBatchRequest(
        dialect="postgres",
        catalog={
            "tables": [{"catalog": None, "db": "orbit_raw", "name": "accounts"}]
        },
        items=[AnalyzeSqlBatchItem(id="missing", sql="select * from invoices")],
        max_workers=1,
    )

    touched = analyze_sql_batch_response(request).results["missing"].tables_touched

    # "invoices" is not in the catalog, so only the name is expected to be set.
    expected = [{"catalog": None, "db": None, "name": "invoices"}]
    assert [ref.model_dump() for ref in touched] == expected
|
||||
|
||||
|
||||
def test_analyze_sql_batch_returns_bigquery_project_dataset_table_refs() -> None:
    """A backtick-quoted three-part BigQuery name is split into catalog, db and name."""
    modeled_table = {
        "catalog": "demo-project",
        "db": "orbit_analytics",
        "name": "orders",
    }
    query = AnalyzeSqlBatchItem(
        id="bq",
        sql="select * from `demo-project.orbit_analytics.orders`",
    )
    request = AnalyzeSqlBatchRequest(
        dialect="bigquery",
        catalog={"tables": [modeled_table]},
        items=[query],
        max_workers=1,
    )

    touched = analyze_sql_batch_response(request).results["bq"].tables_touched

    assert [ref.model_dump() for ref in touched] == [
        {"catalog": "demo-project", "db": "orbit_analytics", "name": "orders"}
    ]
|
||||
|
||||
|
||||
def test_columns_from_nodes_ignores_non_expression_clause_values() -> None:
    """Booleans and None passed as clause values produce no columns."""
    non_expression_values = [True, False, None]

    extracted = _columns_from_nodes(non_expression_values)

    assert extracted == []
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue