mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-17 03:15:14 +02:00
Remove spurious workspace parameter from SPARQL algebra evaluator (#915)
Fix threading of workspace parameter: - The SPARQL algebra evaluator was threading a workspace parameter through every function and passing it to TriplesClient.query(), which doesn't accept it. Workspace isolation is handled by pub/sub topic routing — the TriplesClient is already scoped to a workspace-specific flow, same as GraphRAG. Passing workspace explicitly was both incorrect and unnecessary. Update tests: - tests/unit/test_query/test_sparql_algebra.py (new) — Tests _query_pattern, _eval_bgp, and evaluate() with various algebra nodes. Key tests assert workspace is never in tc.query() kwargs, plus correctness tests for BGP, JOIN, UNION, SLICE, DISTINCT, and edge cases. - tests/unit/test_retrieval/test_graph_rag.py — Added test_triples_query_never_passes_workspace (checks query()) and test_follow_edges_never_passes_workspace (checks query_stream()).
This commit is contained in:
parent
f0ad282708
commit
bb1109963c
4 changed files with 394 additions and 44 deletions
|
|
@ -30,14 +30,13 @@ class EvaluationError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
async def evaluate(node, triples_client, workspace, collection, limit=10000):
|
||||
async def evaluate(node, triples_client, collection, limit=10000):
|
||||
"""
|
||||
Evaluate a SPARQL algebra node.
|
||||
|
||||
Args:
|
||||
node: rdflib CompValue algebra node
|
||||
triples_client: TriplesClient instance for triple pattern queries
|
||||
workspace: workspace/keyspace identifier
|
||||
collection: collection identifier
|
||||
limit: safety limit on results
|
||||
|
||||
|
|
@ -55,24 +54,24 @@ async def evaluate(node, triples_client, workspace, collection, limit=10000):
|
|||
logger.warning(f"Unsupported algebra node: {name}")
|
||||
return [{}]
|
||||
|
||||
return await handler(node, triples_client, workspace, collection, limit)
|
||||
return await handler(node, triples_client, collection, limit)
|
||||
|
||||
|
||||
# --- Node handlers ---
|
||||
|
||||
async def _eval_select_query(node, tc, workspace, collection, limit):
|
||||
async def _eval_select_query(node, tc, collection, limit):
|
||||
"""Evaluate a SelectQuery node."""
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return await evaluate(node.p, tc, collection, limit)
|
||||
|
||||
|
||||
async def _eval_project(node, tc, workspace, collection, limit):
|
||||
async def _eval_project(node, tc, collection, limit):
|
||||
"""Evaluate a Project node (SELECT variable projection)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
variables = [str(v) for v in node.PV]
|
||||
return project(solutions, variables)
|
||||
|
||||
|
||||
async def _eval_bgp(node, tc, workspace, collection, limit):
|
||||
async def _eval_bgp(node, tc, collection, limit):
|
||||
"""
|
||||
Evaluate a Basic Graph Pattern.
|
||||
|
||||
|
|
@ -107,7 +106,7 @@ async def _eval_bgp(node, tc, workspace, collection, limit):
|
|||
|
||||
# Query the triples store
|
||||
results = await _query_pattern(
|
||||
tc, s_val, p_val, o_val, workspace, collection, limit
|
||||
tc, s_val, p_val, o_val, collection, limit
|
||||
)
|
||||
|
||||
# Map results back to variable bindings,
|
||||
|
|
@ -130,17 +129,17 @@ async def _eval_bgp(node, tc, workspace, collection, limit):
|
|||
return solutions[:limit]
|
||||
|
||||
|
||||
async def _eval_join(node, tc, workspace, collection, limit):
|
||||
async def _eval_join(node, tc, collection, limit):
|
||||
"""Evaluate a Join node."""
|
||||
left = await evaluate(node.p1, tc, workspace, collection, limit)
|
||||
right = await evaluate(node.p2, tc, workspace, collection, limit)
|
||||
left = await evaluate(node.p1, tc, collection, limit)
|
||||
right = await evaluate(node.p2, tc, collection, limit)
|
||||
return hash_join(left, right)[:limit]
|
||||
|
||||
|
||||
async def _eval_left_join(node, tc, workspace, collection, limit):
|
||||
async def _eval_left_join(node, tc, collection, limit):
|
||||
"""Evaluate a LeftJoin node (OPTIONAL)."""
|
||||
left_sols = await evaluate(node.p1, tc, workspace, collection, limit)
|
||||
right_sols = await evaluate(node.p2, tc, workspace, collection, limit)
|
||||
left_sols = await evaluate(node.p1, tc, collection, limit)
|
||||
right_sols = await evaluate(node.p2, tc, collection, limit)
|
||||
|
||||
filter_fn = None
|
||||
if hasattr(node, "expr") and node.expr is not None:
|
||||
|
|
@ -153,16 +152,16 @@ async def _eval_left_join(node, tc, workspace, collection, limit):
|
|||
return left_join(left_sols, right_sols, filter_fn)[:limit]
|
||||
|
||||
|
||||
async def _eval_union(node, tc, workspace, collection, limit):
|
||||
async def _eval_union(node, tc, collection, limit):
|
||||
"""Evaluate a Union node."""
|
||||
left = await evaluate(node.p1, tc, workspace, collection, limit)
|
||||
right = await evaluate(node.p2, tc, workspace, collection, limit)
|
||||
left = await evaluate(node.p1, tc, collection, limit)
|
||||
right = await evaluate(node.p2, tc, collection, limit)
|
||||
return union(left, right)[:limit]
|
||||
|
||||
|
||||
async def _eval_filter(node, tc, workspace, collection, limit):
|
||||
async def _eval_filter(node, tc, collection, limit):
|
||||
"""Evaluate a Filter node."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
expr = node.expr
|
||||
return [
|
||||
sol for sol in solutions
|
||||
|
|
@ -170,22 +169,22 @@ async def _eval_filter(node, tc, workspace, collection, limit):
|
|||
]
|
||||
|
||||
|
||||
async def _eval_distinct(node, tc, workspace, collection, limit):
|
||||
async def _eval_distinct(node, tc, collection, limit):
|
||||
"""Evaluate a Distinct node."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
return distinct(solutions)
|
||||
|
||||
|
||||
async def _eval_reduced(node, tc, workspace, collection, limit):
|
||||
async def _eval_reduced(node, tc, collection, limit):
|
||||
"""Evaluate a Reduced node (like Distinct but implementation-defined)."""
|
||||
# Treat same as Distinct
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
return distinct(solutions)
|
||||
|
||||
|
||||
async def _eval_order_by(node, tc, workspace, collection, limit):
|
||||
async def _eval_order_by(node, tc, collection, limit):
|
||||
"""Evaluate an OrderBy node."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
|
||||
key_fns = []
|
||||
for cond in node.expr:
|
||||
|
|
@ -206,7 +205,7 @@ async def _eval_order_by(node, tc, workspace, collection, limit):
|
|||
return order_by(solutions, key_fns)
|
||||
|
||||
|
||||
async def _eval_slice(node, tc, workspace, collection, limit):
|
||||
async def _eval_slice(node, tc, collection, limit):
|
||||
"""Evaluate a Slice node (LIMIT/OFFSET)."""
|
||||
# Pass tighter limit downstream if possible
|
||||
inner_limit = limit
|
||||
|
|
@ -214,13 +213,13 @@ async def _eval_slice(node, tc, workspace, collection, limit):
|
|||
offset = node.start or 0
|
||||
inner_limit = min(limit, offset + node.length)
|
||||
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, inner_limit)
|
||||
solutions = await evaluate(node.p, tc, collection, inner_limit)
|
||||
return slice_solutions(solutions, node.start or 0, node.length)
|
||||
|
||||
|
||||
async def _eval_extend(node, tc, workspace, collection, limit):
|
||||
async def _eval_extend(node, tc, collection, limit):
|
||||
"""Evaluate an Extend node (BIND)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
var_name = str(node.var)
|
||||
expr = node.expr
|
||||
|
||||
|
|
@ -246,9 +245,9 @@ async def _eval_extend(node, tc, workspace, collection, limit):
|
|||
return result
|
||||
|
||||
|
||||
async def _eval_group(node, tc, workspace, collection, limit):
|
||||
async def _eval_group(node, tc, collection, limit):
|
||||
"""Evaluate a Group node (GROUP BY with aggregation)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
|
||||
# Extract grouping expressions
|
||||
group_exprs = []
|
||||
|
|
@ -289,9 +288,9 @@ async def _eval_group(node, tc, workspace, collection, limit):
|
|||
return result
|
||||
|
||||
|
||||
async def _eval_aggregate_join(node, tc, workspace, collection, limit):
|
||||
async def _eval_aggregate_join(node, tc, collection, limit):
|
||||
"""Evaluate an AggregateJoin (aggregation functions after GROUP BY)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
solutions = await evaluate(node.p, tc, collection, limit)
|
||||
|
||||
result = []
|
||||
for sol in solutions:
|
||||
|
|
@ -310,7 +309,7 @@ async def _eval_aggregate_join(node, tc, workspace, collection, limit):
|
|||
return result
|
||||
|
||||
|
||||
async def _eval_graph(node, tc, workspace, collection, limit):
|
||||
async def _eval_graph(node, tc, collection, limit):
|
||||
"""Evaluate a Graph node (GRAPH clause)."""
|
||||
term = node.term
|
||||
|
||||
|
|
@ -319,16 +318,16 @@ async def _eval_graph(node, tc, workspace, collection, limit):
|
|||
# We'd need to pass graph to triples queries
|
||||
# For now, evaluate inner pattern normally
|
||||
logger.info(f"GRAPH <{term}> clause - graph filtering not yet wired")
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return await evaluate(node.p, tc, collection, limit)
|
||||
elif isinstance(term, Variable):
|
||||
# GRAPH ?g { ... } — variable graph
|
||||
logger.info(f"GRAPH ?{term} clause - variable graph not yet wired")
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return await evaluate(node.p, tc, collection, limit)
|
||||
else:
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return await evaluate(node.p, tc, collection, limit)
|
||||
|
||||
|
||||
async def _eval_values(node, tc, workspace, collection, limit):
|
||||
async def _eval_values(node, tc, collection, limit):
|
||||
"""Evaluate a VALUES clause (inline data)."""
|
||||
variables = [str(v) for v in node.var]
|
||||
solutions = []
|
||||
|
|
@ -343,9 +342,9 @@ async def _eval_values(node, tc, workspace, collection, limit):
|
|||
return solutions
|
||||
|
||||
|
||||
async def _eval_to_multiset(node, tc, workspace, collection, limit):
|
||||
async def _eval_to_multiset(node, tc, collection, limit):
|
||||
"""Evaluate a ToMultiSet node (subquery)."""
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return await evaluate(node.p, tc, collection, limit)
|
||||
|
||||
|
||||
# --- Aggregate computation ---
|
||||
|
|
@ -487,7 +486,7 @@ def _resolve_term(tmpl, solution):
|
|||
return rdflib_term_to_term(tmpl)
|
||||
|
||||
|
||||
async def _query_pattern(tc, s, p, o, workspace, collection, limit):
|
||||
async def _query_pattern(tc, s, p, o, collection, limit):
|
||||
"""
|
||||
Issue a streaming triple pattern query via TriplesClient.
|
||||
|
||||
|
|
@ -496,7 +495,6 @@ async def _query_pattern(tc, s, p, o, workspace, collection, limit):
|
|||
results = await tc.query(
|
||||
s=s, p=p, o=o,
|
||||
limit=limit,
|
||||
workspace=workspace,
|
||||
collection=collection,
|
||||
)
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -141,7 +141,6 @@ class Processor(FlowProcessor):
|
|||
solutions = await evaluate(
|
||||
parsed.algebra,
|
||||
triples_client,
|
||||
workspace=flow.workspace,
|
||||
collection=request.collection or "default",
|
||||
limit=request.limit or 10000,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue