Remove spurious workspace parameter from SPARQL algebra evaluator (#915)

Fix threading of workspace parameter:
- The SPARQL algebra evaluator was threading a workspace parameter
  through every function and passing it to TriplesClient.query(),
  which doesn't accept it. Workspace isolation is handled by pub/sub
  topic routing — the TriplesClient is already scoped to a
  workspace-specific flow, same as GraphRAG. Passing workspace
  explicitly was both incorrect and unnecessary.

Update tests:
- tests/unit/test_query/test_sparql_algebra.py (new) — Tests
  _query_pattern, _eval_bgp, and evaluate() with various algebra
  nodes. Key tests assert workspace is never in tc.query() kwargs,
  plus correctness tests for BGP, JOIN, UNION, SLICE, DISTINCT, and
  edge cases.
- tests/unit/test_retrieval/test_graph_rag.py — Added
  test_triples_query_never_passes_workspace (checks query()) and
  test_follow_edges_never_passes_workspace (checks query_stream()).
This commit is contained in:
cybermaggedon 2026-05-14 12:03:43 +01:00 committed by GitHub
parent f0ad282708
commit bb1109963c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 394 additions and 44 deletions

View file

@ -30,14 +30,13 @@ class EvaluationError(Exception):
pass
async def evaluate(node, triples_client, workspace, collection, limit=10000):
async def evaluate(node, triples_client, collection, limit=10000):
"""
Evaluate a SPARQL algebra node.
Args:
node: rdflib CompValue algebra node
triples_client: TriplesClient instance for triple pattern queries
workspace: workspace/keyspace identifier
collection: collection identifier
limit: safety limit on results
@ -55,24 +54,24 @@ async def evaluate(node, triples_client, workspace, collection, limit=10000):
logger.warning(f"Unsupported algebra node: {name}")
return [{}]
return await handler(node, triples_client, workspace, collection, limit)
return await handler(node, triples_client, collection, limit)
# --- Node handlers ---
async def _eval_select_query(node, tc, workspace, collection, limit):
async def _eval_select_query(node, tc, collection, limit):
"""Evaluate a SelectQuery node."""
return await evaluate(node.p, tc, workspace, collection, limit)
return await evaluate(node.p, tc, collection, limit)
async def _eval_project(node, tc, workspace, collection, limit):
async def _eval_project(node, tc, collection, limit):
"""Evaluate a Project node (SELECT variable projection)."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
variables = [str(v) for v in node.PV]
return project(solutions, variables)
async def _eval_bgp(node, tc, workspace, collection, limit):
async def _eval_bgp(node, tc, collection, limit):
"""
Evaluate a Basic Graph Pattern.
@ -107,7 +106,7 @@ async def _eval_bgp(node, tc, workspace, collection, limit):
# Query the triples store
results = await _query_pattern(
tc, s_val, p_val, o_val, workspace, collection, limit
tc, s_val, p_val, o_val, collection, limit
)
# Map results back to variable bindings,
@ -130,17 +129,17 @@ async def _eval_bgp(node, tc, workspace, collection, limit):
return solutions[:limit]
async def _eval_join(node, tc, workspace, collection, limit):
async def _eval_join(node, tc, collection, limit):
"""Evaluate a Join node."""
left = await evaluate(node.p1, tc, workspace, collection, limit)
right = await evaluate(node.p2, tc, workspace, collection, limit)
left = await evaluate(node.p1, tc, collection, limit)
right = await evaluate(node.p2, tc, collection, limit)
return hash_join(left, right)[:limit]
async def _eval_left_join(node, tc, workspace, collection, limit):
async def _eval_left_join(node, tc, collection, limit):
"""Evaluate a LeftJoin node (OPTIONAL)."""
left_sols = await evaluate(node.p1, tc, workspace, collection, limit)
right_sols = await evaluate(node.p2, tc, workspace, collection, limit)
left_sols = await evaluate(node.p1, tc, collection, limit)
right_sols = await evaluate(node.p2, tc, collection, limit)
filter_fn = None
if hasattr(node, "expr") and node.expr is not None:
@ -153,16 +152,16 @@ async def _eval_left_join(node, tc, workspace, collection, limit):
return left_join(left_sols, right_sols, filter_fn)[:limit]
async def _eval_union(node, tc, workspace, collection, limit):
async def _eval_union(node, tc, collection, limit):
"""Evaluate a Union node."""
left = await evaluate(node.p1, tc, workspace, collection, limit)
right = await evaluate(node.p2, tc, workspace, collection, limit)
left = await evaluate(node.p1, tc, collection, limit)
right = await evaluate(node.p2, tc, collection, limit)
return union(left, right)[:limit]
async def _eval_filter(node, tc, workspace, collection, limit):
async def _eval_filter(node, tc, collection, limit):
"""Evaluate a Filter node."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
expr = node.expr
return [
sol for sol in solutions
@ -170,22 +169,22 @@ async def _eval_filter(node, tc, workspace, collection, limit):
]
async def _eval_distinct(node, tc, workspace, collection, limit):
async def _eval_distinct(node, tc, collection, limit):
"""Evaluate a Distinct node."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
return distinct(solutions)
async def _eval_reduced(node, tc, workspace, collection, limit):
async def _eval_reduced(node, tc, collection, limit):
"""Evaluate a Reduced node (like Distinct but implementation-defined)."""
# Treat same as Distinct
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
return distinct(solutions)
async def _eval_order_by(node, tc, workspace, collection, limit):
async def _eval_order_by(node, tc, collection, limit):
"""Evaluate an OrderBy node."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
key_fns = []
for cond in node.expr:
@ -206,7 +205,7 @@ async def _eval_order_by(node, tc, workspace, collection, limit):
return order_by(solutions, key_fns)
async def _eval_slice(node, tc, workspace, collection, limit):
async def _eval_slice(node, tc, collection, limit):
"""Evaluate a Slice node (LIMIT/OFFSET)."""
# Pass tighter limit downstream if possible
inner_limit = limit
@ -214,13 +213,13 @@ async def _eval_slice(node, tc, workspace, collection, limit):
offset = node.start or 0
inner_limit = min(limit, offset + node.length)
solutions = await evaluate(node.p, tc, workspace, collection, inner_limit)
solutions = await evaluate(node.p, tc, collection, inner_limit)
return slice_solutions(solutions, node.start or 0, node.length)
async def _eval_extend(node, tc, workspace, collection, limit):
async def _eval_extend(node, tc, collection, limit):
"""Evaluate an Extend node (BIND)."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
var_name = str(node.var)
expr = node.expr
@ -246,9 +245,9 @@ async def _eval_extend(node, tc, workspace, collection, limit):
return result
async def _eval_group(node, tc, workspace, collection, limit):
async def _eval_group(node, tc, collection, limit):
"""Evaluate a Group node (GROUP BY with aggregation)."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
# Extract grouping expressions
group_exprs = []
@ -289,9 +288,9 @@ async def _eval_group(node, tc, workspace, collection, limit):
return result
async def _eval_aggregate_join(node, tc, workspace, collection, limit):
async def _eval_aggregate_join(node, tc, collection, limit):
"""Evaluate an AggregateJoin (aggregation functions after GROUP BY)."""
solutions = await evaluate(node.p, tc, workspace, collection, limit)
solutions = await evaluate(node.p, tc, collection, limit)
result = []
for sol in solutions:
@ -310,7 +309,7 @@ async def _eval_aggregate_join(node, tc, workspace, collection, limit):
return result
async def _eval_graph(node, tc, workspace, collection, limit):
async def _eval_graph(node, tc, collection, limit):
"""Evaluate a Graph node (GRAPH clause)."""
term = node.term
@ -319,16 +318,16 @@ async def _eval_graph(node, tc, workspace, collection, limit):
# We'd need to pass graph to triples queries
# For now, evaluate inner pattern normally
logger.info(f"GRAPH <{term}> clause - graph filtering not yet wired")
return await evaluate(node.p, tc, workspace, collection, limit)
return await evaluate(node.p, tc, collection, limit)
elif isinstance(term, Variable):
# GRAPH ?g { ... } — variable graph
logger.info(f"GRAPH ?{term} clause - variable graph not yet wired")
return await evaluate(node.p, tc, workspace, collection, limit)
return await evaluate(node.p, tc, collection, limit)
else:
return await evaluate(node.p, tc, workspace, collection, limit)
return await evaluate(node.p, tc, collection, limit)
async def _eval_values(node, tc, workspace, collection, limit):
async def _eval_values(node, tc, collection, limit):
"""Evaluate a VALUES clause (inline data)."""
variables = [str(v) for v in node.var]
solutions = []
@ -343,9 +342,9 @@ async def _eval_values(node, tc, workspace, collection, limit):
return solutions
async def _eval_to_multiset(node, tc, workspace, collection, limit):
async def _eval_to_multiset(node, tc, collection, limit):
"""Evaluate a ToMultiSet node (subquery)."""
return await evaluate(node.p, tc, workspace, collection, limit)
return await evaluate(node.p, tc, collection, limit)
# --- Aggregate computation ---
@ -487,7 +486,7 @@ def _resolve_term(tmpl, solution):
return rdflib_term_to_term(tmpl)
async def _query_pattern(tc, s, p, o, workspace, collection, limit):
async def _query_pattern(tc, s, p, o, collection, limit):
"""
Issue a streaming triple pattern query via TriplesClient.
@ -496,7 +495,6 @@ async def _query_pattern(tc, s, p, o, workspace, collection, limit):
results = await tc.query(
s=s, p=p, o=o,
limit=limit,
workspace=workspace,
collection=collection,
)
return results

View file

@ -141,7 +141,6 @@ class Processor(FlowProcessor):
solutions = await evaluate(
parsed.algebra,
triples_client,
workspace=flow.workspace,
collection=request.collection or "default",
limit=request.limit or 10000,
)