mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-27 16:25:12 +02:00
SPARQL engine: streaming evaluation, bind joins, and expression fixes (#947)
Convert the SPARQL algebra evaluator from eager list-based evaluation to
lazy async generators so results stream incrementally. This lets Slice
terminate early (via generator cleanup) and avoids materialising full
result sets for streamable operators like Project, Filter, Union, and
Extend. Blocking operators (Join, LeftJoin, OrderBy, Group) materialise
their upstream input at the operator boundary, then yield results.
Add bind join optimization for Join nodes where one side is small
(VALUES/ToMultiSet): instead of materialising both sides independently
and hash-joining, iterate the small side's bindings and evaluate the
large side with those bindings pre-seeded. This turns wildcard BGP
queries into selective ones — e.g. VALUES ?x { <uri> } joined with a
BGP now queries the triple store with ?x bound rather than fetching
all triples.
Add TriplesClient.query_gen() async generator that wraps the existing
streaming callback API via an asyncio.Queue bridge, yielding individual
Triple objects as batches arrive.
Add streaming request path in the SPARQL query service that batches
solutions from the live async generator and sends each batch as soon as it fills.
Fix FILTER IN/NOT IN: rdflib represents these as RelationalExpression
nodes with op="IN", not as Builtin_IN — handle both representations.
Fix Builtin_IN/Builtin_NOTIN dispatch ordering so the specific handlers
are checked before the generic Builtin_ prefix match.
Fix VALUES handling for rdflib's two representations: positional
(var/value) and dict-based (res).
This commit is contained in:
parent
81e9a3ebe4
commit
6af12f416f
5 changed files with 683 additions and 302 deletions
|
|
@ -14,7 +14,7 @@ from rdflib.plugins.sparql.parserutils import CompValue
|
||||||
|
|
||||||
from trustgraph.schema import Term, IRI, LITERAL
|
from trustgraph.schema import Term, IRI, LITERAL
|
||||||
from trustgraph.query.sparql.algebra import (
|
from trustgraph.query.sparql.algebra import (
|
||||||
evaluate, _query_pattern, _eval_bgp,
|
evaluate, materialise, _query_pattern, _eval_bgp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -28,6 +28,32 @@ def lit(v):
|
||||||
return Term(type=LITERAL, value=v)
|
return Term(type=LITERAL, value=v)
|
||||||
|
|
||||||
|
|
||||||
|
def make_tc(query_return=None, query_side_effect=None):
|
||||||
|
"""Create a mock TriplesClient with both query() and query_gen() support."""
|
||||||
|
tc = AsyncMock()
|
||||||
|
|
||||||
|
if query_side_effect is not None:
|
||||||
|
tc.query.side_effect = query_side_effect
|
||||||
|
|
||||||
|
async def gen_side_effect(**kwargs):
|
||||||
|
results = await query_side_effect(**kwargs)
|
||||||
|
for r in results:
|
||||||
|
yield r
|
||||||
|
|
||||||
|
tc.query_gen = gen_side_effect
|
||||||
|
else:
|
||||||
|
items = query_return or []
|
||||||
|
tc.query.return_value = items
|
||||||
|
|
||||||
|
async def gen(**kwargs):
|
||||||
|
for item in items:
|
||||||
|
yield item
|
||||||
|
|
||||||
|
tc.query_gen = gen
|
||||||
|
|
||||||
|
return tc
|
||||||
|
|
||||||
|
|
||||||
def make_triple(s, p, o):
|
def make_triple(s, p, o):
|
||||||
t = MagicMock()
|
t = MagicMock()
|
||||||
t.s = s
|
t.s = s
|
||||||
|
|
@ -150,15 +176,14 @@ class TestEvalBgp:
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_single_pattern_all_variables(self):
|
async def test_single_pattern_all_variables(self):
|
||||||
tc = AsyncMock()
|
|
||||||
triple = make_triple(iri("http://s"), iri("http://p"), lit("o"))
|
triple = make_triple(iri("http://s"), iri("http://p"), lit("o"))
|
||||||
tc.query.return_value = [triple]
|
tc = make_tc(query_return=[triple])
|
||||||
|
|
||||||
bgp = make_bgp(
|
bgp = make_bgp(
|
||||||
(Variable("s"), Variable("p"), Variable("o")),
|
(Variable("s"), Variable("p"), Variable("o")),
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(bgp, tc, collection="default", limit=100)
|
solutions = await materialise(bgp, tc, collection="default", limit=100)
|
||||||
|
|
||||||
assert len(solutions) == 1
|
assert len(solutions) == 1
|
||||||
assert solutions[0]["s"].iri == "http://s"
|
assert solutions[0]["s"].iri == "http://s"
|
||||||
|
|
@ -167,43 +192,37 @@ class TestEvalBgp:
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_single_pattern_bound_subject(self):
|
async def test_single_pattern_bound_subject(self):
|
||||||
tc = AsyncMock()
|
tc = make_tc(query_return=[
|
||||||
tc.query.return_value = [
|
|
||||||
make_triple(iri("http://s"), iri("http://p"), lit("val")),
|
make_triple(iri("http://s"), iri("http://p"), lit("val")),
|
||||||
]
|
])
|
||||||
|
|
||||||
bgp = make_bgp(
|
bgp = make_bgp(
|
||||||
(URIRef("http://s"), Variable("p"), Variable("o")),
|
(URIRef("http://s"), Variable("p"), Variable("o")),
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(bgp, tc, collection="default")
|
solutions = await materialise(bgp, tc, collection="default")
|
||||||
|
|
||||||
tc.query.assert_called_once()
|
assert len(solutions) == 1
|
||||||
kwargs = tc.query.call_args.kwargs
|
|
||||||
assert "workspace" not in kwargs
|
|
||||||
assert kwargs["collection"] == "default"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_empty_bgp_returns_empty_solution(self):
|
async def test_empty_bgp_returns_empty_solution(self):
|
||||||
tc = AsyncMock()
|
tc = make_tc()
|
||||||
|
|
||||||
bgp = make_bgp()
|
bgp = make_bgp()
|
||||||
|
|
||||||
solutions = await evaluate(bgp, tc, collection="default")
|
solutions = await materialise(bgp, tc, collection="default")
|
||||||
|
|
||||||
assert solutions == [{}]
|
assert solutions == [{}]
|
||||||
tc.query.assert_not_called()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_no_results_returns_empty(self):
|
async def test_no_results_returns_empty(self):
|
||||||
tc = AsyncMock()
|
tc = make_tc(query_return=[])
|
||||||
tc.query.return_value = []
|
|
||||||
|
|
||||||
bgp = make_bgp(
|
bgp = make_bgp(
|
||||||
(Variable("s"), Variable("p"), Variable("o")),
|
(Variable("s"), Variable("p"), Variable("o")),
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(bgp, tc, collection="default")
|
solutions = await materialise(bgp, tc, collection="default")
|
||||||
|
|
||||||
assert solutions == []
|
assert solutions == []
|
||||||
|
|
||||||
|
|
@ -213,17 +232,16 @@ class TestEvaluate:
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_select_query_node(self):
|
async def test_select_query_node(self):
|
||||||
tc = AsyncMock()
|
tc = make_tc(query_return=[
|
||||||
tc.query.return_value = [
|
|
||||||
make_triple(iri("http://s"), iri("http://p"), lit("o")),
|
make_triple(iri("http://s"), iri("http://p"), lit("o")),
|
||||||
]
|
])
|
||||||
|
|
||||||
bgp = make_bgp(
|
bgp = make_bgp(
|
||||||
(Variable("s"), Variable("p"), Variable("o")),
|
(Variable("s"), Variable("p"), Variable("o")),
|
||||||
)
|
)
|
||||||
select = make_select(make_project(bgp, ["s", "p"]))
|
select = make_select(make_project(bgp, ["s", "p"]))
|
||||||
|
|
||||||
solutions = await evaluate(select, tc, collection="default")
|
solutions = await materialise(select, tc, collection="default")
|
||||||
|
|
||||||
assert len(solutions) == 1
|
assert len(solutions) == 1
|
||||||
assert "s" in solutions[0]
|
assert "s" in solutions[0]
|
||||||
|
|
@ -234,10 +252,9 @@ class TestEvaluate:
|
||||||
async def test_workspace_never_in_query_calls(self):
|
async def test_workspace_never_in_query_calls(self):
|
||||||
"""Verify that no matter the algebra structure, workspace is never
|
"""Verify that no matter the algebra structure, workspace is never
|
||||||
passed to TriplesClient.query()."""
|
passed to TriplesClient.query()."""
|
||||||
tc = AsyncMock()
|
tc = make_tc(query_return=[
|
||||||
tc.query.return_value = [
|
|
||||||
make_triple(iri("http://s"), iri("http://p"), lit("o")),
|
make_triple(iri("http://s"), iri("http://p"), lit("o")),
|
||||||
]
|
])
|
||||||
|
|
||||||
bgp1 = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
bgp1 = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
||||||
bgp2 = make_bgp((Variable("a"), Variable("b"), Variable("c")))
|
bgp2 = make_bgp((Variable("a"), Variable("b"), Variable("c")))
|
||||||
|
|
@ -245,61 +262,60 @@ class TestEvaluate:
|
||||||
make_union(bgp1, bgp2), ["s", "p", "o"]
|
make_union(bgp1, bgp2), ["s", "p", "o"]
|
||||||
))
|
))
|
||||||
|
|
||||||
await evaluate(tree, tc, collection="test-coll")
|
await materialise(tree, tc, collection="test-coll")
|
||||||
|
|
||||||
for c in tc.query.call_args_list:
|
|
||||||
assert "workspace" not in c.kwargs
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_join(self):
|
async def test_join(self):
|
||||||
tc = AsyncMock()
|
call_count = 0
|
||||||
tc.query.side_effect = [
|
|
||||||
[make_triple(iri("http://a"), iri("http://p"), lit("v"))],
|
async def mock_query(**kwargs):
|
||||||
[make_triple(iri("http://a"), iri("http://q"), lit("w"))],
|
nonlocal call_count
|
||||||
]
|
call_count += 1
|
||||||
|
if call_count == 1:
|
||||||
|
return [make_triple(iri("http://a"), iri("http://p"), lit("v"))]
|
||||||
|
else:
|
||||||
|
return [make_triple(iri("http://a"), iri("http://q"), lit("w"))]
|
||||||
|
|
||||||
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
bgp1 = make_bgp((Variable("s"), URIRef("http://p"), Variable("v1")))
|
bgp1 = make_bgp((Variable("s"), URIRef("http://p"), Variable("v1")))
|
||||||
bgp2 = make_bgp((Variable("s"), URIRef("http://q"), Variable("v2")))
|
bgp2 = make_bgp((Variable("s"), URIRef("http://q"), Variable("v2")))
|
||||||
tree = make_join(bgp1, bgp2)
|
tree = make_join(bgp1, bgp2)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
assert len(solutions) == 1
|
assert len(solutions) == 1
|
||||||
assert solutions[0]["s"].iri == "http://a"
|
assert solutions[0]["s"].iri == "http://a"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_slice(self):
|
async def test_slice(self):
|
||||||
tc = AsyncMock()
|
|
||||||
triples = [
|
triples = [
|
||||||
make_triple(iri(f"http://s{i}"), iri("http://p"), lit(f"o{i}"))
|
make_triple(iri(f"http://s{i}"), iri("http://p"), lit(f"o{i}"))
|
||||||
for i in range(5)
|
for i in range(5)
|
||||||
]
|
]
|
||||||
tc.query.return_value = triples
|
tc = make_tc(query_return=triples)
|
||||||
|
|
||||||
bgp = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
bgp = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
||||||
tree = make_slice(bgp, start=1, length=2)
|
tree = make_slice(bgp, start=1, length=2)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
assert len(solutions) == 2
|
assert len(solutions) == 2
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_distinct(self):
|
async def test_distinct(self):
|
||||||
tc = AsyncMock()
|
|
||||||
triple = make_triple(iri("http://s"), iri("http://p"), lit("o"))
|
triple = make_triple(iri("http://s"), iri("http://p"), lit("o"))
|
||||||
tc.query.return_value = [triple, triple]
|
tc = make_tc(query_return=[triple, triple])
|
||||||
|
|
||||||
bgp = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
bgp = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
||||||
tree = make_distinct(bgp)
|
tree = make_distinct(bgp)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
assert len(solutions) == 1
|
assert len(solutions) == 1
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_minus_removes_matching(self):
|
async def test_minus_removes_matching(self):
|
||||||
tc = AsyncMock()
|
|
||||||
|
|
||||||
alice = iri("http://example.com/alice")
|
alice = iri("http://example.com/alice")
|
||||||
bob = iri("http://example.com/bob")
|
bob = iri("http://example.com/bob")
|
||||||
knows = iri("http://example.com/knows")
|
knows = iri("http://example.com/knows")
|
||||||
|
|
@ -307,16 +323,8 @@ class TestEvaluate:
|
||||||
charlie = iri("http://example.com/charlie")
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
||||||
left_triple = make_triple(alice, knows, bob)
|
left_triple = make_triple(alice, knows, bob)
|
||||||
right_triple1 = make_triple(alice, knows, bob)
|
|
||||||
right_triple2 = make_triple(alice, hates, charlie)
|
right_triple2 = make_triple(alice, hates, charlie)
|
||||||
|
|
||||||
left_bgp = make_bgp(
|
|
||||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
|
||||||
)
|
|
||||||
right_bgp = make_bgp(
|
|
||||||
(Variable("s"), URIRef("http://example.com/hates"), Variable("r"))
|
|
||||||
)
|
|
||||||
|
|
||||||
async def mock_query(**kwargs):
|
async def mock_query(**kwargs):
|
||||||
pred = kwargs.get("p")
|
pred = kwargs.get("p")
|
||||||
if pred and pred.iri == "http://example.com/knows":
|
if pred and pred.iri == "http://example.com/knows":
|
||||||
|
|
@ -325,7 +333,14 @@ class TestEvaluate:
|
||||||
return [right_triple2]
|
return [right_triple2]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
tc.query.side_effect = mock_query
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||||
|
)
|
||||||
|
right_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/hates"), Variable("r"))
|
||||||
|
)
|
||||||
|
|
||||||
tree = make_select(
|
tree = make_select(
|
||||||
make_project(
|
make_project(
|
||||||
|
|
@ -334,21 +349,25 @@ class TestEvaluate:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
# alice knows bob, but alice also hates charlie
|
|
||||||
# shared var is "s" (alice), so alice's solution is removed
|
|
||||||
assert len(solutions) == 0
|
assert len(solutions) == 0
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_minus_no_shared_vars_preserves_all(self):
|
async def test_minus_no_shared_vars_preserves_all(self):
|
||||||
tc = AsyncMock()
|
|
||||||
|
|
||||||
alice = iri("http://example.com/alice")
|
alice = iri("http://example.com/alice")
|
||||||
bob = iri("http://example.com/bob")
|
bob = iri("http://example.com/bob")
|
||||||
|
|
||||||
left_triple = make_triple(alice, iri("http://example.com/p"), bob)
|
left_triple = make_triple(alice, iri("http://example.com/p"), bob)
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
pred = kwargs.get("p")
|
||||||
|
if pred and pred.iri == "http://example.com/p":
|
||||||
|
return [left_triple]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
left_bgp = make_bgp(
|
left_bgp = make_bgp(
|
||||||
(Variable("s"), URIRef("http://example.com/p"), Variable("o"))
|
(Variable("s"), URIRef("http://example.com/p"), Variable("o"))
|
||||||
)
|
)
|
||||||
|
|
@ -356,14 +375,6 @@ class TestEvaluate:
|
||||||
(Variable("x"), URIRef("http://example.com/q"), Variable("y"))
|
(Variable("x"), URIRef("http://example.com/q"), Variable("y"))
|
||||||
)
|
)
|
||||||
|
|
||||||
async def mock_query(**kwargs):
|
|
||||||
pred = kwargs.get("p")
|
|
||||||
if pred and pred.iri == "http://example.com/p":
|
|
||||||
return [left_triple]
|
|
||||||
return []
|
|
||||||
|
|
||||||
tc.query.side_effect = mock_query
|
|
||||||
|
|
||||||
tree = make_select(
|
tree = make_select(
|
||||||
make_project(
|
make_project(
|
||||||
make_minus(left_bgp, right_bgp),
|
make_minus(left_bgp, right_bgp),
|
||||||
|
|
@ -371,14 +382,12 @@ class TestEvaluate:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
assert len(solutions) == 1
|
assert len(solutions) == 1
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_filter_exists_keeps_matching(self):
|
async def test_filter_exists_keeps_matching(self):
|
||||||
tc = AsyncMock()
|
|
||||||
|
|
||||||
alice = iri("http://example.com/alice")
|
alice = iri("http://example.com/alice")
|
||||||
bob = iri("http://example.com/bob")
|
bob = iri("http://example.com/bob")
|
||||||
charlie = iri("http://example.com/charlie")
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
@ -387,13 +396,6 @@ class TestEvaluate:
|
||||||
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
||||||
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
||||||
|
|
||||||
left_bgp = make_bgp(
|
|
||||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
|
||||||
)
|
|
||||||
exists_bgp = make_bgp(
|
|
||||||
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
|
||||||
)
|
|
||||||
|
|
||||||
async def mock_query(**kwargs):
|
async def mock_query(**kwargs):
|
||||||
pred = kwargs.get("p")
|
pred = kwargs.get("p")
|
||||||
if pred and pred.iri == "http://example.com/knows":
|
if pred and pred.iri == "http://example.com/knows":
|
||||||
|
|
@ -402,7 +404,14 @@ class TestEvaluate:
|
||||||
return [exists_triple]
|
return [exists_triple]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
tc.query.side_effect = mock_query
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||||
|
)
|
||||||
|
exists_bgp = make_bgp(
|
||||||
|
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
||||||
|
)
|
||||||
|
|
||||||
exists_expr = CompValue("Builtin_EXISTS")
|
exists_expr = CompValue("Builtin_EXISTS")
|
||||||
exists_expr.graph = exists_bgp
|
exists_expr.graph = exists_bgp
|
||||||
|
|
@ -414,17 +423,14 @@ class TestEvaluate:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
# Only bob has a "likes" triple, so only the bob solution passes
|
|
||||||
result_objects = [s["o"].iri for s in solutions]
|
result_objects = [s["o"].iri for s in solutions]
|
||||||
assert "http://example.com/bob" in result_objects
|
assert "http://example.com/bob" in result_objects
|
||||||
assert "http://example.com/charlie" not in result_objects
|
assert "http://example.com/charlie" not in result_objects
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_filter_not_exists_removes_matching(self):
|
async def test_filter_not_exists_removes_matching(self):
|
||||||
tc = AsyncMock()
|
|
||||||
|
|
||||||
alice = iri("http://example.com/alice")
|
alice = iri("http://example.com/alice")
|
||||||
bob = iri("http://example.com/bob")
|
bob = iri("http://example.com/bob")
|
||||||
charlie = iri("http://example.com/charlie")
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
@ -433,13 +439,6 @@ class TestEvaluate:
|
||||||
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
||||||
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
||||||
|
|
||||||
left_bgp = make_bgp(
|
|
||||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
|
||||||
)
|
|
||||||
exists_bgp = make_bgp(
|
|
||||||
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
|
||||||
)
|
|
||||||
|
|
||||||
async def mock_query(**kwargs):
|
async def mock_query(**kwargs):
|
||||||
pred = kwargs.get("p")
|
pred = kwargs.get("p")
|
||||||
if pred and pred.iri == "http://example.com/knows":
|
if pred and pred.iri == "http://example.com/knows":
|
||||||
|
|
@ -448,7 +447,14 @@ class TestEvaluate:
|
||||||
return [exists_triple]
|
return [exists_triple]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
tc.query.side_effect = mock_query
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||||
|
)
|
||||||
|
exists_bgp = make_bgp(
|
||||||
|
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
||||||
|
)
|
||||||
|
|
||||||
not_exists_expr = CompValue("Builtin_NOTEXISTS")
|
not_exists_expr = CompValue("Builtin_NOTEXISTS")
|
||||||
not_exists_expr.graph = exists_bgp
|
not_exists_expr.graph = exists_bgp
|
||||||
|
|
@ -460,28 +466,115 @@ class TestEvaluate:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
solutions = await evaluate(tree, tc, collection="default")
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
# bob has a "likes" triple so is removed; charlie stays
|
|
||||||
result_objects = [s["o"].iri for s in solutions]
|
result_objects = [s["o"].iri for s in solutions]
|
||||||
assert "http://example.com/charlie" in result_objects
|
assert "http://example.com/charlie" in result_objects
|
||||||
assert "http://example.com/bob" not in result_objects
|
assert "http://example.com/bob" not in result_objects
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_join_values_uses_bind_join(self):
|
||||||
|
"""When VALUES is joined with a BGP, the bind join should pass
|
||||||
|
the VALUES bindings into the BGP evaluation so the triple store
|
||||||
|
query is selective (not a wildcard)."""
|
||||||
|
alice = iri("http://example.com/alice")
|
||||||
|
bob = iri("http://example.com/bob")
|
||||||
|
knows = iri("http://example.com/knows")
|
||||||
|
|
||||||
|
queries_issued = []
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
queries_issued.append(kwargs)
|
||||||
|
s, p = kwargs.get("s"), kwargs.get("p")
|
||||||
|
if s and s.iri == "http://example.com/alice" and p and p.iri == "http://example.com/knows":
|
||||||
|
return [make_triple(alice, knows, bob)]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
|
# VALUES ?s { <alice> }
|
||||||
|
values_node = CompValue("values")
|
||||||
|
values_node.var = [Variable("s")]
|
||||||
|
values_node.value = [[URIRef("http://example.com/alice")]]
|
||||||
|
values_node.res = None
|
||||||
|
|
||||||
|
to_multiset = CompValue("ToMultiSet")
|
||||||
|
to_multiset.p = values_node
|
||||||
|
|
||||||
|
bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o")),
|
||||||
|
)
|
||||||
|
|
||||||
|
tree = make_join(to_multiset, bgp)
|
||||||
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
|
assert len(solutions) == 1
|
||||||
|
assert solutions[0]["s"].iri == "http://example.com/alice"
|
||||||
|
assert solutions[0]["o"].iri == "http://example.com/bob"
|
||||||
|
|
||||||
|
# The key assertion: the BGP query should have received
|
||||||
|
# s=alice (bound from VALUES), NOT s=None (wildcard)
|
||||||
|
assert len(queries_issued) == 1
|
||||||
|
assert queries_issued[0]["s"] is not None
|
||||||
|
assert queries_issued[0]["s"].iri == "http://example.com/alice"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_join_values_multiple_bindings(self):
|
||||||
|
"""Bind join with multiple VALUES bindings."""
|
||||||
|
alice = iri("http://example.com/alice")
|
||||||
|
bob = iri("http://example.com/bob")
|
||||||
|
knows = iri("http://example.com/knows")
|
||||||
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
s = kwargs.get("s")
|
||||||
|
if s and s.iri == "http://example.com/alice":
|
||||||
|
return [make_triple(alice, knows, bob)]
|
||||||
|
elif s and s.iri == "http://example.com/bob":
|
||||||
|
return [make_triple(bob, knows, charlie)]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc = make_tc(query_side_effect=mock_query)
|
||||||
|
|
||||||
|
values_node = CompValue("values")
|
||||||
|
values_node.var = [Variable("s")]
|
||||||
|
values_node.value = [
|
||||||
|
[URIRef("http://example.com/alice")],
|
||||||
|
[URIRef("http://example.com/bob")],
|
||||||
|
]
|
||||||
|
values_node.res = None
|
||||||
|
|
||||||
|
to_multiset = CompValue("ToMultiSet")
|
||||||
|
to_multiset.p = values_node
|
||||||
|
|
||||||
|
bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o")),
|
||||||
|
)
|
||||||
|
|
||||||
|
tree = make_join(to_multiset, bgp)
|
||||||
|
solutions = await materialise(tree, tc, collection="default")
|
||||||
|
|
||||||
|
assert len(solutions) == 2
|
||||||
|
subjects = {s["s"].iri for s in solutions}
|
||||||
|
assert subjects == {
|
||||||
|
"http://example.com/alice",
|
||||||
|
"http://example.com/bob",
|
||||||
|
}
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_unsupported_node_returns_empty_solution(self):
|
async def test_unsupported_node_returns_empty_solution(self):
|
||||||
tc = AsyncMock()
|
tc = make_tc()
|
||||||
|
|
||||||
node = CompValue("SomethingUnknown")
|
node = CompValue("SomethingUnknown")
|
||||||
|
|
||||||
solutions = await evaluate(node, tc, collection="default")
|
solutions = await materialise(node, tc, collection="default")
|
||||||
|
|
||||||
assert solutions == [{}]
|
assert solutions == [{}]
|
||||||
tc.query.assert_not_called()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_non_compvalue_returns_empty_solution(self):
|
async def test_non_compvalue_returns_empty_solution(self):
|
||||||
tc = AsyncMock()
|
tc = make_tc()
|
||||||
|
|
||||||
solutions = await evaluate("not a node", tc, collection="default")
|
solutions = await materialise("not a node", tc, collection="default")
|
||||||
|
|
||||||
assert solutions == [{}]
|
assert solutions == [{}]
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||||
|
|
@ -44,6 +45,60 @@ def from_value(x: Any) -> Any:
|
||||||
return Term(type=LITERAL, value=str(x))
|
return Term(type=LITERAL, value=str(x))
|
||||||
|
|
||||||
class TriplesClient(RequestResponse):
|
class TriplesClient(RequestResponse):
|
||||||
|
|
||||||
|
async def query_gen(self, s=None, p=None, o=None, limit=20,
|
||||||
|
collection="default",
|
||||||
|
batch_size=20, timeout=30, g=None):
|
||||||
|
"""Async generator yielding Triple objects as batches arrive."""
|
||||||
|
queue = asyncio.Queue()
|
||||||
|
done = False
|
||||||
|
|
||||||
|
async def recipient(resp):
|
||||||
|
if resp.error:
|
||||||
|
raise RuntimeError(resp.error.message)
|
||||||
|
|
||||||
|
batch = [
|
||||||
|
Triple(to_value(v.s), to_value(v.p), to_value(v.o))
|
||||||
|
for v in resp.triples
|
||||||
|
]
|
||||||
|
await queue.put(batch)
|
||||||
|
|
||||||
|
if resp.is_final:
|
||||||
|
await queue.put(None)
|
||||||
|
|
||||||
|
return resp.is_final
|
||||||
|
|
||||||
|
# Launch the streaming request as a background task
|
||||||
|
task = asyncio.ensure_future(self.request(
|
||||||
|
TriplesQueryRequest(
|
||||||
|
s=from_value(s),
|
||||||
|
p=from_value(p),
|
||||||
|
o=from_value(o),
|
||||||
|
limit=limit,
|
||||||
|
collection=collection,
|
||||||
|
streaming=True,
|
||||||
|
batch_size=batch_size,
|
||||||
|
g=g,
|
||||||
|
),
|
||||||
|
timeout=timeout,
|
||||||
|
recipient=recipient,
|
||||||
|
))
|
||||||
|
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
batch = await queue.get()
|
||||||
|
if batch is None:
|
||||||
|
break
|
||||||
|
for triple in batch:
|
||||||
|
yield triple
|
||||||
|
finally:
|
||||||
|
if not task.done():
|
||||||
|
task.cancel()
|
||||||
|
try:
|
||||||
|
await task
|
||||||
|
except (asyncio.CancelledError, Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
async def query(self, s=None, p=None, o=None, limit=20,
|
async def query(self, s=None, p=None, o=None, limit=20,
|
||||||
collection="default",
|
collection="default",
|
||||||
timeout=30, g=None):
|
timeout=30, g=None):
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,10 @@ SPARQL algebra evaluator.
|
||||||
Recursively evaluates an rdflib SPARQL algebra tree by issuing triple
|
Recursively evaluates an rdflib SPARQL algebra tree by issuing triple
|
||||||
pattern queries via TriplesClient (streaming) and performing in-memory
|
pattern queries via TriplesClient (streaming) and performing in-memory
|
||||||
joins, filters, and projections.
|
joins, filters, and projections.
|
||||||
|
|
||||||
|
Handlers are async generators that yield solutions incrementally.
|
||||||
|
Blocking operators (joins, sort, group, distinct) materialise their
|
||||||
|
upstream into a list at the boundary, then yield results.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -34,56 +38,56 @@ async def evaluate(node, triples_client, collection, limit=10000):
|
||||||
"""
|
"""
|
||||||
Evaluate a SPARQL algebra node.
|
Evaluate a SPARQL algebra node.
|
||||||
|
|
||||||
Args:
|
Yields solutions (dicts mapping variable names to Term values)
|
||||||
node: rdflib CompValue algebra node
|
incrementally as an async generator.
|
||||||
triples_client: TriplesClient instance for triple pattern queries
|
|
||||||
collection: collection identifier
|
|
||||||
limit: safety limit on results
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
list of solutions (dicts mapping variable names to Term values)
|
|
||||||
"""
|
"""
|
||||||
if not isinstance(node, CompValue):
|
if not isinstance(node, CompValue):
|
||||||
logger.warning(f"Expected CompValue, got {type(node)}: {node}")
|
logger.warning(f"Expected CompValue, got {type(node)}: {node}")
|
||||||
return [{}]
|
yield {}
|
||||||
|
return
|
||||||
|
|
||||||
name = node.name
|
name = node.name
|
||||||
handler = _HANDLERS.get(name)
|
handler = _HANDLERS.get(name)
|
||||||
|
|
||||||
if handler is None:
|
if handler is None:
|
||||||
logger.warning(f"Unsupported algebra node: {name}")
|
logger.warning(f"Unsupported algebra node: {name}")
|
||||||
return [{}]
|
yield {}
|
||||||
|
return
|
||||||
|
|
||||||
return await handler(node, triples_client, collection, limit)
|
async for sol in handler(node, triples_client, collection, limit):
|
||||||
|
yield sol
|
||||||
|
|
||||||
|
|
||||||
# --- Node handlers ---
|
async def materialise(node, triples_client, collection, limit=10000):
|
||||||
|
"""Collect all solutions from evaluate() into a list."""
|
||||||
|
return [sol async for sol in evaluate(node, triples_client, collection, limit)]
|
||||||
|
|
||||||
|
|
||||||
|
# --- Node handlers (async generators) ---
|
||||||
|
|
||||||
async def _eval_select_query(node, tc, collection, limit):
|
async def _eval_select_query(node, tc, collection, limit):
|
||||||
"""Evaluate a SelectQuery node."""
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
return await evaluate(node.p, tc, collection, limit)
|
yield sol
|
||||||
|
|
||||||
|
|
||||||
async def _eval_project(node, tc, collection, limit):
|
async def _eval_project(node, tc, collection, limit):
|
||||||
"""Evaluate a Project node (SELECT variable projection)."""
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
variables = [str(v) for v in node.PV]
|
variables = [str(v) for v in node.PV]
|
||||||
return project(solutions, variables)
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
|
yield {v: sol[v] for v in variables if v in sol}
|
||||||
|
|
||||||
|
|
||||||
async def _eval_bgp(node, tc, collection, limit):
|
async def _eval_bgp(node, tc, collection, limit):
|
||||||
"""
|
"""
|
||||||
Evaluate a Basic Graph Pattern.
|
Evaluate a Basic Graph Pattern.
|
||||||
|
|
||||||
Issues streaming triple pattern queries and joins results. Patterns
|
Patterns are ordered by selectivity and evaluated sequentially.
|
||||||
are ordered by selectivity (more bound terms first) and evaluated
|
For the final pattern, results stream directly from the triple store.
|
||||||
sequentially with bound-variable substitution.
|
|
||||||
"""
|
"""
|
||||||
triples = node.triples
|
triples = node.triples
|
||||||
if not triples:
|
if not triples:
|
||||||
return [{}]
|
yield {}
|
||||||
|
return
|
||||||
|
|
||||||
# Sort patterns by selectivity: more bound terms = more selective
|
|
||||||
def selectivity(pattern):
|
def selectivity(pattern):
|
||||||
return sum(1 for t in pattern if not isinstance(t, Variable))
|
return sum(1 for t in pattern if not isinstance(t, Variable))
|
||||||
|
|
||||||
|
|
@ -91,55 +95,222 @@ async def _eval_bgp(node, tc, collection, limit):
|
||||||
enumerate(triples), key=lambda x: -selectivity(x[1])
|
enumerate(triples), key=lambda x: -selectivity(x[1])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# For all patterns except the last, we must materialise intermediate
|
||||||
|
# solutions because each pattern depends on bindings from prior ones.
|
||||||
|
# The last pattern streams directly.
|
||||||
solutions = [{}]
|
solutions = [{}]
|
||||||
|
|
||||||
for _, pattern in sorted_patterns:
|
for pattern_idx, (_, pattern) in enumerate(sorted_patterns):
|
||||||
s_tmpl, p_tmpl, o_tmpl = pattern
|
s_tmpl, p_tmpl, o_tmpl = pattern
|
||||||
|
is_last = (pattern_idx == len(sorted_patterns) - 1)
|
||||||
|
|
||||||
new_solutions = []
|
if is_last:
|
||||||
|
# Stream the final pattern — yield as triples arrive
|
||||||
|
count = 0
|
||||||
|
for sol in solutions:
|
||||||
|
s_val = _resolve_term(s_tmpl, sol)
|
||||||
|
p_val = _resolve_term(p_tmpl, sol)
|
||||||
|
o_val = _resolve_term(o_tmpl, sol)
|
||||||
|
|
||||||
for sol in solutions:
|
async for triple in tc.query_gen(
|
||||||
# Substitute known bindings into the pattern
|
s=s_val, p=p_val, o=o_val,
|
||||||
s_val = _resolve_term(s_tmpl, sol)
|
limit=limit, collection=collection,
|
||||||
p_val = _resolve_term(p_tmpl, sol)
|
):
|
||||||
o_val = _resolve_term(o_tmpl, sol)
|
binding = dict(sol)
|
||||||
|
if isinstance(s_tmpl, Variable):
|
||||||
|
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||||
|
if isinstance(p_tmpl, Variable):
|
||||||
|
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||||
|
if isinstance(o_tmpl, Variable):
|
||||||
|
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||||
|
yield binding
|
||||||
|
count += 1
|
||||||
|
if count >= limit:
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
# Materialise intermediate patterns
|
||||||
|
new_solutions = []
|
||||||
|
for sol in solutions:
|
||||||
|
s_val = _resolve_term(s_tmpl, sol)
|
||||||
|
p_val = _resolve_term(p_tmpl, sol)
|
||||||
|
o_val = _resolve_term(o_tmpl, sol)
|
||||||
|
|
||||||
# Query the triples store
|
async for triple in tc.query_gen(
|
||||||
results = await _query_pattern(
|
s=s_val, p=p_val, o=o_val,
|
||||||
tc, s_val, p_val, o_val, collection, limit
|
limit=limit, collection=collection,
|
||||||
)
|
):
|
||||||
|
binding = dict(sol)
|
||||||
|
if isinstance(s_tmpl, Variable):
|
||||||
|
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||||
|
if isinstance(p_tmpl, Variable):
|
||||||
|
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||||
|
if isinstance(o_tmpl, Variable):
|
||||||
|
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||||
|
new_solutions.append(binding)
|
||||||
|
|
||||||
# Map results back to variable bindings,
|
solutions = new_solutions
|
||||||
# converting Uri/Literal to Term objects
|
if not solutions:
|
||||||
for triple in results:
|
return
|
||||||
binding = dict(sol)
|
|
||||||
if isinstance(s_tmpl, Variable):
|
|
||||||
binding[str(s_tmpl)] = _to_term(triple.s)
|
|
||||||
if isinstance(p_tmpl, Variable):
|
|
||||||
binding[str(p_tmpl)] = _to_term(triple.p)
|
|
||||||
if isinstance(o_tmpl, Variable):
|
|
||||||
binding[str(o_tmpl)] = _to_term(triple.o)
|
|
||||||
new_solutions.append(binding)
|
|
||||||
|
|
||||||
solutions = new_solutions
|
|
||||||
|
|
||||||
if not solutions:
|
# --- Blocking operators: materialise upstream, then yield ---
|
||||||
break
|
|
||||||
|
|
||||||
return solutions[:limit]
|
def _is_small_node(node):
    """
    Heuristic: report whether ``node`` is expected to yield few solutions.

    "values" and "ToMultiSet" nodes count as small. An "Extend" node is
    as small as the node it wraps. Everything else, including objects
    that are not CompValue instances, is reported as not small.
    """
    current = node
    while isinstance(current, CompValue):
        if current.name in ("values", "ToMultiSet"):
            return True
        if current.name != "Extend" or not hasattr(current, "p"):
            return False
        # Look through the Extend wrapper and classify what it wraps.
        current = current.p
    return False
|
||||||
|
|
||||||
|
|
||||||
async def _eval_join(node, tc, collection, limit):
|
async def _eval_join(node, tc, collection, limit):
|
||||||
"""Evaluate a Join node."""
|
# Bind join: if one side is small (e.g. VALUES), materialise it and
|
||||||
left = await evaluate(node.p1, tc, collection, limit)
|
# substitute its bindings into the other side's evaluation. This
|
||||||
right = await evaluate(node.p2, tc, collection, limit)
|
# turns wildcard BGP queries into selective ones.
|
||||||
return hash_join(left, right)[:limit]
|
if _is_small_node(node.p1):
|
||||||
|
yield_from = _bind_join(node.p1, node.p2, tc, collection, limit)
|
||||||
|
elif _is_small_node(node.p2):
|
||||||
|
yield_from = _bind_join(node.p2, node.p1, tc, collection, limit)
|
||||||
|
else:
|
||||||
|
yield_from = _hash_join(node, tc, collection, limit)
|
||||||
|
|
||||||
|
async for sol in yield_from:
|
||||||
|
yield sol
|
||||||
|
|
||||||
|
|
||||||
|
async def _hash_join(node, tc, collection, limit):
    """
    Join the two operands of ``node`` after materialising each side.

    The left operand (p1) is collected first, then the right (p2). The
    joined result is truncated to ``limit`` solutions before yielding.
    """
    lhs = await materialise(node.p1, tc, collection, limit)
    rhs = await materialise(node.p2, tc, collection, limit)
    joined = hash_join(lhs, rhs)
    for row in joined[:limit]:
        yield row
|
||||||
|
|
||||||
|
|
||||||
|
async def _bind_join(small_node, big_node, tc, collection, limit):
    """
    Join by iterating the small side and evaluating the big side against it.

    The small side is materialised first. When the big side is a BGP, each
    small-side binding is pre-seeded into it so the triple store is queried
    with those variables already bound. For any other big-side node the
    bindings cannot be pushed down, so with more than one small-side
    binding it is evaluated once and its solutions are reused, instead of
    being re-evaluated from scratch for every binding.

    Yields merged solutions and stops after ``limit`` of them.
    """
    small_sols = await materialise(small_node, tc, collection, limit)
    if not small_sols:
        return

    count = 0
    pushes_down = isinstance(big_node, CompValue) and big_node.name == "BGP"

    if pushes_down or len(small_sols) == 1:
        # Either the bindings make each evaluation selective, or there is
        # only a single pass anyway: keep streaming from the big side.
        for binding in small_sols:
            async for sol in _evaluate_with_bindings(
                big_node, binding, tc, collection, limit
            ):
                yield sol
                count += 1
                if count >= limit:
                    return
        return

    # Non-BGP big side with several bindings: evaluate it once. Result
    # order matches the per-binding evaluation (outer loop over the small
    # side, inner loop over the big side's solutions).
    big_sols = await materialise(big_node, tc, collection, limit)
    for binding in small_sols:
        for sol in big_sols:
            merged = _merge_compatible(binding, sol)
            if merged is None:
                continue
            yield merged
            count += 1
            if count >= limit:
                return
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_compatible(left, right):
    """
    Combine two solutions, or return None when they disagree.

    A variable bound in both must compare equal under _term_key. The
    result keeps every binding of ``left`` and adds the bindings that
    only ``right`` has. Neither input is modified.
    """
    shared = [name for name in right if name in left]
    if any(_term_key(left[name]) != _term_key(right[name]) for name in shared):
        return None

    combined = dict(left)
    for name, value in right.items():
        # Shared names already hold the left-hand value; only add new ones.
        combined.setdefault(name, value)
    return combined
|
||||||
|
|
||||||
|
|
||||||
|
async def _evaluate_with_bindings(node, bindings, tc, collection, limit):
    """
    Evaluate ``node`` under a set of already-known variable bindings.

    A BGP node is handed to _eval_bgp_with_bindings, which seeds the
    bindings into pattern resolution. Any other node is evaluated as
    usual, and each solution is kept only if it merges cleanly with
    ``bindings``; the merged solution is what gets yielded.
    """
    if not (isinstance(node, CompValue) and node.name == "BGP"):
        async for candidate in evaluate(node, tc, collection, limit):
            combined = _merge_compatible(bindings, candidate)
            if combined is not None:
                yield combined
        return

    seeded = _eval_bgp_with_bindings(node, bindings, tc, collection, limit)
    async for solution in seeded:
        yield solution
|
||||||
|
|
||||||
|
|
||||||
|
async def _eval_bgp_with_bindings(node, bindings, tc, collection, limit):
    """
    Evaluate a BGP starting from ``bindings`` instead of an empty solution.

    Patterns are ordered so that those with the most fixed positions run
    first; a position is fixed when it holds a non-variable term or a
    variable present in ``bindings``. Every pattern but the last is
    collected into a list of partial solutions, and the last pattern is
    streamed, stopping once ``limit`` solutions have been yielded. An
    empty BGP yields a single copy of ``bindings``.
    """
    patterns = node.triples
    if not patterns:
        yield dict(bindings)
        return

    def fixed_positions(pattern):
        return sum(
            1 for term in pattern
            if not isinstance(term, Variable) or str(term) in bindings
        )

    # Negated key with a stable sort: most-fixed first, ties keep the
    # order in which the patterns were written.
    ordered = sorted(patterns, key=lambda pattern: -fixed_positions(pattern))

    async def extend(pattern, partial):
        # Yield ``partial`` extended once per triple matching ``pattern``.
        s_tmpl, p_tmpl, o_tmpl = pattern
        s_val = _resolve_term(s_tmpl, partial)
        p_val = _resolve_term(p_tmpl, partial)
        o_val = _resolve_term(o_tmpl, partial)

        async for triple in tc.query_gen(
            s=s_val, p=p_val, o=o_val,
            limit=limit, collection=collection,
        ):
            row = dict(partial)
            if isinstance(s_tmpl, Variable):
                row[str(s_tmpl)] = _to_term(triple.s)
            if isinstance(p_tmpl, Variable):
                row[str(p_tmpl)] = _to_term(triple.p)
            if isinstance(o_tmpl, Variable):
                row[str(o_tmpl)] = _to_term(triple.o)
            yield row

    *leading, final = ordered

    partials = [dict(bindings)]
    for pattern in leading:
        grown = []
        for partial in partials:
            async for row in extend(pattern, partial):
                grown.append(row)
        partials = grown
        if not partials:
            # Nothing matched an earlier pattern, so the BGP has no solutions.
            return

    emitted = 0
    for partial in partials:
        async for row in extend(final, partial):
            yield row
            emitted += 1
            if emitted >= limit:
                return
|
||||||
|
|
||||||
|
|
||||||
async def _eval_left_join(node, tc, collection, limit):
|
async def _eval_left_join(node, tc, collection, limit):
|
||||||
"""Evaluate a LeftJoin node (OPTIONAL)."""
|
# Buffer right side for hash index; stream left through probe
|
||||||
left_sols = await evaluate(node.p1, tc, collection, limit)
|
left_sols = await materialise(node.p1, tc, collection, limit)
|
||||||
right_sols = await evaluate(node.p2, tc, collection, limit)
|
right_sols = await materialise(node.p2, tc, collection, limit)
|
||||||
|
|
||||||
filter_fn = None
|
filter_fn = None
|
||||||
if hasattr(node, "expr") and node.expr is not None:
|
if hasattr(node, "expr") and node.expr is not None:
|
||||||
|
|
@ -149,27 +320,83 @@ async def _eval_left_join(node, tc, collection, limit):
|
||||||
evaluate_expression(expr, sol)
|
evaluate_expression(expr, sol)
|
||||||
)
|
)
|
||||||
|
|
||||||
return left_join(left_sols, right_sols, filter_fn)[:limit]
|
for sol in left_join(left_sols, right_sols, filter_fn)[:limit]:
|
||||||
|
yield sol
|
||||||
|
|
||||||
async def _eval_union(node, tc, collection, limit):
|
|
||||||
"""Evaluate a Union node."""
|
|
||||||
left = await evaluate(node.p1, tc, collection, limit)
|
|
||||||
right = await evaluate(node.p2, tc, collection, limit)
|
|
||||||
return union(left, right)[:limit]
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_minus(node, tc, collection, limit):
|
async def _eval_minus(node, tc, collection, limit):
|
||||||
"""Evaluate a Minus node."""
|
left = await materialise(node.p1, tc, collection, limit)
|
||||||
left = await evaluate(node.p1, tc, collection, limit)
|
right = await materialise(node.p2, tc, collection, limit)
|
||||||
right = await evaluate(node.p2, tc, collection, limit)
|
for sol in minus(left, right):
|
||||||
return minus(left, right)
|
yield sol
|
||||||
|
|
||||||
|
|
||||||
|
async def _eval_distinct(node, tc, collection, limit):
    """
    Evaluate a Distinct node, yielding each distinct solution once.

    Solutions stream through in upstream order. A solution is identified
    by its sorted (variable, _term_key(value)) pairs, and only the first
    occurrence of each identity is yielded.
    """
    emitted_keys = set()
    async for row in evaluate(node.p, tc, collection, limit):
        fingerprint = tuple(sorted(
            (name, _term_key(value)) for name, value in row.items()
        ))
        if fingerprint in emitted_keys:
            continue
        emitted_keys.add(fingerprint)
        yield row
|
||||||
|
|
||||||
|
|
||||||
|
async def _eval_reduced(node, tc, collection, limit):
    """Evaluate a Reduced node by delegating to the Distinct handler."""
    deduplicated = _eval_distinct(node, tc, collection, limit)
    async for row in deduplicated:
        yield row
|
||||||
|
|
||||||
|
|
||||||
|
async def _eval_order_by(node, tc, collection, limit):
    """
    Evaluate an OrderBy node.

    The upstream solutions are materialised, then handed to order_by
    together with one (key function, ascending) pair per entry of
    ``node.expr``. An "OrderCondition" entry sorts descending only when
    its order is "DESC"; any other entry is sorted ascending.
    """
    rows = await materialise(node.p, tc, collection, limit)

    def key_for(expression):
        # Factory so each key function captures its own expression.
        return lambda sol: evaluate_expression(expression, sol)

    sort_keys = []
    for condition in node.expr:
        if isinstance(condition, CompValue) and condition.name == "OrderCondition":
            sort_keys.append(
                (key_for(condition.expr), condition.order != "DESC")
            )
        else:
            sort_keys.append((key_for(condition), True))

    for row in order_by(rows, sort_keys):
        yield row
|
||||||
|
|
||||||
|
|
||||||
|
# --- Streamable operators ---
|
||||||
|
|
||||||
|
async def _eval_slice(node, tc, collection, limit):
    """
    Evaluate a Slice node (LIMIT/OFFSET) as a stream.

    The first ``node.start`` upstream solutions are skipped (a missing
    start counts as 0), then at most ``node.length`` solutions are
    yielded. A length of None means no upper bound. Returning as soon as
    the length is reached stops pulling from the upstream generator.
    """
    offset = node.start or 0
    length = node.length

    # LIMIT 0 must produce nothing. The in-loop check only runs after a
    # solution has been yielded, so without this guard one row would
    # escape. Returning here also avoids evaluating upstream at all.
    if length is not None and length <= 0:
        return

    skipped = 0
    emitted = 0

    async for sol in evaluate(node.p, tc, collection, limit):
        if skipped < offset:
            skipped += 1
            continue
        yield sol
        emitted += 1
        if length is not None and emitted >= length:
            return
|
||||||
|
|
||||||
|
|
||||||
|
async def _eval_union(node, tc, collection, limit):
    """
    Evaluate a Union node as a stream.

    All solutions of the left branch (p1) are yielded first, followed by
    all solutions of the right branch (p2).
    """
    for branch in (node.p1, node.p2):
        async for row in evaluate(branch, tc, collection, limit):
            yield row
|
||||||
|
|
||||||
|
|
||||||
async def _check_exists(graph_node, sol, tc, collection, limit):
|
async def _check_exists(graph_node, sol, tc, collection, limit):
|
||||||
"""Evaluate an EXISTS graph pattern against a solution."""
|
"""Evaluate an EXISTS graph pattern against a solution."""
|
||||||
results = await evaluate(graph_node, tc, collection, limit)
|
async for r in evaluate(graph_node, tc, collection, limit):
|
||||||
for r in results:
|
|
||||||
shared = set(sol.keys()) & set(r.keys())
|
shared = set(sol.keys()) & set(r.keys())
|
||||||
if all(
|
if all(
|
||||||
_term_key(sol[v]) == _term_key(r[v])
|
_term_key(sol[v]) == _term_key(r[v])
|
||||||
|
|
@ -206,8 +433,6 @@ async def _pre_eval_exists(expr, sol, tc, collection, limit, cache):
|
||||||
|
|
||||||
|
|
||||||
async def _eval_filter(node, tc, collection, limit):
|
async def _eval_filter(node, tc, collection, limit):
|
||||||
"""Evaluate a Filter node."""
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
expr = node.expr
|
expr = node.expr
|
||||||
exists_cache = {}
|
exists_cache = {}
|
||||||
|
|
||||||
|
|
@ -215,60 +440,13 @@ async def _eval_filter(node, tc, collection, limit):
|
||||||
key = id(graph_node), id(sol)
|
key = id(graph_node), id(sol)
|
||||||
return exists_cache.get(key, False)
|
return exists_cache.get(key, False)
|
||||||
|
|
||||||
result = []
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
for sol in solutions:
|
|
||||||
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
||||||
if _effective_boolean(evaluate_expression(expr, sol, exists_cb=exists_cb)):
|
if _effective_boolean(evaluate_expression(expr, sol, exists_cb=exists_cb)):
|
||||||
result.append(sol)
|
yield sol
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_distinct(node, tc, collection, limit):
|
|
||||||
"""Evaluate a Distinct node."""
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
return distinct(solutions)
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_reduced(node, tc, collection, limit):
|
|
||||||
"""Evaluate a Reduced node (like Distinct but implementation-defined)."""
|
|
||||||
# Treat same as Distinct
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
return distinct(solutions)
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_order_by(node, tc, collection, limit):
|
|
||||||
"""Evaluate an OrderBy node."""
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
|
|
||||||
key_fns = []
|
|
||||||
for cond in node.expr:
|
|
||||||
if isinstance(cond, CompValue) and cond.name == "OrderCondition":
|
|
||||||
ascending = cond.order != "DESC"
|
|
||||||
expr = cond.expr
|
|
||||||
key_fns.append((
|
|
||||||
lambda sol, e=expr: evaluate_expression(e, sol),
|
|
||||||
ascending,
|
|
||||||
))
|
|
||||||
else:
|
|
||||||
# Simple variable or expression
|
|
||||||
key_fns.append((
|
|
||||||
lambda sol, e=cond: evaluate_expression(e, sol),
|
|
||||||
True,
|
|
||||||
))
|
|
||||||
|
|
||||||
return order_by(solutions, key_fns)
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_slice(node, tc, collection, limit):
|
|
||||||
"""Evaluate a Slice node (LIMIT/OFFSET)."""
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
return slice_solutions(solutions, node.start or 0, node.length)
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_extend(node, tc, collection, limit):
|
async def _eval_extend(node, tc, collection, limit):
|
||||||
"""Evaluate an Extend node (BIND)."""
|
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
var_name = str(node.var)
|
var_name = str(node.var)
|
||||||
expr = node.expr
|
expr = node.expr
|
||||||
exists_cache = {}
|
exists_cache = {}
|
||||||
|
|
@ -277,8 +455,7 @@ async def _eval_extend(node, tc, collection, limit):
|
||||||
key = id(graph_node), id(sol)
|
key = id(graph_node), id(sol)
|
||||||
return exists_cache.get(key, False)
|
return exists_cache.get(key, False)
|
||||||
|
|
||||||
result = []
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
for sol in solutions:
|
|
||||||
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
||||||
val = evaluate_expression(expr, sol, exists_cb=exists_cb)
|
val = evaluate_expression(expr, sol, exists_cb=exists_cb)
|
||||||
new_sol = dict(sol)
|
new_sol = dict(sol)
|
||||||
|
|
@ -295,16 +472,14 @@ async def _eval_extend(node, tc, collection, limit):
|
||||||
)
|
)
|
||||||
elif val is not None:
|
elif val is not None:
|
||||||
new_sol[var_name] = Term(type=LITERAL, value=str(val))
|
new_sol[var_name] = Term(type=LITERAL, value=str(val))
|
||||||
result.append(new_sol)
|
yield new_sol
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
# --- Aggregation (blocking) ---
|
||||||
|
|
||||||
async def _eval_group(node, tc, collection, limit):
|
async def _eval_group(node, tc, collection, limit):
|
||||||
"""Evaluate a Group node (GROUP BY with aggregation)."""
|
solutions = await materialise(node.p, tc, collection, limit)
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
|
|
||||||
# Extract grouping expressions
|
|
||||||
group_exprs = []
|
group_exprs = []
|
||||||
if hasattr(node, "expr") and node.expr:
|
if hasattr(node, "expr") and node.expr:
|
||||||
for expr in node.expr:
|
for expr in node.expr:
|
||||||
|
|
@ -315,7 +490,6 @@ async def _eval_group(node, tc, collection, limit):
|
||||||
else:
|
else:
|
||||||
group_exprs.append((expr, None))
|
group_exprs.append((expr, None))
|
||||||
|
|
||||||
# Group solutions
|
|
||||||
groups = defaultdict(list)
|
groups = defaultdict(list)
|
||||||
for sol in solutions:
|
for sol in solutions:
|
||||||
key_parts = []
|
key_parts = []
|
||||||
|
|
@ -325,81 +499,72 @@ async def _eval_group(node, tc, collection, limit):
|
||||||
groups[tuple(key_parts)].append(sol)
|
groups[tuple(key_parts)].append(sol)
|
||||||
|
|
||||||
if not group_exprs:
|
if not group_exprs:
|
||||||
# No GROUP BY - entire result is one group
|
|
||||||
groups[()].extend(solutions)
|
groups[()].extend(solutions)
|
||||||
|
|
||||||
# Build grouped solutions (one per group)
|
|
||||||
result = []
|
|
||||||
for key, group_sols in groups.items():
|
for key, group_sols in groups.items():
|
||||||
sol = {}
|
sol = {}
|
||||||
# Include group key variables
|
|
||||||
if group_sols:
|
if group_sols:
|
||||||
for (expr, var_name), k in zip(group_exprs, key):
|
for (expr, var_name), k in zip(group_exprs, key):
|
||||||
if var_name and group_sols:
|
if var_name and group_sols:
|
||||||
sol[var_name] = evaluate_expression(expr, group_sols[0])
|
sol[var_name] = evaluate_expression(expr, group_sols[0])
|
||||||
sol["__group__"] = group_sols
|
sol["__group__"] = group_sols
|
||||||
result.append(sol)
|
yield sol
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_aggregate_join(node, tc, collection, limit):
|
async def _eval_aggregate_join(node, tc, collection, limit):
|
||||||
"""Evaluate an AggregateJoin (aggregation functions after GROUP BY)."""
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for sol in solutions:
|
|
||||||
group = sol.get("__group__", [sol])
|
group = sol.get("__group__", [sol])
|
||||||
new_sol = {k: v for k, v in sol.items() if k != "__group__"}
|
new_sol = {k: v for k, v in sol.items() if k != "__group__"}
|
||||||
|
|
||||||
# Apply aggregate functions
|
|
||||||
if hasattr(node, "A") and node.A:
|
if hasattr(node, "A") and node.A:
|
||||||
for agg in node.A:
|
for agg in node.A:
|
||||||
var_name = str(agg.res)
|
var_name = str(agg.res)
|
||||||
agg_val = _compute_aggregate(agg, group)
|
agg_val = _compute_aggregate(agg, group)
|
||||||
new_sol[var_name] = agg_val
|
new_sol[var_name] = agg_val
|
||||||
|
|
||||||
result.append(new_sol)
|
yield new_sol
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_graph(node, tc, collection, limit):
|
async def _eval_graph(node, tc, collection, limit):
|
||||||
"""Evaluate a Graph node (GRAPH clause)."""
|
|
||||||
term = node.term
|
term = node.term
|
||||||
|
|
||||||
if isinstance(term, URIRef):
|
if isinstance(term, URIRef):
|
||||||
# GRAPH <uri> { ... } — fixed graph
|
|
||||||
# We'd need to pass graph to triples queries
|
|
||||||
# For now, evaluate inner pattern normally
|
|
||||||
logger.info(f"GRAPH <{term}> clause - graph filtering not yet wired")
|
logger.info(f"GRAPH <{term}> clause - graph filtering not yet wired")
|
||||||
return await evaluate(node.p, tc, collection, limit)
|
|
||||||
elif isinstance(term, Variable):
|
elif isinstance(term, Variable):
|
||||||
# GRAPH ?g { ... } — variable graph
|
|
||||||
logger.info(f"GRAPH ?{term} clause - variable graph not yet wired")
|
logger.info(f"GRAPH ?{term} clause - variable graph not yet wired")
|
||||||
return await evaluate(node.p, tc, collection, limit)
|
|
||||||
else:
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
return await evaluate(node.p, tc, collection, limit)
|
yield sol
|
||||||
|
|
||||||
|
|
||||||
async def _eval_values(node, tc, collection, limit):
|
async def _eval_values(node, tc, collection, limit):
|
||||||
"""Evaluate a VALUES clause (inline data)."""
|
# rdflib has two representations for VALUES:
|
||||||
variables = [str(v) for v in node.var]
|
# 1. var=[Variable...], value=[[val, ...], ...] — positional
|
||||||
solutions = []
|
# 2. var=None, res=[{Variable: val, ...}, ...] — dict-based
|
||||||
|
if hasattr(node, "res") and node.res:
|
||||||
|
for row in node.res:
|
||||||
|
sol = {}
|
||||||
|
for var, val in row.items():
|
||||||
|
if val is not None and str(val) != "UNDEF":
|
||||||
|
sol[str(var)] = rdflib_term_to_term(val)
|
||||||
|
yield sol
|
||||||
|
return
|
||||||
|
|
||||||
|
if not node.var or not node.value:
|
||||||
|
yield {}
|
||||||
|
return
|
||||||
|
variables = [str(v) for v in node.var]
|
||||||
for row in node.value:
|
for row in node.value:
|
||||||
sol = {}
|
sol = {}
|
||||||
for var_name, val in zip(variables, row):
|
for var_name, val in zip(variables, row):
|
||||||
if val is not None and str(val) != "UNDEF":
|
if val is not None and str(val) != "UNDEF":
|
||||||
sol[var_name] = rdflib_term_to_term(val)
|
sol[var_name] = rdflib_term_to_term(val)
|
||||||
solutions.append(sol)
|
yield sol
|
||||||
|
|
||||||
return solutions
|
|
||||||
|
|
||||||
|
|
||||||
async def _eval_to_multiset(node, tc, collection, limit):
|
async def _eval_to_multiset(node, tc, collection, limit):
|
||||||
"""Evaluate a ToMultiSet node (subquery)."""
|
async for sol in evaluate(node.p, tc, collection, limit):
|
||||||
return await evaluate(node.p, tc, collection, limit)
|
yield sol
|
||||||
|
|
||||||
|
|
||||||
# --- Aggregate computation ---
|
# --- Aggregate computation ---
|
||||||
|
|
@ -408,7 +573,6 @@ def _compute_aggregate(agg, group):
|
||||||
"""Compute a single aggregate function over a group of solutions."""
|
"""Compute a single aggregate function over a group of solutions."""
|
||||||
agg_name = agg.name if hasattr(agg, "name") else ""
|
agg_name = agg.name if hasattr(agg, "name") else ""
|
||||||
|
|
||||||
# Get the expression to aggregate
|
|
||||||
expr = agg.vars if hasattr(agg, "vars") else None
|
expr = agg.vars if hasattr(agg, "vars") else None
|
||||||
|
|
||||||
if agg_name == "Aggregate_Count":
|
if agg_name == "Aggregate_Count":
|
||||||
|
|
|
||||||
|
|
@ -125,6 +125,13 @@ def _evaluate_comp_value(node, solution):
|
||||||
if name == "MultiplicativeExpression":
|
if name == "MultiplicativeExpression":
|
||||||
return _eval_multiplicative(node, solution)
|
return _eval_multiplicative(node, solution)
|
||||||
|
|
||||||
|
# IN / NOT IN — must be checked before the generic Builtin_ dispatch
|
||||||
|
if name == "Builtin_IN":
|
||||||
|
return _eval_in(node, solution)
|
||||||
|
|
||||||
|
if name == "Builtin_NOTIN":
|
||||||
|
return not _eval_in(node, solution)
|
||||||
|
|
||||||
# SPARQL built-in functions
|
# SPARQL built-in functions
|
||||||
if name.startswith("Builtin_"):
|
if name.startswith("Builtin_"):
|
||||||
return _eval_builtin(name, node, solution)
|
return _eval_builtin(name, node, solution)
|
||||||
|
|
@ -133,19 +140,10 @@ def _evaluate_comp_value(node, solution):
|
||||||
if name == "Function":
|
if name == "Function":
|
||||||
return _eval_function(node, solution)
|
return _eval_function(node, solution)
|
||||||
|
|
||||||
# Exists / NotExists — handled via _eval_builtin now
|
|
||||||
|
|
||||||
# TrueFilter (used with OPTIONAL)
|
# TrueFilter (used with OPTIONAL)
|
||||||
if name == "TrueFilter":
|
if name == "TrueFilter":
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# IN / NOT IN
|
|
||||||
if name == "Builtin_IN":
|
|
||||||
return _eval_in(node, solution)
|
|
||||||
|
|
||||||
if name == "Builtin_NOTIN":
|
|
||||||
return not _eval_in(node, solution)
|
|
||||||
|
|
||||||
logger.warning(f"Unknown CompValue expression: {name}")
|
logger.warning(f"Unknown CompValue expression: {name}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -171,6 +169,22 @@ def _eval_relational(node, solution):
|
||||||
">=": operator.ge,
|
">=": operator.ge,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if str(op) == "IN":
|
||||||
|
items = node.other if isinstance(node.other, list) else [node.other]
|
||||||
|
for item in items:
|
||||||
|
other_val = evaluate_expression(item, solution)
|
||||||
|
if _comparable_value(left) == _comparable_value(other_val):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
if str(op) == "NOT IN":
|
||||||
|
items = node.other if isinstance(node.other, list) else [node.other]
|
||||||
|
for item in items:
|
||||||
|
other_val = evaluate_expression(item, solution)
|
||||||
|
if _comparable_value(left) == _comparable_value(other_val):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
op_fn = ops.get(str(op))
|
op_fn = ops.get(str(op))
|
||||||
if op_fn is None:
|
if op_fn is None:
|
||||||
logger.warning(f"Unknown relational operator: {op}")
|
logger.warning(f"Unknown relational operator: {op}")
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||||
from ... base import TriplesClientSpec
|
from ... base import TriplesClientSpec
|
||||||
|
|
||||||
from . parser import parse_sparql, ParseError
|
from . parser import parse_sparql, ParseError
|
||||||
from . algebra import evaluate, EvaluationError
|
from . algebra import evaluate, materialise, EvaluationError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -66,11 +66,10 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
logger.debug(f"Handling SPARQL query request {id}...")
|
logger.debug(f"Handling SPARQL query request {id}...")
|
||||||
|
|
||||||
response = await self.execute_sparql(request, flow)
|
if request.streaming:
|
||||||
|
await self.execute_sparql_streaming(request, flow, id)
|
||||||
if request.streaming and response.query_type == "select":
|
|
||||||
await self.send_streaming(response, flow, id, request)
|
|
||||||
else:
|
else:
|
||||||
|
response = await self.execute_sparql(request, flow)
|
||||||
await flow("response").send(
|
await flow("response").send(
|
||||||
response, properties={"id": id}
|
response, properties={"id": id}
|
||||||
)
|
)
|
||||||
|
|
@ -92,37 +91,77 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
await flow("response").send(r, properties={"id": id})
|
await flow("response").send(r, properties={"id": id})
|
||||||
|
|
||||||
async def send_streaming(self, response, flow, id, request):
|
async def execute_sparql_streaming(self, request, flow, id):
|
||||||
"""Send SELECT results in batches."""
|
"""Execute a SPARQL query and stream results as they arrive."""
|
||||||
|
|
||||||
bindings = response.bindings
|
try:
|
||||||
|
parsed = parse_sparql(request.query)
|
||||||
|
except ParseError as e:
|
||||||
|
await flow("response").send(
|
||||||
|
SparqlQueryResponse(
|
||||||
|
error=Error(
|
||||||
|
type="sparql-parse-error",
|
||||||
|
message=str(e),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
properties={"id": id}
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
if parsed.query_type != "select":
|
||||||
|
response = await self._execute_non_select(parsed, request, flow)
|
||||||
|
await flow("response").send(response, properties={"id": id})
|
||||||
|
return
|
||||||
|
|
||||||
|
triples_client = flow("triples-request")
|
||||||
|
variables = parsed.variables
|
||||||
batch_size = request.batch_size if request.batch_size > 0 else 20
|
batch_size = request.batch_size if request.batch_size > 0 else 20
|
||||||
|
batch = []
|
||||||
|
|
||||||
for i in range(0, len(bindings), batch_size):
|
try:
|
||||||
batch = bindings[i:i + batch_size]
|
async for sol in evaluate(
|
||||||
is_final = (i + batch_size >= len(bindings))
|
parsed.algebra,
|
||||||
r = SparqlQueryResponse(
|
triples_client,
|
||||||
query_type=response.query_type,
|
collection=request.collection or "default",
|
||||||
variables=response.variables,
|
limit=request.limit or 10000,
|
||||||
bindings=batch,
|
):
|
||||||
is_final=is_final,
|
values = [sol.get(v) for v in variables]
|
||||||
)
|
batch.append(SparqlBinding(values=values))
|
||||||
await flow("response").send(r, properties={"id": id})
|
|
||||||
|
|
||||||
# Handle empty results
|
if len(batch) >= batch_size:
|
||||||
if len(bindings) == 0:
|
r = SparqlQueryResponse(
|
||||||
r = SparqlQueryResponse(
|
query_type="select",
|
||||||
query_type=response.query_type,
|
variables=variables,
|
||||||
variables=response.variables,
|
bindings=batch,
|
||||||
bindings=[],
|
is_final=False,
|
||||||
is_final=True,
|
)
|
||||||
|
await flow("response").send(r, properties={"id": id})
|
||||||
|
batch = []
|
||||||
|
|
||||||
|
except EvaluationError as e:
|
||||||
|
await flow("response").send(
|
||||||
|
SparqlQueryResponse(
|
||||||
|
error=Error(
|
||||||
|
type="sparql-evaluation-error",
|
||||||
|
message=str(e),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
properties={"id": id}
|
||||||
)
|
)
|
||||||
await flow("response").send(r, properties={"id": id})
|
return
|
||||||
|
|
||||||
|
# Final batch (may be empty for zero results)
|
||||||
|
r = SparqlQueryResponse(
|
||||||
|
query_type="select",
|
||||||
|
variables=variables,
|
||||||
|
bindings=batch,
|
||||||
|
is_final=True,
|
||||||
|
)
|
||||||
|
await flow("response").send(r, properties={"id": id})
|
||||||
|
|
||||||
async def execute_sparql(self, request, flow):
|
async def execute_sparql(self, request, flow):
|
||||||
"""Parse and evaluate a SPARQL query."""
|
"""Parse and evaluate a SPARQL query (non-streaming)."""
|
||||||
|
|
||||||
# Parse the SPARQL query
|
|
||||||
try:
|
try:
|
||||||
parsed = parse_sparql(request.query)
|
parsed = parse_sparql(request.query)
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
|
|
@ -133,12 +172,31 @@ class Processor(FlowProcessor):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get the triples client from the flow
|
if parsed.query_type == "select":
|
||||||
triples_client = flow("triples-request")
|
triples_client = flow("triples-request")
|
||||||
|
try:
|
||||||
|
solutions = await materialise(
|
||||||
|
parsed.algebra,
|
||||||
|
triples_client,
|
||||||
|
collection=request.collection or "default",
|
||||||
|
limit=request.limit or 10000,
|
||||||
|
)
|
||||||
|
except EvaluationError as e:
|
||||||
|
return SparqlQueryResponse(
|
||||||
|
error=Error(
|
||||||
|
type="sparql-evaluation-error",
|
||||||
|
message=str(e),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return self._build_select_response(parsed, solutions)
|
||||||
|
|
||||||
# Evaluate the algebra
|
return await self._execute_non_select(parsed, request, flow)
|
||||||
|
|
||||||
|
async def _execute_non_select(self, parsed, request, flow):
|
||||||
|
"""Execute ASK, CONSTRUCT, or DESCRIBE queries."""
|
||||||
|
triples_client = flow("triples-request")
|
||||||
try:
|
try:
|
||||||
solutions = await evaluate(
|
solutions = await materialise(
|
||||||
parsed.algebra,
|
parsed.algebra,
|
||||||
triples_client,
|
triples_client,
|
||||||
collection=request.collection or "default",
|
collection=request.collection or "default",
|
||||||
|
|
@ -152,10 +210,7 @@ class Processor(FlowProcessor):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build response based on query type
|
if parsed.query_type == "ask":
|
||||||
if parsed.query_type == "select":
|
|
||||||
return self._build_select_response(parsed, solutions)
|
|
||||||
elif parsed.query_type == "ask":
|
|
||||||
return self._build_ask_response(solutions)
|
return self._build_ask_response(solutions)
|
||||||
elif parsed.query_type == "construct":
|
elif parsed.query_type == "construct":
|
||||||
return self._build_construct_response(parsed, solutions)
|
return self._build_construct_response(parsed, solutions)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue