mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-27 16:25:12 +02:00
SPARQL engine: streaming evaluation, bind joins, and expression fixes (#947)
Convert the SPARQL algebra evaluator from eager list-based evaluation to
lazy async generators so results stream incrementally. This lets Slice
terminate early (via generator cleanup) and avoids materialising full
result sets for streamable operators like Project, Filter, Union, and
Extend. Blocking operators (Join, LeftJoin, OrderBy, Group) materialise
at their boundary then yield.
Add bind join optimization for Join nodes where one side is small
(VALUES/ToMultiSet): instead of materialising both sides independently
and hash-joining, iterate the small side's bindings and evaluate the
large side with those bindings pre-seeded. This turns wildcard BGP
queries into selective ones — e.g. VALUES ?x { <uri> } joined with a
BGP now queries the triple store with ?x bound rather than fetching
all triples.
Add TriplesClient.query_gen() async generator that wraps the existing
streaming callback API via an asyncio.Queue bridge, yielding individual
Triple objects as batches arrive.
Add streaming request path in the SPARQL query service that batches
solutions from the live async generator and sends them as they fill.
Fix FILTER IN/NOT IN: rdflib represents these as RelationalExpression
nodes with op="IN", not as Builtin_IN — handle both representations.
Fix Builtin_IN/Builtin_NOTIN dispatch ordering so the specific handlers
are checked before the generic Builtin_ prefix match.
Fix VALUES handling for rdflib's two representations: positional
(var/value) and dict-based (res).
This commit is contained in:
parent
81e9a3ebe4
commit
6af12f416f
5 changed files with 683 additions and 302 deletions
|
|
@ -1,5 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
|
|
@ -44,6 +45,60 @@ def from_value(x: Any) -> Any:
|
|||
return Term(type=LITERAL, value=str(x))
|
||||
|
||||
class TriplesClient(RequestResponse):
|
||||
|
||||
async def query_gen(self, s=None, p=None, o=None, limit=20,
                    collection="default",
                    batch_size=20, timeout=30, g=None):
    """Async generator yielding Triple objects as batches arrive.

    Starts a streaming triples query as a background task and bridges its
    per-response callback to this generator through an asyncio.Queue.

    Args:
        s, p, o: Optional subject / predicate / object constraints; each
            is converted with ``from_value`` before being sent.
        limit: Forwarded as the request's ``limit`` field.
        collection: Forwarded as the request's ``collection`` field.
        batch_size: Forwarded as the request's ``batch_size`` field.
        timeout: Passed to ``self.request`` as its ``timeout``.
        g: Forwarded as the request's ``g`` field.

    Yields:
        One ``Triple`` per entry of each streamed response, in arrival
        order.

    Raises:
        Whatever the background request raised, once every batch received
        before the failure has been yielded. NOTE(review): this assumes
        ``self.request`` propagates exceptions raised by ``recipient``
        (the RuntimeError for an error response) — confirm against
        RequestResponse.request.
    """
    queue = asyncio.Queue()

    async def recipient(resp):
        if resp.error:
            raise RuntimeError(resp.error.message)

        await queue.put([
            Triple(to_value(v.s), to_value(v.p), to_value(v.o))
            for v in resp.triples
        ])

        # Returned to self.request, as in the original callback.
        return resp.is_final

    # Launch the streaming request as a background task
    task = asyncio.ensure_future(self.request(
        TriplesQueryRequest(
            s=from_value(s),
            p=from_value(p),
            o=from_value(o),
            limit=limit,
            collection=collection,
            streaming=True,
            batch_size=batch_size,
            g=g,
        ),
        timeout=timeout,
        recipient=recipient,
    ))

    # Enqueue the end-of-stream sentinel when the task finishes for ANY
    # reason (success, error, timeout, cancellation). Previously it was
    # only sent on a final response, so a failed request left the
    # consumer blocked on queue.get() forever. The queue is unbounded,
    # so put_nowait cannot raise QueueFull.
    task.add_done_callback(lambda _finished: queue.put_nowait(None))

    try:
        while True:
            batch = await queue.get()
            if batch is None:
                break
            for triple in batch:
                yield triple

        # The sentinel is only queued once the task is done, so this does
        # not block; it re-raises any failure of the request to the caller
        # instead of ending the stream silently.
        await task
    finally:
        # Reached with a live task only when the consumer stops early
        # (generator closed, or the consumer itself was cancelled).
        if not task.done():
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                # Best-effort teardown: the consumer has already walked
                # away, so errors from the abandoned request are dropped
                # rather than masking whatever ended the iteration.
                pass
|
||||
|
||||
async def query(self, s=None, p=None, o=None, limit=20,
|
||||
collection="default",
|
||||
timeout=30, g=None):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue