mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-10 15:25:14 +02:00
Convert the SPARQL algebra evaluator from eager list-based evaluation to
lazy async generators so results stream incrementally. This lets Slice
terminate early (via generator cleanup) and avoids materialising full
result sets for streamable operators like Project, Filter, Union, and
Extend. Blocking operators (Join, LeftJoin, OrderBy, Group) materialise
at their boundary then yield.
Add bind join optimization for Join nodes where one side is small
(VALUES/ToMultiSet): instead of materialising both sides independently
and hash-joining, iterate the small side's bindings and evaluate the
large side with those bindings pre-seeded. This turns wildcard BGP
queries into selective ones — e.g. VALUES ?x { <uri> } joined with a
BGP now queries the triple store with ?x bound rather than fetching
all triples.
Add TriplesClient.query_gen() async generator that wraps the existing
streaming callback API via an asyncio.Queue bridge, yielding individual
Triple objects as batches arrive.
Add streaming request path in the SPARQL query service that batches
solutions from the live async generator and sends them as they fill.
Fix FILTER IN/NOT IN: rdflib represents these as RelationalExpression
nodes with op="IN", not as Builtin_IN — handle both representations.
Fix Builtin_IN/Builtin_NOTIN dispatch ordering so the specific handlers
are checked before the generic Builtin_ prefix match.
Fix VALUES handling for rdflib's two representations: positional
(var/value) and dict-based (res).
195 lines
5.7 KiB
Python
195 lines
5.7 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from typing import Any
|
|
|
|
from . request_response_spec import RequestResponse, RequestResponseSpec
|
|
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL, TRIPLE
|
|
from .. knowledge import Uri, Literal
|
|
|
|
|
|
class Triple:
    """A subject / predicate / object triple returned by a triples query.

    Attributes are stored exactly as passed in; no validation or
    conversion is performed here.
    """

    def __init__(self, s, p, o):
        self.s = s
        self.p = p
        self.o = o

    def __repr__(self):
        # Readable form for logs and debugging instead of the default
        # "<Triple object at 0x...>".
        return (
            f"{type(self).__name__}"
            f"(s={self.s!r}, p={self.p!r}, o={self.o!r})"
        )
|
|
|
|
|
|
def to_value(x: Any) -> Any:
    """Turn a schema Term into a Uri or Literal.

    IRI-typed terms become ``Uri(x.iri)`` and LITERAL-typed terms become
    ``Literal(x.value)``.  Any other term type falls back to a Literal
    built from ``x.value``, or from ``x.iri`` when the value is falsy.
    """
    kind = x.type

    if kind == IRI:
        return Uri(x.iri)

    if kind == LITERAL:
        return Literal(x.value)

    # Unrecognised term type: best-effort literal
    return Literal(x.value or x.iri)
|
|
|
|
|
|
def from_value(x: Any) -> Any:
    """Turn a Uri, Literal, plain string, or Term into a schema Term.

    ``None`` and existing Term instances are returned untouched.  Uri
    values map to IRI terms and Literal values to LITERAL terms.  A plain
    string is treated as an IRI when it starts with ``http://``,
    ``https://`` or ``urn:``, otherwise as a literal.  Anything else is
    stringified into a LITERAL term.
    """
    # Pass-through cases: nothing to convert
    if x is None or isinstance(x, Term):
        return x

    if isinstance(x, Uri):
        return Term(type=IRI, iri=str(x))

    # Explicit literals and non-string objects are both stringified
    if isinstance(x, Literal) or not isinstance(x, str):
        return Term(type=LITERAL, value=str(x))

    # Plain string: detect IRIs by common prefixes
    iri_prefixes = ("http://", "https://", "urn:")
    if x.startswith(iri_prefixes):
        return Term(type=IRI, iri=x)

    return Term(type=LITERAL, value=x)
|
|
|
|
class TriplesClient(RequestResponse):
    """Client for triple-pattern queries.

    Each method builds a TriplesQueryRequest from the given pattern and
    sends it through ``self.request`` (presumably provided by
    RequestResponse -- not visible in this file).  Pattern components are
    converted with ``from_value``; returned terms are converted back with
    ``to_value`` and wrapped in Triple objects.
    """

    async def query_gen(self, s=None, p=None, o=None, limit=20,
                        collection="default",
                        batch_size=20, timeout=30, g=None):
        """Async generator yielding Triple objects as batches arrive.

        The streaming request runs as a background task whose recipient
        callback pushes each converted batch onto a queue; this generator
        drains the queue and yields the triples one at a time.

        Args:
            s, p, o: Triple pattern components (None is passed through
                as None by ``from_value``)
            limit: Forwarded as the request's ``limit``
            collection: Collection name
            batch_size: Forwarded as the request's ``batch_size``
            timeout: Forwarded to ``self.request`` as ``timeout``
            g: Forwarded unchanged as the request's ``g`` field

        Yields:
            Triple: one object per triple in each received batch.

        Raises:
            RuntimeError: if a response carries an error.
            Exception: any other failure of the background request is
                re-raised once already-queued batches have been yielded.
        """
        # Unbounded queue, so put_nowait() in the done-callback below can
        # never raise QueueFull.
        queue = asyncio.Queue()

        async def recipient(resp):
            if resp.error:
                raise RuntimeError(resp.error.message)

            batch = [
                Triple(to_value(v.s), to_value(v.p), to_value(v.o))
                for v in resp.triples
            ]
            await queue.put(batch)

            # None is the end-of-stream sentinel for the consumer loop
            if resp.is_final:
                await queue.put(None)

            return resp.is_final

        # Launch the streaming request as a background task
        task = asyncio.ensure_future(self.request(
            TriplesQueryRequest(
                s=from_value(s),
                p=from_value(p),
                o=from_value(o),
                limit=limit,
                collection=collection,
                streaming=True,
                batch_size=batch_size,
                g=g,
            ),
            timeout=timeout,
            recipient=recipient,
        ))

        # Always wake the consumer when the request finishes.  Without
        # this, a request that fails (error response, timeout, ...) never
        # posts the sentinel and the loop below would wait forever.
        task.add_done_callback(lambda _task: queue.put_nowait(None))

        try:
            while True:
                batch = await queue.get()
                if batch is None:
                    break
                for triple in batch:
                    yield triple

            # The stream ended; if that was because the request failed,
            # surface the failure instead of ending silently.
            if task.done() and not task.cancelled():
                failure = task.exception()
                if failure is not None:
                    raise failure
        finally:
            # Consumer stopped early (or the final batch arrived before
            # the request returned): stop the background request.
            if not task.done():
                task.cancel()
            # Reap the task so its outcome is always retrieved; errors
            # that matter were already raised above.
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass

    async def query(self, s=None, p=None, o=None, limit=20,
                    collection="default",
                    timeout=30, g=None):
        """Run a non-streaming triple query and return all matches.

        Args:
            s, p, o: Triple pattern components
            limit: Forwarded as the request's ``limit``
            collection: Collection name
            timeout: Forwarded to ``self.request`` as ``timeout``
            g: Forwarded unchanged as the request's ``g`` field

        Returns:
            list of Triple built from the response's triples.

        Raises:
            RuntimeError: if the response carries an error.
        """
        resp = await self.request(
            TriplesQueryRequest(
                s=from_value(s),
                p=from_value(p),
                o=from_value(o),
                limit=limit,
                collection=collection,
                g=g,
            ),
            timeout=timeout,
        )

        if resp.error:
            raise RuntimeError(resp.error.message)

        return [
            Triple(to_value(v.s), to_value(v.p), to_value(v.o))
            for v in resp.triples
        ]

    async def query_stream(self, s=None, p=None, o=None, limit=20,
                           collection="default",
                           batch_size=20, timeout=30,
                           batch_callback=None, g=None):
        """
        Streaming triple query - calls callback for each batch as it arrives.

        Args:
            s, p, o: Triple pattern (None for wildcard)
            limit: Maximum total triples to return
            collection: Collection name
            batch_size: Triples per batch
            timeout: Request timeout in seconds
            batch_callback: Async callback(batch, is_final) called for each batch
            g: Graph filter. ""=default graph only, None=all graphs,
                or a specific graph IRI.

        Returns:
            List[Triple]: All triples (flattened) if no callback provided;
            None when a callback is supplied.

        Raises:
            RuntimeError: if a response carries an error.
        """
        all_triples = []

        async def recipient(resp):
            if resp.error:
                raise RuntimeError(resp.error.message)

            batch = [
                Triple(to_value(v.s), to_value(v.p), to_value(v.o))
                for v in resp.triples
            ]

            # Either hand the batch to the caller or accumulate it
            if batch_callback:
                await batch_callback(batch, resp.is_final)
            else:
                all_triples.extend(batch)

            return resp.is_final

        await self.request(
            TriplesQueryRequest(
                s=from_value(s),
                p=from_value(p),
                o=from_value(o),
                limit=limit,
                collection=collection,
                streaming=True,
                batch_size=batch_size,
                g=g,
            ),
            timeout=timeout,
            recipient=recipient,
        )

        if not batch_callback:
            return all_triples
|
|
|
|
class TriplesClientSpec(RequestResponseSpec):
    """Request/response spec binding the triples-query schemas to TriplesClient.

    Passes the caller's request and response names to the base spec along
    with TriplesQueryRequest, TriplesQueryResponse and TriplesClient as the
    implementation.
    """

    def __init__(self, request_name, response_name):
        super().__init__(
            request_name=request_name,
            request_schema=TriplesQueryRequest,
            response_name=response_name,
            response_schema=TriplesQueryResponse,
            impl=TriplesClient,
        )
|
|
|