SPARQL query service (#754)

SPARQL 1.1 query service wrapping pub/sub triples interface

Add a backend-agnostic SPARQL query service that parses SPARQL
queries using rdflib, decomposes them into triple pattern lookups
via the existing TriplesClient pub/sub interface, and performs
in-memory joins, filters, and projections.

Includes:
- SPARQL parser, algebra evaluator, expression evaluator, solution
  sequence operations (BGP, JOIN, OPTIONAL, UNION, FILTER, BIND,
  VALUES, GROUP BY, ORDER BY, LIMIT/OFFSET, DISTINCT, aggregates)
- FlowProcessor service with TriplesClientSpec
- Gateway dispatcher, request/response translators, API spec
- Python SDK method (FlowInstance.sparql_query)
- CLI command (tg-invoke-sparql-query)
- Tech spec (docs/tech-specs/sparql-query.md)

New unit tests for SPARQL query
This commit is contained in:
cybermaggedon 2026-04-02 17:21:39 +01:00 committed by GitHub
parent 62c30a3a50
commit d9dc4cbab5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 3498 additions and 3 deletions

View file

@ -1122,6 +1122,45 @@ class FlowInstance:
return result
def sparql_query(
    self, query, user="trustgraph", collection="default",
    limit=10000
):
    """
    Execute a SPARQL query against the knowledge graph.

    Args:
        query: SPARQL 1.1 query string
        user: User/keyspace identifier (default: "trustgraph")
        collection: Collection identifier (default: "default")
        limit: Safety limit on results (default: 10000)

    Returns:
        dict with query results. Structure depends on query type:
        - SELECT: {"query-type": "select", "variables": [...], "bindings": [...]}
        - ASK: {"query-type": "ask", "ask-result": bool}
        - CONSTRUCT/DESCRIBE: {"query-type": "construct", "triples": [...]}

    Raises:
        ProtocolException: If the response carries a non-empty "error"
            entry; the message is formatted as "<type>: <message>"
    """

    # Named `payload` rather than `input` so the builtin is not shadowed.
    payload = {
        "query": query,
        "user": user,
        "collection": collection,
        "limit": limit,
    }

    response = self.request("service/sparql", payload)

    # A missing, None or empty "error" entry all count as success.
    error = response.get("error")
    if error:
        error_type = error.get("type", "unknown")
        error_message = error.get("message", "Unknown error")
        raise ProtocolException(f"{error_type}: {error_message}")

    return response
def nlp_query(self, question, max_results=100):
"""
Convert a natural language question to a GraphQL query.

View file

@ -27,6 +27,7 @@ from .translators.nlp_query import QuestionToStructuredQueryRequestTranslator, Q
from .translators.structured_query import StructuredQueryRequestTranslator, StructuredQueryResponseTranslator
from .translators.diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
from .translators.collection import CollectionManagementRequestTranslator, CollectionManagementResponseTranslator
from .translators.sparql_query import SparqlQueryRequestTranslator, SparqlQueryResponseTranslator
# Register all service translators
TranslatorRegistry.register_service(
@ -149,6 +150,12 @@ TranslatorRegistry.register_service(
CollectionManagementResponseTranslator()
)
# Register the request/response translator pair for the "sparql-query" service
TranslatorRegistry.register_service(
"sparql-query",
SparqlQueryRequestTranslator(),
SparqlQueryResponseTranslator()
)
# Register single-direction translators for document loading
TranslatorRegistry.register_request("document", DocumentTranslator())
TranslatorRegistry.register_request("text-document", TextDocumentTranslator())

View file

@ -0,0 +1,111 @@
from typing import Dict, Any, Tuple
from ...schema import (
SparqlQueryRequest, SparqlQueryResponse, SparqlBinding,
Error, Term, Triple, IRI, LITERAL, BLANK,
)
from .base import MessageTranslator
from .primitives import TermTranslator, TripleTranslator
class SparqlQueryRequestTranslator(MessageTranslator):
    """Convert SPARQL query requests between plain dicts and schema objects."""

    def decode(self, data: Dict[str, Any]) -> SparqlQueryRequest:
        """Build a SparqlQueryRequest from a dict.

        Absent keys fall back to "trustgraph" / "default" / "" / 10000 for
        user / collection / query / limit; the limit is coerced with int().
        """
        user = data.get("user", "trustgraph")
        collection = data.get("collection", "default")
        query = data.get("query", "")
        limit = int(data.get("limit", 10000))

        return SparqlQueryRequest(
            user=user,
            collection=collection,
            query=query,
            limit=limit,
        )

    def encode(self, obj: SparqlQueryRequest) -> Dict[str, Any]:
        """Return the request's four fields as a dict keyed by field name."""
        field_names = ("user", "collection", "query", "limit")
        return {name: getattr(obj, name) for name in field_names}
class SparqlQueryResponseTranslator(MessageTranslator):
    """Convert SparqlQueryResponse schema objects into plain dicts."""

    def __init__(self):
        self.term_translator = TermTranslator()
        self.triple_translator = TripleTranslator()

    def decode(self, data: Dict[str, Any]) -> SparqlQueryResponse:
        """Unsupported direction: always raises NotImplementedError."""
        raise NotImplementedError(
            "Response translation to schema not typically needed"
        )

    def _encode_term(self, v):
        """Encode one bound value.

        Accepts None (passed through unchanged), a Term object, or a dict
        carrying Term fields -- per the original author's note, pub/sub
        deserialization hands nested dataclasses back as dicts.
        """
        if v is None:
            return None

        if not isinstance(v, dict):
            return self.term_translator.encode(v)

        # Rebuild a Term from the dict form; absent fields become "".
        term_fields = ("type", "iri", "id", "value", "datatype", "language")
        rebuilt = Term(**{name: v.get(name, "") for name in term_fields})
        return self.term_translator.encode(rebuilt)

    def _encode_error(self, error):
        """Reduce an Error object or an error dict to its type and message."""
        if isinstance(error, dict):
            kind = error.get("type", "")
            text = error.get("message", "")
        else:
            kind = error.type
            text = error.message

        return {"type": kind, "message": text}

    def encode(self, obj: SparqlQueryResponse) -> Dict[str, Any]:
        """Encode a response into a dict.

        "query-type" is always present and "error" is added when the
        response has a truthy error. The remaining keys depend on the
        query type: "variables" and "bindings" for select, "ask-result"
        for ask, "triples" for construct and describe.
        """
        kind = obj.query_type
        result = {"query-type": kind}

        if obj.error:
            result["error"] = self._encode_error(obj.error)

        if kind == "select":
            result["variables"] = obj.variables
            # Each row may be a SparqlBinding or its dict form.
            result["bindings"] = [
                {
                    "values": [
                        self._encode_term(value)
                        for value in (
                            row.get("values", [])
                            if isinstance(row, dict)
                            else row.values
                        )
                    ]
                }
                for row in obj.bindings
            ]
        elif kind == "ask":
            result["ask-result"] = obj.ask_result
        elif kind in ("construct", "describe"):
            result["triples"] = [
                self.triple_translator.encode(triple)
                for triple in obj.triples
            ]

        return result

    def encode_with_completion(
        self, obj: SparqlQueryResponse
    ) -> Tuple[Dict[str, Any], bool]:
        """Return the encoded response paired with a constant True flag."""
        encoded = self.encode(obj)
        return encoded, True

View file

@ -13,4 +13,5 @@ from .rows_query import *
from .diagnosis import *
from .collection import *
from .storage import *
from .tool_service import *
from .tool_service import *
from .sparql_query import *

View file

@ -0,0 +1,40 @@
from dataclasses import dataclass, field
from ..core.primitives import Error, Term, Triple
from ..core.topic import queue
############################################################################
# SPARQL query
@dataclass
class SparqlBinding:
    """One solution row of a SPARQL SELECT result.

    The entries of ``values`` correspond positionally to the ``variables``
    list of the SparqlQueryResponse that carries this row.
    """

    # The element type allows None -- presumably for a variable left
    # unbound in this row (confirm against the evaluator).
    values: list[Term | None] = field(default_factory=list)
@dataclass
class SparqlQueryRequest:
    """Request message for the SPARQL query service."""

    # User identifier the query is issued for
    user: str = ""

    # Collection identifier the query is issued against
    collection: str = ""

    # SPARQL query string
    query: str = ""

    # Safety limit on results
    limit: int = 10000
@dataclass
class SparqlQueryResponse:
    """Response message for the SPARQL query service.

    Which of the result fields is meaningful depends on ``query_type``.
    """

    # Error details; None by default
    error: Error | None = None

    # One of "select", "ask", "construct", "describe"
    query_type: str = ""

    # SELECT results: variable names, and one SparqlBinding per row
    variables: list[str] = field(default_factory=list)
    bindings: list[SparqlBinding] = field(default_factory=list)

    # ASK result
    ask_result: bool = False

    # CONSTRUCT/DESCRIBE results
    triples: list[Triple] = field(default_factory=list)
# Values returned by queue() for the 'sparql-query' name, one for each
# message class ('request' and 'response')
sparql_query_request_queue = queue('sparql-query', cls='request')
sparql_query_response_queue = queue('sparql-query', cls='response')