Changed schema from Value to Term; major breaking change (#622)

* Changed schema from Value to Term; major breaking change

* Following the schema change, propagated Value -> Term through all processing code

* Updated Cassandra for g, p, s, o index patterns (7 indexes)

* Reviewed and updated all tests

* Neo4j, Memgraph and FalkorDB remain broken; will look at them once things have settled down
This commit is contained in:
cybermaggedon 2026-01-27 13:48:08 +00:00 committed by GitHub
parent e061f2c633
commit cf0daedefa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
86 changed files with 2458 additions and 1764 deletions

View file

@ -10,12 +10,27 @@ import json
import base64
from .. knowledge import hash, Uri, Literal
from .. schema import IRI, LITERAL
from . types import Triple
from . exceptions import ProtocolException
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
"""Convert wire format to Uri or Literal."""
if x.get("t") == IRI:
return Uri(x.get("i", ""))
elif x.get("t") == LITERAL:
return Literal(x.get("v", ""))
# Fallback for any other type
return Literal(x.get("v", x.get("i", "")))
def from_value(v):
    """Convert a Uri or Literal to its wire-format dict.

    A ``Uri`` instance becomes ``{"t": IRI, "i": str(v)}``.  Any other
    value is treated as a literal and becomes
    ``{"t": LITERAL, "v": str(v)}``.  The value is always passed through
    ``str()`` before being placed in the dict.
    """
    if isinstance(v, Uri):
        return {"t": IRI, "i": str(v)}
    # Everything that is not a Uri is serialised as a literal
    return {"t": LITERAL, "v": str(v)}
class Flow:
"""
@ -751,17 +766,17 @@ class FlowInstance:
if s:
if not isinstance(s, Uri):
raise RuntimeError("s must be Uri")
input["s"] = { "v": str(s), "e": isinstance(s, Uri), }
input["s"] = from_value(s)
if p:
if not isinstance(p, Uri):
raise RuntimeError("p must be Uri")
input["p"] = { "v": str(p), "e": isinstance(p, Uri), }
input["p"] = from_value(p)
if o:
if not isinstance(o, Uri) and not isinstance(o, Literal):
raise RuntimeError("o must be Uri or Literal")
input["o"] = { "v": str(o), "e": isinstance(o, Uri), }
input["o"] = from_value(o)
object = self.request(
"service/triples",
@ -834,9 +849,9 @@ class FlowInstance:
if metadata:
metadata.emit(
lambda t: triples.append({
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
})
)
@ -913,9 +928,9 @@ class FlowInstance:
if metadata:
metadata.emit(
lambda t: triples.append({
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
})
)

View file

@ -10,11 +10,18 @@ import json
import base64
from .. knowledge import hash, Uri, Literal
from .. schema import IRI, LITERAL
from . types import Triple
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
"""Convert wire format to Uri or Literal."""
if x.get("t") == IRI:
return Uri(x.get("i", ""))
elif x.get("t") == LITERAL:
return Literal(x.get("v", ""))
# Fallback for any other type
return Literal(x.get("v", x.get("i", "")))
class Knowledge:
"""

View file

@ -12,13 +12,28 @@ import logging
from . types import DocumentMetadata, ProcessingMetadata, Triple
from .. knowledge import hash, Uri, Literal
from .. schema import IRI, LITERAL
from . exceptions import *
logger = logging.getLogger(__name__)
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
"""Convert wire format to Uri or Literal."""
if x.get("t") == IRI:
return Uri(x.get("i", ""))
elif x.get("t") == LITERAL:
return Literal(x.get("v", ""))
# Fallback for any other type
return Literal(x.get("v", x.get("i", "")))
def from_value(v):
    """Convert a Uri or Literal to its wire-format dict.

    A ``Uri`` instance becomes ``{"t": IRI, "i": str(v)}``.  Any other
    value is treated as a literal and becomes
    ``{"t": LITERAL, "v": str(v)}``.  The value is always passed through
    ``str()`` before being placed in the dict.
    """
    if isinstance(v, Uri):
        return {"t": IRI, "i": str(v)}
    # Everything that is not a Uri is serialised as a literal
    return {"t": LITERAL, "v": str(v)}
class Library:
"""
@ -118,18 +133,18 @@ class Library:
if isinstance(metadata, list):
triples = [
{
"s": { "v": t.s, "e": isinstance(t.s, Uri) },
"p": { "v": t.p, "e": isinstance(t.p, Uri) },
"o": { "v": t.o, "e": isinstance(t.o, Uri) }
"s": from_value(t.s),
"p": from_value(t.p),
"o": from_value(t.o),
}
for t in metadata
]
elif hasattr(metadata, "emit"):
metadata.emit(
lambda t: triples.append({
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
})
)
else:
@ -315,9 +330,9 @@ class Library:
"comments": metadata.comments,
"metadata": [
{
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
}
for t in metadata.metadata
],

View file

@ -7,7 +7,7 @@ embeddings.
import logging
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
from .. schema import Error, Value
from .. schema import Error, Term
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec

View file

@ -2,15 +2,21 @@
import logging
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse, IRI, LITERAL
from .. knowledge import Uri, Literal
# Module logger
logger = logging.getLogger(__name__)
def to_value(x):
if x.is_uri: return Uri(x.value)
return Literal(x.value)
"""Convert schema Term to Uri or Literal."""
if x.type == IRI:
return Uri(x.iri)
elif x.type == LITERAL:
return Literal(x.value)
# Fallback
return Literal(x.value or x.iri)
class GraphEmbeddingsClient(RequestResponse):
async def query(self, vectors, limit=20, user="trustgraph",

View file

@ -7,7 +7,7 @@ embeddings.
import logging
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from .. schema import Error, Value
from .. schema import Error, Term
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec

View file

@ -1,24 +1,34 @@
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
from .. knowledge import Uri, Literal
class Triple:
    """Plain container holding the three parts of a triple.

    Attributes are stored exactly as given: ``s`` (subject),
    ``p`` (predicate) and ``o`` (object).
    """

    def __init__(self, s, p, o):
        self.s = s
        self.p = p
        self.o = o
def to_value(x):
if x.is_uri: return Uri(x.value)
return Literal(x.value)
"""Convert schema Term to Uri or Literal."""
if x.type == IRI:
return Uri(x.iri)
elif x.type == LITERAL:
return Literal(x.value)
# Fallback
return Literal(x.value or x.iri)
def from_value(x):
if x is None: return None
"""Convert Uri or Literal to schema Term."""
if x is None:
return None
if isinstance(x, Uri):
return Value(value=str(x), is_uri=True)
return Term(type=IRI, iri=str(x))
else:
return Value(value=str(x), is_uri=False)
return Term(type=LITERAL, value=str(x))
class TriplesClient(RequestResponse):
async def query(self, s=None, p=None, o=None, limit=20,

View file

@ -7,7 +7,7 @@ null. Output is a list of triples.
import logging
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .. schema import Value, Triple
from .. schema import Term, Triple
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec

View file

@ -2,7 +2,7 @@
import _pulsar
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
from .. schema import triples_request_queue
from .. schema import triples_response_queue
from . base import BaseClient
@ -46,9 +46,9 @@ class TriplesQueryClient(BaseClient):
if ent == None: return None
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
def request(
self,

View file

@ -1,5 +1,5 @@
from .base import Translator, MessageTranslator
from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
from .primitives import TermTranslator, ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
from .agent import AgentRequestTranslator, AgentResponseTranslator
from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator

View file

@ -1,37 +1,133 @@
from typing import Dict, Any, List
from ...schema import Value, Triple, RowSchema, Field
from ...schema import Term, Triple, RowSchema, Field, IRI, BLANK, LITERAL, TRIPLE
from .base import Translator
class ValueTranslator(Translator):
"""Translator for Value schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> Value:
return Value(value=data["v"], is_uri=data["e"])
def from_pulsar(self, obj: Value) -> Dict[str, Any]:
return {"v": obj.value, "e": obj.is_uri}
class TermTranslator(Translator):
    """
    Translator for Term schema objects.

    Wire format (compact keys):
    - "t": type (i/b/l/t)
    - "i": iri (for IRI type)
    - "d": id (for BLANK type)
    - "v": value (for LITERAL type)
    - "dt": datatype (for LITERAL type)
    - "ln": language (for LITERAL type)
    - "tr": triple (for TRIPLE type, nested)
    """

    def to_pulsar(self, data: Dict[str, Any]) -> Term:
        """Build a Term from its wire-format dict.

        The "t" key selects which other keys are read; keys that are
        absent fall back to an empty string.  For the TRIPLE type a
        missing or falsy "tr" entry yields ``triple=None``.  An unknown
        or missing "t" produces a Term carrying only that type tag.
        """
        term_type = data.get("t", "")

        if term_type == IRI:
            return Term(type=IRI, iri=data.get("i", ""))

        if term_type == BLANK:
            return Term(type=BLANK, id=data.get("d", ""))

        if term_type == LITERAL:
            return Term(
                type=LITERAL,
                value=data.get("v", ""),
                datatype=data.get("dt", ""),
                language=data.get("ln", ""),
            )

        if term_type == TRIPLE:
            # Nested triple: delegate to the module-level helper rather
            # than instantiating TripleTranslator here.
            triple_data = data.get("tr")
            if triple_data:
                triple = _triple_translator_to_pulsar(triple_data)
            else:
                triple = None
            return Term(type=TRIPLE, triple=triple)

        # Unknown or empty type: keep the tag, leave other fields default
        return Term(type=term_type)

    def from_pulsar(self, obj: Term) -> Dict[str, Any]:
        """Serialise a Term to its wire-format dict.

        "t" is always emitted.  Only the keys relevant to the term's
        type are added; "dt", "ln" and "tr" are omitted when the
        corresponding field is empty/falsy.
        """
        result: Dict[str, Any] = {"t": obj.type}

        if obj.type == IRI:
            result["i"] = obj.iri
        elif obj.type == BLANK:
            result["d"] = obj.id
        elif obj.type == LITERAL:
            result["v"] = obj.value
            # datatype / language are optional on the wire
            if obj.datatype:
                result["dt"] = obj.datatype
            if obj.language:
                result["ln"] = obj.language
        elif obj.type == TRIPLE:
            if obj.triple:
                result["tr"] = _triple_translator_from_pulsar(obj.triple)

        return result
# Module-level helper functions to avoid circular instantiation
def _triple_translator_to_pulsar(data: Dict[str, Any]) -> Triple:
    """Build a Triple from a wire-format dict.

    The "s", "p" and "o" entries are converted with TermTranslator; an
    entry that is missing or falsy becomes ``None``.  "g" is copied
    through unchanged (``None`` when absent).
    """
    term_translator = TermTranslator()
    return Triple(
        s=term_translator.to_pulsar(data["s"]) if data.get("s") else None,
        p=term_translator.to_pulsar(data["p"]) if data.get("p") else None,
        o=term_translator.to_pulsar(data["o"]) if data.get("o") else None,
        g=data.get("g"),
    )
def _triple_translator_from_pulsar(obj: Triple) -> Dict[str, Any]:
    """Serialise a Triple to a wire-format dict.

    Only truthy fields are emitted: "s", "p" and "o" are converted with
    TermTranslator, and "g" is copied through as-is.  Fields that are
    ``None`` (or otherwise falsy) are left out of the result.
    """
    term_translator = TermTranslator()
    result: Dict[str, Any] = {}

    if obj.s:
        result["s"] = term_translator.from_pulsar(obj.s)
    if obj.p:
        result["p"] = term_translator.from_pulsar(obj.p)
    if obj.o:
        result["o"] = term_translator.from_pulsar(obj.o)
    if obj.g:
        result["g"] = obj.g

    return result
class TripleTranslator(Translator):
"""Translator for Triple schema objects"""
"""Translator for Triple schema objects (quads with optional graph)"""
def __init__(self):
self.value_translator = ValueTranslator()
self.term_translator = TermTranslator()
def to_pulsar(self, data: Dict[str, Any]) -> Triple:
return Triple(
s=self.value_translator.to_pulsar(data["s"]),
p=self.value_translator.to_pulsar(data["p"]),
o=self.value_translator.to_pulsar(data["o"])
s=self.term_translator.to_pulsar(data["s"]) if data.get("s") else None,
p=self.term_translator.to_pulsar(data["p"]) if data.get("p") else None,
o=self.term_translator.to_pulsar(data["o"]) if data.get("o") else None,
g=data.get("g"),
)
def from_pulsar(self, obj: Triple) -> Dict[str, Any]:
return {
"s": self.value_translator.from_pulsar(obj.s),
"p": self.value_translator.from_pulsar(obj.p),
"o": self.value_translator.from_pulsar(obj.o)
}
result: Dict[str, Any] = {}
if obj.s:
result["s"] = self.term_translator.from_pulsar(obj.s)
if obj.p:
result["p"] = self.term_translator.from_pulsar(obj.p)
if obj.o:
result["o"] = self.term_translator.from_pulsar(obj.o)
if obj.g:
result["g"] = obj.g
return result
# Backward compatibility alias
ValueTranslator = TermTranslator
class SubgraphTranslator(Translator):

View file

@ -14,11 +14,13 @@ class TriplesQueryRequestTranslator(MessageTranslator):
s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None
p = self.value_translator.to_pulsar(data["p"]) if "p" in data else None
o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None
g = data.get("g") # None=default graph, "*"=all graphs
return TriplesQueryRequest(
s=s,
p=p,
o=o,
g=g,
limit=int(data.get("limit", 10000)),
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default")
@ -30,14 +32,16 @@ class TriplesQueryRequestTranslator(MessageTranslator):
"user": obj.user,
"collection": obj.collection
}
if obj.s:
result["s"] = self.value_translator.from_pulsar(obj.s)
if obj.p:
result["p"] = self.value_translator.from_pulsar(obj.p)
if obj.o:
result["o"] = self.value_translator.from_pulsar(obj.o)
if obj.g is not None:
result["g"] = obj.g
return result

View file

@ -1,22 +1,57 @@
from dataclasses import dataclass, field
# Term type constants: single-character tags stored in Term.type
IRI = "i" # IRI/URI node
BLANK = "b" # Blank node
LITERAL = "l" # Literal value
TRIPLE = "t" # Quoted triple (RDF-star)
@dataclass
class Error:
    """Error record with a type tag and a message.

    Both fields are strings and default to the empty string.
    """

    type: str = ""
    message: str = ""
@dataclass
class Value:
class Term:
"""
RDF Term - can represent an IRI, blank node, literal, or quoted triple.
The 'type' field determines which other fields are relevant:
- IRI: use 'iri' field
- BLANK: use 'id' field
- LITERAL: use 'value', 'datatype', 'language' fields
- TRIPLE: use 'triple' field
"""
type: str = "" # One of: IRI, BLANK, LITERAL, TRIPLE
# For IRI terms (type == IRI)
iri: str = ""
# For blank nodes (type == BLANK)
id: str = ""
# For literals (type == LITERAL)
value: str = ""
is_uri: bool = False
type: str = ""
datatype: str = "" # XSD datatype URI (mutually exclusive with language)
language: str = "" # Language tag (mutually exclusive with datatype)
# For quoted triples (type == TRIPLE)
triple: "Triple | None" = None
@dataclass
class Triple:
s: Value | None = None
p: Value | None = None
o: Value | None = None
"""
RDF Triple / Quad.
The optional 'g' field specifies the named graph (None = default graph).
"""
s: Term | None = None # Subject
p: Term | None = None # Predicate
o: Term | None = None # Object
g: str | None = None # Graph name (IRI), None = default graph
@dataclass
class Field:

View file

@ -1,7 +1,7 @@
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.primitives import Value, RowSchema
from ..core.primitives import Term, RowSchema
from ..core.topic import topic
############################################################################
@ -10,7 +10,7 @@ from ..core.topic import topic
@dataclass
class EntityEmbeddings:
entity: Value | None = None
entity: Term | None = None
vectors: list[list[float]] = field(default_factory=list)
# This is a 'batching' mechanism for the above data

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from ..core.primitives import Value, Triple
from ..core.primitives import Term, Triple
from ..core.metadata import Metadata
from ..core.topic import topic
@ -10,7 +10,7 @@ from ..core.topic import topic
@dataclass
class EntityContext:
entity: Value | None = None
entity: Term | None = None
context: str = ""
# This is a 'batching' mechanism for the above data

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass
from ..core.primitives import Error, Value, Triple
from ..core.primitives import Error, Term, Triple
from ..core.topic import topic
from ..core.metadata import Metadata

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from ..core.primitives import Error, Value, Triple
from ..core.primitives import Error, Term, Triple
from ..core.topic import topic
############################################################################
@ -17,7 +17,7 @@ class GraphEmbeddingsRequest:
@dataclass
class GraphEmbeddingsResponse:
error: Error | None = None
entities: list[Value] = field(default_factory=list)
entities: list[Term] = field(default_factory=list)
############################################################################
@ -27,9 +27,10 @@ class GraphEmbeddingsResponse:
class TriplesQueryRequest:
user: str = ""
collection: str = ""
s: Value | None = None
p: Value | None = None
o: Value | None = None
s: Term | None = None
p: Term | None = None
o: Term | None = None
g: str | None = None # Graph IRI. None=default graph, "*"=all graphs
limit: int = 0
@dataclass

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass
from ..core.topic import topic
from ..core.primitives import Error, Value
from ..core.primitives import Error, Term
############################################################################