Feature/translator classes (#414)

Pull the JSON/Pulsar message translation into a separate module, will be useful for other comms channels
This commit is contained in:
cybermaggedon 2025-06-20 16:59:55 +01:00 committed by GitHub
parent 3fa004d628
commit a4e2f67cb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 1506 additions and 377 deletions

View file

@ -1,5 +1,6 @@
from ... schema import AgentRequest, AgentResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -20,24 +21,12 @@ class AgentRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("agent")
self.response_translator = TranslatorRegistry.get_response_translator("agent")
def to_request(self, body):
return AgentRequest(
question=body["question"]
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
resp = {
}
if message.answer:
resp["answer"] = message.answer
if message.thought:
resp["thought"] = message.thought
if message.observation:
resp["observation"] = message.observation
# The 2nd boolean expression indicates whether we're done responding
return resp, (message.answer is not None)
return self.response_translator.from_response_with_completion(message)

View file

@ -2,6 +2,7 @@
from ... schema import ConfigRequest, ConfigResponse, ConfigKey, ConfigValue
from ... schema import config_request_queue
from ... schema import config_response_queue
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -19,60 +20,12 @@ class ConfigRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("config")
self.response_translator = TranslatorRegistry.get_response_translator("config")
def to_request(self, body):
if "keys" in body:
keys = [
ConfigKey(
type = k["type"],
key = k["key"],
)
for k in body["keys"]
]
else:
keys = None
if "values" in body:
values = [
ConfigValue(
type = v["type"],
key = v["key"],
value = v["value"],
)
for v in body["values"]
]
else:
values = None
return ConfigRequest(
operation = body.get("operation", None),
keys = keys,
type = body.get("type", None),
values = values
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
response = { }
if message.version is not None:
response["version"] = message.version
if message.values is not None:
response["values"] = [
{
"type": v.type,
"key": v.key,
"value": v.value,
}
for v in message.values
]
if message.directory is not None:
response["directory"] = message.directory
if message.config is not None:
response["config"] = message.config
return response, True
return self.response_translator.from_response_with_completion(message)

View file

@ -6,8 +6,7 @@ from aiohttp import WSMsgType
from ... schema import Metadata
from ... schema import DocumentEmbeddings, ChunkEmbeddings
from ... base import Publisher
from . serialize import to_subgraph
from ... messaging.translators.document_loading import DocumentEmbeddingsTranslator
class DocumentEmbeddingsImport:
@ -17,6 +16,7 @@ class DocumentEmbeddingsImport:
self.ws = ws
self.running = running
self.translator = DocumentEmbeddingsTranslator()
self.publisher = Publisher(
pulsar_client, topic = queue, schema = DocumentEmbeddings
@ -36,23 +36,7 @@ class DocumentEmbeddingsImport:
async def receive(self, msg):
data = msg.json()
elt = DocumentEmbeddings(
metadata=Metadata(
id=data["metadata"]["id"],
metadata=to_subgraph(data["metadata"]["metadata"]),
user=data["metadata"]["user"],
collection=data["metadata"]["collection"],
),
chunks=[
ChunkEmbeddings(
chunk=de["chunk"].encode("utf-8"),
vectors=de["vectors"],
)
for de in data["chunks"]
],
)
elt = self.translator.to_pulsar(data)
await self.publisher.send(None, elt)
async def run(self):

View file

@ -2,9 +2,9 @@
import base64
from ... schema import Document, Metadata
from ... messaging import TranslatorRegistry
from . sender import ServiceSender
from . serialize import to_subgraph
class DocumentLoad(ServiceSender):
def __init__(self, pulsar_client, queue):
@ -15,26 +15,9 @@ class DocumentLoad(ServiceSender):
schema = Document,
)
self.translator = TranslatorRegistry.get_request_translator("document")
def to_request(self, body):
if "metadata" in body:
metadata = to_subgraph(body["metadata"])
else:
metadata = []
# Doing a base64 decoe/encode here to make sure the
# content is valid base64
doc = base64.b64decode(body["data"])
print("Document received")
return Document(
metadata=Metadata(
id=body.get("id"),
metadata=metadata,
user=body.get("user", "trustgraph"),
collection=body.get("collection", "default"),
),
data=base64.b64encode(doc).decode("utf-8")
)
return self.translator.to_pulsar(body)

View file

@ -1,5 +1,6 @@
from ... schema import DocumentRagQuery, DocumentRagResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -20,14 +21,12 @@ class DocumentRagRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("document-rag")
self.response_translator = TranslatorRegistry.get_response_translator("document-rag")
def to_request(self, body):
return DocumentRagQuery(
query=body["query"],
user=body.get("user", "trustgraph"),
collection=body.get("collection", "default"),
doc_limit=int(body.get("doc-limit", 20)),
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
return { "response": message.response }, True
return self.response_translator.from_response_with_completion(message)

View file

@ -1,5 +1,6 @@
from ... schema import EmbeddingsRequest, EmbeddingsResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -20,11 +21,12 @@ class EmbeddingsRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("embeddings")
self.response_translator = TranslatorRegistry.get_response_translator("embeddings")
def to_request(self, body):
return EmbeddingsRequest(
text=body["text"]
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
return { "vectors": message.vectors }, True
return self.response_translator.from_response_with_completion(message)

View file

@ -2,6 +2,7 @@
from ... schema import FlowRequest, FlowResponse
from ... schema import flow_request_queue
from ... schema import flow_response_queue
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -19,34 +20,12 @@ class FlowRequestor(ServiceRequestor):
timeout=timeout,
)
def to_request(self, body):
self.request_translator = TranslatorRegistry.get_request_translator("flow")
self.response_translator = TranslatorRegistry.get_response_translator("flow")
return FlowRequest(
operation = body.get("operation", None),
class_name = body.get("class-name", None),
class_definition = body.get("class-definition", None),
description = body.get("description", None),
flow_id = body.get("flow-id", None),
)
def to_request(self, body):
return self.request_translator.to_pulsar(body)
def from_response(self, message):
response = { }
if message.class_names is not None:
response["class-names"] = message.class_names
if message.flow_ids is not None:
response["flow-ids"] = message.flow_ids
if message.class_definition is not None:
response["class-definition"] = message.class_definition
if message.flow is not None:
response["flow"] = message.flow
if message.description is not None:
response["description"] = message.description
return response, True
return self.response_translator.from_response_with_completion(message)

View file

@ -1,8 +1,8 @@
from ... schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
from . serialize import serialize_value
class GraphEmbeddingsQueryRequestor(ServiceRequestor):
def __init__(
@ -21,22 +21,12 @@ class GraphEmbeddingsQueryRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("graph-embeddings-query")
self.response_translator = TranslatorRegistry.get_response_translator("graph-embeddings-query")
def to_request(self, body):
limit = int(body.get("limit", 20))
return GraphEmbeddingsRequest(
vectors = body["vectors"],
limit = limit,
user = body.get("user", "trustgraph"),
collection = body.get("collection", "default"),
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
return {
"entities": [
serialize_value(ent) for ent in message.entities
]
}, True
return self.response_translator.from_response_with_completion(message)

View file

@ -1,5 +1,6 @@
from ... schema import GraphRagQuery, GraphRagResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -20,17 +21,12 @@ class GraphRagRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("graph-rag")
self.response_translator = TranslatorRegistry.get_response_translator("graph-rag")
def to_request(self, body):
return GraphRagQuery(
query=body["query"],
user=body.get("user", "trustgraph"),
collection=body.get("collection", "default"),
entity_limit=int(body.get("entity-limit", 50)),
triple_limit=int(body.get("triple-limit", 30)),
max_subgraph_size=int(body.get("max-subgraph-size", 1000)),
max_path_length=int(body.get("max-path-length", 2)),
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
return { "response": message.response }, True
return self.response_translator.from_response_with_completion(message)

View file

@ -5,11 +5,9 @@ from ... schema import KnowledgeRequest, KnowledgeResponse, Triples
from ... schema import GraphEmbeddings, Metadata, EntityEmbeddings
from ... schema import knowledge_request_queue
from ... schema import knowledge_response_queue
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
from . serialize import serialize_graph_embeddings
from . serialize import serialize_triples, to_subgraph, to_value
from . serialize import to_document_metadata, to_processing_metadata
class KnowledgeRequestor(ServiceRequestor):
def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
@ -25,73 +23,12 @@ class KnowledgeRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("knowledge")
self.response_translator = TranslatorRegistry.get_response_translator("knowledge")
def to_request(self, body):
if "triples" in body:
triples = Triples(
metadata=Metadata(
id = body["triples"]["metadata"]["id"],
metadata = to_subgraph(body["triples"]["metadata"]["metadata"]),
user = body["triples"]["metadata"]["user"],
),
triples = to_subgraph(body["triples"]["triples"]),
)
else:
triples = None
if "graph-embeddings" in body:
ge = GraphEmbeddings(
metadata = Metadata(
id = body["graph-embeddings"]["metadata"]["id"],
metadata = to_subgraph(body["graph-embeddings"]["metadata"]["metadata"]),
user = body["graph-embeddings"]["metadata"]["user"],
),
entities=[
EntityEmbeddings(
entity = to_value(ent["entity"]),
vectors = ent["vectors"],
)
for ent in body["graph-embeddings"]["entities"]
]
)
else:
ge = None
return KnowledgeRequest(
operation = body.get("operation", None),
user = body.get("user", None),
id = body.get("id", None),
flow = body.get("flow", None),
collection = body.get("collection", None),
triples = triples,
graph_embeddings = ge,
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
# Response to list,
if message.ids is not None:
return {
"ids": message.ids
}, True
if message.triples:
return {
"triples": serialize_triples(message.triples)
}, False
if message.graph_embeddings:
return {
"graph-embeddings": serialize_graph_embeddings(
message.graph_embeddings
)
}, False
if message.eos is True:
return {
"eos": True
}, True
# Empty case, return from successful delete.
return {}, True
return self.response_translator.from_response_with_completion(message)

View file

@ -4,12 +4,9 @@ import base64
from ... schema import LibrarianRequest, LibrarianResponse
from ... schema import librarian_request_queue
from ... schema import librarian_response_queue
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
from . serialize import serialize_document_metadata
from . serialize import serialize_processing_metadata
from . serialize import to_document_metadata, to_processing_metadata
from . serialize import to_criteria
class LibrarianRequestor(ServiceRequestor):
def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
@ -25,67 +22,20 @@ class LibrarianRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("librarian")
self.response_translator = TranslatorRegistry.get_response_translator("librarian")
def to_request(self, body):
# Content gets base64 decoded & encoded again. It at least makes
# sure payload is valid base64.
if "document-metadata" in body:
dm = to_document_metadata(body["document-metadata"])
else:
dm = None
if "processing-metadata" in body:
pm = to_processing_metadata(body["processing-metadata"])
else:
pm = None
if "criteria" in body:
criteria = to_criteria(body["criteria"])
else:
criteria = None
# Handle base64 content processing
if "content" in body:
# Content gets base64 decoded & encoded again to ensure valid base64
content = base64.b64decode(body["content"].encode("utf-8"))
content = base64.b64encode(content).decode("utf-8")
else:
content = None
return LibrarianRequest(
operation = body.get("operation", None),
document_id = body.get("document-id", None),
processing_id = body.get("processing-id", None),
document_metadata = dm,
processing_metadata = pm,
content = content,
user = body.get("user", None),
collection = body.get("collection", None),
criteria = criteria,
)
body = body.copy()
body["content"] = content
return self.request_translator.to_pulsar(body)
def from_response(self, message):
response = {}
if message.document_metadata:
response["document-metadata"] = serialize_document_metadata(
message.document_metadata
)
if message.content:
response["content"] = message.content.decode("utf-8")
if message.document_metadatas != None:
response["document-metadatas"] = [
serialize_document_metadata(v)
for v in message.document_metadatas
]
if message.processing_metadatas != None:
response["processing-metadatas"] = [
serialize_processing_metadata(v)
for v in message.processing_metadatas
]
return response, True
return self.response_translator.from_response_with_completion(message)

View file

@ -2,6 +2,7 @@
import json
from ... schema import PromptRequest, PromptResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -22,22 +23,12 @@ class PromptRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("prompt")
self.response_translator = TranslatorRegistry.get_response_translator("prompt")
def to_request(self, body):
return PromptRequest(
id=body["id"],
terms={
k: json.dumps(v)
for k, v in body["variables"].items()
}
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
if message.object:
return {
"object": message.object
}, True
else:
return {
"text": message.text
}, True
return self.response_translator.from_response_with_completion(message)

View file

@ -3,6 +3,13 @@ import base64
from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
# DEPRECATED: These functions have been moved to trustgraph.... messaging.translators
# Use the new messaging translation system instead for consistency and reusability.
# Examples:
# from trustgraph.... messaging.translators.primitives import ValueTranslator
# value_translator = ValueTranslator()
# pulsar_value = value_translator.to_pulsar({"v": "example", "e": True})
def to_value(x):
return Value(value=x["v"], is_uri=x["e"])

View file

@ -1,5 +1,6 @@
from ... schema import TextCompletionRequest, TextCompletionResponse
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
@ -20,12 +21,12 @@ class TextCompletionRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("text-completion")
self.response_translator = TranslatorRegistry.get_response_translator("text-completion")
def to_request(self, body):
return TextCompletionRequest(
system=body["system"],
prompt=body["prompt"]
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
return { "response": message.response }, True
return self.response_translator.from_response_with_completion(message)

View file

@ -2,9 +2,9 @@
import base64
from ... schema import TextDocument, Metadata
from ... messaging import TranslatorRegistry
from . sender import ServiceSender
from . serialize import to_subgraph
class TextLoad(ServiceSender):
def __init__(self, pulsar_client, queue):
@ -15,30 +15,9 @@ class TextLoad(ServiceSender):
schema = TextDocument,
)
self.translator = TranslatorRegistry.get_request_translator("text-document")
def to_request(self, body):
if "metadata" in body:
metadata = to_subgraph(body["metadata"])
else:
metadata = []
if "charset" in body:
charset = body["charset"]
else:
charset = "utf-8"
# Text is base64 encoded
text = base64.b64decode(body["text"]).decode(charset)
print("Text document received")
return TextDocument(
metadata=Metadata(
id=body.get("id"),
metadata=metadata,
user=body.get("user", "trustgraph"),
collection=body.get("collection", "default"),
),
text=text,
)
return self.translator.to_pulsar(body)

View file

@ -1,8 +1,8 @@
from ... schema import TriplesQueryRequest, TriplesQueryResponse, Triples
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
from . serialize import to_value, serialize_subgraph
class TriplesQueryRequestor(ServiceRequestor):
def __init__(
@ -21,34 +21,12 @@ class TriplesQueryRequestor(ServiceRequestor):
timeout=timeout,
)
self.request_translator = TranslatorRegistry.get_request_translator("triples-query")
self.response_translator = TranslatorRegistry.get_response_translator("triples-query")
def to_request(self, body):
if "s" in body:
s = to_value(body["s"])
else:
s = None
if "p" in body:
p = to_value(body["p"])
else:
p = None
if "o" in body:
o = to_value(body["o"])
else:
o = None
limit = int(body.get("limit", 10000))
return TriplesQueryRequest(
s = s, p = p, o = o,
limit = limit,
user = body.get("user", "trustgraph"),
collection = body.get("collection", "default"),
)
return self.request_translator.to_pulsar(body)
def from_response(self, message):
return {
"response": serialize_subgraph(message.triples)
}, True
return self.response_translator.from_response_with_completion(message)