trustgraph/trustgraph-base/trustgraph/messaging/translators/knowledge.py
cybermaggedon 9f2bfbce0c
Per-workspace queue routing for workspace-scoped services (#862)
Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: reject requests targeting
  non-existent workspaces instead of routing to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
2026-05-04 10:30:03 +01:00

161 lines
No EOL
5.7 KiB
Python

from typing import Dict, Any, Tuple, Optional
from ...schema import (
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
Metadata, EntityEmbeddings
)
from .base import MessageTranslator
from .primitives import ValueTranslator, SubgraphTranslator
class KnowledgeRequestTranslator(MessageTranslator):
"""Translator for KnowledgeRequest schema objects"""
def __init__(self):
self.value_translator = ValueTranslator()
self.subgraph_translator = SubgraphTranslator()
def decode(self, data: Dict[str, Any]) -> KnowledgeRequest:
triples = None
if "triples" in data:
triples = Triples(
metadata=Metadata(
id=data["triples"]["metadata"]["id"],
root=data["triples"]["metadata"].get("root", ""),
collection=data["triples"]["metadata"]["collection"]
),
triples=self.subgraph_translator.decode(data["triples"]["triples"]),
)
graph_embeddings = None
if "graph-embeddings" in data:
graph_embeddings = GraphEmbeddings(
metadata=Metadata(
id=data["graph-embeddings"]["metadata"]["id"],
root=data["graph-embeddings"]["metadata"].get("root", ""),
collection=data["graph-embeddings"]["metadata"]["collection"]
),
entities=[
EntityEmbeddings(
entity=self.value_translator.decode(ent["entity"]),
vector=ent["vector"],
)
for ent in data["graph-embeddings"]["entities"]
]
)
return KnowledgeRequest(
operation=data.get("operation"),
id=data.get("id"),
flow=data.get("flow"),
collection=data.get("collection"),
triples=triples,
graph_embeddings=graph_embeddings,
)
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
result = {}
if obj.operation:
result["operation"] = obj.operation
if obj.id:
result["id"] = obj.id
if obj.flow:
result["flow"] = obj.flow
if obj.collection:
result["collection"] = obj.collection
if obj.triples:
result["triples"] = {
"metadata": {
"id": obj.triples.metadata.id,
"root": obj.triples.metadata.root,
"collection": obj.triples.metadata.collection,
},
"triples": self.subgraph_translator.encode(obj.triples.triples),
}
if obj.graph_embeddings:
result["graph-embeddings"] = {
"metadata": {
"id": obj.graph_embeddings.metadata.id,
"root": obj.graph_embeddings.metadata.root,
"collection": obj.graph_embeddings.metadata.collection,
},
"entities": [
{
"vector": entity.vector,
"entity": self.value_translator.encode(entity.entity),
}
for entity in obj.graph_embeddings.entities
],
}
return result
class KnowledgeResponseTranslator(MessageTranslator):
"""Translator for KnowledgeResponse schema objects"""
def __init__(self):
self.value_translator = ValueTranslator()
self.subgraph_translator = SubgraphTranslator()
def decode(self, data: Dict[str, Any]) -> KnowledgeResponse:
raise NotImplementedError("Response translation to Pulsar not typically needed")
def encode(self, obj: KnowledgeResponse) -> Dict[str, Any]:
# Response to list operation
if obj.ids is not None:
return {"ids": obj.ids}
# Streaming triples response
if obj.triples:
return {
"triples": {
"metadata": {
"id": obj.triples.metadata.id,
"root": obj.triples.metadata.root,
"collection": obj.triples.metadata.collection,
},
"triples": self.subgraph_translator.encode(obj.triples.triples),
}
}
# Streaming graph embeddings response
if obj.graph_embeddings:
return {
"graph-embeddings": {
"metadata": {
"id": obj.graph_embeddings.metadata.id,
"root": obj.graph_embeddings.metadata.root,
"collection": obj.graph_embeddings.metadata.collection,
},
"entities": [
{
"vector": entity.vector,
"entity": self.value_translator.encode(entity.entity),
}
for entity in obj.graph_embeddings.entities
],
}
}
# End of stream marker
if obj.eos is True:
return {"eos": True}
# Empty response (successful delete)
return {}
def encode_with_completion(self, obj: KnowledgeResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
response = self.encode(obj)
# Check if this is a final response
is_final = (
obj.ids is not None or # List response
obj.eos is True or # End of stream
(not obj.triples and not obj.graph_embeddings) # Empty response
)
return response, is_final