trustgraph/trustgraph-base/trustgraph/messaging/translators/library.py
Cyber MacGeddon 115e325071 Per-workspace queue routing for workspace-scoped services
Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, and knowledge cores
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter for Cassandra partition keys
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Gateway enforces workspace requirement for workspace dispatchers —
  config moves from system_dispatchers to workspace_dispatchers so the
  gateway can never route to the system config queue
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
2026-05-01 16:24:30 +01:00

174 lines
6.5 KiB
Python

from typing import Dict, Any, Tuple, Optional
from ...schema import LibrarianRequest, LibrarianResponse, DocumentMetadata, ProcessingMetadata, Criteria
from .base import MessageTranslator
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
class LibraryRequestTranslator(MessageTranslator):
    """Translator for LibrarianRequest schema objects.

    Converts between the dict form (hyphenated keys such as
    ``document-id`` or ``chunk-index``) and ``LibrarianRequest`` objects
    (snake_case attributes).  ``encode`` and ``decode`` cover the same set
    of fields so that a request survives a round trip.
    """

    def __init__(self):
        # Nested metadata objects are handled by their own translators.
        self.doc_metadata_translator = DocumentMetadataTranslator()
        self.proc_metadata_translator = ProcessingMetadataTranslator()

    def decode(self, data: Dict[str, Any]) -> LibrarianRequest:
        """Build a LibrarianRequest from its dict form.

        Args:
            data: Request dict using hyphenated keys.  Every key is
                optional; an explicit ``None`` for ``document-metadata``,
                ``processing-metadata`` or ``criteria`` is treated the same
                as the key being absent.

        Returns:
            The populated ``LibrarianRequest``.

        Raises:
            KeyError: If a criteria entry lacks ``key``, ``value`` or
                ``operator``.
        """
        # Document metadata
        raw_doc_metadata = data.get("document-metadata")
        doc_metadata = None
        if raw_doc_metadata is not None:
            doc_metadata = self.doc_metadata_translator.decode(raw_doc_metadata)

        # Processing metadata
        raw_proc_metadata = data.get("processing-metadata")
        proc_metadata = None
        if raw_proc_metadata is not None:
            proc_metadata = self.proc_metadata_translator.decode(raw_proc_metadata)

        # Criteria: `or []` keeps an explicit null from breaking iteration
        criteria = [
            Criteria(
                key=c["key"],
                value=c["value"],
                operator=c["operator"],
            )
            for c in (data.get("criteria") or [])
        ]

        # Content as bytes: text is UTF-8 encoded, anything else is passed
        # through untouched (including None when the key is absent)
        content = data.get("content")
        if isinstance(content, str):
            content = content.encode("utf-8")

        return LibrarianRequest(
            operation=data.get("operation"),
            document_id=data.get("document-id", ""),
            processing_id=data.get("processing-id", ""),
            document_metadata=doc_metadata,
            processing_metadata=proc_metadata,
            content=content,
            collection=data.get("collection", ""),
            criteria=criteria,
            # Chunked upload fields
            total_size=data.get("total-size", 0),
            chunk_size=data.get("chunk-size", 0),
            upload_id=data.get("upload-id", ""),
            chunk_index=data.get("chunk-index", 0),
            # List documents filtering
            include_children=data.get("include-children", False),
        )

    def encode(self, obj: LibrarianRequest) -> Dict[str, Any]:
        """Convert a LibrarianRequest into its dict form.

        Falsy fields are omitted, except ``criteria`` which is emitted
        whenever it is not ``None``.  Omitted fields decode back to the
        same defaults ``decode`` applies for missing keys.

        Args:
            obj: The request to serialise.

        Returns:
            Dict using hyphenated keys.
        """
        result = {}

        if obj.operation:
            result["operation"] = obj.operation
        if obj.document_id:
            result["document-id"] = obj.document_id
        if obj.processing_id:
            result["processing-id"] = obj.processing_id
        if obj.document_metadata:
            result["document-metadata"] = self.doc_metadata_translator.encode(
                obj.document_metadata
            )
        if obj.processing_metadata:
            result["processing-metadata"] = self.proc_metadata_translator.encode(
                obj.processing_metadata
            )
        if obj.content:
            # Bytes content goes back to text for the dict form
            result["content"] = (
                obj.content.decode("utf-8")
                if isinstance(obj.content, bytes)
                else obj.content
            )
        if obj.collection:
            result["collection"] = obj.collection
        if obj.criteria is not None:
            result["criteria"] = [
                {
                    "key": c.key,
                    "value": c.value,
                    "operator": c.operator,
                }
                for c in obj.criteria
            ]

        # Chunked upload fields: mirror what decode() reads so that these
        # values are not silently dropped when a request is encoded.
        if obj.total_size:
            result["total-size"] = obj.total_size
        if obj.chunk_size:
            result["chunk-size"] = obj.chunk_size
        if obj.upload_id:
            result["upload-id"] = obj.upload_id
        if obj.chunk_index:
            result["chunk-index"] = obj.chunk_index

        # List documents filtering
        if obj.include_children:
            result["include-children"] = obj.include_children

        return result
class LibraryResponseTranslator(MessageTranslator):
    """Translator for LibrarianResponse schema objects.

    Only the object-to-dict direction is implemented; ``decode`` always
    raises ``NotImplementedError``.
    """

    def __init__(self):
        self.doc_metadata_translator = DocumentMetadataTranslator()
        self.proc_metadata_translator = ProcessingMetadataTranslator()

    def decode(self, data: Dict[str, Any]) -> LibrarianResponse:
        """Not supported for responses; always raises NotImplementedError."""
        raise NotImplementedError("Response translation to Pulsar not typically needed")

    def encode(self, obj: LibrarianResponse) -> Dict[str, Any]:
        """Convert a LibrarianResponse into a dict with hyphenated keys.

        Most fields are emitted only when truthy.  The two metadata lists
        (``document-metadatas`` and ``processing-metadatas``) are emitted
        whenever they are not ``None``, so an empty list is preserved.
        """
        encode_doc = self.doc_metadata_translator.encode
        encode_proc = self.proc_metadata_translator.encode

        out: Dict[str, Any] = {}

        err = obj.error
        if err:
            out["error"] = {"type": err.type, "message": err.message}

        if obj.document_metadata:
            out["document-metadata"] = encode_doc(obj.document_metadata)

        payload = obj.content
        if payload:
            # Bytes payloads are returned as text
            if isinstance(payload, bytes):
                payload = payload.decode("utf-8")
            out["content"] = payload

        if obj.document_metadatas is not None:
            out["document-metadatas"] = [
                encode_doc(entry) for entry in obj.document_metadatas
            ]

        if obj.processing_metadatas is not None:
            out["processing-metadatas"] = [
                encode_proc(entry) for entry in obj.processing_metadatas
            ]

        # Chunked upload response fields: (output key, attribute name),
        # each copied across as-is when the attribute value is truthy.
        passthrough = (
            ("upload-id", "upload_id"),
            ("chunk-size", "chunk_size"),
            ("total-chunks", "total_chunks"),
            ("chunk-index", "chunk_index"),
            ("chunks-received", "chunks_received"),
            ("bytes-received", "bytes_received"),
            ("total-bytes", "total_bytes"),
            ("document-id", "document_id"),
            ("object-id", "object_id"),
            ("upload-state", "upload_state"),
            ("received-chunks", "received_chunks"),
            ("missing-chunks", "missing_chunks"),
        )
        for key, attr in passthrough:
            value = getattr(obj, attr)
            if value:
                out[key] = value

        sessions = obj.upload_sessions
        if sessions:
            encoded_sessions = []
            for session in sessions:
                encoded_sessions.append({
                    "upload-id": session.upload_id,
                    "document-id": session.document_id,
                    "document-metadata-json": session.document_metadata_json,
                    "total-size": session.total_size,
                    "chunk-size": session.chunk_size,
                    "total-chunks": session.total_chunks,
                    "chunks-received": session.chunks_received,
                    "created-at": session.created_at,
                })
            out["upload-sessions"] = encoded_sessions

        return out

    def encode_with_completion(self, obj: LibrarianResponse) -> Tuple[Dict[str, Any], bool]:
        """Returns (response_dict, is_final)"""
        encoded = self.encode(obj)
        return encoded, obj.is_final