trustgraph/trustgraph-base/trustgraph/messaging/translators/metadata.py
Cyber MacGeddon 115e325071 Per-workspace queue routing for workspace-scoped services
Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, and knowledge cores
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter for Cassandra partition keys
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Gateway enforces workspace requirement for workspace dispatchers —
  config moves from system_dispatchers to workspace_dispatchers so the
  gateway can never route to the system config queue
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
2026-05-01 16:24:30 +01:00

81 lines
2.6 KiB
Python

from typing import Dict, Any, Optional
from ...schema import DocumentMetadata, ProcessingMetadata
from .base import Translator
from .primitives import SubgraphTranslator
class DocumentMetadataTranslator(Translator):
    """Map DocumentMetadata objects to and from plain dictionaries.

    The dictionary form uses hyphenated keys ("parent-id",
    "document-type") where the schema object uses underscored attribute
    names (parent_id, document_type). The nested "metadata" entry is
    delegated to a SubgraphTranslator in both directions.
    """

    def __init__(self):
        # Used by decode() and encode() for the nested "metadata" entry.
        self.subgraph_translator = SubgraphTranslator()

    def decode(self, data: Dict[str, Any]) -> DocumentMetadata:
        """Build a DocumentMetadata from its dictionary form.

        Keys that are absent from ``data`` come through as None, except
        "parent-id" (defaults to "") and "document-type" (defaults to
        "source"). A "metadata" value of None becomes an empty list; any
        other value, including the default empty list used when the key
        is absent, is passed to the subgraph translator.
        """
        raw_subgraph = data.get("metadata", [])
        if raw_subgraph is None:
            subgraph = []
        else:
            subgraph = self.subgraph_translator.decode(raw_subgraph)

        return DocumentMetadata(
            id=data.get("id"),
            time=data.get("time"),
            kind=data.get("kind"),
            title=data.get("title"),
            comments=data.get("comments"),
            metadata=subgraph,
            tags=data.get("tags"),
            parent_id=data.get("parent-id", ""),
            document_type=data.get("document-type", "source"),
        )

    def encode(self, obj: DocumentMetadata) -> Dict[str, Any]:
        """Render a DocumentMetadata as a dictionary.

        Scalar fields are emitted only when truthy. "metadata" and
        "tags" are emitted whenever they are not None, so empty
        collections are kept in the output.
        """
        encoded: Dict[str, Any] = {}

        # Leading scalar fields: dictionary key and attribute name match.
        for field in ("id", "time", "kind", "title", "comments"):
            value = getattr(obj, field)
            if value:
                encoded[field] = value

        subgraph = obj.metadata
        if subgraph is not None:
            encoded["metadata"] = self.subgraph_translator.encode(subgraph)

        tags = obj.tags
        if tags is not None:
            encoded["tags"] = tags

        # Trailing scalar fields: hyphenated key, underscored attribute.
        for key, attr in (
            ("parent-id", "parent_id"),
            ("document-type", "document_type"),
        ):
            value = getattr(obj, attr)
            if value:
                encoded[key] = value

        return encoded
class ProcessingMetadataTranslator(Translator):
    """Map ProcessingMetadata objects to and from plain dictionaries.

    The dictionary form uses the hyphenated key "document-id" where the
    schema object uses the attribute name document_id; the other keys
    match their attribute names.
    """

    def decode(self, data: Dict[str, Any]) -> ProcessingMetadata:
        """Build a ProcessingMetadata from its dictionary form.

        Every field is read with ``data.get``, so keys that are absent
        from ``data`` come through as None.
        """
        lookup = data.get
        return ProcessingMetadata(
            id=lookup("id"),
            document_id=lookup("document-id"),
            time=lookup("time"),
            flow=lookup("flow"),
            collection=lookup("collection"),
            tags=lookup("tags"),
        )

    def encode(self, obj: ProcessingMetadata) -> Dict[str, Any]:
        """Render a ProcessingMetadata as a dictionary.

        Scalar fields are emitted only when truthy. "tags" is emitted
        whenever it is not None, so an empty collection is kept.
        """
        encoded: Dict[str, Any] = {}

        # (dictionary key, attribute name) pairs in output order.
        for key, attr in (
            ("id", "id"),
            ("document-id", "document_id"),
            ("time", "time"),
            ("flow", "flow"),
            ("collection", "collection"),
        ):
            value = getattr(obj, attr)
            if value:
                encoded[key] = value

        tags = obj.tags
        if tags is not None:
            encoded["tags"] = tags

        return encoded