Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by the queue infrastructure rather
than by message body fields, closing a privilege-escalation vector where
a caller could spoof the workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: reject requests targeting
  non-existent workspaces instead of routing them to queues that have no
  consumer (see the routing sketch after this list)
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
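
As a rough illustration of the routing model described above: the gateway
derives the destination queue from a workspace it has already validated,
never from the request payload. A minimal sketch; WorkspaceRouter,
known_workspaces, queue_for, and the example workspace names are all
illustrative assumptions, not the actual gateway code in this commit:

    # Sketch only: queue-per-workspace routing with up-front validation.
    class WorkspaceRouter:

        def __init__(self, known_workspaces: set[str]):
            self.known_workspaces = known_workspaces

        def queue_for(self, base_queue: str, workspace: str) -> str:
            # Reject up front rather than publishing to a queue nobody consumes
            if workspace not in self.known_workspaces:
                raise ValueError(f"unknown workspace: {workspace}")
            # Per-workspace queue naming convention (queue:workspace)
            return f"{base_queue}:{workspace}"

    router = WorkspaceRouter({"default", "workspace-a"})
    assert router.queue_for("librarian", "workspace-a") == "librarian:workspace-a"
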
cybermaggedon 2026-05-04 10:30:03 +01:00 committed by GitHub
parent 9be257ceee
commit 9f2bfbce0c
53 changed files with 1565 additions and 677 deletions

View file

@@ -217,7 +217,6 @@ class Library:
             "title": title,
             "comments": comments,
             "metadata": triples,
-            "workspace": self.api.workspace,
             "tags": tags
         },
         "content": base64.b64encode(document).decode("utf-8"),
@@ -249,7 +248,6 @@ class Library:
             "kind": kind,
             "title": title,
             "comments": comments,
-            "workspace": self.api.workspace,
             "tags": tags,
         },
         "total-size": total_size,
@@ -377,7 +375,6 @@ class Library:
                 )
                 for w in v["metadata"]
             ],
-            workspace = v.get("workspace", ""),
             tags = v["tags"],
             parent_id = v.get("parent-id", ""),
             document_type = v.get("document-type", "source"),
@@ -436,7 +433,6 @@ class Library:
                 )
                 for w in doc["metadata"]
             ],
-            workspace = doc.get("workspace", ""),
             tags = doc["tags"],
             parent_id = doc.get("parent-id", ""),
             document_type = doc.get("document-type", "source"),
@@ -485,7 +481,6 @@ class Library:
             "operation": "update-document",
             "workspace": self.api.workspace,
             "document-metadata": {
-                "workspace": self.api.workspace,
                 "document-id": id,
                 "time": metadata.time,
                 "title": metadata.title,
@@ -599,7 +594,6 @@ class Library:
             "document-id": document_id,
             "time": int(time.time()),
             "flow": flow,
-            "workspace": self.api.workspace,
             "collection": collection,
             "tags": tags,
         }
@@ -681,7 +675,6 @@ class Library:
                 document_id = v["document-id"],
                 time = datetime.datetime.fromtimestamp(v["time"]),
                 flow = v["flow"],
-                workspace = v.get("workspace", ""),
                 collection = v["collection"],
                 tags = v["tags"],
             )
@@ -945,7 +938,6 @@ class Library:
             "title": title,
             "comments": comments,
             "metadata": triples,
-            "workspace": self.api.workspace,
             "tags": tags,
             "parent-id": parent_id,
             "document-type": "extracted",

View file

@@ -65,7 +65,6 @@ class DocumentMetadata:
         title: Document title
         comments: Additional comments or description
         metadata: List of RDF triples providing structured metadata
-        workspace: Workspace the document belongs to
         tags: List of tags for categorization
         parent_id: Parent document ID for child documents (empty for top-level docs)
         document_type: "source" for uploaded documents, "extracted" for derived content
@@ -76,7 +75,6 @@ class DocumentMetadata:
     title : str
     comments : str
     metadata : List[Triple]
-    workspace : str
     tags : List[str]
     parent_id : str = ""
     document_type : str = "source"
@@ -91,7 +89,6 @@ class ProcessingMetadata:
         document_id: ID of the document being processed
         time: Processing start timestamp
         flow: Flow instance handling the processing
-        workspace: Workspace the processing job belongs to
         collection: Target collection for processed data
         tags: List of tags for categorization
     """
@@ -99,7 +96,6 @@ class ProcessingMetadata:
     document_id : str
     time : datetime.datetime
     flow : str
-    workspace : str
     collection : str
     tags : List[str]

View file

@@ -7,6 +7,7 @@ from . publisher import Publisher
 from . subscriber import Subscriber
 from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics, SubscriberMetrics
 from . logging import add_logging_args, setup_logging
+from . workspace_processor import WorkspaceProcessor
 from . flow_processor import FlowProcessor
 from . consumer_spec import ConsumerSpec
 from . parameter_spec import ParameterSpec

View file

@@ -71,6 +71,11 @@ class AsyncProcessor:
         # { "handler": async_fn, "types": set_or_none }
         self.config_handlers = []

+        # Workspace lifecycle handlers, called when workspaces are
+        # created or deleted. Each entry is an async callable:
+        #   async def handler(workspace_changes: WorkspaceChanges)
+        self.workspace_handlers = []
+
         # Track the current config version for dedup
         self.config_version = 0
@@ -251,6 +256,10 @@ class AsyncProcessor:
             "types": set(types) if types else None,
         })

+    # Register a handler for workspace lifecycle events
+    def register_workspace_handler(self, handler: Callable[..., Any]) -> None:
+        self.workspace_handlers.append(handler)
+
     # Called when a config notify message arrives
     async def on_config_notify(self, message, consumer, flow):
@@ -266,6 +275,16 @@ class AsyncProcessor:
             )
             return

+        # Dispatch workspace lifecycle events before config handlers
+        if v.workspace_changes and self.workspace_handlers:
+            for handler in self.workspace_handlers:
+                try:
+                    await handler(v.workspace_changes)
+                except Exception as e:
+                    logger.error(
+                        f"Workspace handler failed: {e}", exc_info=True
+                    )
+
         notify_types = set(changes.keys())

         # Filter out handlers that don't care about any of the changed
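
A usage sketch of the hooks added above; the subclass name and handler
body are illustrative, not part of this commit:

    import logging

    logger = logging.getLogger(__name__)

    # Any AsyncProcessor subclass can observe workspace lifecycle events
    # without subclassing WorkspaceProcessor.
    class AuditingProcessor(AsyncProcessor):

        def __init__(self, **params):
            super().__init__(**params)
            self.register_workspace_handler(self.on_workspaces_changed)

        # Receives the WorkspaceChanges dataclass from the config schema
        async def on_workspaces_changed(self, workspace_changes):
            for ws in workspace_changes.created:
                logger.info(f"workspace created: {ws}")
            for ws in workspace_changes.deleted:
                logger.info(f"workspace deleted: {ws}")
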

View file

@@ -14,7 +14,7 @@ from .. schema import Error
 from .. schema import config_request_queue, config_response_queue
 from .. schema import config_push_queue
 from .. log_level import LogLevel
-from . async_processor import AsyncProcessor
+from . workspace_processor import WorkspaceProcessor
 from . flow import Flow

 # Module logger
@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)

 # Parent class for configurable processors, configured with flows by
 # the config service
-class FlowProcessor(AsyncProcessor):
+class FlowProcessor(WorkspaceProcessor):

     def __init__(self, **params):
@@ -113,7 +113,7 @@ class FlowProcessor(AsyncProcessor):
     @staticmethod
     def add_args(parser: ArgumentParser) -> None:
-        AsyncProcessor.add_args(parser)
+        WorkspaceProcessor.add_args(parser)

         # parser.add_argument(
         #     '--rate-limit-retry',

View file

@@ -202,7 +202,6 @@ class LibrarianClient:
         doc_metadata = DocumentMetadata(
             id=doc_id,
-            workspace=workspace,
             kind=kind,
             title=title or doc_id,
             parent_id=parent_id,
@@ -227,7 +226,6 @@ class LibrarianClient:
         doc_metadata = DocumentMetadata(
             id=doc_id,
-            workspace=workspace,
             kind=kind,
             title=title or doc_id,
             document_type=document_type,

View file

@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from argparse import ArgumentParser
+import logging
+
+from . async_processor import AsyncProcessor
+
+logger = logging.getLogger(__name__)
+
+WORKSPACES_NAMESPACE = "__workspaces__"
+WORKSPACE_TYPE = "workspace"
+
+class WorkspaceProcessor(AsyncProcessor):
+
+    def __init__(self, **params):
+        super(WorkspaceProcessor, self).__init__(**params)
+        self.active_workspaces = set()
+        self.register_workspace_handler(self._handle_workspace_changes)
+
+    async def _discover_workspaces(self):
+        client = self._create_config_client()
+        try:
+            await client.start()
+            type_data, version = await self._fetch_type_all_workspaces(
+                client, WORKSPACE_TYPE,
+            )
+            for ws in type_data:
+                if ws == WORKSPACES_NAMESPACE:
+                    for workspace_id in type_data[ws]:
+                        if workspace_id not in self.active_workspaces:
+                            self.active_workspaces.add(workspace_id)
+                            await self.on_workspace_created(workspace_id)
+        finally:
+            await client.stop()
+
+    async def _handle_workspace_changes(self, workspace_changes):
+        for workspace_id in workspace_changes.created:
+            if workspace_id not in self.active_workspaces:
+                self.active_workspaces.add(workspace_id)
+                logger.info(f"Workspace created: {workspace_id}")
+                await self.on_workspace_created(workspace_id)
+        for workspace_id in workspace_changes.deleted:
+            if workspace_id in self.active_workspaces:
+                logger.info(f"Workspace deleted: {workspace_id}")
+                await self.on_workspace_deleted(workspace_id)
+                self.active_workspaces.discard(workspace_id)
+
+    async def on_workspace_created(self, workspace):
+        pass
+
+    async def on_workspace_deleted(self, workspace):
+        pass
+
+    async def start(self):
+        await super(WorkspaceProcessor, self).start()
+        await self._discover_workspaces()
+
+    @staticmethod
+    def add_args(parser: ArgumentParser) -> None:
+        AsyncProcessor.add_args(parser)
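
A possible subclass, sketched under the assumption that a service keeps
one consumer per workspace queue; start_consumer and the consumer's
stop() are hypothetical stand-ins for whatever per-workspace resource a
real processor manages:

    class PerWorkspaceService(WorkspaceProcessor):

        def __init__(self, **params):
            super().__init__(**params)
            self.consumers = {}

        async def on_workspace_created(self, workspace):
            # Hypothetical: attach a consumer to this workspace's queue
            self.consumers[workspace] = await self.start_consumer(
                f"my-svc:{workspace}"
            )

        async def on_workspace_deleted(self, workspace):
            # Tear down the per-workspace consumer, if any
            consumer = self.consumers.pop(workspace, None)
            if consumer is not None:
                await consumer.stop()
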

View file

@@ -9,7 +9,6 @@ class CollectionManagementRequestTranslator(MessageTranslator):
     def decode(self, data: Dict[str, Any]) -> CollectionManagementRequest:
         return CollectionManagementRequest(
             operation=data.get("operation"),
-            workspace=data.get("workspace", ""),
             collection=data.get("collection"),
             timestamp=data.get("timestamp"),
             name=data.get("name"),
@@ -24,8 +23,6 @@ class CollectionManagementRequestTranslator(MessageTranslator):
         if obj.operation is not None:
             result["operation"] = obj.operation
-        if obj.workspace:
-            result["workspace"] = obj.workspace
         if obj.collection is not None:
             result["collection"] = obj.collection
         if obj.timestamp is not None:

View file

@@ -9,7 +9,6 @@ class FlowRequestTranslator(MessageTranslator):
     def decode(self, data: Dict[str, Any]) -> FlowRequest:
         return FlowRequest(
             operation=data.get("operation"),
-            workspace=data.get("workspace", ""),
             blueprint_name=data.get("blueprint-name"),
             blueprint_definition=data.get("blueprint-definition"),
             description=data.get("description"),
@@ -22,8 +21,6 @@ class FlowRequestTranslator(MessageTranslator):
         if obj.operation is not None:
             result["operation"] = obj.operation
-        if obj.workspace is not None:
-            result["workspace"] = obj.workspace
         if obj.blueprint_name is not None:
             result["blueprint-name"] = obj.blueprint_name
         if obj.blueprint_definition is not None:

View file

@@ -45,7 +45,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
         return KnowledgeRequest(
             operation=data.get("operation"),
-            workspace=data.get("workspace", ""),
             id=data.get("id"),
             flow=data.get("flow"),
             collection=data.get("collection"),
@@ -58,8 +57,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
         if obj.operation:
             result["operation"] = obj.operation
-        if obj.workspace:
-            result["workspace"] = obj.workspace
         if obj.id:
             result["id"] = obj.id
         if obj.flow:

View file

@@ -49,7 +49,6 @@ class LibraryRequestTranslator(MessageTranslator):
             document_metadata=doc_metadata,
             processing_metadata=proc_metadata,
             content=content,
-            workspace=data.get("workspace", ""),
             collection=data.get("collection", ""),
             criteria=criteria,
             # Chunked upload fields
@@ -76,8 +75,6 @@ class LibraryRequestTranslator(MessageTranslator):
             result["processing-metadata"] = self.proc_metadata_translator.encode(obj.processing_metadata)
         if obj.content:
             result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content
-        if obj.workspace:
-            result["workspace"] = obj.workspace
         if obj.collection:
             result["collection"] = obj.collection
         if obj.criteria is not None:

View file

@@ -19,7 +19,6 @@ class DocumentMetadataTranslator(Translator):
             title=data.get("title"),
             comments=data.get("comments"),
             metadata=self.subgraph_translator.decode(metadata) if metadata is not None else [],
-            workspace=data.get("workspace"),
             tags=data.get("tags"),
             parent_id=data.get("parent-id", ""),
             document_type=data.get("document-type", "source"),
@@ -40,8 +39,6 @@ class DocumentMetadataTranslator(Translator):
             result["comments"] = obj.comments
         if obj.metadata is not None:
             result["metadata"] = self.subgraph_translator.encode(obj.metadata)
-        if obj.workspace:
-            result["workspace"] = obj.workspace
         if obj.tags is not None:
             result["tags"] = obj.tags
         if obj.parent_id:
@@ -61,7 +58,6 @@ class ProcessingMetadataTranslator(Translator):
             document_id=data.get("document-id"),
             time=data.get("time"),
             flow=data.get("flow"),
-            workspace=data.get("workspace"),
             collection=data.get("collection"),
             tags=data.get("tags")
         )
@@ -77,8 +73,6 @@ class ProcessingMetadataTranslator(Translator):
             result["time"] = obj.time
         if obj.flow:
             result["flow"] = obj.flow
-        if obj.workspace:
-            result["workspace"] = obj.workspace
         if obj.collection:
             result["collection"] = obj.collection
         if obj.tags is not None:

View file

@@ -8,7 +8,5 @@ class Metadata:
     # Root document identifier (set by librarian, preserved through pipeline)
     root: str = ""

-    # Collection the message belongs to. Workspace is NOT carried on the
-    # message — consumers derive it from flow.workspace (the flow the
-    # message arrived on), which is the trusted isolation boundary.
+    # Collection the message belongs to.
     collection: str = ""

View file

@@ -17,7 +17,7 @@ from .embeddings import GraphEmbeddings
 # <- (error)
 #
 # list-kg-cores
-# -> (workspace)
+# -> ()
 # <- ()
 # <- (error)
 #
@@ -27,9 +27,6 @@ class KnowledgeRequest:
     # load-kg-core, unload-kg-core
     operation: str = ""

-    # Workspace the cores belong to. Partition / isolation boundary.
-    workspace: str = ""
-
     # get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
     # load-kg-core, unload-kg-core
     id: str = ""

View file

@@ -22,17 +22,9 @@ class CollectionMetadata:

 @dataclass
 class CollectionManagementRequest:
-    """Request for collection management operations.
-
-    Collection-management is a global (non-flow-scoped) service, so the
-    workspace has to travel on the wire; it's the isolation boundary
-    for which workspace's collections the request operates on.
-    """
+    """Request for collection management operations."""

     operation: str = ""  # e.g., "delete-collection"

-    # Workspace the collection belongs to.
-    workspace: str = ""
-
     collection: str = ""
     timestamp: str = ""  # ISO timestamp
     name: str = ""

View file

@@ -70,6 +70,11 @@ class ConfigResponse:

     # Everything
     error: Error | None = None

+@dataclass
+class WorkspaceChanges:
+    created: list[str] = field(default_factory=list)
+    deleted: list[str] = field(default_factory=list)
+
 @dataclass
 class ConfigPush:
     version: int = 0
@@ -80,6 +85,10 @@ class ConfigPush:
     # e.g. {"prompt": ["workspace-a", "workspace-b"], "schema": ["workspace-a"]}
     changes: dict[str, list[str]] = field(default_factory=dict)

+    # Workspace lifecycle events. Populated when a workspace entry
+    # is created or deleted in the __workspaces__ config namespace.
+    workspace_changes: WorkspaceChanges | None = None
+
 config_request_queue = queue('config', cls='request')
 config_response_queue = queue('config', cls='response')
 config_push_queue = queue('config', cls='notify')
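
A small sketch of what a push carrying lifecycle events looks like to a
consumer; the version, changes dict, and workspace names are illustrative
values only:

    # Constructing and inspecting a ConfigPush with workspace lifecycle events
    push = ConfigPush(
        version=42,
        changes={"workspace": ["__workspaces__"]},
        workspace_changes=WorkspaceChanges(created=["workspace-b"]),
    )

    if push.workspace_changes is not None:
        for ws in push.workspace_changes.created:
            print(f"provision per-workspace resources for {ws}")
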

View file

@@ -22,9 +22,6 @@ class FlowRequest:
     operation: str = ""  # list-blueprints, get-blueprint, put-blueprint, delete-blueprint
                          # list-flows, get-flow, start-flow, stop-flow

-    # Workspace scope — all operations act within this workspace
-    workspace: str = ""
-
     # get_blueprint, put_blueprint, delete_blueprint, start_flow
     blueprint_name: str = ""

View file

@@ -43,12 +43,12 @@ from ..core.metadata import Metadata
 # <- (error)
 #
 # list-documents
-# -> (workspace, collection?)
+# -> (collection?)
 # <- (document_metadata[])
 # <- (error)
 #
 # list-processing
-# -> (workspace, collection?)
+# -> (collection?)
 # <- (processing_metadata[])
 # <- (error)
@@ -78,7 +78,7 @@ from ..core.metadata import Metadata
 # <- (error)
 #
 # list-uploads
-# -> (workspace)
+# -> ()
 # <- (uploads[])
 # <- (error)
 #
@@ -90,7 +89,6 @@ class DocumentMetadata:
     title: str = ""
     comments: str = ""
     metadata: list[Triple] = field(default_factory=list)
-    workspace: str = ""
     tags: list[str] = field(default_factory=list)

     # Child document support
     parent_id: str = ""  # Empty for top-level docs, set for children
@@ -107,7 +106,6 @@ class ProcessingMetadata:
     document_id: str = ""
     time: int = 0
     flow: str = ""
-    workspace: str = ""
     collection: str = ""
     tags: list[str] = field(default_factory=list)
@@ -162,9 +160,6 @@ class LibrarianRequest:
     # add-document, upload-chunk
     content: bytes = b""

-    # Workspace scopes every library operation.
-    workspace: str = ""
-
     # list-documents?, list-processing?
     collection: str = ""