Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by queue infrastructure instead of message body fields, closing a privilege-escalation vector where a caller could spoof the workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at startup, creates per-workspace consumers (queue:workspace), and manages consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for cross-workspace ops (getvalues-all-ws, bootstrapper writes to __workspaces__) and per-workspace queues for tenant-scoped ops, with workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest, LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and from DocumentMetadata / ProcessingMetadata — table stores now accept workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway serializers
- Gateway enforces workspace existence: reject requests targeting non-existent workspaces instead of routing to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can react to workspace create/delete without subclassing WorkspaceProcessor
2026-07-09 13:22:10 +02:00 · 2026-05-04 10:30:03 +01:00 · 2026-05-04 10:30:03 +01:00 · 9f2bfbce0c
commit 9f2bfbce0c
parent 9be257ceee
53 changed files with 1565 additions and 677 deletions
--- a/trustgraph-base/trustgraph/api/library.py
+++ b/trustgraph-base/trustgraph/api/library.py
@ -217,7 +217,6 @@ class Library:
                "title": title,
                "comments": comments,
                "metadata": triples,
-                "workspace": self.api.workspace,
                "tags": tags
            },
            "content": base64.b64encode(document).decode("utf-8"),
@ -249,7 +248,6 @@ class Library:
                "kind": kind,
                "title": title,
                "comments": comments,
-                "workspace": self.api.workspace,
                "tags": tags,
            },
            "total-size": total_size,
@ -377,7 +375,6 @@ class Library:
                        )
                        for w in v["metadata"]
                    ],
-                    workspace = v.get("workspace", ""),
                    tags = v["tags"],
                    parent_id = v.get("parent-id", ""),
                    document_type = v.get("document-type", "source"),
@ -436,7 +433,6 @@ class Library:
                    )
                    for w in doc["metadata"]
                ],
-                workspace = doc.get("workspace", ""),
                tags = doc["tags"],
                parent_id = doc.get("parent-id", ""),
                document_type = doc.get("document-type", "source"),
@ -485,7 +481,6 @@ class Library:
            "operation": "update-document",
            "workspace": self.api.workspace,
            "document-metadata": {
-                "workspace": self.api.workspace,
                "document-id": id,
                "time": metadata.time,
                "title": metadata.title,
@ -599,7 +594,6 @@ class Library:
                "document-id": document_id,
                "time": int(time.time()),
                "flow": flow,
-                "workspace": self.api.workspace,
                "collection": collection,
                "tags": tags,
            }
@ -681,7 +675,6 @@ class Library:
                    document_id = v["document-id"],
                    time = datetime.datetime.fromtimestamp(v["time"]),
                    flow = v["flow"],
-                    workspace = v.get("workspace", ""),
                    collection = v["collection"],
                    tags = v["tags"],
                )
@ -945,7 +938,6 @@ class Library:
                "title": title,
                "comments": comments,
                "metadata": triples,
-                "workspace": self.api.workspace,
                "tags": tags,
                "parent-id": parent_id,
                "document-type": "extracted",
--- a/trustgraph-base/trustgraph/api/types.py
+++ b/trustgraph-base/trustgraph/api/types.py
@ -65,7 +65,6 @@ class DocumentMetadata:
        title: Document title
        comments: Additional comments or description
        metadata: List of RDF triples providing structured metadata
-        workspace: Workspace the document belongs to
        tags: List of tags for categorization
        parent_id: Parent document ID for child documents (empty for top-level docs)
        document_type: "source" for uploaded documents, "extracted" for derived content
@ -76,7 +75,6 @@ class DocumentMetadata:
    title : str
    comments : str
    metadata : List[Triple]
-    workspace : str
    tags : List[str]
    parent_id : str = ""
    document_type : str = "source"
@ -91,7 +89,6 @@ class ProcessingMetadata:
        document_id: ID of the document being processed
        time: Processing start timestamp
        flow: Flow instance handling the processing
-        workspace: Workspace the processing job belongs to
        collection: Target collection for processed data
        tags: List of tags for categorization
    """
@ -99,7 +96,6 @@ class ProcessingMetadata:
    document_id : str
    time : datetime.datetime
    flow : str
-    workspace : str
    collection : str
    tags : List[str]