Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: reject requests targeting
  non-existent workspaces instead of routing to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
This commit is contained in:
cybermaggedon 2026-05-04 10:30:03 +01:00 committed by GitHub
parent 9be257ceee
commit 9f2bfbce0c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1565 additions and 677 deletions

View file

@ -30,7 +30,6 @@ class TestDocumentMetadataTranslator:
"title": "Test Document",
"comments": "No comments",
"metadata": [],
"workspace": "alice",
"tags": ["finance", "q4"],
"parent-id": "doc-100",
"document-type": "page",
@ -40,14 +39,12 @@ class TestDocumentMetadataTranslator:
assert obj.time == 1710000000
assert obj.kind == "application/pdf"
assert obj.title == "Test Document"
assert obj.workspace == "alice"
assert obj.tags == ["finance", "q4"]
assert obj.parent_id == "doc-100"
assert obj.document_type == "page"
wire = self.tx.encode(obj)
assert wire["id"] == "doc-123"
assert wire["workspace"] == "alice"
assert wire["parent-id"] == "doc-100"
assert wire["document-type"] == "page"
@ -80,10 +77,9 @@ class TestDocumentMetadataTranslator:
def test_falsy_fields_omitted_from_wire(self):
"""Empty string fields should be omitted from wire format."""
obj = DocumentMetadata(id="", time=0, workspace="")
obj = DocumentMetadata(id="", time=0)
wire = self.tx.encode(obj)
assert "id" not in wire
assert "workspace" not in wire
# ---------------------------------------------------------------------------
@ -101,7 +97,6 @@ class TestProcessingMetadataTranslator:
"document-id": "doc-123",
"time": 1710000000,
"flow": "default",
"workspace": "alice",
"collection": "my-collection",
"tags": ["tag1"],
}
@ -109,20 +104,17 @@ class TestProcessingMetadataTranslator:
assert obj.id == "proc-1"
assert obj.document_id == "doc-123"
assert obj.flow == "default"
assert obj.workspace == "alice"
assert obj.collection == "my-collection"
assert obj.tags == ["tag1"]
wire = self.tx.encode(obj)
assert wire["id"] == "proc-1"
assert wire["document-id"] == "doc-123"
assert wire["workspace"] == "alice"
assert wire["collection"] == "my-collection"
def test_missing_fields_use_defaults(self):
obj = self.tx.decode({})
assert obj.id is None
assert obj.workspace is None
assert obj.collection is None
def test_tags_none_omitted(self):
@ -135,10 +127,9 @@ class TestProcessingMetadataTranslator:
wire = self.tx.encode(obj)
assert wire["tags"] == []
def test_workspace_and_collection_preserved(self):
def test_collection_preserved(self):
"""Core pipeline routing fields must survive round-trip."""
data = {"workspace": "bob", "collection": "research"}
data = {"collection": "research"}
obj = self.tx.decode(data)
wire = self.tx.encode(obj)
assert wire["workspace"] == "bob"
assert wire["collection"] == "research"