Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof the workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: it rejects requests targeting
  non-existent workspaces instead of routing them to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
This commit is contained in:
cybermaggedon 2026-05-04 10:30:03 +01:00 committed by GitHub
parent 9be257ceee
commit 9f2bfbce0c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1565 additions and 677 deletions

View file

@ -217,7 +217,6 @@ class Library:
"title": title,
"comments": comments,
"metadata": triples,
"workspace": self.api.workspace,
"tags": tags
},
"content": base64.b64encode(document).decode("utf-8"),
@ -249,7 +248,6 @@ class Library:
"kind": kind,
"title": title,
"comments": comments,
"workspace": self.api.workspace,
"tags": tags,
},
"total-size": total_size,
@ -377,7 +375,6 @@ class Library:
)
for w in v["metadata"]
],
workspace = v.get("workspace", ""),
tags = v["tags"],
parent_id = v.get("parent-id", ""),
document_type = v.get("document-type", "source"),
@ -436,7 +433,6 @@ class Library:
)
for w in doc["metadata"]
],
workspace = doc.get("workspace", ""),
tags = doc["tags"],
parent_id = doc.get("parent-id", ""),
document_type = doc.get("document-type", "source"),
@ -485,7 +481,6 @@ class Library:
"operation": "update-document",
"workspace": self.api.workspace,
"document-metadata": {
"workspace": self.api.workspace,
"document-id": id,
"time": metadata.time,
"title": metadata.title,
@ -599,7 +594,6 @@ class Library:
"document-id": document_id,
"time": int(time.time()),
"flow": flow,
"workspace": self.api.workspace,
"collection": collection,
"tags": tags,
}
@ -681,7 +675,6 @@ class Library:
document_id = v["document-id"],
time = datetime.datetime.fromtimestamp(v["time"]),
flow = v["flow"],
workspace = v.get("workspace", ""),
collection = v["collection"],
tags = v["tags"],
)
@ -945,7 +938,6 @@ class Library:
"title": title,
"comments": comments,
"metadata": triples,
"workspace": self.api.workspace,
"tags": tags,
"parent-id": parent_id,
"document-type": "extracted",

View file

@ -65,7 +65,6 @@ class DocumentMetadata:
title: Document title
comments: Additional comments or description
metadata: List of RDF triples providing structured metadata
workspace: Workspace the document belongs to
tags: List of tags for categorization
parent_id: Parent document ID for child documents (empty for top-level docs)
document_type: "source" for uploaded documents, "extracted" for derived content
@ -76,7 +75,6 @@ class DocumentMetadata:
title : str
comments : str
metadata : List[Triple]
workspace : str
tags : List[str]
parent_id : str = ""
document_type : str = "source"
@ -91,7 +89,6 @@ class ProcessingMetadata:
document_id: ID of the document being processed
time: Processing start timestamp
flow: Flow instance handling the processing
workspace: Workspace the processing job belongs to
collection: Target collection for processed data
tags: List of tags for categorization
"""
@ -99,7 +96,6 @@ class ProcessingMetadata:
document_id : str
time : datetime.datetime
flow : str
workspace : str
collection : str
tags : List[str]