Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: reject requests targeting
  non-existent workspaces instead of routing to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
This commit is contained in:
cybermaggedon 2026-05-04 10:30:03 +01:00 committed by GitHub
parent 9be257ceee
commit 9f2bfbce0c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1565 additions and 677 deletions

View file

@ -8,7 +8,5 @@ class Metadata:
# Root document identifier (set by librarian, preserved through pipeline)
root: str = ""
# Collection the message belongs to. Workspace is NOT carried on the
# message — consumers derive it from flow.workspace (the flow the
# message arrived on), which is the trusted isolation boundary.
# Collection the message belongs to.
collection: str = ""

View file

@ -17,7 +17,7 @@ from .embeddings import GraphEmbeddings
# <- (error)
# list-kg-cores
# -> (workspace)
# -> ()
# <- ()
# <- (error)
@ -27,9 +27,6 @@ class KnowledgeRequest:
# load-kg-core, unload-kg-core
operation: str = ""
# Workspace the cores belong to. Partition / isolation boundary.
workspace: str = ""
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
# load-kg-core, unload-kg-core
id: str = ""

View file

@ -22,17 +22,9 @@ class CollectionMetadata:
@dataclass
class CollectionManagementRequest:
"""Request for collection management operations.
Collection-management is a global (non-flow-scoped) service, so the
workspace has to travel on the wire it's the isolation boundary
for which workspace's collections the request operates on.
"""
"""Request for collection management operations."""
operation: str = "" # e.g., "delete-collection"
# Workspace the collection belongs to.
workspace: str = ""
collection: str = ""
timestamp: str = "" # ISO timestamp
name: str = ""

View file

@ -70,6 +70,11 @@ class ConfigResponse:
# Everything
error: Error | None = None
@dataclass
class WorkspaceChanges:
created: list[str] = field(default_factory=list)
deleted: list[str] = field(default_factory=list)
@dataclass
class ConfigPush:
version: int = 0
@ -80,6 +85,10 @@ class ConfigPush:
# e.g. {"prompt": ["workspace-a", "workspace-b"], "schema": ["workspace-a"]}
changes: dict[str, list[str]] = field(default_factory=dict)
# Workspace lifecycle events. Populated when a workspace entry
# is created or deleted in the __workspaces__ config namespace.
workspace_changes: WorkspaceChanges | None = None
config_request_queue = queue('config', cls='request')
config_response_queue = queue('config', cls='response')
config_push_queue = queue('config', cls='notify')

View file

@ -22,9 +22,6 @@ class FlowRequest:
operation: str = "" # list-blueprints, get-blueprint, put-blueprint, delete-blueprint
# list-flows, get-flow, start-flow, stop-flow
# Workspace scope — all operations act within this workspace
workspace: str = ""
# get_blueprint, put_blueprint, delete_blueprint, start_flow
blueprint_name: str = ""

View file

@ -43,12 +43,12 @@ from ..core.metadata import Metadata
# <- (error)
# list-documents
# -> (workspace, collection?)
# -> (collection?)
# <- (document_metadata[])
# <- (error)
# list-processing
# -> (workspace, collection?)
# -> (collection?)
# <- (processing_metadata[])
# <- (error)
@ -78,7 +78,7 @@ from ..core.metadata import Metadata
# <- (error)
# list-uploads
# -> (workspace)
# -> ()
# <- (uploads[])
# <- (error)
@ -90,7 +90,6 @@ class DocumentMetadata:
title: str = ""
comments: str = ""
metadata: list[Triple] = field(default_factory=list)
workspace: str = ""
tags: list[str] = field(default_factory=list)
# Child document support
parent_id: str = "" # Empty for top-level docs, set for children
@ -107,7 +106,6 @@ class ProcessingMetadata:
document_id: str = ""
time: int = 0
flow: str = ""
workspace: str = ""
collection: str = ""
tags: list[str] = field(default_factory=list)
@ -162,9 +160,6 @@ class LibrarianRequest:
# add-document, upload-chunk
content: bytes = b""
# Workspace scopes every library operation.
workspace: str = ""
# list-documents?, list-processing?
collection: str = ""