Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: it rejects requests targeting
  non-existent workspaces instead of routing to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
This commit is contained in:
cybermaggedon 2026-05-04 10:30:03 +01:00 committed by GitHub
parent 9be257ceee
commit 9f2bfbce0c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1565 additions and 677 deletions

View file

@ -7,6 +7,7 @@ from . publisher import Publisher
from . subscriber import Subscriber
from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics, SubscriberMetrics
from . logging import add_logging_args, setup_logging
from . workspace_processor import WorkspaceProcessor
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
from . parameter_spec import ParameterSpec

View file

@ -71,6 +71,11 @@ class AsyncProcessor:
# { "handler": async_fn, "types": set_or_none }
self.config_handlers = []
# Workspace lifecycle handlers, called when workspaces are
# created or deleted. Each entry is an async callable:
# async def handler(workspace_changes: WorkspaceChanges)
self.workspace_handlers = []
# Track the current config version for dedup
self.config_version = 0
@ -251,6 +256,10 @@ class AsyncProcessor:
"types": set(types) if types else None,
})
# Register a handler for workspace lifecycle events
def register_workspace_handler(self, handler: Callable[..., Any]) -> None:
    """Append ``handler`` to this processor's workspace lifecycle handlers.

    Handlers are kept in registration order in ``self.workspace_handlers``.
    """
    registered = self.workspace_handlers
    registered.append(handler)
# Called when a config notify message arrives
async def on_config_notify(self, message, consumer, flow):
@ -266,6 +275,16 @@ class AsyncProcessor:
)
return
# Dispatch workspace lifecycle events before config handlers
if v.workspace_changes and self.workspace_handlers:
for handler in self.workspace_handlers:
try:
await handler(v.workspace_changes)
except Exception as e:
logger.error(
f"Workspace handler failed: {e}", exc_info=True
)
notify_types = set(changes.keys())
# Filter out handlers that don't care about any of the changed

View file

@ -14,7 +14,7 @@ from .. schema import Error
from .. schema import config_request_queue, config_response_queue
from .. schema import config_push_queue
from .. log_level import LogLevel
from . async_processor import AsyncProcessor
from . workspace_processor import WorkspaceProcessor
from . flow import Flow
# Module logger
@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
# Parent class for configurable processors, configured with flows by
# the config service
class FlowProcessor(AsyncProcessor):
class FlowProcessor(WorkspaceProcessor):
def __init__(self, **params):
@ -113,7 +113,7 @@ class FlowProcessor(AsyncProcessor):
@staticmethod
def add_args(parser: ArgumentParser) -> None:
AsyncProcessor.add_args(parser)
WorkspaceProcessor.add_args(parser)
# parser.add_argument(
# '--rate-limit-retry',

View file

@ -202,7 +202,6 @@ class LibrarianClient:
doc_metadata = DocumentMetadata(
id=doc_id,
workspace=workspace,
kind=kind,
title=title or doc_id,
parent_id=parent_id,
@ -227,7 +226,6 @@ class LibrarianClient:
doc_metadata = DocumentMetadata(
id=doc_id,
workspace=workspace,
kind=kind,
title=title or doc_id,
document_type=document_type,

View file

@ -0,0 +1,66 @@
from __future__ import annotations
from argparse import ArgumentParser
import logging
from . async_processor import AsyncProcessor
logger = logging.getLogger(__name__)
WORKSPACES_NAMESPACE = "__workspaces__"
WORKSPACE_TYPE = "workspace"
class WorkspaceProcessor(AsyncProcessor):
    """Processor base class that tracks the set of active workspaces.

    The set is seeded by a discovery pass run from ``start()`` and is then
    kept current by the workspace lifecycle handler registered in
    ``__init__``. ``on_workspace_created`` / ``on_workspace_deleted`` are
    called as workspaces enter and leave the set; both do nothing here.
    """

    def __init__(self, **params):
        super().__init__(**params)

        # IDs of the workspaces this processor currently treats as active.
        self.active_workspaces = set()

        self.register_workspace_handler(self._handle_workspace_changes)

    @staticmethod
    def add_args(parser: ArgumentParser) -> None:
        """Add command-line arguments by deferring to ``AsyncProcessor``."""
        AsyncProcessor.add_args(parser)

    async def start(self):
        """Start the parent processor, then run workspace discovery."""
        await super().start()
        await self._discover_workspaces()

    async def on_workspace_created(self, workspace):
        """Hook called when a workspace becomes active; no-op by default."""
        pass

    async def on_workspace_deleted(self, workspace):
        """Hook called when an active workspace is deleted; no-op by default."""
        pass

    async def _discover_workspaces(self):
        """Fetch the known workspaces and activate any not yet tracked.

        Uses a config client that is stopped again whether or not the
        fetch succeeds.
        """
        config_client = self._create_config_client()
        try:
            await config_client.start()
            by_namespace, _version = await self._fetch_type_all_workspaces(
                config_client, WORKSPACE_TYPE,
            )
            for namespace in by_namespace:
                # Only the entry keyed by WORKSPACES_NAMESPACE is read here.
                if namespace != WORKSPACES_NAMESPACE:
                    continue
                for workspace_id in by_namespace[namespace]:
                    if workspace_id in self.active_workspaces:
                        continue
                    # Mark active before awaiting the hook so a change
                    # event seen meanwhile does not activate it twice.
                    self.active_workspaces.add(workspace_id)
                    await self.on_workspace_created(workspace_id)
        finally:
            await config_client.stop()

    async def _handle_workspace_changes(self, workspace_changes):
        """Apply a workspace change event: creations first, then deletions."""
        for workspace_id in workspace_changes.created:
            if workspace_id in self.active_workspaces:
                continue
            self.active_workspaces.add(workspace_id)
            logger.info(f"Workspace created: {workspace_id}")
            await self.on_workspace_created(workspace_id)

        for workspace_id in workspace_changes.deleted:
            if workspace_id not in self.active_workspaces:
                continue
            logger.info(f"Workspace deleted: {workspace_id}")
            # The ID is dropped only after the delete hook has returned.
            await self.on_workspace_deleted(workspace_id)
            self.active_workspaces.discard(workspace_id)