trustgraph/trustgraph-flow/trustgraph/bootstrap/initialisers/default_flow_start.py
cybermaggedon 9f2bfbce0c
Per-workspace queue routing for workspace-scoped services (#862)
Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: reject requests targeting
  non-existent workspaces instead of routing to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
2026-05-04 10:30:03 +01:00

115 lines
3.5 KiB
Python

"""
DefaultFlowStart initialiser — starts a named flow in a workspace
using a specified blueprint.
Separated from WorkspaceInit so deployments that want a workspace
without an auto-started flow can simply omit this initialiser.
Parameters
----------
workspace : str (default "default")
Workspace in which to start the flow.
flow_id : str (default "default")
Identifier for the started flow.
blueprint : str (required)
Blueprint name (must already exist in the workspace's config,
typically via TemplateSeed -> WorkspaceInit).
description : str (default "Default")
Human-readable description passed to flow-svc.
parameters : dict (optional)
Optional parameter overrides passed to start-flow.
"""
from trustgraph.schema import FlowRequest
from .. base import Initialiser
class DefaultFlowStart(Initialiser):
    """Initialiser that starts a single named flow inside one workspace.

    The constructor records which workspace, flow id, blueprint,
    description and parameter overrides to use.  ``run`` then verifies
    that the workspace is registered in config, asks flow-svc for the
    flows it already knows about and, only when the configured flow id
    is absent from that list, issues a ``start-flow`` request.
    """

    def __init__(
        self,
        workspace="default",
        flow_id="default",
        blueprint=None,
        description="Default",
        parameters=None,
        **kwargs,
    ):
        """Store the flow settings after validating that a blueprint is set.

        Raises
        ------
        ValueError
            If ``blueprint`` is missing or empty.
        """
        super().__init__(**kwargs)

        if not blueprint:
            raise ValueError(
                "DefaultFlowStart requires 'blueprint'"
            )

        self.workspace = workspace
        self.flow_id = flow_id
        self.blueprint = blueprint
        self.description = description
        # Copy the caller's mapping so later mutation on their side does
        # not leak into this initialiser; fall back to an empty dict.
        self.parameters = {} if not parameters else dict(parameters)

    @staticmethod
    def _raise_on_error(reply, operation):
        """Raise RuntimeError when a flow-svc reply carries an error."""
        failure = reply.error
        if failure:
            raise RuntimeError(
                f"{operation} failed: "
                f"{failure.type}: {failure.message}"
            )

    async def _already_running(self, client):
        """Return True if the configured flow id appears in list-flows."""
        listing = await client.request(
            FlowRequest(operation="list-flows"),
            timeout=10,
        )
        self._raise_on_error(listing, "list-flows")
        return self.flow_id in (listing.flow_ids or [])

    async def _launch(self, ctx, client):
        """Send the start-flow request and log the outcome."""
        ctx.logger.info(
            f"Starting flow {self.flow_id!r} "
            f"(blueprint={self.blueprint!r}) "
            f"in workspace {self.workspace!r}"
        )
        started = await client.request(
            FlowRequest(
                operation="start-flow",
                flow_id=self.flow_id,
                blueprint_name=self.blueprint,
                description=self.description,
                parameters=self.parameters,
            ),
            timeout=30,
        )
        self._raise_on_error(started, "start-flow")
        ctx.logger.info(f"Flow {self.flow_id!r} started")

    async def run(self, ctx, old_flag, new_flag):
        """Start the configured flow unless it is already listed.

        ``old_flag`` and ``new_flag`` are accepted but not consulted
        here.

        Raises
        ------
        RuntimeError
            If the workspace is not among the config keys under
            ``__workspaces__`` / ``workspace``, or if either the
            ``list-flows`` or ``start-flow`` reply carries an error.
        """
        known_workspaces = await ctx.config.keys(
            "__workspaces__", "workspace",
        )
        if self.workspace not in known_workspaces:
            raise RuntimeError(
                f"Workspace {self.workspace!r} does not exist yet"
            )

        client = ctx.make_flow_client(self.workspace)
        await client.start()

        try:
            # Belt-and-braces beyond the flag gate: if an operator stops
            # and restarts the bootstrapper while the flow is already
            # running, we must not blindly try to start it a second time.
            if await self._already_running(client):
                ctx.logger.info(
                    f"Flow {self.flow_id!r} already running in workspace "
                    f"{self.workspace!r}; nothing to do"
                )
            else:
                await self._launch(ctx, client)
        finally:
            # Always release the client, whether we started the flow,
            # skipped it, or failed part-way through.
            await client.stop()