Fix initial flow

This commit is contained in:
Cyber MacGeddon 2026-05-01 16:45:34 +01:00
parent 115e325071
commit 3689e75ffa
3 changed files with 78 additions and 65 deletions

View file

@@ -21,7 +21,7 @@ class InitContext:
logger: logging.Logger logger: logging.Logger
config: Any # ConfigClient config: Any # ConfigClient
flow: Any # RequestResponse client for flow-svc make_flow_client: Any # callable(workspace) -> RequestResponse
class Initialiser: class Initialiser:

View file

@@ -178,13 +178,13 @@ class Processor(AsyncProcessor):
), ),
) )
def _make_flow_client(self): def _make_flow_client(self, workspace):
rr_id = str(uuid.uuid4()) rr_id = str(uuid.uuid4())
return RequestResponse( return RequestResponse(
backend=self.pubsub_backend, backend=self.pubsub_backend,
subscription=f"{self.id}--flow--{rr_id}", subscription=f"{self.id}--flow--{rr_id}",
consumer_name=self.id, consumer_name=self.id,
request_topic=flow_request_queue, request_topic=f"{flow_request_queue}:{workspace}",
request_schema=FlowRequest, request_schema=FlowRequest,
request_metrics=ProducerMetrics( request_metrics=ProducerMetrics(
processor=self.id, flow=None, name="flow-request", processor=self.id, flow=None, name="flow-request",
@@ -198,14 +198,8 @@ class Processor(AsyncProcessor):
async def _open_clients(self): async def _open_clients(self):
config = self._make_config_client() config = self._make_config_client()
flow = self._make_flow_client()
await config.start() await config.start()
try: return config
await flow.start()
except Exception:
await self._safe_stop(config)
raise
return config, flow
async def _safe_stop(self, client): async def _safe_stop(self, client):
try: try:
@@ -217,7 +211,14 @@ class Processor(AsyncProcessor):
# Service gate. # Service gate.
# ------------------------------------------------------------------ # ------------------------------------------------------------------
async def _gate_ready(self, config, flow): def _gate_workspace(self):
for spec in self.specs:
ws = getattr(spec.instance, "workspace", None)
if ws and not ws.startswith("_"):
return ws
return None
async def _gate_ready(self, config):
try: try:
await config.keys(SYSTEM_WORKSPACE, INIT_STATE_TYPE) await config.keys(SYSTEM_WORKSPACE, INIT_STATE_TYPE)
except Exception as e: except Exception as e:
@@ -226,11 +227,16 @@ class Processor(AsyncProcessor):
) )
return False return False
workspace = self._gate_workspace()
if workspace is None:
return True
flow = self._make_flow_client(workspace)
try: try:
await flow.start()
resp = await flow.request( resp = await flow.request(
FlowRequest( FlowRequest(
operation="list-blueprints", operation="list-blueprints",
workspace=SYSTEM_WORKSPACE,
), ),
timeout=5, timeout=5,
) )
@@ -245,6 +251,8 @@ class Processor(AsyncProcessor):
f"Gate: flow-svc not ready ({type(e).__name__}: {e})" f"Gate: flow-svc not ready ({type(e).__name__}: {e})"
) )
return False return False
finally:
await self._safe_stop(flow)
return True return True
@@ -271,7 +279,7 @@ class Processor(AsyncProcessor):
# Per-spec execution. # Per-spec execution.
# ------------------------------------------------------------------ # ------------------------------------------------------------------
async def _run_spec(self, spec, config, flow): async def _run_spec(self, spec, config):
"""Run a single initialiser spec. """Run a single initialiser spec.
Returns one of: Returns one of:
@@ -298,7 +306,7 @@ class Processor(AsyncProcessor):
child_ctx = InitContext( child_ctx = InitContext(
logger=child_logger, logger=child_logger,
config=config, config=config,
flow=flow, make_flow_client=self._make_flow_client,
) )
child_logger.info( child_logger.info(
@@ -340,7 +348,7 @@ class Processor(AsyncProcessor):
sleep_for = STEADY_INTERVAL sleep_for = STEADY_INTERVAL
try: try:
config, flow = await self._open_clients() config = await self._open_clients()
except Exception as e: except Exception as e:
logger.info( logger.info(
f"Failed to open clients " f"Failed to open clients "
@@ -358,11 +366,11 @@ class Processor(AsyncProcessor):
pre_results = {} pre_results = {}
for spec in pre_specs: for spec in pre_specs:
pre_results[spec.name] = await self._run_spec( pre_results[spec.name] = await self._run_spec(
spec, config, flow, spec, config,
) )
# Phase 2: gate. # Phase 2: gate.
gate_ok = await self._gate_ready(config, flow) gate_ok = await self._gate_ready(config)
# Phase 3: post-service initialisers, if gate passed. # Phase 3: post-service initialisers, if gate passed.
post_results = {} post_results = {}
@@ -373,7 +381,7 @@ class Processor(AsyncProcessor):
] ]
for spec in post_specs: for spec in post_specs:
post_results[spec.name] = await self._run_spec( post_results[spec.name] = await self._run_spec(
spec, config, flow, spec, config,
) )
# Cadence selection. # Cadence selection.
@@ -388,7 +396,6 @@ class Processor(AsyncProcessor):
finally: finally:
await self._safe_stop(config) await self._safe_stop(config)
await self._safe_stop(flow)
await asyncio.sleep(sleep_for) await asyncio.sleep(sleep_for)

View file

@@ -49,53 +49,59 @@ class DefaultFlowStart(Initialiser):
async def run(self, ctx, old_flag, new_flag): async def run(self, ctx, old_flag, new_flag):
# Check whether the flow already exists. Belt-and-braces flow = ctx.make_flow_client(self.workspace)
# beyond the flag gate: if an operator stops and restarts the await flow.start()
# bootstrapper after the flow is already running, we don't
# want to blindly try to start it again. try:
list_resp = await ctx.flow.request(
FlowRequest( # Check whether the flow already exists. Belt-and-braces
operation="list-flows", # beyond the flag gate: if an operator stops and restarts the
workspace=self.workspace, # bootstrapper after the flow is already running, we don't
), # want to blindly try to start it again.
timeout=10, list_resp = await flow.request(
) FlowRequest(
if list_resp.error: operation="list-flows",
raise RuntimeError( ),
f"list-flows failed: " timeout=10,
f"{list_resp.error.type}: {list_resp.error.message}" )
) if list_resp.error:
raise RuntimeError(
f"list-flows failed: "
f"{list_resp.error.type}: {list_resp.error.message}"
)
if self.flow_id in (list_resp.flow_ids or []):
ctx.logger.info(
f"Flow {self.flow_id!r} already running in workspace "
f"{self.workspace!r}; nothing to do"
)
return
if self.flow_id in (list_resp.flow_ids or []):
ctx.logger.info( ctx.logger.info(
f"Flow {self.flow_id!r} already running in workspace " f"Starting flow {self.flow_id!r} "
f"{self.workspace!r}; nothing to do" f"(blueprint={self.blueprint!r}) "
) f"in workspace {self.workspace!r}"
return
ctx.logger.info(
f"Starting flow {self.flow_id!r} "
f"(blueprint={self.blueprint!r}) "
f"in workspace {self.workspace!r}"
)
resp = await ctx.flow.request(
FlowRequest(
operation="start-flow",
workspace=self.workspace,
flow_id=self.flow_id,
blueprint_name=self.blueprint,
description=self.description,
parameters=self.parameters,
),
timeout=30,
)
if resp.error:
raise RuntimeError(
f"start-flow failed: "
f"{resp.error.type}: {resp.error.message}"
) )
ctx.logger.info( resp = await flow.request(
f"Flow {self.flow_id!r} started" FlowRequest(
) operation="start-flow",
flow_id=self.flow_id,
blueprint_name=self.blueprint,
description=self.description,
parameters=self.parameters,
),
timeout=30,
)
if resp.error:
raise RuntimeError(
f"start-flow failed: "
f"{resp.error.type}: {resp.error.message}"
)
ctx.logger.info(
f"Flow {self.flow_id!r} started"
)
finally:
await flow.stop()