mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-03 04:12:37 +02:00
A generic, long-running bootstrap processor that converges a
deployment to its configured initial state and then idles.
Replaces the previous one-shot `tg-init-trustgraph` container model
and provides an extension point for enterprise / third-party
initialisers.
See docs/tech-specs/bootstrap.md for the full design.
Bootstrapper
------------
A single AsyncProcessor (trustgraph.bootstrap.bootstrapper.Processor)
that:
* Reads a list of initialiser specifications (class, name, flag,
params) from either a direct `initialisers` parameter
(processor-group embedding) or a YAML/JSON file (`-c`, CLI).
* On each wake, runs a cheap service-gate (config-svc +
flow-svc round-trips), then iterates the initialiser list,
running each whose configured flag differs from the one stored
in __system__/init-state/<name>.
* Stores per-initialiser completion state in the reserved
__system__ workspace.
* Adapts cadence: ~5s on gate failure, ~15s while converging,
~300s in steady state.
* Isolates failures — one initialiser's exception does not block
others in the same cycle; the failed one retries next wake.
Initialiser contract
--------------------
* Subclass trustgraph.bootstrap.base.Initialiser.
* Implement async run(ctx, old_flag, new_flag).
* Opt out of the service gate with class attr
wait_for_services=False (only used by PulsarTopology, since
config-svc cannot come up until Pulsar namespaces exist).
* ctx carries short-lived config and flow-svc clients plus a
scoped logger.
Core initialisers (trustgraph.bootstrap.initialisers.*)
-------------------------------------------------------
* PulsarTopology — creates Pulsar tenant + namespaces
(pre-gate, blocking HTTP offloaded to
executor).
* TemplateSeed — seeds __template__ from an external JSON
file; re-run is upsert-missing by default,
overwrite-all opt-in.
* WorkspaceInit — populates a named workspace from either
the full contents of __template__ or a
seed file; raises cleanly if the template
isn't seeded yet so the bootstrapper retries
on the next cycle.
* DefaultFlowStart — starts a specific flow in a workspace;
no-ops if the flow is already running.
Enterprise or third-party initialisers plug in via fully-qualified
dotted class paths in the bootstrapper's configuration — no core
code change required.
Config service
--------------
* push(): filter out reserved workspaces (ids starting with "_")
from the change notifications. Stored config is preserved; only
the broadcast is suppressed, so bootstrap / template state lives
in config-svc without live processors ever reacting to it.
Config client
-------------
* ConfigClient.get_all(workspace): wraps the existing `config`
operation to return {type: {key: value}} for a workspace.
WorkspaceInit uses it to copy __template__ without needing a
hardcoded types list.
pyproject.toml
--------------
* Adds a `bootstrap` console script pointing at the new Processor.
* Removes `tg-init-trustgraph`, superseded by the bootstrap processor.
131 lines
4 KiB
Python
131 lines
4 KiB
Python
"""
PulsarTopology initialiser — creates Pulsar tenant and namespaces
with their retention policies.

Runs pre-gate (``wait_for_services = False``) because config-svc and
flow-svc can't connect to Pulsar until these namespaces exist.
Admin-API calls are idempotent so re-runs on flag change are safe.
"""
|
|
|
|
import asyncio
|
|
import requests
|
|
|
|
from .. base import Initialiser
|
|
|
|
# Per-namespace policy bodies sent to the Pulsar admin API when a
# namespace is created.  Keys are namespace names (created under the
# configured tenant); values are the JSON policy documents.
#
# flow/request take broker defaults.  response and notify get
# aggressive retention — those classes carry short-lived
# request/response and notification traffic only.
#
# NOTE(review): Pulsar's namespace Policies object appears to define
# subscription expiry as its own top-level field rather than as part of
# retention_policies — confirm the broker actually honours
# subscriptionExpirationTimeMinutes where it is placed here.
NAMESPACE_CONFIG = {
    "flow": {},
    "request": {},
    "response": {
        "retention_policies": {
            "retentionSizeInMB": -1,
            "retentionTimeInMinutes": 3,
            "subscriptionExpirationTimeMinutes": 30,
        },
    },
    "notify": {
        "retention_policies": {
            "retentionSizeInMB": -1,
            "retentionTimeInMinutes": 3,
            "subscriptionExpirationTimeMinutes": 5,
        },
    },
}

# Timeout, in seconds, applied to every admin-API HTTP request.
REQUEST_TIMEOUT = 10
|
|
|
|
|
|
class PulsarTopology(Initialiser):
    """Initialiser that ensures the Pulsar tenant and namespaces exist.

    On each run the tenant is created if it is missing, then every
    namespace listed in ``NAMESPACE_CONFIG`` is created (with its policy
    document) if it is missing.  Resources that already exist are left
    untouched, so repeated runs converge to the same state.
    """

    # Opt out of the bootstrapper's service gate: config-svc and
    # flow-svc cannot connect to Pulsar until these namespaces exist.
    wait_for_services = False

    # Status codes accepted from a create call.  204 is a successful
    # create; 409 (Conflict) means the resource already exists, e.g.
    # because another creator won the race between our existence check
    # and our PUT — the desired state is reached either way.
    _CREATE_OK = (204, 409)

    def __init__(
        self,
        admin_url="http://pulsar:8080",
        tenant="tg",
        **kwargs,
    ):
        """Configure the initialiser.

        Args:
            admin_url: Base URL of the Pulsar admin REST API.  Any
                trailing slash is stripped.
            tenant: Name of the Pulsar tenant to create and to place
                the namespaces under.
            **kwargs: Passed through unchanged to ``Initialiser``.
        """
        super().__init__(**kwargs)
        self.admin_url = admin_url.rstrip("/")
        self.tenant = tenant

    async def run(self, ctx, old_flag, new_flag):
        """Reconcile the Pulsar topology without blocking the event loop.

        Args:
            ctx: Bootstrap context; only ``ctx.logger`` is used here.
            old_flag: Unused; every run performs the same reconciliation.
            new_flag: Unused; every run performs the same reconciliation.

        Raises:
            Whatever ``_reconcile_sync`` raises; the exception is
            re-raised from the awaited executor future.
        """
        # requests is blocking; offload to the default executor so the
        # loop stays responsive.  get_running_loop() is the preferred
        # way to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._reconcile_sync, ctx.logger)

    # ------------------------------------------------------------------
    # Sync admin-API calls.
    # ------------------------------------------------------------------

    def _resource_exists(self, url, description):
        """Return whether the admin-API resource at ``url`` exists.

        Args:
            url: Full admin-API URL to probe with a GET.
            description: Human-readable resource name for error messages.

        Returns:
            True on HTTP 200, False on HTTP 404.

        Raises:
            RuntimeError: On any other status.  An auth or server error
                must not be mistaken for "resource is absent", which
                would trigger a pointless create attempt and a
                misleading failure message.
        """
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
        if resp.status_code == 200:
            return True
        if resp.status_code == 404:
            return False
        raise RuntimeError(
            f"{description} lookup failed: "
            f"{resp.status_code} {resp.text}"
        )

    def _get_clusters(self):
        """Return the decoded JSON body of the admin API's cluster list.

        Raises:
            requests.HTTPError: If the admin API answers with an error
                status.
        """
        resp = requests.get(
            f"{self.admin_url}/admin/v2/clusters",
            timeout=REQUEST_TIMEOUT,
        )
        resp.raise_for_status()
        return resp.json()

    def _tenant_exists(self):
        """Return True if the configured tenant exists, False if not.

        Raises:
            RuntimeError: If the admin API answers with anything other
                than 200 or 404.
        """
        return self._resource_exists(
            f"{self.admin_url}/admin/v2/tenants/{self.tenant}",
            f"Tenant {self.tenant!r}",
        )

    def _create_tenant(self, clusters):
        """Create the configured tenant.

        Args:
            clusters: Cluster names the tenant is allowed to use; sent
                as ``allowedClusters``.

        Raises:
            RuntimeError: If the admin API rejects the request.  A 409
                (already exists) is treated as success.
        """
        resp = requests.put(
            f"{self.admin_url}/admin/v2/tenants/{self.tenant}",
            json={"adminRoles": [], "allowedClusters": clusters},
            timeout=REQUEST_TIMEOUT,
        )
        if resp.status_code not in self._CREATE_OK:
            raise RuntimeError(
                f"Tenant {self.tenant!r} create failed: "
                f"{resp.status_code} {resp.text}"
            )

    def _namespace_exists(self, namespace):
        """Return True if ``namespace`` exists under the tenant.

        Args:
            namespace: Namespace name, without the tenant prefix.

        Raises:
            RuntimeError: If the admin API answers with anything other
                than 200 or 404.
        """
        return self._resource_exists(
            f"{self.admin_url}/admin/v2/namespaces/"
            f"{self.tenant}/{namespace}",
            f"Namespace {self.tenant}/{namespace}",
        )

    def _create_namespace(self, namespace, config):
        """Create ``namespace`` under the tenant with the given policies.

        Args:
            namespace: Namespace name, without the tenant prefix.
            config: Policy document sent as the JSON request body.

        Raises:
            RuntimeError: If the admin API rejects the request.  A 409
                (already exists) is treated as success.
        """
        resp = requests.put(
            f"{self.admin_url}/admin/v2/namespaces/"
            f"{self.tenant}/{namespace}",
            json=config,
            timeout=REQUEST_TIMEOUT,
        )
        if resp.status_code not in self._CREATE_OK:
            raise RuntimeError(
                f"Namespace {self.tenant}/{namespace} create failed: "
                f"{resp.status_code} {resp.text}"
            )

    def _reconcile_sync(self, logger):
        """Create the tenant and any missing namespaces (blocking).

        Intended to run in an executor thread; every call here performs
        blocking HTTP.

        Args:
            logger: Logger used to report what was created or skipped.

        Raises:
            RuntimeError: If a lookup or create call is rejected.
            requests.RequestException: On connection failures, timeouts,
                or an error status from the cluster listing.
        """
        if not self._tenant_exists():
            # The new tenant is allowed on every cluster the broker
            # currently reports.
            clusters = self._get_clusters()
            logger.info(
                f"Creating tenant {self.tenant!r} with clusters {clusters}"
            )
            self._create_tenant(clusters)
        else:
            logger.debug(f"Tenant {self.tenant!r} already exists")

        for namespace, config in NAMESPACE_CONFIG.items():
            if self._namespace_exists(namespace):
                # Existing namespaces are left as-is; their policies are
                # not re-applied.
                logger.debug(
                    f"Namespace {self.tenant}/{namespace} already exists"
                )
                continue
            logger.info(
                f"Creating namespace {self.tenant}/{namespace}"
            )
            self._create_namespace(namespace, config)
|