mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-27 01:16:22 +02:00
feat: pluggable bootstrap framework with ordered initialisers (#847)
A generic, long-running bootstrap processor that converges a
deployment to its configured initial state and then idles.
Replaces the previous one-shot `tg-init-trustgraph` container model
and provides an extension point for enterprise / third-party
initialisers.
See docs/tech-specs/bootstrap.md for the full design.
Bootstrapper
------------
A single AsyncProcessor (trustgraph.bootstrap.bootstrapper.Processor)
that:
* Reads a list of initialiser specifications (class, name, flag,
params) from either a direct `initialisers` parameter
(processor-group embedding) or a YAML/JSON file (`-c`, CLI).
* On each wake, runs a cheap service-gate (config-svc +
flow-svc round-trips), then iterates the initialiser list,
running each whose configured flag differs from the one stored
in __system__/init-state/<name>.
* Stores per-initialiser completion state in the reserved
__system__ workspace.
* Adapts cadence: ~5s on gate failure, ~15s while converging,
~300s in steady state.
* Isolates failures — one initialiser's exception does not block
others in the same cycle; the failed one retries next wake.
Initialiser contract
--------------------
* Subclass trustgraph.bootstrap.base.Initialiser.
* Implement async run(ctx, old_flag, new_flag).
* Opt out of the service gate with class attr
wait_for_services=False (only used by PulsarTopology, since
config-svc cannot come up until Pulsar namespaces exist).
* ctx carries short-lived config and flow-svc clients plus a
scoped logger.
Core initialisers (trustgraph.bootstrap.initialisers.*)
-------------------------------------------------------
* PulsarTopology — creates Pulsar tenant + namespaces
(pre-gate, blocking HTTP offloaded to
executor).
* TemplateSeed — seeds __template__ from an external JSON
file; re-run is upsert-missing by default,
overwrite-all opt-in.
* WorkspaceInit — populates a named workspace from either
the full contents of __template__ or a
seed file; raises cleanly if the template
isn't seeded yet so the bootstrapper retries
on the next cycle.
* DefaultFlowStart — starts a specific flow in a workspace;
no-ops if the flow is already running.
Enterprise or third-party initialisers plug in via fully-qualified
dotted class paths in the bootstrapper's configuration — no core
code change required.
Config service
--------------
* push(): filter out reserved workspaces (ids starting with "_")
from the change notifications. Stored config is preserved; only
the broadcast is suppressed, so bootstrap / template state lives
in config-svc without live processors ever reacting to it.
Config client
-------------
* ConfigClient.get_all(workspace): wraps the existing `config`
operation to return {type: {key: value}} for a workspace.
WorkspaceInit uses it to copy __template__ without needing a
hardcoded types list.
pyproject.toml
--------------
* Adds a `bootstrap` console script pointing at the new Processor.
* Removes `tg-init-trustgraph`, superseded by the bootstrap processor.
This commit is contained in:
parent
31027e30ae
commit
ae9936c9cc
17 changed files with 1312 additions and 273 deletions
|
|
@ -40,7 +40,6 @@ tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main"
|
|||
tg-get-kg-core = "trustgraph.cli.get_kg_core:main"
|
||||
tg-get-document-content = "trustgraph.cli.get_document_content:main"
|
||||
tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main"
|
||||
tg-init-trustgraph = "trustgraph.cli.init_trustgraph:main"
|
||||
tg-invoke-agent = "trustgraph.cli.invoke_agent:main"
|
||||
tg-invoke-document-rag = "trustgraph.cli.invoke_document_rag:main"
|
||||
tg-invoke-graph-rag = "trustgraph.cli.invoke_graph_rag:main"
|
||||
|
|
|
|||
|
|
@ -1,271 +0,0 @@
|
|||
"""
|
||||
Initialises TrustGraph pub/sub infrastructure and pushes initial config.
|
||||
|
||||
For Pulsar: creates tenant, namespaces, and retention policies.
|
||||
For RabbitMQ: queues are auto-declared, so only config push is needed.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from trustgraph.clients.config_client import ConfigClient
|
||||
from trustgraph.base.pubsub import add_pubsub_args
|
||||
|
||||
# Pulsar admin REST endpoint used when --pulsar-admin-url is not supplied.
default_pulsar_admin_url = "http://pulsar:8080"
|
||||
# Subscriber name handed to ConfigClient when pushing the initial config.
subscriber = "tg-init-pubsub"
|
||||
|
||||
|
||||
def get_clusters(url, timeout=10):
    """Fetch the cluster list from the Pulsar admin REST API.

    Args:
        url: Base URL of the Pulsar admin API (no trailing slash).
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely on an
            unresponsive endpoint, which would stall the caller's retry
            loop.

    Returns:
        The decoded JSON body of the ``/admin/v2/clusters`` response.

    Raises:
        RuntimeError: If the API answers with a status other than 200.
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    print("Get clusters...", flush=True)

    resp = requests.get(f"{url}/admin/v2/clusters", timeout=timeout)

    if resp.status_code != 200:
        raise RuntimeError("Could not fetch clusters")

    return resp.json()
|
||||
|
||||
def ensure_tenant(url, tenant, clusters, timeout=10):
    """Create a Pulsar tenant unless the admin API reports it exists.

    Args:
        url: Base URL of the Pulsar admin API (no trailing slash).
        tenant: Name of the tenant to look up / create.
        clusters: Value sent as ``allowedClusters`` in the creation
            request.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive
            endpoint.

    Raises:
        RuntimeError: If the creation request does not return 204.
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    tenant_url = f"{url}/admin/v2/tenants/{tenant}"

    resp = requests.get(tenant_url, timeout=timeout)

    if resp.status_code == 200:
        print(f"Tenant {tenant} already exists.", flush=True)
        return

    # Any non-200 lookup result falls through to a creation attempt.
    resp = requests.put(
        tenant_url,
        json={
            "adminRoles": [],
            "allowedClusters": clusters,
        },
        timeout=timeout,
    )

    if resp.status_code != 204:
        print(resp.text, flush=True)
        raise RuntimeError("Tenant creation failed.")

    print(f"Tenant {tenant} created.", flush=True)
|
||||
|
||||
def ensure_namespace(url, tenant, namespace, config, timeout=10):
    """Create a Pulsar namespace unless the admin API reports it exists.

    Args:
        url: Base URL of the Pulsar admin API (no trailing slash).
        tenant: Tenant that owns the namespace.
        namespace: Namespace name within the tenant.
        config: JSON-serialisable body sent with the creation request
            (namespace policies; may be an empty dict).
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive
            endpoint.

    Raises:
        RuntimeError: If the creation request does not return 204.
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    namespace_url = f"{url}/admin/v2/namespaces/{tenant}/{namespace}"

    resp = requests.get(namespace_url, timeout=timeout)

    if resp.status_code == 200:
        print(f"Namespace {tenant}/{namespace} already exists.", flush=True)
        return

    # Any non-200 lookup result falls through to a creation attempt.
    resp = requests.put(namespace_url, json=config, timeout=timeout)

    if resp.status_code != 204:
        print(resp.status_code, flush=True)
        print(resp.text, flush=True)
        raise RuntimeError(f"Namespace {tenant}/{namespace} creation failed.")

    print(f"Namespace {tenant}/{namespace} created.", flush=True)
|
||||
|
||||
def ensure_config(config, workspace="default", **pubsub_config):
    """Push ``config`` to the config service if it is still at version 0.

    Loops until either the service reports a non-zero version (nothing is
    written) or a put of the full batch succeeds. Any exception from the
    client is printed and the whole fetch/put cycle is retried after a
    2 second pause.

    Args:
        config: Mapping of ``{type: {key: value}}``; each value is
            JSON-encoded before being sent.
        workspace: Workspace passed to ``ConfigClient``.
        **pubsub_config: Extra keyword arguments forwarded to
            ``ConfigClient``.
    """
    cli = ConfigClient(
        subscriber=subscriber,
        workspace=workspace,
        **pubsub_config,
    )

    while True:

        try:
            print("Get current config...", flush=True)
            current, version = cli.config(timeout=5)
        except Exception as e:
            print("Exception:", e, flush=True)
            time.sleep(2)
            print("Retrying...", flush=True)
            continue

        print("Current config version is", version, flush=True)

        # A non-zero version means something has already written config;
        # never overwrite it.
        if version != 0:
            print("Already updated, not updating config. Done.", flush=True)
            return

        print("Config is version 0, updating...", flush=True)

        batch = []

        # `kind` rather than `type`, to avoid shadowing the builtin.
        for kind, entries in config.items():
            for key, value in entries.items():
                print(f"Adding {kind}/{key} to update.", flush=True)
                batch.append({
                    "type": kind,
                    "key": key,
                    "value": json.dumps(value),
                })

        try:
            cli.put(batch, timeout=10)
            print("Update succeeded.", flush=True)
            break
        except Exception as e:
            # Go back to the top and re-check the version before retrying.
            print("Exception:", e, flush=True)
            time.sleep(2)
            print("Retrying...", flush=True)
            continue
|
||||
|
||||
def init_pulsar(pulsar_admin_url, tenant):
    """Pulsar-specific setup: create tenant, namespaces, retention policies."""

    def retention(subscription_expiry_minutes):
        # Namespace policy body; only the subscription expiry differs
        # between the namespaces that use it.
        return {
            "retention_policies": {
                "retentionSizeInMB": -1,
                "retentionTimeInMinutes": 3,
                "subscriptionExpirationTimeMinutes":
                    subscription_expiry_minutes,
            }
        }

    # Namespaces are created in this order, each with its policy body.
    namespace_specs = (
        ("flow", {}),
        ("request", {}),
        ("response", retention(30)),
        ("notify", retention(5)),
    )

    cluster_list = get_clusters(pulsar_admin_url)
    ensure_tenant(pulsar_admin_url, tenant, cluster_list)

    for ns_name, ns_config in namespace_specs:
        ensure_namespace(pulsar_admin_url, tenant, ns_name, ns_config)
|
||||
|
||||
|
||||
def push_config(config_json, config_file, workspace="default",
                **pubsub_config):
    """Push initial config if provided.

    Args:
        config_json: JSON text holding the configuration, or None. Takes
            precedence over ``config_file`` when both are given.
        config_file: Path of a JSON file holding the configuration, or
            None.
        workspace: Workspace forwarded to ``ensure_config``.
        **pubsub_config: Extra keyword arguments forwarded to
            ``ensure_config``.

    Raises:
        Exception: Whatever decoding or reading the configuration raised
            (e.g. ``json.JSONDecodeError``, ``OSError``); it is printed
            and re-raised unchanged.
    """
    if config_json is None and config_file is None:
        print("No config to update.", flush=True)
        return

    try:
        print("Decoding config...", flush=True)
        if config_json is not None:
            dec = json.loads(config_json)
        else:
            # Context manager so the file handle is closed even when
            # decoding fails.
            with open(config_file) as f:
                dec = json.load(f)
        print("Decoded.", flush=True)
    except Exception as e:
        print("Exception:", e, flush=True)
        # Bare raise keeps the original traceback intact.
        raise

    ensure_config(dec, workspace=workspace, **pubsub_config)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for ``tg-init-trustgraph``.

    Parses arguments, then repeatedly attempts initialisation until it
    succeeds: for the ``pulsar`` backend the tenant and namespaces are
    set up first, and for every backend the initial config (if any) is
    pushed. Any exception is printed and the whole sequence is retried
    after a 2 second pause.
    """
    parser = argparse.ArgumentParser(
        prog='tg-init-trustgraph',
        description=__doc__,
    )

    parser.add_argument(
        '--pulsar-admin-url',
        default=default_pulsar_admin_url,
        help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
    )

    parser.add_argument(
        '-c', '--config',
        help='Initial configuration to load',
    )

    parser.add_argument(
        '-C', '--config-file',
        help='Initial configuration to load from file',
    )

    parser.add_argument(
        '-t', '--tenant',
        default="tg",
        help='Tenant (default: tg)',
    )

    parser.add_argument(
        '-w', '--workspace',
        default="default",
        help='Workspace (default: default)',
    )

    add_pubsub_args(parser)

    args = parser.parse_args()

    backend_type = args.pubsub_backend

    # Extract pubsub config from args: everything that is not one of this
    # tool's own options is forwarded to the config client.
    own_options = (
        'pulsar_admin_url', 'config', 'config_file', 'tenant',
        'workspace',
    )
    pubsub_config = {
        k: v for k, v in vars(args).items()
        if k not in own_options
    }

    while True:

        try:

            # Pulsar-specific setup (tenants, namespaces)
            if backend_type == 'pulsar':
                print(flush=True)
                print(
                    f"Initialising Pulsar at {args.pulsar_admin_url}...",
                    flush=True,
                )
                init_pulsar(args.pulsar_admin_url, args.tenant)
            else:
                print(flush=True)
                print(
                    f"Using {backend_type} backend (no admin setup needed).",
                    flush=True,
                )

            # Push config (works with any backend)
            push_config(
                args.config, args.config_file,
                workspace=args.workspace,
                **pubsub_config,
            )

            print("Initialisation complete.", flush=True)
            break

        except Exception as e:

            # Top-level boundary: report and retry the whole sequence.
            print("Exception:", e, flush=True)

            print("Sleeping...", flush=True)
            time.sleep(2)
            print("Will retry...", flush=True)
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue