IAM secure bootstrap options

This commit is contained in:
Cyber MacGeddon 2026-04-23 19:23:18 +01:00
parent 832a030703
commit 8348b7728b
5 changed files with 266 additions and 27 deletions

View file

@ -248,6 +248,46 @@ Passwords, API-key plaintext, and signing-key private material are
never returned in any response other than the explicit one-time
responses above (`reset-password`, `create-api-key`, `bootstrap`).
## Bootstrap modes
`iam-svc` requires a bootstrap mode to be chosen at startup. There is
no default — an unset or invalid mode causes the service to refuse
to start. The purpose is to force the operator to make an explicit
security decision rather than rely on an implicit "safe" fallback.
| Mode | Startup behaviour | `bootstrap` operation | Suitability |
|---|---|---|---|
| `token` | On first start with empty tables, auto-seeds the `default` workspace, admin user, admin API key (using the operator-provided `--bootstrap-token`), and an initial signing key. No-op on subsequent starts. | Refused — returns `auth-failed` / `"auth failure"` regardless of caller. | Production, any public-exposure deployment. |
| `bootstrap` | No startup seeding. Tables remain empty until the `bootstrap` operation is invoked over the pub/sub bus (typically via `tg-bootstrap-iam`). | Live while tables are empty. Generates and returns the admin API key once. Refused (`auth-failed`) once tables are populated. | Dev / compose up / CI. **Not safe under public exposure** — any caller who reaches the gateway's `/api/v1/iam` forwarder before the operator does can have the admin API key issued to them instead. Operators choosing this mode accept that risk. |
### Error masking
In both modes, any refused invocation of the `bootstrap` operation
returns the same error (`auth-failed` / `"auth failure"`). A caller
cannot distinguish:
- "service is in token mode"
- "service is in bootstrap mode but already bootstrapped"
- "operation forbidden"
This matches the general IAM error-policy stance (see `iam.md`) and
prevents externally enumerating IAM's state.
### Bootstrap-token lifecycle
The bootstrap token — whether operator-supplied (`token` mode) or
service-generated (`bootstrap` mode) — is a one-time credential. It
is stored as admin's single API key, tagged `name="bootstrap"`. The
operator's first admin action after bootstrap should be:
1. Create a durable admin user and API key (or issue a durable API
key to the bootstrap admin).
2. Revoke the bootstrap key via `revoke-api-key`.
3. Remove the bootstrap token from any deployment configuration.
The `name="bootstrap"` marker makes bootstrap keys easy to detect in
tooling (e.g. a `tg-list-api-keys` filter).
## HTTP forwarding (initial integration)
For the initial gateway integration — before the IAM service is

View file

@ -40,6 +40,7 @@ tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main"
tg-get-kg-core = "trustgraph.cli.get_kg_core:main"
tg-get-document-content = "trustgraph.cli.get_document_content:main"
tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main"
tg-bootstrap-iam = "trustgraph.cli.bootstrap_iam:main"
tg-invoke-agent = "trustgraph.cli.invoke_agent:main"
tg-invoke-document-rag = "trustgraph.cli.invoke_document_rag:main"
tg-invoke-graph-rag = "trustgraph.cli.invoke_graph_rag:main"

View file

@ -0,0 +1,99 @@
"""
Bootstraps the IAM service. Only works when iam-svc is running in
bootstrap mode with empty tables. Prints the initial admin API key
to stdout.
This is a one-time, trust-sensitive operation. The resulting token
is shown once and never again — capture it when it is printed. Rotate
and revoke it as soon as a real admin API key has been issued.
"""
import argparse
import json
import os
import sys
import requests
# Gateway base URL used when --api-url is not given; overridable via
# the TRUSTGRAPH_URL environment variable.
default_url = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")
# Gateway bearer token used when --token is not given. None when
# TRUSTGRAPH_TOKEN is unset, in which case no Authorization header
# is sent.
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
def bootstrap(url, token, timeout=30):
    """Invoke the IAM ``bootstrap`` operation through the gateway.

    POSTs ``{"operation": "bootstrap"}`` to ``<url>/api/v1/iam`` and
    extracts the one-time admin credentials from the JSON response.

    Args:
        url: Gateway base URL; a trailing slash is tolerated.
        token: Optional gateway bearer token. When falsy, no
            ``Authorization`` header is sent.
        timeout: Seconds to wait for the gateway before giving up.
            ``requests`` applies no timeout by default, which would
            let this one-shot CLI hang indefinitely.

    Returns:
        A ``(user_id, api_key)`` tuple taken from the response fields
        ``bootstrap_admin_user_id`` and ``bootstrap_admin_api_key``.

    Raises:
        RuntimeError: On a non-200 status, a body that is not a JSON
            object, an ``error`` entry in the body, or a response
            that carries no API key.
        requests.RequestException: On connection failure or timeout.
    """
    endpoint = url.rstrip("/") + "/api/v1/iam"

    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    resp = requests.post(
        endpoint,
        headers=headers,
        data=json.dumps({"operation": "bootstrap"}),
        timeout=timeout,
    )

    if resp.status_code != 200:
        raise RuntimeError(
            f"HTTP {resp.status_code}: {resp.text}"
        )

    # A 200 with a non-JSON body (e.g. a proxy's HTML page) should
    # produce a readable error rather than a bare decode exception.
    try:
        body = resp.json()
    except ValueError as e:
        raise RuntimeError(
            f"Gateway returned a non-JSON response: {resp.text!r}"
        ) from e

    if not isinstance(body, dict):
        raise RuntimeError(
            f"Gateway returned an unexpected response: {body!r}"
        )

    error = body.get("error")
    if error is not None:
        # The error is normally an object with type/message, but do
        # not crash on a plain string or other scalar.
        if isinstance(error, dict):
            raise RuntimeError(
                f"IAM {error.get('type', 'error')}: "
                f"{error.get('message', '')}"
            )
        raise RuntimeError(f"IAM error: {error}")

    api_key = body.get("bootstrap_admin_api_key")
    user_id = body.get("bootstrap_admin_user_id")

    if not api_key:
        raise RuntimeError(
            "IAM response did not contain a bootstrap token — the "
            "service may already be bootstrapped, or may be running "
            "in token mode."
        )

    return user_id, api_key
def main():
    """Command-line entry point for ``tg-bootstrap-iam``.

    Parses ``--api-url`` and ``--token``, calls ``bootstrap`` and
    prints the result. On any failure the exception is reported on
    stderr and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        prog="tg-bootstrap-iam", description=__doc__,
    )
    cli.add_argument(
        "-u", "--api-url",
        help=f"API URL (default: {default_url})",
        default=default_url,
    )
    cli.add_argument(
        "-t", "--token",
        help="Gateway bearer token (default: $TRUSTGRAPH_TOKEN)",
        default=default_token,
    )
    opts = cli.parse_args()

    try:
        user_id, api_key = bootstrap(opts.api_url, opts.token)
    except Exception as exc:
        print("Exception:", exc, file=sys.stderr, flush=True)
        sys.exit(1)

    # Only the key itself goes to stdout so it can be piped or
    # captured by a script; human-facing context goes to stderr.
    for note in (
        f"Admin user id: {user_id}",
        "Admin API key (shown once, capture now):",
    ):
        print(note, file=sys.stderr)
    print(api_key)
if __name__ == "__main__":
main()

View file

@ -172,13 +172,29 @@ def _sign_jwt(kid, private_pem, claims):
class IamService:
def __init__(self, host, username, password, keyspace):
def __init__(self, host, username, password, keyspace,
bootstrap_mode, bootstrap_token=None):
self.table_store = IamTableStore(
host, username, password, keyspace,
)
# Active signing key cache: (kid, private_pem, public_pem) or
# None. Loaded lazily on first use; refreshed whenever a key
# is created.
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
# service auto-seeds on first start using the provided
# bootstrap_token and the ``bootstrap`` operation is refused
# thereafter (indistinguishable from an already-bootstrapped
# deployment per the error policy). In "bootstrap" mode the
# ``bootstrap`` operation is live until tables are populated.
if bootstrap_mode not in ("token", "bootstrap"):
raise ValueError(
f"bootstrap_mode must be 'token' or 'bootstrap', "
f"got {bootstrap_mode!r}"
)
if bootstrap_mode == "token" and not bootstrap_token:
raise ValueError(
"bootstrap_mode='token' requires bootstrap_token"
)
self.bootstrap_mode = bootstrap_mode
self.bootstrap_token = bootstrap_token
self._signing_key = None
self._signing_key_lock = asyncio.Lock()
@ -283,21 +299,40 @@ class IamService:
# bootstrap
# ------------------------------------------------------------------
async def handle_bootstrap(self, v):
"""No-op if any workspace already exists. Otherwise create
the ``default`` workspace, an ``admin`` user with role
``admin``, and an initial API key for that admin. The
plaintext API key is returned once in the response."""
async def auto_bootstrap_if_token_mode(self):
"""Called from the service processor at startup. In
``token`` mode, if tables are empty, seeds the default
workspace / admin / signing key using the operator-provided
bootstrap token. The admin's API key plaintext is *the*
``bootstrap_token`` — the operator already knows it, so nothing
needs to be returned or logged.
In ``bootstrap`` mode this is a no-op; seeding happens on
explicit ``bootstrap`` operation invocation."""
if self.bootstrap_mode != "token":
return
if await self.table_store.any_workspace_exists():
logger.info(
"IAM bootstrap: tables already populated; no-op"
"IAM: token mode, tables already populated; skipping "
"auto-bootstrap"
)
return IamResponse()
return
logger.info("IAM: token mode, empty tables; auto-bootstrapping")
await self._seed_tables(self.bootstrap_token)
logger.info(
"IAM: auto-bootstrap complete using operator-provided token"
)
async def _seed_tables(self, api_key_plaintext):
"""Shared seeding logic used by token-mode auto-bootstrap and
bootstrap-mode handle_bootstrap. Creates the default
workspace, admin user, admin API key (using the given
plaintext), and an initial signing key. Returns the admin
user id."""
now = _now_dt()
# Workspace.
await self.table_store.put_workspace(
id=DEFAULT_WORKSPACE,
name="Default",
@ -305,11 +340,7 @@ class IamService:
created=now,
)
# Admin user.
admin_user_id = str(uuid.uuid4())
# Password is set to a random unusable value; admin logs in
# with the API key below. Password login for this user can be
# enabled later by reset-password.
admin_password = secrets.token_urlsafe(32)
await self.table_store.put_user(
id=admin_user_id,
@ -324,21 +355,18 @@ class IamService:
created=now,
)
# Admin API key.
plaintext = _generate_api_key()
key_id = str(uuid.uuid4())
await self.table_store.put_api_key(
key_hash=_hash_api_key(plaintext),
key_hash=_hash_api_key(api_key_plaintext),
id=key_id,
user_id=admin_user_id,
name="bootstrap",
prefix=plaintext[:len(API_KEY_PREFIX) + 4],
prefix=api_key_plaintext[:len(API_KEY_PREFIX) + 4],
expires=None,
created=now,
last_used=None,
)
# Initial JWT signing key.
kid, private_pem, public_pem = _generate_signing_keypair()
await self.table_store.put_signing_key(
kid=kid,
@ -347,15 +375,28 @@ class IamService:
created=now,
retired=None,
)
# Populate cache so login calls in this process don't go
# back to Cassandra on first use.
self._signing_key = (kid, private_pem, public_pem)
logger.info(
f"IAM bootstrap: created workspace={DEFAULT_WORKSPACE!r}, "
f"admin user_id={admin_user_id}, initial API key issued, "
f"signing key kid={kid}"
f"IAM seeded: workspace={DEFAULT_WORKSPACE!r}, "
f"admin user_id={admin_user_id}, signing key kid={kid}"
)
return admin_user_id
async def handle_bootstrap(self, v):
"""Explicit bootstrap op. Only available in ``bootstrap``
mode and only when tables are empty. Every other case is
masked to a generic auth failure — the caller cannot
distinguish 'not in bootstrap mode' from 'already
bootstrapped' from 'operation forbidden'."""
if self.bootstrap_mode != "bootstrap":
return _err("auth-failed", "auth failure")
if await self.table_store.any_workspace_exists():
return _err("auth-failed", "auth failure")
plaintext = _generate_api_key()
admin_user_id = await self._seed_tables(plaintext)
return IamResponse(
bootstrap_admin_user_id=admin_user_id,

View file

@ -39,6 +39,32 @@ class Processor(AsyncProcessor):
"iam_response_queue", default_iam_response_queue,
)
bootstrap_mode = params.get("bootstrap_mode")
bootstrap_token = params.get("bootstrap_token")
if bootstrap_mode not in ("token", "bootstrap"):
raise RuntimeError(
"iam-svc: --bootstrap-mode is required. Set to 'token' "
"(with --bootstrap-token) for production, or 'bootstrap' "
"to enable the explicit bootstrap operation over the "
"pub/sub bus (dev / quick-start only, not safe under "
"public exposure). Refusing to start."
)
if bootstrap_mode == "token" and not bootstrap_token:
raise RuntimeError(
"iam-svc: --bootstrap-mode=token requires "
"--bootstrap-token. Refusing to start."
)
if bootstrap_mode == "bootstrap" and bootstrap_token:
raise RuntimeError(
"iam-svc: --bootstrap-token is not accepted when "
"--bootstrap-mode=bootstrap. Ambiguous intent. "
"Refusing to start."
)
self.bootstrap_mode = bootstrap_mode
self.bootstrap_token = bootstrap_token
cassandra_host = params.get("cassandra_host")
cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password")
@ -96,12 +122,19 @@ class Processor(AsyncProcessor):
username=self.cassandra_username,
password=self.cassandra_password,
keyspace=keyspace,
bootstrap_mode=self.bootstrap_mode,
bootstrap_token=self.bootstrap_token,
)
logger.info("IAM service initialised")
logger.info(
f"IAM service initialised (bootstrap-mode={self.bootstrap_mode})"
)
async def start(self):
await self.pubsub.ensure_topic(self.iam_request_topic)
# Token-mode auto-bootstrap runs before we accept requests so
# the first inbound call always sees a populated table.
await self.iam.auto_bootstrap_if_token_mode()
await self.iam_request_consumer.start()
async def on_iam_request(self, msg, consumer, flow):
@ -144,6 +177,31 @@ class Processor(AsyncProcessor):
default=default_iam_response_queue,
help=f"IAM response queue (default: {default_iam_response_queue})",
)
parser.add_argument(
"--bootstrap-mode",
default=None,
choices=["token", "bootstrap"],
help=(
"IAM bootstrap mode (required). "
"'token' = operator supplies the initial admin API "
"key via --bootstrap-token; auto-seeds on first start, "
"bootstrap operation refused. "
"'bootstrap' = bootstrap operation is live over the "
"bus until tables are populated; a token is generated "
"and returned by tg-bootstrap-iam. Unsafe to run "
"'bootstrap' mode with public exposure."
),
)
parser.add_argument(
"--bootstrap-token",
default=None,
help=(
"Initial admin API key plaintext, required when "
"--bootstrap-mode=token. Treat as a one-time "
"credential: the operator should rotate to a new key "
"and revoke this one after first use."
),
)
add_cassandra_args(parser)