diff --git a/docs/tech-specs/iam-protocol.md b/docs/tech-specs/iam-protocol.md index 18c5e0b2..8638e7e9 100644 --- a/docs/tech-specs/iam-protocol.md +++ b/docs/tech-specs/iam-protocol.md @@ -248,6 +248,46 @@ Passwords, API-key plaintext, and signing-key private material are never returned in any response other than the explicit one-time responses above (`reset-password`, `create-api-key`, `bootstrap`). +## Bootstrap modes + +`iam-svc` requires a bootstrap mode to be chosen at startup. There is +no default — an unset or invalid mode causes the service to refuse +to start. The purpose is to force the operator to make an explicit +security decision rather than rely on an implicit "safe" fallback. + +| Mode | Startup behaviour | `bootstrap` operation | Suitability | +|---|---|---|---| +| `token` | On first start with empty tables, auto-seeds the `default` workspace, admin user, admin API key (using the operator-provided `--bootstrap-token`), and an initial signing key. No-op on subsequent starts. | Refused — returns `auth-failed` / `"auth failure"` regardless of caller. | Production, any public-exposure deployment. | +| `bootstrap` | No startup seeding. Tables remain empty until the `bootstrap` operation is invoked over the pub/sub bus (typically via `tg-bootstrap-iam`). | Live while tables are empty. Generates and returns the admin API key once. Refused (`auth-failed`) once tables are populated. | Dev / compose up / CI. **Not safe under public exposure** — any caller reaching the gateway's `/api/v1/iam` forwarder before the operator can cause a token to be issued to them. Operators choosing this mode accept that risk. | + +### Error masking + +In both modes, any refused invocation of the `bootstrap` operation +returns the same error (`auth-failed` / `"auth failure"`). 
A caller +cannot distinguish: + +- "service is in token mode" +- "service is in bootstrap mode but already bootstrapped" +- "operation forbidden" + +This matches the general IAM error-policy stance (see `iam.md`) and +prevents externally enumerating IAM's state. + +### Bootstrap-token lifecycle + +The bootstrap token — whether operator-supplied (`token` mode) or +service-generated (`bootstrap` mode) — is a one-time credential. It +is stored as admin's single API key, tagged `name="bootstrap"`. The +operator's first admin action after bootstrap should be: + +1. Create a durable admin user and API key (or issue a durable API + key to the bootstrap admin). +2. Revoke the bootstrap key via `revoke-api-key`. +3. Remove the bootstrap token from any deployment configuration. + +The `name="bootstrap"` marker makes bootstrap keys easy to detect in +tooling (e.g. a `tg-list-api-keys` filter). + ## HTTP forwarding (initial integration) For the initial gateway integration — before the IAM service is diff --git a/trustgraph-cli/pyproject.toml b/trustgraph-cli/pyproject.toml index d316ae4f..8d88991d 100644 --- a/trustgraph-cli/pyproject.toml +++ b/trustgraph-cli/pyproject.toml @@ -40,6 +40,7 @@ tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main" tg-get-kg-core = "trustgraph.cli.get_kg_core:main" tg-get-document-content = "trustgraph.cli.get_document_content:main" tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main" +tg-bootstrap-iam = "trustgraph.cli.bootstrap_iam:main" tg-invoke-agent = "trustgraph.cli.invoke_agent:main" tg-invoke-document-rag = "trustgraph.cli.invoke_document_rag:main" tg-invoke-graph-rag = "trustgraph.cli.invoke_graph_rag:main" diff --git a/trustgraph-cli/trustgraph/cli/bootstrap_iam.py b/trustgraph-cli/trustgraph/cli/bootstrap_iam.py new file mode 100644 index 00000000..df282984 --- /dev/null +++ b/trustgraph-cli/trustgraph/cli/bootstrap_iam.py @@ -0,0 +1,99 @@ +""" +Bootstraps the IAM service. 
"""
Bootstraps the IAM service. Only works when iam-svc is running in
bootstrap mode with empty tables. Prints the initial admin API key
to stdout.

This is a one-time, trust-sensitive operation. The resulting token
is shown once and never again — capture it on use. Rotate and
revoke it as soon as a real admin API key has been issued.
"""

import argparse
import json
import os
import sys

import requests

default_url = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)

# Seconds to wait for the gateway before giving up.  ``requests`` never
# times out on its own, so without this an unresponsive gateway would
# hang the CLI indefinitely.
default_timeout = 30


def bootstrap(url, token, timeout=default_timeout):
    """Invoke the IAM ``bootstrap`` operation through the gateway.

    Posts ``{"operation": "bootstrap"}`` to ``<url>/api/v1/iam`` and
    extracts the one-time admin credentials from the JSON reply.

    Args:
        url: Base URL of the API gateway; a trailing slash is tolerated.
        token: Optional gateway bearer token.  When falsy, no
            ``Authorization`` header is sent.
        timeout: Seconds to wait for the HTTP exchange (passed to
            ``requests.post``).

    Returns:
        A ``(user_id, api_key)`` tuple taken from the
        ``bootstrap_admin_user_id`` / ``bootstrap_admin_api_key``
        response fields.

    Raises:
        RuntimeError: On a non-200 status, a body that is not a JSON
            object, an ``error`` member in the body, or a response
            without an API key.
        requests.RequestException: On transport failures, including
            the timeout expiring.
    """

    endpoint = url.rstrip("/") + "/api/v1/iam"

    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    resp = requests.post(
        endpoint,
        headers=headers,
        data=json.dumps({"operation": "bootstrap"}),
        timeout=timeout,
    )

    if resp.status_code != 200:
        raise RuntimeError(
            f"HTTP {resp.status_code}: {resp.text}"
        )

    # A proxy or misrouted request can answer 200 with a non-JSON body;
    # report that explicitly rather than leaking a bare decode error.
    try:
        body = resp.json()
    except ValueError as e:
        raise RuntimeError(
            f"IAM response was not valid JSON: {resp.text!r}"
        ) from e

    if not isinstance(body, dict):
        raise RuntimeError(
            f"IAM response was not a JSON object: {body!r}"
        )

    # ``error`` may be absent, null, a structured object, or (from an
    # intermediary) a plain string.  Only a non-null value is an error.
    error = body.get("error")
    if error is not None:
        if isinstance(error, dict):
            raise RuntimeError(
                f"IAM {error.get('type', 'error')}: "
                f"{error.get('message', '')}"
            )
        raise RuntimeError(f"IAM error: {error}")

    api_key = body.get("bootstrap_admin_api_key")
    user_id = body.get("bootstrap_admin_user_id")

    if not api_key:
        raise RuntimeError(
            "IAM response did not contain a bootstrap token — the "
            "service may already be bootstrapped, or may be running "
            "in token mode."
        )

    return user_id, api_key


def main():
    """Entry point for the ``tg-bootstrap-iam`` command.

    Parses ``--api-url`` / ``--token``, runs :func:`bootstrap`, and
    writes the API key alone to stdout so it can be captured by a
    pipeline.  Human-oriented context goes to stderr.  Exits with
    status 1 on any failure.
    """

    parser = argparse.ArgumentParser(
        prog="tg-bootstrap-iam",
        description=__doc__,
    )

    parser.add_argument(
        "-u", "--api-url",
        default=default_url,
        help=f"API URL (default: {default_url})",
    )
    parser.add_argument(
        "-t", "--token",
        default=default_token,
        help="Gateway bearer token (default: $TRUSTGRAPH_TOKEN)",
    )

    args = parser.parse_args()

    # Top-level boundary: any failure is reported once and turned into
    # a non-zero exit status.
    try:
        user_id, api_key = bootstrap(args.api_url, args.token)
    except Exception as e:
        print("Exception:", e, file=sys.stderr, flush=True)
        sys.exit(1)

    # Stdout gets machine-readable output (the key).  Any operator
    # context goes to stderr.
    print(f"Admin user id: {user_id}", file=sys.stderr)
    print(
        "Admin API key (shown once, capture now):",
        file=sys.stderr,
    )
    print(api_key)


if __name__ == "__main__":
    main()
async def auto_bootstrap_if_token_mode(self):
    """Seed the IAM tables at startup when running in ``token`` mode.

    Does nothing unless ``self.bootstrap_mode`` is ``"token"``.  In
    token mode, when the table store reports that no workspace exists,
    the tables are seeded via ``_seed_tables`` with
    ``self.bootstrap_token`` as the admin API key plaintext.  Nothing
    is returned: the operator supplied the token and already knows it.
    When a workspace already exists, the call only logs and returns.
    """
    token_mode = self.bootstrap_mode == "token"
    if not token_mode:
        return

    already_seeded = await self.table_store.any_workspace_exists()
    if not already_seeded:
        logger.info("IAM: token mode, empty tables; auto-bootstrapping")
        await self._seed_tables(self.bootstrap_token)
        logger.info(
            "IAM: auto-bootstrap complete using operator-provided token"
        )
        return

    logger.info(
        "IAM: token mode, tables already populated; skipping auto-bootstrap"
    )
- plaintext = _generate_api_key() key_id = str(uuid.uuid4()) await self.table_store.put_api_key( - key_hash=_hash_api_key(plaintext), + key_hash=_hash_api_key(api_key_plaintext), id=key_id, user_id=admin_user_id, name="bootstrap", - prefix=plaintext[:len(API_KEY_PREFIX) + 4], + prefix=api_key_plaintext[:len(API_KEY_PREFIX) + 4], expires=None, created=now, last_used=None, ) - # Initial JWT signing key. kid, private_pem, public_pem = _generate_signing_keypair() await self.table_store.put_signing_key( kid=kid, @@ -347,15 +375,28 @@ class IamService: created=now, retired=None, ) - # Populate cache so login calls in this process don't go - # back to Cassandra on first use. self._signing_key = (kid, private_pem, public_pem) logger.info( - f"IAM bootstrap: created workspace={DEFAULT_WORKSPACE!r}, " - f"admin user_id={admin_user_id}, initial API key issued, " - f"signing key kid={kid}" + f"IAM seeded: workspace={DEFAULT_WORKSPACE!r}, " + f"admin user_id={admin_user_id}, signing key kid={kid}" ) + return admin_user_id + + async def handle_bootstrap(self, v): + """Explicit bootstrap op. Only available in ``bootstrap`` + mode and only when tables are empty. 
Every other case is + masked to a generic auth failure — the caller cannot + distinguish 'not in bootstrap mode' from 'already + bootstrapped' from 'operation forbidden'.""" + if self.bootstrap_mode != "bootstrap": + return _err("auth-failed", "auth failure") + + if await self.table_store.any_workspace_exists(): + return _err("auth-failed", "auth failure") + + plaintext = _generate_api_key() + admin_user_id = await self._seed_tables(plaintext) return IamResponse( bootstrap_admin_user_id=admin_user_id, diff --git a/trustgraph-flow/trustgraph/iam/service/service.py b/trustgraph-flow/trustgraph/iam/service/service.py index 61bc1fd8..8ea31cf0 100644 --- a/trustgraph-flow/trustgraph/iam/service/service.py +++ b/trustgraph-flow/trustgraph/iam/service/service.py @@ -39,6 +39,32 @@ class Processor(AsyncProcessor): "iam_response_queue", default_iam_response_queue, ) + bootstrap_mode = params.get("bootstrap_mode") + bootstrap_token = params.get("bootstrap_token") + + if bootstrap_mode not in ("token", "bootstrap"): + raise RuntimeError( + "iam-svc: --bootstrap-mode is required. Set to 'token' " + "(with --bootstrap-token) for production, or 'bootstrap' " + "to enable the explicit bootstrap operation over the " + "pub/sub bus (dev / quick-start only, not safe under " + "public exposure). Refusing to start." + ) + if bootstrap_mode == "token" and not bootstrap_token: + raise RuntimeError( + "iam-svc: --bootstrap-mode=token requires " + "--bootstrap-token. Refusing to start." + ) + if bootstrap_mode == "bootstrap" and bootstrap_token: + raise RuntimeError( + "iam-svc: --bootstrap-token is not accepted when " + "--bootstrap-mode=bootstrap. Ambiguous intent. " + "Refusing to start." 
async def start(self):
    """Bring the IAM processor online.

    Ensures the request topic exists, runs the token-mode
    auto-bootstrap, and only then starts the request consumer.
    """
    request_topic = self.iam_request_topic
    await self.pubsub.ensure_topic(request_topic)

    # Seeding happens before the consumer starts so the first inbound
    # call always sees a populated table.
    iam_service = self.iam
    await iam_service.auto_bootstrap_if_token_mode()

    request_consumer = self.iam_request_consumer
    await request_consumer.start()