mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
IAM secure bootstrap options
This commit is contained in:
parent
832a030703
commit
8348b7728b
5 changed files with 266 additions and 27 deletions
|
|
@ -248,6 +248,46 @@ Passwords, API-key plaintext, and signing-key private material are
|
|||
never returned in any response other than the explicit one-time
|
||||
responses above (`reset-password`, `create-api-key`, `bootstrap`).
|
||||
|
||||
## Bootstrap modes
|
||||
|
||||
`iam-svc` requires a bootstrap mode to be chosen at startup. There is
|
||||
no default — an unset or invalid mode causes the service to refuse
|
||||
to start. The purpose is to force the operator to make an explicit
|
||||
security decision rather than rely on an implicit "safe" fallback.
|
||||
|
||||
| Mode | Startup behaviour | `bootstrap` operation | Suitability |
|
||||
|---|---|---|---|
|
||||
| `token` | On first start with empty tables, auto-seeds the `default` workspace, admin user, admin API key (using the operator-provided `--bootstrap-token`), and an initial signing key. No-op on subsequent starts. | Refused — returns `auth-failed` / `"auth failure"` regardless of caller. | Production, any public-exposure deployment. |
|
||||
| `bootstrap` | No startup seeding. Tables remain empty until the `bootstrap` operation is invoked over the pub/sub bus (typically via `tg-bootstrap-iam`). | Live while tables are empty. Generates and returns the admin API key once. Refused (`auth-failed`) once tables are populated. | Dev / compose up / CI. **Not safe under public exposure** — any caller who reaches the gateway's `/api/v1/iam` forwarder before the operator does can cause a token to be issued to them. Operators choosing this mode accept that risk. |
|
||||
|
||||
### Error masking
|
||||
|
||||
In both modes, any refused invocation of the `bootstrap` operation
|
||||
returns the same error (`auth-failed` / `"auth failure"`). A caller
|
||||
cannot distinguish:
|
||||
|
||||
- "service is in token mode"
|
||||
- "service is in bootstrap mode but already bootstrapped"
|
||||
- "operation forbidden"
|
||||
|
||||
This matches the general IAM error-policy stance (see `iam.md`) and
|
||||
prevents externally enumerating IAM's state.
|
||||
|
||||
### Bootstrap-token lifecycle
|
||||
|
||||
The bootstrap token — whether operator-supplied (`token` mode) or
|
||||
service-generated (`bootstrap` mode) — is a one-time credential. It
|
||||
is stored as admin's single API key, tagged `name="bootstrap"`. The
|
||||
operator's first admin action after bootstrap should be:
|
||||
|
||||
1. Create a durable admin user and API key (or issue a durable API
|
||||
key to the bootstrap admin).
|
||||
2. Revoke the bootstrap key via `revoke-api-key`.
|
||||
3. Remove the bootstrap token from any deployment configuration.
|
||||
|
||||
The `name="bootstrap"` marker makes bootstrap keys easy to detect in
|
||||
tooling (e.g. a `tg-list-api-keys` filter).
|
||||
|
||||
## HTTP forwarding (initial integration)
|
||||
|
||||
For the initial gateway integration — before the IAM service is
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main"
|
|||
tg-get-kg-core = "trustgraph.cli.get_kg_core:main"
|
||||
tg-get-document-content = "trustgraph.cli.get_document_content:main"
|
||||
tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main"
|
||||
tg-bootstrap-iam = "trustgraph.cli.bootstrap_iam:main"
|
||||
tg-invoke-agent = "trustgraph.cli.invoke_agent:main"
|
||||
tg-invoke-document-rag = "trustgraph.cli.invoke_document_rag:main"
|
||||
tg-invoke-graph-rag = "trustgraph.cli.invoke_graph_rag:main"
|
||||
|
|
|
|||
99
trustgraph-cli/trustgraph/cli/bootstrap_iam.py
Normal file
99
trustgraph-cli/trustgraph/cli/bootstrap_iam.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
"""
|
||||
Bootstraps the IAM service. Only works when iam-svc is running in
|
||||
bootstrap mode with empty tables. Prints the initial admin API key
|
||||
to stdout.
|
||||
|
||||
This is a one-time, trust-sensitive operation. The resulting token
|
||||
is shown once and never again — capture it on use. Rotate and
|
||||
revoke it as soon as a real admin API key has been issued.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
# Gateway base URL; overridable through the TRUSTGRAPH_URL environment
# variable.
default_url = os.environ.get("TRUSTGRAPH_URL", "http://localhost:8088/")

# Optional gateway bearer token taken from TRUSTGRAPH_TOKEN; None when the
# variable is unset.
default_token = os.environ.get("TRUSTGRAPH_TOKEN")
|
||||
|
||||
|
||||
def bootstrap(url, token, timeout=60):
    """Invoke the IAM ``bootstrap`` operation through the gateway.

    POSTs ``{"operation": "bootstrap"}`` to ``<url>/api/v1/iam`` and
    extracts the one-time admin credentials from the JSON response.

    Args:
        url: Gateway base URL; a trailing ``/`` is tolerated.
        token: Optional gateway bearer token. When falsy, no
            ``Authorization`` header is sent.
        timeout: Seconds to wait for the gateway before giving up
            (passed straight to ``requests.post``).

    Returns:
        A ``(user_id, api_key)`` tuple taken from the response fields
        ``bootstrap_admin_user_id`` and ``bootstrap_admin_api_key``.

    Raises:
        RuntimeError: On a non-200 HTTP status, an ``error`` entry in
            the response body, or a response without an API key.
        requests.exceptions.RequestException: On transport failures,
            including the timeout expiring.
    """

    endpoint = url.rstrip("/") + "/api/v1/iam"

    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # requests applies no timeout unless asked to; without one a stalled
    # gateway would leave this one-shot CLI hanging forever.
    resp = requests.post(
        endpoint,
        headers=headers,
        data=json.dumps({"operation": "bootstrap"}),
        timeout=timeout,
    )

    if resp.status_code != 200:
        raise RuntimeError(
            f"HTTP {resp.status_code}: {resp.text}"
        )

    body = resp.json()

    if "error" in body:
        error = body["error"]
        if isinstance(error, dict):
            raise RuntimeError(
                f"IAM {error.get('type', 'error')}: "
                f"{error.get('message', '')}"
            )
        # Error payload is not the expected object; report it verbatim
        # instead of failing with an AttributeError on ``.get``.
        raise RuntimeError(f"IAM error: {error}")

    api_key = body.get("bootstrap_admin_api_key")
    user_id = body.get("bootstrap_admin_user_id")

    if not api_key:
        raise RuntimeError(
            "IAM response did not contain a bootstrap token — the "
            "service may already be bootstrapped, or may be running "
            "in token mode."
        )

    return user_id, api_key
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for ``tg-bootstrap-iam``.

    Parses ``--api-url`` and ``--token``, calls ``bootstrap`` and
    prints the resulting admin API key on stdout. Operator-facing
    context and failures go to stderr; any failure exits with
    status 1.
    """

    cli = argparse.ArgumentParser(
        prog="tg-bootstrap-iam", description=__doc__,
    )
    cli.add_argument(
        "-u", "--api-url",
        help=f"API URL (default: {default_url})",
        default=default_url,
    )
    cli.add_argument(
        "-t", "--token",
        help="Gateway bearer token (default: $TRUSTGRAPH_TOKEN)",
        default=default_token,
    )
    opts = cli.parse_args()

    try:
        admin_id, key = bootstrap(opts.api_url, opts.token)
    except Exception as exc:
        print("Exception:", exc, file=sys.stderr, flush=True)
        sys.exit(1)

    # Keep stdout machine-readable: only the key is written there, the
    # human-oriented lines are sent to stderr.
    operator_notes = (
        f"Admin user id: {admin_id}",
        "Admin API key (shown once, capture now):",
    )
    for note in operator_notes:
        print(note, file=sys.stderr)

    print(key)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -172,13 +172,29 @@ def _sign_jwt(kid, private_pem, claims):
|
|||
|
||||
class IamService:
|
||||
|
||||
def __init__(self, host, username, password, keyspace):
|
||||
def __init__(self, host, username, password, keyspace,
|
||||
bootstrap_mode, bootstrap_token=None):
|
||||
self.table_store = IamTableStore(
|
||||
host, username, password, keyspace,
|
||||
)
|
||||
# Active signing key cache: (kid, private_pem, public_pem) or
|
||||
# None. Loaded lazily on first use; refreshed whenever a key
|
||||
# is created.
|
||||
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
|
||||
# service auto-seeds on first start using the provided
|
||||
# bootstrap_token and the ``bootstrap`` operation is refused
|
||||
# thereafter (indistinguishable from an already-bootstrapped
|
||||
# deployment per the error policy). In "bootstrap" mode the
|
||||
# ``bootstrap`` operation is live until tables are populated.
|
||||
if bootstrap_mode not in ("token", "bootstrap"):
|
||||
raise ValueError(
|
||||
f"bootstrap_mode must be 'token' or 'bootstrap', "
|
||||
f"got {bootstrap_mode!r}"
|
||||
)
|
||||
if bootstrap_mode == "token" and not bootstrap_token:
|
||||
raise ValueError(
|
||||
"bootstrap_mode='token' requires bootstrap_token"
|
||||
)
|
||||
self.bootstrap_mode = bootstrap_mode
|
||||
self.bootstrap_token = bootstrap_token
|
||||
|
||||
self._signing_key = None
|
||||
self._signing_key_lock = asyncio.Lock()
|
||||
|
||||
|
|
@ -283,21 +299,40 @@ class IamService:
|
|||
# bootstrap
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def handle_bootstrap(self, v):
|
||||
"""No-op if any workspace already exists. Otherwise create
|
||||
the ``default`` workspace, an ``admin`` user with role
|
||||
``admin``, and an initial API key for that admin. The
|
||||
plaintext API key is returned once in the response."""
|
||||
async def auto_bootstrap_if_token_mode(self):
|
||||
"""Called from the service processor at startup. In
|
||||
``token`` mode, if tables are empty, seeds the default
|
||||
workspace / admin / signing key using the operator-provided
|
||||
bootstrap token. The admin's API key plaintext is *the*
|
||||
``bootstrap_token`` — the operator already knows it, nothing
|
||||
needs to be returned or logged.
|
||||
|
||||
In ``bootstrap`` mode this is a no-op; seeding happens on
|
||||
explicit ``bootstrap`` operation invocation."""
|
||||
if self.bootstrap_mode != "token":
|
||||
return
|
||||
|
||||
if await self.table_store.any_workspace_exists():
|
||||
logger.info(
|
||||
"IAM bootstrap: tables already populated; no-op"
|
||||
"IAM: token mode, tables already populated; skipping "
|
||||
"auto-bootstrap"
|
||||
)
|
||||
return IamResponse()
|
||||
return
|
||||
|
||||
logger.info("IAM: token mode, empty tables; auto-bootstrapping")
|
||||
await self._seed_tables(self.bootstrap_token)
|
||||
logger.info(
|
||||
"IAM: auto-bootstrap complete using operator-provided token"
|
||||
)
|
||||
|
||||
async def _seed_tables(self, api_key_plaintext):
|
||||
"""Shared seeding logic used by token-mode auto-bootstrap and
|
||||
bootstrap-mode handle_bootstrap. Creates the default
|
||||
workspace, admin user, admin API key (using the given
|
||||
plaintext), and an initial signing key. Returns the admin
|
||||
user id."""
|
||||
now = _now_dt()
|
||||
|
||||
# Workspace.
|
||||
await self.table_store.put_workspace(
|
||||
id=DEFAULT_WORKSPACE,
|
||||
name="Default",
|
||||
|
|
@ -305,11 +340,7 @@ class IamService:
|
|||
created=now,
|
||||
)
|
||||
|
||||
# Admin user.
|
||||
admin_user_id = str(uuid.uuid4())
|
||||
# Password is set to a random unusable value; admin logs in
|
||||
# with the API key below. Password login for this user can be
|
||||
# enabled later by reset-password.
|
||||
admin_password = secrets.token_urlsafe(32)
|
||||
await self.table_store.put_user(
|
||||
id=admin_user_id,
|
||||
|
|
@ -324,21 +355,18 @@ class IamService:
|
|||
created=now,
|
||||
)
|
||||
|
||||
# Admin API key.
|
||||
plaintext = _generate_api_key()
|
||||
key_id = str(uuid.uuid4())
|
||||
await self.table_store.put_api_key(
|
||||
key_hash=_hash_api_key(plaintext),
|
||||
key_hash=_hash_api_key(api_key_plaintext),
|
||||
id=key_id,
|
||||
user_id=admin_user_id,
|
||||
name="bootstrap",
|
||||
prefix=plaintext[:len(API_KEY_PREFIX) + 4],
|
||||
prefix=api_key_plaintext[:len(API_KEY_PREFIX) + 4],
|
||||
expires=None,
|
||||
created=now,
|
||||
last_used=None,
|
||||
)
|
||||
|
||||
# Initial JWT signing key.
|
||||
kid, private_pem, public_pem = _generate_signing_keypair()
|
||||
await self.table_store.put_signing_key(
|
||||
kid=kid,
|
||||
|
|
@ -347,15 +375,28 @@ class IamService:
|
|||
created=now,
|
||||
retired=None,
|
||||
)
|
||||
# Populate cache so login calls in this process don't go
|
||||
# back to Cassandra on first use.
|
||||
self._signing_key = (kid, private_pem, public_pem)
|
||||
|
||||
logger.info(
|
||||
f"IAM bootstrap: created workspace={DEFAULT_WORKSPACE!r}, "
|
||||
f"admin user_id={admin_user_id}, initial API key issued, "
|
||||
f"signing key kid={kid}"
|
||||
f"IAM seeded: workspace={DEFAULT_WORKSPACE!r}, "
|
||||
f"admin user_id={admin_user_id}, signing key kid={kid}"
|
||||
)
|
||||
return admin_user_id
|
||||
|
||||
async def handle_bootstrap(self, v):
|
||||
"""Explicit bootstrap op. Only available in ``bootstrap``
|
||||
mode and only when tables are empty. Every other case is
|
||||
masked to a generic auth failure — the caller cannot
|
||||
distinguish 'not in bootstrap mode' from 'already
|
||||
bootstrapped' from 'operation forbidden'."""
|
||||
if self.bootstrap_mode != "bootstrap":
|
||||
return _err("auth-failed", "auth failure")
|
||||
|
||||
if await self.table_store.any_workspace_exists():
|
||||
return _err("auth-failed", "auth failure")
|
||||
|
||||
plaintext = _generate_api_key()
|
||||
admin_user_id = await self._seed_tables(plaintext)
|
||||
|
||||
return IamResponse(
|
||||
bootstrap_admin_user_id=admin_user_id,
|
||||
|
|
|
|||
|
|
@ -39,6 +39,32 @@ class Processor(AsyncProcessor):
|
|||
"iam_response_queue", default_iam_response_queue,
|
||||
)
|
||||
|
||||
bootstrap_mode = params.get("bootstrap_mode")
|
||||
bootstrap_token = params.get("bootstrap_token")
|
||||
|
||||
if bootstrap_mode not in ("token", "bootstrap"):
|
||||
raise RuntimeError(
|
||||
"iam-svc: --bootstrap-mode is required. Set to 'token' "
|
||||
"(with --bootstrap-token) for production, or 'bootstrap' "
|
||||
"to enable the explicit bootstrap operation over the "
|
||||
"pub/sub bus (dev / quick-start only, not safe under "
|
||||
"public exposure). Refusing to start."
|
||||
)
|
||||
if bootstrap_mode == "token" and not bootstrap_token:
|
||||
raise RuntimeError(
|
||||
"iam-svc: --bootstrap-mode=token requires "
|
||||
"--bootstrap-token. Refusing to start."
|
||||
)
|
||||
if bootstrap_mode == "bootstrap" and bootstrap_token:
|
||||
raise RuntimeError(
|
||||
"iam-svc: --bootstrap-token is not accepted when "
|
||||
"--bootstrap-mode=bootstrap. Ambiguous intent. "
|
||||
"Refusing to start."
|
||||
)
|
||||
|
||||
self.bootstrap_mode = bootstrap_mode
|
||||
self.bootstrap_token = bootstrap_token
|
||||
|
||||
cassandra_host = params.get("cassandra_host")
|
||||
cassandra_username = params.get("cassandra_username")
|
||||
cassandra_password = params.get("cassandra_password")
|
||||
|
|
@ -96,12 +122,19 @@ class Processor(AsyncProcessor):
|
|||
username=self.cassandra_username,
|
||||
password=self.cassandra_password,
|
||||
keyspace=keyspace,
|
||||
bootstrap_mode=self.bootstrap_mode,
|
||||
bootstrap_token=self.bootstrap_token,
|
||||
)
|
||||
|
||||
logger.info("IAM service initialised")
|
||||
logger.info(
|
||||
f"IAM service initialised (bootstrap-mode={self.bootstrap_mode})"
|
||||
)
|
||||
|
||||
async def start(self):
    """Bring the IAM processor online.

    Awaits three steps in order: ``ensure_topic`` for the IAM request
    topic, the IAM service's ``auto_bootstrap_if_token_mode``, and
    finally the start of the request consumer.
    """
    await self.pubsub.ensure_topic(self.iam_request_topic)
    # Token-mode auto-bootstrap runs before we accept requests so
    # the first inbound call always sees a populated table.
    await self.iam.auto_bootstrap_if_token_mode()
    await self.iam_request_consumer.start()
|
||||
|
||||
async def on_iam_request(self, msg, consumer, flow):
|
||||
|
|
@ -144,6 +177,31 @@ class Processor(AsyncProcessor):
|
|||
default=default_iam_response_queue,
|
||||
help=f"IAM response queue (default: {default_iam_response_queue})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bootstrap-mode",
|
||||
default=None,
|
||||
choices=["token", "bootstrap"],
|
||||
help=(
|
||||
"IAM bootstrap mode (required). "
|
||||
"'token' = operator supplies the initial admin API "
|
||||
"key via --bootstrap-token; auto-seeds on first start, "
|
||||
"bootstrap operation refused. "
|
||||
"'bootstrap' = bootstrap operation is live over the "
|
||||
"bus until tables are populated; a token is generated "
|
||||
"and returned by tg-bootstrap-iam. Unsafe to run "
|
||||
"'bootstrap' mode with public exposure."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bootstrap-token",
|
||||
default=None,
|
||||
help=(
|
||||
"Initial admin API key plaintext, required when "
|
||||
"--bootstrap-mode=token. Treat as a one-time "
|
||||
"credential: the operator should rotate to a new key "
|
||||
"and revoke this one after first use."
|
||||
),
|
||||
)
|
||||
|
||||
add_cassandra_args(parser)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue