IAM secure bootstrap options

This commit is contained in:
Cyber MacGeddon 2026-04-23 19:23:18 +01:00
parent 832a030703
commit 8348b7728b
5 changed files with 266 additions and 27 deletions

View file

@ -172,13 +172,29 @@ def _sign_jwt(kid, private_pem, claims):
class IamService:
def __init__(self, host, username, password, keyspace):
def __init__(self, host, username, password, keyspace,
bootstrap_mode, bootstrap_token=None):
self.table_store = IamTableStore(
host, username, password, keyspace,
)
# Active signing key cache: (kid, private_pem, public_pem) or
# None. Loaded lazily on first use; refreshed whenever a key
# is created.
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
# service auto-seeds on first start using the provided
# bootstrap_token and the ``bootstrap`` operation is refused
# thereafter (indistinguishable from an already-bootstrapped
# deployment per the error policy). In "bootstrap" mode the
# ``bootstrap`` operation is live until tables are populated.
if bootstrap_mode not in ("token", "bootstrap"):
raise ValueError(
f"bootstrap_mode must be 'token' or 'bootstrap', "
f"got {bootstrap_mode!r}"
)
if bootstrap_mode == "token" and not bootstrap_token:
raise ValueError(
"bootstrap_mode='token' requires bootstrap_token"
)
self.bootstrap_mode = bootstrap_mode
self.bootstrap_token = bootstrap_token
self._signing_key = None
self._signing_key_lock = asyncio.Lock()
@ -283,21 +299,40 @@ class IamService:
# bootstrap
# ------------------------------------------------------------------
async def handle_bootstrap(self, v):
"""No-op if any workspace already exists. Otherwise create
the ``default`` workspace, an ``admin`` user with role
``admin``, and an initial API key for that admin. The
plaintext API key is returned once in the response."""
async def auto_bootstrap_if_token_mode(self):
"""Called from the service processor at startup. In
``token`` mode, if tables are empty, seeds the default
workspace / admin / signing key using the operator-provided
bootstrap token. The admin's API key plaintext is *the*
``bootstrap_token`` — the operator already knows it, so nothing
needs to be returned or logged.
In ``bootstrap`` mode this is a no-op; seeding happens on
explicit ``bootstrap`` operation invocation."""
if self.bootstrap_mode != "token":
return
if await self.table_store.any_workspace_exists():
logger.info(
"IAM bootstrap: tables already populated; no-op"
"IAM: token mode, tables already populated; skipping "
"auto-bootstrap"
)
return IamResponse()
return
logger.info("IAM: token mode, empty tables; auto-bootstrapping")
await self._seed_tables(self.bootstrap_token)
logger.info(
"IAM: auto-bootstrap complete using operator-provided token"
)
async def _seed_tables(self, api_key_plaintext):
"""Shared seeding logic used by token-mode auto-bootstrap and
bootstrap-mode handle_bootstrap. Creates the default
workspace, admin user, admin API key (using the given
plaintext), and an initial signing key. Returns the admin
user id."""
now = _now_dt()
# Workspace.
await self.table_store.put_workspace(
id=DEFAULT_WORKSPACE,
name="Default",
@ -305,11 +340,7 @@ class IamService:
created=now,
)
# Admin user.
admin_user_id = str(uuid.uuid4())
# Password is set to a random unusable value; admin logs in
# with the API key below. Password login for this user can be
# enabled later by reset-password.
admin_password = secrets.token_urlsafe(32)
await self.table_store.put_user(
id=admin_user_id,
@ -324,21 +355,18 @@ class IamService:
created=now,
)
# Admin API key.
plaintext = _generate_api_key()
key_id = str(uuid.uuid4())
await self.table_store.put_api_key(
key_hash=_hash_api_key(plaintext),
key_hash=_hash_api_key(api_key_plaintext),
id=key_id,
user_id=admin_user_id,
name="bootstrap",
prefix=plaintext[:len(API_KEY_PREFIX) + 4],
prefix=api_key_plaintext[:len(API_KEY_PREFIX) + 4],
expires=None,
created=now,
last_used=None,
)
# Initial JWT signing key.
kid, private_pem, public_pem = _generate_signing_keypair()
await self.table_store.put_signing_key(
kid=kid,
@ -347,15 +375,28 @@ class IamService:
created=now,
retired=None,
)
# Populate cache so login calls in this process don't go
# back to Cassandra on first use.
self._signing_key = (kid, private_pem, public_pem)
logger.info(
f"IAM bootstrap: created workspace={DEFAULT_WORKSPACE!r}, "
f"admin user_id={admin_user_id}, initial API key issued, "
f"signing key kid={kid}"
f"IAM seeded: workspace={DEFAULT_WORKSPACE!r}, "
f"admin user_id={admin_user_id}, signing key kid={kid}"
)
return admin_user_id
async def handle_bootstrap(self, v):
"""Explicit bootstrap op. Only available in ``bootstrap``
mode and only when tables are empty. Every other case is
masked to a generic auth failure — the caller cannot
distinguish 'not in bootstrap mode' from 'already
bootstrapped' from 'operation forbidden'."""
if self.bootstrap_mode != "bootstrap":
return _err("auth-failed", "auth failure")
if await self.table_store.any_workspace_exists():
return _err("auth-failed", "auth failure")
plaintext = _generate_api_key()
admin_user_id = await self._seed_tables(plaintext)
return IamResponse(
bootstrap_admin_user_id=admin_user_id,

View file

@ -39,6 +39,32 @@ class Processor(AsyncProcessor):
"iam_response_queue", default_iam_response_queue,
)
bootstrap_mode = params.get("bootstrap_mode")
bootstrap_token = params.get("bootstrap_token")
if bootstrap_mode not in ("token", "bootstrap"):
raise RuntimeError(
"iam-svc: --bootstrap-mode is required. Set to 'token' "
"(with --bootstrap-token) for production, or 'bootstrap' "
"to enable the explicit bootstrap operation over the "
"pub/sub bus (dev / quick-start only, not safe under "
"public exposure). Refusing to start."
)
if bootstrap_mode == "token" and not bootstrap_token:
raise RuntimeError(
"iam-svc: --bootstrap-mode=token requires "
"--bootstrap-token. Refusing to start."
)
if bootstrap_mode == "bootstrap" and bootstrap_token:
raise RuntimeError(
"iam-svc: --bootstrap-token is not accepted when "
"--bootstrap-mode=bootstrap. Ambiguous intent. "
"Refusing to start."
)
self.bootstrap_mode = bootstrap_mode
self.bootstrap_token = bootstrap_token
cassandra_host = params.get("cassandra_host")
cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password")
@ -96,12 +122,19 @@ class Processor(AsyncProcessor):
username=self.cassandra_username,
password=self.cassandra_password,
keyspace=keyspace,
bootstrap_mode=self.bootstrap_mode,
bootstrap_token=self.bootstrap_token,
)
logger.info("IAM service initialised")
logger.info(
f"IAM service initialised (bootstrap-mode={self.bootstrap_mode})"
)
async def start(self):
await self.pubsub.ensure_topic(self.iam_request_topic)
# Token-mode auto-bootstrap runs before we accept requests so
# the first inbound call always sees a populated table.
await self.iam.auto_bootstrap_if_token_mode()
await self.iam_request_consumer.start()
async def on_iam_request(self, msg, consumer, flow):
@ -144,6 +177,31 @@ class Processor(AsyncProcessor):
default=default_iam_response_queue,
help=f"IAM response queue (default: {default_iam_response_queue})",
)
parser.add_argument(
"--bootstrap-mode",
default=None,
choices=["token", "bootstrap"],
help=(
"IAM bootstrap mode (required). "
"'token' = operator supplies the initial admin API "
"key via --bootstrap-token; auto-seeds on first start, "
"bootstrap operation refused. "
"'bootstrap' = bootstrap operation is live over the "
"bus until tables are populated; a token is generated "
"and returned by tg-bootstrap-iam. Unsafe to run "
"'bootstrap' mode with public exposure."
),
)
parser.add_argument(
"--bootstrap-token",
default=None,
help=(
"Initial admin API key plaintext, required when "
"--bootstrap-mode=token. Treat as a one-time "
"credential: the operator should rotate to a new key "
"and revoke this one after first use."
),
)
add_cassandra_args(parser)