trustgraph/trustgraph-flow/trustgraph/iam/service/iam.py
cybermaggedon 9fc1d4527b
iam: self-service ops, optional workspace filters, Mux service routing (#855)
Three threads, all reinforcing the contract's system-level vs.
workspace-association distinction.

WS Mux service routing
- tg-show-flows (and any workspace-level service over the WS) was
  failing with "unknown service" because the post-refactor Mux
  unconditionally looked up flow-service:<kind>.  Now branches on
  the envelope's flow field: with flow → flow-service:<kind>;
  without flow → <kind>:<op> from the inner body; with bare op
  lookup for service=iam.  Resource and parameters come from the
  matched op's own extractors — same path the HTTP endpoints take.

Optional workspace on system-level user/key ops
- list-users returns the deployment-wide list when no workspace is
  supplied, filters when one is.  get-user, update-user,
  disable-user, enable-user, delete-user, reset-password,
  create-api-key, list-api-keys, revoke-api-key all treat workspace
  as an optional integrity check rather than a required argument.
- create-user keeps workspace required — there it's the new user's
  home-workspace binding, a parameter rather than an address.
- API keys reclassified as SYSTEM-level resources.  By the same
  reasoning that makes users system-level, an API key is a
  credential record on a deployment-wide registry; the workspace it
  authenticates to is a property, not a containment.

Self-service surface
- whoami: returns the caller's own user record.  AUTHENTICATED-only;
  no users:read capability required.  Foundation for UI affordances
  that depend on the caller's permissions.
- bootstrap-status: POST /api/v1/auth/bootstrap-status, PUBLIC,
  side-effect-free.  Returns {bootstrap_available: bool} so a
  first-run UI can decide whether to render setup without consuming
  the bootstrap op.
- Gateway now injects actor=identity.handle on every authenticated
  forward to iam-svc (IamEndpoint and WS Mux iam path), overwriting
  any caller-supplied value.  Underpins whoami, audit logging, and
  future regime-side decisions that need actor identity.
- tg-whoami and tg-update-user CLIs.

Spec polish
- iam-contract.md: actor-injection rule documented; whoami /
  bootstrap-status added to operations list; permission-scope
  framing tightened (workspace scope is a property of the grant,
  not the user or role).
- iam.md: self-service section; gateway flow gains the actor-
  injection step; role section reframed so iam-svc constraints
  don't leak into contract-level prose.
- iam-protocol.md: ops table updated for whoami, bootstrap-status,
  optional-workspace pattern; bootstrap_available added to the
  IamResponse listing.
2026-04-28 22:13:12 +01:00

1358 lines
48 KiB
Python

"""
IAM business logic. Handles ``IamRequest`` messages and builds
``IamResponse`` messages. Does not concern itself with transport.
See docs/tech-specs/iam-protocol.md for the wire-level contract and
docs/tech-specs/iam.md for the surrounding architecture.
"""
import asyncio
import base64
import datetime
import hashlib
import json
import logging
import os
import secrets
import uuid
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519
from trustgraph.schema import (
IamResponse, Error,
UserRecord, WorkspaceRecord, ApiKeyRecord,
)
from ... tables.iam import IamTableStore
# Module-level logger used by every handler in this file.
logger = logging.getLogger(__name__)
# Id of the workspace created when the tables are seeded. Login also
# falls back to it when the request names no workspace.
DEFAULT_WORKSPACE = "default"
# Username and display name given to the admin user created at seed
# time.
BOOTSTRAP_ADMIN_USERNAME = "admin"
BOOTSTRAP_ADMIN_NAME = "Administrator"
# PBKDF2-HMAC-SHA-256 iteration count applied to newly hashed
# passwords. The count is written into each encoded hash, and
# verification uses the stored value rather than this constant.
PBKDF2_ITERATIONS = 600_000
# A generated API key is this prefix followed by a URL-safe token
# built from API_KEY_RANDOM_BYTES random bytes.
API_KEY_PREFIX = "tg_"
API_KEY_RANDOM_BYTES = 24
# "iss" claim, and lifetime in seconds between "iat" and "exp", of the
# JWTs issued by login.
JWT_ISSUER = "trustgraph-iam"
JWT_TTL_SECONDS = 3600
# Default authorisation cache TTL the regime tells the gateway to
# observe. 60s is the OSS-spec maximum revocation latency: a role
# change, workspace disable, or key revoke takes effect within at
# most this much time.
AUTHZ_CACHE_TTL_SECONDS = 60
# OSS regime role table. It is kept in this module rather than in
# the gateway: the gateway is regime-agnostic and must not encode
# policy.
#
# A role is a capability set plus a workspace scope. The evaluator
# (handle_authorise below) checks (a) that some role held by the
# caller grants the requested capability, and (b) that the role's
# workspace scope permits the target workspace.
#
# The capability sets are cumulative: writer extends reader, admin
# extends writer.
_READER_CAPS = {
    "agent",
    "llm",
    "embeddings",
    "mcp",
    "graph:read",
    "documents:read",
    "rows:read",
    "config:read",
    "flows:read",
    "collections:read",
    "knowledge:read",
    "keys:self",
}
_WRITER_CAPS = _READER_CAPS.union({
    "graph:write",
    "documents:write",
    "rows:write",
    "collections:write",
    "knowledge:write",
})
_ADMIN_CAPS = _WRITER_CAPS.union({
    "config:write",
    "flows:write",
    "users:read",
    "users:write",
    "users:admin",
    "keys:admin",
    "workspaces:admin",
    "iam:admin",
    "metrics:read",
})
# role name -> {"capabilities": set, "workspace_scope": "assigned" | "*"}
ROLE_DEFINITIONS = {
    role_name: {"capabilities": caps, "workspace_scope": scope}
    for role_name, caps, scope in (
        ("reader", _READER_CAPS, "assigned"),
        ("writer", _WRITER_CAPS, "assigned"),
        ("admin", _ADMIN_CAPS, "*"),
    )
}
def _scope_permits(role_scope, target_workspace, assigned_workspace):
    """Decide whether a role with ``role_scope`` applies to a workspace.

    ``"*"`` applies everywhere; ``"assigned"`` applies only when
    ``target_workspace`` equals ``assigned_workspace``; any other
    scope value applies nowhere.
    """
    if role_scope == "assigned":
        return target_workspace == assigned_workspace
    return role_scope == "*"
def _now_iso():
    """Return the current UTC time as an ISO-8601 string."""
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat()
def _now_dt():
    """Return the current time as a timezone-aware UTC ``datetime``."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
def _iso(dt):
    """Render a timestamp as an ISO-8601 string.

    ``None`` becomes ``""``, a string is returned unchanged, and a
    naive ``datetime`` is labelled as UTC before formatting. An aware
    ``datetime`` keeps its own offset.
    """
    if dt is None:
        return ""
    if isinstance(dt, str):
        return dt
    aware = dt if dt.tzinfo is not None else dt.replace(
        tzinfo=datetime.timezone.utc,
    )
    return aware.isoformat()
def _hash_password(password):
    """Hash ``password`` with PBKDF2-HMAC-SHA-256 and a fresh salt.

    The result is the self-describing string
    ``pbkdf2-sha256$<iters>$<b64-salt>$<b64-hash>``. It is stored
    verbatim in the password_hash column, so the algorithm and cost
    can change later: new rows get a new prefix while old rows are
    still verified with the parameters recorded in them.
    """
    salt = os.urandom(16)
    derived = hashlib.pbkdf2_hmac(
        "sha256", password.encode("utf-8"), salt, PBKDF2_ITERATIONS,
    )
    fields = (
        "pbkdf2-sha256",
        str(PBKDF2_ITERATIONS),
        base64.b64encode(salt).decode("ascii"),
        base64.b64encode(derived).decode("ascii"),
    )
    return "$".join(fields)
def _verify_password(password, encoded):
    """Constant-time verify ``password`` against an encoded hash.

    ``encoded`` is expected in the form
    ``pbkdf2-sha256$<iters>$<b64-salt>$<b64-hash>``. The iteration
    count and salt are taken from the encoded value itself.

    Returns ``True`` only when the derived key matches the stored
    one. Every malformed encoding returns ``False`` instead of
    raising: a non-string value, the wrong number of fields, an
    unknown algorithm, undecodable base64, or an iteration count
    that is not an integer, is below 1, or is too large for the
    hash implementation.
    """
    if not isinstance(encoded, str):
        return False
    try:
        algo, iters, b64_salt, b64_hash = encoded.split("$")
    except ValueError:
        return False
    if algo != "pbkdf2-sha256":
        return False
    try:
        iterations = int(iters)
        salt = base64.b64decode(b64_salt)
        target = base64.b64decode(b64_hash)
    except (ValueError, TypeError):
        # binascii.Error (bad base64) is a ValueError subclass.
        return False
    # pbkdf2_hmac rejects counts below 1 with ValueError; treat that
    # as a corrupt record, not as a server error.
    if iterations < 1:
        return False
    try:
        dk = hashlib.pbkdf2_hmac(
            "sha256", password.encode("utf-8"), salt, iterations,
        )
    except (ValueError, OverflowError):
        # Iteration count outside what the implementation accepts.
        return False
    return secrets.compare_digest(dk, target)
def _generate_api_key():
    """Create a new API-key plaintext.

    The key is ``API_KEY_PREFIX`` followed by a URL-safe random token
    built from ``API_KEY_RANDOM_BYTES`` bytes, i.e. ``tg_<random>``.
    """
    random_part = secrets.token_urlsafe(API_KEY_RANDOM_BYTES)
    return f"{API_KEY_PREFIX}{random_part}"
def _hash_api_key(plaintext):
    """Return the SHA-256 hex digest of an API key plaintext.

    The digest is the primary key in ``iam_api_keys``, which makes
    ``resolve-api-key`` a single O(1) lookup.
    """
    hasher = hashlib.sha256()
    hasher.update(plaintext.encode("utf-8"))
    return hasher.hexdigest()
def _err(type, message):
    """Build an ``IamResponse`` that carries only an error.

    ``type`` is the error category (e.g. ``"auth-failed"``) and
    ``message`` the accompanying text. The parameter name ``type``
    shadows the builtin inside this function; it is kept because it
    is part of the helper's signature.
    """
    failure = Error(type=type, message=message)
    return IamResponse(error=failure)
def _parse_expires(s):
    """Parse an optional ISO-8601 expiry string.

    Returns the parsed ``datetime``, or ``None`` when ``s`` is empty
    or cannot be parsed. The result is naive when ``s`` carries no
    UTC offset.
    """
    if not s:
        return None
    try:
        parsed = datetime.datetime.fromisoformat(s)
    except Exception:
        # Deliberately lenient: an unparseable expiry means "none".
        parsed = None
    return parsed
def _b64url(data):
    """Encode ``data`` as URL-safe base64 with the ``=`` padding
    removed, which is the form JWT segments require."""
    padded = base64.urlsafe_b64encode(data).decode("ascii")
    return padded.rstrip("=")
def _generate_signing_keypair():
    """Create a fresh Ed25519 keypair for JWT signing.

    Returns ``(kid, private_pem, public_pem)``: a 16-character hex key
    id taken from a random UUID, the unencrypted PKCS8 private key in
    PEM form, and the SubjectPublicKeyInfo public key in PEM form
    (both as ASCII strings).

    Ed25519 / EdDSA was chosen for its small (32-byte) public key,
    speed, deterministic signatures, side-channel resistance by
    construction, and freedom from NIST-curve baggage.
    """
    private_key = ed25519.Ed25519PrivateKey.generate()
    private_bytes = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_bytes = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    kid = uuid.uuid4().hex[:16]
    return kid, private_bytes.decode("ascii"), public_bytes.decode("ascii")
def _sign_jwt(kid, private_pem, claims):
    """Sign ``claims`` as a compact-serialisation EdDSA (Ed25519) JWT.

    ``private_pem`` is the unencrypted PEM private key and ``kid`` is
    placed in the JWT header. Header and payload are serialised as
    compact JSON with sorted keys.

    Raises ``RuntimeError`` when the PEM does not hold an Ed25519
    private key.
    """
    key = serialization.load_pem_private_key(
        private_pem.encode("ascii"), password=None,
    )
    if not isinstance(key, ed25519.Ed25519PrivateKey):
        raise RuntimeError(
            f"signing key is not Ed25519: {type(key).__name__}"
        )

    def _segment(obj):
        # One JWT segment: compact sorted-key JSON, base64url-encoded.
        compact = json.dumps(obj, separators=(",", ":"), sort_keys=True)
        return _b64url(compact.encode("utf-8"))

    header_b = _segment({"alg": "EdDSA", "typ": "JWT", "kid": kid})
    payload_b = _segment(claims)
    unsigned = f"{header_b}.{payload_b}"
    signature = key.sign(unsigned.encode("ascii"))
    return f"{unsigned}.{_b64url(signature)}"
class IamService:
def __init__(self, host, username, password, keyspace,
             bootstrap_mode, bootstrap_token=None):
    """Validate the bootstrap configuration, then build the service.

    ``host``, ``username``, ``password`` and ``keyspace`` are passed
    straight to ``IamTableStore``.

    ``bootstrap_mode`` is ``"token"`` or ``"bootstrap"``. In "token"
    mode the service auto-seeds on first start using the provided
    ``bootstrap_token`` and the ``bootstrap`` operation is refused
    thereafter (indistinguishable from an already-bootstrapped
    deployment per the error policy). In "bootstrap" mode the
    ``bootstrap`` operation is live until tables are populated.

    Raises ``ValueError`` when ``bootstrap_mode`` is not one of the
    two accepted values, or when "token" mode is requested without a
    ``bootstrap_token``.
    """
    # Validate before constructing the table store, so a bad
    # configuration fails without any store having been created.
    if bootstrap_mode not in ("token", "bootstrap"):
        raise ValueError(
            f"bootstrap_mode must be 'token' or 'bootstrap', "
            f"got {bootstrap_mode!r}"
        )
    if bootstrap_mode == "token" and not bootstrap_token:
        raise ValueError(
            "bootstrap_mode='token' requires bootstrap_token"
        )
    self.table_store = IamTableStore(
        host, username, password, keyspace,
    )
    self.bootstrap_mode = bootstrap_mode
    self.bootstrap_token = bootstrap_token
    # Cached (kid, private_pem, public_pem) of the active signing
    # key; filled lazily. The lock serialises the code that loads or
    # replaces it.
    self._signing_key = None
    self._signing_key_lock = asyncio.Lock()
# ------------------------------------------------------------------
# Dispatch
# ------------------------------------------------------------------
async def handle(self, v):
    """Dispatch one IAM request to the handler for ``v.operation``.

    Returns the handler's ``IamResponse``. An operation that is not
    in the table yields an ``invalid-argument`` error. Any exception
    raised while dispatching or handling is logged with its traceback
    and turned into an ``internal-error`` response whose message is
    ``str(exception)``.
    """
    op = v.operation
    # Wire operation name -> name of the method that serves it.
    handler_names = {
        "bootstrap": "handle_bootstrap",
        "bootstrap-status": "handle_bootstrap_status",
        "whoami": "handle_whoami",
        "resolve-api-key": "handle_resolve_api_key",
        "create-user": "handle_create_user",
        "list-users": "handle_list_users",
        "create-api-key": "handle_create_api_key",
        "list-api-keys": "handle_list_api_keys",
        "revoke-api-key": "handle_revoke_api_key",
        "login": "handle_login",
        "get-signing-key-public": "handle_get_signing_key_public",
        "change-password": "handle_change_password",
        "reset-password": "handle_reset_password",
        "get-user": "handle_get_user",
        "update-user": "handle_update_user",
        "disable-user": "handle_disable_user",
        "enable-user": "handle_enable_user",
        "delete-user": "handle_delete_user",
        "create-workspace": "handle_create_workspace",
        "list-workspaces": "handle_list_workspaces",
        "get-workspace": "handle_get_workspace",
        "update-workspace": "handle_update_workspace",
        "disable-workspace": "handle_disable_workspace",
        "rotate-signing-key": "handle_rotate_signing_key",
        "authorise": "handle_authorise",
        "authorise-many": "handle_authorise_many",
    }
    try:
        # Only strings can name an operation; the guard also keeps
        # unhashable values away from the dict lookup.
        method_name = (
            handler_names.get(op) if isinstance(op, str) else None
        )
        if method_name is None:
            return _err(
                "invalid-argument",
                f"unknown or not-yet-implemented operation: {op!r}",
            )
        # Resolved lazily so only the requested handler is looked up.
        return await getattr(self, method_name)(v)
    except Exception as e:
        logger.error(
            f"IAM {op} failed: {type(e).__name__}: {e}",
            exc_info=True,
        )
        return _err("internal-error", str(e))
# ------------------------------------------------------------------
# Record conversion
# ------------------------------------------------------------------
def _row_to_user_record(self, row):
    """Convert a 10-column user row into a ``UserRecord``.

    The password hash column is dropped. Missing text columns become
    ``""``, roles are returned sorted (empty list when absent), the
    two flags are coerced to ``bool`` and ``created`` is rendered
    with ``_iso``.
    """
    (
        user_id, home_workspace, login_name, display_name, email_addr,
        _password_hash, role_set, is_enabled, needs_pw_change,
        created_at,
    ) = row
    role_list = sorted(role_set) if role_set else []
    return UserRecord(
        id=user_id or "",
        workspace=home_workspace or "",
        username=login_name or "",
        name=display_name or "",
        email=email_addr or "",
        roles=role_list,
        enabled=bool(is_enabled),
        must_change_password=bool(needs_pw_change),
        created=_iso(created_at),
    )
def _row_to_api_key_record(self, row):
    """Convert an 8-column API-key row into an ``ApiKeyRecord``.

    The key hash column is dropped. Missing text columns become
    ``""`` and the three timestamps are rendered with ``_iso``.
    """
    (
        _key_hash, key_id, owner_id, label, key_prefix, expires_at,
        created_at, last_used_at,
    ) = row
    return ApiKeyRecord(
        id=key_id or "",
        user_id=owner_id or "",
        name=label or "",
        prefix=key_prefix or "",
        expires=_iso(expires_at),
        created=_iso(created_at),
        last_used=_iso(last_used_at),
    )
# ------------------------------------------------------------------
# bootstrap
# ------------------------------------------------------------------
async def auto_bootstrap_if_token_mode(self):
    """Seed an empty deployment at startup when running in ``token``
    mode.

    Called from the service processor at startup. When the mode is
    ``token`` and no workspace exists yet, this seeds the default
    workspace, admin user and signing key, using the
    operator-provided ``bootstrap_token`` as the admin's API key
    plaintext. The operator already knows that token, so nothing is
    returned or logged.

    In ``bootstrap`` mode this is a no-op; seeding happens on an
    explicit ``bootstrap`` operation instead.
    """
    if self.bootstrap_mode != "token":
        return
    already_populated = await self.table_store.any_workspace_exists()
    if already_populated:
        logger.info(
            "IAM: token mode, tables already populated; skipping "
            "auto-bootstrap"
        )
    else:
        logger.info("IAM: token mode, empty tables; auto-bootstrapping")
        await self._seed_tables(self.bootstrap_token)
        logger.info(
            "IAM: auto-bootstrap complete using operator-provided token"
        )
async def _seed_tables(self, api_key_plaintext):
    """Write the initial IAM rows and return the admin user id.

    Shared by token-mode auto-bootstrap and the explicit
    ``bootstrap`` operation. In order, it creates the default
    workspace, the admin user, an API key for that user whose
    plaintext is ``api_key_plaintext``, and a first signing key,
    which is also placed in the in-memory cache.
    """
    seeded_at = _now_dt()
    store = self.table_store
    await store.put_workspace(
        id=DEFAULT_WORKSPACE,
        name="Default",
        enabled=True,
        created=seeded_at,
    )
    admin_user_id = str(uuid.uuid4())
    # The admin gets a random password that is never returned, and
    # is flagged as having to change it.
    throwaway_password = secrets.token_urlsafe(32)
    await store.put_user(
        id=admin_user_id,
        workspace=DEFAULT_WORKSPACE,
        username=BOOTSTRAP_ADMIN_USERNAME,
        name=BOOTSTRAP_ADMIN_NAME,
        email="",
        password_hash=_hash_password(throwaway_password),
        roles=["admin"],
        enabled=True,
        must_change_password=True,
        created=seeded_at,
    )
    api_key_id = str(uuid.uuid4())
    # Only the hash of the plaintext is stored, plus a short leading
    # slice as the display prefix.
    visible_chars = len(API_KEY_PREFIX) + 4
    await store.put_api_key(
        key_hash=_hash_api_key(api_key_plaintext),
        id=api_key_id,
        user_id=admin_user_id,
        name="bootstrap",
        prefix=api_key_plaintext[:visible_chars],
        expires=None,
        created=seeded_at,
        last_used=None,
    )
    kid, private_pem, public_pem = _generate_signing_keypair()
    await store.put_signing_key(
        kid=kid,
        private_pem=private_pem,
        public_pem=public_pem,
        created=seeded_at,
        retired=None,
    )
    self._signing_key = (kid, private_pem, public_pem)
    logger.info(
        f"IAM seeded: workspace={DEFAULT_WORKSPACE!r}, "
        f"admin user_id={admin_user_id}, signing key kid={kid}"
    )
    return admin_user_id
async def handle_bootstrap(self, v):
    """Serve the explicit ``bootstrap`` operation.

    It succeeds only in ``bootstrap`` mode while no workspace exists.
    Every other case is masked as the same generic auth failure, so
    the caller cannot distinguish 'not in bootstrap mode' from
    'already bootstrapped' from 'operation forbidden'.

    On success the tables are seeded with a newly generated API key,
    and the admin user id and key plaintext are returned.
    """
    # The store is consulted only when the mode allows bootstrap.
    allowed = (
        self.bootstrap_mode == "bootstrap"
        and not await self.table_store.any_workspace_exists()
    )
    if not allowed:
        return _err("auth-failed", "auth failure")
    new_key = _generate_api_key()
    admin_id = await self._seed_tables(new_key)
    return IamResponse(
        bootstrap_admin_user_id=admin_id,
        bootstrap_admin_api_key=new_key,
    )
async def handle_whoami(self, v):
    """Return the user record of the caller named by ``v.actor``.

    ``v.actor`` is the authenticated identity's handle, which the
    gateway populates from ``identity.handle``. No ``users:read``
    capability is needed: every authenticated user may read their
    own record.
    """
    actor = v.actor
    if not actor:
        return _err(
            "invalid-argument",
            "actor required (gateway should populate this)",
        )
    own_row = await self.table_store.get_user(actor)
    if own_row is not None:
        return IamResponse(user=self._row_to_user_record(own_row))
    return _err("not-found", "user not found")
async def handle_bootstrap_status(self, v):
    """Report whether an explicit ``bootstrap`` call would succeed.

    That is the case only in ``bootstrap`` mode with no workspace
    present. The op is PUBLIC so a UI can decide whether to show the
    first-run setup flow without invoking the side-effectful
    ``bootstrap`` op.

    The leak is intentionally narrow: an empty deployment in
    bootstrap mode is already inferable (no users, no logins
    succeed), and this only makes the answer explicit instead of
    forcing callers to probe the masked-failure path.
    """
    if self.bootstrap_mode == "bootstrap":
        available = not await self.table_store.any_workspace_exists()
    else:
        # Outside bootstrap mode the store is not consulted.
        available = False
    return IamResponse(bootstrap_available=available)
# ------------------------------------------------------------------
# Signing key helpers
# ------------------------------------------------------------------
async def _get_active_signing_key(self):
    """Return ``(kid, private_pem, public_pem)`` of the active
    signing key.

    The key is cached after the first call. On a cold cache the
    stored keys are listed and the first one with no retirement
    timestamp is used. If none exists, a new key is generated and
    persisted. That covers ``login`` being called before
    ``bootstrap``, which shouldn't happen in practice but keeps the
    service internally consistent.
    """
    cached = self._signing_key
    if cached is not None:
        return cached
    async with self._signing_key_lock:
        # Another task may have filled the cache while this one
        # waited for the lock.
        if self._signing_key is None:
            rows = await self.table_store.list_signing_keys()
            # Column 4 is the retired timestamp; None means active.
            unretired = list(filter(lambda r: r[4] is None, rows))
            if unretired:
                found = unretired[0]
                self._signing_key = (found[0], found[1], found[2])
                logger.info(
                    f"IAM: loaded active signing key kid={found[0]}"
                )
            else:
                kid, private_pem, public_pem = (
                    _generate_signing_keypair()
                )
                await self.table_store.put_signing_key(
                    kid=kid,
                    private_pem=private_pem,
                    public_pem=public_pem,
                    created=_now_dt(),
                    retired=None,
                )
                self._signing_key = (kid, private_pem, public_pem)
                logger.info(
                    f"IAM: generated active signing key kid={kid} "
                    f"(no existing key found)"
                )
        return self._signing_key
# ------------------------------------------------------------------
# login
# ------------------------------------------------------------------
async def handle_login(self, v):
    """Check a username/password pair and issue a signed JWT.

    The workspace is optional and defaults to ``DEFAULT_WORKSPACE``
    (OSS single-workspace assumption; multi-workspace enterprise
    editions swap in a resolver that looks across the caller's
    permitted set). Every failure returns an ``auth-failed`` error.
    On success the response carries the token and its expiry as an
    ISO-8601 string.

    NOTE(review): the error *messages* differ per failure cause
    while the type is always ``auth-failed``; presumably the gateway
    does not relay the message to clients -- confirm.
    """
    if not v.username:
        return _err("auth-failed", "username required")
    if not v.password:
        return _err("auth-failed", "password required")
    login_workspace = v.workspace or DEFAULT_WORKSPACE
    store = self.table_store
    found_id = await store.get_user_id_by_username(
        login_workspace, v.username,
    )
    if not found_id:
        return _err("auth-failed", "no such user")
    account = await store.get_user(found_id)
    if account is None:
        return _err("auth-failed", "user disappeared")
    (
        subject_id, home_ws, _username, _name, _email, stored_hash,
        _roles, is_enabled, _mcp, _created,
    ) = account
    if not is_enabled:
        return _err("auth-failed", "user disabled")
    password_ok = bool(stored_hash) and _verify_password(
        v.password, stored_hash,
    )
    if not password_ok:
        return _err("auth-failed", "bad credentials")
    # The home workspace must still exist and be enabled (column 2).
    ws_row = await store.get_workspace(home_ws)
    if ws_row is None or not ws_row[2]:
        return _err("auth-failed", "workspace disabled")
    kid, private_pem, _public_pem = await self._get_active_signing_key()
    issued_at = int(_now_dt().timestamp())
    expires_at = issued_at + JWT_TTL_SECONDS
    # Per the IAM contract the gateway never reads policy state from
    # the credential: roles stay server-side, reachable only via
    # authorise(). The JWT carries identity + workspace binding only.
    token = _sign_jwt(kid, private_pem, {
        "iss": JWT_ISSUER,
        "sub": subject_id,
        "workspace": home_ws,
        "iat": issued_at,
        "exp": expires_at,
    })
    expiry_moment = datetime.datetime.fromtimestamp(
        expires_at, tz=datetime.timezone.utc,
    )
    return IamResponse(jwt=token, jwt_expires=expiry_moment.isoformat())
# ------------------------------------------------------------------
# get-signing-key-public
# ------------------------------------------------------------------
async def handle_get_signing_key_public(self, v):
    """Return the PEM public half of the active signing key."""
    _kid, _private_pem, public_pem = (
        await self._get_active_signing_key()
    )
    return IamResponse(signing_key_public=public_pem)
# ------------------------------------------------------------------
# Record-conversion helper for workspaces
# ------------------------------------------------------------------
def _row_to_workspace_record(self, row):
    """Convert a 4-column workspace row into a ``WorkspaceRecord``.

    Missing id / name become ``""``, the enabled flag is coerced to
    ``bool`` and ``created`` is rendered with ``_iso``.
    """
    ws_id, ws_name, ws_enabled, ws_created = row
    return WorkspaceRecord(
        id=ws_id or "",
        name=ws_name or "",
        enabled=bool(ws_enabled),
        created=_iso(ws_created),
    )
async def _resolve_user(self, user_id, workspace=None):
    """Load a user by id, optionally checking its home workspace.

    Returns ``(user_row, None)`` on success and
    ``(None, error_response)`` otherwise: ``not-found`` when no such
    user exists, ``operation-not-permitted`` when ``workspace`` is
    given and differs from the record's workspace (column 1).

    The workspace is an *optional integrity check*: the user record
    is system-level and identified by id alone. Whether the caller
    may operate on this user at all is the gateway's responsibility,
    via the contract's ``authorise`` call made before the handler is
    reached.
    """
    row = await self.table_store.get_user(user_id)
    if row is None:
        return None, _err("not-found", "user not found")
    # The record's workspace is only read when a workspace was given.
    wrong_workspace = bool(workspace) and row[1] != workspace
    if wrong_workspace:
        return None, _err(
            "operation-not-permitted",
            "user is in a different workspace",
        )
    return row, None
# ------------------------------------------------------------------
# change-password
# ------------------------------------------------------------------
async def handle_change_password(self, v):
    """Replace a user's password after verifying the current one.

    Requires ``user_id``, ``password`` (the current one) and
    ``new_password``. An unknown or disabled user, or a wrong current
    password, returns ``auth-failed``. On success the new hash is
    stored and ``must_change_password`` is cleared.
    """
    # Required fields, checked in this order.
    for value, complaint in (
        (v.user_id, "user_id required"),
        (v.password, "password (current) required"),
        (v.new_password, "new_password required"),
    ):
        if not value:
            return _err("invalid-argument", complaint)
    account = await self.table_store.get_user(v.user_id)
    if account is None:
        return _err("auth-failed", "no such user")
    (
        _id, _ws, _un, _name, _email, stored_hash,
        _roles, is_enabled, _mcp, _created,
    ) = account
    if not is_enabled:
        return _err("auth-failed", "user disabled")
    current_ok = bool(stored_hash) and _verify_password(
        v.password, stored_hash,
    )
    if not current_ok:
        return _err("auth-failed", "bad credentials")
    await self.table_store.update_user_password(
        id=v.user_id,
        password_hash=_hash_password(v.new_password),
        must_change_password=False,
    )
    return IamResponse()
# ------------------------------------------------------------------
# reset-password
# ------------------------------------------------------------------
async def handle_reset_password(self, v):
    """Set a random temporary password on a user and return it.

    The user is resolved by ``user_id`` with the optional workspace
    check. The stored hash is replaced and the user is flagged as
    having to change the password.
    """
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    _row, failure = await self._resolve_user(
        v.user_id, v.workspace or None,
    )
    if failure is not None:
        return failure
    temp_password = secrets.token_urlsafe(12)
    await self.table_store.update_user_password(
        id=v.user_id,
        password_hash=_hash_password(temp_password),
        must_change_password=True,
    )
    return IamResponse(temporary_password=temp_password)
# ------------------------------------------------------------------
# get-user / update-user / disable-user
# ------------------------------------------------------------------
async def handle_get_user(self, v):
    """Return one user's record, looked up by ``user_id`` with the
    optional workspace check."""
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    row, failure = await self._resolve_user(
        v.user_id, v.workspace or None,
    )
    if failure is None:
        return IamResponse(user=self._row_to_user_record(row))
    return failure
async def handle_update_user(self, v):
    """Update a user's profile: name, email, roles, enabled,
    must_change_password.

    Fields the caller leaves empty (or ``None`` for the two flags)
    keep their stored value. The username is immutable -- change it
    by creating a new user and disabling the old one. Passwords are
    rejected here and go through change-password / reset-password.

    Returns the record as re-read from the store after the update.
    """
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    patch = v.user
    if patch is None:
        return _err("invalid-argument", "user field required")
    if patch.password:
        return _err(
            "invalid-argument",
            "password cannot be changed via update-user; "
            "use change-password or reset-password",
        )
    stored, failure = await self._resolve_user(
        v.user_id, v.workspace or None,
    )
    if failure is not None:
        return failure
    # Column 2 of the stored row is the username.
    if patch.username and patch.username != stored[2]:
        return _err(
            "invalid-argument",
            "username is immutable; create a new user instead",
        )
    (
        _id, _ws, _username, old_name, old_email, _pw,
        old_roles, old_enabled, old_mcp, _created,
    ) = stored
    # Carry forward whatever the caller didn't provide.
    if patch.enabled is not None:
        enabled_value = patch.enabled
    else:
        enabled_value = old_enabled
    if patch.must_change_password is not None:
        mcp_value = patch.must_change_password
    else:
        mcp_value = old_mcp
    await self.table_store.update_user_profile(
        id=v.user_id,
        name=patch.name or old_name,
        email=patch.email or old_email,
        roles=list(patch.roles or old_roles or []),
        enabled=enabled_value,
        must_change_password=mcp_value,
    )
    refreshed = await self.table_store.get_user(v.user_id)
    return IamResponse(user=self._row_to_user_record(refreshed))
async def handle_disable_user(self, v):
    """Soft-delete a user: set ``enabled`` to false, then delete
    every API key that belongs to them."""
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    _row, failure = await self._resolve_user(
        v.user_id, v.workspace or None,
    )
    if failure is not None:
        return failure
    store = self.table_store
    await store.update_user_enabled(id=v.user_id, enabled=False)
    # Column 0 of a key row is what delete_api_key takes.
    for key_row in await store.list_api_keys_by_user(v.user_id):
        await store.delete_api_key(key_row[0])
    return IamResponse()
async def handle_enable_user(self, v):
    """Re-enable a previously disabled user.

    API keys are not restored; those have to be re-issued by the
    admin.
    """
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    _row, failure = await self._resolve_user(
        v.user_id, v.workspace or None,
    )
    if failure is not None:
        return failure
    await self.table_store.update_user_enabled(
        id=v.user_id, enabled=True,
    )
    return IamResponse()
async def handle_delete_user(self, v):
    """Hard-delete a user.

    Removes, in this order, every API key belonging to the user, the
    ``iam_users_by_username`` lookup row and the ``iam_users`` row.

    Unlike disable, this frees the username for re-use and removes
    the user's personal data from storage (intended to cover GDPR
    erasure-style requirements). When audit logging lands, the
    decision to delete vs. anonymise referenced audit rows will need
    to be revisited.
    """
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    row, failure = await self._resolve_user(
        v.user_id, v.workspace or None,
    )
    if failure is not None:
        return failure
    # Row columns follow get_user: [1] is the workspace, [2] the
    # username.
    stored_username = row[2]
    stored_workspace = row[1]
    store = self.table_store
    # Revoke every API key first.
    for key_row in await store.list_api_keys_by_user(v.user_id):
        await store.delete_api_key(key_row[0])
    # The username lookup is keyed on (workspace, username), so it is
    # addressed with the workspace from the record itself, not with
    # the caller-supplied filter.
    if stored_username:
        await store.delete_username_lookup(
            stored_workspace, stored_username,
        )
    # Finally drop the user record.
    await store.delete_user(v.user_id)
    return IamResponse()
# ------------------------------------------------------------------
# Workspace CRUD
# ------------------------------------------------------------------
async def handle_create_workspace(self, v):
    """Create a workspace from ``v.workspace_record``.

    The id is required, must not begin with ``_`` (reserved) and must
    not already exist. The name defaults to the id. Returns the
    record as re-read from the store.
    """
    spec = v.workspace_record
    if spec is None or not spec.id:
        return _err(
            "invalid-argument",
            "workspace_record.id required for create-workspace",
        )
    new_id = spec.id
    if new_id.startswith("_"):
        return _err(
            "invalid-argument",
            "workspace ids beginning with '_' are reserved",
        )
    store = self.table_store
    if await store.get_workspace(new_id) is not None:
        return _err("duplicate", "workspace already exists")
    created_at = _now_dt()
    await store.put_workspace(
        id=new_id,
        name=spec.name or new_id,
        enabled=spec.enabled,
        created=created_at,
    )
    stored_row = await store.get_workspace(new_id)
    return IamResponse(
        workspace=self._row_to_workspace_record(stored_row),
    )
async def handle_list_workspaces(self, v):
    """Return every workspace in the store as a ``WorkspaceRecord``
    list."""
    records = []
    for ws_row in await self.table_store.list_workspaces():
        records.append(self._row_to_workspace_record(ws_row))
    return IamResponse(workspaces=records)
async def handle_get_workspace(self, v):
    """Return the workspace named by ``v.workspace_record.id``, or a
    ``not-found`` error."""
    wanted = v.workspace_record
    if wanted is None or not wanted.id:
        return _err("invalid-argument", "workspace_record.id required")
    ws_row = await self.table_store.get_workspace(wanted.id)
    if ws_row is not None:
        return IamResponse(
            workspace=self._row_to_workspace_record(ws_row),
        )
    return _err("not-found", "workspace not found")
async def handle_update_workspace(self, v):
    """Update a workspace's name and/or enabled flag.

    The id is immutable and selects the workspace. An empty name or
    an ``enabled`` of ``None`` keeps the stored value. Returns the
    record as re-read from the store.
    """
    patch = v.workspace_record
    if patch is None or not patch.id:
        return _err("invalid-argument", "workspace_record.id required")
    store = self.table_store
    stored = await store.get_workspace(patch.id)
    if stored is None:
        return _err("not-found", "workspace not found")
    _ws_id, old_name, old_enabled, _created = stored
    if patch.enabled is not None:
        enabled_value = patch.enabled
    else:
        enabled_value = old_enabled
    await store.update_workspace(
        id=patch.id,
        name=patch.name or old_name,
        enabled=enabled_value,
    )
    refreshed = await store.get_workspace(patch.id)
    return IamResponse(
        workspace=self._row_to_workspace_record(refreshed),
    )
async def handle_disable_workspace(self, v):
    """Disable a workspace together with its users.

    Sets the workspace's ``enabled`` to false, then for each user in
    the workspace disables the user and deletes every API key that
    belongs to them.
    """
    target = v.workspace_record
    if target is None or not target.id:
        return _err("invalid-argument", "workspace_record.id required")
    store = self.table_store
    ws_row = await store.get_workspace(target.id)
    if ws_row is None:
        return _err("not-found", "workspace not found")
    # Keep the stored name (column 1), falling back to the id.
    await store.update_workspace(
        id=target.id,
        name=ws_row[1] or target.id,
        enabled=False,
    )
    members = await store.list_users_by_workspace(target.id)
    for member in members:
        member_id = member[0]
        await store.update_user_enabled(id=member_id, enabled=False)
        for key_row in await store.list_api_keys_by_user(member_id):
            await store.delete_api_key(key_row[0])
    return IamResponse()
# ------------------------------------------------------------------
# rotate-signing-key
# ------------------------------------------------------------------
async def handle_rotate_signing_key(self, v):
    """Create a new Ed25519 signing key, retire the current active
    key and switch the in-memory cache over.

    The retired key row is kept in ``iam_signing_keys`` so the
    gateway's JWT validator can continue to validate previously
    issued tokens during the grace period. Actual grace-period
    enforcement (time-window acceptance at the validator) lands with
    the gateway auth middleware work.

    The retire / insert / cache-swap sequence runs under
    ``_signing_key_lock``, the lock that already guards the cached
    key. This stops two overlapping rotations from both retiring the
    same key and each inserting a new active one.
    """
    # Populate the cache BEFORE taking the lock: on a cold cache the
    # loader acquires the same lock, and asyncio.Lock is not
    # reentrant.
    await self._get_active_signing_key()
    async with self._signing_key_lock:
        # Re-read under the lock; a rotation that finished while this
        # one was waiting has already replaced the cached key.
        current = self._signing_key
        now = _now_dt()
        retired_kid = None
        if current is not None:
            retired_kid = current[0]
            await self.table_store.retire_signing_key(
                kid=retired_kid, retired=now,
            )
        new_kid, new_priv, new_pub = _generate_signing_keypair()
        await self.table_store.put_signing_key(
            kid=new_kid,
            private_pem=new_priv,
            public_pem=new_pub,
            created=now,
            retired=None,
        )
        self._signing_key = (new_kid, new_priv, new_pub)
    logger.info(
        f"IAM: rotated signing key. "
        f"New kid={new_kid}, retired kid={retired_kid}"
    )
    return IamResponse()
# ------------------------------------------------------------------
# resolve-api-key
# ------------------------------------------------------------------
async def handle_resolve_api_key(self, v):
    """Resolve a plaintext API key to the identity it authenticates.

    Every failure is reported as an ``auth-failed`` error:

    * no key supplied;
    * no stored key matches the hash of the supplied key;
    * the key carries an expiry that is in the past;
    * the owning user record is missing or disabled;
    * the owning user's workspace is missing or not enabled.

    On success the response carries ``resolved_user_id``,
    ``resolved_workspace`` and ``resolved_roles`` taken from the
    owning user record.
    """
    if not v.api_key:
        return _err("auth-failed", "no api key")

    key_row = await self.table_store.get_api_key_by_hash(
        _hash_api_key(v.api_key),
    )
    if key_row is None:
        return _err("auth-failed", "unknown api key")

    # Row layout: key_hash, id, user_id, name, prefix, expires,
    # created, last_used.  Only the owner and expiry matter here.
    (
        _stored_hash, _key_id, owner_id, _label, _key_prefix,
        expiry, _created_at, _last_used_at,
    ) = key_row

    if expiry is not None:
        # The expiry may be either an ISO-8601 string or a datetime.
        if isinstance(expiry, str):
            deadline = datetime.datetime.fromisoformat(expiry)
        else:
            deadline = expiry
        # Naive timestamps are interpreted as UTC.
        if deadline.tzinfo is None:
            deadline = deadline.replace(tzinfo=datetime.timezone.utc)
        if deadline < _now_dt():
            return _err("auth-failed", "api key expired")

    owner_row = await self.table_store.get_user(owner_id)
    if owner_row is None:
        return _err("auth-failed", "owning user missing")

    owner = self._row_to_user_record(owner_row)
    if not owner.enabled:
        return _err("auth-failed", "owning user disabled")

    # The owner's workspace must exist and have a truthy value in
    # column 2 (the enabled flag).
    workspace_row = await self.table_store.get_workspace(owner.workspace)
    workspace_usable = workspace_row is not None and workspace_row[2]
    if not workspace_usable:
        return _err("auth-failed", "owning workspace disabled")

    return IamResponse(
        resolved_user_id=owner.id,
        resolved_workspace=owner.workspace,
        resolved_roles=list(owner.roles),
    )
# ------------------------------------------------------------------
# create-user
# ------------------------------------------------------------------
async def handle_create_user(self, v):
    """Create a user bound to the workspace named in the request.

    Validation, in order:

    * ``v.workspace`` must be non-empty (``invalid-argument``);
    * ``v.user`` must be present, with non-empty ``username`` and
      ``password`` (``invalid-argument``);
    * the workspace must exist and be enabled (``not-found``);
    * the username must not already be taken within that
      workspace (``duplicate``).

    The stored record gets a fresh UUID, a hash of the supplied
    password, ``name`` defaulting to the username and ``email``
    defaulting to the empty string.  The response carries the user
    record as read back from the store.
    """
    workspace = v.workspace
    if not workspace:
        return _err(
            "invalid-argument", "workspace required for create-user",
        )

    requested = v.user
    if requested is None:
        return _err(
            "invalid-argument", "user field required for create-user",
        )
    if not requested.username:
        return _err("invalid-argument", "user.username required")
    if not requested.password:
        return _err("invalid-argument", "user.password required")

    # The target workspace has to exist and be enabled (column 2).
    workspace_row = await self.table_store.get_workspace(workspace)
    if not (workspace_row is not None and workspace_row[2]):
        return _err("not-found", "workspace not found or disabled")

    # Usernames are unique per workspace.
    clash = await self.table_store.get_user_id_by_username(
        workspace, requested.username,
    )
    if clash:
        return _err("duplicate", "username already exists")

    new_id = str(uuid.uuid4())
    created_at = _now_dt()
    await self.table_store.put_user(
        id=new_id,
        workspace=workspace,
        username=requested.username,
        name=requested.name or requested.username,
        email=requested.email or "",
        password_hash=_hash_password(requested.password),
        roles=list(requested.roles or []),
        enabled=requested.enabled,
        must_change_password=requested.must_change_password,
        created=created_at,
    )

    # Read the row back so the response reflects what was stored.
    stored_row = await self.table_store.get_user(new_id)
    return IamResponse(user=self._row_to_user_record(stored_row))
# ------------------------------------------------------------------
# list-users
# ------------------------------------------------------------------
async def handle_list_users(self, v):
    """List user records, optionally filtered by home workspace.

    A non-empty ``v.workspace`` restricts the listing to users of
    that workspace; an empty one returns every user known to the
    store.  No authorisation is performed here -- the original
    design note says the gateway has already authorised the caller
    for the requested scope.
    """
    store = self.table_store
    if not v.workspace:
        # Deployment-wide listing.
        rows = await store.list_users()
    else:
        rows = await store.list_users_by_workspace(v.workspace)

    records = []
    for row in rows:
        records.append(self._row_to_user_record(row))
    return IamResponse(users=records)
# ------------------------------------------------------------------
# create-api-key
# ------------------------------------------------------------------
async def handle_create_api_key(self, v):
    """Create an API key for an existing user.

    Requires ``v.key.user_id`` and ``v.key.name``; otherwise an
    ``invalid-argument`` error is returned.

    ``v.workspace`` is optional.  It is forwarded to
    ``self._resolve_user`` (as ``None`` when empty); any error that
    helper reports is returned unchanged.  Per the design note, a
    supplied workspace acts as an integrity check against the
    target user's home workspace.

    Only a hash of the generated key is stored, along with a short
    display prefix.  The plaintext is returned once in
    ``api_key_plaintext`` together with the stored key record.
    """
    if v.key is None or not v.key.user_id:
        return _err("invalid-argument", "key.user_id required")
    if not v.key.name:
        return _err("invalid-argument", "key.name required")

    # Only the error half of the result matters here; the user row
    # itself is not needed to create the key.
    _, err = await self._resolve_user(
        v.key.user_id, v.workspace or None,
    )
    if err is not None:
        return err

    plaintext = _generate_api_key()
    # Hash once; the same value keys both the insert and the
    # read-back below.
    key_hash = _hash_api_key(plaintext)
    key_id = str(uuid.uuid4())
    now = _now_dt()
    expires_dt = _parse_expires(v.key.expires)

    await self.table_store.put_api_key(
        key_hash=key_hash,
        id=key_id,
        user_id=v.key.user_id,
        name=v.key.name,
        # Display prefix: the fixed key prefix plus four characters.
        prefix=plaintext[:len(API_KEY_PREFIX) + 4],
        expires=expires_dt,
        created=now,
        last_used=None,
    )

    # Read the row back so the response reflects what was stored.
    row = await self.table_store.get_api_key_by_hash(key_hash)
    return IamResponse(
        api_key_plaintext=plaintext,
        api_key=self._row_to_api_key_record(row),
    )
# ------------------------------------------------------------------
# list-api-keys
# ------------------------------------------------------------------
async def handle_list_api_keys(self, v):
    """List the API key records belonging to one user.

    ``v.user_id`` is required (``invalid-argument`` otherwise).
    ``v.workspace`` is optional and is forwarded to
    ``self._resolve_user`` (as ``None`` when empty); any error that
    helper reports is returned unchanged.
    """
    target_user = v.user_id
    if not target_user:
        return _err(
            "invalid-argument", "user_id required for list-api-keys",
        )

    # Only the error half of the result is needed.
    _unused_row, failure = await self._resolve_user(
        target_user, v.workspace or None,
    )
    if failure is not None:
        return failure

    key_rows = await self.table_store.list_api_keys_by_user(target_user)
    records = [self._row_to_api_key_record(kr) for kr in key_rows]
    return IamResponse(api_keys=records)
# ------------------------------------------------------------------
# revoke-api-key
# ------------------------------------------------------------------
async def handle_revoke_api_key(self, v):
    """Revoke (delete) an API key identified by ``v.key_id``.

    Errors:

    * ``invalid-argument`` when ``key_id`` is empty;
    * ``not-found`` when no key has that id;
    * ``operation-not-permitted`` when ``v.workspace`` is supplied
      and the key's owning user is missing or belongs to a
      different workspace.

    When ``v.workspace`` is empty no ownership check is made.
    Returns an empty ``IamResponse`` on success.
    """
    if not v.key_id:
        return _err("invalid-argument", "key_id required")

    key_row = await self.table_store.get_api_key_by_id(v.key_id)
    if key_row is None:
        return _err("not-found", "api key not found")

    # Row layout: key_hash, id, user_id, name, prefix, expires,
    # created, last_used.
    (
        stored_hash, _key_id, owner_id, _label, _key_prefix,
        _expiry, _created_at, _last_used_at,
    ) = key_row

    if v.workspace:
        # Integrity check through the owning user's workspace
        # (column 1 of the user row).
        owner_row = await self.table_store.get_user(owner_id)
        mismatch = owner_row is None or owner_row[1] != v.workspace
        if mismatch:
            return _err(
                "operation-not-permitted",
                "key belongs to a different workspace",
            )

    # Keys are deleted by their hash, not their id.
    await self.table_store.delete_api_key(stored_hash)
    return IamResponse()
# ------------------------------------------------------------------
# authorise / authorise-many
#
# The IAM contract (see docs/tech-specs/iam-contract.md) calls
# for the regime — not the gateway — to decide whether an
# identity may perform a capability on a resource given the
# operation's parameters. These two operations are the OSS
# regime's implementation of that contract.
#
# Inputs (on IamRequest):
# user_id — the identity handle (the gateway's
# opaque reference). For OSS this is the
# user record's id.
# capability — the capability string from the
# capabilities.md vocabulary.
# resource_json — JSON dict, the resource address
# ({} for system, {workspace} for
# workspace, {workspace, flow} for flow).
# parameters_json — JSON dict, decision-relevant operation
# parameters (e.g. workspace association
# on user-registry operations).
# authorise_checks — for authorise-many, a JSON list of
# {capability, resource, parameters}.
#
# Outputs (on IamResponse):
# decision_allow — single allow / deny verdict.
# decision_ttl_seconds — gateway cache TTL for this
# decision.
# decisions_json — for authorise-many, list of
# {allow, ttl} in request order.
# ------------------------------------------------------------------
def _decide(self, user_row, capability, resource, parameters):
    """Single authorisation decision.  Returns ``(allow, ttl)``.

    ``user_row`` is the raw user row (or ``None`` if the user was
    not found), ``capability`` the capability string being
    requested, and ``resource`` / ``parameters`` the decoded JSON
    dicts (``None`` or empty is treated as ``{}``).

    The decision is a deny when:

    * the user is unknown or disabled;
    * ``capability`` is not a string, or ``resource`` /
      ``parameters`` is a non-empty non-dict value (malformed
      input fails closed rather than raising);
    * no role held by the user both grants the capability and,
      when a target workspace is named, permits that workspace
      under the role's ``workspace_scope``.

    The TTL is always ``AUTHZ_CACHE_TTL_SECONDS``.
    """
    if user_row is None:
        return False, AUTHZ_CACHE_TTL_SECONDS

    # user_row layout:
    #   0:id 1:workspace 2:username 3:name 4:email 5:password_hash
    #   6:roles 7:enabled 8:must_change_password 9:created
    if not user_row[7]:  # disabled
        return False, AUTHZ_CACHE_TTL_SECONDS

    # Fail closed on malformed input.  Callers decode these from
    # JSON, which can yield lists, strings or numbers as well as
    # objects; a truthy non-dict would otherwise raise on .get().
    resource = resource or {}
    parameters = parameters or {}
    if not isinstance(resource, dict) or not isinstance(parameters, dict):
        return False, AUTHZ_CACHE_TTL_SECONDS
    if not isinstance(capability, str):
        return False, AUTHZ_CACHE_TTL_SECONDS

    # NOTE(review): no disabled-workspace check is performed here.
    # This method is synchronous and does no store reads; it only
    # works out which workspace (if any) the request targets.  If
    # a disabled-workspace check is wanted as defence in depth it
    # has to be added by the async callers.
    target_workspace = (
        resource.get("workspace")
        or parameters.get("workspace")
    )

    roles = user_row[6] or set()
    assigned_workspace = user_row[1]

    for role_name in roles:
        defn = ROLE_DEFINITIONS.get(role_name)
        if defn is None:
            # Unknown role names grant nothing.
            continue
        if capability not in defn["capabilities"]:
            continue
        # With no target workspace (None) the capability alone is
        # sufficient; otherwise the role's scope must permit it.
        if target_workspace is None or _scope_permits(
            defn["workspace_scope"],
            target_workspace,
            assigned_workspace,
        ):
            return True, AUTHZ_CACHE_TTL_SECONDS

    return False, AUTHZ_CACHE_TTL_SECONDS
async def handle_authorise(self, v):
    """Answer a single authorisation question.

    Inputs on ``v``: ``capability`` and ``user_id`` (both
    required), plus ``resource_json`` and ``parameters_json``
    (each optional; empty means ``{}``).

    Returns ``invalid-argument`` when a required field is missing,
    when either JSON field fails to parse, or when either decodes
    to something other than a JSON object.  Otherwise the response
    carries ``decision_allow`` and ``decision_ttl_seconds`` from
    ``self._decide``.  An unknown user is a deny, not an error.
    """
    if not v.capability:
        return _err("invalid-argument", "capability required")
    if not v.user_id:
        return _err("invalid-argument", "user_id (handle) required")

    try:
        # A falsy decoded value (null, [], "", 0) is treated as an
        # empty object.
        resource = json.loads(v.resource_json or "{}") or {}
        parameters = json.loads(v.parameters_json or "{}") or {}
    except json.JSONDecodeError as e:
        return _err("invalid-argument", f"bad json: {e}")

    # json.loads can also return lists, strings or numbers; reject
    # those here instead of letting the decision logic raise.
    if not isinstance(resource, dict):
        return _err(
            "invalid-argument", "resource_json must be a JSON object",
        )
    if not isinstance(parameters, dict):
        return _err(
            "invalid-argument", "parameters_json must be a JSON object",
        )

    user_row = await self.table_store.get_user(v.user_id)
    allow, ttl = self._decide(
        user_row, v.capability, resource, parameters,
    )
    return IamResponse(
        decision_allow=allow,
        decision_ttl_seconds=ttl,
    )
async def handle_authorise_many(self, v):
    """Answer a batch of authorisation questions for one user.

    Inputs on ``v``: ``user_id`` (required) and
    ``authorise_checks`` (required), a JSON list of
    ``{capability, resource, parameters}`` objects.

    Returns ``invalid-argument`` when a required field is missing,
    the JSON fails to parse, or the decoded value is not a list.
    Otherwise ``decisions_json`` is a JSON list of
    ``{allow, ttl}`` objects in request order.

    A malformed entry (not an object, a non-string capability, or
    a ``resource`` / ``parameters`` that is not an object) is
    denied on its own and does not fail the rest of the batch.
    """
    if not v.user_id:
        return _err("invalid-argument", "user_id (handle) required")
    if not v.authorise_checks:
        return _err("invalid-argument", "authorise_checks required")

    try:
        checks = json.loads(v.authorise_checks)
    except json.JSONDecodeError as e:
        return _err("invalid-argument", f"bad json: {e}")
    if not isinstance(checks, list):
        return _err(
            "invalid-argument",
            "authorise_checks must be a JSON list",
        )

    # One user lookup for the whole batch.
    user_row = await self.table_store.get_user(v.user_id)

    decisions = []
    for c in checks:
        if not isinstance(c, dict):
            decisions.append({
                "allow": False,
                "ttl": AUTHZ_CACHE_TTL_SECONDS,
            })
            continue

        capability = c.get("capability", "")
        resource = c.get("resource") or {}
        parameters = c.get("parameters") or {}

        # Deny an entry whose fields have the wrong JSON type
        # rather than letting it raise and abort the batch.
        well_formed = (
            isinstance(capability, str)
            and isinstance(resource, dict)
            and isinstance(parameters, dict)
        )
        if not well_formed:
            decisions.append({
                "allow": False,
                "ttl": AUTHZ_CACHE_TTL_SECONDS,
            })
            continue

        allow, ttl = self._decide(
            user_row, capability, resource, parameters,
        )
        decisions.append({"allow": allow, "ttl": ttl})

    return IamResponse(decisions_json=json.dumps(decisions))