trustgraph/trustgraph-flow/trustgraph/iam/service/iam.py
cybermaggedon 5e28d3cce0
refactor(iam): pluggable IAM regime via authenticate/authorise contract (#853)
The gateway no longer holds any policy state — capability sets, role
definitions, workspace scope rules.  Per the IAM contract it asks the
regime "may this identity perform this capability on this resource?"
per request.  That moves the OSS role-based regime entirely into
iam-svc, which can be replaced (SSO, ABAC, ReBAC) without changing
the gateway, the wire protocol, or backend services.

Contract:
- authenticate(credential) -> Identity (handle, workspace,
  principal_id, source).  No roles, claims, or policy state surface
  to the gateway.
- authorise(identity, capability, resource, parameters) -> (allow,
  ttl).  Cached per decision (the regime-supplied TTL is clamped to an
  upper bound; fail-closed on regime errors).
- authorise_many available as a fan-out variant.

Operation registry drives every authorisation decision:
- /api/v1/iam -> IamEndpoint, looks up bare op name (create-user,
  list-workspaces, ...).
- /api/v1/{kind} -> RegistryRoutedVariableEndpoint, <kind>:<op>
  (config:get, flow:list-blueprints, librarian:add-document, ...).
- /api/v1/flow/{flow}/service/{kind} -> flow-service:<kind>.
- /api/v1/flow/{flow}/{import,export}/{kind} ->
  flow-{import,export}:<kind>.
- WS Mux per-frame -> flow-service:<kind>; closes a gap where
  authenticated users could hit any service kind.
85 operations registered across the surface.

JWT carries identity only — sub + workspace.  The roles claim is gone;
the gateway never reads policy state from a credential.

The three coarse *_KIND_CAPABILITY maps are removed.  The registry is
the only source of truth for the capability + resource shape of an
operation.  Tests migrated to the new Identity shape and to
authorise()-mocked auth doubles.

Specs updated: docs/tech-specs/iam-contract.md (Identity surface,
caching, registry-naming conventions), iam.md (JWT shape, gateway
flow, role section reframed as OSS-regime detail), iam-protocol.md
(positioned as one implementation of the contract).
2026-04-28 16:19:41 +01:00

1342 lines
47 KiB
Python

"""
IAM business logic. Handles ``IamRequest`` messages and builds
``IamResponse`` messages. Does not concern itself with transport.
See docs/tech-specs/iam-protocol.md for the wire-level contract and
docs/tech-specs/iam.md for the surrounding architecture.
"""
import asyncio
import base64
import datetime
import hashlib
import json
import logging
import os
import secrets
import uuid
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519
from trustgraph.schema import (
IamResponse, Error,
UserRecord, WorkspaceRecord, ApiKeyRecord,
)
from ... tables.iam import IamTableStore
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Id of the workspace created when tables are seeded; login also falls
# back to it when the request carries no workspace.
DEFAULT_WORKSPACE = "default"
# Username and display name of the admin account created at seeding.
BOOTSTRAP_ADMIN_USERNAME = "admin"
BOOTSTRAP_ADMIN_NAME = "Administrator"
# PBKDF2-HMAC-SHA-256 iteration count used for newly hashed passwords.
# The count is embedded in each stored hash, so existing hashes keep
# verifying with whatever count they were created with.
PBKDF2_ITERATIONS = 600_000
# Generated API keys are API_KEY_PREFIX followed by a URL-safe token
# built from API_KEY_RANDOM_BYTES random bytes.
API_KEY_PREFIX = "tg_"
API_KEY_RANDOM_BYTES = 24
# "iss" claim and lifetime (seconds) of the JWTs issued by login.
JWT_ISSUER = "trustgraph-iam"
JWT_TTL_SECONDS = 3600
# Default authorisation cache TTL the regime tells the gateway to
# observe. 60s is the OSS-spec maximum revocation latency: a role
# change, workspace disable, or key revoke takes effect within at
# most this much time.
AUTHZ_CACHE_TTL_SECONDS = 60
# OSS regime role table. Lives here, not in the gateway — the
# gateway is regime-agnostic and must not encode policy.
#
# Each role has a capability set and a workspace scope. The
# evaluator (handle_authorise below) checks (a) that some role
# held by the caller grants the requested capability, and (b)
# that role's workspace scope permits the target workspace.
# Capabilities granted to the "reader" role. Also the base set that the
# writer and admin sets are built on.
_READER_CAPS = {
    "agent",
    "graph:read",
    "documents:read",
    "rows:read",
    "llm",
    "embeddings",
    "mcp",
    "config:read",
    "flows:read",
    "collections:read",
    "knowledge:read",
    "keys:self",
}
# Writer = every reader capability plus the data write capabilities.
_WRITER_CAPS = _READER_CAPS | {
    "graph:write",
    "documents:write",
    "rows:write",
    "collections:write",
    "knowledge:write",
}
# Admin = every writer capability plus configuration, user, key,
# workspace, IAM and metrics capabilities.
_ADMIN_CAPS = _WRITER_CAPS | {
    "config:write",
    "flows:write",
    "users:read", "users:write", "users:admin",
    "keys:admin",
    "workspaces:admin",
    "iam:admin",
    "metrics:read",
}
# Role name -> capability set and workspace scope. The scope strings
# are the ones _scope_permits understands: "*" applies to any
# workspace, "assigned" only to the caller's own workspace.
ROLE_DEFINITIONS = {
    "reader": {
        "capabilities": _READER_CAPS,
        "workspace_scope": "assigned",
    },
    "writer": {
        "capabilities": _WRITER_CAPS,
        "workspace_scope": "assigned",
    },
    "admin": {
        "capabilities": _ADMIN_CAPS,
        "workspace_scope": "*",
    },
}
def _scope_permits(role_scope, target_workspace, assigned_workspace):
"""Does the given role apply to ``target_workspace``?"""
if role_scope == "*":
return True
if role_scope == "assigned":
return target_workspace == assigned_workspace
return False
def _now_iso():
return datetime.datetime.now(datetime.timezone.utc).isoformat()
def _now_dt():
return datetime.datetime.now(datetime.timezone.utc)
def _iso(dt):
if dt is None:
return ""
if isinstance(dt, str):
return dt
if dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.timezone.utc)
return dt.isoformat()
def _hash_password(password):
    """Hash ``password`` with PBKDF2-HMAC-SHA-256 and a fresh 16-byte salt.

    The result is the self-describing string
    ``pbkdf2-sha256$<iters>$<b64-salt>$<b64-hash>``. Because the
    algorithm tag and iteration count travel with the hash, the cost
    can be changed later without invalidating rows written earlier.
    """
    salt = os.urandom(16)
    derived = hashlib.pbkdf2_hmac(
        "sha256", password.encode("utf-8"), salt, PBKDF2_ITERATIONS,
    )
    fields = [
        "pbkdf2-sha256",
        str(PBKDF2_ITERATIONS),
        base64.b64encode(salt).decode("ascii"),
        base64.b64encode(derived).decode("ascii"),
    ]
    return "$".join(fields)
def _verify_password(password, encoded):
"""Constant-time verify ``password`` against an encoded hash."""
try:
algo, iters, b64_salt, b64_hash = encoded.split("$")
except ValueError:
return False
if algo != "pbkdf2-sha256":
return False
try:
iters = int(iters)
salt = base64.b64decode(b64_salt)
target = base64.b64decode(b64_hash)
except Exception:
return False
dk = hashlib.pbkdf2_hmac(
"sha256", password.encode("utf-8"), salt, iters,
)
return secrets.compare_digest(dk, target)
def _generate_api_key():
    """Create a new API-key plaintext: the key prefix plus a random
    URL-safe token."""
    random_part = secrets.token_urlsafe(API_KEY_RANDOM_BYTES)
    return f"{API_KEY_PREFIX}{random_part}"
def _hash_api_key(plaintext):
"""SHA-256 hex digest of an API key plaintext. Used as the
primary key in ``iam_api_keys`` so ``resolve-api-key`` is O(1)."""
return hashlib.sha256(plaintext.encode("utf-8")).hexdigest()
def _err(type, message):
    """Build an ``IamResponse`` that carries only an error of the
    given type and message."""
    failure = Error(type=type, message=message)
    return IamResponse(error=failure)
def _parse_expires(s):
if not s:
return None
try:
return datetime.datetime.fromisoformat(s)
except Exception:
return None
def _b64url(data):
"""URL-safe base64 encode without padding, as required by JWT."""
return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii")
def _generate_signing_keypair():
    """Create a new Ed25519 keypair for signing JWTs.

    Returns ``(kid, private_pem, public_pem)``. The private key is
    unencrypted PKCS#8 PEM, the public key is SubjectPublicKeyInfo
    PEM, and ``kid`` is the first 16 hex characters of a random UUID.
    """
    private_key = ed25519.Ed25519PrivateKey.generate()
    private_bytes = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_bytes = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    kid = uuid.uuid4().hex[:16]
    return kid, private_bytes.decode("ascii"), public_bytes.decode("ascii")
def _sign_jwt(kid, private_pem, claims):
    """Sign ``claims`` as a compact-serialisation EdDSA (Ed25519) JWT.

    ``private_pem`` must be an unencrypted PEM Ed25519 private key;
    ``kid`` is written into the JWT header. Header and payload are
    serialised as compact JSON with sorted keys.

    Raises RuntimeError if the PEM holds a key that is not Ed25519.
    """
    signer = serialization.load_pem_private_key(
        private_pem.encode("ascii"), password=None,
    )
    if not isinstance(signer, ed25519.Ed25519PrivateKey):
        raise RuntimeError(
            f"signing key is not Ed25519: {type(signer).__name__}"
        )

    def _segment(obj):
        # One JWT segment: compact, key-sorted JSON, base64url-encoded.
        compact = json.dumps(obj, separators=(",", ":"), sort_keys=True)
        return _b64url(compact.encode("utf-8"))

    header_b = _segment({"alg": "EdDSA", "typ": "JWT", "kid": kid})
    payload_b = _segment(claims)
    unsigned = f"{header_b}.{payload_b}"
    signature = signer.sign(unsigned.encode("ascii"))
    return f"{unsigned}.{_b64url(signature)}"
class IamService:
def __init__(self, host, username, password, keyspace,
bootstrap_mode, bootstrap_token=None):
self.table_store = IamTableStore(
host, username, password, keyspace,
)
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
# service auto-seeds on first start using the provided
# bootstrap_token and the ``bootstrap`` operation is refused
# thereafter (indistinguishable from an already-bootstrapped
# deployment per the error policy). In "bootstrap" mode the
# ``bootstrap`` operation is live until tables are populated.
if bootstrap_mode not in ("token", "bootstrap"):
raise ValueError(
f"bootstrap_mode must be 'token' or 'bootstrap', "
f"got {bootstrap_mode!r}"
)
if bootstrap_mode == "token" and not bootstrap_token:
raise ValueError(
"bootstrap_mode='token' requires bootstrap_token"
)
self.bootstrap_mode = bootstrap_mode
self.bootstrap_token = bootstrap_token
self._signing_key = None
self._signing_key_lock = asyncio.Lock()
# ------------------------------------------------------------------
# Dispatch
# ------------------------------------------------------------------
async def handle(self, v):
    """Route an IAM request to the handler for ``v.operation``.

    An unrecognised operation gets an ``invalid-argument`` error
    response. Any exception raised while handling is logged with its
    traceback and turned into an ``internal-error`` response whose
    message is the exception text.
    """
    op = v.operation
    # Operation name on the wire -> name of the handling method.
    handler_names = {
        "bootstrap": "handle_bootstrap",
        "resolve-api-key": "handle_resolve_api_key",
        "create-user": "handle_create_user",
        "list-users": "handle_list_users",
        "create-api-key": "handle_create_api_key",
        "list-api-keys": "handle_list_api_keys",
        "revoke-api-key": "handle_revoke_api_key",
        "login": "handle_login",
        "get-signing-key-public": "handle_get_signing_key_public",
        "change-password": "handle_change_password",
        "reset-password": "handle_reset_password",
        "get-user": "handle_get_user",
        "update-user": "handle_update_user",
        "disable-user": "handle_disable_user",
        "enable-user": "handle_enable_user",
        "delete-user": "handle_delete_user",
        "create-workspace": "handle_create_workspace",
        "list-workspaces": "handle_list_workspaces",
        "get-workspace": "handle_get_workspace",
        "update-workspace": "handle_update_workspace",
        "disable-workspace": "handle_disable_workspace",
        "rotate-signing-key": "handle_rotate_signing_key",
        "authorise": "handle_authorise",
        "authorise-many": "handle_authorise_many",
    }
    try:
        # Only string operations can match; anything else is unknown.
        method_name = (
            handler_names.get(op) if isinstance(op, str) else None
        )
        if method_name is None:
            return _err(
                "invalid-argument",
                f"unknown or not-yet-implemented operation: {op!r}",
            )
        # The handler is looked up only once the operation matched.
        return await getattr(self, method_name)(v)
    except Exception as e:
        logger.error(
            f"IAM {op} failed: {type(e).__name__}: {e}",
            exc_info=True,
        )
        return _err("internal-error", str(e))
# ------------------------------------------------------------------
# Record conversion
# ------------------------------------------------------------------
def _row_to_user_record(self, row):
    """Convert a 10-column user row into a ``UserRecord``.

    The password hash column is dropped. Missing text columns become
    empty strings, roles are returned sorted (empty list when
    absent), and the flags are coerced to bool.
    """
    (
        user_id, workspace, username, name, email, _password_hash,
        roles, enabled, must_change_password, created,
    ) = row
    role_list = sorted(roles) if roles else []
    return UserRecord(
        id=user_id or "",
        workspace=workspace or "",
        username=username or "",
        name=name or "",
        email=email or "",
        roles=role_list,
        enabled=bool(enabled),
        must_change_password=bool(must_change_password),
        created=_iso(created),
    )
def _row_to_api_key_record(self, row):
    """Convert an 8-column API-key row into an ``ApiKeyRecord``.

    The key hash column is dropped. Missing text columns become
    empty strings and the three timestamps are rendered with
    ``_iso``.
    """
    (
        _key_hash, key_id, owner_id, label, prefix, expires,
        created, last_used,
    ) = row
    return ApiKeyRecord(
        id=key_id or "",
        user_id=owner_id or "",
        name=label or "",
        prefix=prefix or "",
        expires=_iso(expires),
        created=_iso(created),
        last_used=_iso(last_used),
    )
# ------------------------------------------------------------------
# bootstrap
# ------------------------------------------------------------------
async def auto_bootstrap_if_token_mode(self):
"""Called from the service processor at startup. In
``token`` mode, if tables are empty, seeds the default
workspace / admin / signing key using the operator-provided
bootstrap token. The admin's API key plaintext is *the*
``bootstrap_token`` — the operator already knows it, nothing
needs to be returned or logged.
In ``bootstrap`` mode this is a no-op; seeding happens on
explicit ``bootstrap`` operation invocation."""
if self.bootstrap_mode != "token":
return
if await self.table_store.any_workspace_exists():
logger.info(
"IAM: token mode, tables already populated; skipping "
"auto-bootstrap"
)
return
logger.info("IAM: token mode, empty tables; auto-bootstrapping")
await self._seed_tables(self.bootstrap_token)
logger.info(
"IAM: auto-bootstrap complete using operator-provided token"
)
async def _seed_tables(self, api_key_plaintext):
    """Seed an empty IAM store and return the new admin's user id.

    Shared by token-mode auto-bootstrap and ``handle_bootstrap``.
    Writes, in this order: the default workspace, the bootstrap
    admin user, an API key for that admin whose plaintext is
    ``api_key_plaintext``, and a fresh signing key. The signing key
    also becomes the in-memory active key.
    """
    store = self.table_store
    seeded_at = _now_dt()
    await store.put_workspace(
        id=DEFAULT_WORKSPACE,
        name="Default",
        enabled=True,
        created=seeded_at,
    )
    admin_user_id = str(uuid.uuid4())
    # Random password that is never returned or logged; the admin is
    # flagged must_change_password.
    throwaway_password = secrets.token_urlsafe(32)
    await store.put_user(
        id=admin_user_id,
        workspace=DEFAULT_WORKSPACE,
        username=BOOTSTRAP_ADMIN_USERNAME,
        name=BOOTSTRAP_ADMIN_NAME,
        email="",
        password_hash=_hash_password(throwaway_password),
        roles=["admin"],
        enabled=True,
        must_change_password=True,
        created=seeded_at,
    )
    bootstrap_key_id = str(uuid.uuid4())
    # Only the hash and a short leading slice of the key are stored.
    visible_prefix = api_key_plaintext[:len(API_KEY_PREFIX) + 4]
    await store.put_api_key(
        key_hash=_hash_api_key(api_key_plaintext),
        id=bootstrap_key_id,
        user_id=admin_user_id,
        name="bootstrap",
        prefix=visible_prefix,
        expires=None,
        created=seeded_at,
        last_used=None,
    )
    keypair = _generate_signing_keypair()
    kid, private_pem, public_pem = keypair
    await store.put_signing_key(
        kid=kid,
        private_pem=private_pem,
        public_pem=public_pem,
        created=seeded_at,
        retired=None,
    )
    self._signing_key = (kid, private_pem, public_pem)
    logger.info(
        f"IAM seeded: workspace={DEFAULT_WORKSPACE!r}, "
        f"admin user_id={admin_user_id}, signing key kid={kid}"
    )
    return admin_user_id
async def handle_bootstrap(self, v):
    """Handle the explicit ``bootstrap`` operation.

    Works only in ``bootstrap`` mode and only while no workspace
    exists. Every refusal returns the same generic ``auth-failed``
    response, so a caller cannot tell "not in bootstrap mode" from
    "already bootstrapped". On success the store is seeded with a
    newly generated API key, and the admin user id and key plaintext
    are returned.
    """
    refused = self.bootstrap_mode != "bootstrap"
    if not refused:
        # The store is consulted only when the mode allows bootstrap.
        refused = await self.table_store.any_workspace_exists()
    if refused:
        return _err("auth-failed", "auth failure")
    api_key = _generate_api_key()
    admin_user_id = await self._seed_tables(api_key)
    return IamResponse(
        bootstrap_admin_user_id=admin_user_id,
        bootstrap_admin_api_key=api_key,
    )
# ------------------------------------------------------------------
# Signing key helpers
# ------------------------------------------------------------------
async def _get_active_signing_key(self):
"""Return ``(kid, private_pem, public_pem)`` for the active
signing key. Loads from Cassandra on first call. Generates
and persists a new key if none exists — covers the case where
``login`` is called before ``bootstrap`` (shouldn't happen in
practice but keeps the service internally consistent)."""
if self._signing_key is not None:
return self._signing_key
async with self._signing_key_lock:
if self._signing_key is not None:
return self._signing_key
rows = await self.table_store.list_signing_keys()
active = [r for r in rows if r[4] is None]
if active:
row = active[0]
self._signing_key = (row[0], row[1], row[2])
logger.info(
f"IAM: loaded active signing key kid={row[0]}"
)
return self._signing_key
kid, private_pem, public_pem = _generate_signing_keypair()
await self.table_store.put_signing_key(
kid=kid,
private_pem=private_pem,
public_pem=public_pem,
created=_now_dt(),
retired=None,
)
self._signing_key = (kid, private_pem, public_pem)
logger.info(
f"IAM: generated active signing key kid={kid} "
f"(no existing key found)"
)
return self._signing_key
# ------------------------------------------------------------------
# login
# ------------------------------------------------------------------
async def handle_login(self, v):
    """Check a username/password pair and issue a JWT.

    The workspace defaults to ``DEFAULT_WORKSPACE`` when the request
    has none (OSS single-workspace assumption). Checks run in this
    order: user exists, user enabled, password matches, workspace
    exists and is enabled. Any failure returns ``auth-failed``.

    On success the response carries a JWT valid for
    ``JWT_TTL_SECONDS`` and its expiry as an ISO-8601 string. Per
    the IAM contract the token holds identity and workspace binding
    only; roles stay server-side.

    NOTE(review): each failure has its own message text; confirm
    the gateway does not forward it verbatim to unauthenticated
    callers, since that would allow username probing.
    """
    if not v.username:
        return _err("auth-failed", "username required")
    if not v.password:
        return _err("auth-failed", "password required")
    workspace = v.workspace or DEFAULT_WORKSPACE
    store = self.table_store
    user_id = await store.get_user_id_by_username(
        workspace, v.username,
    )
    if not user_id:
        return _err("auth-failed", "no such user")
    user_row = await store.get_user(user_id)
    if user_row is None:
        return _err("auth-failed", "user disappeared")
    (
        subject, home_ws, _username, _name, _email, stored_hash,
        _roles, enabled, _mcp, _created,
    ) = user_row
    if not enabled:
        return _err("auth-failed", "user disabled")
    password_ok = bool(stored_hash) and _verify_password(
        v.password, stored_hash,
    )
    if not password_ok:
        return _err("auth-failed", "bad credentials")
    ws_row = await store.get_workspace(home_ws)
    if ws_row is None or not ws_row[2]:
        return _err("auth-failed", "workspace disabled")
    kid, private_pem, _public_pem = await self._get_active_signing_key()
    issued_at = int(_now_dt().timestamp())
    expires_at = issued_at + JWT_TTL_SECONDS
    claims = {
        "iss": JWT_ISSUER,
        "sub": subject,
        "workspace": home_ws,
        "iat": issued_at,
        "exp": expires_at,
    }
    token = _sign_jwt(kid, private_pem, claims)
    expiry = datetime.datetime.fromtimestamp(
        expires_at, tz=datetime.timezone.utc,
    )
    return IamResponse(jwt=token, jwt_expires=expiry.isoformat())
# ------------------------------------------------------------------
# get-signing-key-public
# ------------------------------------------------------------------
async def handle_get_signing_key_public(self, v):
    """Return the public PEM of the active signing key."""
    _kid, _private_pem, public_pem = (
        await self._get_active_signing_key()
    )
    return IamResponse(signing_key_public=public_pem)
# ------------------------------------------------------------------
# Record-conversion helper for workspaces
# ------------------------------------------------------------------
def _row_to_workspace_record(self, row):
    """Convert a 4-column workspace row into a ``WorkspaceRecord``.

    Missing id or name become empty strings, ``enabled`` is coerced
    to bool and ``created`` is rendered with ``_iso``.
    """
    workspace_id, label, enabled, created = row
    return WorkspaceRecord(
        id=workspace_id or "",
        name=label or "",
        enabled=bool(enabled),
        created=_iso(created),
    )
async def _user_in_workspace(self, user_id, workspace):
    """Load a user and confirm it belongs to ``workspace``.

    Returns ``(user_row, None)`` on success, or ``(None, error)``
    where the error is ``not-found`` when the user does not exist
    and ``operation-not-permitted`` when it lives in a different
    workspace. The workspace check rejects cross-workspace admin
    attempts.

    The user's ``enabled`` flag is not checked here, so disabled
    users are returned too.
    """
    record = await self.table_store.get_user(user_id)
    if record is None:
        outcome = (None, _err("not-found", "user not found"))
    elif record[1] != workspace:
        outcome = (
            None,
            _err(
                "operation-not-permitted",
                "user is in a different workspace",
            ),
        )
    else:
        outcome = (record, None)
    return outcome
# ------------------------------------------------------------------
# change-password
# ------------------------------------------------------------------
async def handle_change_password(self, v):
    """Change a user's own password after checking the current one.

    Requires ``user_id``, ``password`` (current) and
    ``new_password``. An unknown user, a disabled user or a wrong
    current password gives ``auth-failed``. On success the new hash
    is stored and ``must_change_password`` is cleared.
    """
    required = (
        (v.user_id, "user_id required"),
        (v.password, "password (current) required"),
        (v.new_password, "new_password required"),
    )
    for value, complaint in required:
        if not value:
            return _err("invalid-argument", complaint)
    user_row = await self.table_store.get_user(v.user_id)
    if user_row is None:
        return _err("auth-failed", "no such user")
    (
        _id, _ws, _un, _name, _email, stored_hash,
        _roles, enabled, _mcp, _created,
    ) = user_row
    if not enabled:
        return _err("auth-failed", "user disabled")
    current_ok = bool(stored_hash) and _verify_password(
        v.password, stored_hash,
    )
    if not current_ok:
        return _err("auth-failed", "bad credentials")
    await self.table_store.update_user_password(
        id=v.user_id,
        password_hash=_hash_password(v.new_password),
        must_change_password=False,
    )
    return IamResponse()
# ------------------------------------------------------------------
# reset-password
# ------------------------------------------------------------------
async def handle_reset_password(self, v):
    """Replace a user's password with a random temporary one.

    Requires ``workspace`` and ``user_id``; the user must belong to
    that workspace. The temporary password is stored hashed with
    ``must_change_password`` set, and its plaintext is returned in
    the response.
    """
    if not v.workspace:
        return _err(
            "invalid-argument",
            "workspace required for reset-password",
        )
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    _row, err = await self._user_in_workspace(v.user_id, v.workspace)
    if err is not None:
        return err
    temp_password = secrets.token_urlsafe(12)
    new_hash = _hash_password(temp_password)
    await self.table_store.update_user_password(
        id=v.user_id,
        password_hash=new_hash,
        must_change_password=True,
    )
    return IamResponse(temporary_password=temp_password)
# ------------------------------------------------------------------
# get-user / update-user / disable-user
# ------------------------------------------------------------------
async def handle_get_user(self, v):
    """Return the record of one user in the given workspace.

    Requires ``workspace`` and ``user_id``. Errors from the
    workspace membership check are returned unchanged.
    """
    if not v.workspace:
        return _err("invalid-argument", "workspace required")
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    found, err = await self._user_in_workspace(v.user_id, v.workspace)
    if err is not None:
        return err
    record = self._row_to_user_record(found)
    return IamResponse(user=record)
async def handle_update_user(self, v):
    """Update a user's profile: name, email, roles, enabled,
    must_change_password.

    Username is immutable; a request carrying a different username
    is rejected (change it by creating a new user and disabling the
    old one). Passwords cannot be set here; they go through
    change-password / reset-password. Fields the caller leaves
    empty (or None for the two flags) keep their stored value. The
    stored row is re-read after the write and returned.
    """
    if not v.workspace:
        return _err("invalid-argument", "workspace required")
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    if v.user is None:
        return _err("invalid-argument", "user field required")
    if v.user.password:
        return _err(
            "invalid-argument",
            "password cannot be changed via update-user; "
            "use change-password or reset-password",
        )
    # The stored row is needed whether or not a username was sent.
    existing, err = await self._user_in_workspace(
        v.user_id, v.workspace,
    )
    if err is not None:
        return err
    requested_username = v.user.username
    if requested_username and requested_username != existing[2]:
        return _err(
            "invalid-argument",
            "username is immutable; create a new user "
            "instead",
        )
    # Carry forward fields the caller didn't provide.
    (
        _id, _ws, _username, cur_name, cur_email, _pw,
        cur_roles, cur_enabled, cur_mcp, _created,
    ) = existing
    patch = v.user
    new_enabled = cur_enabled
    if patch.enabled is not None:
        new_enabled = patch.enabled
    new_mcp = cur_mcp
    if patch.must_change_password is not None:
        new_mcp = patch.must_change_password
    await self.table_store.update_user_profile(
        id=v.user_id,
        name=patch.name or cur_name,
        email=patch.email or cur_email,
        roles=list(patch.roles or cur_roles or []),
        enabled=new_enabled,
        must_change_password=new_mcp,
    )
    refreshed = await self.table_store.get_user(v.user_id)
    return IamResponse(user=self._row_to_user_record(refreshed))
async def handle_disable_user(self, v):
    """Soft-delete a user: set ``enabled`` to false, then delete
    every API key that belongs to the user.

    Requires ``workspace`` and ``user_id``; the user must belong to
    that workspace.
    """
    if not v.workspace:
        return _err("invalid-argument", "workspace required")
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    _row, err = await self._user_in_workspace(v.user_id, v.workspace)
    if err is not None:
        return err
    store = self.table_store
    await store.update_user_enabled(id=v.user_id, enabled=False)
    # Keys are removed one at a time, addressed by the first column
    # of each key row.
    for key_row in await store.list_api_keys_by_user(v.user_id):
        await store.delete_api_key(key_row[0])
    return IamResponse()
async def handle_enable_user(self, v):
    """Re-enable a previously disabled user.

    API keys removed when the user was disabled are not restored;
    they have to be re-issued by an admin. Requires ``workspace``
    and ``user_id``; the user must belong to that workspace.
    """
    if not v.workspace:
        return _err("invalid-argument", "workspace required")
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    _row, err = await self._user_in_workspace(v.user_id, v.workspace)
    if err is None:
        await self.table_store.update_user_enabled(
            id=v.user_id, enabled=True,
        )
        return IamResponse()
    return err
async def handle_delete_user(self, v):
    """Hard-delete a user.

    Removes, in order: every API key belonging to the user, the
    username lookup row, and the user row itself. Unlike disable,
    this frees the username for re-use and removes the user's
    personal data from storage (intended to cover GDPR
    erasure-style requirements). When audit logging lands, the
    choice between deleting and anonymising referenced audit rows
    will need to be revisited.

    Requires ``workspace`` and ``user_id``; the user must belong to
    that workspace.
    """
    if not v.workspace:
        return _err("invalid-argument", "workspace required")
    if not v.user_id:
        return _err("invalid-argument", "user_id required")
    target, err = await self._user_in_workspace(v.user_id, v.workspace)
    if err is not None:
        return err
    store = self.table_store
    # Column 2 of the user row is the username.
    username = target[2]
    for key_row in await store.list_api_keys_by_user(v.user_id):
        await store.delete_api_key(key_row[0])
    if username:
        await store.delete_username_lookup(v.workspace, username)
    await store.delete_user(v.user_id)
    return IamResponse()
# ------------------------------------------------------------------
# Workspace CRUD
# ------------------------------------------------------------------
async def handle_create_workspace(self, v):
    """Create a workspace from ``v.workspace_record``.

    The id is required, must not start with ``_`` (reserved) and
    must not already exist. The name defaults to the id. The stored
    row is read back and returned.
    """
    spec = v.workspace_record
    if spec is None or not spec.id:
        return _err(
            "invalid-argument",
            "workspace_record.id required for create-workspace",
        )
    if spec.id.startswith("_"):
        return _err(
            "invalid-argument",
            "workspace ids beginning with '_' are reserved",
        )
    store = self.table_store
    if await store.get_workspace(spec.id) is not None:
        return _err("duplicate", "workspace already exists")
    created_at = _now_dt()
    await store.put_workspace(
        id=spec.id,
        name=spec.name or spec.id,
        enabled=spec.enabled,
        created=created_at,
    )
    stored = await store.get_workspace(spec.id)
    return IamResponse(workspace=self._row_to_workspace_record(stored))
async def handle_list_workspaces(self, v):
    """Return every workspace as a list of ``WorkspaceRecord``."""
    records = []
    for row in await self.table_store.list_workspaces():
        records.append(self._row_to_workspace_record(row))
    return IamResponse(workspaces=records)
async def handle_get_workspace(self, v):
    """Return the workspace named by ``v.workspace_record.id``, or
    ``not-found`` if it does not exist."""
    spec = v.workspace_record
    if spec is None or not spec.id:
        return _err("invalid-argument", "workspace_record.id required")
    stored = await self.table_store.get_workspace(spec.id)
    if stored is None:
        return _err("not-found", "workspace not found")
    return IamResponse(workspace=self._row_to_workspace_record(stored))
async def handle_update_workspace(self, v):
    """Update a workspace's name and/or enabled flag.

    The id is immutable and selects the workspace. An empty name or
    an ``enabled`` of None keeps the stored value. The stored row is
    re-read after the write and returned.
    """
    spec = v.workspace_record
    if spec is None or not spec.id:
        return _err("invalid-argument", "workspace_record.id required")
    store = self.table_store
    stored = await store.get_workspace(spec.id)
    if stored is None:
        return _err("not-found", "workspace not found")
    _id, cur_name, cur_enabled, _created = stored
    new_enabled = cur_enabled
    if spec.enabled is not None:
        new_enabled = spec.enabled
    await store.update_workspace(
        id=spec.id,
        name=spec.name or cur_name,
        enabled=new_enabled,
    )
    refreshed = await store.get_workspace(spec.id)
    return IamResponse(
        workspace=self._row_to_workspace_record(refreshed),
    )
async def handle_disable_workspace(self, v):
    """Disable a workspace and everything that hangs off it.

    Sets the workspace's ``enabled`` to false, then for each user in
    the workspace sets ``enabled`` to false and deletes every API
    key belonging to that user.
    """
    spec = v.workspace_record
    if spec is None or not spec.id:
        return _err("invalid-argument", "workspace_record.id required")
    store = self.table_store
    stored = await store.get_workspace(spec.id)
    if stored is None:
        return _err("not-found", "workspace not found")
    # The stored name is written back; the id is used if it is empty.
    await store.update_workspace(
        id=spec.id,
        name=stored[1] or spec.id,
        enabled=False,
    )
    members = await store.list_users_by_workspace(spec.id)
    for member in members:
        member_id = member[0]
        await store.update_user_enabled(id=member_id, enabled=False)
        for key_row in await store.list_api_keys_by_user(member_id):
            await store.delete_api_key(key_row[0])
    return IamResponse()
# ------------------------------------------------------------------
# rotate-signing-key
# ------------------------------------------------------------------
async def handle_rotate_signing_key(self, v):
    """Rotate the JWT signing key.

    Marks the current active key as retired, stores a freshly
    generated Ed25519 key as the new active key, and switches the
    in-memory cache over to it.

    The retired key row is kept in ``iam_signing_keys`` so the
    gateway's JWT validator can keep validating previously issued
    tokens during the grace period. Grace-period enforcement itself
    (time-window acceptance at the validator) lands with the
    gateway auth middleware work.
    """
    current = await self._get_active_signing_key()
    rotated_at = _now_dt()
    if current is not None:
        outgoing_kid, _outgoing_priv, _outgoing_pub = current
        await self.table_store.retire_signing_key(
            kid=outgoing_kid, retired=rotated_at,
        )
    replacement = _generate_signing_keypair()
    new_kid, new_priv, new_pub = replacement
    await self.table_store.put_signing_key(
        kid=new_kid,
        private_pem=new_priv,
        public_pem=new_pub,
        created=rotated_at,
        retired=None,
    )
    self._signing_key = (new_kid, new_priv, new_pub)
    retired_kid = current[0] if current else None
    logger.info(
        f"IAM: rotated signing key. "
        f"New kid={new_kid}, retired kid={retired_kid}"
    )
    return IamResponse()
# ------------------------------------------------------------------
# resolve-api-key
# ------------------------------------------------------------------
async def handle_resolve_api_key(self, v):
    """Resolve an API key plaintext to its owning user.

    The key is looked up by its SHA-256 hash. The request fails
    with ``auth-failed`` when the key is missing, unknown or
    expired, when the owning user is missing or disabled, or when
    the owner's workspace is missing or disabled. On success the
    user id, workspace and roles of the owner are returned.
    """
    if not v.api_key:
        return _err("auth-failed", "no api key")
    store = self.table_store
    key_row = await store.get_api_key_by_hash(_hash_api_key(v.api_key))
    if key_row is None:
        return _err("auth-failed", "unknown api key")
    (
        _key_hash, _key_id, owner_id, _name, _prefix, expires,
        _created, _last_used,
    ) = key_row
    if expires is not None:
        # Stored expiry may be a string or a datetime; a naive value
        # is treated as UTC before comparing.
        if isinstance(expires, str):
            deadline = datetime.datetime.fromisoformat(expires)
        else:
            deadline = expires
        if deadline.tzinfo is None:
            deadline = deadline.replace(tzinfo=datetime.timezone.utc)
        if deadline < _now_dt():
            return _err("auth-failed", "api key expired")
    owner_row = await store.get_user(owner_id)
    if owner_row is None:
        return _err("auth-failed", "owning user missing")
    owner = self._row_to_user_record(owner_row)
    if not owner.enabled:
        return _err("auth-failed", "owning user disabled")
    ws_row = await store.get_workspace(owner.workspace)
    if ws_row is None or not ws_row[2]:
        return _err("auth-failed", "owning workspace disabled")
    return IamResponse(
        resolved_user_id=owner.id,
        resolved_workspace=owner.workspace,
        resolved_roles=list(owner.roles),
    )
# ------------------------------------------------------------------
# create-user
# ------------------------------------------------------------------
async def handle_create_user(self, v):
    """Create a user inside ``v.workspace``.

    Requires ``v.workspace``, ``v.user``, ``v.user.username`` and
    ``v.user.password``.  The workspace row must exist and have a
    truthy value at index 2, and the username must not already
    resolve to a user id in that workspace.

    The new user gets a fresh UUID4 id and a hashed password; the
    display name falls back to the username and the email to an
    empty string.  The stored row is read back and returned as
    ``IamResponse.user``.
    """
    if not v.workspace:
        return _err(
            "invalid-argument", "workspace required for create-user",
        )

    spec = v.user
    if spec is None:
        return _err(
            "invalid-argument", "user field required for create-user",
        )
    if not spec.username:
        return _err("invalid-argument", "user.username required")
    if not spec.password:
        return _err("invalid-argument", "user.password required")

    # The target workspace has to exist and be flagged at index 2.
    workspace_row = await self.table_store.get_workspace(v.workspace)
    if workspace_row is None or not workspace_row[2]:
        return _err("not-found", "workspace not found or disabled")

    # Usernames are unique per workspace.
    clash = await self.table_store.get_user_id_by_username(
        v.workspace, spec.username,
    )
    if clash:
        return _err("duplicate", "username already exists")

    new_id = str(uuid.uuid4())
    created_at = _now_dt()
    record = {
        "id": new_id,
        "workspace": v.workspace,
        "username": spec.username,
        "name": spec.name or spec.username,
        "email": spec.email or "",
        "password_hash": _hash_password(spec.password),
        "roles": list(spec.roles or []),
        "enabled": spec.enabled,
        "must_change_password": spec.must_change_password,
        "created": created_at,
    }
    await self.table_store.put_user(**record)

    # Read the row back so the response reflects what was stored.
    stored = await self.table_store.get_user(new_id)
    return IamResponse(user=self._row_to_user_record(stored))
# ------------------------------------------------------------------
# list-users
# ------------------------------------------------------------------
async def handle_list_users(self, v):
    """List the users of ``v.workspace``.

    Returns an ``invalid-argument`` error when no workspace is
    given; otherwise an IamResponse whose ``users`` holds one user
    record per row returned by the table store.
    """
    workspace = v.workspace
    if not workspace:
        return _err(
            "invalid-argument", "workspace required for list-users",
        )

    user_rows = await self.table_store.list_users_by_workspace(workspace)
    records = [self._row_to_user_record(entry) for entry in user_rows]
    return IamResponse(users=records)
# ------------------------------------------------------------------
# create-api-key
# ------------------------------------------------------------------
async def handle_create_api_key(self, v):
    """Mint a new API key for a user in ``v.workspace``.

    Requires ``v.workspace``, ``v.key.user_id`` and ``v.key.name``.
    The target user must exist and its row's workspace (index 1)
    must equal ``v.workspace``.

    Only the hash of the generated key is stored, together with a
    short display prefix.  The response carries the plaintext key
    plus the stored key record as read back from the table store.
    """
    if not v.workspace:
        return _err(
            "invalid-argument", "workspace required for create-api-key",
        )

    spec = v.key
    if spec is None or not spec.user_id:
        return _err("invalid-argument", "key.user_id required")
    if not spec.name:
        return _err("invalid-argument", "key.name required")

    # The key's owner has to live in the caller's workspace.
    owner_row = await self.table_store.get_user(spec.user_id)
    if owner_row is None:
        return _err("not-found", "user not found")
    if owner_row[1] != v.workspace:
        return _err(
            "operation-not-permitted",
            "target user is in a different workspace",
        )

    plaintext = _generate_api_key()
    new_key_id = str(uuid.uuid4())
    created_at = _now_dt()
    expiry = _parse_expires(spec.expires)
    # Displayable prefix: the fixed key prefix plus four characters.
    prefix_len = len(API_KEY_PREFIX) + 4

    await self.table_store.put_api_key(
        key_hash=_hash_api_key(plaintext),
        id=new_key_id,
        user_id=spec.user_id,
        name=spec.name,
        prefix=plaintext[:prefix_len],
        expires=expiry,
        created=created_at,
        last_used=None,
    )

    # Read the row back so the returned record matches storage.
    stored = await self.table_store.get_api_key_by_hash(
        _hash_api_key(plaintext),
    )
    return IamResponse(
        api_key_plaintext=plaintext,
        api_key=self._row_to_api_key_record(stored),
    )
# ------------------------------------------------------------------
# list-api-keys
# ------------------------------------------------------------------
async def handle_list_api_keys(self, v):
    """List the API keys owned by ``v.user_id`` within ``v.workspace``.

    Both fields are required.  A user that does not exist, or whose
    row's workspace (index 1) differs from ``v.workspace``, is
    reported as ``not-found``.  Otherwise returns an IamResponse
    whose ``api_keys`` holds one record per stored key row.
    """
    required = (
        (v.workspace, "workspace required for list-api-keys"),
        (v.user_id, "user_id required for list-api-keys"),
    )
    for value, message in required:
        if not value:
            return _err("invalid-argument", message)

    # Scope check: the user has to belong to the given workspace.
    owner_row = await self.table_store.get_user(v.user_id)
    out_of_scope = owner_row is None or owner_row[1] != v.workspace
    if out_of_scope:
        return _err("not-found", "user not found in workspace")

    key_rows = await self.table_store.list_api_keys_by_user(v.user_id)
    records = [self._row_to_api_key_record(entry) for entry in key_rows]
    return IamResponse(api_keys=records)
# ------------------------------------------------------------------
# revoke-api-key
# ------------------------------------------------------------------
async def handle_revoke_api_key(self, v):
    """Delete the API key identified by ``v.key_id``.

    ``v.workspace`` and ``v.key_id`` are required.  The key must
    exist, and its owning user must exist with a row workspace
    (index 1) equal to ``v.workspace``; otherwise the request is
    refused with ``operation-not-permitted``.  On success the key
    is deleted by its hash and an empty IamResponse is returned.
    """
    required = (
        (v.workspace, "workspace required for revoke-api-key"),
        (v.key_id, "key_id required"),
    )
    for value, message in required:
        if not value:
            return _err("invalid-argument", message)

    key_row = await self.table_store.get_api_key_by_id(v.key_id)
    if key_row is None:
        return _err("not-found", "api key not found")

    # The stored hash is the handle used for deletion; the owner id
    # drives the workspace-scope check below.
    (
        stored_hash, _key_id, owner_id, _label, _pfx,
        _expiry, _created_at, _used_at,
    ) = key_row

    owner_row = await self.table_store.get_user(owner_id)
    out_of_scope = owner_row is None or owner_row[1] != v.workspace
    if out_of_scope:
        return _err(
            "operation-not-permitted",
            "key belongs to a different workspace",
        )

    await self.table_store.delete_api_key(stored_hash)
    return IamResponse()
# ------------------------------------------------------------------
# authorise / authorise-many
#
# The IAM contract (see docs/tech-specs/iam-contract.md) calls
# for the regime — not the gateway — to decide whether an
# identity may perform a capability on a resource given the
# operation's parameters. These two operations are the OSS
# regime's implementation of that contract.
#
# Inputs (on IamRequest):
# user_id — the identity handle (the gateway's
# opaque reference). For OSS this is the
# user record's id.
# capability — the capability string from the
# capabilities.md vocabulary.
# resource_json — JSON dict, the resource address
# ({} for system, {workspace} for
# workspace, {workspace, flow} for flow).
# parameters_json — JSON dict, decision-relevant operation
# parameters (e.g. workspace association
# on user-registry operations).
# authorise_checks — for authorise-many, a JSON list of
# {capability, resource, parameters}.
#
# Outputs (on IamResponse):
# decision_allow — single allow / deny verdict.
# decision_ttl_seconds — gateway cache TTL for this
# decision.
# decisions_json — for authorise-many, list of
# {allow, ttl} in request order.
# ------------------------------------------------------------------
def _decide(self, user_row, capability, resource, parameters):
    """Single authorisation decision.  Returns (allow, ttl).

    Args:
        user_row: user record tuple, or None when the identity
            handle did not resolve.  Layout:
            0:id 1:workspace 2:username 3:name 4:email
            5:password_hash 6:roles 7:enabled
            8:must_change_password 9:created
        capability: capability string looked up in each role's
            ``capabilities``.
        resource: resource address.  Expected to be a dict; any
            falsy value is treated as an empty dict.
        parameters: operation parameters, same expectations as
            ``resource``.  Only consulted when ``resource`` does not
            yield a workspace.

    Returns:
        ``(allow, ttl)`` where ``ttl`` is always
        ``AUTHZ_CACHE_TTL_SECONDS``.

    The decision is a deny when the user is missing or disabled,
    when ``resource`` / ``parameters`` is a truthy non-dict value
    (malformed input fails closed instead of raising), or when no
    role grants the capability within scope.

    Note: this function reads no workspace row, so it does not
    check whether the target workspace itself is enabled; only the
    user's own enabled flag is checked here.
    """
    deny = (False, AUTHZ_CACHE_TTL_SECONDS)

    if user_row is None:
        return deny
    if not user_row[7]:  # disabled
        return deny

    # Work out which workspace (if any) the operation targets.  The
    # resource address wins; the parameters are a fallback.  Callers
    # pass values decoded from JSON, which may legally be a list,
    # string or number -- treat those as a deny rather than letting
    # ``.get`` raise and abort the whole request or batch.
    resource = resource or {}
    if not isinstance(resource, dict):
        return deny
    target_workspace = resource.get("workspace")
    if not target_workspace:
        parameters = parameters or {}
        if not isinstance(parameters, dict):
            return deny
        target_workspace = parameters.get("workspace")

    roles = user_row[6] or set()
    assigned_workspace = user_row[1]

    for role_name in roles:
        defn = ROLE_DEFINITIONS.get(role_name)
        if defn is None:
            # Unknown role names grant nothing.
            continue
        if capability not in defn["capabilities"]:
            continue
        # With no target workspace the capability alone suffices;
        # otherwise the role's workspace scope must permit it.
        if target_workspace is None or _scope_permits(
            defn["workspace_scope"],
            target_workspace,
            assigned_workspace,
        ):
            return True, AUTHZ_CACHE_TTL_SECONDS

    return deny
async def handle_authorise(self, v):
    """Answer a single authorisation question for ``v.user_id``.

    ``v.capability`` and ``v.user_id`` are required.
    ``v.resource_json`` and ``v.parameters_json`` are decoded as
    JSON, with an empty value decoding as ``{}``; a decode failure
    is reported as ``invalid-argument``.

    The user row is fetched once and handed to ``_decide``; the
    verdict and its TTL are returned as ``decision_allow`` and
    ``decision_ttl_seconds``.
    """
    if not v.capability:
        return _err("invalid-argument", "capability required")
    if not v.user_id:
        return _err("invalid-argument", "user_id (handle) required")

    raw_resource = v.resource_json or "{}"
    raw_parameters = v.parameters_json or "{}"
    try:
        resource = json.loads(raw_resource)
        parameters = json.loads(raw_parameters)
    except json.JSONDecodeError as e:
        return _err("invalid-argument", f"bad json: {e}")

    subject_row = await self.table_store.get_user(v.user_id)
    permitted, cache_ttl = self._decide(
        subject_row, v.capability, resource, parameters,
    )
    return IamResponse(
        decision_allow=permitted,
        decision_ttl_seconds=cache_ttl,
    )
async def handle_authorise_many(self, v):
    """Answer a batch of authorisation questions for ``v.user_id``.

    ``v.authorise_checks`` must be a JSON list.  Each element that
    is a dict is evaluated through ``_decide`` using its
    ``capability``, ``resource`` and ``parameters`` entries (missing
    or falsy ones default to ``""`` / ``{}``); any element that is
    not a dict yields a deny.

    Returns an IamResponse whose ``decisions_json`` is a JSON list
    of ``{"allow", "ttl"}`` objects in request order.
    """
    if not v.user_id:
        return _err("invalid-argument", "user_id (handle) required")
    if not v.authorise_checks:
        return _err("invalid-argument", "authorise_checks required")

    try:
        checks = json.loads(v.authorise_checks)
    except json.JSONDecodeError as e:
        return _err("invalid-argument", f"bad json: {e}")

    if not isinstance(checks, list):
        return _err(
            "invalid-argument",
            "authorise_checks must be a JSON list",
        )

    # The subject is the same for every check: look it up once.
    subject_row = await self.table_store.get_user(v.user_id)

    def verdict(check):
        # Malformed entries are denied rather than rejected.
        if not isinstance(check, dict):
            return {"allow": False, "ttl": AUTHZ_CACHE_TTL_SECONDS}
        permitted, cache_ttl = self._decide(
            subject_row,
            check.get("capability", ""),
            check.get("resource") or {},
            check.get("parameters") or {},
        )
        return {"allow": permitted, "ttl": cache_ttl}

    outcomes = [verdict(check) for check in checks]
    return IamResponse(decisions_json=json.dumps(outcomes))