refactor(iam): pluggable IAM regime via authenticate/authorise contract (#853)

The gateway no longer holds any policy state — capability sets, role
definitions, workspace scope rules.  Per the IAM contract it asks the
regime "may this identity perform this capability on this resource?"
per request.  That moves the OSS role-based regime entirely into
iam-svc, which can be replaced (SSO, ABAC, ReBAC) without changing
the gateway, the wire protocol, or backend services.

Contract:
- authenticate(credential) -> Identity (handle, workspace,
  principal_id, source).  No roles, claims, or policy state surface
  to the gateway.
- authorise(identity, capability, resource, parameters) -> (allow,
  ttl).  Each decision is cached for the regime-supplied TTL, capped
  at an upper bound; regime errors fail closed (the request is
  rejected, never allowed).
- authorise_many available as a fan-out variant.

Operation registry drives every authorisation decision:
- /api/v1/iam -> IamEndpoint, looks up bare op name (create-user,
  list-workspaces, ...).
- /api/v1/{kind} -> RegistryRoutedVariableEndpoint, <kind>:<op>
  (config:get, flow:list-blueprints, librarian:add-document, ...).
- /api/v1/flow/{flow}/service/{kind} -> flow-service:<kind>.
- /api/v1/flow/{flow}/{import,export}/{kind} ->
  flow-{import,export}:<kind>.
- WS Mux per-frame -> flow-service:<kind>; closes a gap where any
  authenticated user could reach any service kind over the socket
  without a per-kind authorisation check.
85 operations registered across the surface.

JWT carries identity only — sub + workspace.  The roles claim is gone;
the gateway never reads policy state from a credential.

The three coarse *_KIND_CAPABILITY maps are removed.  The registry is
the only source of truth for the capability + resource shape of an
operation.  Tests migrated to the new Identity shape and to
authorise()-mocked auth doubles.

Specs updated: docs/tech-specs/iam-contract.md (Identity surface,
caching, registry-naming conventions), iam.md (JWT shape, gateway
flow, role section reframed as OSS-regime detail), iam-protocol.md
(positioned as one implementation of the contract).
This commit is contained in:
cybermaggedon 2026-04-28 16:19:41 +01:00 committed by GitHub
parent 9f2d9adcb1
commit 5e28d3cce0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 2359 additions and 587 deletions

View file

@ -87,7 +87,6 @@ class TestVerifyJwtEddsa:
priv, pub = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"roles": ["reader"],
"iat": int(time.time()),
"exp": int(time.time()) + 60,
}
@ -99,7 +98,7 @@ class TestVerifyJwtEddsa:
def test_expired_jwt_rejected(self):
priv, pub = make_keypair()
claims = {
"sub": "user-1", "workspace": "default", "roles": [],
"sub": "user-1", "workspace": "default",
"iat": int(time.time()) - 3600,
"exp": int(time.time()) - 1,
}
@ -111,7 +110,7 @@ class TestVerifyJwtEddsa:
priv_a, _ = make_keypair()
_, pub_b = make_keypair()
claims = {
"sub": "user-1", "workspace": "default", "roles": [],
"sub": "user-1", "workspace": "default",
"iat": int(time.time()),
"exp": int(time.time()) + 60,
}
@ -131,7 +130,7 @@ class TestVerifyJwtEddsa:
# since we expect it to bail before verifying.
header = {"alg": "HS256", "typ": "JWT", "kid": "x"}
payload = {
"sub": "user-1", "workspace": "default", "roles": [],
"sub": "user-1", "workspace": "default",
"iat": int(time.time()), "exp": int(time.time()) + 60,
}
h = _b64url(json.dumps(header, separators=(",", ":")).encode())
@ -149,11 +148,12 @@ class TestIdentity:
def test_fields(self):
i = Identity(
user_id="u", workspace="w", roles=["reader"], source="api-key",
handle="u", workspace="w",
principal_id="u", source="api-key",
)
assert i.user_id == "u"
assert i.handle == "u"
assert i.workspace == "w"
assert i.roles == ["reader"]
assert i.principal_id == "u"
assert i.source == "api-key"
@ -194,7 +194,6 @@ class TestIamAuthDispatch:
priv, pub = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"roles": ["writer"],
"iat": int(time.time()),
"exp": int(time.time()) + 60,
}
@ -206,9 +205,9 @@ class TestIamAuthDispatch:
ident = await auth.authenticate(
make_request(f"Bearer {token}")
)
assert ident.user_id == "user-1"
assert ident.handle == "user-1"
assert ident.workspace == "default"
assert ident.roles == ["writer"]
assert ident.principal_id == "user-1"
assert ident.source == "jwt"
@pytest.mark.asyncio
@ -217,7 +216,7 @@ class TestIamAuthDispatch:
# must not validate — even ones that would otherwise pass.
priv, _ = make_keypair()
claims = {
"sub": "user-1", "workspace": "default", "roles": [],
"sub": "user-1", "workspace": "default",
"iat": int(time.time()), "exp": int(time.time()) + 60,
}
token = sign_jwt(priv, claims)
@ -232,6 +231,9 @@ class TestIamAuthDispatch:
async def fake_resolve(api_key):
assert api_key == "tg_testkey"
# Roles are returned by the regime as a hint but the
# gateway ignores them — kept here so the resolve
# protocol shape is exercised.
return ("user-xyz", "default", ["admin"])
async def fake_with_client(op):
@ -241,9 +243,9 @@ class TestIamAuthDispatch:
ident = await auth.authenticate(
make_request("Bearer tg_testkey")
)
assert ident.user_id == "user-xyz"
assert ident.handle == "user-xyz"
assert ident.workspace == "default"
assert ident.roles == ["admin"]
assert ident.principal_id == "user-xyz"
assert ident.source == "api-key"
@pytest.mark.asyncio
@ -301,8 +303,8 @@ class TestApiKeyCache:
a = await auth.authenticate(make_request("Bearer tg_a"))
b = await auth.authenticate(make_request("Bearer tg_b"))
assert a.user_id == "u-tg_a"
assert b.user_id == "u-tg_b"
assert a.handle == "u-tg_a"
assert b.handle == "u-tg_b"
assert seen == ["tg_a", "tg_b"]
@pytest.mark.asyncio
@ -310,3 +312,136 @@ class TestApiKeyCache:
# Not a behaviour test — just ensures we don't accidentally
# set TTL to 0 (which would defeat the cache) or to a week.
assert 10 <= API_KEY_CACHE_TTL <= 3600
# -- IamAuth.authorise -----------------------------------------------------
class TestAuthorise:
    """Exercise ``IamAuth.authorise()``, the gateway's single
    authorisation entry point under the IAM contract.

    Every test swaps ``IamAuth._with_client`` for a stub standing in
    for iam-svc.  Covered behaviour: an allow returns quietly, a deny
    raises ``web.HTTPForbidden``, a regime failure raises
    ``web.HTTPUnauthorized`` (fail closed), repeated identical
    requests reach the regime only once, and a different resource
    triggers a fresh regime call.
    """

    @staticmethod
    def _regime(allow, tally=None):
        """Build an async replacement for ``IamAuth._with_client``.

        The stub hands ``op`` a fresh mock client whose ``authorise``
        resolves to ``(allow, 30)``.  When ``tally`` is supplied, its
        ``"n"`` entry is incremented on every invocation so a test can
        count round-trips to the regime.
        """
        async def fake_with_client(op):
            if tally is not None:
                tally["n"] += 1
            client = Mock(authorise=AsyncMock(return_value=(allow, 30)))
            return await op(client)

        return fake_with_client

    def _make_identity(self, handle="u-1", workspace="default"):
        """Return an api-key ``Identity`` whose principal id mirrors
        its handle."""
        return Identity(
            handle=handle,
            workspace=workspace,
            principal_id=handle,
            source="api-key",
        )

    @pytest.mark.asyncio
    async def test_allow_returns_no_exception(self):
        auth = IamAuth(backend=Mock())
        stub = self._regime(allow=True)

        with patch.object(auth, "_with_client", side_effect=stub):
            # Passing means simply completing without an exception.
            await auth.authorise(
                self._make_identity(),
                "graph:read",
                {"workspace": "default"},
                {},
            )

    @pytest.mark.asyncio
    async def test_deny_raises_403(self):
        auth = IamAuth(backend=Mock())
        stub = self._regime(allow=False)

        with patch.object(auth, "_with_client", side_effect=stub):
            with pytest.raises(web.HTTPForbidden):
                await auth.authorise(
                    self._make_identity(),
                    "users:admin",
                    {},
                    {"workspace": "acme"},
                )

    @pytest.mark.asyncio
    async def test_regime_error_fails_closed_as_401(self):
        # If iam-svc errors, the gateway must NOT silently allow.
        auth = IamAuth(backend=Mock())

        async def broken_regime(op):
            raise RuntimeError("iam-svc down")

        with patch.object(auth, "_with_client", side_effect=broken_regime):
            with pytest.raises(web.HTTPUnauthorized):
                await auth.authorise(
                    self._make_identity(),
                    "graph:read",
                    {"workspace": "default"},
                    {},
                )

    @pytest.mark.asyncio
    async def test_allow_decision_is_cached(self):
        auth = IamAuth(backend=Mock())
        tally = {"n": 0}
        stub = self._regime(allow=True, tally=tally)

        with patch.object(auth, "_with_client", side_effect=stub):
            who = self._make_identity()
            for _attempt in range(5):
                await auth.authorise(
                    who, "graph:read", {"workspace": "default"}, {},
                )

        # Five identical requests, one trip to the regime.
        assert tally["n"] == 1

    @pytest.mark.asyncio
    async def test_deny_decision_is_cached(self):
        auth = IamAuth(backend=Mock())
        tally = {"n": 0}
        stub = self._regime(allow=False, tally=tally)

        with patch.object(auth, "_with_client", side_effect=stub):
            who = self._make_identity()
            for _attempt in range(5):
                with pytest.raises(web.HTTPForbidden):
                    await auth.authorise(
                        who, "users:admin", {}, {"workspace": "acme"},
                    )

        # Denies are cached too — repeated attempts don't re-hit IAM.
        assert tally["n"] == 1

    @pytest.mark.asyncio
    async def test_different_resources_cached_separately(self):
        auth = IamAuth(backend=Mock())
        tally = {"n": 0}
        stub = self._regime(allow=True, tally=tally)

        with patch.object(auth, "_with_client", side_effect=stub):
            who = self._make_identity()
            for workspace in ("a", "b"):
                await auth.authorise(
                    who, "graph:read", {"workspace": workspace}, {},
                )

        # Different resource → different cache key → two IAM calls.
        assert tally["n"] == 2

View file

@ -1,15 +1,22 @@
"""
Tests for gateway/capabilities.py the capability + role + workspace
model that underpins all gateway authorisation.
Tests for gateway/capabilities.py — the thin authorisation surface
under the IAM contract.
The gateway no longer holds policy state (roles, capability sets,
workspace scopes); those live in iam-svc. These tests cover only
what the gateway shim does itself: PUBLIC / AUTHENTICATED short-
circuiting, default-fill of workspace, and forwarding of capability
checks to ``auth.authorise``.
"""
import pytest
from aiohttp import web
from unittest.mock import AsyncMock, MagicMock
from trustgraph.gateway.capabilities import (
PUBLIC, AUTHENTICATED,
KNOWN_CAPABILITIES, ROLE_DEFINITIONS,
check, enforce_workspace, access_denied, auth_failure,
enforce, enforce_workspace,
access_denied, auth_failure,
)
@ -17,109 +24,74 @@ from trustgraph.gateway.capabilities import (
class _Identity:
"""Minimal stand-in for auth.Identity — the capability module
accesses ``.workspace`` and ``.roles``."""
def __init__(self, workspace, roles):
self.user_id = "user-1"
"""Stand-in for auth.Identity — under the IAM contract it has
just ``handle``, ``workspace``, ``principal_id``, ``source``."""
def __init__(self, handle="user-1", workspace="default"):
self.handle = handle
self.workspace = workspace
self.roles = list(roles)
self.principal_id = handle
self.source = "api-key"
def reader_in(ws):
return _Identity(ws, ["reader"])
def _allow_auth(identity=None):
    """Return an Auth test double whose checks always succeed.

    ``authenticate`` resolves to ``identity`` (a default ``_Identity``
    is substituted when none, or a falsy one, is supplied) and
    ``authorise`` resolves to ``None`` for every call, so it never
    denies.
    """
    who = identity or _Identity()
    auth = MagicMock()
    auth.authenticate = AsyncMock(return_value=who)
    auth.authorise = AsyncMock(return_value=None)
    return auth
def writer_in(ws):
return _Identity(ws, ["writer"])
def _deny_auth(identity=None):
    """Return an Auth test double that authenticates but never
    authorises.

    ``authenticate`` resolves to ``identity`` (a default ``_Identity``
    is substituted when none, or a falsy one, is supplied); awaiting
    ``authorise`` raises the exception built by ``access_denied()``.
    """
    who = identity or _Identity()
    auth = MagicMock()
    auth.authenticate = AsyncMock(return_value=who)
    # The exception instance is created once, here, and re-raised on
    # every authorise() call.
    auth.authorise = AsyncMock(side_effect=access_denied())
    return auth
def admin_in(ws):
return _Identity(ws, ["admin"])
# -- enforce() -------------------------------------------------------------
# -- role table sanity -----------------------------------------------------
class TestEnforce:
@pytest.mark.asyncio
async def test_public_returns_none_no_auth(self):
auth = _allow_auth()
result = await enforce(MagicMock(), auth, PUBLIC)
assert result is None
auth.authenticate.assert_not_called()
auth.authorise.assert_not_called()
class TestRoleTable:
@pytest.mark.asyncio
async def test_authenticated_skips_authorise(self):
identity = _Identity()
auth = _allow_auth(identity)
result = await enforce(MagicMock(), auth, AUTHENTICATED)
assert result is identity
auth.authenticate.assert_awaited_once()
auth.authorise.assert_not_called()
def test_oss_roles_present(self):
assert set(ROLE_DEFINITIONS.keys()) == {"reader", "writer", "admin"}
@pytest.mark.asyncio
async def test_capability_calls_authorise_system_level(self):
identity = _Identity()
auth = _allow_auth(identity)
result = await enforce(MagicMock(), auth, "graph:read")
assert result is identity
auth.authorise.assert_awaited_once_with(
identity, "graph:read", {}, {},
)
def test_admin_is_cross_workspace(self):
assert ROLE_DEFINITIONS["admin"]["workspace_scope"] == "*"
def test_reader_writer_are_assigned_scope(self):
assert ROLE_DEFINITIONS["reader"]["workspace_scope"] == "assigned"
assert ROLE_DEFINITIONS["writer"]["workspace_scope"] == "assigned"
def test_admin_superset_of_writer(self):
admin = ROLE_DEFINITIONS["admin"]["capabilities"]
writer = ROLE_DEFINITIONS["writer"]["capabilities"]
assert writer.issubset(admin)
def test_writer_superset_of_reader(self):
writer = ROLE_DEFINITIONS["writer"]["capabilities"]
reader = ROLE_DEFINITIONS["reader"]["capabilities"]
assert reader.issubset(writer)
def test_admin_has_users_admin(self):
assert "users:admin" in ROLE_DEFINITIONS["admin"]["capabilities"]
def test_writer_does_not_have_users_admin(self):
assert "users:admin" not in ROLE_DEFINITIONS["writer"]["capabilities"]
def test_every_bundled_capability_is_known(self):
for role in ROLE_DEFINITIONS.values():
for cap in role["capabilities"]:
assert cap in KNOWN_CAPABILITIES
# -- check() ---------------------------------------------------------------
class TestCheck:
def test_reader_has_reader_cap_in_own_workspace(self):
assert check(reader_in("default"), "graph:read", "default")
def test_reader_does_not_have_writer_cap(self):
assert not check(reader_in("default"), "graph:write", "default")
def test_reader_cannot_act_in_other_workspace(self):
assert not check(reader_in("default"), "graph:read", "acme")
def test_writer_has_write_in_own_workspace(self):
assert check(writer_in("default"), "graph:write", "default")
def test_writer_cannot_act_in_other_workspace(self):
assert not check(writer_in("default"), "graph:write", "acme")
def test_admin_has_everything_everywhere(self):
for cap in ("graph:read", "graph:write", "config:write",
"users:admin", "metrics:read"):
assert check(admin_in("default"), cap, "acme"), (
f"admin should have {cap} in acme"
)
def test_admin_has_caps_without_explicit_workspace(self):
assert check(admin_in("default"), "users:admin")
def test_default_target_is_identity_workspace(self):
# Reader with no target workspace → should check against own
assert check(reader_in("default"), "graph:read")
def test_unknown_capability_returns_false(self):
assert not check(admin_in("default"), "nonsense:cap", "default")
def test_unknown_role_contributes_nothing(self):
ident = _Identity("default", ["made-up-role"])
assert not check(ident, "graph:read", "default")
def test_multi_role_union(self):
# If a user is both reader and admin, they inherit admin's
# cross-workspace powers.
ident = _Identity("default", ["reader", "admin"])
assert check(ident, "users:admin", "acme")
@pytest.mark.asyncio
async def test_capability_denied_raises_forbidden(self):
auth = _deny_auth()
with pytest.raises(web.HTTPForbidden):
await enforce(MagicMock(), auth, "users:admin")
# -- enforce_workspace() ---------------------------------------------------
@ -127,56 +99,54 @@ class TestCheck:
class TestEnforceWorkspace:
def test_reader_in_own_workspace_allowed(self):
data = {"workspace": "default", "operation": "x"}
enforce_workspace(data, reader_in("default"))
assert data["workspace"] == "default"
def test_reader_no_workspace_injects_assigned(self):
@pytest.mark.asyncio
async def test_default_fills_from_identity(self):
data = {"operation": "x"}
enforce_workspace(data, reader_in("default"))
auth = _allow_auth()
await enforce_workspace(data, _Identity(workspace="default"), auth)
assert data["workspace"] == "default"
def test_reader_mismatched_workspace_denied(self):
@pytest.mark.asyncio
async def test_caller_supplied_workspace_kept(self):
data = {"workspace": "acme", "operation": "x"}
with pytest.raises(web.HTTPForbidden):
enforce_workspace(data, reader_in("default"))
def test_admin_can_target_any_workspace(self):
data = {"workspace": "acme", "operation": "x"}
enforce_workspace(data, admin_in("default"))
auth = _allow_auth()
await enforce_workspace(data, _Identity(workspace="default"), auth)
assert data["workspace"] == "acme"
def test_admin_no_workspace_defaults_to_assigned(self):
data = {"operation": "x"}
enforce_workspace(data, admin_in("default"))
assert data["workspace"] == "default"
def test_writer_same_workspace_specified_allowed(self):
@pytest.mark.asyncio
async def test_no_capability_skips_authorise(self):
data = {"workspace": "default"}
enforce_workspace(data, writer_in("default"))
assert data["workspace"] == "default"
auth = _allow_auth()
await enforce_workspace(data, _Identity(), auth)
auth.authorise.assert_not_called()
def test_non_dict_passthrough(self):
# Non-dict bodies are returned unchanged (e.g. streaming).
result = enforce_workspace("not-a-dict", reader_in("default"))
assert result == "not-a-dict"
@pytest.mark.asyncio
async def test_capability_calls_authorise_with_resource(self):
data = {"workspace": "acme"}
identity = _Identity()
auth = _allow_auth(identity)
await enforce_workspace(
data, identity, auth, capability="graph:read",
)
auth.authorise.assert_awaited_once_with(
identity, "graph:read", {"workspace": "acme"}, {},
)
def test_with_capability_tightens_check(self):
# Reader lacks graph:write; workspace-only check would pass
# (scope is fine), but combined check must reject.
data = {"workspace": "default"}
@pytest.mark.asyncio
async def test_capability_denied_propagates(self):
data = {"workspace": "acme"}
auth = _deny_auth()
with pytest.raises(web.HTTPForbidden):
enforce_workspace(
data, reader_in("default"), capability="graph:write",
await enforce_workspace(
data, _Identity(), auth, capability="users:admin",
)
def test_with_capability_passes_when_granted(self):
data = {"workspace": "default"}
enforce_workspace(
data, reader_in("default"), capability="graph:read",
)
assert data["workspace"] == "default"
@pytest.mark.asyncio
async def test_non_dict_passthrough(self):
auth = _allow_auth()
result = await enforce_workspace("not-a-dict", _Identity(), auth)
assert result == "not-a-dict"
auth.authorise.assert_not_called()
# -- helpers ---------------------------------------------------------------
@ -199,5 +169,3 @@ class TestSentinels:
def test_public_and_authenticated_are_distinct(self):
assert PUBLIC != AUTHENTICATED
assert PUBLIC not in KNOWN_CAPABILITIES
assert AUTHENTICATED not in KNOWN_CAPABILITIES

View file

@ -73,14 +73,16 @@ class TestEndpointManager:
prometheus_url="http://test:9090"
)
# Each dispatcher factory is invoked exactly once during
# construction — one per endpoint that needs a dedicated
# wire. dispatch_auth_iam is the dedicated factory for the
# AuthEndpoints forwarder (login / bootstrap /
# change-password), distinct from dispatch_global_service
# (the generic /api/v1/{kind} route).
# Each dispatcher factory is invoked once per endpoint that
# needs a dedicated wire. dispatch_auth_iam is shared by
# two endpoints — AuthEndpoints (login / bootstrap /
# change-password) and IamEndpoint (registry-driven
# /api/v1/iam) — so it's expected to be called twice.
# Both forwarders pin the dispatcher to kind=iam and reuse
# the same factory; they're distinct from
# dispatch_global_service (the generic /api/v1/{kind} route).
mock_dispatcher_manager.dispatch_global_service.assert_called_once()
mock_dispatcher_manager.dispatch_auth_iam.assert_called_once()
assert mock_dispatcher_manager.dispatch_auth_iam.call_count == 2
mock_dispatcher_manager.dispatch_socket.assert_called_once()
mock_dispatcher_manager.dispatch_flow_service.assert_called_once()
mock_dispatcher_manager.dispatch_flow_import.assert_called_once()

View file

@ -25,11 +25,11 @@ from trustgraph.gateway.auth import Identity
TEST_CAP = "graph:write"
def _valid_identity(roles=("admin",)):
def _valid_identity():
return Identity(
user_id="test-user",
handle="test-user",
workspace="default",
roles=list(roles),
principal_id="test-user",
source="api-key",
)
@ -37,11 +37,12 @@ def _valid_identity(roles=("admin",)):
@pytest.fixture
def mock_auth():
"""Mock IAM-backed authenticator. Successful by default —
``authenticate`` returns a valid admin identity. Tests that
need the auth failure path override the ``authenticate``
attribute locally."""
``authenticate`` returns a valid identity and ``authorise``
allows everything. Tests that need the failure paths override
the relevant attribute locally."""
auth = MagicMock()
auth.authenticate = AsyncMock(return_value=_valid_identity())
auth.authorise = AsyncMock(return_value=None)
return auth
@ -135,6 +136,7 @@ async def test_handle_normal_flow():
"""Valid bearer → handshake accepted, dispatcher created."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
dispatcher_created = False
async def mock_dispatcher_factory(ws, running, match_info):
@ -192,6 +194,7 @@ async def test_handle_exception_group_cleanup():
"""Test exception group triggers dispatcher cleanup."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
mock_dispatcher = AsyncMock()
mock_dispatcher.destroy = AsyncMock()
@ -262,6 +265,7 @@ async def test_handle_dispatcher_cleanup_timeout():
"""Test dispatcher cleanup with timeout."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
# Mock dispatcher that takes long to destroy
mock_dispatcher = AsyncMock()
@ -388,6 +392,7 @@ async def test_handle_websocket_already_closed():
"""Test handling when websocket is already closed."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
mock_dispatcher = AsyncMock()
mock_dispatcher.destroy = AsyncMock()