Merge branch 'release/v2.4'

This commit is contained in:
Cyber MacGeddon 2026-04-29 17:56:48 +01:00
commit f3434307c5
91 changed files with 10657 additions and 1218 deletions


@@ -75,6 +75,13 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: "Free up some disk space"
run: |
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
podman image prune --all --force
podman builder prune -a -f
- name: Docker Hub token
run: echo ${{ secrets.DOCKER_SECRET }} > docker-token.txt


@@ -0,0 +1,475 @@
#!/usr/bin/env python3
"""
WebSocket smoke / load test that hammers a TrustGraph gateway with a
mix of `embeddings`, `graph-embeddings`, and `triples` queries while
keeping a target number of in-flight requests at all times.
Useful for reproducing the "worker hangs after a while, all subsequent
requests time out" failure mode: it keeps enough load on the system to
saturate worker concurrency and reports per-service success/timeout
rates and latency distributions over time.
Usage:
smoke_ws_queries.py --flow onto-rag --duration 120 --concurrency 20
Connects via /api/v1/socket using the first-frame auth protocol.
"""
import argparse
import asyncio
import json
import os
import random
import statistics
import sys
import time
import uuid
from collections import defaultdict
from typing import Any
import websockets
DEFAULT_TEXT = (
"What caused the space shuttle to explode and what were the "
"main factors leading to the disaster?"
)
class Stats:
"""Per-service rolling counters and latency samples."""
def __init__(self) -> None:
self.sent = 0
self.ok = 0
self.err = 0
self.timeout = 0
self.latencies_ms: list[float] = []
def record_ok(self, latency_ms: float) -> None:
self.ok += 1
self.latencies_ms.append(latency_ms)
def record_err(self) -> None:
self.err += 1
def record_timeout(self) -> None:
self.timeout += 1
def percentile(self, p: float) -> float:
if not self.latencies_ms:
return 0.0
s = sorted(self.latencies_ms)
idx = min(len(s) - 1, int(len(s) * p))
return s[idx]
def summary(self) -> str:
if self.latencies_ms:
mn = min(self.latencies_ms)
mx = max(self.latencies_ms)
mean = statistics.mean(self.latencies_ms)
p50 = self.percentile(0.50)
p95 = self.percentile(0.95)
p99 = self.percentile(0.99)
lat = (
f"min={mn:.0f} mean={mean:.0f} p50={p50:.0f} "
f"p95={p95:.0f} p99={p99:.0f} max={mx:.0f} ms"
)
else:
lat = "no successful samples"
return (
f"sent={self.sent} ok={self.ok} err={self.err} "
f"timeout={self.timeout} | {lat}"
)
class WSClient:
"""Thin async websocket client with first-frame auth and a shared
reader task that demuxes responses to per-request asyncio queues."""
def __init__(
self, url: str, token: str | None, workspace: str,
ping_timeout: int,
) -> None:
self.url = url
self.token = token
self.workspace = workspace
self.ping_timeout = ping_timeout
self._ws: Any = None
self._pending: dict[str, asyncio.Queue] = {}
self._reader_task: asyncio.Task | None = None
self._closed = asyncio.Event()
async def connect(self) -> None:
ws_url = self.url.rstrip("/") + "/api/v1/socket"
if ws_url.startswith("http://"):
ws_url = "ws://" + ws_url[len("http://"):]
elif ws_url.startswith("https://"):
ws_url = "wss://" + ws_url[len("https://"):]
elif not (
ws_url.startswith("ws://") or ws_url.startswith("wss://")
):
ws_url = "ws://" + ws_url
self._ws = await websockets.connect(
ws_url,
ping_interval=20,
ping_timeout=self.ping_timeout,
max_size=64 * 1024 * 1024,
)
if self.token:
# First-frame auth handshake.
await self._ws.send(json.dumps({
"type": "auth", "token": self.token,
}))
raw = await asyncio.wait_for(self._ws.recv(), timeout=10)
resp = json.loads(raw)
if resp.get("type") != "auth-ok":
await self._ws.close()
raise RuntimeError(f"auth failed: {resp}")
if "workspace" in resp:
# Server-resolved workspace overrides the user-supplied
# one, mirroring AsyncSocketClient behaviour.
self.workspace = resp["workspace"]
else:
print(
"WARNING: no token provided — skipping auth handshake. "
"Requests will be rejected unless the gateway is "
"running without IAM enforcement.",
file=sys.stderr,
)
self._reader_task = asyncio.create_task(self._reader())
async def _reader(self) -> None:
try:
async for raw in self._ws:
msg = json.loads(raw)
rid = msg.get("id")
if rid and rid in self._pending:
await self._pending[rid].put(msg)
except websockets.exceptions.ConnectionClosed:
pass
except Exception as e:
for q in list(self._pending.values()):
try:
q.put_nowait({"error": {"message": str(e)}})
except Exception:
pass
finally:
self._closed.set()
async def request(
self, service: str, flow: str | None, body: dict, timeout: float,
) -> tuple[dict | None, str | None, float]:
"""Send one request, await final response.
Returns ``(response, error, latency_ms)``. ``response`` is None
on error/timeout. ``error`` describes the failure category.
"""
rid = str(uuid.uuid4())
q: asyncio.Queue = asyncio.Queue()
self._pending[rid] = q
env = {
"id": rid,
"workspace": self.workspace,
"service": service,
"request": body,
}
if flow:
env["flow"] = flow
t0 = time.monotonic()
try:
await self._ws.send(json.dumps(env))
while True:
try:
msg = await asyncio.wait_for(q.get(), timeout=timeout)
except asyncio.TimeoutError:
return None, "timeout", (time.monotonic() - t0) * 1000
if "error" in msg and msg["error"]:
err = msg["error"]
err_msg = (
err.get("message") if isinstance(err, dict) else str(err)
)
return None, f"error: {err_msg}", (time.monotonic() - t0) * 1000
if msg.get("complete"):
return msg.get("response"), None, (time.monotonic() - t0) * 1000
# Otherwise an intermediate streaming chunk — keep waiting.
finally:
self._pending.pop(rid, None)
async def close(self) -> None:
if self._ws is not None:
await self._ws.close()
if self._reader_task is not None:
try:
await asyncio.wait_for(self._reader_task, timeout=2)
except (asyncio.TimeoutError, asyncio.CancelledError):
pass
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description=__doc__)
p.add_argument(
"--url",
default=os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/"),
help="Gateway URL (http or ws). Default: %(default)s",
)
p.add_argument(
"--token",
default=os.getenv("TRUSTGRAPH_TOKEN"),
help="Auth token (or set TRUSTGRAPH_TOKEN). Optional — if "
"omitted, the auth handshake is skipped (only works "
"when the gateway is running without IAM enforcement).",
)
p.add_argument(
"--workspace", default="default",
help="Workspace. Default: %(default)s",
)
p.add_argument(
"--flow", required=True,
help="Flow id. Comma-separated for round-robin across flows "
"(e.g. onto-rag,doc-rag).",
)
p.add_argument(
"--duration", type=int, default=60,
help="Test duration in seconds. Default: %(default)s",
)
p.add_argument(
"--concurrency", type=int, default=15,
help="Target in-flight request count. Default: %(default)s",
)
p.add_argument(
"--services",
default="embeddings,graph-embeddings,triples",
help="Comma-separated services to exercise. "
"Default: %(default)s",
)
p.add_argument(
"--limit", type=int, default=3,
help="limit for triples / graph-embeddings queries. "
"Default: %(default)s",
)
p.add_argument(
"--collection", default="default",
help="Collection. Default: %(default)s",
)
p.add_argument(
"--text", default=DEFAULT_TEXT,
help="Text to embed for embeddings/seed.",
)
p.add_argument(
"--vector-dim", type=int, default=384,
help="Dimension of synthetic vector when --no-seed is used. "
"Default: %(default)s",
)
p.add_argument(
"--no-seed", action="store_true",
help="Skip the embeddings warm-up call. Use a random vector "
"for graph-embeddings queries instead.",
)
p.add_argument(
"--request-timeout", type=float, default=30.0,
help="Per-request timeout (seconds). Default: %(default)s",
)
p.add_argument(
"--report-interval", type=float, default=5.0,
help="How often to print stats (seconds). Default: %(default)s",
)
p.add_argument(
"--ping-timeout", type=int, default=120,
help="Websocket ping timeout. Default: %(default)s",
)
p.add_argument(
"--seed", type=int, default=None,
help="Random seed (for reproducibility).",
)
return p.parse_args()
async def seed_vector(
client: WSClient, flow: str, text: str, timeout: float,
) -> list[float]:
"""Issue one embeddings request to obtain a real vector that
later graph-embeddings calls can reuse."""
resp, err, _ = await client.request(
"embeddings", flow, {"texts": [text]}, timeout,
)
if err or not resp:
raise RuntimeError(f"seed embeddings failed: {err or resp}")
vectors = resp.get("vectors")
if not vectors:
raise RuntimeError(f"seed embeddings: no vectors in response: {resp}")
return vectors[0]
def make_request_body(
service: str, args: argparse.Namespace, vector: list[float],
) -> dict:
if service == "embeddings":
return {"texts": [args.text]}
if service == "graph-embeddings":
return {
"vector": vector,
"limit": args.limit,
"collection": args.collection,
}
if service == "triples":
return {
"limit": args.limit,
"collection": args.collection,
}
raise ValueError(f"Unknown service: {service}")
async def worker(
name: int,
client: WSClient,
flows: list[str],
services: list[str],
args: argparse.Namespace,
vector: list[float],
stats: dict[str, Stats],
in_flight: dict[str, int],
stop_at: float,
) -> None:
rng = random.Random((args.seed or 0) + name)
while time.monotonic() < stop_at:
svc = rng.choice(services)
flow = rng.choice(flows)
body = make_request_body(svc, args, vector)
stats[svc].sent += 1
in_flight[svc] += 1
try:
resp, err, lat = await client.request(
svc, flow, body, args.request_timeout,
)
if err == "timeout":
stats[svc].record_timeout()
elif err:
stats[svc].record_err()
else:
stats[svc].record_ok(lat)
except Exception as e:
stats[svc].record_err()
print(f"worker {name}: unexpected {svc} exception: {e}",
file=sys.stderr)
finally:
in_flight[svc] -= 1
async def reporter(
services: list[str],
stats: dict[str, Stats],
in_flight: dict[str, int],
stop_at: float,
interval: float,
) -> None:
started = time.monotonic()
last_sent = {s: 0 for s in services}
while time.monotonic() < stop_at:
await asyncio.sleep(interval)
now = time.monotonic()
elapsed = now - started
total_inflight = sum(in_flight.values())
print(
f"\n[{elapsed:6.1f}s] in-flight={total_inflight} "
f"per-svc={dict(in_flight)}"
)
for svc in services:
s = stats[svc]
delta = s.sent - last_sent[svc]
rate = delta / interval
last_sent[svc] = s.sent
print(f" {svc:20s} {rate:6.1f}/s | {s.summary()}")
async def run(args: argparse.Namespace) -> int:
if args.seed is not None:
random.seed(args.seed)
services = [s.strip() for s in args.services.split(",") if s.strip()]
flows = [f.strip() for f in args.flow.split(",") if f.strip()]
valid = {"embeddings", "graph-embeddings", "triples"}
bad = [s for s in services if s not in valid]
if bad:
print(f"ERROR: unknown service(s): {bad}. "
f"Supported: {sorted(valid)}", file=sys.stderr)
return 2
client = WSClient(
args.url, args.token, args.workspace, args.ping_timeout,
)
print(f"Connecting to {args.url} ...")
await client.connect()
print(f"Connected. workspace={client.workspace} flows={flows} "
f"services={services} concurrency={args.concurrency} "
f"duration={args.duration}s")
if "graph-embeddings" in services and not args.no_seed:
print("Seeding embedding vector ...")
vector = await seed_vector(
client, flows[0], args.text, args.request_timeout,
)
print(f"Got vector of length {len(vector)}")
else:
vector = [random.uniform(-1.0, 1.0) for _ in range(args.vector_dim)]
stats: dict[str, Stats] = defaultdict(Stats)
in_flight: dict[str, int] = defaultdict(int)
for svc in services:
stats[svc] # initialise
in_flight[svc] = 0
stop_at = time.monotonic() + args.duration
print(f"Starting load: {args.concurrency} workers for "
f"{args.duration}s ...")
workers = [
asyncio.create_task(
worker(
i, client, flows, services, args, vector,
stats, in_flight, stop_at,
)
)
for i in range(args.concurrency)
]
rep = asyncio.create_task(
reporter(services, stats, in_flight, stop_at, args.report_interval)
)
try:
await asyncio.gather(*workers)
finally:
rep.cancel()
try:
await rep
except asyncio.CancelledError:
pass
print("\n=== Final results ===")
any_failures = False
for svc in services:
s = stats[svc]
print(f" {svc:20s} {s.summary()}")
if s.timeout > 0 or s.err > 0:
any_failures = True
await client.close()
return 1 if any_failures else 0
def main() -> int:
args = parse_args()
try:
return asyncio.run(run(args))
except KeyboardInterrupt:
return 130
if __name__ == "__main__":
sys.exit(main())


@@ -0,0 +1,297 @@
---
layout: default
title: "Bootstrap Framework Technical Specification"
parent: "Tech Specs"
---
# Bootstrap Framework Technical Specification
## Overview
A generic, pluggable framework for running one-time initialisation steps
against a TrustGraph deployment — replacing the dedicated
`tg-init-trustgraph` container with a long-running processor that
converges the system to a desired initial state and then idles.
The framework is content-agnostic. It knows how to run, retry,
mark-as-done, and surface failures; the actual init work lives in
small pluggable classes called **initialisers**. Core initialisers
ship in the `trustgraph-flow` package; enterprise and third-party
initialisers can be loaded by dotted path without any core code
change.
## Motivation
The existing `tg-init-trustgraph` is a one-shot CLI run in its own
container. It performs two very different jobs (Pulsar topology
setup and config seeding) in a single script, is wasteful as a whole
container, cannot handle partial-success states, and has no way to
extend the boot process with enterprise-specific concerns (user
provisioning, workspace initialisation, IAM scaffolding) without
forking the tool.
A pluggable, long-running reconciler addresses all of this and slots
naturally into the existing processor-group model.
## Design
### Bootstrapper Processor
A single `AsyncProcessor` subclass. One entry in a processor group.
Parameters include the processor's own identity and a list of
**initialiser specifications** — each spec names a class (by dotted
path), a unique instance name, a flag string, and the parameters
that will be passed to the initialiser's constructor.
On each wake the bootstrapper does the following, in order:
1. Open a short-lived context (config client, flow-svc client,
logger). The context is torn down at the end of the wake so
steady-state idle cost is effectively nil.
2. Run all **pre-service initialisers** (those that opt out of the
service gate — principally `PulsarTopology`, which must run
before the services it gates on can even come up).
3. Check the **service gate**: cheap round-trips to config-svc and
flow-svc. If either fails, skip to the sleep step using the
short gate-retry cadence.
4. Run all **post-service initialisers** that haven't already
completed at the currently-configured flag.
5. Sleep. Cadence adapts to state (see below).
### Initialiser Contract
An initialiser is a class with:
- A class-level `name` identifier, unique within the bootstrapper's
configuration. This is the key under which completion state is
stored.
- A class-level `wait_for_services` flag. When `True` (the default)
the initialiser runs only after the service gate passes. When
`False`, it runs before the gate, on every wake.
- A constructor that accepts the initialiser's own params as kwargs.
- An async `run(ctx, old_flag, new_flag)` method that performs the
init work and returns on success. Any raised exception is
logged and treated as a transient failure — the stored flag is
not updated and the initialiser will re-run on the next cycle.
`old_flag` is the previously-stored flag string, or `None` if the
initialiser has never successfully run in this deployment. `new_flag`
is the flag the operator has configured for this run. This pair
lets an initialiser distinguish a clean first-run from a migration
between flag versions and behave accordingly (see "Flag change and
re-run safety" below).
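As a rough illustration, a minimal initialiser satisfying this contract might look like the sketch below. The `Initialiser` base-class name and module layout are assumptions for illustration; only the `name` / `wait_for_services` attributes and the `run(ctx, old_flag, new_flag)` signature come from this specification.
```python
# Hedged sketch of the initialiser contract; the base-class name is assumed.
class Initialiser:
    name: str = ""                  # unique within the bootstrapper's configuration
    wait_for_services: bool = True  # False only for pre-gate infrastructure setup

    def __init__(self, **params):
        # Params come verbatim from the initialiser specification.
        self.params = params

    async def run(self, ctx, old_flag, new_flag):
        raise NotImplementedError


class ExampleSeed(Initialiser):
    """Distinguishes a clean first run from a flag-bump re-run."""
    name = "example-seed"

    async def run(self, ctx, old_flag, new_flag):
        if old_flag is None:
            ctx.logger.info("clean first run at flag %s", new_flag)
        else:
            ctx.logger.info("flag bump %s -> %s, re-applying", old_flag, new_flag)
        # Idempotent init work via ctx.config / ctx.flow goes here; any
        # exception raised is logged and retried on the next cycle.
```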
### Context
The context is the bootstrapper-owned object passed to every
initialiser's `run()` method. Its fields are deliberately narrow:
| Field | Purpose |
|---|---|
| `logger` | A child logger named for the initialiser instance |
| `config` | A short-lived `ConfigClient` for config-svc reads/writes |
| `flow` | A short-lived `RequestResponse` client for flow-svc |
The context is always fully-populated regardless of which services
a given initialiser uses, for symmetry. Additional fields may be
added in future without breaking existing initialisers. Clients are
started at the beginning of a wake cycle and stopped at the end.
Initialisers that need services beyond config-svc and flow-svc are
responsible for their own readiness checks and for raising cleanly
when a prerequisite is not met.
### Completion State
Per-initialiser completion state is stored in the reserved
`__system__` workspace, under a dedicated config type for bootstrap
state. The stored value is the flag string that was configured when
the initialiser last succeeded.
On each cycle, for each initialiser, the bootstrapper reads the
stored flag and compares it to the currently-configured flag. If
they match, the initialiser is skipped silently. If they differ,
the initialiser runs; on success, the stored flag is updated.
Because the state lives in a reserved (`_`-prefixed) workspace, it
is stored by config-svc but excluded from the config push broadcast.
Live processors never see it and cannot act on it.
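A sketch of that per-initialiser check, under the assumption of a simple get/put config API over the `__system__` workspace and the `init-state` type (the real `ConfigClient` interface may differ):
```python
# Assumed ConfigClient-style helpers; workspace and type names are from this spec.
async def read_flag(ctx, name):
    return await ctx.config.get("__system__", "init-state", name)  # None if never run

async def store_flag(ctx, name, flag):
    await ctx.config.put("__system__", "init-state", name, flag)

async def reconcile_one(ctx, init, configured_flag):
    """Compare flags, run if they differ, store the new flag only on success."""
    stored_flag = await read_flag(ctx, init.name)
    if stored_flag == configured_flag:
        return True                       # already converged; skip silently
    try:
        await init.run(ctx, stored_flag, configured_flag)
    except Exception:
        ctx.logger.exception("initialiser %s failed; will retry next cycle", init.name)
        return False                      # stored flag left untouched
    await store_flag(ctx, init.name, configured_flag)
    return True
```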
### The Service Gate
The gate is a cheap, bootstrapper-internal check that config-svc
and flow-svc are both reachable and responsive. It is intentionally
a simple pair of low-cost round-trips — a config list against
`__system__` and a flow-svc `list-blueprints` — rather than any
deeper health check.
Its purpose is to avoid filling logs with noise and to concentrate
retry effort during the brief window when services are coming up.
The gate is applied only to initialisers with
`wait_for_services=True` (the default); `False` is reserved for
initialisers that set up infrastructure the gate itself depends on.
### Adaptive Cadence
The sleep between wake cycles is chosen from three tiers based on
observed state:
| Tier | Duration | When |
|---|---|---|
| Gate backoff | ~5 s | Services not responding — concentrate retry during startup |
| Init retry | ~15 s | Gate passes but at least one initialiser is not yet at its configured flag — transient failures, waiting on prereqs, recently-bumped flag not yet applied |
| Steady | ~300 s | All configured initialisers at their configured flag; gate passes; nothing to do |
The short tiers ensure a fresh deployment converges quickly;
steady state costs a single round-trip per initialiser every few
minutes.
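The tier selection reduces to a couple of comparisons; a sketch (constants mirror the approximate durations in the table above):
```python
GATE_BACKOFF_S, INIT_RETRY_S, STEADY_S = 5, 15, 300

def next_sleep(gate_ok: bool, all_converged: bool) -> int:
    """Pick the sleep before the next wake cycle from the three tiers."""
    if not gate_ok:
        return GATE_BACKOFF_S      # services not yet responding
    if not all_converged:
        return INIT_RETRY_S        # at least one initialiser not at its flag
    return STEADY_S                # nothing to do
```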
### Failure Handling
An initialiser raising an exception does not stop the bootstrapper
or block other initialisers. Each initialiser in the cycle is
attempted independently; failures are logged and retried on the next
cycle. This means there is no ordered-DAG enforcement: order of
initialisers in the configuration determines the attempt order
within a cycle, but a dependency between two initialisers is
expressed by the dependant raising cleanly when its prerequisite
isn't satisfied. Over successive cycles the system converges.
### Flag Change and Re-run Safety
Each initialiser's completion state is a string flag chosen by the
operator. Typically these follow a simple version pattern
(`v1`, `v2`, ...), but the bootstrapper imposes no format.
Changing the flag in the group configuration causes the
corresponding initialiser to re-run on the next cycle. Initialisers
must be written so that re-running after a flag bump is safe — they
receive both the previous and the new flag and are responsible for
either cleanly re-applying the work or performing a step-change
migration from the prior state.
This gives operators an explicit, visible mechanism for triggering
re-initialisation. Re-runs are never implicit.
## Core Initialisers
The following initialisers ship in `trustgraph.bootstrap.initialisers`
and cover the base deployment case.
### PulsarTopology
Creates the Pulsar tenant and the four namespaces
(`flow`, `request`, `response`, `notify`) with appropriate
retention policies if they don't exist.
Opts out of the service gate (`wait_for_services = False`) because
config-svc and flow-svc cannot come online until the Pulsar
namespaces exist.
Parameters: Pulsar admin URL, tenant name.
Idempotent via the admin API (GET-then-PUT). Flag change causes
re-evaluation of all namespaces; any absent are created.
### TemplateSeed
Populates the reserved `__template__` workspace from an external
JSON seed file. The seed file has the standard shape of
`{config-type: {config-key: value}}`.
Runs post-gate. Parameters: path to the seed file, overwrite
policy (upsert-missing only, or overwrite-all).
On clean run, writes the whole file. On flag change, behaviour
depends on the overwrite policy — typically upsert-missing so
that operator-customised keys are preserved across seed-file
upgrades.
### WorkspaceInit
Creates a named workspace and populates it from the seed file or
from the full contents of the `__template__` workspace.
Runs post-gate. Parameters: workspace name, source (seed file or
`__template__`), optional `seed_file` path, `overwrite` flag.
When `source` is `template`, the initialiser copies every config
type and key present in `__template__` — there is no per-type
selection. Deployments that want to seed only a subset should
either curate the seed file they feed to `TemplateSeed` or use
`source: seed-file` directly here.
Raises cleanly if its source does not exist — depends on
`TemplateSeed` having run in the same cycle or a prior one.
### DefaultFlowStart
Starts a specific flow in a specific workspace using a specific
blueprint.
Runs post-gate. Parameters: workspace name, flow id, blueprint
name, description, optional parameter overrides.
Separated from `WorkspaceInit` deliberately so that deployments
which want a workspace without an auto-started flow can simply omit
this initialiser from their bootstrap configuration.
## Extensibility
New initialisers are added by:
1. Subclassing the initialiser base class.
2. Implementing `run(ctx, old_flag, new_flag)`.
3. Choosing `wait_for_services` (almost always `True`).
4. Adding an entry in the bootstrapper's configuration with the new
class's dotted path.
No core code changes are required to add an enterprise or third-party
initialiser. Enterprise builds ship their own package with their own
initialiser classes (e.g. `CreateAdminUser`, `ProvisionWorkspaces`)
and reference them in the bootstrapper config alongside the core
initialisers.
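A hypothetical bootstrapper configuration combining core and enterprise initialisers might look like the following. Field names and exact layout are illustrative; the spec fixes only what each entry must carry (dotted class path, unique instance name, flag string, constructor params).
```python
INITIALISERS = [
    {
        "class": "trustgraph.bootstrap.initialisers.PulsarTopology",
        "name": "pulsar-topology",
        "flag": "v1",
        "params": {"admin_url": "http://pulsar:8080", "tenant": "tg"},
    },
    {
        "class": "trustgraph.bootstrap.initialisers.TemplateSeed",
        "name": "template-seed",
        "flag": "v1",
        "params": {"seed_file": "/config/seed.json", "overwrite": "upsert-missing"},
    },
    {
        # Enterprise/third-party initialiser, loaded by dotted path; the
        # package name here is invented for the example.
        "class": "acme_enterprise.bootstrap.CreateAdminUser",
        "name": "create-admin",
        "flag": "v1",
        "params": {"username": "admin"},
    },
]
```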
## Reserved Workspaces
This specification relies on the "reserved workspace" convention:
- Any workspace id beginning with `_` is reserved.
- Reserved workspaces are stored normally by config-svc but never
appear in the config push broadcast.
- Live processors cannot react to reserved-workspace state.
The bootstrapper uses two reserved workspaces:
- `__template__` — factory-default seed config, readable by
initialisers that copy-from-template.
- `__system__` — bootstrapper completion state (under the
`init-state` config type) and any other system-internal bookkeeping.
See the reserved-workspace convention in the config service for
the general rule and its enforcement.
## Non-Goals
- No DAG scheduling across initialisers. Dependencies are expressed
by the dependant failing cleanly until its prerequisite is met,
and convergence over subsequent cycles.
- No parallel execution of initialisers within a cycle. A cycle runs
each initialiser sequentially.
- No implicit re-runs. Re-running an initialiser requires an explicit
flag change by the operator.
- No cross-initialiser atomicity. Each initialiser's completion is
recorded independently on its own success.
## Operational Notes
- Running the bootstrapper as a processor-group entry replaces the
previous `tg-init-trustgraph` container. The bootstrapper is also
CLI-invocable directly for standalone testing via
`Processor.launch(...)`.
- First-boot convergence is typically a handful of short cycles
followed by a transition to the steady cadence. Deployments
should expect the first few minutes of logs to show
initialisation activity, thereafter effective silence.
- Bumping a flag is a deliberate operational act. The log line
emitted on re-run makes the event visible for audit.


@@ -0,0 +1,273 @@
---
layout: default
title: "Capability Vocabulary Technical Specification"
parent: "Tech Specs"
---
# Capability Vocabulary Technical Specification
## Overview
Every gateway endpoint maps to exactly one *capability* — a string
from a closed vocabulary defined in this document. When the
gateway authorises a request, it hands the IAM regime four things:
the authenticated identity, the required capability, the
operation's resource (the structured identifier of what's being
operated on), and the operation's parameters. The IAM regime
decides allow or deny; see the [IAM contract](iam-contract.md) for
the full abstraction.
A capability is a **permission**, not a structural classification.
`graph:read` says "the caller may read graphs"; it does not say
where graphs live or how they are addressed. The shape of a
request — whether workspace appears in the URL, the envelope, or
the body, and whether it is a resource address component or an
operation parameter — is determined by what the operation operates
on, not by what permission it requires. Permission and structure
are orthogonal; the contract takes both.
This document defines:
- The **capability vocabulary** — the closed list of capability
strings the gateway uses as input to `authorise`. All IAM
regimes share this vocabulary; that's the only schema the
gateway and the IAM regime have to agree on.
- The **open-source role bundles** — the role-and-scope table the
OSS IAM regime uses to answer `authorise` calls. Other regimes
answer the same call differently; the bundles below are an
OSS-specific implementation detail, not a contract assertion.
A regime may evaluate `authorise` using role bundles (OSS), IdP
group memberships, attribute-based policies, relationship tuples,
or any other mechanism. The gateway is unaware of which. The
capability strings — and the resource component vocabulary the
gateway populates alongside them — are the only thing both sides
have to agree on.
## Motivation
The original IAM spec used hierarchical "minimum role" checks
(`admin` implies `writer` implies `reader`). That shape is simple
but paints the role model into a corner: any enterprise need to
grant a subset of admin abilities (helpdesk that can reset
passwords but not edit flows; analyst who can query but not ingest)
requires a protocol-level change.
A capability vocabulary decouples "what a request needs" from
"what roles a user has" and makes the role table pure data. The
open-source bundles can stay coarse while the enterprise role
table expands without any code movement.
## Design
### Capability string format
`<subsystem>:<verb>` or `<subsystem>` (for capabilities with no
natural read/write split). All lowercase, kebab-case for
multi-word subsystems.
### Capability list
**Data plane**
| Capability | Covers |
|---|---|
| `agent` | agent (query-only; no write counterpart) |
| `graph:read` | graph-rag, graph-embeddings-query, triples-query, sparql, graph-embeddings-export, triples-export |
| `graph:write` | triples-import, graph-embeddings-import |
| `documents:read` | document-rag, document-embeddings-query, document-embeddings-export, entity-contexts-export, document-stream-export, library list / fetch |
| `documents:write` | document-embeddings-import, entity-contexts-import, text-load, document-load, library add / replace / delete |
| `rows:read` | rows-query, row-embeddings-query, nlp-query, structured-query, structured-diag |
| `rows:write` | rows-import |
| `llm` | text-completion, prompt (stateless invocation) |
| `embeddings` | Raw text-embedding service (stateless compute; typed-data embedding stores live under their data-subject capability) |
| `mcp` | mcp-tool |
| `collections:read` | List / describe collections |
| `collections:write` | Create / delete collections |
| `knowledge:read` | List / get knowledge cores |
| `knowledge:write` | Create / delete knowledge cores |
**Control plane**
| Capability | Covers |
|---|---|
| `config:read` | Read workspace config |
| `config:write` | Write workspace config |
| `flows:read` | List / describe flows, blueprints, flow classes |
| `flows:write` | Start / stop / update flows |
| `users:read` | List / get users within the workspace |
| `users:write` | Create / update / disable users within the workspace |
| `users:admin` | Assign / remove roles on users within the workspace |
| `keys:self` | Create / revoke / list **own** API keys |
| `keys:admin` | Create / revoke / list **any user's** API keys within the workspace |
| `workspaces:admin` | Create / delete / disable workspaces (system-level) |
| `iam:admin` | JWT signing-key rotation, IAM-level operations |
| `metrics:read` | Prometheus metrics proxy |
### Open-source role bundles
The open-source edition ships three roles:
| Role | Capabilities |
|---|---|
| `reader` | `agent`, `graph:read`, `documents:read`, `rows:read`, `llm`, `embeddings`, `mcp`, `collections:read`, `knowledge:read`, `flows:read`, `config:read`, `keys:self` |
| `writer` | everything in `reader` **+** `graph:write`, `documents:write`, `rows:write`, `collections:write`, `knowledge:write` |
| `admin` | everything in `writer` **+** `config:write`, `flows:write`, `users:read`, `users:write`, `users:admin`, `keys:admin`, `workspaces:admin`, `iam:admin`, `metrics:read` |
Open-source bundles are deliberately coarse. `workspaces:admin` and
`iam:admin` live inside `admin` without a separate role; a single
`admin` user holds the keys to the whole deployment.
### The `agent` capability and composition
The `agent` capability is granted independently of the capabilities
it composes under the hood (`llm`, `graph`, `documents`, `rows`,
`mcp`, etc.). A user holding `agent` but not `llm` can still cause
LLM invocations because the agent implementation chooses which
services to invoke on the caller's behalf.
This is deliberate. A common policy is "allow controlled access
via the agent, deny raw model calls" — granting `agent` without
granting `llm` expresses exactly that. An administrator granting
`agent` should treat it as a grant of everything the agent
composes at deployment time.
### Authorisation evaluation (OSS regime)
This section describes how the OSS IAM regime answers
`authorise(identity, capability, resource, parameters)`. Other
regimes answer the same contract differently; only the inputs (the
capability vocabulary, the resource components, the parameter
shape) are shared.
For a request bearing a resolved set of roles
`R = {r1, r2, ...}`, a required capability `c`, a resource, and
parameters:
```
let target_workspace =
       resource.workspace       (workspace-/flow-level resources)
    or parameters.workspace     (system-level resources whose
                                 parameters reference a workspace)
    or unset                    (system-level operations with no
                                 workspace context)

allow if some role r in R has c in its capability bundle
      and (target_workspace is unset
           or r's workspace_scope permits target_workspace)
```
The OSS regime takes the workspace from whichever role it plays in
the operation:
- For workspace-level and flow-level resources, the workspace lives
in `resource.workspace` and that is what the role's scope is
checked against.
- For system-level resources whose operation parameters reference a
workspace (e.g. `create-user with workspace association W`),
workspace lives in `parameters.workspace` and that is what the
role's scope is checked against. The resource is system-level
(`resource = {}`) but the workspace constraint still bites.
- For system-level operations with no workspace context (e.g.
`bootstrap`, `rotate-signing-key`), the workspace-scope check
collapses — only capability-bundle membership matters.
No hierarchy, no precedence, no role-order sensitivity. A user
with a single role is the common case; a user with multiple roles
is allowed if any role independently grants both the capability
and the relevant workspace scope.
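Transcribed into Python, under an assumed role representation (a capability set plus a workspace-scope check), the evaluation is:
```python
from dataclasses import dataclass, field

@dataclass
class Role:
    capabilities: set[str]
    workspace_scope: set[str] = field(default_factory=set)  # workspaces this role may act in

    def permits_workspace(self, workspace: str) -> bool:
        return workspace in self.workspace_scope

def authorise(roles: list[Role], capability: str,
              resource: dict, parameters: dict) -> str:
    # Workspace can arrive as a resource address component or as a parameter.
    target_ws = resource.get("workspace") or parameters.get("workspace")
    for r in roles:
        if capability not in r.capabilities:
            continue
        if target_ws is None or r.permits_workspace(target_ws):
            return "allow"   # some role independently grants capability + scope
    return "deny"
```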
### Enforcement boundary
Capability checks — and authentication — are applied **only at the
API gateway**, on requests arriving from external callers.
Operations originating inside the platform (backend service to
backend service, agent to LLM, flow-svc to config-svc, bootstrap
initialisers, scheduled reconcilers, autonomous flow steps) are
**not capability-checked**. Backend services trust the workspace
set by the gateway on inbound pub/sub messages and trust
internally-originated messages without further authorisation.
This policy has four consequences that are part of the spec, not
accidents of implementation:
1. **The gateway is the single trust boundary for user
authorisation.** Every backend service is a downstream consumer
of an already-authorised workspace scope.
2. **Pub/sub carries workspace, not user identity.** Messages on
the bus do not carry credentials or the identity that originated
a request; they carry the resolved workspace only. This keeps
the bus protocol free of secrets and aligns with the workspace
resolver's role as the gateway-side narrowing step.
3. **Composition is transitive.** Granting a capability that the
platform composes internally (for example, `agent`) transitively
grants everything that capability composes under the hood,
because the downstream calls are internal-origin and are not
re-checked. The composite nature of `agent` described above is
a consequence of this policy, not a special case.
4. **Internal-origin operations have no user.** Bootstrap,
reconcilers, and other platform-initiated work act with
system-level authority. The workspace field on such messages
identifies which workspace's data is being touched, not who
asked.
**Trust model.** Whoever has pub/sub access is implicitly trusted
to act as any workspace. Defense-in-depth within the backend is
not part of this design; the security perimeter is the gateway
and the bus itself (TLS / network isolation between the bus and
any untrusted network).
### Unknown capabilities and unknown roles
- An endpoint declaring an unknown capability is a server-side bug
and fails closed (403, logged).
- A user carrying a role name that is not defined in the role table
is ignored for authorisation purposes and logged as a warning.
Behaviour is deterministic: unknown roles contribute zero
capabilities.
### Capability scope
Every capability is **implicitly scoped to the caller's resolved
workspace**. A `users:write` capability does not permit a user
in workspace `acme` to create users in workspace `beta` — the
workspace-resolver has already narrowed the request to one
workspace before the capability check runs. See the IAM
specification for the workspace-resolver contract.
The three exceptions are the system-level capabilities
`workspaces:admin` and `iam:admin`, which operate across
workspaces by definition, and `metrics:read`, which returns
process-level series not scoped to any workspace.
## Enterprise extensibility
Enterprise editions extend the role table additively:
```
data-analyst: {query, library:read, collections:read, knowledge:read}
helpdesk: {users:read, users:write, users:admin, keys:admin}
data-engineer: writer + {flows:read, config:read}
workspace-owner: admin - {workspaces:admin, iam:admin}
```
None of this requires a protocol change — the wire-protocol `roles`
field on user records is already a set, the gateway's
capability-check is already capability-based, and the capability
vocabulary is closed. Enterprises may introduce roles whose bundles
compose the same capabilities differently.
When an enterprise introduces a new capability (e.g. for a feature
that does not exist in open source), the capability string is
added to the vocabulary and recognised by the gateway build that
ships that feature.
## References
- [IAM Contract Specification](iam-contract.md) — the abstract
gateway↔IAM regime contract; capability strings are inputs to
`authorise`.
- [Identity and Access Management Specification](iam.md)
- [IAM Service Protocol Specification](iam-protocol.md) — the OSS
regime's wire-level protocol.
- [Architecture Principles](architecture-principles.md)


@@ -22,8 +22,16 @@ are the boundaries around data, and who owns what?
A workspace is the primary isolation boundary. It represents an
organisation, team, or independent operating unit. All data belongs to
exactly one workspace. Cross-workspace access is never permitted through
the API.
exactly one workspace.
Cross-workspace access through the API is gated by the IAM regime
(see [`iam-contract.md`](iam-contract.md)). In the OSS distribution,
the role table defined in [`capabilities.md`](capabilities.md)
permits cross-workspace operation only to the `admin` role; the
`reader` and `writer` roles are constrained to a single assigned
workspace per credential. Other regimes can model the relationship
between identity and workspace differently — the gateway makes no
assumption.
A workspace owns:
- Source documents
@@ -279,9 +287,18 @@ A typical workflow:
The current codebase uses a `user` field in message metadata and storage
partition keys to identify the workspace. The `collection` field
identifies the collection within that workspace. The IAM spec describes
how the gateway maps authenticated credentials to a workspace identity
and sets these fields.
identifies the collection within that workspace.
The gateway is the single point at which workspace gets stamped onto
outbound pub/sub messages. An incoming credential authenticates to a
workspace (the credential's binding, not a user-to-workspace lookup —
see [`iam-contract.md`](iam-contract.md) and the *Identity surface*
section of [`iam.md`](iam.md)); any caller-supplied workspace on the
request is reconciled against the authenticated identity by the IAM
regime; the resolved value is what the gateway writes into outgoing
messages and the storage layers' partition keys. Backend services
trust the workspace they receive — defense-in-depth happens at the
gateway, not at the bus.
For details on how each storage backend implements this scoping, see:
@@ -302,7 +319,10 @@ For details on how each storage backend implements this scoping, see:
## References
- [Identity and Access Management](iam.md)
- [IAM Contract](iam-contract.md) — gateway↔IAM regime abstraction.
- [Identity and Access Management](iam.md) — gateway-side framing.
- [Capability Vocabulary](capabilities.md) — capability strings and
the OSS role bundles that decide cross-workspace eligibility.
- [Collection Management](collection-management.md)
- [Entity-Centric Graph](entity-centric-graph.md)
- [Neo4j User Collection Isolation](neo4j-user-collection-isolation.md)


@@ -0,0 +1,403 @@
---
layout: default
title: "IAM Contract Technical Specification"
parent: "Tech Specs"
---
# IAM Contract Technical Specification
## Overview
The IAM contract is the abstraction between the API gateway and any
identity / access management regime that fronts it. The gateway
treats IAM as a black box behind two operations — *authenticate* and
*authorise* — plus a small surface of management operations. No
regime-specific concept (roles, scopes, groups, claims, policy
languages) is visible to the gateway, and no gateway-specific
concept (capability vocabulary, request anatomy) is visible to
backend services.
The TrustGraph open-source distribution ships one IAM regime — a
role-based implementation defined in
[`iam-protocol.md`](iam-protocol.md) — that is one implementation of
this contract. Enterprise editions can replace it with a different
regime (OIDC / SSO, ABAC, ReBAC, external policy engine) without
changing the gateway, the wire protocol, or the backends.
## Motivation
Authorisation models vary by deployment. A small team might be
happy with three predefined roles; an enterprise might need
group-mapping from an upstream IdP, attribute-based policies, or
relationship-based access control. Hard-wiring any one of those
into the gateway forces every other regime to either compromise its
model or be re-implemented.
A narrow contract — "authenticate this credential" and "may this
identity perform this operation on this resource" — captures what
the gateway actually needs to know without committing to a policy
shape. The IAM regime owns the policy decision; the gateway is a
generic enforcement point.
## Operations
### `authenticate`
```
authenticate(credential: bytes) → Identity | AuthFailure
```
Validates a credential the client presented. The gateway treats
the credential as opaque bytes — for the OSS regime today that's
either an API key plaintext or a JWT, but the gateway does not
parse them; the IAM regime decides.
On success, returns an `Identity`. On any failure the IAM regime
returns the same opaque `AuthFailure` — never a description of which
condition failed. This is the spec's masked-error rule: an
attacker probing the endpoint cannot distinguish "no such key",
"expired", "wrong signature", "revoked", "user disabled", etc.
### `authorise`
```
authorise(identity: Identity,
capability: str,
resource: Resource,
parameters: dict)
→ Decision
```
Asks whether the identity is permitted to perform the named
capability on the named resource, given the operation's
parameters. Returns `allow` or `deny`. `identity` is whatever
`authenticate` returned for this caller; the gateway never
decomposes it.
The four arguments separate concerns:
- **`identity`** — who is asking.
- **`capability`** — what permission they are exercising (e.g.
`users:write`, `graph:read`). Permission, not structure.
- **`resource`** — what is being operated on, as a structured
identifier. See *The Resource model* below.
- **`parameters`** — operation-specific data that the regime may
need to consider beyond the resource identifier. Used when a
decision depends on attributes the request supplies — e.g.
creating a user *with workspace association W*: the resource is
the system-level user registry, and W is a parameter the regime
checks against the caller's permissions for `users:write`.
Different regimes use the four arguments differently — one regime
might evaluate role bundles whose grants carry workspace scope;
another might consult upstream IdP group memberships; an ABAC
regime evaluates a policy with all four as inputs. The contract
is unchanged.
### `authorise_many`
```
authorise_many(identity: Identity,
checks: list[(str, Resource, dict)])
→ list[Decision]
```
Bulk variant of `authorise`. Same semantics, one round-trip for
many decisions. Used when an operation fans out to multiple
resources (e.g. an agent that touches several workspaces) and a
single permission check isn't sufficient.
`authorise_many` is not just a performance optimisation; it pins
the contract for fan-out operations early, before clients (or
internal callers) build patterns that assume
one-permission-check-per-request. Regimes implement it as a loop over `authorise`
unless they have a more efficient path.
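A regime with no bulk-efficient path can satisfy `authorise_many` with the trivial loop; a sketch (the `BaseRegime` class name is illustrative):
```python
class BaseRegime:
    async def authorise(self, identity, capability, resource, parameters):
        raise NotImplementedError

    async def authorise_many(self, identity, checks):
        # One authorise() per (capability, resource, parameters) tuple,
        # decisions returned in the same order as the checks.
        return [
            await self.authorise(identity, capability, resource, parameters)
            for capability, resource, parameters in checks
        ]
```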
### Management operations
Beyond the request-time `authenticate` / `authorise`, the contract
also covers identity-lifecycle and credential-lifecycle operations
that are invoked by administrative requests rather than by the
authentication path. These are regime-specific in detail (an SSO
regime that delegates user management to the IdP may not implement
most of them) but the operation set the gateway can forward is:
- User management: `create-user`, `list-users`, `get-user`,
`update-user`, `disable-user`, `enable-user`, `delete-user`
- Credential management: `create-api-key`, `list-api-keys`,
`revoke-api-key`, `change-password`, `reset-password`
- Workspace management: `create-workspace`, `list-workspaces`,
`get-workspace`, `update-workspace`, `disable-workspace`
- Session management: `login`, `whoami`
- Key management: `get-signing-key-public`, `rotate-signing-key`
- Bootstrap: `bootstrap`, `bootstrap-status`
`whoami` is the self-read counterpart to `get-user`: any
authenticated caller can read their own identity record without
holding a user-management capability. It is the gating-free probe
a UI uses to render affordances appropriate to the caller's role.
`bootstrap-status` is a side-effect-free probe of whether an
unconsumed `bootstrap` call would currently succeed. It exists so
a first-run UI can decide whether to render setup without invoking
the consuming `bootstrap` op. Public — no authentication.
A regime that does not support one of these (e.g. an SSO regime
where users are managed in the IdP) returns a defined "not
supported" error; the gateway surfaces it as a 501.
### Actor injection
For any management operation forwarded by the gateway after
authentication, the gateway injects the authenticated caller's
`handle` as an `actor` field on the request. Regimes use `actor`
to identify *who is making the request* — distinct from the
operation's target (which lives in `user_id` / `key_id` /
`workspace_record` / etc.) — for purposes such as:
- Self-service operations (`whoami`, `change-password`) that
resolve "the caller" without taking a target argument.
- Audit logging, where the actor is recorded against the change.
- Decisions that depend on the resolved resource state. The
gateway authorises against the parameters on the request, but it
cannot know the resolved resource's actual properties (e.g. the
workspace association of a target user) before the regime loads
it. When that matters, the regime can re-decide using the
actor's permissions and the resolved record — closing a class
of cases the gateway-side check can't see.
Caller-supplied `actor` values on the request body are overwritten
by the gateway — the gateway is the only authority for actor
identity, and a regime that consults `actor` can rely on it being
authentic.
## The `Identity` surface
`Identity` is *mostly* opaque. The gateway holds the value as a
token to quote back when calling `authorise`, never decomposing it.
But there are a few gateway-side concerns that need a small
surface:
| Field | Purpose |
|---|---|
| `handle` | Opaque reference passed back to `authorise`. Regime-defined; gateway treats as a string. |
| `workspace` | The workspace this credential authenticates to. Used by the gateway only as a default-fill-in for operations that omit a workspace. Never used as policy input — when authorisation needs to know which workspace the operation acts on, the operation places it in the resource address (or a parameter), and the regime decides. |
| `principal_id` | Stable identifier the gateway logs for audit (a user id, a sub claim, a service account id). Never used for authorisation — that's `authorise`'s job. |
| `source` | How the credential was presented (`api-key`, `jwt`, …). Non-policy; useful for logs and metrics only. |
Anything else — roles, claims, group memberships, policy attributes
— stays inside the regime and is reachable only via `authorise`.
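A sketch of the gateway-side view of that surface (the dataclass itself is illustrative; only the four fields and their roles come from the table above):
```python
from dataclasses import dataclass

@dataclass(frozen=True)
class Identity:
    handle: str        # opaque; quoted back to authorise(), never decomposed
    workspace: str     # default-fill-in for requests that omit a workspace; not policy input
    principal_id: str  # stable identifier, audit logging only
    source: str        # "api-key", "jwt", ...; logs and metrics only
```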
## The `Resource` model
A `Resource` is a structured value identifying *what is being
operated on*. Resources live at one of three levels in TrustGraph,
based on where the resource exists in the deployment:
### Resource levels
| Level | What lives there | Resource shape |
|---|---|---|
| **System** | The user registry, the workspace registry, the signing key, the audit log — anything that exists once per deployment. | `{}` |
| **Workspace** | A workspace's config, flow definitions, library (documents), knowledge cores, collections — things that exist *within* a workspace. | `{workspace: "..."}` |
| **Flow** | A flow's knowledge graph, agent state, LLM context, embedding state, MCP context — things that exist *within* a flow within a workspace. | `{workspace: "...", flow: "..."}` |
Note carefully:
- **Users are a system-level resource.** A user record exists at
the deployment level; the fact that a user has a *workspace
association* (one in OSS, possibly many in other regimes) is a
property of the user record, not a containment. Operations on
the user registry have `resource = {}`; the workspace
association appears as a *parameter*, not as a resource address
component.
- **Workspaces themselves are a system-level resource.** The
workspace registry exists at the deployment level. `create-workspace`
and `list-workspaces` are system-level operations;
the workspace identifier in their bodies is a parameter, not an
address.
- **A workspace's contents are workspace-level resources.** A
workspace's config, flows, library, etc. live within a
workspace. Their resource address is `{workspace: ...}`.
- **A flow's contents are flow-level resources.** Knowledge
graphs, agents, etc. live within a flow. Their resource
address is `{workspace: ..., flow: ...}`.
### Component vocabulary
| Component | Type | Meaning | Used by |
|---|---|---|---|
| `workspace` | string | Identifier of the workspace whose contents are being operated on | workspace-level and flow-level resource addresses |
| `flow` | string | Identifier of a flow within a workspace; always paired with `workspace` | flow-level resource addresses |
| `collection` | string | Reserved for finer-grained scoping within a workspace | future / enterprise |
| `document` | string | Reserved for per-document scoping | future / enterprise |
A `Resource` is a partial mapping of these components to values.
The level of the resource (system / workspace / flow) determines
which components must be present. An empty `{}` is the
system-level resource.
### Workspace as parameter vs. address
Workspace plays two distinct roles in operations and shows up in
two distinct places:
- **As a resource address component** — workspace identifies the
thing being operated on. Lives in `resource.workspace`. Example:
`config:read` reads the config *of* workspace W.
- **As an operation parameter** — workspace is data the operation
acts on or filters by, while the resource itself is system-level.
Lives in `parameters.workspace`. Example: `users:write`
creates a user *with workspace association* W; the resource is
the user registry (system), and W is a parameter.
These are not interchangeable. The IAM regime considers each role
separately; the OSS role table, for instance, applies workspace
scope to the address component when checking workspace-level
operations, and to a parameter when checking
"create-user-with-workspace-W". Both end up enforcing the admin's
scope, but through different code paths.
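As a concrete illustration, these are the argument bundles the gateway would hand to `authorise` in the two cases (the workspace id `acme` is invented for the example):
```python
# Workspace as a resource address component: read the config of workspace "acme".
check_read_config = {
    "capability": "config:read",
    "resource":   {"workspace": "acme"},
    "parameters": {},
}

# Workspace as an operation parameter: create a user with workspace association "acme".
check_create_user = {
    "capability": "users:write",
    "resource":   {},                        # system-level user registry
    "parameters": {"workspace": "acme"},
}
```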
### Extension rules
The vocabulary is closed but extensible. Adding a new component:
1. The component is added to the vocabulary in this spec, with a
defined name, type, and meaning.
2. Existing IAM regimes ignore unknown components (forward
compatibility — adding a new component does not break older
regimes that don't understand it).
3. Older gateways that don't populate a new component leave it
unset; regimes that need it for a decision treat "unset" as
"absent" and decide accordingly (typically: cannot grant
permission scoped to a component the gateway didn't supply).
A regime that wants stricter behaviour (e.g. fail-closed on
unknown components rather than ignoring them) declares so as part
of its own configuration; the contract default is "ignore unknown".
## Operation registry (gateway-side)
Mapping a request onto `(capability, resource, parameters)` is
service-specific — it cannot be inferred from the capability
alone. The gateway maintains an **operation registry** that
declares, per operation:
- The required capability.
- The resource level (system / workspace / flow) — determines the
shape of the resource identifier.
- How to extract the resource address components (workspace,
flow) from the request — from URL path, WebSocket envelope, or
body.
- Which body fields are operation parameters (and which of those
the IAM regime should see in the `parameters` argument).
This registry is part of the gateway's endpoint declarations, not
part of the IAM contract. The contract specifies what arguments
`authorise` receives; how the gateway populates them is its own
concern.
In the OSS gateway, registry keys follow these conventions:
| Pattern | Used by | Resource level |
|---|---|---|
| bare op name (`create-user`, `list-users`, `login`, …) | `/api/v1/iam` and the auth surface | system / workspace, per op |
| `<kind>:<op>` (`config:get`, `flow:list-blueprints`, `librarian:add-document`, …) | `/api/v1/{kind}` (workspace-scoped global services) | workspace |
| `flow-service:<kind>` (`flow-service:agent`, `flow-service:graph-rag`, …) | `/api/v1/flow/{flow}/service/{kind}` and the WS Mux | flow |
| `flow-import:<kind>` / `flow-export:<kind>` | `/api/v1/flow/{flow}/{import,export}/{kind}` streaming sockets | flow |
Keys are an OSS-gateway implementation detail — the contract does
not constrain naming. The conventions above exist so the registry
key is uniquely derivable from the request path and (where
applicable) body without ambiguity.
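A hypothetical shape for two registry entries, showing what each declaration has to capture (the field names and extraction hints are invented; the contract does not constrain how a gateway stores this):
```python
OPERATIONS = {
    "flow-service:graph-rag": {
        "capability": "graph:read",
        "level": "flow",                                     # resource = {workspace, flow}
        "resource_from": {"workspace": "envelope", "flow": "url-path"},
        "parameters": ["collection"],                        # body fields shown to authorise()
    },
    "create-user": {
        "capability": "users:write",
        "level": "system",                                   # resource = {}
        "resource_from": {},
        "parameters": ["workspace"],                         # workspace association is a parameter
    },
}
```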
## Caching
Both `authenticate` and `authorise` results are cached at the
gateway, on different policies:
- **`authenticate`** — cached by a hash of the credential. The OSS
gateway uses a fixed short TTL (currently 60 s) so that revoked
API keys and disabled users stop working within the TTL window
without any push mechanism. Regimes that want a different
behaviour can return an `expires` hint with the identity; the
gateway honours the smaller of `expires` and its own ceiling.
- **`authorise`** — cached by a hash of `(handle, capability,
resource, parameters)`. The regime returns a suggested TTL with
the decision; the gateway clamps it above by a deployment-set
ceiling (currently 60 s). Both allow and deny decisions are
cached; denies briefly, to avoid hammering the regime with
repeated rejected attempts.
The TTL ceiling caps the revocation latency window — a role
revoked at the regime takes effect at the gateway no later than
the ceiling. Operators that need stricter revocation can lower
the ceiling.
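A sketch of the decision-cache clamping rule (the cache structure is illustrative; the 60-second ceiling is the value quoted above):
```python
import time

TTL_CEILING_S = 60   # deployment-set ceiling; caps revocation latency

class DecisionCache:
    def __init__(self):
        self._entries = {}   # key: hash of (handle, capability, resource, parameters)

    def put(self, key, decision, suggested_ttl_s):
        ttl = min(suggested_ttl_s, TTL_CEILING_S)   # gateway clamps the regime's hint
        self._entries[key] = (decision, time.monotonic() + ttl)

    def get(self, key):
        entry = self._entries.get(key)
        if entry and entry[1] > time.monotonic():
            return entry[0]
        return None          # expired or absent; re-ask the regime
```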
## Failure modes
| Condition | Behaviour |
|---|---|
| `authenticate` returns AuthFailure | Gateway responds 401 with the masked `auth failure` body. |
| `authorise` returns deny | Gateway responds 403 with the masked `access denied` body. |
| IAM regime unreachable | Gateway responds 401 / 503 (deployment-defined). No fail-open. |
| `authorise_many` partial deny | Gateway treats the request as denied; the operation is rejected. Partial-success semantics are not part of the contract. |
| Regime returns "not supported" for a management operation | Gateway responds 501. |
There is no fallback or "soft" decision path. An IAM regime that
is unavailable, slow, or returning errors causes requests to fail
closed.
## Implementations
### Open-source role-based regime
Defined in [`iam-protocol.md`](iam-protocol.md). Implements the
contract via:
- A pub/sub request/response service (`iam-svc`) reached only by
the gateway over the message bus.
- Credentials are API keys (opaque) or JWTs (Ed25519, locally
validated by the gateway against the regime's published public
key).
- `authorise` reduces to a lookup against the role bundles in
[`capabilities.md`](capabilities.md), with each grant's workspace
scope checked against the operation's workspace component.
- Identity, user, and workspace records live in Cassandra.
The OSS regime is deliberately simple — three roles, a single
workspace association per user (a regime data-model decision, not
a contract assertion), no policy language. Other regimes can
grant the same user different permissions in different workspaces
without changing anything outside the regime.
### Future regimes
The contract is shaped to admit, without code change in the
gateway:
- **OIDC / SSO** — `authenticate` validates an OIDC ID token via
the IdP's JWKS; `Identity.handle` carries the verified subject
and group claims; `authorise` evaluates against group-to-capability
mappings configured at the regime.
- **ABAC / Policy engine** — `authorise` calls out to a policy
engine (Rego, Cedar, custom DSL) with the identity's attributes
and the resource as the policy input.
- **ReBAC (Zanzibar-style)** — `authorise` translates `(identity,
capability, resource)` into a relationship-tuple lookup against
a tuple store.
- **Hybrid** — multiple regimes composed: e.g. authenticate via
SSO, authorise via local policy.
None of these require gateway changes. The contract surface is
the same; the regime is what differs.
## References
- [Identity and Access Management Specification](iam.md) — overall
design and the gateway-side framing.
- [IAM Service Protocol Specification](iam-protocol.md) — the OSS
regime's wire-level protocol.
- [Capability Vocabulary Specification](capabilities.md) — the
capability strings the gateway uses as `authorise` input.


@@ -0,0 +1,386 @@
---
layout: default
title: "IAM Service Protocol Technical Specification"
parent: "Tech Specs"
---
# IAM Service Protocol Technical Specification
## Overview
This document specifies the wire protocol of the **open-source IAM
regime** — one implementation of the abstract IAM contract defined
in [`iam-contract.md`](iam-contract.md). Other regimes (OIDC / SSO,
ABAC, ReBAC, external policy engines) implement the same contract
with different transports, data models, and policy semantics; the
gateway is unaware of which regime it's wired against.
The OSS regime is a backend processor (`iam-svc`) reached over the
standard request/response pub/sub pattern. It owns users,
workspaces, API keys, login credentials, and JWT signing keys, all
backed by Cassandra. The API gateway is its only caller.
This document defines:
- the `IamRequest` and `IamResponse` dataclasses on the bus,
- the operation set the OSS regime implements,
- per-operation input and output fields,
- the error taxonomy,
- the bootstrap modes,
- the initial HTTP forwarding endpoint used while the protocol is
being exercised.
The mapping from this regime onto the abstract contract is direct:
| Contract operation | OSS regime operation |
|---|---|
| `authenticate(credential)` | `resolve-api-key` (for API keys); local JWT validation against `get-signing-key-public` (for JWTs) |
| `authorise(identity, capability, resource, parameters)` | Role-table lookup against the OSS role bundles defined in [`capabilities.md`](capabilities.md), gated by workspace scope. Workspace can come from the resource address (workspace- and flow-level resources) or from a parameter (system-level resources whose parameters reference a workspace, e.g. `create-user with workspace association W`). |
| `authorise_many` | Loop over `authorise` |
| Identity / credential / workspace management | `create-user`, `create-api-key`, etc. as listed below. These are operations on system-level resources (the user / workspace / credential registries); workspace, where it appears in the body, is a parameter. |
Architectural context — roles, capabilities, workspace as resource
scope, enforcement boundary — lives in [`iam.md`](iam.md) and
[`capabilities.md`](capabilities.md). The contract abstraction
lives in [`iam-contract.md`](iam-contract.md).
## Transport
- **Request topic:** `request:tg/request/iam-request`
- **Response topic:** `response:tg/response/iam-response`
- **Pattern:** request/response, correlated by the `id` message
property, the same pattern used by `config-svc` and `flow-svc`.
- **Caller:** the API gateway only. Under the enforcement-boundary
policy (see capabilities spec), the IAM service trusts the bus
and performs no per-request authentication or capability check
against the caller. The gateway has already evaluated capability
membership and workspace scoping before sending the request.
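The correlation mechanics follow the usual request/response
pattern: the caller attaches a fresh `id` message property,
publishes to the request topic, and waits for the response
carrying the same `id`. A rough sketch of that pattern is below;
the bus client (`publish` / `consume`), the message property
accessors, and the payload attribute are hypothetical stand-ins
for the real pub/sub library.
```python
import asyncio
import uuid

class IamRequestor:
    """Sends IamRequest messages and awaits the matching IamResponse,
    correlated by the `id` message property."""

    def __init__(self, bus):
        self.bus = bus          # hypothetical pub/sub client
        self._pending = {}      # request id -> asyncio.Future

    async def run(self):
        # Run as a background task: one consumer demuxes every response
        # back to the caller that issued the matching request.
        async for msg in self.bus.consume("response:tg/response/iam-response"):
            fut = self._pending.pop(msg.properties.get("id"), None)
            if fut is not None and not fut.done():
                fut.set_result(msg.value)   # the IamResponse payload

    async def request(self, iam_request, timeout=30):
        req_id = str(uuid.uuid4())
        fut = asyncio.get_running_loop().create_future()
        self._pending[req_id] = fut
        await self.bus.publish(
            "request:tg/request/iam-request",
            iam_request,
            properties={"id": req_id},
        )
        try:
            return await asyncio.wait_for(fut, timeout)
        finally:
            self._pending.pop(req_id, None)
```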
## Dataclasses
### `IamRequest`
```python
@dataclass
class IamRequest:
# One of the operation strings below.
operation: str = ""
# Scope of this request. Required on every workspace-scoped
# operation. Omitted (or empty) for system-level ops
# (workspace CRUD, signing-key ops, bootstrap, resolve-api-key,
# login).
workspace: str = ""
# Acting user id. Set by the gateway to the authenticated
# caller's identity handle for every authenticated request
# (overwrites any caller-supplied value — the gateway is the
# only authority for actor identity, so handlers can rely on it
# being authentic). Used for audit logging, self-service ops
# like ``whoami`` that resolve "the caller", and future actor-
# scoped policy checks. Empty for unauthenticated ops
# (``login``, ``bootstrap``, ``bootstrap-status``,
# ``get-signing-key-public``, ``resolve-api-key``). See the
# actor-injection rule in the IAM contract spec.
actor: str = ""
# --- identity selectors ---
user_id: str = ""
username: str = "" # login; unique within a workspace
key_id: str = "" # revoke-api-key, list-api-keys (own)
api_key: str = "" # resolve-api-key (plaintext)
# --- credentials ---
password: str = "" # login, change-password (current)
new_password: str = "" # change-password
# --- user fields ---
user: UserInput | None = None # create-user, update-user
# --- workspace fields ---
workspace_record: WorkspaceInput | None = None # create-workspace, update-workspace
# --- api key fields ---
key: ApiKeyInput | None = None # create-api-key
```
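On the wire the request is just this dataclass serialised.
Assuming a plain `dataclasses.asdict`-style encoding, a
gateway-issued `list-users` call might look like the following;
the actor value is a placeholder and the real serialisation layer
may differ.
```python
from dataclasses import asdict

# The gateway sets `actor` from the authenticated identity, overwriting
# any caller-supplied value (actor-injection rule).
req = IamRequest(
    operation="list-users",
    workspace="default",            # optional filter for this operation
    actor="<admin-user-id>",        # placeholder for the acting admin's id
)

payload = asdict(req)
# {'operation': 'list-users', 'workspace': 'default',
#  'actor': '<admin-user-id>', 'user_id': '', 'username': '', ...,
#  'user': None, 'workspace_record': None, 'key': None}
```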
### `IamResponse`
```python
@dataclass
class IamResponse:
# Populated on success of operations that return them.
user: UserRecord | None = None # create-user, get-user, update-user
users: list[UserRecord] = field(default_factory=list) # list-users
workspace: WorkspaceRecord | None = None # create-workspace, get-workspace, update-workspace
workspaces: list[WorkspaceRecord] = field(default_factory=list) # list-workspaces
# create-api-key returns the plaintext once. Never populated
# on any other operation.
api_key_plaintext: str = ""
api_key: ApiKeyRecord | None = None # create-api-key
api_keys: list[ApiKeyRecord] = field(default_factory=list) # list-api-keys
# login, rotate-signing-key
jwt: str = ""
jwt_expires: str = "" # ISO-8601 UTC
# get-signing-key-public
signing_key_public: str = "" # PEM
# resolve-api-key returns who this key authenticates as.
resolved_user_id: str = ""
resolved_workspace: str = ""
resolved_roles: list[str] = field(default_factory=list)
# reset-password
temporary_password: str = "" # returned once to the operator
# bootstrap: on first run, the initial admin's one-time API key
# is returned for the operator to capture.
bootstrap_admin_user_id: str = ""
bootstrap_admin_api_key: str = ""
# bootstrap-status: true iff an unconsumed ``bootstrap`` call
# would currently succeed. Always emitted by the response
# translator (the false case is meaningful for first-run UIs).
bootstrap_available: bool = False
# Present on any failed operation.
error: Error | None = None
```
### Value types
```python
from dataclasses import dataclass, field

@dataclass
class UserInput:
username: str = ""
name: str = ""
email: str = ""
password: str = "" # only on create-user; never on update-user
roles: list[str] = field(default_factory=list)
enabled: bool = True
must_change_password: bool = False
@dataclass
class UserRecord:
id: str = ""
workspace: str = ""
username: str = ""
name: str = ""
email: str = ""
roles: list[str] = field(default_factory=list)
enabled: bool = True
must_change_password: bool = False
created: str = "" # ISO-8601 UTC
# Password hash is never included in any response.
@dataclass
class WorkspaceInput:
id: str = ""
name: str = ""
enabled: bool = True
@dataclass
class WorkspaceRecord:
id: str = ""
name: str = ""
enabled: bool = True
created: str = "" # ISO-8601 UTC
@dataclass
class ApiKeyInput:
user_id: str = ""
name: str = "" # operator-facing label, e.g. "laptop"
expires: str = "" # optional ISO-8601 UTC; empty = no expiry
@dataclass
class ApiKeyRecord:
id: str = ""
user_id: str = ""
name: str = ""
prefix: str = "" # first 4 chars of plaintext, for identification in lists
expires: str = "" # empty = no expiry
created: str = ""
last_used: str = "" # empty if never used
# key_hash is never included in any response.
```
## Operations
| Operation | Request fields | Response fields | Notes |
|---|---|---|---|
| `login` | `username`, `password`, `workspace` (optional) | `jwt`, `jwt_expires` | If `workspace` omitted, IAM resolves to the user's assigned workspace. |
| `whoami` | `actor` (gateway-injected) | `user` | Returns the calling user's own record. AUTHENTICATED-only; no `users:read` capability required. |
| `resolve-api-key` | `api_key` (plaintext) | `resolved_user_id`, `resolved_workspace`, `resolved_roles` | Gateway-internal. Service returns `auth-failed` for unknown / expired / revoked keys. |
| `change-password` | `user_id`, `password` (current), `new_password` | — | Self-service. IAM validates `password` against stored hash. |
| `reset-password` | `user_id`, `workspace` (optional integrity check) | `temporary_password` | Admin-initiated. IAM generates a random password, sets `must_change_password=true` on the user, returns the plaintext once. |
| `create-user` | `workspace`, `user` | `user` | `user.password` is hashed and stored; `user.roles` must be subset of known roles. `workspace` is the new user's home-workspace binding (a required *parameter*, not an address). |
| `list-users` | `workspace` (optional filter) | `users` | If `workspace` omitted, returns the deployment-wide list. |
| `get-user` | `user_id`, `workspace` (optional integrity check) | `user` | |
| `update-user` | `user_id`, `user`, `workspace` (optional integrity check) | `user` | `password` field on `user` is rejected; use `change-password` / `reset-password`. Username is immutable. |
| `disable-user` | `user_id`, `workspace` (optional integrity check) | — | Soft-delete; sets `enabled=false`. Revokes all the user's API keys. |
| `enable-user` | `user_id`, `workspace` (optional integrity check) | — | Re-enables a previously disabled user; does not restore API keys. |
| `delete-user` | `user_id`, `workspace` (optional integrity check) | — | Hard-delete; removes user record, username lookup, and all the user's API keys. |
| `create-workspace` | `workspace_record` | `workspace` | System-level. |
| `list-workspaces` | — | `workspaces` | System-level. |
| `get-workspace` | `workspace_record` (id only) | `workspace` | System-level. |
| `update-workspace` | `workspace_record` | `workspace` | System-level. |
| `disable-workspace` | `workspace_record` (id only) | — | System-level. Sets `enabled=false`; revokes all workspace API keys; disables all users in the workspace. |
| `create-api-key` | `key`, `workspace` (optional integrity check) | `api_key_plaintext`, `api_key` | Plaintext returned **once**; only hash stored. `key.name` required. |
| `list-api-keys` | `user_id`, `workspace` (optional integrity check) | `api_keys` | |
| `revoke-api-key` | `key_id`, `workspace` (optional integrity check) | — | Deletes the key record. |
| `get-signing-key-public` | — | `signing_key_public` | Gateway fetches this at startup. |
| `rotate-signing-key` | — | — | System-level. Introduces a new signing key; old key continues to validate JWTs for a grace period (implementation-defined, minimum 1h). |
| `bootstrap` | — | `bootstrap_admin_user_id`, `bootstrap_admin_api_key` | If IAM tables are empty and the service is in `bootstrap` mode, creates the initial `default` workspace, an `admin` user, an initial API key, and an initial signing key; returns them once. Otherwise returns a masked auth failure. |
| `bootstrap-status` | — | `bootstrap_available` | Side-effect-free probe; `true` iff iam-svc is in `bootstrap` mode and tables are empty. Intended for first-run UX. |
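To make the table concrete, a `create-user` round trip might look
like the following. Values are illustrative and error handling is
omitted; the comments describe the response fields the table says
come back.
```python
# Request: create a writer user in the "default" workspace.
req = IamRequest(
    operation="create-user",
    workspace="default",              # the new user's home-workspace binding (parameter)
    actor="<admin-user-id>",          # injected by the gateway; placeholder here
    user=UserInput(
        username="alice",
        name="Alice",
        email="alice@example.com",
        password="s3cret",            # hashed with a slow KDF; never echoed back
        roles=["writer"],
    ),
)

# On success the response carries the created record and no error:
#   resp.user  -> UserRecord(id="<uuid>", workspace="default",
#                            username="alice", roles=["writer"],
#                            enabled=True, ...)
#   resp.error -> None
# On a username collision the response instead carries
#   resp.error.type == "duplicate".
```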
## Error taxonomy
All errors are carried in the `IamResponse.error` field. `error.type`
is one of the values below; `error.message` is a human-readable
string that is **not** surfaced verbatim to external callers (the
gateway maps to `auth failure` / `access denied` per the IAM error
policy).
| `type` | When |
|---|---|
| `invalid-argument` | Malformed request (missing required field, unknown operation, invalid format). |
| `not-found` | Named resource does not exist (`user_id`, `key_id`, workspace). |
| `duplicate` | Create operation collides with an existing resource (username, workspace id, key name). |
| `auth-failed` | `login` with wrong credentials; `resolve-api-key` with unknown / expired / revoked key; `change-password` with wrong current password. A single bucket, so error responses cannot be used as an oracle for which condition was hit. |
| `weak-password` | Password does not meet policy (length, complexity — policy defined at service level). |
| `disabled` | Target user or workspace has `enabled=false`. |
| `operation-not-permitted` | Non-admin attempting system-level operation, or workspace-scoped operation attempting to affect another workspace. |
| `internal-error` | Unexpected IAM-side failure. Log and surface as 500 at the gateway. |
The gateway is responsible for translating `auth-failed` and
`operation-not-permitted` into the obfuscated external error
response (`"auth failure"` / `"access denied"`); `invalid-argument`
becomes a descriptive 400; `not-found` / `duplicate` /
`weak-password` / `disabled` become descriptive 4xx but never leak
IAM-internal detail.
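A sketch of that gateway-side translation. The masked categories
follow the table exactly; the concrete 4xx codes for the
descriptive categories are assumptions (the spec only requires a
descriptive 4xx), and the function name is illustrative.
```python
# Masked categories: no IAM-internal detail crosses the boundary.
MASKED = {
    "auth-failed": (401, "auth failure"),
    "operation-not-permitted": (403, "access denied"),
}

# Descriptive categories: a useful message, but still a gateway-owned one.
# The specific status codes here are assumptions.
DESCRIPTIVE = {
    "invalid-argument": 400,
    "not-found": 404,
    "duplicate": 409,
    "weak-password": 400,
    "disabled": 403,
}

def to_external_error(error_type: str, safe_message: str):
    """Map IamResponse.error.type to an external (status, body) pair."""
    if error_type in MASKED:
        status, text = MASKED[error_type]
        return status, {"error": text}
    if error_type in DESCRIPTIVE:
        return DESCRIPTIVE[error_type], {"error": safe_message}
    # internal-error and anything unrecognised: log the detail server-side,
    # surface a plain 500 externally.
    return 500, {"error": "internal error"}
```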
## Credential storage
- **Passwords** are stored using a slow KDF (bcrypt / argon2id — the
service picks; documented as an implementation detail). The
`password_hash` column stores the full KDF-encoded string
(algorithm, cost, salt, hash). Not a plain SHA-256.
- **API keys** are stored as SHA-256 of the plaintext. API keys
are 128-bit random values (`tg_` + base64url); the entropy
makes a slow hash unnecessary. The hash serves as the primary
key on the `iam_api_keys` table, enabling O(1) lookup on
`resolve-api-key` (see the sketch after this list).
- **JWT signing key** is stored as an RSA or Ed25519 private key
(implementation choice) in a dedicated `iam_signing_keys` table
with a `kid`, `created`, and optional `retired` timestamp. At
most one active key; up to N retired keys are kept for a grace
period to validate previously-issued JWTs.
Passwords, API-key plaintext, and signing-key private material are
never returned in any response other than the explicit one-time
responses above (`reset-password`, `create-api-key`, `bootstrap`).
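A sketch of the API-key format and hash-keyed lookup described
above. The `tg_` prefix, 128-bit entropy, base64url encoding, and
SHA-256 storage come from the text; the helper names and the dict
standing in for the `iam_api_keys` table are illustrative.
```python
import hashlib
import secrets

def generate_api_key() -> str:
    # 128 bits of randomness, base64url-encoded, "tg_" prefix.
    return "tg_" + secrets.token_urlsafe(16)

def api_key_hash(plaintext: str) -> str:
    # High-entropy keys don't need a slow KDF; SHA-256 of the plaintext
    # doubles as the primary key, giving O(1) resolve-api-key lookups.
    return hashlib.sha256(plaintext.encode("ascii")).hexdigest()

# Issue time: store only the hash plus a short display prefix.
plaintext = generate_api_key()
api_keys = {                         # stand-in for the iam_api_keys table
    api_key_hash(plaintext): {
        "prefix": plaintext[:4],     # first 4 chars, per ApiKeyRecord.prefix
        "user_id": "<uuid>",
    },
}

# Resolve time: hash the presented key and look the record up directly.
presented = plaintext
record = api_keys.get(api_key_hash(presented))   # None => auth-failed
```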
## Bootstrap modes
`iam-svc` requires a bootstrap mode to be chosen at startup. There is
no default — an unset or invalid mode causes the service to refuse
to start. The purpose is to force the operator to make an explicit
security decision rather than rely on an implicit "safe" fallback.
| Mode | Startup behaviour | `bootstrap` operation | Suitability |
|---|---|---|---|
| `token` | On first start with empty tables, auto-seeds the `default` workspace, admin user, admin API key (using the operator-provided `--bootstrap-token`), and an initial signing key. No-op on subsequent starts. | Refused — returns `auth-failed` / `"auth failure"` regardless of caller. | Production, any public-exposure deployment. |
| `bootstrap` | No startup seeding. Tables remain empty until the `bootstrap` operation is invoked over the pub/sub bus (typically via `tg-bootstrap-iam`). | Live while tables are empty. Generates and returns the admin API key once. Refused (`auth-failed`) once tables are populated. | Dev / compose up / CI. **Not safe under public exposure** — any caller who reaches the gateway's `/api/v1/iam` forwarder before the operator does can have the admin token issued to them. Operators choosing this mode accept that risk. |
### Error masking
In both modes, any refused invocation of the `bootstrap` operation
returns the same error (`auth-failed` / `"auth failure"`). A caller
cannot distinguish:
- "service is in token mode"
- "service is in bootstrap mode but already bootstrapped"
- "operation forbidden"
This matches the general IAM error-policy stance (see `iam.md`) and
prevents externally enumerating IAM's state.
### Configuration sources
The mode and token can be supplied two ways. Resolution order is
fixed; there is no permissive fallback.
| Source | Field |
|---|---|
| Processor-group YAML / CLI argument | `bootstrap_mode`, `bootstrap_token` |
| Environment variable | `IAM_BOOTSTRAP_MODE`, `IAM_BOOTSTRAP_TOKEN` |
For each setting the service uses the explicit param value if
present; otherwise the environment variable; otherwise the service
refuses to start. The env-var path is intended for the K8s
deployment pattern where the token is injected from a `Secret` via
`secretKeyRef`, so the plaintext never has to live in YAML or git.
A typical production manifest holds `bootstrap_mode: "token"` in
the YAML and pulls `IAM_BOOTSTRAP_TOKEN` from the Secret; the YAML
is then safe to version-control.
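A sketch of that resolution order: explicit parameter first, then
the environment variable, otherwise refuse to start. The function
name and the way the explicit value is obtained are illustrative.
```python
import os
import sys

def resolve_bootstrap_setting(explicit_value: str, env_var: str) -> str:
    """Explicit param (YAML / CLI) wins; otherwise the env var; otherwise refuse to start."""
    if explicit_value:
        return explicit_value
    value = os.environ.get(env_var, "")
    if value:
        return value
    sys.exit(f"refusing to start: {env_var} unset and no explicit value supplied")

# At startup; an empty string here means "not supplied in the YAML / CLI".
mode = resolve_bootstrap_setting("", "IAM_BOOTSTRAP_MODE")
if mode not in ("token", "bootstrap"):
    sys.exit(f"refusing to start: invalid bootstrap mode {mode!r}")
token = (
    resolve_bootstrap_setting("", "IAM_BOOTSTRAP_TOKEN")
    if mode == "token"
    else ""
)
```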
### Bootstrap-token lifecycle
The bootstrap token — whether operator-supplied (`token` mode) or
service-generated (`bootstrap` mode) — is a one-time credential. It
is stored as the admin user's single API key, tagged `name="bootstrap"`. The
operator's first admin action after bootstrap should be:
1. Create a durable admin user and API key (or issue a durable API
key to the bootstrap admin).
2. Revoke the bootstrap key via `revoke-api-key`.
3. Remove the bootstrap token from any deployment configuration
(Secret, env var, or YAML field — wherever it was sourced).
The `name="bootstrap"` marker makes bootstrap keys easy to detect in
tooling (e.g. a `tg-list-api-keys` filter).
## HTTP forwarding (initial integration)
For the initial gateway integration — before the IAM service is
wired into the authentication middleware — the gateway exposes a
single forwarding endpoint:
```
POST /api/v1/iam
```
- Request body is a JSON encoding of `IamRequest`.
- Response body is a JSON encoding of `IamResponse`.
- The gateway's existing authentication (`GATEWAY_SECRET` bearer)
gates access to this endpoint so the IAM protocol can be
exercised end-to-end in tests without touching the live auth
path.
- This endpoint is **not** the final shape. Once the middleware is
in place, per-operation REST endpoints replace it (for example
`POST /api/v1/auth/login`, `POST /api/v1/users`, `DELETE
/api/v1/api-keys/{id}`), and this generic forwarder is removed.
The endpoint performs only message marshalling: it does not read
or rewrite fields in the request, and it applies no capability
check. All authorisation for user / workspace / key management
lands in the subsequent middleware work.
## Non-goals for this spec
- REST endpoint shape for the final gateway surface — covered in
Phase 2 of the IAM implementation plan, not here.
- OIDC / SAML external IdP protocol — out of scope for open source.
- Key-signing algorithm choice, password KDF choice, JWT claim
layout — implementation details captured in code + ADRs, not
locked in the protocol spec.
## References
- [IAM Contract Specification](iam-contract.md) — the abstract
gateway↔IAM regime contract this protocol implements.
- [Identity and Access Management Specification](iam.md)
- [Capability Vocabulary Specification](capabilities.md)

View file

@ -199,9 +199,9 @@ The server rejects all non-auth messages until authentication succeeds.
The socket remains open on auth failure, allowing the client to retry
with a different token without reconnecting. The client can also send
a new auth message at any time to re-authenticate — for example, to
refresh an expiring JWT or to switch workspace. The
resolved identity (user, workspace, roles) is updated on each
successful auth.
refresh an expiring JWT or to switch workspace. The resolved
identity (handle, workspace, principal_id, source) is updated on
each successful auth.
#### API keys
@ -219,7 +219,7 @@ For programmatic access: CLI tools, scripts, and integrations.
On each request, the gateway resolves an API key by:
1. Hashing the token.
2. Checking a local cache (hash → user/workspace/roles).
2. Checking a local cache (hash → identity).
3. On cache miss, calling the IAM service to resolve.
4. Caching the result with a short TTL (e.g. 60 seconds).
@ -233,9 +233,15 @@ For interactive access via the UI or WebSocket connections.
- A user logs in with username and password. The gateway forwards the
request to the IAM service, which validates the credentials and
returns a signed JWT.
- The JWT carries the user ID, workspace, and roles as claims.
- The JWT carries identity-binding claims only — user id (`sub`)
and the workspace this credential authenticates to. No roles,
no policy state. Per the IAM contract, all policy decisions go
through `authorise`; the gateway never reads roles or other
regime-internal state from the credential.
- The gateway validates JWTs locally using the IAM service's public
signing key — no service call needed on subsequent requests.
signing key — no service call needed for the authentication step;
authorisation calls remain per-request (cached per the contract's
caching rules).
- Token expiry is enforced by standard JWT validation at the time the
request (or WebSocket connection) is made.
- For long-lived WebSocket connections, the JWT is validated at connect
@ -262,6 +268,26 @@ The gateway forwards this to the IAM service, which validates
credentials and returns a signed JWT. The gateway returns the JWT to
the caller.
#### Self-service: `whoami` and `bootstrap-status`
Two side-effect-free probes that exist to support UI affordances
without giving the caller broad read access:
- `POST /api/v1/iam` with `{"operation": "whoami"}` — authenticated
only. Returns the caller's own user record (id, username, name,
email, workspace, roles, enabled, must_change_password,
created). No `users:read` capability is required, because every
authenticated caller can read themselves. The gateway populates
`actor` on the request from the authenticated identity, so the
regime resolves "the caller" without taking a target argument.
- `POST /api/v1/auth/bootstrap-status` — public, side-effect-free.
Returns `{"bootstrap_available": true|false}`. `true` iff
iam-svc is in `bootstrap` mode and its tables are empty (i.e. an
unconsumed `bootstrap` call would currently succeed). Exists so
a first-run UI can decide whether to render the setup flow
without invoking the consuming `bootstrap` op.
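As a concrete illustration of the two probes, using the
`requests` library; the base URL and the placeholder credential
are assumptions, not part of the spec.
```python
import requests

BASE = "http://localhost:8088"

# Authenticated self-lookup: any authenticated caller may read themselves;
# no users:read capability required.
me = requests.post(
    f"{BASE}/api/v1/iam",
    headers={"Authorization": "Bearer <api-key-or-jwt>"},   # placeholder
    json={"operation": "whoami"},
).json()
# -> {"user": {"id": "...", "username": "...", "workspace": "...", ...}}

# Public, side-effect-free first-run probe: no credential required.
status = requests.post(f"{BASE}/api/v1/auth/bootstrap-status").json()
if status.get("bootstrap_available"):
    print("fresh deployment: render the setup flow")
```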
#### IAM service delegation
The gateway stays thin. Its authentication logic is:
@ -285,35 +311,82 @@ authentication uses API keys or JWTs. On first start, the bootstrap
process creates a default workspace and admin user with an initial API
key.
### User identity
### Identity, credentials, and workspace binding
A user belongs to exactly one workspace. The design supports extending
this to multi-workspace access in the future (see
[Extension points](#extension-points)).
The gateway never asks "which workspace does *this user* belong to?".
That question forces every IAM regime to expose a user-to-workspace
mapping, which rules out regimes where the relationship is
many-to-many or does not exist at all (e.g. SSO with IdP-driven
workspace selection).
Instead, the gateway asks "which workspace does *this credential*
authenticate to?" — a question every regime can answer in its own
terms.
A user record contains:
A credential (API key, JWT, OIDC token, etc.) is **bound to a
workspace at issue time**. The IAM regime decides what binding
means:
- **OSS regime** — each user has a home workspace; credentials
issued to that user are bound to that workspace. A 1:1
user-to-workspace constraint is an internal data-model decision,
not a contract assertion.
- **Multi-workspace regime** (future / enterprise) — a user with
access to several workspaces gets a different credential per
workspace. Each credential authenticates to exactly one
workspace; the relationship between user and workspace is a
regime-internal detail the gateway does not see.
When the gateway authenticates a credential, the IAM regime returns
an `Identity` whose `workspace` is the workspace this credential is
for. That value — not "the user's workspace" — is what the gateway
uses for default-fill-in and as input to the IAM `authorise` call.
#### Identity surface
What the gateway holds after `authenticate`:
| Field | Purpose |
|-------|---------|
| `handle` | Opaque token quoted back when calling `authorise`. Regime-defined. |
| `workspace` | The workspace this credential authenticates to. Used as the default if a request omits workspace. |
| `principal_id` | Stable identifier for audit logging (a user id, sub claim, service account id). Never used for authorisation. |
| `source` | How the credential was presented (`api-key`, `jwt`). Logged with audit events; not policy input. |
Anything else — roles, claims, group memberships, policy attributes
— stays inside the regime and is reachable only via `authorise`.
See [`iam-contract.md`](iam-contract.md) for the full contract.
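A minimal sketch of that surface as a dataclass. Only these four
fields are visible to the gateway; the real `Identity` type in
the gateway code may differ in detail.
```python
from dataclasses import dataclass

@dataclass
class Identity:
    handle: str        # opaque; quoted back to authorise(); regime-defined
    workspace: str     # the workspace this credential authenticates to
    principal_id: str  # stable id for audit logging only; never policy input
    source: str        # how the credential was presented: "api-key" or "jwt"
```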
#### OSS user record
The OSS regime stores the following per user. These fields are
**OSS-implementation specifics**, not part of the contract.
| Field | Type | Description |
|-------|------|-------------|
| `id` | string | Unique user identifier (UUID) |
| `name` | string | Display name |
| `email` | string | Email address (optional) |
| `workspace` | string | Workspace the user belongs to |
| `workspace` | string | Home workspace; default binding for issued credentials |
| `roles` | list[string] | Assigned roles (e.g. `["reader"]`) |
| `enabled` | bool | Whether the user can authenticate |
| `created` | datetime | Account creation timestamp |
The `workspace` field maps to the existing `user` field in `Metadata`.
This means the storage-layer isolation (Cassandra, Neo4j, Qdrant
filtering by `user` + `collection`) works without changes — the gateway
sets the `user` metadata field to the authenticated user's workspace.
The `workspace` field on a user record is the **default binding**
used when issuing credentials, not a constraint visible to the
gateway. An enterprise regime may have no user records at all
(authentication delegated to an IdP).
### Workspaces
A workspace is an isolated data boundary. Users belong to a workspace,
and all data operations are scoped to it. Workspaces map to the existing
`user` field in `Metadata` and the corresponding Cassandra keyspace,
Qdrant collection prefix, and Neo4j property filters.
A workspace is an isolated data boundary — a tenancy scope in which
users, flows, configuration, documents, and knowledge graphs live.
Workspaces map to storage-layer isolation: the `user` field in
`Metadata`, the corresponding Cassandra keyspace, the Qdrant
collection prefix, the Neo4j property filter.
Workspace is the most prominent component of an operation's
**resource scope**: when a request says "do X to Y", workspace is
part of "Y". Listing users, creating flows, querying the graph —
all of these target a specific workspace.
| Field | Type | Description |
|-------|------|-------------|
@ -322,57 +395,176 @@ Qdrant collection prefix, and Neo4j property filters.
| `enabled` | bool | Whether the workspace is active |
| `created` | datetime | Creation timestamp |
All data operations are scoped to a workspace. The gateway determines
the effective workspace for each request as follows:
#### Default-fill-in
1. If the request includes a `workspace` parameter, validate it against
the user's assigned workspace.
- If it matches, use it.
- If it does not match, return 403. (This could be extended to
check a workspace access grant list.)
2. If no `workspace` parameter is provided, use the user's assigned
workspace.
If a request omits workspace, the gateway fills it in from the
authenticated identity's bound workspace (`identity.workspace`)
before any IAM check runs. IAM never receives an unresolved
workspace; every `authorise` call sees a concrete value.
The gateway sets the `user` field in `Metadata` to the effective
workspace ID, replacing the caller-supplied `?user=` query parameter.
#### Authorisation
This design ensures forward compatibility. Clients that pass a
workspace parameter will work unchanged if multi-workspace support is
added later. Requests for an unassigned workspace get a clear 403
rather than silent misbehaviour.
Whether the resolved workspace is permitted to be operated on by
this caller is an **IAM decision**, not a gateway one. The gateway
calls `authorise(identity, capability, {workspace: ..., ...})` and
relays the answer. In the OSS regime, the regime checks whether
the caller's permission grants for `<capability>` include this
workspace — see [`capabilities.md`](capabilities.md). In other
regimes the decision could come from group mappings, policies,
relationship tuples, or anything else the regime models.
### Request anatomy
The shape of a request — where workspace appears, where flow
appears, where parameters live — follows from **the level of the
resource being operated on**, not from any single property of the
request like its URL or its required capability.
Resources live at one of three levels (see also the resource model
in [`iam-contract.md`](iam-contract.md)):
| Resource level | Examples | Resource address |
|---|---|---|
| **System** | The user registry, the workspace registry, the IAM signing key, the audit log | empty `{}` |
| **Workspace** | A workspace's config, flow definitions, library, knowledge cores, collections | `{workspace: ...}` |
| **Flow** | A flow's knowledge graph, agent state, LLM context, embeddings, MCP context | `{workspace: ..., flow: ...}` |
For the gateway-to-bus mapping this dictates **where workspace
lives in the message**, but only when workspace is part of the
*resource address*. Workspace can also appear as an *operation
parameter* on system-level resources (see below).
#### Workspace as address vs. parameter
Two distinct roles, two distinct locations:
- **Workspace as address component.** Workspace identifies the
thing being operated on. Used for workspace-level and flow-level
resources. Lives in the addressing layer of the message — the
URL path for HTTP, or the WebSocket envelope alongside `flow` for
flow-scoped operations sent through the Mux.
- **Workspace as operation parameter.** Workspace is data the
operation acts on, while the resource itself is system-level.
Used for operations on the user registry (`create-user with
workspace association W`), the workspace registry (`create-
workspace W`), and other system-level operations that happen to
reference a workspace. Lives in the request body or inner WS
payload alongside the operation's other parameters.
The two roles never coexist on the same operation. Either the
operation addresses something within a workspace (workspace is in
the address) or it operates on a system-level resource with
workspace as a parameter (workspace is in the body) or workspace
is irrelevant (system-level operations like `bootstrap`,
`rotate-signing-key`, `login` itself).
#### Where workspace lives, by request type
| Request type | Resource level | Workspace lives in |
|---|---|---|
| Flow-scoped data plane (`agent`, `graph-rag`, `llm`, `embeddings`, `mcp`, etc.) | Flow | Envelope alongside `flow` (WS) or URL path (HTTP) — part of the address |
| Workspace-scoped control plane (`config`, `library`, `knowledge`, `collection-management`, flow lifecycle) | Workspace | Body / inner request — part of the address |
| User registry ops (`create-user`, `list-users`, `disable-user`, etc.) | System | Body — as a *parameter* (the user's workspace association or a list filter) |
| Workspace registry ops (`create-workspace`, `list-workspaces`, etc.) | System | Body — as a *parameter* (the workspace identifier in `workspace_record`) |
| Credential ops (`create-api-key`, `revoke-api-key`, `change-password`, `reset-password`) | System | Body — as a *parameter* on ops that have one; absent on `change-password` (target is the caller's identity) |
| System ops (`bootstrap`, `login`, `rotate-signing-key`, `get-signing-key-public`) | System | Not present at all |
The classification is deliberate. Users are global objects that
*have* a workspace association; they don't *live* in one. An OSS
regime has
1:1 user-to-workspace; a multi-workspace regime maps a user to many
workspaces; an SSO regime might delegate workspace membership to an
IdP entirely. The gateway treats user-registry operations as
system-level so the contract is the same across regimes — the
workspace association is a parameter the regime interprets in its
own terms.
#### HTTP
HTTP routes by URL path, so the address lives in the URL.
Per-operation REST shape:
- Flow-level: `POST /api/v1/workspaces/{w}/flows/{f}/services/{kind}`
`workspace` and `flow` are URL components.
- Workspace-level: `POST /api/v1/workspaces/{w}/config`,
`/api/v1/workspaces/{w}/library`, etc. — `workspace` is a URL
component.
- System-level: `POST /api/v1/users`, `/api/v1/workspaces`, etc. —
no workspace in URL; if the operation references one, it's a
field in the body.
`/api/v1/iam` is itself registry-driven: the body's `operation`
field is looked up against the registry to obtain the capability,
resource shape, and parameter shape per operation, rather than
gating the whole endpoint with a single coarse capability.
#### WebSocket Mux
The Mux envelope is the addressing layer for flow-scoped
operations. For workspace-level and system-level operations the
envelope routes by `service` only, and the inner request payload
carries the address components or parameters as appropriate. See
[`iam-contract.md`](iam-contract.md) for the operation-registry
mechanism the Mux uses to know which fields to read.
### Roles and access control
Three roles with fixed permissions:
Roles are an OSS-regime concept and live entirely in the IAM
service. The gateway does not enumerate or check them — it asks
`authorise(identity, capability, resource, parameters)` per
request and the regime maps the caller's roles to a decision.
| Role | Data operations | Admin operations | System |
|------|----------------|-----------------|--------|
| `reader` | Query knowledge graph, embeddings, RAG | None | None |
| `writer` | All reader operations + load documents, manage collections | None | None |
| `admin` | All writer operations | Config, flows, collection management, user management | Metrics |
The OSS regime ships three roles:
Role checks happen at the gateway before dispatching to backend
services. Each endpoint declares the minimum role required:
| Role | Capabilities granted |
|------|----------------------|
| `reader` | Read capabilities on data and config (`graph:read`, `documents:read`, `rows:read`, `config:read`, `flows:read`, `knowledge:read`, `collections:read`, `keys:self`, plus the per-service caps `agent`, `llm`, `embeddings`, `mcp`). |
| `writer` | All reader capabilities, plus `graph:write`, `documents:write`, `rows:write`, `knowledge:write`, `collections:write`. |
| `admin` | All writer capabilities, plus `config:write`, `flows:write`, `users:read`, `users:write`, `users:admin`, `keys:admin`, `workspaces:admin`, `iam:admin`, `metrics:read`. |
| Endpoint pattern | Minimum role |
|-----------------|--------------|
| `GET /api/v1/socket` (queries) | `reader` |
| `POST /api/v1/librarian` | `writer` |
| `POST /api/v1/flow/*/import/*` | `writer` |
| `POST /api/v1/config` | `admin` |
| `GET /api/v1/flow/*` | `admin` |
| `GET /api/metrics` | `admin` |
Workspace scope is a property of the *grant*, not of the user or
role. In the OSS regime each capability granted by `reader` /
`writer` is scoped to the workspace the user record is associated
with; capabilities granted by `admin` are scoped to `*` (every
workspace). A user is a system-level object — they don't "live
in" a workspace, they hold permissions whose scope happens to
reference one.
Roles are hierarchical: `admin` implies `writer`, which implies
`reader`.
The OSS regime is deliberately limited to one workspace association
per user; future regimes are free to grant the same user different
permissions in different workspaces, or use a non-workspace scope
entirely. This is regime-internal — neither the contract nor the
gateway carries an assumption either way.
The gateway gates each endpoint by *capability*, not by role.
Capabilities are declared per operation in the gateway's operation
registry; see [`iam-contract.md`](iam-contract.md) for the
registry mechanism and [`capabilities.md`](capabilities.md) for
the capability vocabulary.
### IAM service
The IAM service is a new backend service that manages all identity and
access data. It is the authority for users, workspaces, API keys, and
credentials. The gateway delegates to it.
The IAM service is a backend service that implements the
[IAM contract](iam-contract.md) — `authenticate`, `authorise`, and
the management operations the gateway forwards. It is the
authority for identity, credential validation, and access decisions.
The gateway treats it as a black box behind the contract; nothing
in the gateway is regime-specific.
#### Data model
The OSS distribution ships one IAM regime: a role-based service
backed by Cassandra, described in
[`iam-protocol.md`](iam-protocol.md). Enterprise / future regimes
can replace this implementation without changing the gateway, the
wire protocol between gateway and backends, or the capability
vocabulary — see the contract spec for the abstraction the gateway
is wired against and the implementation notes for what other
regimes look like.
#### OSS data model
The OSS regime stores users, workspaces, API keys, and signing
keys in Cassandra. This is an **OSS regime implementation
detail**; it is not part of the contract. Other regimes will have
different (or no) data models.
```
iam_workspaces (
@ -423,44 +615,89 @@ resolve API keys and to handle login requests. User management
operations (create user, revoke key, etc.) also go through the IAM
service.
### Error policy
External error responses carry **no diagnostic detail** for
authentication or access-control failures. The goal is to give an
attacker probing the endpoint no signal about which condition they
tripped.
| Category | HTTP | Body | WebSocket frame |
|----------|------|------|-----------------|
| Authentication failure | `401 Unauthorized` | `{"error": "auth failure"}` | `{"type": "auth-failed", "error": "auth failure"}` |
| Access control failure | `403 Forbidden` | `{"error": "access denied"}` | `{"error": "access denied"}` (endpoint-specific frame type) |
"Authentication failure" covers missing credential, malformed
credential, invalid signature, expired token, revoked API key, and
unknown API key — all indistinguishable to the caller.
"Access control failure" covers role insufficient, workspace
mismatch, user disabled, and workspace disabled — all
indistinguishable to the caller.
**Server-side logging is richer.** The audit log records the specific
reason (`"workspace-mismatch: user alice assigned 'acme', requested
'beta'"`, `"role-insufficient: admin required, user has writer"`,
etc.) for operators and post-incident forensics. These messages never
appear in responses.
Other error classes (bad request, internal error) remain descriptive
because they do not reveal anything about the auth or access-control
surface — e.g. `"missing required field 'workspace'"` or
`"invalid JSON"` is fine.
### Gateway changes
The current `Authenticator` class is replaced with a thin authentication
middleware that delegates to the IAM service:
The current `Authenticator` class is replaced with a thin
authentication+authorisation middleware that delegates to the IAM
service per the IAM contract. The gateway performs no role check
itself — authorisation is asked of the regime via `authorise`.
For HTTP requests:
1. Extract Bearer token from the `Authorization` header.
2. If the token has JWT format (dotted structure):
- Validate signature locally using the cached public key.
- Extract user ID, workspace, and roles from claims.
- Build an `Identity` from `sub` and `workspace` claims (no
other claims are consulted).
3. Otherwise, treat as an API key:
- Hash the token and check the local cache.
- On cache miss, call the IAM service to resolve.
- Cache the result (user/workspace/roles) with a short TTL.
- On cache miss, call the IAM service to resolve to an
`Identity` (handle, workspace, principal_id, source).
- Cache the result with a short TTL.
4. If neither succeeds, return 401.
5. If the user or workspace is disabled, return 403.
6. Check the user's role against the endpoint's minimum role. If
insufficient, return 403.
7. Resolve the effective workspace:
- If the request includes a `workspace` parameter, validate it
against the user's assigned workspace. Return 403 on mismatch.
- If no `workspace` parameter, use the user's assigned workspace.
8. Set the `user` field in the request context to the effective
workspace ID. This propagates through `Metadata` to all downstream
services.
5. Look up the operation in the gateway's operation registry to get
`(capability, resource_level, extractors)`. Build the resource
address (system / workspace / flow level) and parameters from
the request.
6. Default-fill the workspace into the body when the operation is
workspace- or flow-level (so downstream code sees a single
canonical address); the resource address keeps its supplied
value.
7. Call `authorise(identity, capability, resource, parameters)`.
On allow, forward the request; on deny, return 403. On regime
error, fail closed (401 / 503 per deployment).
8. Cache the decision per the contract's caching rules (clamped
above by a deployment-set ceiling).
9. For requests forwarded to iam-svc, set `actor` on the body
from `identity.handle`, overwriting any caller-supplied value.
See [`iam-contract.md`](iam-contract.md#actor-injection).
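A rough sketch of steps 5-8, under assumptions: the registry
structure, the capability mappings, the authoriser's return
shape, and the cache interface are all illustrative, not the
middleware's real API.
```python
# Hypothetical operation-registry entries:
#   key -> (capability, resource level, address fields)
OPERATION_REGISTRY = {
    "flow-service:graph-rag": ("graph:read",  "flow",      ["workspace", "flow"]),
    "config-get":             ("config:read", "workspace", ["workspace"]),
    "create-user":            ("users:write", "system",    []),
}

async def authorise_request(identity, operation, body, authoriser, cache):
    capability, level, addr_fields = OPERATION_REGISTRY[operation]

    # Step 6: default-fill workspace for workspace- and flow-level resources.
    if level in ("workspace", "flow") and not body.get("workspace"):
        body["workspace"] = identity.workspace

    resource = {f: body[f] for f in addr_fields}                 # resource address
    params = {k: v for k, v in body.items() if k not in resource}

    # Steps 7-8: consult the decision cache, then the regime; fail closed.
    decision = cache.get(identity.handle, capability, resource, params)
    if decision is None:
        decision, suggested_ttl = await authoriser.authorise(
            identity, capability, resource, params,
        )
        cache.put(identity.handle, capability, resource, params,
                  decision, suggested_ttl)
    if not decision:
        raise PermissionError("access denied")                   # surfaced as 403
    return body
```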
For WebSocket connections:
1. Accept the connection in an unauthenticated state.
2. Wait for an auth message (`{"type": "auth", "token": "..."}`).
3. Validate the token using the same logic as steps 2-7 above.
3. Validate the token using the same logic as steps 1-3 above.
4. On success, attach the resolved identity to the connection and
send `{"type": "auth-ok", ...}`.
5. On failure, send `{"type": "auth-failed", ...}` but keep the
socket open.
6. Reject all non-auth messages until authentication succeeds.
7. Accept new auth messages at any time to re-authenticate.
8. For each subsequent request frame, look up
`flow-service:<service>` in the registry and call `authorise`
against the `{workspace, flow}` resource — same authority
gateway HTTP callers see, evaluated per-frame.
### CLI changes
@ -713,6 +950,16 @@ These are not implemented but the architecture does not preclude them:
- **Multi-workspace access.** Users could be granted access to
additional workspaces beyond their primary assignment. The workspace
validation step checks a grant list instead of a single assignment.
- **Workspace resolver.** Workspace resolution on each authenticated
request — "given this user and this requested workspace, which
workspace (if any) may the request operate on?" — is encapsulated
in a single pluggable resolver. The open-source edition ships a
resolver that permits only the user's single assigned workspace;
enterprise editions that implement multi-workspace access swap in a
resolver that consults a permitted set. The wire protocol (the
optional `workspace` field on the authenticated request) is
identical in both editions, so clients written against one edition
work unchanged against the other.
- **Rules-based access control.** A separate access control service
could evaluate fine-grained policies (per-collection permissions,
operation-level restrictions, time-based access). The gateway
@ -848,10 +1095,15 @@ service, not in the config service. Reasons:
- **API key scoping.** API keys could be scoped to specific collections
within a workspace rather than granting workspace-wide access. To be
designed when the need arises.
- **tg-init-trustgraph** only initialises a single workspace.
## References
- [IAM Contract Specification](iam-contract.md) — the gateway↔IAM
regime abstraction this design is wired against.
- [IAM Service Protocol Specification](iam-protocol.md) — the OSS
regime's wire-level protocol.
- [Capability Vocabulary Specification](capabilities.md) — the
capability strings the gateway uses as `authorise` input.
- [Data Ownership and Information Separation](data-ownership-model.md)
- [MCP Tool Bearer Token Specification](mcp-tool-bearer-token.md)
- [Multi-Tenant Support Specification](multi-tenant-support.md)

252
iam-testing.txt Normal file
View file

@ -0,0 +1,252 @@
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation": "bootstrap"}'
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation": "resolve-api-key", "api_key": "tg_r-n43hDWV9WOY06w6o5YpevAxirlS33D"}'
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation": "resolve-api-key", "api_key": "asdalsdjasdkasdasda"}'
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"list-users","workspace":"default"}'
# 1. Admin creates a writer user "alice"
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{
"operation": "create-user",
"workspace": "default",
"user": {
"username": "alice",
"name": "Alice",
"email": "alice@example.com",
"password": "changeme",
"roles": ["writer"]
}
}'
# expect: {"user": {"id": "<alice-uuid>", ...}} — grab alice's uuid
# 2. Issue alice an API key
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{
"operation": "create-api-key",
"workspace": "default",
"key": {
"user_id": "f2363a10-3b83-44ea-a008-43caae8ba607",
"name": "alice-laptop"
}
}'
# expect: {"api_key_plaintext": "tg_...", "api_key": {"id": "<key-uuid>", "prefix": "tg_xxxx", ...}}
# 3. Resolve alice's key — should return alice's id + workspace + writer role
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"resolve-api-key","api_key":"tg_gt4buvk5NG-QS7oP_0Gk5yTWyj1qensf"}'
# expect: {"resolved_user_id":"<alice-uuid>","resolved_workspace":"default","resolved_roles":["writer"]}
# 4. List alice's keys (admin view of alice's keys)
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"list-api-keys","workspace":"default","user_id":"f2363a10-3b83-44ea-a008-43caae8ba607"}'
# expect: {"api_keys": [{"id":"<key-uuid>","user_id":"<alice-uuid>","name":"alice-laptop","prefix":"tg_xxxx",...}]}
# 5. Revoke alice's key
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"revoke-api-key","workspace":"default","key_id":"55f1c1f7-5448-49fd-9eda-56c192b61177"}'
# expect: {} (empty, no error)
# 6. Confirm the revoked key no longer resolves
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"resolve-api-key","api_key":"tg_gt4buvk5NG-QS7oP_0Gk5yTWyj1qensf"}'
# expect: {"error":{"type":"auth-failed","message":"unknown api key"}}
----------------------------------------------------------------------------
You'll want to re-bootstrap a fresh deployment to pick up the new signing-key row (or accept that login will lazily generate one on first
call). Then:
# 1. Create a user with a known password (admin's password is random)
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"create-user","workspace":"default","user":{"username":"alice","password":"s3cret","roles":["writer"]}}'
# 2. Log alice in
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"login","username":"alice","password":"s3cret"}'
# expect: {"jwt":"eyJ...","jwt_expires":"2026-..."}
# 3. Fetch the public key (what the gateway will use later to verify)
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"get-signing-key-public"}'
# expect: {"signing_key_public":"-----BEGIN PUBLIC KEY-----\n..."}
# 4. Wrong password
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Authorization: Bearer $GATEWAY_SECRET" \
-H "Content-Type: application/json" \
-d '{"operation":"login","username":"alice","password":"nope"}'
# expect: {"error":{"type":"auth-failed","message":"bad credentials"}}
-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAseLB/a9Bo/RN/Rb/x763
+vdxmUKG75oWsXBmbwZGDXyN6fwqZ3L7cEje93qK0PYFuCHxhY1Hn0gW7FZ8ovH+
qEksekUlpfPYqKGiT5Mb0DKk49D4yKkIbJFugWalpwIilvRbQO0jy3V8knqGQ1xL
NfNYFrI2Rxe0Tq2OHVYc5YwYbyj1nz2TY5fd9qrzXtGRv5HZztkl25lWhRvG9G0K
urKDdBDbi894gIYorXvcwZw/b1GDXG/aUy/By1Oy3hXnCLsN8pA3nA437TTTWxHx
QgPH15jIF9hezO+3/ESZ7EhVEtgmwTxPddfXRa0ZoT6JyWOgcloKtnP4Lp9eQ4va
yQIDAQAB
-----END PUBLIC KEY-----
New operations:
- change-password — self-service. Requires current + new password.
- reset-password — admin-driven. Generates a random temporary, sets must_change_password=true, returns plaintext once.
- get-user, update-user, disable-user — workspace-scoped. update-user refuses to change username (immutable — error if different) and refuses
password-via-update. disable-user also revokes all the user's API keys, per spec.
- create-workspace, list-workspaces, get-workspace, update-workspace, disable-workspace — system-level. disable-workspace cascades: disables
all users + revokes all their keys. Rejects ids starting with _ (reserved, per the bootstrap framework convention).
- rotate-signing-key — generates a new Ed25519 key, retires the current one (sets retired timestamp; row stays for future grace-period
validation), switches the in-memory cache.
Touched files:
- trustgraph-flow/trustgraph/tables/iam.py — added retire_signing_key, update_user_profile, update_user_password, update_user_enabled,
update_workspace.
- trustgraph-flow/trustgraph/iam/service/iam.py — 12 new handlers + dispatch entries.
- trustgraph-base/trustgraph/base/iam_client.py — matching client helpers for all of them.
Smoke-test suggestions:
# change password for alice (from "s3cret" → "n3wer")
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"change-password","user_id":"b2960feb-caef-401d-af65-01bdb6960cad","password":"s3cret","new_password":"n3wer"}'
# login with new password
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"login","username":"alice","password":"n3wer"}'
# admin resets alice's password
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"reset-password","workspace":"default","user_id":"b2960feb-caef-401d-af65-01bdb6960cad"}'
# → {"temporary_password":"..."}
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"login","username":"alice","password":"fH2ttyrIcVXCIkH_"}'
# create a second workspace
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"create-workspace","workspace_record":{"id":"acme","name":"Acme Corp","enabled":true}}'
# rotate signing key (next login produces a JWT signed by a new kid)
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-d '{"operation":"rotate-signing-key"}'
curl -s -X POST "http://localhost:8088/api/v1/flow" \
-H "Authorization: Bearer tg_bs_kBAhfejiEJmbcO1gElbxk3MpV7wQFygP" \
-H "Content-Type: application/json" \
-d '{"operation":"list-flows"}'
curl -s -X POST "http://localhost:8088/api/v1/iam" \
-H "Authorization: Bearer tg_bs_kBAhfejiEJmbcO1gElbxk3MpV7wQFygP" \
-H "Content-Type: application/json" \
-d '{"operation":"list-users"}'
curl -s -X POST http://localhost:8088/api/v1/iam \
-H "Content-Type: application/json" \
-H "Authorization: Bearer tg_bs_kBAhfejiEJmbcO1gElbxk3MpV7wQFygP" \
-d '{
"operation": "create-user",
"workspace": "default",
"user": {
"username": "alice",
"name": "Alice",
"email": "alice@example.com",
"password": "s3cret",
"roles": ["writer"]
}
}'
# Login (public, no token needed) → returns a JWT
curl -s -X POST "http://localhost:8088/api/v1/auth/login" \
-H "Content-Type: application/json" \
-d '{"username":"alice","password":"s3cret"}'
export TRUSTGRAPH_TOKEN=$(tg-bootstrap-iam) # on fresh bootstrap-mode deployment
# or set to your existing admin API key
tg-create-user --username alice --roles writer
# → prints alice's user id
ALICE_ID=<uuid from above>
ALICE_KEY=$(tg-create-api-key --user-id $ALICE_ID --name alice-laptop)
# → alice's plaintext API key
tg-list-users
tg-list-api-keys --user-id $ALICE_ID
tg-revoke-api-key --key-id <...>
tg-disable-user --user-id $ALICE_ID
# User self-service:
tg-login --username alice # prompts for password, prints JWT
tg-change-password # prompts for current + new

View file

@ -14,13 +14,13 @@ from trustgraph.embeddings.ollama.processor import Processor
class TestOllamaDynamicModelLoading(IsolatedAsyncioTestCase):
"""Test Ollama dynamic model selection"""
@patch('trustgraph.embeddings.ollama.processor.Client')
@patch('trustgraph.embeddings.ollama.processor.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.embeddings_service.EmbeddingsService.__init__')
async def test_client_initialized_with_host(self, mock_embeddings_init, mock_async_init, mock_client_class):
"""Test that Ollama client is initialized with correct host"""
# Arrange
mock_ollama_client = Mock()
mock_ollama_client = AsyncMock()
mock_response = Mock()
mock_response.embeddings = [[0.1, 0.2, 0.3, 0.4, 0.5]]
mock_ollama_client.embed.return_value = mock_response
@ -36,13 +36,13 @@ class TestOllamaDynamicModelLoading(IsolatedAsyncioTestCase):
mock_client_class.assert_called_once_with(host="http://localhost:11434")
assert processor.default_model == "test-model"
@patch('trustgraph.embeddings.ollama.processor.Client')
@patch('trustgraph.embeddings.ollama.processor.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.embeddings_service.EmbeddingsService.__init__')
async def test_on_embeddings_uses_default_model(self, mock_embeddings_init, mock_async_init, mock_client_class):
"""Test that on_embeddings uses default model when no model specified"""
# Arrange
mock_ollama_client = Mock()
mock_ollama_client = AsyncMock()
mock_response = Mock()
mock_response.embeddings = [[0.1, 0.2, 0.3, 0.4, 0.5]]
mock_ollama_client.embed.return_value = mock_response
@ -62,13 +62,13 @@ class TestOllamaDynamicModelLoading(IsolatedAsyncioTestCase):
)
assert result == [[0.1, 0.2, 0.3, 0.4, 0.5]]
@patch('trustgraph.embeddings.ollama.processor.Client')
@patch('trustgraph.embeddings.ollama.processor.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.embeddings_service.EmbeddingsService.__init__')
async def test_on_embeddings_uses_specified_model(self, mock_embeddings_init, mock_async_init, mock_client_class):
"""Test that on_embeddings uses specified model when provided"""
# Arrange
mock_ollama_client = Mock()
mock_ollama_client = AsyncMock()
mock_response = Mock()
mock_response.embeddings = [[0.1, 0.2, 0.3, 0.4, 0.5]]
mock_ollama_client.embed.return_value = mock_response
@ -88,13 +88,13 @@ class TestOllamaDynamicModelLoading(IsolatedAsyncioTestCase):
)
assert result == [[0.1, 0.2, 0.3, 0.4, 0.5]]
@patch('trustgraph.embeddings.ollama.processor.Client')
@patch('trustgraph.embeddings.ollama.processor.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.embeddings_service.EmbeddingsService.__init__')
async def test_multiple_model_switches(self, mock_embeddings_init, mock_async_init, mock_client_class):
"""Test switching between multiple models"""
# Arrange
mock_ollama_client = Mock()
mock_ollama_client = AsyncMock()
mock_response = Mock()
mock_response.embeddings = [[0.1, 0.2, 0.3, 0.4, 0.5]]
mock_ollama_client.embed.return_value = mock_response
@ -118,13 +118,13 @@ class TestOllamaDynamicModelLoading(IsolatedAsyncioTestCase):
assert calls[2][1]['model'] == "model-a"
assert calls[3][1]['model'] == "test-model" # Default
@patch('trustgraph.embeddings.ollama.processor.Client')
@patch('trustgraph.embeddings.ollama.processor.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.embeddings_service.EmbeddingsService.__init__')
async def test_none_model_uses_default(self, mock_embeddings_init, mock_async_init, mock_client_class):
"""Test that None model parameter falls back to default"""
# Arrange
mock_ollama_client = Mock()
mock_ollama_client = AsyncMock()
mock_response = Mock()
mock_response.embeddings = [[0.1, 0.2, 0.3, 0.4, 0.5]]
mock_ollama_client.embed.return_value = mock_response
@ -143,13 +143,13 @@ class TestOllamaDynamicModelLoading(IsolatedAsyncioTestCase):
input=["test text"]
)
@patch('trustgraph.embeddings.ollama.processor.Client')
@patch('trustgraph.embeddings.ollama.processor.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.embeddings_service.EmbeddingsService.__init__')
async def test_initialization_without_model_uses_default(self, mock_embeddings_init, mock_async_init, mock_client_class):
"""Test initialization without model parameter uses module default"""
# Arrange
mock_ollama_client = Mock()
mock_ollama_client = AsyncMock()
mock_client_class.return_value = mock_ollama_client
mock_async_init.return_value = None
mock_embeddings_init.return_value = None

View file

@ -1,69 +1,447 @@
"""
Tests for Gateway Authentication
Tests for gateway/auth.py IamAuth, JWT verification, API key
resolution cache.
JWTs are signed with real Ed25519 keypairs generated per-test, so
the crypto path is exercised end-to-end without mocks. API-key
resolution is tested against a stubbed IamClient since the real
one requires pub/sub.
"""
import base64
import json
import time
from unittest.mock import AsyncMock, Mock, patch
import pytest
from aiohttp import web
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519
from trustgraph.gateway.auth import Authenticator
from trustgraph.gateway.auth import (
IamAuth, Identity,
_b64url_decode, _verify_jwt_eddsa,
API_KEY_CACHE_TTL,
)
class TestAuthenticator:
"""Test cases for Authenticator class"""
# -- helpers ---------------------------------------------------------------
def test_authenticator_initialization_with_token(self):
"""Test Authenticator initialization with valid token"""
auth = Authenticator(token="test-token-123")
assert auth.token == "test-token-123"
assert auth.allow_all is False
def test_authenticator_initialization_with_allow_all(self):
"""Test Authenticator initialization with allow_all=True"""
auth = Authenticator(allow_all=True)
assert auth.token is None
assert auth.allow_all is True
def _b64url(data: bytes) -> str:
return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii")
def test_authenticator_initialization_without_token_raises_error(self):
"""Test Authenticator initialization without token raises RuntimeError"""
with pytest.raises(RuntimeError, match="Need a token"):
Authenticator()
def test_authenticator_initialization_with_empty_token_raises_error(self):
"""Test Authenticator initialization with empty token raises RuntimeError"""
with pytest.raises(RuntimeError, match="Need a token"):
Authenticator(token="")
def make_keypair():
priv = ed25519.Ed25519PrivateKey.generate()
public_pem = priv.public_key().public_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PublicFormat.SubjectPublicKeyInfo,
).decode("ascii")
return priv, public_pem
def test_permitted_with_allow_all_returns_true(self):
"""Test permitted method returns True when allow_all is enabled"""
auth = Authenticator(allow_all=True)
# Should return True regardless of token or roles
assert auth.permitted("any-token", []) is True
assert auth.permitted("different-token", ["admin"]) is True
assert auth.permitted(None, ["user"]) is True
def test_permitted_with_matching_token_returns_true(self):
"""Test permitted method returns True with matching token"""
auth = Authenticator(token="secret-token")
# Should return True when tokens match
assert auth.permitted("secret-token", []) is True
assert auth.permitted("secret-token", ["admin", "user"]) is True
def sign_jwt(priv, claims, alg="EdDSA"):
header = {"alg": alg, "typ": "JWT", "kid": "kid-test"}
h = _b64url(json.dumps(header, separators=(",", ":"), sort_keys=True).encode())
p = _b64url(json.dumps(claims, separators=(",", ":"), sort_keys=True).encode())
signing_input = f"{h}.{p}".encode("ascii")
if alg == "EdDSA":
sig = priv.sign(signing_input)
else:
raise ValueError(f"test helper doesn't sign {alg}")
return f"{h}.{p}.{_b64url(sig)}"
def test_permitted_with_non_matching_token_returns_false(self):
"""Test permitted method returns False with non-matching token"""
auth = Authenticator(token="secret-token")
# Should return False when tokens don't match
assert auth.permitted("wrong-token", []) is False
assert auth.permitted("different-token", ["admin"]) is False
assert auth.permitted(None, ["user"]) is False
def test_permitted_with_token_and_allow_all_returns_true(self):
"""Test permitted method with both token and allow_all set"""
auth = Authenticator(token="test-token", allow_all=True)
# allow_all should take precedence
assert auth.permitted("any-token", []) is True
assert auth.permitted("wrong-token", ["admin"]) is True
def make_request(auth_header):
"""Minimal stand-in for an aiohttp request — IamAuth only reads
``request.headers["Authorization"]``."""
req = Mock()
req.headers = {}
if auth_header is not None:
req.headers["Authorization"] = auth_header
return req
# -- pure helpers ----------------------------------------------------------
class TestB64UrlDecode:
def test_round_trip_without_padding(self):
data = b"hello"
encoded = _b64url(data)
assert _b64url_decode(encoded) == data
def test_handles_various_lengths(self):
for s in (b"a", b"ab", b"abc", b"abcd", b"abcde"):
assert _b64url_decode(_b64url(s)) == s
# -- JWT verification -----------------------------------------------------
class TestVerifyJwtEddsa:
def test_valid_jwt_passes(self):
priv, pub = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"iat": int(time.time()),
"exp": int(time.time()) + 60,
}
token = sign_jwt(priv, claims)
got = _verify_jwt_eddsa(token, pub)
assert got["sub"] == "user-1"
assert got["workspace"] == "default"
def test_expired_jwt_rejected(self):
priv, pub = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"iat": int(time.time()) - 3600,
"exp": int(time.time()) - 1,
}
token = sign_jwt(priv, claims)
with pytest.raises(ValueError, match="expired"):
_verify_jwt_eddsa(token, pub)
def test_bad_signature_rejected(self):
priv_a, _ = make_keypair()
_, pub_b = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"iat": int(time.time()),
"exp": int(time.time()) + 60,
}
token = sign_jwt(priv_a, claims)
# pub_b never signed this token.
with pytest.raises(Exception):
_verify_jwt_eddsa(token, pub_b)
def test_malformed_jwt_rejected(self):
_, pub = make_keypair()
with pytest.raises(ValueError, match="malformed"):
_verify_jwt_eddsa("not-a-jwt", pub)
def test_unsupported_algorithm_rejected(self):
priv, pub = make_keypair()
# Manually build an "alg":"HS256" header — no signer needed
# since we expect it to bail before verifying.
header = {"alg": "HS256", "typ": "JWT", "kid": "x"}
payload = {
"sub": "user-1", "workspace": "default",
"iat": int(time.time()), "exp": int(time.time()) + 60,
}
h = _b64url(json.dumps(header, separators=(",", ":")).encode())
p = _b64url(json.dumps(payload, separators=(",", ":")).encode())
sig = _b64url(b"not-a-real-sig")
token = f"{h}.{p}.{sig}"
with pytest.raises(ValueError, match="unsupported alg"):
_verify_jwt_eddsa(token, pub)
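Taken together, the cases above pin the verifier's contract: three-part shape check, EdDSA-only header, Ed25519 signature check against the PEM public key, then expiry. A minimal sketch satisfying those assertions, reusing the `_b64url_decode`, `json`, `time` and `serialization` names already imported in this file (illustrative only; the production `_verify_jwt_eddsa` may be structured differently):

def _verify_jwt_eddsa_sketch(token, public_pem):
    # Shape first: anything that isn't header.payload.signature is malformed.
    parts = token.split(".")
    if len(parts) != 3:
        raise ValueError("malformed JWT")
    h_b64, p_b64, s_b64 = parts
    header = json.loads(_b64url_decode(h_b64))
    # Only EdDSA is accepted; bail before any signature work.
    if header.get("alg") != "EdDSA":
        raise ValueError(f"unsupported alg: {header.get('alg')}")
    pub = serialization.load_pem_public_key(public_pem.encode("ascii"))
    # Ed25519PublicKey.verify raises InvalidSignature on a bad signature.
    pub.verify(_b64url_decode(s_b64), f"{h_b64}.{p_b64}".encode("ascii"))
    claims = json.loads(_b64url_decode(p_b64))
    if claims.get("exp", 0) < int(time.time()):
        raise ValueError("token expired")
    return claims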
# -- Identity --------------------------------------------------------------
class TestIdentity:
def test_fields(self):
i = Identity(
handle="u", workspace="w",
principal_id="u", source="api-key",
)
assert i.handle == "u"
assert i.workspace == "w"
assert i.principal_id == "u"
assert i.source == "api-key"
# -- IamAuth.authenticate --------------------------------------------------
class TestIamAuthDispatch:
"""``authenticate()`` chooses between the JWT and API-key paths
by shape of the bearer."""
@pytest.mark.asyncio
async def test_no_authorization_header_raises_401(self):
auth = IamAuth(backend=Mock())
with pytest.raises(web.HTTPUnauthorized):
await auth.authenticate(make_request(None))
@pytest.mark.asyncio
async def test_non_bearer_header_raises_401(self):
auth = IamAuth(backend=Mock())
with pytest.raises(web.HTTPUnauthorized):
await auth.authenticate(make_request("Basic whatever"))
@pytest.mark.asyncio
async def test_empty_bearer_raises_401(self):
auth = IamAuth(backend=Mock())
with pytest.raises(web.HTTPUnauthorized):
await auth.authenticate(make_request("Bearer "))
@pytest.mark.asyncio
async def test_unknown_format_raises_401(self):
# Not tg_... and not dotted-JWT shape.
auth = IamAuth(backend=Mock())
with pytest.raises(web.HTTPUnauthorized):
await auth.authenticate(make_request("Bearer garbage"))
@pytest.mark.asyncio
async def test_valid_jwt_resolves_to_identity(self):
priv, pub = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"iat": int(time.time()),
"exp": int(time.time()) + 60,
}
token = sign_jwt(priv, claims)
auth = IamAuth(backend=Mock())
auth._signing_public_pem = pub
ident = await auth.authenticate(
make_request(f"Bearer {token}")
)
assert ident.handle == "user-1"
assert ident.workspace == "default"
assert ident.principal_id == "user-1"
assert ident.source == "jwt"
@pytest.mark.asyncio
async def test_jwt_without_public_key_fails(self):
# If the gateway hasn't fetched IAM's public key yet, JWTs
# must not validate — even ones that would otherwise pass.
priv, _ = make_keypair()
claims = {
"sub": "user-1", "workspace": "default",
"iat": int(time.time()), "exp": int(time.time()) + 60,
}
token = sign_jwt(priv, claims)
auth = IamAuth(backend=Mock())
# _signing_public_pem defaults to None
with pytest.raises(web.HTTPUnauthorized):
await auth.authenticate(make_request(f"Bearer {token}"))
@pytest.mark.asyncio
async def test_api_key_path(self):
auth = IamAuth(backend=Mock())
async def fake_resolve(api_key):
assert api_key == "tg_testkey"
# Roles are returned by the regime as a hint but the
# gateway ignores them — kept here so the resolve
# protocol shape is exercised.
return ("user-xyz", "default", ["admin"])
async def fake_with_client(op):
return await op(Mock(resolve_api_key=fake_resolve))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
ident = await auth.authenticate(
make_request("Bearer tg_testkey")
)
assert ident.handle == "user-xyz"
assert ident.workspace == "default"
assert ident.principal_id == "user-xyz"
assert ident.source == "api-key"
@pytest.mark.asyncio
async def test_api_key_rejection_masked_as_401(self):
auth = IamAuth(backend=Mock())
async def fake_with_client(op):
raise RuntimeError("auth-failed: unknown api key")
with patch.object(auth, "_with_client", side_effect=fake_with_client):
with pytest.raises(web.HTTPUnauthorized):
await auth.authenticate(
make_request("Bearer tg_bogus")
)
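The dispatch above is purely shape-based: a ``tg_``-prefixed opaque string goes down the API-key path, a dotted three-segment token is treated as a JWT, and anything else is rejected before either path runs. A hedged sketch of that classification (the dot-count heuristic is an assumption drawn from the "dotted-JWT shape" comment, not the gateway's exact check):

def classify_bearer_sketch(bearer):
    if bearer.startswith("tg_"):
        return "api-key"
    if bearer.count(".") == 2:
        return "jwt"
    raise web.HTTPUnauthorized()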
# -- API key cache ---------------------------------------------------------
class TestApiKeyCache:
@pytest.mark.asyncio
async def test_cache_hit_skips_iam(self):
auth = IamAuth(backend=Mock())
calls = {"n": 0}
async def fake_with_client(op):
calls["n"] += 1
return await op(Mock(
resolve_api_key=AsyncMock(
return_value=("u", "default", ["reader"]),
)
))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
await auth.authenticate(make_request("Bearer tg_k1"))
await auth.authenticate(make_request("Bearer tg_k1"))
await auth.authenticate(make_request("Bearer tg_k1"))
# Only the first lookup reaches IAM; the rest are cache hits.
assert calls["n"] == 1
@pytest.mark.asyncio
async def test_different_keys_are_separately_cached(self):
auth = IamAuth(backend=Mock())
seen = []
async def fake_with_client(op):
async def resolve(plaintext):
seen.append(plaintext)
return ("u-" + plaintext, "default", ["reader"])
return await op(Mock(resolve_api_key=resolve))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
a = await auth.authenticate(make_request("Bearer tg_a"))
b = await auth.authenticate(make_request("Bearer tg_b"))
assert a.handle == "u-tg_a"
assert b.handle == "u-tg_b"
assert seen == ["tg_a", "tg_b"]
@pytest.mark.asyncio
async def test_cache_has_ttl_constant_set(self):
# Not a behaviour test — just ensures we don't accidentally
# set TTL to 0 (which would defeat the cache) or to a week.
assert 10 <= API_KEY_CACHE_TTL <= 3600
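All three cache cases above are satisfied by a plain expiring map keyed on the API key plaintext. A sketch under that assumption (class, attribute and method names here are illustrative, not the gateway's; ``Identity`` and ``time`` are reused from this file's imports):

class ApiKeyCacheSketch:
    def __init__(self, resolve, ttl=API_KEY_CACHE_TTL):
        self._resolve = resolve   # async: api_key -> (handle, workspace, roles)
        self._ttl = ttl
        self._cache = {}          # api_key -> (expires_at, Identity)

    async def lookup(self, api_key):
        now = time.time()
        hit = self._cache.get(api_key)
        if hit and hit[0] > now:
            return hit[1]         # fresh entry: no IAM round trip
        handle, workspace, _roles = await self._resolve(api_key)
        ident = Identity(handle=handle, workspace=workspace,
                         principal_id=handle, source="api-key")
        self._cache[api_key] = (now + self._ttl, ident)
        return ident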
# -- IamAuth.authorise -----------------------------------------------------
class TestAuthorise:
"""``authorise()`` is the gateway's only authorisation entry
point under the IAM contract. It calls iam-svc, caches the
decision for the regime's TTL (clamped above), and raises 403
on deny / 401 on regime error (fail closed)."""
def _make_identity(self, handle="u-1", workspace="default"):
return Identity(
handle=handle, workspace=workspace,
principal_id=handle, source="api-key",
)
@pytest.mark.asyncio
async def test_allow_returns_no_exception(self):
auth = IamAuth(backend=Mock())
async def fake_with_client(op):
return await op(Mock(
authorise=AsyncMock(return_value=(True, 30)),
))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
await auth.authorise(
self._make_identity(),
"graph:read",
{"workspace": "default"},
{},
)
@pytest.mark.asyncio
async def test_deny_raises_403(self):
auth = IamAuth(backend=Mock())
async def fake_with_client(op):
return await op(Mock(
authorise=AsyncMock(return_value=(False, 30)),
))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
with pytest.raises(web.HTTPForbidden):
await auth.authorise(
self._make_identity(),
"users:admin",
{},
{"workspace": "acme"},
)
@pytest.mark.asyncio
async def test_regime_error_fails_closed_as_401(self):
# If iam-svc errors, the gateway must NOT silently allow.
auth = IamAuth(backend=Mock())
async def fake_with_client(op):
raise RuntimeError("iam-svc down")
with patch.object(auth, "_with_client", side_effect=fake_with_client):
with pytest.raises(web.HTTPUnauthorized):
await auth.authorise(
self._make_identity(),
"graph:read",
{"workspace": "default"},
{},
)
@pytest.mark.asyncio
async def test_allow_decision_is_cached(self):
auth = IamAuth(backend=Mock())
calls = {"n": 0}
async def fake_with_client(op):
calls["n"] += 1
return await op(Mock(
authorise=AsyncMock(return_value=(True, 30)),
))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
ident = self._make_identity()
for _ in range(5):
await auth.authorise(
ident, "graph:read", {"workspace": "default"}, {},
)
assert calls["n"] == 1
@pytest.mark.asyncio
async def test_deny_decision_is_cached(self):
auth = IamAuth(backend=Mock())
calls = {"n": 0}
async def fake_with_client(op):
calls["n"] += 1
return await op(Mock(
authorise=AsyncMock(return_value=(False, 30)),
))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
ident = self._make_identity()
for _ in range(5):
with pytest.raises(web.HTTPForbidden):
await auth.authorise(
ident, "users:admin", {}, {"workspace": "acme"},
)
# Denies are cached too — repeated attempts don't re-hit IAM.
assert calls["n"] == 1
@pytest.mark.asyncio
async def test_different_resources_cached_separately(self):
auth = IamAuth(backend=Mock())
calls = {"n": 0}
async def fake_with_client(op):
calls["n"] += 1
return await op(Mock(
authorise=AsyncMock(return_value=(True, 30)),
))
with patch.object(auth, "_with_client", side_effect=fake_with_client):
ident = self._make_identity()
await auth.authorise(
ident, "graph:read", {"workspace": "a"}, {},
)
await auth.authorise(
ident, "graph:read", {"workspace": "b"}, {},
)
# Different resource → different cache key → two IAM calls.
assert calls["n"] == 2

View file

@ -0,0 +1,171 @@
"""
Tests for gateway/capabilities.py: the thin authorisation surface
under the IAM contract.
The gateway no longer holds policy state (roles, capability sets,
workspace scopes); those live in iam-svc. These tests cover only
what the gateway shim does itself: PUBLIC / AUTHENTICATED short-
circuiting, default-fill of workspace, and forwarding of capability
checks to ``auth.authorise``.
"""
import pytest
from aiohttp import web
from unittest.mock import AsyncMock, MagicMock
from trustgraph.gateway.capabilities import (
PUBLIC, AUTHENTICATED,
enforce, enforce_workspace,
access_denied, auth_failure,
)
# -- test fixtures ---------------------------------------------------------
class _Identity:
"""Stand-in for auth.Identity — under the IAM contract it has
just ``handle``, ``workspace``, ``principal_id``, ``source``."""
def __init__(self, handle="user-1", workspace="default"):
self.handle = handle
self.workspace = workspace
self.principal_id = handle
self.source = "api-key"
def _allow_auth(identity=None):
"""Build an Auth double that authenticates to ``identity`` and
allows every authorise() call."""
auth = MagicMock()
auth.authenticate = AsyncMock(
return_value=identity or _Identity(),
)
auth.authorise = AsyncMock(return_value=None)
return auth
def _deny_auth(identity=None):
"""Build an Auth double that authenticates but denies authorise."""
auth = MagicMock()
auth.authenticate = AsyncMock(
return_value=identity or _Identity(),
)
auth.authorise = AsyncMock(side_effect=access_denied())
return auth
# -- enforce() -------------------------------------------------------------
class TestEnforce:
@pytest.mark.asyncio
async def test_public_returns_none_no_auth(self):
auth = _allow_auth()
result = await enforce(MagicMock(), auth, PUBLIC)
assert result is None
auth.authenticate.assert_not_called()
auth.authorise.assert_not_called()
@pytest.mark.asyncio
async def test_authenticated_skips_authorise(self):
identity = _Identity()
auth = _allow_auth(identity)
result = await enforce(MagicMock(), auth, AUTHENTICATED)
assert result is identity
auth.authenticate.assert_awaited_once()
auth.authorise.assert_not_called()
@pytest.mark.asyncio
async def test_capability_calls_authorise_system_level(self):
identity = _Identity()
auth = _allow_auth(identity)
result = await enforce(MagicMock(), auth, "graph:read")
assert result is identity
auth.authorise.assert_awaited_once_with(
identity, "graph:read", {}, {},
)
@pytest.mark.asyncio
async def test_capability_denied_raises_forbidden(self):
auth = _deny_auth()
with pytest.raises(web.HTTPForbidden):
await enforce(MagicMock(), auth, "users:admin")
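The four cases above fix enforce()'s control flow: PUBLIC touches neither auth call, AUTHENTICATED authenticates only, and a capability string additionally forwards a system-level authorise() with empty resource and context. A sketch of that flow (illustrative; the real enforce() may differ in detail):

async def enforce_sketch(request, auth, capability):
    if capability is PUBLIC:
        return None                      # no authentication at all
    identity = await auth.authenticate(request)
    if capability is AUTHENTICATED:
        return identity                  # authn only, no authorise() call
    await auth.authorise(identity, capability, {}, {})
    return identity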
# -- enforce_workspace() ---------------------------------------------------
class TestEnforceWorkspace:
@pytest.mark.asyncio
async def test_default_fills_from_identity(self):
data = {"operation": "x"}
auth = _allow_auth()
await enforce_workspace(data, _Identity(workspace="default"), auth)
assert data["workspace"] == "default"
@pytest.mark.asyncio
async def test_caller_supplied_workspace_kept(self):
data = {"workspace": "acme", "operation": "x"}
auth = _allow_auth()
await enforce_workspace(data, _Identity(workspace="default"), auth)
assert data["workspace"] == "acme"
@pytest.mark.asyncio
async def test_no_capability_skips_authorise(self):
data = {"workspace": "default"}
auth = _allow_auth()
await enforce_workspace(data, _Identity(), auth)
auth.authorise.assert_not_called()
@pytest.mark.asyncio
async def test_capability_calls_authorise_with_resource(self):
data = {"workspace": "acme"}
identity = _Identity()
auth = _allow_auth(identity)
await enforce_workspace(
data, identity, auth, capability="graph:read",
)
auth.authorise.assert_awaited_once_with(
identity, "graph:read", {"workspace": "acme"}, {},
)
@pytest.mark.asyncio
async def test_capability_denied_propagates(self):
data = {"workspace": "acme"}
auth = _deny_auth()
with pytest.raises(web.HTTPForbidden):
await enforce_workspace(
data, _Identity(), auth, capability="users:admin",
)
@pytest.mark.asyncio
async def test_non_dict_passthrough(self):
auth = _allow_auth()
result = await enforce_workspace("not-a-dict", _Identity(), auth)
assert result == "not-a-dict"
auth.authorise.assert_not_called()
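Similarly, the workspace cases above pin down enforce_workspace(): non-dict payloads pass through untouched, an absent workspace is filled from the identity, a caller-supplied one is kept, and a capability (when given) is authorised against a resource carrying that workspace. A sketch under those assumptions:

async def enforce_workspace_sketch(data, identity, auth, capability=None):
    if not isinstance(data, dict):
        return data                      # passthrough for non-dict payloads
    data.setdefault("workspace", identity.workspace)
    if capability is not None:
        await auth.authorise(
            identity, capability, {"workspace": data["workspace"]}, {},
        )
    return data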
# -- helpers ---------------------------------------------------------------
class TestResponseHelpers:
def test_auth_failure_is_401(self):
exc = auth_failure()
assert exc.status == 401
assert "auth failure" in exc.text
def test_access_denied_is_403(self):
exc = access_denied()
assert exc.status == 403
assert "access denied" in exc.text
class TestSentinels:
def test_public_and_authenticated_are_distinct(self):
assert PUBLIC != AUTHENTICATED

View file

@ -42,7 +42,7 @@ class TestDispatcherManager:
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
assert manager.backend == mock_backend
assert manager.config_receiver == mock_config_receiver
@ -59,7 +59,10 @@ class TestDispatcherManager:
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver, prefix="custom-prefix")
manager = DispatcherManager(
mock_backend, mock_config_receiver,
auth=Mock(), prefix="custom-prefix",
)
assert manager.prefix == "custom-prefix"
@ -68,7 +71,7 @@ class TestDispatcherManager:
"""Test start_flow method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
flow_data = {"name": "test_flow", "steps": []}
@ -82,7 +85,7 @@ class TestDispatcherManager:
"""Test stop_flow method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Pre-populate with a flow
flow_data = {"name": "test_flow", "steps": []}
@ -96,7 +99,7 @@ class TestDispatcherManager:
"""Test dispatch_global_service returns DispatcherWrapper"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
wrapper = manager.dispatch_global_service()
@ -107,7 +110,7 @@ class TestDispatcherManager:
"""Test dispatch_core_export returns DispatcherWrapper"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
wrapper = manager.dispatch_core_export()
@ -118,7 +121,7 @@ class TestDispatcherManager:
"""Test dispatch_core_import returns DispatcherWrapper"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
wrapper = manager.dispatch_core_import()
@ -130,7 +133,7 @@ class TestDispatcherManager:
"""Test process_core_import method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
with patch('trustgraph.gateway.dispatch.manager.CoreImport') as mock_core_import:
mock_importer = Mock()
@ -148,7 +151,7 @@ class TestDispatcherManager:
"""Test process_core_export method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
with patch('trustgraph.gateway.dispatch.manager.CoreExport') as mock_core_export:
mock_exporter = Mock()
@ -166,7 +169,7 @@ class TestDispatcherManager:
"""Test process_global_service method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
manager.invoke_global_service = AsyncMock(return_value="global_result")
@ -181,7 +184,7 @@ class TestDispatcherManager:
"""Test invoke_global_service with existing dispatcher"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Pre-populate with existing dispatcher
mock_dispatcher = Mock()
@ -198,7 +201,7 @@ class TestDispatcherManager:
"""Test invoke_global_service creates new dispatcher"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
with patch('trustgraph.gateway.dispatch.manager.global_dispatchers') as mock_dispatchers:
mock_dispatcher_class = Mock()
@ -230,7 +233,7 @@ class TestDispatcherManager:
"""Test dispatch_flow_import returns correct method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
result = manager.dispatch_flow_import()
@ -240,7 +243,7 @@ class TestDispatcherManager:
"""Test dispatch_flow_export returns correct method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
result = manager.dispatch_flow_export()
@ -250,7 +253,7 @@ class TestDispatcherManager:
"""Test dispatch_socket returns correct method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
result = manager.dispatch_socket()
@ -260,7 +263,7 @@ class TestDispatcherManager:
"""Test dispatch_flow_service returns DispatcherWrapper"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
wrapper = manager.dispatch_flow_service()
@ -272,7 +275,7 @@ class TestDispatcherManager:
"""Test process_flow_import with valid flow and kind"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow
manager.flows[("default", "test_flow")] = {
@ -308,7 +311,7 @@ class TestDispatcherManager:
"""Test process_flow_import with invalid flow"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
params = {"flow": "invalid_flow", "kind": "triples"}
@ -323,7 +326,7 @@ class TestDispatcherManager:
warnings.simplefilter("ignore", RuntimeWarning)
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow
manager.flows[("default", "test_flow")] = {
@ -345,7 +348,7 @@ class TestDispatcherManager:
"""Test process_flow_export with valid flow and kind"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow
manager.flows[("default", "test_flow")] = {
@ -378,26 +381,47 @@ class TestDispatcherManager:
@pytest.mark.asyncio
async def test_process_socket(self):
"""Test process_socket method"""
"""process_socket constructs a Mux with the manager's auth
instance passed through; this is the gateway's trust path
for first-frame WebSocket authentication. A Mux cannot be
built without auth (tested separately); this test pins that
the dispatcher-manager threads the correct auth value into
the Mux constructor call."""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
mock_auth = Mock()
manager = DispatcherManager(
mock_backend, mock_config_receiver, auth=mock_auth,
)
with patch('trustgraph.gateway.dispatch.manager.Mux') as mock_mux:
mock_mux_instance = Mock()
mock_mux.return_value = mock_mux_instance
result = await manager.process_socket("ws", "running", {})
mock_mux.assert_called_once_with(manager, "ws", "running")
mock_mux.assert_called_once_with(
manager, "ws", "running", auth=mock_auth,
)
assert result == mock_mux_instance
def test_dispatcher_manager_requires_auth(self):
"""Constructing a DispatcherManager without an auth argument
must fail; a no-auth DispatcherManager would produce a
Mux without authentication, silently downgrading the socket
auth path."""
mock_backend = Mock()
mock_config_receiver = Mock()
with pytest.raises(ValueError, match="auth"):
DispatcherManager(mock_backend, mock_config_receiver, auth=None)
@pytest.mark.asyncio
async def test_process_flow_service(self):
"""Test process_flow_service method"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
manager.invoke_flow_service = AsyncMock(return_value="flow_result")
@ -412,7 +436,7 @@ class TestDispatcherManager:
"""Test invoke_flow_service with existing dispatcher"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Add flow to the flows dictionary
manager.flows[("default", "test_flow")] = {"services": {"agent": {}}}
@ -432,7 +456,7 @@ class TestDispatcherManager:
"""Test invoke_flow_service creates request-response dispatcher"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow
manager.flows[("default", "test_flow")] = {
@ -476,7 +500,7 @@ class TestDispatcherManager:
"""Test invoke_flow_service creates sender dispatcher"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow
manager.flows[("default", "test_flow")] = {
@ -516,7 +540,7 @@ class TestDispatcherManager:
"""Test invoke_flow_service with invalid flow"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
with pytest.raises(RuntimeError, match="Invalid flow"):
await manager.invoke_flow_service("data", "responder", "default", "invalid_flow", "agent")
@ -526,7 +550,7 @@ class TestDispatcherManager:
"""Test invoke_flow_service with kind not supported by flow"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow without agent interface
manager.flows[("default", "test_flow")] = {
@ -543,7 +567,7 @@ class TestDispatcherManager:
"""Test invoke_flow_service with invalid kind"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
# Setup test flow with interface but unsupported kind
manager.flows[("default", "test_flow")] = {
@ -570,7 +594,7 @@ class TestDispatcherManager:
"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
async def slow_start():
# Yield to the event loop so other coroutines get a chance to run,
@ -606,7 +630,7 @@ class TestDispatcherManager:
"""
mock_backend = Mock()
mock_config_receiver = Mock()
manager = DispatcherManager(mock_backend, mock_config_receiver)
manager = DispatcherManager(mock_backend, mock_config_receiver, auth=Mock())
manager.flows[("default", "test_flow")] = {
"interfaces": {

View file

@ -12,6 +12,19 @@ from trustgraph.gateway.dispatch.mux import Mux, MAX_QUEUE_SIZE
class TestMux:
"""Test cases for Mux class"""
def test_mux_requires_auth(self):
"""Constructing a Mux without an ``auth`` argument must
fail. The Mux implements the first-frame auth protocol and
there is no no-auth mode; a no-auth Mux would silently
accept every frame without authenticating it."""
with pytest.raises(ValueError, match="auth"):
Mux(
dispatcher_manager=MagicMock(),
ws=MagicMock(),
running=MagicMock(),
auth=None,
)
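The "no no-auth mode" constraint exists because the Mux, not the HTTP layer, performs authentication for the socket route. A heavily hedged sketch of what handling that first frame could look like, assuming the first frame carries an auth token and that the auth object exposes some async verification call (both assumptions; the real Mux's field names and flow are not shown by these tests):

import json

async def first_frame_auth_sketch(ws, auth):
    msg = json.loads(await ws.receive_str())       # aiohttp text frame
    if msg.get("type") != "auth":
        await ws.close()
        raise RuntimeError("expected auth as the first frame")
    return await auth.verify_token(msg.get("token"))   # hypothetical method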
def test_mux_initialization(self):
"""Test Mux initialization"""
mock_dispatcher_manager = MagicMock()
@ -21,7 +34,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=mock_ws,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
assert mux.dispatcher_manager == mock_dispatcher_manager
@ -40,7 +54,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=mock_ws,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
# Call destroy
@ -61,7 +76,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=None,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
# Call destroy
@ -81,7 +97,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=mock_ws,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
# Mock message with valid JSON
@ -108,7 +125,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=mock_ws,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
# Mock message without request field
@ -137,7 +155,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=mock_ws,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
# Mock message without id field
@ -164,7 +183,8 @@ class TestMux:
mux = Mux(
dispatcher_manager=mock_dispatcher_manager,
ws=mock_ws,
running=mock_running
running=mock_running,
auth=MagicMock(),
)
# Mock message with invalid JSON

View file

@ -13,29 +13,36 @@ class TestConstantEndpoint:
"""Test cases for ConstantEndpoint class"""
def test_constant_endpoint_initialization(self):
"""Test ConstantEndpoint initialization"""
"""Construction records the configured capability on the
instance. The capability is a required argument (no
permissive default), and the test passes an explicit
value to demonstrate the contract."""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = ConstantEndpoint(
endpoint_path="/api/test",
auth=mock_auth,
dispatcher=mock_dispatcher
dispatcher=mock_dispatcher,
capability="config:read",
)
assert endpoint.path == "/api/test"
assert endpoint.auth == mock_auth
assert endpoint.dispatcher == mock_dispatcher
assert endpoint.operation == "service"
assert endpoint.capability == "config:read"
@pytest.mark.asyncio
async def test_constant_endpoint_start_method(self):
"""Test ConstantEndpoint start method (should be no-op)"""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = ConstantEndpoint("/api/test", mock_auth, mock_dispatcher)
endpoint = ConstantEndpoint(
"/api/test", mock_auth, mock_dispatcher,
capability="config:read",
)
# start() should complete without error
await endpoint.start()
@ -44,10 +51,13 @@ class TestConstantEndpoint:
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
mock_app = MagicMock()
endpoint = ConstantEndpoint("/api/test", mock_auth, mock_dispatcher)
endpoint = ConstantEndpoint(
"/api/test", mock_auth, mock_dispatcher,
capability="config:read",
)
endpoint.add_routes(mock_app)
# Verify add_routes was called with POST route
mock_app.add_routes.assert_called_once()
# The call should include web.post with the path and handler

View file

@ -1,4 +1,12 @@
"""Tests for Gateway i18n pack endpoint."""
"""Tests for Gateway i18n pack endpoint.
Production registers this endpoint with ``capability=PUBLIC``: the
login UI needs to render its own i18n strings before any user has
authenticated, so the endpoint is deliberately pre-auth. These
tests exercise the PUBLIC configuration, which is the production
contract. Behaviour of authenticated endpoints is covered by the
IamAuth tests in ``test_auth.py``.
"""
import json
from unittest.mock import MagicMock
@ -7,6 +15,7 @@ import pytest
from aiohttp import web
from trustgraph.gateway.endpoint.i18n import I18nPackEndpoint
from trustgraph.gateway.capabilities import PUBLIC
class TestI18nPackEndpoint:
@ -17,23 +26,28 @@ class TestI18nPackEndpoint:
endpoint = I18nPackEndpoint(
endpoint_path="/api/v1/i18n/packs/{lang}",
auth=mock_auth,
capability=PUBLIC,
)
assert endpoint.path == "/api/v1/i18n/packs/{lang}"
assert endpoint.auth == mock_auth
assert endpoint.operation == "service"
assert endpoint.capability == PUBLIC
@pytest.mark.asyncio
async def test_i18n_endpoint_start_method(self):
mock_auth = MagicMock()
endpoint = I18nPackEndpoint("/api/v1/i18n/packs/{lang}", mock_auth)
endpoint = I18nPackEndpoint(
"/api/v1/i18n/packs/{lang}", mock_auth, capability=PUBLIC,
)
await endpoint.start()
def test_add_routes_registers_get_handler(self):
mock_auth = MagicMock()
mock_app = MagicMock()
endpoint = I18nPackEndpoint("/api/v1/i18n/packs/{lang}", mock_auth)
endpoint = I18nPackEndpoint(
"/api/v1/i18n/packs/{lang}", mock_auth, capability=PUBLIC,
)
endpoint.add_routes(mock_app)
mock_app.add_routes.assert_called_once()
@ -41,35 +55,55 @@ class TestI18nPackEndpoint:
assert len(call_args) == 1
@pytest.mark.asyncio
async def test_handle_unauthorized_on_invalid_auth_scheme(self):
async def test_handle_returns_pack_without_authenticating(self):
"""The PUBLIC endpoint serves the language pack without
invoking the auth handler at all; the pre-login UI must be
reachable. The test uses an auth mock that raises if
touched, so any auth attempt by the endpoint is caught."""
mock_auth = MagicMock()
mock_auth.permitted.return_value = True
endpoint = I18nPackEndpoint("/api/v1/i18n/packs/{lang}", mock_auth)
def _should_not_be_called(*args, **kwargs):
raise AssertionError(
"PUBLIC endpoint must not invoke auth.authenticate"
)
mock_auth.authenticate = _should_not_be_called
endpoint = I18nPackEndpoint(
"/api/v1/i18n/packs/{lang}", mock_auth, capability=PUBLIC,
)
request = MagicMock()
request.path = "/api/v1/i18n/packs/en"
# A caller-supplied Authorization header of any form should
# be ignored — PUBLIC means we don't look at it.
request.headers = {"Authorization": "Token abc"}
request.match_info = {"lang": "en"}
resp = await endpoint.handle(request)
assert isinstance(resp, web.HTTPUnauthorized)
@pytest.mark.asyncio
async def test_handle_returns_pack_when_permitted(self):
mock_auth = MagicMock()
mock_auth.permitted.return_value = True
endpoint = I18nPackEndpoint("/api/v1/i18n/packs/{lang}", mock_auth)
request = MagicMock()
request.path = "/api/v1/i18n/packs/en"
request.headers = {}
request.match_info = {"lang": "en"}
resp = await endpoint.handle(request)
assert resp.status == 200
payload = json.loads(resp.body.decode("utf-8"))
assert isinstance(payload, dict)
assert "cli.verify_system_status.title" in payload
@pytest.mark.asyncio
async def test_handle_rejects_path_traversal(self):
"""The ``lang`` path parameter is reflected through to the
filesystem-backed pack loader. The endpoint contains an
explicit defense against ``/`` and ``..`` in the value; this
test pins that defense in place."""
mock_auth = MagicMock()
endpoint = I18nPackEndpoint(
"/api/v1/i18n/packs/{lang}", mock_auth, capability=PUBLIC,
)
for bad in ("../../etc/passwd", "en/../fr", "a/b"):
request = MagicMock()
request.path = f"/api/v1/i18n/packs/{bad}"
request.headers = {}
request.match_info = {"lang": bad}
resp = await endpoint.handle(request)
assert isinstance(resp, web.HTTPBadRequest), (
f"path-traversal defense did not reject lang={bad!r}"
)
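The defence being pinned is deliberately small: reject any ``lang`` value containing a path separator or a parent-directory component before it ever reaches the filesystem-backed loader. A sketch of a check satisfying the cases above (illustrative; the endpoint's actual predicate may differ):

def lang_is_safe(lang):
    return bool(lang) and "/" not in lang and ".." not in lang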

View file

@ -12,30 +12,24 @@ class TestEndpointManager:
"""Test cases for EndpointManager class"""
def test_endpoint_manager_initialization(self):
"""Test EndpointManager initialization creates all endpoints"""
"""EndpointManager wires up the full endpoint set and
records dispatcher_manager / timeout on the instance."""
mock_dispatcher_manager = MagicMock()
mock_auth = MagicMock()
# Mock dispatcher methods
mock_dispatcher_manager.dispatch_global_service.return_value = MagicMock()
mock_dispatcher_manager.dispatch_socket.return_value = MagicMock()
mock_dispatcher_manager.dispatch_flow_service.return_value = MagicMock()
mock_dispatcher_manager.dispatch_flow_import.return_value = MagicMock()
mock_dispatcher_manager.dispatch_flow_export.return_value = MagicMock()
mock_dispatcher_manager.dispatch_core_import.return_value = MagicMock()
mock_dispatcher_manager.dispatch_core_export.return_value = MagicMock()
# The dispatcher_manager exposes a small set of factory
# methods — MagicMock auto-creates them, returning fresh
# MagicMocks on each call.
manager = EndpointManager(
dispatcher_manager=mock_dispatcher_manager,
auth=mock_auth,
prometheus_url="http://prometheus:9090",
timeout=300
timeout=300,
)
assert manager.dispatcher_manager == mock_dispatcher_manager
assert manager.timeout == 300
assert manager.services == {}
assert len(manager.endpoints) > 0 # Should have multiple endpoints
assert len(manager.endpoints) > 0
def test_endpoint_manager_with_default_timeout(self):
"""Test EndpointManager with default timeout value"""
@ -79,9 +73,17 @@ class TestEndpointManager:
prometheus_url="http://test:9090"
)
# Verify all dispatcher methods were called during initialization
# Each dispatcher factory is invoked once per endpoint that
# needs a dedicated wire. dispatch_auth_iam is shared by
# two endpoints — AuthEndpoints (login / bootstrap /
# change-password) and IamEndpoint (registry-driven
# /api/v1/iam) — so it's expected to be called twice.
# Both forwarders pin the dispatcher to kind=iam and reuse
# the same factory; they're distinct from
# dispatch_global_service (the generic /api/v1/{kind} route).
mock_dispatcher_manager.dispatch_global_service.assert_called_once()
mock_dispatcher_manager.dispatch_socket.assert_called() # Called twice
assert mock_dispatcher_manager.dispatch_auth_iam.call_count == 2
mock_dispatcher_manager.dispatch_socket.assert_called_once()
mock_dispatcher_manager.dispatch_flow_service.assert_called_once()
mock_dispatcher_manager.dispatch_flow_import.assert_called_once()
mock_dispatcher_manager.dispatch_flow_export.assert_called_once()

View file

@ -12,31 +12,35 @@ class TestMetricsEndpoint:
"""Test cases for MetricsEndpoint class"""
def test_metrics_endpoint_initialization(self):
"""Test MetricsEndpoint initialization"""
"""Construction records the configured capability on the
instance. In production MetricsEndpoint is gated by
'metrics:read' so that's the natural value to pass."""
mock_auth = MagicMock()
endpoint = MetricsEndpoint(
prometheus_url="http://prometheus:9090",
endpoint_path="/metrics",
auth=mock_auth
auth=mock_auth,
capability="metrics:read",
)
assert endpoint.prometheus_url == "http://prometheus:9090"
assert endpoint.path == "/metrics"
assert endpoint.auth == mock_auth
assert endpoint.operation == "service"
assert endpoint.capability == "metrics:read"
@pytest.mark.asyncio
async def test_metrics_endpoint_start_method(self):
"""Test MetricsEndpoint start method (should be no-op)"""
mock_auth = MagicMock()
endpoint = MetricsEndpoint(
prometheus_url="http://localhost:9090",
endpoint_path="/metrics",
auth=mock_auth
auth=mock_auth,
capability="metrics:read",
)
# start() should complete without error
await endpoint.start()
@ -44,15 +48,16 @@ class TestMetricsEndpoint:
"""Test add_routes method registers GET route with wildcard path"""
mock_auth = MagicMock()
mock_app = MagicMock()
endpoint = MetricsEndpoint(
prometheus_url="http://prometheus:9090",
endpoint_path="/metrics",
auth=mock_auth
auth=mock_auth,
capability="metrics:read",
)
endpoint.add_routes(mock_app)
# Verify add_routes was called with GET route
mock_app.add_routes.assert_called_once()
# The call should include web.get with wildcard path pattern

View file

@ -1,5 +1,12 @@
"""
Tests for Gateway Socket Endpoint
Tests for Gateway Socket Endpoint.
In production the only SocketEndpoint registered with HTTP-layer
auth is ``/api/v1/socket`` using ``capability=AUTHENTICATED`` with
``in_band_auth=True`` (first-frame auth over the websocket frames,
not at the handshake). The tests below use AUTHENTICATED as the
representative capability; construction / worker / listener
behaviour is independent of which capability is configured.
"""
import pytest
@ -7,41 +14,47 @@ from unittest.mock import MagicMock, AsyncMock
from aiohttp import WSMsgType
from trustgraph.gateway.endpoint.socket import SocketEndpoint
from trustgraph.gateway.capabilities import AUTHENTICATED
class TestSocketEndpoint:
"""Test cases for SocketEndpoint class"""
def test_socket_endpoint_initialization(self):
"""Test SocketEndpoint initialization"""
"""Construction records the configured capability on the
instance. No permissive default is applied."""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = SocketEndpoint(
endpoint_path="/api/socket",
auth=mock_auth,
dispatcher=mock_dispatcher
dispatcher=mock_dispatcher,
capability=AUTHENTICATED,
)
assert endpoint.path == "/api/socket"
assert endpoint.auth == mock_auth
assert endpoint.dispatcher == mock_dispatcher
assert endpoint.operation == "socket"
assert endpoint.capability == AUTHENTICATED
@pytest.mark.asyncio
async def test_worker_method(self):
"""Test SocketEndpoint worker method"""
mock_auth = MagicMock()
mock_dispatcher = AsyncMock()
endpoint = SocketEndpoint("/api/socket", mock_auth, mock_dispatcher)
endpoint = SocketEndpoint(
"/api/socket", mock_auth, mock_dispatcher,
capability=AUTHENTICATED,
)
mock_ws = MagicMock()
mock_running = MagicMock()
# Call worker method
await endpoint.worker(mock_ws, mock_dispatcher, mock_running)
# Verify dispatcher.run was called
mock_dispatcher.run.assert_called_once()
@ -50,8 +63,11 @@ class TestSocketEndpoint:
"""Test SocketEndpoint listener method with text message"""
mock_auth = MagicMock()
mock_dispatcher = AsyncMock()
endpoint = SocketEndpoint("/api/socket", mock_auth, mock_dispatcher)
endpoint = SocketEndpoint(
"/api/socket", mock_auth, mock_dispatcher,
capability=AUTHENTICATED,
)
# Mock websocket with text message
mock_msg = MagicMock()
@ -80,8 +96,11 @@ class TestSocketEndpoint:
"""Test SocketEndpoint listener method with binary message"""
mock_auth = MagicMock()
mock_dispatcher = AsyncMock()
endpoint = SocketEndpoint("/api/socket", mock_auth, mock_dispatcher)
endpoint = SocketEndpoint(
"/api/socket", mock_auth, mock_dispatcher,
capability=AUTHENTICATED,
)
# Mock websocket with binary message
mock_msg = MagicMock()
@ -110,8 +129,11 @@ class TestSocketEndpoint:
"""Test SocketEndpoint listener method with close message"""
mock_auth = MagicMock()
mock_dispatcher = AsyncMock()
endpoint = SocketEndpoint("/api/socket", mock_auth, mock_dispatcher)
endpoint = SocketEndpoint(
"/api/socket", mock_auth, mock_dispatcher,
capability=AUTHENTICATED,
)
# Mock websocket with close message
mock_msg = MagicMock()

View file

@ -12,48 +12,57 @@ class TestStreamEndpoint:
"""Test cases for StreamEndpoint class"""
def test_stream_endpoint_initialization_with_post(self):
"""Test StreamEndpoint initialization with POST method"""
"""Construction records the configured capability on the
instance. StreamEndpoint is used in production for the
core-import / core-export / document-stream routes; a
document-write capability is a realistic value for a POST
stream (e.g. core-import)."""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = StreamEndpoint(
endpoint_path="/api/stream",
auth=mock_auth,
dispatcher=mock_dispatcher,
method="POST"
capability="documents:write",
method="POST",
)
assert endpoint.path == "/api/stream"
assert endpoint.auth == mock_auth
assert endpoint.dispatcher == mock_dispatcher
assert endpoint.operation == "service"
assert endpoint.capability == "documents:write"
assert endpoint.method == "POST"
def test_stream_endpoint_initialization_with_get(self):
"""Test StreamEndpoint initialization with GET method"""
"""GET stream — export-style endpoint, read capability."""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = StreamEndpoint(
endpoint_path="/api/stream",
auth=mock_auth,
dispatcher=mock_dispatcher,
method="GET"
capability="documents:read",
method="GET",
)
assert endpoint.method == "GET"
def test_stream_endpoint_initialization_default_method(self):
"""Test StreamEndpoint initialization with default POST method"""
"""Test StreamEndpoint initialization with default POST method.
The method default is cosmetic; the capability is not
defaulted; it is always required."""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = StreamEndpoint(
endpoint_path="/api/stream",
auth=mock_auth,
dispatcher=mock_dispatcher
dispatcher=mock_dispatcher,
capability="documents:write",
)
assert endpoint.method == "POST" # Default value
@pytest.mark.asyncio
@ -61,9 +70,12 @@ class TestStreamEndpoint:
"""Test StreamEndpoint start method (should be no-op)"""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = StreamEndpoint("/api/stream", mock_auth, mock_dispatcher)
endpoint = StreamEndpoint(
"/api/stream", mock_auth, mock_dispatcher,
capability="documents:write",
)
# start() should complete without error
await endpoint.start()
@ -72,16 +84,17 @@ class TestStreamEndpoint:
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
mock_app = MagicMock()
endpoint = StreamEndpoint(
endpoint_path="/api/stream",
auth=mock_auth,
dispatcher=mock_dispatcher,
method="POST"
capability="documents:write",
method="POST",
)
endpoint.add_routes(mock_app)
# Verify add_routes was called with POST route
mock_app.add_routes.assert_called_once()
call_args = mock_app.add_routes.call_args[0][0]
@ -92,16 +105,17 @@ class TestStreamEndpoint:
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
mock_app = MagicMock()
endpoint = StreamEndpoint(
endpoint_path="/api/stream",
auth=mock_auth,
dispatcher=mock_dispatcher,
method="GET"
capability="documents:read",
method="GET",
)
endpoint.add_routes(mock_app)
# Verify add_routes was called with GET route
mock_app.add_routes.assert_called_once()
call_args = mock_app.add_routes.call_args[0][0]
@ -112,13 +126,14 @@ class TestStreamEndpoint:
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
mock_app = MagicMock()
endpoint = StreamEndpoint(
endpoint_path="/api/stream",
auth=mock_auth,
dispatcher=mock_dispatcher,
method="INVALID"
capability="documents:write",
method="INVALID",
)
with pytest.raises(RuntimeError, match="Bad method"):
endpoint.add_routes(mock_app)

View file

@ -12,29 +12,36 @@ class TestVariableEndpoint:
"""Test cases for VariableEndpoint class"""
def test_variable_endpoint_initialization(self):
"""Test VariableEndpoint initialization"""
"""Construction records the configured capability on the
instance. VariableEndpoint is used in production for the
/api/v1/{kind} admin-scoped global service routes, so a
write-side capability is a realistic value for the test."""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = VariableEndpoint(
endpoint_path="/api/variable",
auth=mock_auth,
dispatcher=mock_dispatcher
dispatcher=mock_dispatcher,
capability="config:write",
)
assert endpoint.path == "/api/variable"
assert endpoint.auth == mock_auth
assert endpoint.dispatcher == mock_dispatcher
assert endpoint.operation == "service"
assert endpoint.capability == "config:write"
@pytest.mark.asyncio
async def test_variable_endpoint_start_method(self):
"""Test VariableEndpoint start method (should be no-op)"""
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
endpoint = VariableEndpoint("/api/var", mock_auth, mock_dispatcher)
endpoint = VariableEndpoint(
"/api/var", mock_auth, mock_dispatcher,
capability="config:write",
)
# start() should complete without error
await endpoint.start()
@ -43,10 +50,13 @@ class TestVariableEndpoint:
mock_auth = MagicMock()
mock_dispatcher = MagicMock()
mock_app = MagicMock()
endpoint = VariableEndpoint("/api/variable", mock_auth, mock_dispatcher)
endpoint = VariableEndpoint(
"/api/variable", mock_auth, mock_dispatcher,
capability="config:write",
)
endpoint.add_routes(mock_app)
# Verify add_routes was called with POST route
mock_app.add_routes.assert_called_once()
call_args = mock_app.add_routes.call_args[0][0]

View file

@ -1,355 +1,179 @@
"""
Tests for Gateway Service API
Tests for gateway/service.py: the Api class that wires together
the pub/sub backend, IAM auth, config receiver, dispatcher manager,
and endpoint manager.
The legacy ``GATEWAY_SECRET`` / ``default_api_token`` / allow-all
surface is gone, so the tests here focus on the Api's construction
and composition rather than the removed auth behaviour. IamAuth's
own behaviour is covered in test_auth.py.
"""
import pytest
import asyncio
from unittest.mock import Mock, patch, MagicMock, AsyncMock
from unittest.mock import AsyncMock, Mock, patch
from aiohttp import web
import pulsar
from trustgraph.gateway.service import Api, run, default_pulsar_host, default_prometheus_url, default_timeout, default_port, default_api_token
# Tests for Gateway Service API
from trustgraph.gateway.service import (
Api,
default_pulsar_host, default_prometheus_url,
default_timeout, default_port,
)
from trustgraph.gateway.auth import IamAuth
class TestApi:
"""Test cases for Api class"""
# -- constants -------------------------------------------------------------
def test_api_initialization_with_defaults(self):
"""Test Api initialization with default values"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_backend = Mock()
mock_get_pubsub.return_value = mock_backend
api = Api()
class TestDefaults:
assert api.port == default_port
assert api.timeout == default_timeout
assert api.pulsar_host == default_pulsar_host
assert api.pulsar_api_key is None
assert api.prometheus_url == default_prometheus_url + "/"
assert api.auth.allow_all is True
def test_exports_default_constants(self):
# These are consumed by CLIs / tests / docs. Sanity-check
# that they're the expected shape.
assert default_port == 8088
assert default_timeout == 600
assert default_pulsar_host.startswith("pulsar://")
assert default_prometheus_url.startswith("http")
# Verify get_pubsub was called
mock_get_pubsub.assert_called_once()
def test_api_initialization_with_custom_config(self):
"""Test Api initialization with custom configuration"""
# -- Api construction ------------------------------------------------------
@pytest.fixture
def mock_backend():
return Mock()
@pytest.fixture
def api(mock_backend):
with patch(
"trustgraph.gateway.service.get_pubsub",
return_value=mock_backend,
):
yield Api()
class TestApiConstruction:
def test_defaults(self, api):
assert api.port == default_port
assert api.timeout == default_timeout
assert api.pulsar_host == default_pulsar_host
assert api.pulsar_api_key is None
# prometheus_url gets normalised with a trailing slash
assert api.prometheus_url == default_prometheus_url + "/"
def test_auth_is_iam_backed(self, api):
# Any Api always gets an IamAuth. There is no "no auth" mode
# (GATEWAY_SECRET / allow_all has been removed — see IAM spec).
assert isinstance(api.auth, IamAuth)
def test_components_wired(self, api):
assert api.config_receiver is not None
assert api.dispatcher_manager is not None
assert api.endpoint_manager is not None
def test_dispatcher_manager_has_auth(self, api):
# The Mux uses this handle for first-frame socket auth.
assert api.dispatcher_manager.auth is api.auth
def test_custom_config(self, mock_backend):
config = {
"port": 9000,
"timeout": 300,
"pulsar_host": "pulsar://custom-host:6650",
"pulsar_api_key": "test-api-key",
"pulsar_listener": "custom-listener",
"pulsar_api_key": "custom-key",
"prometheus_url": "http://custom-prometheus:9090",
"api_token": "secret-token"
}
with patch(
"trustgraph.gateway.service.get_pubsub",
return_value=mock_backend,
):
a = Api(**config)
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_backend = Mock()
mock_get_pubsub.return_value = mock_backend
assert a.port == 9000
assert a.timeout == 300
assert a.pulsar_host == "pulsar://custom-host:6650"
assert a.pulsar_api_key == "custom-key"
# Trailing slash added.
assert a.prometheus_url == "http://custom-prometheus:9090/"
api = Api(**config)
def test_prometheus_url_already_has_trailing_slash(self, mock_backend):
with patch(
"trustgraph.gateway.service.get_pubsub",
return_value=mock_backend,
):
a = Api(prometheus_url="http://p:9090/")
assert a.prometheus_url == "http://p:9090/"
assert api.port == 9000
assert api.timeout == 300
assert api.pulsar_host == "pulsar://custom-host:6650"
assert api.pulsar_api_key == "test-api-key"
assert api.prometheus_url == "http://custom-prometheus:9090/"
assert api.auth.token == "secret-token"
assert api.auth.allow_all is False
def test_queue_overrides_parsed_for_config(self, mock_backend):
with patch(
"trustgraph.gateway.service.get_pubsub",
return_value=mock_backend,
):
a = Api(
config_request_queue="alt-config-req",
config_response_queue="alt-config-resp",
)
overrides = a.dispatcher_manager.queue_overrides
assert overrides.get("config", {}).get("request") == "alt-config-req"
assert overrides.get("config", {}).get("response") == "alt-config-resp"
# Verify get_pubsub was called with config
mock_get_pubsub.assert_called_once_with(**config)
def test_api_initialization_with_pulsar_api_key(self):
"""Test Api initialization with Pulsar API key authentication"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
# -- app_factory -----------------------------------------------------------
api = Api(pulsar_api_key="test-key")
# Verify api key was stored
assert api.pulsar_api_key == "test-key"
mock_get_pubsub.assert_called_once()
def test_api_initialization_prometheus_url_normalization(self):
"""Test that prometheus_url gets normalized with trailing slash"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
# Test URL without trailing slash
api = Api(prometheus_url="http://prometheus:9090")
assert api.prometheus_url == "http://prometheus:9090/"
# Test URL with trailing slash
api = Api(prometheus_url="http://prometheus:9090/")
assert api.prometheus_url == "http://prometheus:9090/"
def test_api_initialization_empty_api_token_means_no_auth(self):
"""Test that empty API token results in allow_all authentication"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
api = Api(api_token="")
assert api.auth.allow_all is True
def test_api_initialization_none_api_token_means_no_auth(self):
"""Test that None API token results in allow_all authentication"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
api = Api(api_token=None)
assert api.auth.allow_all is True
class TestAppFactory:
@pytest.mark.asyncio
async def test_app_factory_creates_application(self):
"""Test that app_factory creates aiohttp application"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
api = Api()
# Mock the dependencies
api.config_receiver = Mock()
api.config_receiver.start = AsyncMock()
api.endpoint_manager = Mock()
api.endpoint_manager.add_routes = Mock()
api.endpoint_manager.start = AsyncMock()
app = await api.app_factory()
assert isinstance(app, web.Application)
assert app._client_max_size == 256 * 1024 * 1024
# Verify that config receiver was started
api.config_receiver.start.assert_called_once()
# Verify that endpoint manager was configured
api.endpoint_manager.add_routes.assert_called_once_with(app)
api.endpoint_manager.start.assert_called_once()
async def test_creates_aiohttp_app(self, api):
# Stub out the long-tail dependencies that reach out to IAM /
# pub/sub so we can exercise the factory in isolation.
api.auth.start = AsyncMock()
api.config_receiver = Mock()
api.config_receiver.start = AsyncMock()
api.endpoint_manager = Mock()
api.endpoint_manager.add_routes = Mock()
api.endpoint_manager.start = AsyncMock()
api.endpoints = []
app = await api.app_factory()
assert isinstance(app, web.Application)
assert app._client_max_size == 256 * 1024 * 1024
api.auth.start.assert_called_once()
api.config_receiver.start.assert_called_once()
api.endpoint_manager.add_routes.assert_called_once_with(app)
api.endpoint_manager.start.assert_called_once()
@pytest.mark.asyncio
async def test_app_factory_with_custom_endpoints(self):
"""Test app_factory with custom endpoints"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
api = Api()
# Mock custom endpoints
mock_endpoint1 = Mock()
mock_endpoint1.add_routes = Mock()
mock_endpoint1.start = AsyncMock()
mock_endpoint2 = Mock()
mock_endpoint2.add_routes = Mock()
mock_endpoint2.start = AsyncMock()
api.endpoints = [mock_endpoint1, mock_endpoint2]
# Mock the dependencies
api.config_receiver = Mock()
api.config_receiver.start = AsyncMock()
api.endpoint_manager = Mock()
api.endpoint_manager.add_routes = Mock()
api.endpoint_manager.start = AsyncMock()
app = await api.app_factory()
# Verify custom endpoints were configured
mock_endpoint1.add_routes.assert_called_once_with(app)
mock_endpoint1.start.assert_called_once()
mock_endpoint2.add_routes.assert_called_once_with(app)
mock_endpoint2.start.assert_called_once()
async def test_auth_start_runs_before_accepting_traffic(self, api):
"""``auth.start()`` fetches the IAM signing key, and must
complete (or time out) before the gateway begins accepting
requests. It's the first await in app_factory."""
order = []
# AsyncMock.side_effect expects a sync callable (its return
# value becomes the coroutine's return); a plain list.append
# avoids the "coroutine was never awaited" trap of an async
# side_effect.
api.auth.start = AsyncMock(
side_effect=lambda: order.append("auth"),
)
api.config_receiver = Mock()
api.config_receiver.start = AsyncMock(
side_effect=lambda: order.append("config"),
)
api.endpoint_manager = Mock()
api.endpoint_manager.add_routes = Mock()
api.endpoint_manager.start = AsyncMock(
side_effect=lambda: order.append("endpoints"),
)
api.endpoints = []
await api.app_factory()
# auth.start must be first (before config receiver, before
# any endpoint starts).
assert order[0] == "auth"
# All three must have run.
assert set(order) == {"auth", "config", "endpoints"}
def test_run_method_calls_web_run_app(self):
"""Test that run method calls web.run_app"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub, \
patch('aiohttp.web.run_app') as mock_run_app:
mock_get_pubsub.return_value = Mock()
api = Api(port=8080)
# Api.run() passes self.app_factory() — a coroutine — to
# web.run_app, which would normally consume it inside its own
# event loop. Since we mock run_app, close the coroutine here
# so it doesn't leak as an "unawaited coroutine" RuntimeWarning.
def _consume_coro(coro, **kwargs):
coro.close()
mock_run_app.side_effect = _consume_coro
api.run()
# Verify run_app was called once with the correct port
mock_run_app.assert_called_once()
args, kwargs = mock_run_app.call_args
assert len(args) == 1 # Should have one positional arg (the coroutine)
assert kwargs == {'port': 8080} # Should have port keyword arg
def test_api_components_initialization(self):
"""Test that all API components are properly initialized"""
with patch('trustgraph.gateway.service.get_pubsub') as mock_get_pubsub:
mock_get_pubsub.return_value = Mock()
api = Api()
# Verify all components are initialized
assert api.config_receiver is not None
assert api.dispatcher_manager is not None
assert api.endpoint_manager is not None
assert api.endpoints == []
# Verify component relationships
assert api.dispatcher_manager.backend == api.pubsub_backend
assert api.dispatcher_manager.config_receiver == api.config_receiver
assert api.endpoint_manager.dispatcher_manager == api.dispatcher_manager
# EndpointManager doesn't store auth directly; it passes it to individual endpoints
class TestRunFunction:
"""Test cases for the run() function"""
def test_run_function_with_metrics_enabled(self):
"""Test run function with metrics enabled"""
import warnings
# Suppress the specific async warning with a broader pattern
warnings.filterwarnings("ignore", message=".*Api.app_factory.*was never awaited", category=RuntimeWarning)
with patch('argparse.ArgumentParser.parse_args') as mock_parse_args, \
patch('trustgraph.gateway.service.start_http_server') as mock_start_http_server:
# Mock command line arguments
mock_args = Mock()
mock_args.metrics = True
mock_args.metrics_port = 8000
mock_parse_args.return_value = mock_args
# Create a simple mock instance without any async methods
mock_api_instance = Mock()
mock_api_instance.run = Mock()
# Create a mock Api class without importing the real one
mock_api = Mock(return_value=mock_api_instance)
# Patch using context manager to avoid importing the real Api class
with patch('trustgraph.gateway.service.Api', mock_api):
# Mock vars() to return a dict
with patch('builtins.vars') as mock_vars:
mock_vars.return_value = {
'metrics': True,
'metrics_port': 8000,
'pulsar_host': default_pulsar_host,
'timeout': default_timeout
}
run()
# Verify metrics server was started
mock_start_http_server.assert_called_once_with(8000)
# Verify Api was created and run was called
mock_api.assert_called_once()
mock_api_instance.run.assert_called_once()
@patch('trustgraph.gateway.service.start_http_server')
@patch('argparse.ArgumentParser.parse_args')
def test_run_function_with_metrics_disabled(self, mock_parse_args, mock_start_http_server):
"""Test run function with metrics disabled"""
# Mock command line arguments
mock_args = Mock()
mock_args.metrics = False
mock_parse_args.return_value = mock_args
# Create a simple mock instance without any async methods
mock_api_instance = Mock()
mock_api_instance.run = Mock()
# Patch the Api class inside the test without using decorators
with patch('trustgraph.gateway.service.Api') as mock_api:
mock_api.return_value = mock_api_instance
# Mock vars() to return a dict
with patch('builtins.vars') as mock_vars:
mock_vars.return_value = {
'metrics': False,
'metrics_port': 8000,
'pulsar_host': default_pulsar_host,
'timeout': default_timeout
}
run()
# Verify metrics server was NOT started
mock_start_http_server.assert_not_called()
# Verify Api was created and run was called
mock_api.assert_called_once()
mock_api_instance.run.assert_called_once()
@patch('argparse.ArgumentParser.parse_args')
def test_run_function_argument_parsing(self, mock_parse_args):
"""Test that run function properly parses command line arguments"""
# Mock command line arguments
mock_args = Mock()
mock_args.metrics = False
mock_parse_args.return_value = mock_args
# Create a simple mock instance without any async methods
mock_api_instance = Mock()
mock_api_instance.run = Mock()
# Mock vars() to return a dict with all expected arguments
expected_args = {
'pulsar_host': 'pulsar://test:6650',
'pulsar_api_key': 'test-key',
'pulsar_listener': 'test-listener',
'prometheus_url': 'http://test-prometheus:9090',
'port': 9000,
'timeout': 300,
'api_token': 'secret',
'log_level': 'INFO',
'metrics': False,
'metrics_port': 8001
}
# Patch the Api class inside the test without using decorators
with patch('trustgraph.gateway.service.Api') as mock_api:
mock_api.return_value = mock_api_instance
with patch('builtins.vars') as mock_vars:
mock_vars.return_value = expected_args
run()
# Verify Api was created with the parsed arguments
mock_api.assert_called_once_with(**expected_args)
mock_api_instance.run.assert_called_once()
def test_run_function_creates_argument_parser(self):
"""Test that run function creates argument parser with correct arguments"""
with patch('argparse.ArgumentParser') as mock_parser_class:
mock_parser = Mock()
mock_parser_class.return_value = mock_parser
mock_parser.parse_args.return_value = Mock(metrics=False)
with patch('trustgraph.gateway.service.Api') as mock_api, \
patch('builtins.vars') as mock_vars:
mock_vars.return_value = {'metrics': False}
mock_api.return_value = Mock()
run()
# Verify ArgumentParser was created
mock_parser_class.assert_called_once()
# Verify add_argument was called for each expected argument
expected_arguments = [
'pulsar-host', 'pulsar-api-key', 'pulsar-listener',
'prometheus-url', 'port', 'timeout', 'api-token',
'log-level', 'metrics', 'metrics-port'
]
# Check that add_argument was called multiple times (once for each arg)
assert mock_parser.add_argument.call_count >= len(expected_arguments)

View file

@ -1,4 +1,15 @@
"""Unit tests for SocketEndpoint graceful shutdown functionality."""
"""Unit tests for SocketEndpoint graceful shutdown functionality.
These tests exercise SocketEndpoint in its handshake-auth
configuration (``in_band_auth=False``) the mode used in production
for the flow import/export streaming endpoints. The mux socket at
``/api/v1/socket`` uses ``in_band_auth=True`` instead, where the
handshake always accepts and authentication runs on the first
WebSocket frame; that path is covered by the Mux tests.
Every endpoint constructor here passes an explicit capability no
permissive default is relied upon.
"""
import pytest
import asyncio
@ -6,13 +17,32 @@ from unittest.mock import AsyncMock, MagicMock, patch
from aiohttp import web, WSMsgType
from trustgraph.gateway.endpoint.socket import SocketEndpoint
from trustgraph.gateway.running import Running
from trustgraph.gateway.auth import Identity
# Representative capability used across these tests — corresponds to
# the flow-import streaming endpoint pattern that uses this class.
TEST_CAP = "graph:write"
def _valid_identity():
return Identity(
handle="test-user",
workspace="default",
principal_id="test-user",
source="api-key",
)
@pytest.fixture
def mock_auth():
"""Mock authentication service."""
"""Mock IAM-backed authenticator. Successful by default —
``authenticate`` returns a valid identity and ``authorise``
allows everything. Tests that need the failure paths override
the relevant attribute locally."""
auth = MagicMock()
auth.authenticate = AsyncMock(return_value=_valid_identity())
auth.authorise = AsyncMock(return_value=None)
return auth
@ -25,7 +55,7 @@ def mock_dispatcher_factory():
dispatcher.receive = AsyncMock()
dispatcher.destroy = AsyncMock()
return dispatcher
return dispatcher_factory
@ -35,7 +65,8 @@ def socket_endpoint(mock_auth, mock_dispatcher_factory):
return SocketEndpoint(
endpoint_path="/test-socket",
auth=mock_auth,
dispatcher=mock_dispatcher_factory
dispatcher=mock_dispatcher_factory,
capability=TEST_CAP,
)
@ -61,7 +92,10 @@ def mock_request():
@pytest.mark.asyncio
async def test_listener_graceful_shutdown_on_close():
"""Test listener handles websocket close gracefully."""
socket_endpoint = SocketEndpoint("/test", MagicMock(), AsyncMock())
socket_endpoint = SocketEndpoint(
"/test", MagicMock(), AsyncMock(),
capability=TEST_CAP,
)
# Mock websocket that closes after one message
ws = AsyncMock()
@ -99,9 +133,10 @@ async def test_listener_graceful_shutdown_on_close():
@pytest.mark.asyncio
async def test_handle_normal_flow():
"""Test normal websocket handling flow."""
"""Valid bearer → handshake accepted, dispatcher created."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
dispatcher_created = False
async def mock_dispatcher_factory(ws, running, match_info):
@ -111,7 +146,10 @@ async def test_handle_normal_flow():
dispatcher.destroy = AsyncMock()
return dispatcher
socket_endpoint = SocketEndpoint("/test", mock_auth, mock_dispatcher_factory)
socket_endpoint = SocketEndpoint(
"/test", mock_auth, mock_dispatcher_factory,
capability=TEST_CAP,
)
request = MagicMock()
request.query = {"token": "valid-token"}
@ -155,7 +193,8 @@ async def test_handle_normal_flow():
async def test_handle_exception_group_cleanup():
"""Test exception group triggers dispatcher cleanup."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
mock_dispatcher = AsyncMock()
mock_dispatcher.destroy = AsyncMock()
@ -163,7 +202,10 @@ async def test_handle_exception_group_cleanup():
async def mock_dispatcher_factory(ws, running, match_info):
return mock_dispatcher
socket_endpoint = SocketEndpoint("/test", mock_auth, mock_dispatcher_factory)
socket_endpoint = SocketEndpoint(
"/test", mock_auth, mock_dispatcher_factory,
capability=TEST_CAP,
)
request = MagicMock()
request.query = {"token": "valid-token"}
@ -222,7 +264,8 @@ async def test_handle_exception_group_cleanup():
async def test_handle_dispatcher_cleanup_timeout():
"""Test dispatcher cleanup with timeout."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
# Mock dispatcher that takes long to destroy
mock_dispatcher = AsyncMock()
@ -231,7 +274,10 @@ async def test_handle_dispatcher_cleanup_timeout():
async def mock_dispatcher_factory(ws, running, match_info):
return mock_dispatcher
socket_endpoint = SocketEndpoint("/test", mock_auth, mock_dispatcher_factory)
socket_endpoint = SocketEndpoint(
"/test", mock_auth, mock_dispatcher_factory,
capability=TEST_CAP,
)
request = MagicMock()
request.query = {"token": "valid-token"}
@ -285,49 +331,68 @@ async def test_handle_dispatcher_cleanup_timeout():
@pytest.mark.asyncio
async def test_handle_unauthorized_request():
"""Test handling of unauthorized requests."""
"""A bearer that the IAM layer rejects causes the handshake to
fail with 401. IamAuth surfaces an HTTPUnauthorized; the
endpoint propagates it. Note that the endpoint intentionally
does NOT distinguish 'bad token', 'expired', 'revoked', etc.;
that's the IAM error-masking policy."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(side_effect=web.HTTPUnauthorized(
text='{"error":"auth failure"}',
content_type="application/json",
))
socket_endpoint = SocketEndpoint(
"/test", mock_auth, AsyncMock(),
capability=TEST_CAP,
)
request = MagicMock()
request.query = {"token": "invalid-token"}
result = await socket_endpoint.handle(request)
# Should return HTTP 401
assert isinstance(result, web.HTTPUnauthorized)
# authenticate must have been invoked with a synthetic request
# carrying Bearer <the-token>. The endpoint wraps the query-
# string token into an Authorization header for a uniform auth
# path — the IAM layer does not look at query strings directly.
mock_auth.authenticate.assert_called_once()
passed_req = mock_auth.authenticate.call_args.args[0]
assert passed_req.headers["Authorization"] == "Bearer invalid-token"
@pytest.mark.asyncio
async def test_handle_missing_token():
"""Test handling of requests with missing token."""
"""Request with no ``token`` query param → 401 before any
IAM call is made (cheap short-circuit)."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(
side_effect=AssertionError(
"authenticate must not be invoked when no token is present"
),
)
socket_endpoint = SocketEndpoint(
"/test", mock_auth, AsyncMock(),
capability=TEST_CAP,
)
request = MagicMock()
request.query = {} # No token
result = await socket_endpoint.handle(request)
# Should return HTTP 401
assert isinstance(result, web.HTTPUnauthorized)
mock_auth.authenticate.assert_not_called()
@pytest.mark.asyncio
async def test_handle_websocket_already_closed():
"""Test handling when websocket is already closed."""
mock_auth = MagicMock()
mock_auth.authenticate = AsyncMock(return_value=_valid_identity())
mock_auth.authorise = AsyncMock(return_value=None)
mock_dispatcher = AsyncMock()
mock_dispatcher.destroy = AsyncMock()
@ -335,7 +400,10 @@ async def test_handle_websocket_already_closed():
async def mock_dispatcher_factory(ws, running, match_info):
return mock_dispatcher
socket_endpoint = SocketEndpoint("/test", mock_auth, mock_dispatcher_factory)
socket_endpoint = SocketEndpoint(
"/test", mock_auth, mock_dispatcher_factory,
capability=TEST_CAP,
)
request = MagicMock()
request.query = {"token": "valid-token"}

View file

@ -15,13 +15,13 @@ from trustgraph.base import LlmResult
class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
"""Test Ollama processor functionality"""
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_processor_initialization_basic(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test basic processor initialization"""
# Arrange
mock_client = AsyncMock()
mock_client_class.return_value = mock_client
# Mock the parent class initialization
@ -44,13 +44,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
assert hasattr(processor, 'llm')
mock_client_class.assert_called_once_with(host='http://localhost:11434')
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_success(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test successful content generation"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Generated response from Ollama',
'prompt_eval_count': 15,
@ -83,13 +83,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
assert result.model == 'llama2'
mock_client.generate.assert_called_once_with('llama2', "System prompt\n\nUser prompt", options={'temperature': 0.0})
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_generic_exception(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test handling of generic exceptions"""
# Arrange
mock_client = AsyncMock()
mock_client.generate.side_effect = Exception("Connection error")
mock_client_class.return_value = mock_client
@ -110,13 +110,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
with pytest.raises(Exception, match="Connection error"):
await processor.generate_content("System prompt", "User prompt")
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_processor_initialization_with_custom_parameters(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test processor initialization with custom parameters"""
# Arrange
mock_client = AsyncMock()
mock_client_class.return_value = mock_client
mock_async_init.return_value = None
@ -137,13 +137,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
assert processor.default_model == 'mistral'
mock_client_class.assert_called_once_with(host='http://192.168.1.100:11434')
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_processor_initialization_with_defaults(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test processor initialization with default values"""
# Arrange
mock_client = AsyncMock()
mock_client_class.return_value = mock_client
mock_async_init.return_value = None
@ -164,13 +164,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
# Should use default_ollama (http://localhost:11434 or from OLLAMA_HOST env)
mock_client_class.assert_called_once()
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_empty_prompts(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test content generation with empty prompts"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Default response',
'prompt_eval_count': 2,
@ -205,13 +205,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
# The prompt should be "" + "\n\n" + "" = "\n\n"
mock_client.generate.assert_called_once_with('llama2', "\n\n", options={'temperature': 0.0})
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_token_counting(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test token counting from Ollama response"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Test response',
'prompt_eval_count': 50,
@ -243,13 +243,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
assert result.out_token == 25
assert result.model == 'llama2'
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_ollama_client_initialization(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test that Ollama client is initialized correctly"""
# Arrange
mock_client = AsyncMock()
mock_client_class.return_value = mock_client
mock_async_init.return_value = None
@ -273,13 +273,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
# Verify processor has the client
assert processor.llm == mock_client
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_prompt_construction(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test prompt construction with system and user prompts"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Response with system instructions',
'prompt_eval_count': 25,
@ -312,13 +312,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
# Verify the combined prompt
mock_client.generate.assert_called_once_with('llama2', "You are a helpful assistant\n\nWhat is AI?", options={'temperature': 0.0})
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_temperature_override(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test temperature parameter override functionality"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Response with custom temperature',
'prompt_eval_count': 20,
@ -360,13 +360,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
options={'temperature': 0.8} # Should use runtime override
)
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_model_override(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test model parameter override functionality"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Response with custom model',
'prompt_eval_count': 18,
@ -408,13 +408,13 @@ class TestOllamaProcessorSimple(IsolatedAsyncioTestCase):
options={'temperature': 0.1} # Should use processor default
)
@patch('trustgraph.model.text_completion.ollama.llm.AsyncClient')
@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
@patch('trustgraph.base.llm_service.LlmService.__init__')
async def test_generate_content_both_parameters_override(self, mock_llm_init, mock_async_init, mock_client_class):
"""Test overriding both model and temperature parameters simultaneously"""
# Arrange
mock_client = AsyncMock()
mock_response = {
'response': 'Response with both overrides',
'prompt_eval_count': 22,

View file

@ -49,21 +49,67 @@ class AsyncSocketClient:
return f"ws://{url}"
def _build_ws_url(self):
ws_url = f"{self.url.rstrip('/')}/api/v1/socket"
if self.token:
ws_url = f"{ws_url}?token={self.token}"
return ws_url
# /api/v1/socket uses the first-frame auth protocol — the
# token is sent as the first frame after connecting rather
# than in the URL. This avoids browser issues with 401 on
# the WebSocket handshake and lets long-lived sockets
# refresh credentials mid-session.
return f"{self.url.rstrip('/')}/api/v1/socket"
async def connect(self):
"""Establish the persistent websocket connection."""
"""Establish the persistent websocket connection and run the
first-frame auth handshake."""
if self._connected:
return
if not self.token:
raise ProtocolException(
"AsyncSocketClient requires a token for first-frame "
"auth against /api/v1/socket"
)
ws_url = self._build_ws_url()
self._connect_cm = websockets.connect(
ws_url, ping_interval=20, ping_timeout=self.timeout
)
self._socket = await self._connect_cm.__aenter__()
# First-frame auth: send {"type":"auth","token":"..."} and
# wait for auth-ok / auth-failed. Run before starting the
# reader task so the response isn't consumed by the reader's
# id-based routing.
await self._socket.send(json.dumps({
"type": "auth", "token": self.token,
}))
try:
raw = await asyncio.wait_for(
self._socket.recv(), timeout=self.timeout,
)
except asyncio.TimeoutError:
await self._socket.close()
raise ProtocolException("Timeout waiting for auth response")
try:
resp = json.loads(raw)
except Exception:
await self._socket.close()
raise ProtocolException(
f"Unexpected non-JSON auth response: {raw!r}"
)
if resp.get("type") == "auth-ok":
self.workspace = resp.get("workspace", self.workspace)
elif resp.get("type") == "auth-failed":
await self._socket.close()
raise ProtocolException(
f"auth failure: {resp.get('error', 'unknown')}"
)
else:
await self._socket.close()
raise ProtocolException(
f"Unexpected auth response: {resp!r}"
)
self._connected = True
self._reader_task = asyncio.create_task(self._reader())
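For orientation, a minimal sketch of the same first-frame exchange driven with the websockets library directly; the gateway URL and token are placeholders, and the frame shapes ("auth", "auth-ok", "auth-failed") mirror the handling in connect() above.

import asyncio
import json
import websockets

async def probe(url="ws://localhost:8088/api/v1/socket", token="<api-key>"):
    async with websockets.connect(url) as ws:
        # The first frame carries the credential; nothing else is
        # accepted until the server answers auth-ok.
        await ws.send(json.dumps({"type": "auth", "token": token}))
        resp = json.loads(await ws.recv())
        if resp.get("type") != "auth-ok":
            raise RuntimeError(f"auth rejected: {resp}")
        # Subsequent frames are normal id-correlated requests.
        return resp.get("workspace")

# asyncio.run(probe())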

View file

@ -112,10 +112,10 @@ class SocketClient:
return f"ws://{url}"
def _build_ws_url(self):
ws_url = f"{self.url.rstrip('/')}/api/v1/socket"
if self.token:
ws_url = f"{ws_url}?token={self.token}"
return ws_url
# /api/v1/socket uses the first-frame auth protocol — the
# token is sent as the first frame after connecting rather
# than in the URL.
return f"{self.url.rstrip('/')}/api/v1/socket"
def _get_loop(self):
"""Get or create the event loop, reusing across calls."""
@ -132,15 +132,58 @@ class SocketClient:
return self._loop
async def _ensure_connected(self):
"""Lazily establish the persistent websocket connection."""
"""Lazily establish the persistent websocket connection and
run the first-frame auth handshake."""
if self._connected:
return
if not self.token:
raise ProtocolException(
"SocketClient requires a token for first-frame auth "
"against /api/v1/socket"
)
ws_url = self._build_ws_url()
self._connect_cm = websockets.connect(
ws_url, ping_interval=20, ping_timeout=self.timeout
)
self._socket = await self._connect_cm.__aenter__()
# First-frame auth — run before starting the reader so the
# auth-ok / auth-failed response isn't consumed by the reader
# loop's id-based routing.
await self._socket.send(json.dumps({
"type": "auth", "token": self.token,
}))
try:
raw = await asyncio.wait_for(
self._socket.recv(), timeout=self.timeout,
)
except asyncio.TimeoutError:
await self._socket.close()
raise ProtocolException("Timeout waiting for auth response")
try:
resp = json.loads(raw)
except Exception:
await self._socket.close()
raise ProtocolException(
f"Unexpected non-JSON auth response: {raw!r}"
)
if resp.get("type") == "auth-ok":
self.workspace = resp.get("workspace", self.workspace)
elif resp.get("type") == "auth-failed":
await self._socket.close()
raise ProtocolException(
f"auth failure: {resp.get('error', 'unknown')}"
)
else:
await self._socket.close()
raise ProtocolException(
f"Unexpected auth response: {resp!r}"
)
self._connected = True
self._reader_task = asyncio.create_task(self._reader())

View file

@ -84,6 +84,18 @@ class ConfigClient(RequestResponse):
)
return resp.directory
async def get_all(self, workspace, timeout=CONFIG_TIMEOUT):
"""Return every config entry in ``workspace`` as a nested dict
``{type: {key: value}}``. Values are returned as the raw
strings stored by config-svc (typically JSON); callers parse
as needed. An empty dict means the workspace has no config."""
resp = await self._request(
operation="config",
workspace=workspace,
timeout=timeout,
)
return resp.config
async def workspaces_for_type(self, type, timeout=CONFIG_TIMEOUT):
"""Return the set of distinct workspaces with any config of
the given type."""
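A hedged usage sketch for get_all(); it assumes a ConfigClient instance has already been wired to the pub/sub backend (construction elided) and simply walks the nested dict, parsing JSON values where possible.

import json

async def dump_config(config_client, workspace="default"):
    entries = await config_client.get_all(workspace)
    for type_, items in entries.items():
        for key, value in items.items():
            # Values are raw strings (typically JSON); parse when possible.
            try:
                value = json.loads(value)
            except (TypeError, ValueError):
                pass
            print(type_, key, value)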

View file

@ -0,0 +1,342 @@
import json
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import (
IamRequest, IamResponse,
UserInput, WorkspaceInput, ApiKeyInput,
)
IAM_TIMEOUT = 10
class IamClient(RequestResponse):
"""Client for the IAM service request/response pub/sub protocol.
Mirrors ``ConfigClient``: a thin wrapper around ``RequestResponse``
that knows the IAM request / response schemas. Only the subset of
operations actually implemented by the server today has helper
methods here; callers that need an unimplemented operation can
build ``IamRequest`` and call ``request()`` directly.
"""
async def _request(self, timeout=IAM_TIMEOUT, **kwargs):
resp = await self.request(
IamRequest(**kwargs),
timeout=timeout,
)
if resp.error:
raise RuntimeError(
f"{resp.error.type}: {resp.error.message}"
)
return resp
async def bootstrap(self, timeout=IAM_TIMEOUT):
"""Initial-run IAM self-seed. Returns a tuple of
``(admin_user_id, admin_api_key_plaintext)``. Both are empty
strings on repeat calls; the operation is a no-op once the
IAM tables are populated."""
resp = await self._request(
operation="bootstrap", timeout=timeout,
)
return resp.bootstrap_admin_user_id, resp.bootstrap_admin_api_key
async def bootstrap_status(self, timeout=IAM_TIMEOUT):
"""Returns whether an unconsumed ``bootstrap`` call would
currently succeed (i.e. iam-svc is in ``bootstrap`` mode and
its tables are empty). Side-effect-free; intended for first-
run UX so a UI can decide whether to render setup."""
resp = await self._request(
operation="bootstrap-status", timeout=timeout,
)
return resp.bootstrap_available
async def whoami(self, actor, timeout=IAM_TIMEOUT):
"""Return the user record for ``actor`` (the authenticated
caller's handle). AUTHENTICATED-only; no capability check —
every authenticated user can read themselves."""
resp = await self._request(
operation="whoami",
actor=actor,
timeout=timeout,
)
return resp.user
async def resolve_api_key(self, api_key, timeout=IAM_TIMEOUT):
"""Resolve a plaintext API key to its identity triple.
Returns ``(user_id, workspace, roles)`` or raises
``RuntimeError`` with error type ``auth-failed`` if the key is
unknown / expired / revoked.
Note: the ``roles`` value is a regime-internal hint and is
not used by the gateway directly under the IAM contract;
all authorisation decisions go through ``authorise()``.
Returned here only for backward compatibility with callers
that haven't migrated."""
resp = await self._request(
operation="resolve-api-key",
api_key=api_key,
timeout=timeout,
)
return (
resp.resolved_user_id,
resp.resolved_workspace,
list(resp.resolved_roles),
)
async def authorise(self, identity_handle, capability,
resource, parameters, timeout=IAM_TIMEOUT):
"""Ask the IAM regime whether ``identity_handle`` may perform
``capability`` on ``resource`` given ``parameters``.
Implements the contract ``authorise(identity, capability,
resource, parameters) (decision, ttl)``. Returns a tuple
``(allow: bool, ttl_seconds: int)``. The TTL is the
regime's suggested cache lifetime for this decision; the
gateway honours it (clamped above by gateway-side policy)."""
resp = await self._request(
operation="authorise",
user_id=identity_handle,
capability=capability,
resource_json=json.dumps(resource or {}, sort_keys=True),
parameters_json=json.dumps(parameters or {}, sort_keys=True),
timeout=timeout,
)
return resp.decision_allow, resp.decision_ttl_seconds
async def authorise_many(self, identity_handle, checks,
timeout=IAM_TIMEOUT):
"""Bulk authorise. ``checks`` is a list of dicts each
carrying ``capability``, ``resource``, and ``parameters``.
Returns a list of ``(allow, ttl)`` tuples in the same order."""
resp = await self._request(
operation="authorise-many",
user_id=identity_handle,
authorise_checks=json.dumps(list(checks), sort_keys=True),
timeout=timeout,
)
decisions = json.loads(resp.decisions_json or "[]")
return [(d.get("allow", False), d.get("ttl", 0)) for d in decisions]
async def create_user(self, workspace, user, actor="",
timeout=IAM_TIMEOUT):
"""Create a user. ``user`` is a ``UserInput``."""
resp = await self._request(
operation="create-user",
workspace=workspace,
actor=actor,
user=user,
timeout=timeout,
)
return resp.user
async def list_users(self, workspace, actor="", timeout=IAM_TIMEOUT):
resp = await self._request(
operation="list-users",
workspace=workspace,
actor=actor,
timeout=timeout,
)
return list(resp.users)
async def create_api_key(self, workspace, key, actor="",
timeout=IAM_TIMEOUT):
"""Create an API key. ``key`` is an ``ApiKeyInput``. Returns
``(plaintext, record)``; the plaintext is returned once and the
caller is responsible for surfacing it to the operator."""
resp = await self._request(
operation="create-api-key",
workspace=workspace,
actor=actor,
key=key,
timeout=timeout,
)
return resp.api_key_plaintext, resp.api_key
async def list_api_keys(self, workspace, user_id, actor="",
timeout=IAM_TIMEOUT):
resp = await self._request(
operation="list-api-keys",
workspace=workspace,
actor=actor,
user_id=user_id,
timeout=timeout,
)
return list(resp.api_keys)
async def revoke_api_key(self, workspace, key_id, actor="",
timeout=IAM_TIMEOUT):
await self._request(
operation="revoke-api-key",
workspace=workspace,
actor=actor,
key_id=key_id,
timeout=timeout,
)
async def login(self, username, password, workspace="",
timeout=IAM_TIMEOUT):
"""Validate credentials and return ``(jwt, expires_iso)``.
``workspace`` is optional; defaults at the server to the
OSS default workspace."""
resp = await self._request(
operation="login",
workspace=workspace,
username=username,
password=password,
timeout=timeout,
)
return resp.jwt, resp.jwt_expires
async def get_signing_key_public(self, timeout=IAM_TIMEOUT):
"""Return the active JWT signing public key in PEM. The
gateway calls this at startup and caches the result."""
resp = await self._request(
operation="get-signing-key-public",
timeout=timeout,
)
return resp.signing_key_public
async def change_password(self, user_id, current_password,
new_password, timeout=IAM_TIMEOUT):
await self._request(
operation="change-password",
user_id=user_id,
password=current_password,
new_password=new_password,
timeout=timeout,
)
async def reset_password(self, workspace, user_id, actor="",
timeout=IAM_TIMEOUT):
"""Admin-driven password reset. Returns the plaintext
temporary password (returned once)."""
resp = await self._request(
operation="reset-password",
workspace=workspace,
actor=actor,
user_id=user_id,
timeout=timeout,
)
return resp.temporary_password
async def get_user(self, workspace, user_id, actor="",
timeout=IAM_TIMEOUT):
resp = await self._request(
operation="get-user",
workspace=workspace,
actor=actor,
user_id=user_id,
timeout=timeout,
)
return resp.user
async def update_user(self, workspace, user_id, user, actor="",
timeout=IAM_TIMEOUT):
resp = await self._request(
operation="update-user",
workspace=workspace,
actor=actor,
user_id=user_id,
user=user,
timeout=timeout,
)
return resp.user
async def disable_user(self, workspace, user_id, actor="",
timeout=IAM_TIMEOUT):
await self._request(
operation="disable-user",
workspace=workspace,
actor=actor,
user_id=user_id,
timeout=timeout,
)
async def enable_user(self, workspace, user_id, actor="",
timeout=IAM_TIMEOUT):
await self._request(
operation="enable-user",
workspace=workspace,
actor=actor,
user_id=user_id,
timeout=timeout,
)
async def delete_user(self, workspace, user_id, actor="",
timeout=IAM_TIMEOUT):
await self._request(
operation="delete-user",
workspace=workspace,
actor=actor,
user_id=user_id,
timeout=timeout,
)
async def create_workspace(self, workspace_record, actor="",
timeout=IAM_TIMEOUT):
resp = await self._request(
operation="create-workspace",
actor=actor,
workspace_record=workspace_record,
timeout=timeout,
)
return resp.workspace
async def list_workspaces(self, actor="", timeout=IAM_TIMEOUT):
resp = await self._request(
operation="list-workspaces",
actor=actor,
timeout=timeout,
)
return list(resp.workspaces)
async def get_workspace(self, workspace_id, actor="",
timeout=IAM_TIMEOUT):
from ..schema import WorkspaceInput
resp = await self._request(
operation="get-workspace",
actor=actor,
workspace_record=WorkspaceInput(id=workspace_id),
timeout=timeout,
)
return resp.workspace
async def update_workspace(self, workspace_record, actor="",
timeout=IAM_TIMEOUT):
resp = await self._request(
operation="update-workspace",
actor=actor,
workspace_record=workspace_record,
timeout=timeout,
)
return resp.workspace
async def disable_workspace(self, workspace_id, actor="",
timeout=IAM_TIMEOUT):
from ..schema import WorkspaceInput
await self._request(
operation="disable-workspace",
actor=actor,
workspace_record=WorkspaceInput(id=workspace_id),
timeout=timeout,
)
async def rotate_signing_key(self, actor="", timeout=IAM_TIMEOUT):
await self._request(
operation="rotate-signing-key",
actor=actor,
timeout=timeout,
)
class IamClientSpec(RequestResponseSpec):
def __init__(self, request_name, response_name):
super().__init__(
request_name=request_name,
request_schema=IamRequest,
response_name=response_name,
response_schema=IamResponse,
impl=IamClient,
)
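Illustrative only: the sketch below assumes an IamClient already built from IamClientSpec against a live pub/sub backend (wiring elided); the user id and capability are made-up values. It shows the authorise() helper plus the raw-request escape hatch the class docstring mentions.

async def check_and_inspect(iam: IamClient):
    allow, ttl = await iam.authorise(
        identity_handle="user-1234",          # hypothetical id
        capability="graph:write",
        resource={"workspace": "default"},
        parameters={},
    )
    print("allowed:", allow, "cache ttl:", ttl)

    # For operations without a helper, build the request directly.
    resp = await iam.request(
        IamRequest(operation="bootstrap-status"), timeout=IAM_TIMEOUT,
    )
    print("bootstrap available:", resp.bootstrap_available)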

View file

@ -15,6 +15,7 @@ from .translators.library import LibraryRequestTranslator, LibraryResponseTransl
from .translators.document_loading import DocumentTranslator, TextDocumentTranslator
from .translators.config import ConfigRequestTranslator, ConfigResponseTranslator
from .translators.flow import FlowRequestTranslator, FlowResponseTranslator
from .translators.iam import IamRequestTranslator, IamResponseTranslator
from .translators.prompt import PromptRequestTranslator, PromptResponseTranslator
from .translators.tool import ToolRequestTranslator, ToolResponseTranslator
from .translators.embeddings_query import (
@ -85,11 +86,17 @@ TranslatorRegistry.register_service(
)
TranslatorRegistry.register_service(
"flow",
FlowRequestTranslator(),
"flow",
FlowRequestTranslator(),
FlowResponseTranslator()
)
TranslatorRegistry.register_service(
"iam",
IamRequestTranslator(),
IamResponseTranslator()
)
TranslatorRegistry.register_service(
"prompt",
PromptRequestTranslator(),

View file

@ -0,0 +1,198 @@
from typing import Dict, Any, Tuple
from ...schema import IamRequest, IamResponse
from ...schema import (
UserInput, UserRecord,
WorkspaceInput, WorkspaceRecord,
ApiKeyInput, ApiKeyRecord,
)
from .base import MessageTranslator
def _user_input_from_dict(d):
if d is None:
return None
return UserInput(
username=d.get("username", ""),
name=d.get("name", ""),
email=d.get("email", ""),
password=d.get("password", ""),
roles=list(d.get("roles", [])),
enabled=d.get("enabled", True),
must_change_password=d.get("must_change_password", False),
)
def _workspace_input_from_dict(d):
if d is None:
return None
return WorkspaceInput(
id=d.get("id", ""),
name=d.get("name", ""),
enabled=d.get("enabled", True),
)
def _api_key_input_from_dict(d):
if d is None:
return None
return ApiKeyInput(
user_id=d.get("user_id", ""),
name=d.get("name", ""),
expires=d.get("expires", ""),
)
def _user_record_to_dict(r):
if r is None:
return None
return {
"id": r.id,
"workspace": r.workspace,
"username": r.username,
"name": r.name,
"email": r.email,
"roles": list(r.roles),
"enabled": r.enabled,
"must_change_password": r.must_change_password,
"created": r.created,
}
def _workspace_record_to_dict(r):
if r is None:
return None
return {
"id": r.id,
"name": r.name,
"enabled": r.enabled,
"created": r.created,
}
def _api_key_record_to_dict(r):
if r is None:
return None
return {
"id": r.id,
"user_id": r.user_id,
"name": r.name,
"prefix": r.prefix,
"expires": r.expires,
"created": r.created,
"last_used": r.last_used,
}
class IamRequestTranslator(MessageTranslator):
def decode(self, data: Dict[str, Any]) -> IamRequest:
return IamRequest(
operation=data.get("operation", ""),
workspace=data.get("workspace", ""),
actor=data.get("actor", ""),
user_id=data.get("user_id", ""),
username=data.get("username", ""),
key_id=data.get("key_id", ""),
api_key=data.get("api_key", ""),
password=data.get("password", ""),
new_password=data.get("new_password", ""),
user=_user_input_from_dict(data.get("user")),
workspace_record=_workspace_input_from_dict(
data.get("workspace_record")
),
key=_api_key_input_from_dict(data.get("key")),
)
def encode(self, obj: IamRequest) -> Dict[str, Any]:
result = {"operation": obj.operation}
for fname in (
"workspace", "actor", "user_id", "username", "key_id",
"api_key", "password", "new_password",
):
v = getattr(obj, fname, "")
if v:
result[fname] = v
if obj.user is not None:
result["user"] = {
"username": obj.user.username,
"name": obj.user.name,
"email": obj.user.email,
"password": obj.user.password,
"roles": list(obj.user.roles),
"enabled": obj.user.enabled,
"must_change_password": obj.user.must_change_password,
}
if obj.workspace_record is not None:
result["workspace_record"] = {
"id": obj.workspace_record.id,
"name": obj.workspace_record.name,
"enabled": obj.workspace_record.enabled,
}
if obj.key is not None:
result["key"] = {
"user_id": obj.key.user_id,
"name": obj.key.name,
"expires": obj.key.expires,
}
return result
class IamResponseTranslator(MessageTranslator):
def decode(self, data: Dict[str, Any]) -> IamResponse:
raise NotImplementedError(
"IamResponse is a server-produced message; no HTTP→schema "
"path is needed"
)
def encode(self, obj: IamResponse) -> Dict[str, Any]:
result: Dict[str, Any] = {}
if obj.user is not None:
result["user"] = _user_record_to_dict(obj.user)
if obj.users:
result["users"] = [_user_record_to_dict(u) for u in obj.users]
if obj.workspace is not None:
result["workspace"] = _workspace_record_to_dict(obj.workspace)
if obj.workspaces:
result["workspaces"] = [
_workspace_record_to_dict(w) for w in obj.workspaces
]
if obj.api_key_plaintext:
result["api_key_plaintext"] = obj.api_key_plaintext
if obj.api_key is not None:
result["api_key"] = _api_key_record_to_dict(obj.api_key)
if obj.api_keys:
result["api_keys"] = [
_api_key_record_to_dict(k) for k in obj.api_keys
]
if obj.jwt:
result["jwt"] = obj.jwt
if obj.jwt_expires:
result["jwt_expires"] = obj.jwt_expires
if obj.signing_key_public:
result["signing_key_public"] = obj.signing_key_public
if obj.resolved_user_id:
result["resolved_user_id"] = obj.resolved_user_id
if obj.resolved_workspace:
result["resolved_workspace"] = obj.resolved_workspace
if obj.resolved_roles:
result["resolved_roles"] = list(obj.resolved_roles)
if obj.temporary_password:
result["temporary_password"] = obj.temporary_password
if obj.bootstrap_admin_user_id:
result["bootstrap_admin_user_id"] = obj.bootstrap_admin_user_id
if obj.bootstrap_admin_api_key:
result["bootstrap_admin_api_key"] = obj.bootstrap_admin_api_key
# bootstrap-status: emit unconditionally — the false case is
# meaningful for UIs deciding whether to render first-run
# setup, so it can't be dropped by a truthy-only filter.
result["bootstrap_available"] = bool(obj.bootstrap_available)
return result
def encode_with_completion(
self, obj: IamResponse,
) -> Tuple[Dict[str, Any], bool]:
return self.encode(obj), True
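A quick round-trip sketch of the request-side translation above, using only what this module defines; the payload values are illustrative. The response direction is schema-to-JSON only, as decode() notes.

translator = IamRequestTranslator()
req = translator.decode({
    "operation": "create-api-key",
    "workspace": "default",
    "actor": "admin",
    "key": {"user_id": "user-1234", "name": "ci"},
})
assert req.operation == "create-api-key"
assert req.key.user_id == "user-1234"
# encode() reproduces the wire shape, skipping empty scalar fields.
wire = translator.encode(req)
assert wire["key"]["name"] == "ci"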

View file

@ -5,6 +5,7 @@ from .agent import *
from .flow import *
from .prompt import *
from .config import *
from .iam import *
from .library import *
from .lookup import *
from .nlp_query import *

View file

@ -0,0 +1,173 @@
from dataclasses import dataclass, field
from ..core.topic import queue
from ..core.primitives import Error
############################################################################
# IAM service — see docs/tech-specs/iam-protocol.md for the full protocol.
#
# Transport: request/response pub/sub, correlated by the `id` message
# property. Caller is the API gateway only; the IAM service trusts
# the bus per the enforcement-boundary policy (no per-request auth
# against the caller).
@dataclass
class UserInput:
username: str = ""
name: str = ""
email: str = ""
# Only populated on create-user; never on update-user.
password: str = ""
roles: list[str] = field(default_factory=list)
enabled: bool = True
must_change_password: bool = False
@dataclass
class UserRecord:
id: str = ""
workspace: str = ""
username: str = ""
name: str = ""
email: str = ""
roles: list[str] = field(default_factory=list)
enabled: bool = True
must_change_password: bool = False
created: str = ""
@dataclass
class WorkspaceInput:
id: str = ""
name: str = ""
enabled: bool = True
@dataclass
class WorkspaceRecord:
id: str = ""
name: str = ""
enabled: bool = True
created: str = ""
@dataclass
class ApiKeyInput:
user_id: str = ""
name: str = ""
expires: str = ""
@dataclass
class ApiKeyRecord:
id: str = ""
user_id: str = ""
name: str = ""
# First 4 chars of the plaintext token, for operator identification
# in list-api-keys. Never enough to reconstruct the key.
prefix: str = ""
expires: str = ""
created: str = ""
last_used: str = ""
@dataclass
class IamRequest:
operation: str = ""
# Workspace scope. Required on workspace-scoped operations;
# omitted for system-level ops (workspace CRUD, signing-key
# ops, bootstrap, resolve-api-key, login).
workspace: str = ""
# Acting user id for audit. Empty for internal-origin and for
# operations that resolve an identity (login, resolve-api-key).
actor: str = ""
user_id: str = ""
username: str = ""
key_id: str = ""
api_key: str = ""
password: str = ""
new_password: str = ""
user: UserInput | None = None
workspace_record: WorkspaceInput | None = None
key: ApiKeyInput | None = None
# ---- authorise / authorise-many inputs ----
# Capability string from the vocabulary in capabilities.md.
capability: str = ""
# Resource identifier as JSON. See the IAM contract spec for
# the resource-component vocabulary. An empty dict denotes a
# system-level resource.
resource_json: str = ""
# Operation parameters as JSON. Decision-relevant fields the
# operation supplied that are not part of the resource address
# (e.g. workspace association on create-user).
parameters_json: str = ""
# For authorise-many: a JSON-serialised list of
# {"capability": str, "resource": dict, "parameters": dict}.
authorise_checks: str = ""
@dataclass
class IamResponse:
user: UserRecord | None = None
users: list[UserRecord] = field(default_factory=list)
workspace: WorkspaceRecord | None = None
workspaces: list[WorkspaceRecord] = field(default_factory=list)
# create-api-key returns the plaintext once; never populated
# on any other operation.
api_key_plaintext: str = ""
api_key: ApiKeyRecord | None = None
api_keys: list[ApiKeyRecord] = field(default_factory=list)
# login, rotate-signing-key
jwt: str = ""
jwt_expires: str = ""
# get-signing-key-public
signing_key_public: str = ""
# resolve-api-key
resolved_user_id: str = ""
resolved_workspace: str = ""
resolved_roles: list[str] = field(default_factory=list)
# reset-password
temporary_password: str = ""
# bootstrap
bootstrap_admin_user_id: str = ""
bootstrap_admin_api_key: str = ""
# bootstrap-status — true iff iam-svc is in 'bootstrap' mode with
# empty tables, i.e. an unconsumed bootstrap call would succeed.
bootstrap_available: bool = False
# ---- authorise / authorise-many outputs ----
# authorise: the regime's allow / deny verdict.
decision_allow: bool = False
# Cache TTL the regime suggests, in seconds. Gateway respects
# this for both allow and deny decisions; bounded above by
# gateway-side policy (typically <= 60s).
decision_ttl_seconds: int = 0
# authorise-many: a JSON-serialised list of {"allow": bool,
# "ttl": int} in the same order as the request's
# authorise_checks.
decisions_json: str = ""
error: Error | None = None
iam_request_queue = queue('iam', cls='request')
iam_response_queue = queue('iam', cls='response')
############################################################################
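For orientation, a minimal sketch (values illustrative) of how an authorise round trip maps onto these dataclasses; in practice the requests are built by the gateway, not by hand.

import json

req = IamRequest(
    operation="authorise",
    user_id="user-1234",                      # hypothetical identity
    capability="graph:write",
    resource_json=json.dumps({"workspace": "default"}, sort_keys=True),
    parameters_json=json.dumps({}, sort_keys=True),
)

resp = IamResponse(decision_allow=True, decision_ttl_seconds=30)
assert (resp.decision_allow, resp.decision_ttl_seconds) == (True, 30)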

View file

@ -40,7 +40,22 @@ tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main"
tg-get-kg-core = "trustgraph.cli.get_kg_core:main"
tg-get-document-content = "trustgraph.cli.get_document_content:main"
tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main"
tg-init-trustgraph = "trustgraph.cli.init_trustgraph:main"
tg-bootstrap-iam = "trustgraph.cli.bootstrap_iam:main"
tg-login = "trustgraph.cli.login:main"
tg-create-user = "trustgraph.cli.create_user:main"
tg-list-users = "trustgraph.cli.list_users:main"
tg-whoami = "trustgraph.cli.whoami:main"
tg-update-user = "trustgraph.cli.update_user:main"
tg-disable-user = "trustgraph.cli.disable_user:main"
tg-enable-user = "trustgraph.cli.enable_user:main"
tg-delete-user = "trustgraph.cli.delete_user:main"
tg-change-password = "trustgraph.cli.change_password:main"
tg-reset-password = "trustgraph.cli.reset_password:main"
tg-create-api-key = "trustgraph.cli.create_api_key:main"
tg-list-api-keys = "trustgraph.cli.list_api_keys:main"
tg-revoke-api-key = "trustgraph.cli.revoke_api_key:main"
tg-list-workspaces = "trustgraph.cli.list_workspaces:main"
tg-create-workspace = "trustgraph.cli.create_workspace:main"
tg-invoke-agent = "trustgraph.cli.invoke_agent:main"
tg-invoke-document-rag = "trustgraph.cli.invoke_document_rag:main"
tg-invoke-graph-rag = "trustgraph.cli.invoke_graph_rag:main"

View file

@ -0,0 +1,75 @@
"""
Shared helpers for IAM CLI tools.
All IAM operations go through the gateway's ``/api/v1/iam`` forwarder,
with the three public auth operations (``login``, ``bootstrap``,
``change-password``) served via ``/api/v1/auth/...`` instead. These
helpers encapsulate the HTTP plumbing so each CLI can stay focused
on its own argument parsing and output formatting.
"""
import json
import os
import sys
import requests
DEFAULT_URL = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")
DEFAULT_TOKEN = os.getenv("TRUSTGRAPH_TOKEN", None)
def _fmt_error(resp_json):
err = resp_json.get("error", {})
if isinstance(err, dict):
t = err.get("type", "")
m = err.get("message", "")
return f"{t}: {m}" if t else m or "error"
return str(err)
def _post(url, path, token, body):
endpoint = url.rstrip("/") + path
headers = {"Content-Type": "application/json"}
if token:
headers["Authorization"] = f"Bearer {token}"
resp = requests.post(
endpoint, headers=headers, data=json.dumps(body),
)
if resp.status_code != 200:
try:
payload = resp.json()
detail = _fmt_error(payload)
except Exception:
detail = resp.text
raise RuntimeError(f"HTTP {resp.status_code}: {detail}")
payload = resp.json()
if "error" in payload:
raise RuntimeError(_fmt_error(payload))
return payload
def call_iam(url, token, request):
"""Forward an IAM request through ``/api/v1/iam``. ``request`` is
the ``IamRequest`` dict shape."""
return _post(url, "/api/v1/iam", token, request)
def call_auth(url, path, token, body):
"""Hit one of the public auth endpoints
(``/api/v1/auth/login``, ``/api/v1/auth/change-password``, etc.).
``token`` is optional; login and bootstrap don't need one."""
return _post(url, path, token, body)
def run_main(fn, parser):
"""Standard error-handling wrapper for CLI main() bodies."""
args = parser.parse_args()
try:
fn(args)
except Exception as e:
print("Exception:", e, file=sys.stderr, flush=True)
sys.exit(1)
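A hedged example of the helpers in use, listing users through the /api/v1/iam forwarder. The import path follows the sibling CLIs' relative imports (trustgraph.cli._iam) and the token is a placeholder.

from trustgraph.cli._iam import DEFAULT_URL, call_iam

def list_usernames(token):
    resp = call_iam(DEFAULT_URL, token, {"operation": "list-users"})
    return [u.get("username", "") for u in resp.get("users", [])]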

View file

@ -0,0 +1,94 @@
"""
Bootstraps the IAM service. Only works when iam-svc is running in
bootstrap mode with empty tables. Prints the initial admin API key
to stdout.
This is a one-time, trust-sensitive operation. The resulting token
is shown once and never again; capture it when it is printed. Rotate and
revoke it as soon as a real admin API key has been issued.
"""
import argparse
import json
import os
import sys
import requests
default_url = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")
def bootstrap(url):
# Unauthenticated public endpoint — IAM refuses the bootstrap
# operation unless the service is running in bootstrap mode with
# empty tables, so the safety gate lives on the server side.
endpoint = url.rstrip("/") + "/api/v1/auth/bootstrap"
headers = {"Content-Type": "application/json"}
resp = requests.post(
endpoint,
headers=headers,
data=json.dumps({}),
)
if resp.status_code != 200:
raise RuntimeError(
f"HTTP {resp.status_code}: {resp.text}"
)
body = resp.json()
if "error" in body:
raise RuntimeError(
f"IAM {body['error'].get('type', 'error')}: "
f"{body['error'].get('message', '')}"
)
api_key = body.get("bootstrap_admin_api_key")
user_id = body.get("bootstrap_admin_user_id")
if not api_key:
raise RuntimeError(
"IAM response did not contain a bootstrap token — the "
"service may already be bootstrapped, or may be running "
"in token mode."
)
return user_id, api_key
def main():
parser = argparse.ArgumentParser(
prog="tg-bootstrap-iam",
description=__doc__,
)
parser.add_argument(
"-u", "--api-url",
default=default_url,
help=f"API URL (default: {default_url})",
)
args = parser.parse_args()
try:
user_id, api_key = bootstrap(args.api_url)
except Exception as e:
print("Exception:", e, file=sys.stderr, flush=True)
sys.exit(1)
# Stdout gets machine-readable output (the key). Any operator
# context goes to stderr.
print(f"Admin user id: {user_id}", file=sys.stderr)
print(
"Admin API key (shown once, capture now):",
file=sys.stderr,
)
print(api_key)
if __name__ == "__main__":
main()
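For completeness, a sketch of driving the bootstrap programmatically rather than via the tg-bootstrap-iam console script; the URL is a placeholder and the module path comes from the pyproject entry above.

from trustgraph.cli.bootstrap_iam import bootstrap

user_id, api_key = bootstrap("http://localhost:8088/")
# The key cannot be retrieved again; store it securely right away.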

View file

@ -0,0 +1,46 @@
"""
Change your own password. Requires the current password.
"""
import argparse
import getpass
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_auth, run_main
def do_change_password(args):
current = args.current or getpass.getpass("Current password: ")
new = args.new or getpass.getpass("New password: ")
call_auth(
args.api_url, "/api/v1/auth/change-password", args.token,
{"current_password": current, "new_password": new},
)
print("Password changed.")
def main():
parser = argparse.ArgumentParser(
prog="tg-change-password", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--current", default=None,
help="Current password (prompted if omitted)",
)
parser.add_argument(
"--new", default=None,
help="New password (prompted if omitted)",
)
run_main(do_change_password, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,71 @@
"""
Create an API key for a user. Prints the plaintext key to stdout;
it is shown once only.
"""
import argparse
import sys
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_create_api_key(args):
key = {
"user_id": args.user_id,
"name": args.name,
}
if args.expires:
key["expires"] = args.expires
req = {"operation": "create-api-key", "key": key}
if args.workspace:
req["workspace"] = args.workspace
resp = call_iam(args.api_url, args.token, req)
plaintext = resp.get("api_key_plaintext", "")
rec = resp.get("api_key", {})
print(f"Key id: {rec.get('id', '')}", file=sys.stderr)
print(f"Name: {rec.get('name', '')}", file=sys.stderr)
print(f"Prefix: {rec.get('prefix', '')}", file=sys.stderr)
print(
"API key (shown once, capture now):", file=sys.stderr,
)
print(plaintext)
def main():
parser = argparse.ArgumentParser(
prog="tg-create-api-key", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True,
help="Owner user id",
)
parser.add_argument(
"--name", required=True,
help="Operator-facing label (e.g. 'laptop', 'ci')",
)
parser.add_argument(
"--expires", default=None,
help="ISO-8601 expiry (optional; empty = no expiry)",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_create_api_key, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,87 @@
"""
Create a user in the caller's workspace. Prints the new user id.
"""
import argparse
import getpass
import sys
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_create_user(args):
password = args.password
if not password:
password = getpass.getpass(
f"Password for new user {args.username}: "
)
user = {
"username": args.username,
"password": password,
"roles": args.roles,
}
if args.name:
user["name"] = args.name
if args.email:
user["email"] = args.email
if args.must_change_password:
user["must_change_password"] = True
req = {"operation": "create-user", "user": user}
if args.workspace:
req["workspace"] = args.workspace
resp = call_iam(args.api_url, args.token, req)
rec = resp.get("user", {})
print(f"User id: {rec.get('id', '')}", file=sys.stderr)
print(f"Username: {rec.get('username', '')}", file=sys.stderr)
print(f"Roles: {', '.join(rec.get('roles', []))}", file=sys.stderr)
print(rec.get("id", ""))
def main():
parser = argparse.ArgumentParser(
prog="tg-create-user", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--username", required=True, help="Username (unique in workspace)",
)
parser.add_argument(
"--password", default=None,
help="Password (prompted if omitted)",
)
parser.add_argument(
"--name", default=None, help="Display name",
)
parser.add_argument(
"--email", default=None, help="Email",
)
parser.add_argument(
"--roles", nargs="+", default=["reader"],
help="One or more role names (default: reader)",
)
parser.add_argument(
"--must-change-password", action="store_true",
help="Force password change on next login",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_create_user, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,46 @@
"""
Create a workspace (system-level; requires admin).
"""
import argparse
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_create_workspace(args):
ws = {"id": args.workspace_id, "enabled": True}
if args.name:
ws["name"] = args.name
resp = call_iam(args.api_url, args.token, {
"operation": "create-workspace",
"workspace_record": ws,
})
rec = resp.get("workspace", {})
print(f"Workspace created: {rec.get('id', '')}")
def main():
parser = argparse.ArgumentParser(
prog="tg-create-workspace", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--workspace-id", required=True,
help="New workspace id (must not start with '_')",
)
parser.add_argument(
"--name", default=None, help="Display name",
)
run_main(do_create_workspace, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,62 @@
"""
Delete a user. Removes the user record, their username lookup,
and all their API keys. The freed username becomes available for
re-use.
Irreversible. Use tg-disable-user if you want to preserve the
record (audit trail, username squatting protection).
"""
import argparse
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_delete_user(args):
if not args.yes:
confirm = input(
f"Delete user {args.user_id}? This is irreversible. "
f"[type 'yes' to confirm]: "
)
if confirm.strip() != "yes":
print("Aborted.")
return
req = {"operation": "delete-user", "user_id": args.user_id}
if args.workspace:
req["workspace"] = args.workspace
call_iam(args.api_url, args.token, req)
print(f"Deleted user {args.user_id}")
def main():
parser = argparse.ArgumentParser(
prog="tg-delete-user", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True, help="User id to delete",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
parser.add_argument(
"--yes", action="store_true",
help="Skip the interactive confirmation prompt",
)
run_main(do_delete_user, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,45 @@
"""
Disable a user. Soft-deletes (enabled=false) and revokes all their
API keys.
"""
import argparse
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_disable_user(args):
req = {"operation": "disable-user", "user_id": args.user_id}
if args.workspace:
req["workspace"] = args.workspace
call_iam(args.api_url, args.token, req)
print(f"Disabled user {args.user_id}")
def main():
parser = argparse.ArgumentParser(
prog="tg-disable-user", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True, help="User id to disable",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_disable_user, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,45 @@
"""
Re-enable a previously disabled user. Does not restore their API
keys; those must be re-issued by an admin.
"""
import argparse
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_enable_user(args):
req = {"operation": "enable-user", "user_id": args.user_id}
if args.workspace:
req["workspace"] = args.workspace
call_iam(args.api_url, args.token, req)
print(f"Enabled user {args.user_id}")
def main():
parser = argparse.ArgumentParser(
prog="tg-enable-user", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True, help="User id to enable",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_enable_user, parser)
if __name__ == "__main__":
main()

View file

@ -1,271 +0,0 @@
"""
Initialises TrustGraph pub/sub infrastructure and pushes initial config.
For Pulsar: creates tenant, namespaces, and retention policies.
For RabbitMQ: queues are auto-declared, so only config push is needed.
"""
import requests
import time
import argparse
import json
from trustgraph.clients.config_client import ConfigClient
from trustgraph.base.pubsub import add_pubsub_args
default_pulsar_admin_url = "http://pulsar:8080"
subscriber = "tg-init-pubsub"
def get_clusters(url):
print("Get clusters...", flush=True)
resp = requests.get(f"{url}/admin/v2/clusters")
if resp.status_code != 200: raise RuntimeError("Could not fetch clusters")
return resp.json()
def ensure_tenant(url, tenant, clusters):
resp = requests.get(f"{url}/admin/v2/tenants/{tenant}")
if resp.status_code == 200:
print(f"Tenant {tenant} already exists.", flush=True)
return
resp = requests.put(
f"{url}/admin/v2/tenants/{tenant}",
json={
"adminRoles": [],
"allowedClusters": clusters,
}
)
if resp.status_code != 204:
print(resp.text, flush=True)
raise RuntimeError("Tenant creation failed.")
print(f"Tenant {tenant} created.", flush=True)
def ensure_namespace(url, tenant, namespace, config):
resp = requests.get(f"{url}/admin/v2/namespaces/{tenant}/{namespace}")
if resp.status_code == 200:
print(f"Namespace {tenant}/{namespace} already exists.", flush=True)
return
resp = requests.put(
f"{url}/admin/v2/namespaces/{tenant}/{namespace}",
json=config,
)
if resp.status_code != 204:
print(resp.status_code, flush=True)
print(resp.text, flush=True)
raise RuntimeError(f"Namespace {tenant}/{namespace} creation failed.")
print(f"Namespace {tenant}/{namespace} created.", flush=True)
def ensure_config(config, workspace="default", **pubsub_config):
cli = ConfigClient(
subscriber=subscriber,
workspace=workspace,
**pubsub_config,
)
while True:
try:
print("Get current config...", flush=True)
current, version = cli.config(timeout=5)
except Exception as e:
print("Exception:", e, flush=True)
time.sleep(2)
print("Retrying...", flush=True)
continue
print("Current config version is", version, flush=True)
if version != 0:
print("Already updated, not updating config. Done.", flush=True)
return
print("Config is version 0, updating...", flush=True)
batch = []
for type in config:
for key in config[type]:
print(f"Adding {type}/{key} to update.", flush=True)
batch.append({
"type": type,
"key": key,
"value": json.dumps(config[type][key]),
})
try:
cli.put(batch, timeout=10)
print("Update succeeded.", flush=True)
break
except Exception as e:
print("Exception:", e, flush=True)
time.sleep(2)
print("Retrying...", flush=True)
continue
def init_pulsar(pulsar_admin_url, tenant):
"""Pulsar-specific setup: create tenant, namespaces, retention policies."""
clusters = get_clusters(pulsar_admin_url)
ensure_tenant(pulsar_admin_url, tenant, clusters)
ensure_namespace(pulsar_admin_url, tenant, "flow", {})
ensure_namespace(pulsar_admin_url, tenant, "request", {})
ensure_namespace(pulsar_admin_url, tenant, "response", {
"retention_policies": {
"retentionSizeInMB": -1,
"retentionTimeInMinutes": 3,
"subscriptionExpirationTimeMinutes": 30,
}
})
ensure_namespace(pulsar_admin_url, tenant, "notify", {
"retention_policies": {
"retentionSizeInMB": -1,
"retentionTimeInMinutes": 3,
"subscriptionExpirationTimeMinutes": 5,
}
})
def push_config(config_json, config_file, workspace="default",
**pubsub_config):
"""Push initial config if provided."""
if config_json is not None:
try:
print("Decoding config...", flush=True)
dec = json.loads(config_json)
print("Decoded.", flush=True)
except Exception as e:
print("Exception:", e, flush=True)
raise e
ensure_config(dec, workspace=workspace, **pubsub_config)
elif config_file is not None:
try:
print("Decoding config...", flush=True)
dec = json.load(open(config_file))
print("Decoded.", flush=True)
except Exception as e:
print("Exception:", e, flush=True)
raise e
ensure_config(dec, workspace=workspace, **pubsub_config)
else:
print("No config to update.", flush=True)
def main():
parser = argparse.ArgumentParser(
prog='tg-init-trustgraph',
description=__doc__,
)
parser.add_argument(
'--pulsar-admin-url',
default=default_pulsar_admin_url,
help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
)
parser.add_argument(
'-c', '--config',
help=f'Initial configuration to load',
)
parser.add_argument(
'-C', '--config-file',
help=f'Initial configuration to load from file',
)
parser.add_argument(
'-t', '--tenant',
default="tg",
help=f'Tenant (default: tg)',
)
parser.add_argument(
'-w', '--workspace',
default="default",
help=f'Workspace (default: default)',
)
add_pubsub_args(parser)
args = parser.parse_args()
backend_type = args.pubsub_backend
# Extract pubsub config from args
pubsub_config = {
k: v for k, v in vars(args).items()
if k not in (
'pulsar_admin_url', 'config', 'config_file', 'tenant',
'workspace',
)
}
while True:
try:
# Pulsar-specific setup (tenants, namespaces)
if backend_type == 'pulsar':
print(flush=True)
print(
f"Initialising Pulsar at {args.pulsar_admin_url}...",
flush=True,
)
init_pulsar(args.pulsar_admin_url, args.tenant)
else:
print(flush=True)
print(
f"Using {backend_type} backend (no admin setup needed).",
flush=True,
)
# Push config (works with any backend)
push_config(
args.config, args.config_file,
workspace=args.workspace,
**pubsub_config,
)
print("Initialisation complete.", flush=True)
break
except Exception as e:
print("Exception:", e, flush=True)
print("Sleeping...", flush=True)
time.sleep(2)
print("Will retry...", flush=True)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,69 @@
"""
List the API keys for a user.
"""
import argparse
import tabulate
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_list_api_keys(args):
req = {"operation": "list-api-keys", "user_id": args.user_id}
if args.workspace:
req["workspace"] = args.workspace
resp = call_iam(args.api_url, args.token, req)
keys = resp.get("api_keys", [])
if not keys:
print("No keys.")
return
rows = [
[
k.get("id", ""),
k.get("name", ""),
k.get("prefix", ""),
k.get("created", ""),
k.get("last_used", "") or "",
k.get("expires", "") or "never",
]
for k in keys
]
print(tabulate.tabulate(
rows,
headers=["id", "name", "prefix", "created", "last used", "expires"],
tablefmt="pretty",
stralign="left",
))
def main():
parser = argparse.ArgumentParser(
prog="tg-list-api-keys", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True,
help="Owner user id",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_list_api_keys, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,65 @@
"""
List users in the caller's workspace.
"""
import argparse
import tabulate
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_list_users(args):
req = {"operation": "list-users"}
if args.workspace:
req["workspace"] = args.workspace
resp = call_iam(args.api_url, args.token, req)
users = resp.get("users", [])
if not users:
print("No users.")
return
rows = [
[
u.get("id", ""),
u.get("username", ""),
u.get("name", ""),
", ".join(u.get("roles", [])),
"yes" if u.get("enabled") else "no",
"yes" if u.get("must_change_password") else "no",
]
for u in users
]
print(tabulate.tabulate(
rows,
headers=["id", "username", "name", "roles", "enabled", "change-pw"],
tablefmt="pretty",
stralign="left",
))
def main():
parser = argparse.ArgumentParser(
prog="tg-list-users", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_list_users, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,53 @@
"""
List workspaces (system-level; requires admin).
"""
import argparse
import tabulate
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_list_workspaces(args):
resp = call_iam(
args.api_url, args.token, {"operation": "list-workspaces"},
)
workspaces = resp.get("workspaces", [])
if not workspaces:
print("No workspaces.")
return
rows = [
[
w.get("id", ""),
w.get("name", ""),
"yes" if w.get("enabled") else "no",
w.get("created", ""),
]
for w in workspaces
]
print(tabulate.tabulate(
rows,
headers=["id", "name", "enabled", "created"],
tablefmt="pretty",
stralign="left",
))
def main():
parser = argparse.ArgumentParser(
prog="tg-list-workspaces", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
run_main(do_list_workspaces, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,62 @@
"""
Log in with username / password. Prints the resulting JWT to
stdout so it can be captured for subsequent CLI use.
"""
import argparse
import getpass
import sys
from ._iam import DEFAULT_URL, call_auth, run_main
def do_login(args):
password = args.password
if not password:
password = getpass.getpass(f"Password for {args.username}: ")
body = {
"username": args.username,
"password": password,
}
if args.workspace:
body["workspace"] = args.workspace
resp = call_auth(args.api_url, "/api/v1/auth/login", None, body)
jwt = resp.get("jwt", "")
expires = resp.get("jwt_expires", "")
if expires:
print(f"JWT expires: {expires}", file=sys.stderr)
# Machine-readable on stdout.
print(jwt)
def main():
parser = argparse.ArgumentParser(
prog="tg-login", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"--username", required=True, help="Username",
)
parser.add_argument(
"--password", default=None,
help="Password (prompted if omitted)",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Optional workspace to log in against. Defaults to "
"the user's assigned workspace."
),
)
run_main(do_login, parser)
if __name__ == "__main__":
main()
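Because the JWT goes to stdout and operator context to stderr, the token can be captured and reused by the other commands. A hedged sketch of the same flow driven through the shared helpers (import path assumed, credentials illustrative):

from trustgraph.cli._iam import call_auth, call_iam  # path is an assumption

url = "http://localhost:8088/"

# Log in; the response carries the JWT and its expiry.
login = call_auth(url, "/api/v1/auth/login", None, {
    "username": "admin",              # illustrative credentials
    "password": "example-password",
})
jwt = login.get("jwt", "")

# Use the fresh JWT as the bearer token for an IAM operation.
me = call_iam(url, jwt, {"operation": "whoami"})
print(me.get("user", {}).get("username", ""))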

View file

@ -0,0 +1,54 @@
"""
Admin: reset another user's password. Prints a one-time temporary
password to stdout. The user is forced to change it on next login.
"""
import argparse
import sys
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_reset_password(args):
req = {"operation": "reset-password", "user_id": args.user_id}
if args.workspace:
req["workspace"] = args.workspace
resp = call_iam(args.api_url, args.token, req)
tmp = resp.get("temporary_password", "")
if not tmp:
raise RuntimeError(
"IAM returned no temporary password — unexpected"
)
print("Temporary password (shown once, capture now):", file=sys.stderr)
print(tmp)
def main():
parser = argparse.ArgumentParser(
prog="tg-reset-password", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True,
help="Target user id",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_reset_password, parser)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,44 @@
"""
Revoke an API key by id.
"""
import argparse
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_revoke_api_key(args):
req = {"operation": "revoke-api-key", "key_id": args.key_id}
if args.workspace:
req["workspace"] = args.workspace
call_iam(args.api_url, args.token, req)
print(f"Revoked key {args.key_id}")
def main():
parser = argparse.ArgumentParser(
prog="tg-revoke-api-key", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--key-id", required=True, help="Key id to revoke",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Target workspace (admin only; defaults to caller's "
"assigned workspace)"
),
)
run_main(do_revoke_api_key, parser)
if __name__ == "__main__":
main()

View file

@ -44,16 +44,18 @@ def show_processors(metrics_url, flow_label):
obj = resp.json()
tbl = [
[
m["metric"]["job"],
"\U0001f49a" if int(m["value"][1]) > 0 else "\U0000274c"
]
for m in obj["data"]["result"]
]
# consumer_state is one sample per consumer (queue); a processor
# with N subscriptions shows up N times. Aggregate to one row per
# processor: green only if every consumer is running.
by_proc = {}
for m in obj["data"]["result"]:
name = m["metric"].get("processor", m["metric"]["job"])
running = int(m["value"][1]) > 0
by_proc[name] = by_proc.get(name, True) and running
for row in tbl:
print(f"- {row[0]:30} {row[1]}")
for name in sorted(by_proc):
icon = "\U0001f49a" if by_proc[name] else "\U0000274c"
print(f"- {name:30} {icon}")
def main():

View file

@ -17,7 +17,7 @@ def dump_status(url):
tbl = [
[
m["metric"]["job"],
m["metric"].get("processor", m["metric"]["job"]),
"\U0001f49a"
]
for m in obj["data"]["result"]

View file

@ -0,0 +1,125 @@
"""
Update a user's profile fields: name, email, roles, enabled flag,
must-change-password flag.
Username is immutable; create a new user and disable the old one
to effect a username change. Password changes go through
``tg-change-password`` (self-service) or ``tg-reset-password``
(admin-driven).
Only the fields you supply are changed; omitted fields are left
untouched on the user record. An empty ``--roles`` is rejected by
iam-svc (a user must have at least one role); to demote a user use
``tg-disable-user``.
"""
import argparse
import sys
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def _parse_bool(s):
if s is None:
return None
s = s.strip().lower()
if s in ("yes", "y", "true", "t", "1"):
return True
if s in ("no", "n", "false", "f", "0"):
return False
raise argparse.ArgumentTypeError(
f"expected yes/no, got {s!r}"
)
def do_update_user(args):
user = {}
if args.name is not None:
user["name"] = args.name
if args.email is not None:
user["email"] = args.email
if args.roles is not None:
user["roles"] = args.roles
if args.enabled is not None:
user["enabled"] = args.enabled
if args.must_change_password is not None:
user["must_change_password"] = args.must_change_password
if not user:
print(
"tg-update-user: nothing to change — supply at least "
"one of --name / --email / --roles / --enabled / "
"--must-change-password",
file=sys.stderr,
)
sys.exit(2)
req = {
"operation": "update-user",
"user_id": args.user_id,
"user": user,
}
if args.workspace:
req["workspace"] = args.workspace
resp = call_iam(args.api_url, args.token, req)
rec = resp.get("user", {})
print(f"id : {rec.get('id', '')}")
print(f"username : {rec.get('username', '')}")
print(f"name : {rec.get('name', '')}")
print(f"email : {rec.get('email', '')}")
print(f"workspace : {rec.get('workspace', '')}")
print(f"roles : {', '.join(rec.get('roles', []))}")
print(f"enabled : {'yes' if rec.get('enabled') else 'no'}")
print(
f"must-change-pw: "
f"{'yes' if rec.get('must_change_password') else 'no'}"
)
def main():
parser = argparse.ArgumentParser(
prog="tg-update-user", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
parser.add_argument(
"--user-id", required=True, help="Target user id",
)
parser.add_argument(
"--name", default=None, help="New display name",
)
parser.add_argument(
"--email", default=None, help="New email",
)
parser.add_argument(
"--roles", nargs="+", default=None,
help="Replacement role list (e.g. --roles reader writer)",
)
parser.add_argument(
"--enabled", type=_parse_bool, default=None,
help="Set enabled flag (yes/no)",
)
parser.add_argument(
"--must-change-password", type=_parse_bool, default=None,
help="Set must-change-password flag (yes/no)",
)
parser.add_argument(
"-w", "--workspace", default=None,
help=(
"Optional workspace integrity check — when supplied, "
"iam-svc verifies the target user's home workspace "
"matches"
),
)
run_main(do_update_user, parser)
if __name__ == "__main__":
main()
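For reference, the partial-update request the command sends has this shape; only the keys present in ``user`` are changed, and the values below are illustrative:

request = {
    "operation": "update-user",
    "user_id": "u-1234",                 # illustrative target id
    "user": {
        "roles": ["reader", "writer"],   # full replacement role list
        "enabled": True,
    },
    # "workspace": "acme",               # optional admin-only integrity check
}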

View file

@ -0,0 +1,52 @@
"""
Show the authenticated caller's own user record.
"""
import argparse
import tabulate
from ._iam import DEFAULT_URL, DEFAULT_TOKEN, call_iam, run_main
def do_whoami(args):
resp = call_iam(args.api_url, args.token, {"operation": "whoami"})
user = resp.get("user")
if not user:
print("(no user record returned)")
return
rows = [
["id", user.get("id", "")],
["username", user.get("username", "")],
["name", user.get("name", "")],
["email", user.get("email", "")],
["workspace", user.get("workspace", "")],
["roles", ", ".join(user.get("roles", []))],
["enabled", "yes" if user.get("enabled") else "no"],
[
"must change password",
"yes" if user.get("must_change_password") else "no",
],
["created", user.get("created", "")],
]
print(tabulate.tabulate(rows, tablefmt="plain"))
def main():
parser = argparse.ArgumentParser(
prog="tg-whoami", description=__doc__,
)
parser.add_argument(
"-u", "--api-url", default=DEFAULT_URL,
help=f"API URL (default: {DEFAULT_URL})",
)
parser.add_argument(
"-t", "--token", default=DEFAULT_TOKEN,
help="Auth token (default: $TRUSTGRAPH_TOKEN)",
)
run_main(do_whoami, parser)
if __name__ == "__main__":
main()

View file

@ -60,8 +60,10 @@ agent-orchestrator = "trustgraph.agent.orchestrator:run"
api-gateway = "trustgraph.gateway:run"
chunker-recursive = "trustgraph.chunking.recursive:run"
chunker-token = "trustgraph.chunking.token:run"
bootstrap = "trustgraph.bootstrap.bootstrapper:run"
config-svc = "trustgraph.config.service:run"
flow-svc = "trustgraph.flow.service:run"
iam-svc = "trustgraph.iam.service:run"
doc-embeddings-query-milvus = "trustgraph.query.doc_embeddings.milvus:run"
doc-embeddings-query-pinecone = "trustgraph.query.doc_embeddings.pinecone:run"
doc-embeddings-query-qdrant = "trustgraph.query.doc_embeddings.qdrant:run"

View file

@ -0,0 +1,68 @@
"""
Bootstrap framework: Initialiser base class and per-wake context.
See docs/tech-specs/bootstrap.md for the full design.
"""
import logging
from dataclasses import dataclass
from typing import Any
@dataclass
class InitContext:
"""Shared per-wake context passed to each initialiser.
The bootstrapper constructs one of these on every wake cycle,
tears it down at cycle end, and passes it into each initialiser's
``run()`` method. Fields are short-lived and safe to use during
a single cycle only.
"""
logger: logging.Logger
config: Any # ConfigClient
flow: Any # RequestResponse client for flow-svc
class Initialiser:
"""Base class for bootstrap initialisers.
Subclasses implement :meth:`run`. The bootstrapper manages
completion state, flag comparison, retry, and error handling;
subclasses describe only the work to perform.
Class attributes:
* ``wait_for_services`` (bool, default ``True``): when ``True`` the
initialiser only runs after the bootstrapper's service gate has
passed (config-svc and flow-svc reachable). Set ``False`` for
initialisers that bring up infrastructure the gate itself
depends on: principally Pulsar topology, without which
config-svc cannot come online.
"""
wait_for_services: bool = True
def __init__(self, **params):
# Subclasses should consume their own params via keyword
# arguments in their own __init__ signatures. This catch-all
# is here so any kwargs that filter through unnoticed don't
# raise TypeError on construction.
pass
async def run(self, ctx, old_flag, new_flag):
"""Perform initialisation work.
:param ctx: :class:`InitContext` with logger, config client,
flow-svc client.
:param old_flag: Previously-stored flag string, or ``None`` if
this initialiser has never successfully completed in this
deployment.
:param new_flag: Currently-configured flag. A string chosen
by the operator; typically something like ``"v1"``.
:raises: Any exception on failure. The bootstrapper catches,
logs, and re-runs on the next cycle; completion state is
only written on clean return.
"""
raise NotImplementedError
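A minimal sketch of a subclass under this contract; the config type and key written here are hypothetical, only the ``run`` signature and ``wait_for_services`` attribute come from the base class above:

class ExampleMarker(Initialiser):
    """Hypothetical example: writes a single marker entry per wake cycle."""

    wait_for_services = True    # only run once config-svc / flow-svc respond

    def __init__(self, workspace="default", **kwargs):
        super().__init__(**kwargs)
        self.workspace = workspace

    async def run(self, ctx, old_flag, new_flag):
        ctx.logger.info(
            f"Marking workspace {self.workspace!r} "
            f"({old_flag!r} -> {new_flag!r})"
        )
        # Hypothetical config type / key. Any exception raised here
        # makes the bootstrapper retry on its next cycle; the flag is
        # only persisted after a clean return.
        await ctx.config.put(
            self.workspace, "bootstrap-example", "marker", new_flag,
        )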

View file

@ -0,0 +1 @@
from . service import *

View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
from . service import run
if __name__ == '__main__':
run()

View file

@ -0,0 +1,414 @@
"""
Bootstrapper processor.
Runs a pluggable list of initialisers in a reconciliation loop.
Each initialiser's completion state is recorded in the reserved
``__system__`` workspace under the ``init-state`` config type.
See docs/tech-specs/bootstrap.md for the full design.
"""
import asyncio
import importlib
import json
import logging
import uuid
from argparse import ArgumentParser
from dataclasses import dataclass
from trustgraph.base import AsyncProcessor
from trustgraph.base import ProducerMetrics, SubscriberMetrics
from trustgraph.base.config_client import ConfigClient
from trustgraph.base.request_response_spec import RequestResponse
from trustgraph.schema import (
ConfigRequest, ConfigResponse,
config_request_queue, config_response_queue,
)
from trustgraph.schema import (
FlowRequest, FlowResponse,
flow_request_queue, flow_response_queue,
)
from .. base import Initialiser, InitContext
logger = logging.getLogger(__name__)
default_ident = "bootstrap"
# Reserved workspace + config type under which completion state is
# stored. Reserved (`_`-prefix) workspaces are excluded from the
# config push broadcast — live processors never see these keys.
SYSTEM_WORKSPACE = "__system__"
INIT_STATE_TYPE = "init-state"
# Cadence tiers.
GATE_BACKOFF = 5 # Services not responding; retry soon.
INIT_RETRY = 15 # Gate passed but something ran/failed;
# converge quickly.
STEADY_INTERVAL = 300 # Everything at target flag; idle cheaply.
@dataclass
class InitialiserSpec:
"""One entry in the bootstrapper's configured list of initialisers."""
name: str
flag: str
instance: Initialiser
def _resolve_class(dotted):
"""Import and return a class by its dotted path."""
module_path, _, class_name = dotted.rpartition(".")
if not module_path:
raise ValueError(
f"Initialiser class must be a dotted path, got {dotted!r}"
)
module = importlib.import_module(module_path)
return getattr(module, class_name)
def _load_initialisers_file(path):
"""Load the initialisers spec list from a YAML or JSON file.
File shape:
.. code-block:: yaml
initialisers:
- class: trustgraph.bootstrap.initialisers.PulsarTopology
name: pulsar-topology
flag: v1
params:
admin_url: http://pulsar:8080
tenant: tg
- ...
"""
with open(path) as f:
content = f.read()
if path.endswith((".yaml", ".yml")):
import yaml
doc = yaml.safe_load(content)
else:
doc = json.loads(content)
if not isinstance(doc, dict) or "initialisers" not in doc:
raise RuntimeError(
f"{path}: expected a mapping with an 'initialisers' key"
)
return doc["initialisers"]
class Processor(AsyncProcessor):
def __init__(self, **params):
super().__init__(**params)
# Source the initialisers list either from a direct parameter
# (processor-group embedding) or from a file (CLI launch).
inits = params.get("initialisers")
if inits is None:
inits_file = params.get("initialisers_file")
if inits_file is None:
raise RuntimeError(
"Bootstrapper requires either the 'initialisers' "
"parameter or --initialisers-file"
)
inits = _load_initialisers_file(inits_file)
self.specs = []
names = set()
for entry in inits:
if not isinstance(entry, dict):
raise RuntimeError(
f"Initialiser entry must be a mapping, got: {entry!r}"
)
for required in ("class", "name", "flag"):
if required not in entry:
raise RuntimeError(
f"Initialiser entry missing required field "
f"{required!r}: {entry!r}"
)
name = entry["name"]
if name in names:
raise RuntimeError(f"Duplicate initialiser name {name!r}")
names.add(name)
cls = _resolve_class(entry["class"])
try:
instance = cls(**entry.get("params", {}))
except Exception as e:
raise RuntimeError(
f"Failed to instantiate initialiser "
f"{entry['class']!r} as {name!r}: "
f"{type(e).__name__}: {e}"
)
self.specs.append(InitialiserSpec(
name=name,
flag=entry["flag"],
instance=instance,
))
logger.info(
f"Bootstrapper: loaded {len(self.specs)} initialisers"
)
# ------------------------------------------------------------------
# Client construction (short-lived per wake cycle).
# ------------------------------------------------------------------
def _make_config_client(self):
rr_id = str(uuid.uuid4())
return ConfigClient(
backend=self.pubsub_backend,
subscription=f"{self.id}--config--{rr_id}",
consumer_name=self.id,
request_topic=config_request_queue,
request_schema=ConfigRequest,
request_metrics=ProducerMetrics(
processor=self.id, flow=None, name="config-request",
),
response_topic=config_response_queue,
response_schema=ConfigResponse,
response_metrics=SubscriberMetrics(
processor=self.id, flow=None, name="config-response",
),
)
def _make_flow_client(self):
rr_id = str(uuid.uuid4())
return RequestResponse(
backend=self.pubsub_backend,
subscription=f"{self.id}--flow--{rr_id}",
consumer_name=self.id,
request_topic=flow_request_queue,
request_schema=FlowRequest,
request_metrics=ProducerMetrics(
processor=self.id, flow=None, name="flow-request",
),
response_topic=flow_response_queue,
response_schema=FlowResponse,
response_metrics=SubscriberMetrics(
processor=self.id, flow=None, name="flow-response",
),
)
async def _open_clients(self):
config = self._make_config_client()
flow = self._make_flow_client()
await config.start()
try:
await flow.start()
except Exception:
await self._safe_stop(config)
raise
return config, flow
async def _safe_stop(self, client):
try:
await client.stop()
except Exception:
pass
# ------------------------------------------------------------------
# Service gate.
# ------------------------------------------------------------------
async def _gate_ready(self, config, flow):
try:
await config.keys(SYSTEM_WORKSPACE, INIT_STATE_TYPE)
except Exception as e:
logger.info(
f"Gate: config-svc not ready ({type(e).__name__}: {e})"
)
return False
try:
resp = await flow.request(
FlowRequest(
operation="list-blueprints",
workspace=SYSTEM_WORKSPACE,
),
timeout=5,
)
if resp.error:
logger.info(
f"Gate: flow-svc error: "
f"{resp.error.type}: {resp.error.message}"
)
return False
except Exception as e:
logger.info(
f"Gate: flow-svc not ready ({type(e).__name__}: {e})"
)
return False
return True
# ------------------------------------------------------------------
# Completion state.
# ------------------------------------------------------------------
async def _stored_flag(self, config, name):
raw = await config.get(SYSTEM_WORKSPACE, INIT_STATE_TYPE, name)
if raw is None:
return None
try:
return json.loads(raw)
except Exception:
return raw
async def _store_flag(self, config, name, flag):
await config.put(
SYSTEM_WORKSPACE, INIT_STATE_TYPE, name,
json.dumps(flag),
)
# ------------------------------------------------------------------
# Per-spec execution.
# ------------------------------------------------------------------
async def _run_spec(self, spec, config, flow):
"""Run a single initialiser spec.
Returns one of:
- ``"skip"``: stored flag already matches target, nothing to do.
- ``"ran"``: initialiser ran and completion state was updated.
- ``"failed"``: initialiser raised.
- ``"failed-state-write"``: initialiser succeeded but we could
not persist the new flag (transient; will re-run next cycle).
"""
try:
old_flag = await self._stored_flag(config, spec.name)
except Exception as e:
logger.warning(
f"{spec.name}: could not read stored flag "
f"({type(e).__name__}: {e})"
)
return "failed"
if old_flag == spec.flag:
return "skip"
child_logger = logger.getChild(spec.name)
child_ctx = InitContext(
logger=child_logger,
config=config,
flow=flow,
)
child_logger.info(
f"Running (old_flag={old_flag!r} -> new_flag={spec.flag!r})"
)
try:
await spec.instance.run(child_ctx, old_flag, spec.flag)
except Exception as e:
child_logger.error(
f"Failed: {type(e).__name__}: {e}", exc_info=True,
)
return "failed"
try:
await self._store_flag(config, spec.name, spec.flag)
except Exception as e:
child_logger.warning(
f"Completed but could not persist state flag "
f"({type(e).__name__}: {e}); will re-run next cycle"
)
return "failed-state-write"
child_logger.info(f"Completed (flag={spec.flag!r})")
return "ran"
# ------------------------------------------------------------------
# Main loop.
# ------------------------------------------------------------------
async def run(self):
logger.info(
f"Bootstrapper starting with {len(self.specs)} initialisers"
)
while self.running:
sleep_for = STEADY_INTERVAL
try:
config, flow = await self._open_clients()
except Exception as e:
logger.info(
f"Failed to open clients "
f"({type(e).__name__}: {e}); retry in {GATE_BACKOFF}s"
)
await asyncio.sleep(GATE_BACKOFF)
continue
try:
# Phase 1: pre-service initialisers run unconditionally.
pre_specs = [
s for s in self.specs
if not s.instance.wait_for_services
]
pre_results = {}
for spec in pre_specs:
pre_results[spec.name] = await self._run_spec(
spec, config, flow,
)
# Phase 2: gate.
gate_ok = await self._gate_ready(config, flow)
# Phase 3: post-service initialisers, if gate passed.
post_results = {}
if gate_ok:
post_specs = [
s for s in self.specs
if s.instance.wait_for_services
]
for spec in post_specs:
post_results[spec.name] = await self._run_spec(
spec, config, flow,
)
# Cadence selection.
if not gate_ok:
sleep_for = GATE_BACKOFF
else:
all_results = {**pre_results, **post_results}
if any(r != "skip" for r in all_results.values()):
sleep_for = INIT_RETRY
else:
sleep_for = STEADY_INTERVAL
finally:
await self._safe_stop(config)
await self._safe_stop(flow)
await asyncio.sleep(sleep_for)
# ------------------------------------------------------------------
# CLI arg plumbing.
# ------------------------------------------------------------------
@staticmethod
def add_args(parser: ArgumentParser) -> None:
AsyncProcessor.add_args(parser)
parser.add_argument(
'-c', '--initialisers-file',
help='Path to YAML or JSON file describing the '
'initialisers to run. Ignored when the '
"'initialisers' parameter is provided directly "
'(e.g. when running inside a processor group).',
)
def run():
Processor.launch(default_ident, __doc__)

View file

@ -0,0 +1,20 @@
"""
Core bootstrap initialisers.
These cover the base TrustGraph deployment case. Enterprise or
third-party initialisers live in their own packages and are
referenced in the bootstrapper's config by fully-qualified dotted
path.
"""
from . pulsar_topology import PulsarTopology
from . template_seed import TemplateSeed
from . workspace_init import WorkspaceInit
from . default_flow_start import DefaultFlowStart
__all__ = [
"PulsarTopology",
"TemplateSeed",
"WorkspaceInit",
"DefaultFlowStart",
]

View file

@ -0,0 +1,101 @@
"""
The DefaultFlowStart initialiser starts a named flow in a workspace
using a specified blueprint.
Separated from WorkspaceInit so deployments that want a workspace
without an auto-started flow can simply omit this initialiser.
Parameters
----------
workspace : str (default "default")
Workspace in which to start the flow.
flow_id : str (default "default")
Identifier for the started flow.
blueprint : str (required)
Blueprint name (must already exist in the workspace's config,
typically via TemplateSeed -> WorkspaceInit).
description : str (default "Default")
Human-readable description passed to flow-svc.
parameters : dict (optional)
Optional parameter overrides passed to start-flow.
"""
from trustgraph.schema import FlowRequest
from .. base import Initialiser
class DefaultFlowStart(Initialiser):
def __init__(
self,
workspace="default",
flow_id="default",
blueprint=None,
description="Default",
parameters=None,
**kwargs,
):
super().__init__(**kwargs)
if not blueprint:
raise ValueError(
"DefaultFlowStart requires 'blueprint'"
)
self.workspace = workspace
self.flow_id = flow_id
self.blueprint = blueprint
self.description = description
self.parameters = dict(parameters) if parameters else {}
async def run(self, ctx, old_flag, new_flag):
# Check whether the flow already exists. Belt-and-braces
# beyond the flag gate: if an operator stops and restarts the
# bootstrapper after the flow is already running, we don't
# want to blindly try to start it again.
list_resp = await ctx.flow.request(
FlowRequest(
operation="list-flows",
workspace=self.workspace,
),
timeout=10,
)
if list_resp.error:
raise RuntimeError(
f"list-flows failed: "
f"{list_resp.error.type}: {list_resp.error.message}"
)
if self.flow_id in (list_resp.flow_ids or []):
ctx.logger.info(
f"Flow {self.flow_id!r} already running in workspace "
f"{self.workspace!r}; nothing to do"
)
return
ctx.logger.info(
f"Starting flow {self.flow_id!r} "
f"(blueprint={self.blueprint!r}) "
f"in workspace {self.workspace!r}"
)
resp = await ctx.flow.request(
FlowRequest(
operation="start-flow",
workspace=self.workspace,
flow_id=self.flow_id,
blueprint_name=self.blueprint,
description=self.description,
parameters=self.parameters,
),
timeout=30,
)
if resp.error:
raise RuntimeError(
f"start-flow failed: "
f"{resp.error.type}: {resp.error.message}"
)
ctx.logger.info(
f"Flow {self.flow_id!r} started"
)

View file

@ -0,0 +1,131 @@
"""
The PulsarTopology initialiser creates the Pulsar tenant and namespaces
with their retention policies.
Runs pre-gate (``wait_for_services = False``) because config-svc and
flow-svc can't connect to Pulsar until these namespaces exist.
Admin-API calls are idempotent so re-runs on flag change are safe.
"""
import asyncio
import requests
from .. base import Initialiser
# Namespace configs. flow/request take broker defaults. response
# and notify get aggressive retention — those classes carry short-lived
# request/response and notification traffic only.
NAMESPACE_CONFIG = {
"flow": {},
"request": {},
"response": {
"retention_policies": {
"retentionSizeInMB": -1,
"retentionTimeInMinutes": 3,
"subscriptionExpirationTimeMinutes": 30,
},
},
"notify": {
"retention_policies": {
"retentionSizeInMB": -1,
"retentionTimeInMinutes": 3,
"subscriptionExpirationTimeMinutes": 5,
},
},
}
REQUEST_TIMEOUT = 10
class PulsarTopology(Initialiser):
wait_for_services = False
def __init__(
self,
admin_url="http://pulsar:8080",
tenant="tg",
**kwargs,
):
super().__init__(**kwargs)
self.admin_url = admin_url.rstrip("/")
self.tenant = tenant
async def run(self, ctx, old_flag, new_flag):
# requests is blocking; offload to executor so the loop stays
# responsive.
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, self._reconcile_sync, ctx.logger)
# ------------------------------------------------------------------
# Sync admin-API calls.
# ------------------------------------------------------------------
def _get_clusters(self):
resp = requests.get(
f"{self.admin_url}/admin/v2/clusters",
timeout=REQUEST_TIMEOUT,
)
resp.raise_for_status()
return resp.json()
def _tenant_exists(self):
resp = requests.get(
f"{self.admin_url}/admin/v2/tenants/{self.tenant}",
timeout=REQUEST_TIMEOUT,
)
return resp.status_code == 200
def _create_tenant(self, clusters):
resp = requests.put(
f"{self.admin_url}/admin/v2/tenants/{self.tenant}",
json={"adminRoles": [], "allowedClusters": clusters},
timeout=REQUEST_TIMEOUT,
)
if resp.status_code != 204:
raise RuntimeError(
f"Tenant {self.tenant!r} create failed: "
f"{resp.status_code} {resp.text}"
)
def _namespace_exists(self, namespace):
resp = requests.get(
f"{self.admin_url}/admin/v2/namespaces/"
f"{self.tenant}/{namespace}",
timeout=REQUEST_TIMEOUT,
)
return resp.status_code == 200
def _create_namespace(self, namespace, config):
resp = requests.put(
f"{self.admin_url}/admin/v2/namespaces/"
f"{self.tenant}/{namespace}",
json=config,
timeout=REQUEST_TIMEOUT,
)
if resp.status_code != 204:
raise RuntimeError(
f"Namespace {self.tenant}/{namespace} create failed: "
f"{resp.status_code} {resp.text}"
)
def _reconcile_sync(self, logger):
if not self._tenant_exists():
clusters = self._get_clusters()
logger.info(
f"Creating tenant {self.tenant!r} with clusters {clusters}"
)
self._create_tenant(clusters)
else:
logger.debug(f"Tenant {self.tenant!r} already exists")
for namespace, config in NAMESPACE_CONFIG.items():
if self._namespace_exists(namespace):
logger.debug(
f"Namespace {self.tenant}/{namespace} already exists"
)
continue
logger.info(
f"Creating namespace {self.tenant}/{namespace}"
)
self._create_namespace(namespace, config)

View file

@ -0,0 +1,93 @@
"""
The TemplateSeed initialiser populates the reserved ``__template__``
workspace from an external JSON seed file.
Seed file shape:
.. code-block:: json
{
"flow-blueprint": {
"ontology": { ... },
"agent": { ... }
},
"prompt": {
...
},
...
}
Top-level keys are config types; nested keys are config entries.
Values are arbitrary JSON (they'll be ``json.dumps()``'d on write).
Parameters
----------
config_file : str
Path to the seed file on disk.
overwrite : bool (default False)
On re-run (flag change), if True overwrite all keys; if False
upsert-missing-only (preserves any operator customisation of
the template).
"""
import json
from .. base import Initialiser
TEMPLATE_WORKSPACE = "__template__"
class TemplateSeed(Initialiser):
def __init__(self, config_file, overwrite=False, **kwargs):
super().__init__(**kwargs)
if not config_file:
raise ValueError("TemplateSeed requires 'config_file'")
self.config_file = config_file
self.overwrite = overwrite
async def run(self, ctx, old_flag, new_flag):
with open(self.config_file) as f:
seed = json.load(f)
if old_flag is None:
# Clean first run — write every entry.
await self._write_all(ctx, seed)
return
# Re-run after flag change.
if self.overwrite:
await self._write_all(ctx, seed)
else:
await self._upsert_missing(ctx, seed)
async def _write_all(self, ctx, seed):
values = []
for type_name, entries in seed.items():
for key, value in entries.items():
values.append((type_name, key, json.dumps(value)))
if values:
await ctx.config.put_many(TEMPLATE_WORKSPACE, values)
ctx.logger.info(
f"Template seeded with {len(values)} entries"
)
async def _upsert_missing(self, ctx, seed):
written = 0
for type_name, entries in seed.items():
existing = set(
await ctx.config.keys(TEMPLATE_WORKSPACE, type_name)
)
values = []
for key, value in entries.items():
if key not in existing:
values.append(
(type_name, key, json.dumps(value))
)
if values:
await ctx.config.put_many(TEMPLATE_WORKSPACE, values)
written += len(values)
ctx.logger.info(
f"Template upsert-missing: {written} new entries"
)

View file

@ -0,0 +1,138 @@
"""
The WorkspaceInit initialiser creates a workspace and populates it from
either the ``__template__`` workspace or a seed file on disk.
Parameters
----------
workspace : str
Target workspace to create / populate.
source : str
Either ``"template"`` (copy the full contents of the
``__template__`` workspace) or ``"seed-file"`` (read from
``seed_file``).
seed_file : str (required when source=="seed-file")
Path to a JSON seed file with the same shape TemplateSeed consumes.
overwrite : bool (default False)
On re-run (flag change), if True overwrite all keys; if False,
upsert-missing-only (preserves in-workspace customisations).
Raises (in ``run``)
-------------------
When source is ``"template"``, raises ``RuntimeError`` if the
``__template__`` workspace is empty, indicating that TemplateSeed
hasn't run yet. The bootstrapper's retry loop will re-attempt on
the next cycle once the prerequisite is satisfied.
"""
import json
from .. base import Initialiser
TEMPLATE_WORKSPACE = "__template__"
class WorkspaceInit(Initialiser):
def __init__(
self,
workspace="default",
source="template",
seed_file=None,
overwrite=False,
**kwargs,
):
super().__init__(**kwargs)
if source not in ("template", "seed-file"):
raise ValueError(
f"WorkspaceInit: source must be 'template' or "
f"'seed-file', got {source!r}"
)
if source == "seed-file" and not seed_file:
raise ValueError(
"WorkspaceInit: seed_file required when source='seed-file'"
)
self.workspace = workspace
self.source = source
self.seed_file = seed_file
self.overwrite = overwrite
async def run(self, ctx, old_flag, new_flag):
if self.source == "seed-file":
tree = self._load_seed_file()
else:
tree = await self._load_from_template(ctx)
if old_flag is None or self.overwrite:
await self._write_all(ctx, tree)
else:
await self._upsert_missing(ctx, tree)
def _load_seed_file(self):
with open(self.seed_file) as f:
return json.load(f)
async def _load_from_template(self, ctx):
"""Build a seed tree from the entire ``__template__`` workspace.
Raises if the workspace is empty, so the bootstrapper knows
the prerequisite isn't met yet."""
raw_tree = await ctx.config.get_all(TEMPLATE_WORKSPACE)
tree = {}
total = 0
for type_name, entries in raw_tree.items():
parsed = {}
for key, raw in entries.items():
if raw is None:
continue
try:
parsed[key] = json.loads(raw)
except Exception:
parsed[key] = raw
total += 1
if parsed:
tree[type_name] = parsed
if total == 0:
raise RuntimeError(
"Template workspace is empty — has TemplateSeed run yet?"
)
ctx.logger.debug(
f"Loaded {total} template entries across {len(tree)} types"
)
return tree
async def _write_all(self, ctx, tree):
values = []
for type_name, entries in tree.items():
for key, value in entries.items():
values.append((type_name, key, json.dumps(value)))
if values:
await ctx.config.put_many(self.workspace, values)
ctx.logger.info(
f"Workspace {self.workspace!r} populated with "
f"{len(values)} entries"
)
async def _upsert_missing(self, ctx, tree):
written = 0
for type_name, entries in tree.items():
existing = set(
await ctx.config.keys(self.workspace, type_name)
)
values = []
for key, value in entries.items():
if key not in existing:
values.append(
(type_name, key, json.dumps(value))
)
if values:
await ctx.config.put_many(self.workspace, values)
written += len(values)
ctx.logger.info(
f"Workspace {self.workspace!r} upsert-missing: "
f"{written} new entries"
)
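Taken together, the core initialisers are typically wired in the order PulsarTopology, TemplateSeed, WorkspaceInit, DefaultFlowStart, since each later step consumes what the previous one produced. A hedged sketch of the list form the bootstrapper accepts via its ``initialisers`` parameter, equivalent to the YAML file shape shown earlier; flag values, the seed path, and the blueprint name are illustrative:

initialisers = [
    {
        "class": "trustgraph.bootstrap.initialisers.PulsarTopology",
        "name": "pulsar-topology",
        "flag": "v1",
        "params": {"admin_url": "http://pulsar:8080", "tenant": "tg"},
    },
    {
        "class": "trustgraph.bootstrap.initialisers.TemplateSeed",
        "name": "template-seed",
        "flag": "v1",
        "params": {"config_file": "/config/template-seed.json"},
    },
    {
        "class": "trustgraph.bootstrap.initialisers.WorkspaceInit",
        "name": "default-workspace",
        "flag": "v1",
        "params": {"workspace": "default", "source": "template"},
    },
    {
        "class": "trustgraph.bootstrap.initialisers.DefaultFlowStart",
        "name": "default-flow",
        "flag": "v1",
        "params": {
            "workspace": "default",
            "flow_id": "default",
            "blueprint": "ontology",    # illustrative blueprint name
        },
    },
]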

View file

@ -24,6 +24,21 @@ logger = logging.getLogger(__name__)
default_ident = "config-svc"
def is_reserved_workspace(workspace):
"""Reserved workspaces are storage-only.
Any workspace id beginning with ``_`` is reserved for internal use
(e.g. ``__template__`` holding factory-default seed config).
Reads and writes work normally so bootstrap and provisioning code
can use the standard config API, but **change notifications for
reserved workspaces are suppressed**. Services subscribed to the
config push therefore never see reserved-workspace events and
cannot accidentally act on template content as if it were live
state.
"""
return workspace.startswith("_")
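# Quick illustration of the rule above (workspace ids are illustrative):
#   is_reserved_workspace("__system__")    -> True
#   is_reserved_workspace("__template__")  -> True
#   is_reserved_workspace("default")       -> False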
default_config_request_queue = config_request_queue
default_config_response_queue = config_response_queue
default_config_push_queue = config_push_queue
@ -130,6 +145,21 @@ class Processor(AsyncProcessor):
async def push(self, changes=None):
# Suppress notifications from reserved workspaces (ids starting
# with "_", e.g. "__template__"). Stored config is preserved;
# only the broadcast is filtered. Keeps services oblivious to
# template / bootstrap state.
if changes:
filtered = {}
for type_name, workspaces in changes.items():
visible = [
w for w in workspaces
if not is_reserved_workspace(w)
]
if visible:
filtered[type_name] = visible
changes = filtered
version = await self.config.get_version()
resp = ConfigPush(

View file

@ -5,7 +5,7 @@ Input is text, output is embeddings vector.
"""
from ... base import EmbeddingsService
from ollama import Client
from ollama import AsyncClient
import os
import logging
@ -30,24 +30,24 @@ class Processor(EmbeddingsService):
}
)
self.client = Client(host=ollama)
self.client = AsyncClient(host=ollama)
self.default_model = model
self._checked_models = set()
def _ensure_model(self, model_name):
async def _ensure_model(self, model_name):
"""Check if model exists locally, pull it if not."""
if model_name in self._checked_models:
return
try:
self.client.show(model_name)
await self.client.show(model_name)
self._checked_models.add(model_name)
except Exception as e:
status_code = getattr(e, 'status_code', None)
if status_code == 404 or "not found" in str(e).lower():
logger.info(f"Ollama model '{model_name}' not found locally. Pulling, this may take a while...")
try:
self.client.pull(model_name)
await self.client.pull(model_name)
self._checked_models.add(model_name)
logger.info(f"Successfully pulled Ollama model '{model_name}'.")
except Exception as pull_e:
@ -63,10 +63,10 @@ class Processor(EmbeddingsService):
use_model = model or self.default_model
# Ensure the model exists/is pulled
self._ensure_model(use_model)
await self._ensure_model(use_model)
# Ollama handles batch input efficiently
embeds = self.client.embed(
embeds = await self.client.embed(
model = use_model,
input = texts
)

View file

@ -1,22 +1,371 @@
"""
IAM-backed authentication and authorisation for the API gateway.
class Authenticator:
The gateway delegates both authentication ("who is this caller?")
and authorisation ("may they do this?") to the IAM regime via the
contract specified in docs/tech-specs/iam-contract.md. No regime-
specific policy (roles, scopes, claims) lives in the gateway.
def __init__(self, token=None, allow_all=False):
- Authentication: API keys are resolved by IAM; JWTs are validated
locally against the cached signing public key.
- Authorisation: every per-request decision is asked of IAM via
``authorise(identity, capability, resource, parameters)``, with
results cached for the TTL the regime returns.
"""
if not allow_all and token is None:
raise RuntimeError("Need a token")
import asyncio
import base64
import hashlib
import json
import logging
import time
import uuid
from dataclasses import dataclass, field
if not allow_all and token == "":
raise RuntimeError("Need a token")
from aiohttp import web
self.token = token
self.allow_all = allow_all
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519
def permitted(self, token, roles):
from ..base.iam_client import IamClient
from ..base.metrics import ProducerMetrics, SubscriberMetrics
from ..schema import (
IamRequest, IamResponse,
iam_request_queue, iam_response_queue,
)
if self.allow_all: return True
logger = logging.getLogger("auth")
if self.token != token: return False
API_KEY_CACHE_TTL = 60 # seconds
return True
# Upper bound on cache TTL the gateway honours for an authorisation
# decision, regardless of what the regime suggested. Caps the
# revocation latency window.
AUTHZ_CACHE_TTL_MAX = 60 # seconds
@dataclass
class Identity:
"""The gateway-side surface of an authenticated caller.
Per the IAM contract this is a small fixed shape; regime-internal
state (roles, claims, group memberships) is reachable only via
the regime's ``authorise`` operation. The gateway itself never
reads policy from this object.
"""
# Opaque handle, quoted back when calling ``authorise``. For
# the OSS regime this is the user record's id; the gateway
# treats it as a string with no semantic content.
handle: str
# The workspace this credential authenticates to. Used by the
# gateway as the default-fill-in for operations that omit a
# workspace. Never used as policy input.
workspace: str
# Stable identifier for audit logs. In OSS this is the same
# value as ``handle``; not assumed equal in the contract.
principal_id: str
# How the credential was presented. Non-policy; useful for
# logs / metrics only.
source: str # "api-key" | "jwt"
def _auth_failure():
return web.HTTPUnauthorized(
text='{"error":"auth failure"}',
content_type="application/json",
)
def _access_denied():
return web.HTTPForbidden(
text='{"error":"access denied"}',
content_type="application/json",
)
def _b64url_decode(s):
pad = "=" * (-len(s) % 4)
return base64.urlsafe_b64decode(s + pad)
def _verify_jwt_eddsa(token, public_pem):
"""Verify an Ed25519 JWT and return its claims. Raises on any
validation failure. Refuses non-EdDSA algorithms."""
parts = token.split(".")
if len(parts) != 3:
raise ValueError("malformed JWT")
h_b64, p_b64, s_b64 = parts
signing_input = f"{h_b64}.{p_b64}".encode("ascii")
header = json.loads(_b64url_decode(h_b64))
if header.get("alg") != "EdDSA":
raise ValueError(f"unsupported alg: {header.get('alg')!r}")
key = serialization.load_pem_public_key(public_pem.encode("ascii"))
if not isinstance(key, ed25519.Ed25519PublicKey):
raise ValueError("public key is not Ed25519")
signature = _b64url_decode(s_b64)
key.verify(signature, signing_input) # raises InvalidSignature
claims = json.loads(_b64url_decode(p_b64))
exp = claims.get("exp")
if exp is None or exp < time.time():
raise ValueError("expired")
return claims
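
# --- Editor's sketch, not from this changeset: minting a token that
# _verify_jwt_eddsa() above accepts, e.g. for unit tests. The claim names
# ("sub", "workspace", "exp") follow the gateway code; the subject and
# workspace values are illustrative.
import base64, json, time
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519

def _b64url(b: bytes) -> str:
    return base64.urlsafe_b64encode(b).rstrip(b"=").decode("ascii")

def mint_test_jwt(private_key: ed25519.Ed25519PrivateKey) -> str:
    header = {"alg": "EdDSA", "typ": "JWT"}
    claims = {"sub": "alice", "workspace": "default",
              "exp": int(time.time()) + 3600}
    signing_input = (
        _b64url(json.dumps(header).encode()) + "." +
        _b64url(json.dumps(claims).encode())
    )
    sig = private_key.sign(signing_input.encode("ascii"))
    return signing_input + "." + _b64url(sig)

# key = ed25519.Ed25519PrivateKey.generate()
# pem = key.public_key().public_bytes(
#     serialization.Encoding.PEM,
#     serialization.PublicFormat.SubjectPublicKeyInfo,
# ).decode("ascii")
# claims = _verify_jwt_eddsa(mint_test_jwt(key), pem)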
class IamAuth:
"""Resolves bearer credentials via the IAM service.
Used by every gateway endpoint that needs authentication. Fetches
the IAM signing public key at startup (cached in memory). API
keys are resolved via the IAM service with a local hash-to-identity
cache (short TTL so revoked keys stop working within the TTL
window without any push mechanism)."""
def __init__(self, backend, id="api-gateway"):
self.backend = backend
self.id = id
# Populated at start() via IAM.
self._signing_public_pem = None
# API-key cache: plaintext_sha256_hex -> (Identity, expires_ts)
self._key_cache = {}
self._key_cache_lock = asyncio.Lock()
# Authorisation decision cache: hash(handle, capability,
# resource, parameters) -> (allow_bool, expires_ts). Holds
# both allows and denies — denies cached briefly to avoid
# hammering iam-svc with repeated rejected attempts.
self._authz_cache: dict[str, tuple[bool, float]] = {}
self._authz_cache_lock = asyncio.Lock()
# ------------------------------------------------------------------
# Short-lived client helper. Mirrors the pattern used by the
# bootstrap framework and AsyncProcessor: a fresh uuid suffix per
# invocation so Pulsar exclusive subscriptions don't collide with
# ghosts from prior calls.
# ------------------------------------------------------------------
def _make_client(self):
rr_id = str(uuid.uuid4())
return IamClient(
backend=self.backend,
subscription=f"{self.id}--iam--{rr_id}",
consumer_name=self.id,
request_topic=iam_request_queue,
request_schema=IamRequest,
request_metrics=ProducerMetrics(
processor=self.id, flow=None, name="iam-request",
),
response_topic=iam_response_queue,
response_schema=IamResponse,
response_metrics=SubscriberMetrics(
processor=self.id, flow=None, name="iam-response",
),
)
async def _with_client(self, op):
"""Open a short-lived IamClient, run ``op(client)``, close."""
client = self._make_client()
await client.start()
try:
return await op(client)
finally:
try:
await client.stop()
except Exception:
pass
# ------------------------------------------------------------------
# Lifecycle
# ------------------------------------------------------------------
async def start(self, max_retries=30, retry_delay=2.0):
"""Fetch the signing public key from IAM. Retries on
failure the gateway may be starting before IAM is ready."""
async def _fetch(client):
return await client.get_signing_key_public()
for attempt in range(max_retries):
try:
pem = await self._with_client(_fetch)
if pem:
self._signing_public_pem = pem
logger.info(
"IamAuth: fetched IAM signing public key "
f"({len(pem)} bytes)"
)
return
except Exception as e:
logger.info(
f"IamAuth: waiting for IAM signing key "
f"({type(e).__name__}: {e}); "
f"retry {attempt + 1}/{max_retries}"
)
await asyncio.sleep(retry_delay)
# Don't prevent startup forever. A later authenticate() call
# will try again via the JWT path.
logger.warning(
"IamAuth: could not fetch IAM signing key at startup; "
"JWT validation will fail until it's available"
)
# ------------------------------------------------------------------
# Authentication
# ------------------------------------------------------------------
async def authenticate(self, request):
"""Extract and validate the Bearer credential from an HTTP
request. Returns an ``Identity``. Raises HTTPUnauthorized
(401 / "auth failure") on any failure mode the caller
cannot distinguish missing / malformed / invalid / expired /
revoked credentials."""
header = request.headers.get("Authorization", "")
if not header.startswith("Bearer "):
raise _auth_failure()
token = header[len("Bearer "):].strip()
if not token:
raise _auth_failure()
# API keys always start with "tg_". JWTs have two dots and
# no "tg_" prefix. Discriminate cheaply.
if token.startswith("tg_"):
return await self._resolve_api_key(token)
if token.count(".") == 2:
return self._verify_jwt(token)
raise _auth_failure()
def _verify_jwt(self, token):
if not self._signing_public_pem:
raise _auth_failure()
try:
claims = _verify_jwt_eddsa(token, self._signing_public_pem)
except Exception as e:
logger.debug(f"JWT validation failed: {type(e).__name__}: {e}")
raise _auth_failure()
sub = claims.get("sub", "")
ws = claims.get("workspace", "")
if not sub or not ws:
raise _auth_failure()
# JWT carries no policy state under the IAM contract;
# any roles / claims field is ignored here.
return Identity(
handle=sub, workspace=ws, principal_id=sub, source="jwt",
)
async def _resolve_api_key(self, plaintext):
h = hashlib.sha256(plaintext.encode("utf-8")).hexdigest()
cached = self._key_cache.get(h)
now = time.time()
if cached and cached[1] > now:
return cached[0]
async with self._key_cache_lock:
cached = self._key_cache.get(h)
if cached and cached[1] > now:
return cached[0]
try:
async def _call(client):
return await client.resolve_api_key(plaintext)
# ``roles`` is returned by the OSS regime as a hint
# but is not consulted by the gateway; all policy
# decisions go through ``authorise``.
user_id, workspace, _roles = await self._with_client(_call)
except Exception as e:
logger.debug(
f"API key resolution failed: "
f"{type(e).__name__}: {e}"
)
raise _auth_failure()
if not user_id or not workspace:
raise _auth_failure()
identity = Identity(
handle=user_id, workspace=workspace,
principal_id=user_id, source="api-key",
)
self._key_cache[h] = (identity, now + API_KEY_CACHE_TTL)
return identity
# ------------------------------------------------------------------
# Authorisation
# ------------------------------------------------------------------
@staticmethod
def _authz_cache_key(handle, capability, resource, parameters):
payload = json.dumps(
{
"h": handle,
"c": capability,
"r": resource or {},
"p": parameters or {},
},
sort_keys=True,
separators=(",", ":"),
)
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
async def authorise(self, identity, capability, resource, parameters):
"""Ask the IAM regime whether ``identity`` may perform
``capability`` on ``resource`` given ``parameters``.
Caches the decision for the regime's suggested TTL, clamped
above by ``AUTHZ_CACHE_TTL_MAX``. Both allow and deny
decisions are cached (denies briefly, to avoid hammering
iam-svc with repeated rejected attempts).
Raises ``HTTPForbidden`` (403 / "access denied") on a deny
decision. Raises ``HTTPUnauthorized`` (401 / "auth failure")
if the IAM service errors out; the gateway fails closed."""
key = self._authz_cache_key(
identity.handle, capability, resource, parameters,
)
now = time.time()
cached = self._authz_cache.get(key)
if cached and cached[1] > now:
allow, _ = cached
if not allow:
raise _access_denied()
return
async with self._authz_cache_lock:
cached = self._authz_cache.get(key)
if cached and cached[1] > now:
allow, _ = cached
if not allow:
raise _access_denied()
return
try:
async def _call(client):
return await client.authorise(
identity.handle, capability,
resource or {}, parameters or {},
)
allow, ttl = await self._with_client(_call)
except Exception as e:
logger.warning(
f"authorise failed: {type(e).__name__}: {e}; "
f"failing closed for "
f"{identity.principal_id!r} cap={capability!r}"
)
raise _auth_failure()
ttl = max(0, min(int(ttl or 0), AUTHZ_CACHE_TTL_MAX))
self._authz_cache[key] = (bool(allow), now + ttl)
if not allow:
raise _access_denied()
return
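
# --- Editor's sketch, not from this changeset: the decision cache key is
# deterministic because the JSON is canonicalised with sort_keys, so the
# ordering of resource / parameter dicts does not produce distinct cache
# entries. Handle and capability values below are illustrative.
k1 = IamAuth._authz_cache_key(
    "user-1", "documents:read",
    {"workspace": "default", "flow": "f1"}, {},
)
k2 = IamAuth._authz_cache_key(
    "user-1", "documents:read",
    {"flow": "f1", "workspace": "default"}, {},
)
assert k1 == k2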

View file

@ -0,0 +1,100 @@
"""
Gateway-side authorisation entry points.
Under the IAM contract (see docs/tech-specs/iam-contract.md) the
gateway holds *no* policy state. Roles, capability sets, and
workspace-scope rules all live in the IAM regime (iam-svc for OSS).
This module is the thin surface the gateway uses to ask the regime
for a decision:
- ``PUBLIC`` / ``AUTHENTICATED`` sentinels for endpoints that don't
go through capability-based authorisation.
- :func:`enforce`: authenticate the caller, then ask the regime.
- :func:`enforce_workspace`: default-fill the workspace from the
caller's bound workspace and ask the regime, with the workspace
treated as the resource address.
The capability strings themselves are an open vocabulary; see
docs/tech-specs/capabilities.md. The gateway does not validate them
beyond passing them through; an unknown capability simply produces a
deny verdict from the regime.
"""
from aiohttp import web
PUBLIC = "__public__"
AUTHENTICATED = "__authenticated__"
def access_denied():
return web.HTTPForbidden(
text='{"error":"access denied"}',
content_type="application/json",
)
def auth_failure():
return web.HTTPUnauthorized(
text='{"error":"auth failure"}',
content_type="application/json",
)
async def enforce(request, auth, capability):
"""Authenticate the caller and (for non-sentinel capabilities)
ask the IAM regime whether they may invoke ``capability``.
The resource is system-level (``{}``) and parameters are empty
use :func:`enforce_workspace` for workspace-scoped endpoints, or
drive authorisation through the operation registry for richer
cases.
- ``PUBLIC``: returns ``None``; no authentication.
- ``AUTHENTICATED``: returns the ``Identity``; no authorisation.
- capability string: returns the ``Identity`` if the regime
allows; raises ``HTTPForbidden`` otherwise.
"""
if capability == PUBLIC:
return None
identity = await auth.authenticate(request)
if capability == AUTHENTICATED:
return identity
await auth.authorise(identity, capability, {}, {})
return identity
async def enforce_workspace(data, identity, auth, capability=None):
"""Default-fill the workspace on a request body and (optionally)
authorise the caller for ``capability`` against that workspace.
- Target workspace = ``data["workspace"]`` if supplied, else the
caller's bound workspace.
- On success, ``data["workspace"]`` is overwritten with the
resolved value so downstream code sees a single canonical
address.
- When ``capability`` is given, the regime is asked whether the
caller may invoke ``capability`` on ``{workspace: target}``.
Raises ``HTTPForbidden`` on a deny.
For ``capability=None`` no authorisation call is made; the
caller has presumably already authorised via :func:`enforce`
(handy for endpoints that authorise once then resolve workspace
on the body before forwarding).
"""
if not isinstance(data, dict):
return data
requested = data.get("workspace", "")
target = requested or identity.workspace
data["workspace"] = target
if capability is not None:
await auth.authorise(
identity, capability, {"workspace": target}, {},
)
return data
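
# --- Editor's sketch, not from this changeset: how an endpoint typically
# combines the two helpers. The handler and the "documents:read"
# capability string are illustrative, not gateway code.
from aiohttp import web

async def handle_list_documents(request, auth):
    # 401 on bad credentials, 403 if the regime denies the capability.
    identity = await enforce(request, auth, "documents:read")
    body = await request.json()
    # Default-fill the caller's bound workspace if the body omitted one,
    # then re-check the capability against that workspace.
    body = await enforce_workspace(body, identity, auth,
                                   capability="documents:read")
    return web.json_response({"workspace": body["workspace"]})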

View file

@ -0,0 +1,40 @@
from ... schema import IamRequest, IamResponse
from ... schema import iam_request_queue, iam_response_queue
from ... messaging import TranslatorRegistry
from . requestor import ServiceRequestor
class IamRequestor(ServiceRequestor):
def __init__(self, backend, consumer, subscriber, timeout=120,
request_queue=None, response_queue=None):
if request_queue is None:
request_queue = iam_request_queue
if response_queue is None:
response_queue = iam_response_queue
super().__init__(
backend=backend,
consumer_name=consumer,
subscription=subscriber,
request_queue=request_queue,
response_queue=response_queue,
request_schema=IamRequest,
response_schema=IamResponse,
timeout=timeout,
)
self.request_translator = (
TranslatorRegistry.get_request_translator("iam")
)
self.response_translator = (
TranslatorRegistry.get_response_translator("iam")
)
def to_request(self, body):
return self.request_translator.decode(body)
def from_response(self, message):
return self.response_translator.encode_with_completion(message)

View file

@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
from . config import ConfigRequestor
from . flow import FlowRequestor
from . iam import IamRequestor
from . librarian import LibrarianRequestor
from . knowledge import KnowledgeRequestor
from . collection_management import CollectionManagementRequestor
@ -72,6 +73,7 @@ request_response_dispatchers = {
global_dispatchers = {
"config": ConfigRequestor,
"flow": FlowRequestor,
"iam": IamRequestor,
"librarian": LibrarianRequestor,
"knowledge": KnowledgeRequestor,
"collection-management": CollectionManagementRequestor,
@ -105,13 +107,31 @@ class DispatcherWrapper:
class DispatcherManager:
def __init__(self, backend, config_receiver, prefix="api-gateway",
queue_overrides=None):
def __init__(self, backend, config_receiver, auth,
prefix="api-gateway", queue_overrides=None):
"""
``auth`` is required. It flows into the Mux for first-frame
WebSocket authentication and into downstream dispatcher
construction. There is no permissive default; constructing
a DispatcherManager without an authenticator would be a
silent downgrade to no-auth on the socket path.
"""
if auth is None:
raise ValueError(
"DispatcherManager requires an 'auth' argument — there "
"is no no-auth mode"
)
self.backend = backend
self.config_receiver = config_receiver
self.config_receiver.add_handler(self)
self.prefix = prefix
# Gateway IamAuth — used by the socket Mux for first-frame
# auth and by any dispatcher that needs to resolve caller
# identity out-of-band.
self.auth = auth
# Store queue overrides for global services
# Format: {"config": {"request": "...", "response": "..."}, ...}
self.queue_overrides = queue_overrides or {}
@ -163,6 +183,15 @@ class DispatcherManager:
def dispatch_global_service(self):
return DispatcherWrapper(self.process_global_service)
def dispatch_auth_iam(self):
"""Pre-configured IAM dispatcher for the gateway's auth
endpoints (login, bootstrap, change-password). Pins the
kind to ``iam`` so these handlers don't have to supply URL
params the global dispatcher would expect."""
async def _process(data, responder):
return await self.invoke_global_service(data, responder, "iam")
return DispatcherWrapper(_process)
def dispatch_core_export(self):
return DispatcherWrapper(self.process_core_export)
@ -314,7 +343,10 @@ class DispatcherManager:
async def process_socket(self, ws, running, params):
dispatcher = Mux(self, ws, running)
# The mux self-authenticates via the first-frame protocol;
# pass the gateway's IamAuth so it can validate tokens
# without reaching back into the endpoint layer.
dispatcher = Mux(self, ws, running, auth=self.auth)
return dispatcher

View file

@ -16,11 +16,28 @@ MAX_QUEUE_SIZE = 10
class Mux:
def __init__(self, dispatcher_manager, ws, running):
def __init__(self, dispatcher_manager, ws, running, auth):
"""
``auth`` is required; the Mux implements the first-frame
auth protocol described in ``iam.md`` and will refuse any
non-auth frame until an ``auth-ok`` has been issued. There
is no no-auth mode.
"""
if auth is None:
raise ValueError(
"Mux requires an 'auth' argument — there is no "
"no-auth mode"
)
self.dispatcher_manager = dispatcher_manager
self.ws = ws
self.running = running
self.auth = auth
# Authenticated identity, populated by the first-frame auth
# protocol. ``None`` means the socket is not yet
# authenticated; any non-auth frame is refused.
self.identity = None
self.q = asyncio.Queue(maxsize=MAX_QUEUE_SIZE)
@ -31,6 +48,41 @@ class Mux:
if self.ws:
await self.ws.close()
async def _handle_auth_frame(self, data):
"""Process a ``{"type": "auth", "token": "..."}`` frame.
On success, updates ``self.identity`` and returns an
``auth-ok`` response frame. On failure, returns the masked
auth-failure frame. Never raises; auth failures keep the
socket open so the client can retry without reconnecting
(important for browsers, which treat a handshake-time 401
as terminal)."""
token = data.get("token", "")
if not token:
await self.ws.send_json({
"type": "auth-failed",
"error": "auth failure",
})
return
class _Shim:
def __init__(self, tok):
self.headers = {"Authorization": f"Bearer {tok}"}
try:
identity = await self.auth.authenticate(_Shim(token))
except Exception:
await self.ws.send_json({
"type": "auth-failed",
"error": "auth failure",
})
return
self.identity = identity
await self.ws.send_json({
"type": "auth-ok",
"workspace": identity.workspace,
})
async def receive(self, msg):
request_id = None
@ -38,6 +90,16 @@ class Mux:
try:
data = msg.json()
# In-band auth protocol: the client sends
# ``{"type": "auth", "token": "..."}`` as its first frame
# (and any time it wants to re-auth: JWT refresh, token
# rotation, etc). Auth is always required on a Mux —
# there is no no-auth mode.
if isinstance(data, dict) and data.get("type") == "auth":
await self._handle_auth_frame(data)
return
request_id = data.get("id")
if "request" not in data:
@ -46,9 +108,125 @@ class Mux:
if "id" not in data:
raise RuntimeError("Bad message")
# Reject all non-auth frames until an ``auth-ok`` has
# been issued.
if self.identity is None:
await self.ws.send_json({
"id": request_id,
"error": {
"message": "auth failure",
"type": "auth-required",
},
"complete": True,
})
return
# Per-service capability gating. Resolved through the
# operation registry so the WS path matches what HTTP
# callers see — same authority, same caps.
#
# Lookup mirrors the HTTP routing decision in
# ``request_task``: presence of ``flow`` on the envelope
# means a flow-level data-plane service (graph-rag,
# agent, …); absence means a workspace-level service
# (config, flow management, librarian, …) whose specific
# operation is in the inner request body. ``iam`` is
# treated as workspace-level too — its operations are
# registered with bare names, no kind prefix.
from ..registry import lookup as _registry_lookup
from ..capabilities import enforce_workspace
from aiohttp import web as _web
service = data.get("service", "")
inner = data.get("request") or {}
inner_op = inner.get("operation", "") if isinstance(inner, dict) else ""
if data.get("flow"):
op = _registry_lookup(f"flow-service:{service}")
elif service == "iam":
op = _registry_lookup(inner_op) if inner_op else None
else:
op = _registry_lookup(f"{service}:{inner_op}") if inner_op else None
if op is None:
await self.ws.send_json({
"id": request_id,
"error": {
"message": "unknown service",
"type": "unknown-service",
},
"complete": True,
})
return
# Resolve workspace first (default-fill from the caller's
# bound workspace), then ask the regime to authorise the
# service-level capability against the matched
# operation's resource shape.
try:
await enforce_workspace(data, self.identity, self.auth)
if isinstance(inner, dict):
await enforce_workspace(inner, self.identity, self.auth)
if data.get("flow"):
resource = {
"workspace": data.get("workspace", ""),
"flow": data.get("flow", ""),
}
parameters = {}
else:
# Build a minimal RequestContext so the matched
# operation's own extractors decide resource and
# parameters — same path the HTTP endpoints take.
from ..registry import RequestContext
ctx = RequestContext(
body=inner if isinstance(inner, dict) else {},
match_info={},
identity=self.identity,
)
resource = op.extract_resource(ctx)
parameters = op.extract_parameters(ctx)
await self.auth.authorise(
self.identity, op.capability, resource, parameters,
)
except _web.HTTPForbidden:
await self.ws.send_json({
"id": request_id,
"error": {
"message": "access denied",
"type": "access-denied",
},
"complete": True,
})
return
except _web.HTTPUnauthorized:
await self.ws.send_json({
"id": request_id,
"error": {
"message": "auth failure",
"type": "auth-required",
},
"complete": True,
})
return
workspace = data["workspace"]
# Plumb authenticated caller's handle as ``actor`` so
# iam-svc handlers (whoami, future actor-scoped checks)
# know who is calling. Overwrite any caller-supplied
# value so it can't be spoofed over the WS.
if (
service == "iam"
and isinstance(data.get("request"), dict)
and self.identity is not None
):
data["request"]["actor"] = self.identity.handle
await self.q.put((
data["id"],
data.get("workspace", "default"),
workspace,
data.get("flow"),
data["service"],
data["request"]

View file

@ -0,0 +1,131 @@
"""
Gateway auth endpoints.
Four dedicated paths:

POST /api/v1/auth/login              unauthenticated; username/password in, JWT out
POST /api/v1/auth/bootstrap          unauthenticated; IAM bootstrap op
POST /api/v1/auth/bootstrap-status   unauthenticated; side-effect-free first-run probe
POST /api/v1/auth/change-password    authenticated; any role
These are the only IAM-surface operations that can be reached from
outside. Everything else routes through ``/api/v1/iam``, gated per
operation by the registry.
"""
import logging
from aiohttp import web
from .. capabilities import enforce, PUBLIC, AUTHENTICATED
logger = logging.getLogger("auth-endpoints")
logger.setLevel(logging.INFO)
class AuthEndpoints:
"""Groups the three auth-surface handlers. Each forwards to the
IAM service via the existing ``IamRequestor`` dispatcher."""
def __init__(self, iam_dispatcher, auth):
self.iam = iam_dispatcher
self.auth = auth
async def start(self):
pass
def add_routes(self, app):
app.add_routes([
web.post("/api/v1/auth/login", self.login),
web.post("/api/v1/auth/bootstrap", self.bootstrap),
web.post(
"/api/v1/auth/bootstrap-status",
self.bootstrap_status,
),
web.post(
"/api/v1/auth/change-password",
self.change_password,
),
])
async def _forward(self, body):
async def responder(x, fin):
pass
return await self.iam.process(body, responder)
async def login(self, request):
"""Public. Accepts {username, password, workspace?}. Returns
{jwt, jwt_expires} on success; IAM's masked auth failure on
anything else."""
await enforce(request, self.auth, PUBLIC)
try:
body = await request.json()
except Exception:
return web.json_response(
{"error": "invalid json"}, status=400,
)
req = {
"operation": "login",
"username": body.get("username", ""),
"password": body.get("password", ""),
"workspace": body.get("workspace", ""),
}
resp = await self._forward(req)
if "error" in resp:
return web.json_response(
{"error": "auth failure"}, status=401,
)
return web.json_response(resp)
async def bootstrap(self, request):
"""Public. Valid only when IAM is running in bootstrap mode
with empty tables. In every other case the IAM service
returns a masked auth-failure."""
await enforce(request, self.auth, PUBLIC)
resp = await self._forward({"operation": "bootstrap"})
if "error" in resp:
return web.json_response(
{"error": "auth failure"}, status=401,
)
return web.json_response(resp)
async def bootstrap_status(self, request):
"""Public, side-effect-free. Returns ``{"bootstrap_available":
bool}`` so a UI can decide whether to render first-run setup
without invoking the consuming ``bootstrap`` op."""
await enforce(request, self.auth, PUBLIC)
resp = await self._forward({"operation": "bootstrap-status"})
if "error" in resp:
return web.json_response(
{"error": "auth failure"}, status=401,
)
return web.json_response(resp)
async def change_password(self, request):
"""Authenticated (any role). Accepts {current_password,
new_password}; user_id is taken from the authenticated
identity, so the caller cannot change someone else's password
this way (reset-password is the admin path)."""
identity = await enforce(request, self.auth, AUTHENTICATED)
try:
body = await request.json()
except Exception:
return web.json_response(
{"error": "invalid json"}, status=400,
)
req = {
"operation": "change-password",
"user_id": identity.handle,
"password": body.get("current_password", ""),
"new_password": body.get("new_password", ""),
}
resp = await self._forward(req)
if "error" in resp:
err_type = resp.get("error", {}).get("type", "")
if err_type == "auth-failed":
return web.json_response(
{"error": "auth failure"}, status=401,
)
return web.json_response(
{"error": resp.get("error", {}).get("message", "error")},
status=400,
)
return web.json_response(resp)
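
# --- Editor's sketch, not from this changeset: exercising the auth surface
# over HTTP. The gateway address and credentials are assumptions; the
# paths come from add_routes() above and the "jwt" field from the login
# docstring.
import asyncio
import aiohttp

async def login_and_change_password():
    base = "http://localhost:8088"          # assumed gateway address
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{base}/api/v1/auth/login", json={
            "username": "admin", "password": "old-secret",
        }) as resp:
            token = (await resp.json())["jwt"]
        async with session.post(
            f"{base}/api/v1/auth/change-password",
            headers={"Authorization": f"Bearer {token}"},
            json={"current_password": "old-secret",
                  "new_password": "new-secret"},
        ) as resp:
            print(resp.status, await resp.json())

# asyncio.run(login_and_change_password())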

View file

@ -1,28 +1,27 @@
import asyncio
from aiohttp import web
import uuid
import logging
from aiohttp import web
from .. capabilities import enforce, enforce_workspace
logger = logging.getLogger("endpoint")
logger.setLevel(logging.INFO)
class ConstantEndpoint:
def __init__(self, endpoint_path, auth, dispatcher):
def __init__(self, endpoint_path, auth, dispatcher, capability):
self.path = endpoint_path
self.auth = auth
self.operation = "service"
self.capability = capability
self.dispatcher = dispatcher
async def start(self):
pass
def add_routes(self, app):
app.add_routes([
web.post(self.path, self.handle),
])
@ -31,22 +30,14 @@ class ConstantEndpoint:
logger.debug(f"Processing request: {request.path}")
try:
ht = request.headers["Authorization"]
tokens = ht.split(" ", 2)
if tokens[0] != "Bearer":
return web.HTTPUnauthorized()
token = tokens[1]
except:
token = ""
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
identity = await enforce(request, self.auth, self.capability)
try:
data = await request.json()
if identity is not None:
await enforce_workspace(data, identity, self.auth)
async def responder(x, fin):
pass
@ -54,10 +45,8 @@ class ConstantEndpoint:
return web.json_response(resp)
except web.HTTPException:
raise
except Exception as e:
logging.error(f"Exception: {e}")
return web.json_response(
{ "error": str(e) }
)
logger.error(f"Exception: {e}", exc_info=True)
return web.json_response({"error": str(e)})

View file

@ -4,16 +4,18 @@ from aiohttp import web
from trustgraph.i18n import get_language_pack
from .. capabilities import enforce
logger = logging.getLogger("endpoint")
logger.setLevel(logging.INFO)
class I18nPackEndpoint:
def __init__(self, endpoint_path: str, auth):
def __init__(self, endpoint_path: str, auth, capability):
self.path = endpoint_path
self.auth = auth
self.operation = "service"
self.capability = capability
async def start(self):
pass
@ -26,26 +28,13 @@ class I18nPackEndpoint:
async def handle(self, request):
logger.debug(f"Processing i18n pack request: {request.path}")
token = ""
try:
ht = request.headers["Authorization"]
tokens = ht.split(" ", 2)
if tokens[0] != "Bearer":
return web.HTTPUnauthorized()
token = tokens[1]
except Exception:
token = ""
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
await enforce(request, self.auth, self.capability)
lang = request.match_info.get("lang") or "en"
# This is a path traversal defense, and is a critical sec defense.
# Do not remove!
# Path-traversal defense — critical, do not remove.
if "/" in lang or ".." in lang:
return web.HTTPBadRequest(reason="Invalid language code")
pack = get_language_pack(lang)
return web.json_response(pack)

View file

@ -0,0 +1,114 @@
"""
Registry-driven /api/v1/iam endpoint.
The gateway no longer gates IAM management with a single coarse
``users:admin`` capability. Instead, each operation declares its
own capability + resource shape in the registry (``registry.py``);
this endpoint reads the body's ``operation`` field, looks up the
declaration, and asks the IAM regime to authorise the call.
Operations not in the registry produce a 400 ``unknown operation``.
This is the gateway's primary mechanism for fail-closed gating of
the IAM surface; the registry is the source of truth.
"""
import logging
from aiohttp import web
from .. capabilities import (
PUBLIC, AUTHENTICATED, auth_failure,
)
from .. registry import lookup, RequestContext
logger = logging.getLogger("iam-endpoint")
logger.setLevel(logging.INFO)
class IamEndpoint:
"""POST /api/v1/iam — generic forwarder gated by the operation
registry. The IAM dispatcher (``iam_dispatcher``) forwards the
body verbatim to iam-svc once authorisation succeeds."""
def __init__(self, endpoint_path, auth, dispatcher):
self.path = endpoint_path
self.auth = auth
self.dispatcher = dispatcher
async def start(self):
pass
def add_routes(self, app):
app.add_routes([web.post(self.path, self.handle)])
async def handle(self, request):
try:
body = await request.json()
except Exception:
return web.json_response(
{"error": "invalid json"}, status=400,
)
if not isinstance(body, dict):
return web.json_response(
{"error": "body must be an object"}, status=400,
)
op_name = body.get("operation", "")
op = lookup(op_name)
if op is None:
return web.json_response(
{"error": "unknown operation"}, status=400,
)
# Authentication: required for everything except PUBLIC.
identity = None
if op.capability != PUBLIC:
try:
identity = await self.auth.authenticate(request)
except web.HTTPException:
raise
# Authorisation: capability sentinels short-circuit the
# regime call; capability strings go through authorise().
if op.capability not in (PUBLIC, AUTHENTICATED):
ctx = RequestContext(
body=body,
match_info=dict(request.match_info),
identity=identity,
)
try:
resource = op.extract_resource(ctx)
parameters = op.extract_parameters(ctx)
except Exception as e:
logger.warning(
f"extractor failed for {op_name!r}: "
f"{type(e).__name__}: {e}"
)
return web.json_response(
{"error": "bad request"}, status=400,
)
await self.auth.authorise(
identity, op.capability, resource, parameters,
)
# Plumb the authenticated caller's handle through as ``actor``
# so iam-svc handlers (e.g. whoami, future actor-scoped
# checks) know who is making the request. The gateway is
# the only authority for this — body-supplied ``actor``
# values are overwritten so callers can't impersonate.
if identity is not None:
body["actor"] = identity.handle
async def responder(x, fin):
pass
try:
resp = await self.dispatcher.process(body, responder)
except web.HTTPException:
raise
except Exception as e:
logger.error(f"Exception: {e}", exc_info=True)
return web.json_response({"error": str(e)})
return web.json_response(resp)
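
# --- Editor's sketch, not from this changeset: calling the registry-gated
# /api/v1/iam endpoint. The body's "operation" selects the registry entry
# (list-users requires users:read); the address and token are assumptions.
# Any caller-supplied "actor" field is overwritten server-side.
import aiohttp

async def list_users(token, workspace="default"):
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8088/api/v1/iam",   # assumed gateway address
            headers={"Authorization": f"Bearer {token}"},
            json={"operation": "list-users", "workspace": workspace},
        ) as resp:
            return await resp.json()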

View file

@ -8,72 +8,269 @@ from . variable_endpoint import VariableEndpoint
from . socket import SocketEndpoint
from . metrics import MetricsEndpoint
from . i18n import I18nPackEndpoint
from . auth_endpoints import AuthEndpoints
from . iam_endpoint import IamEndpoint
from . registry_endpoint import RegistryRoutedVariableEndpoint
from .. capabilities import PUBLIC, AUTHENTICATED, auth_failure
from .. registry import lookup as _registry_lookup, RequestContext
from .. dispatch.manager import DispatcherManager
# /api/v1/{kind} (config / flow / librarian / knowledge /
# collection-management), /api/v1/iam, and /api/v1/flow/{flow}/...
# routes are all gated per-operation by the registry, not by a
# per-kind capability map. Login / bootstrap / change-password are
# served by AuthEndpoints with their own PUBLIC / AUTHENTICATED
# sentinels.
import logging as _mgr_logging
_mgr_logger = _mgr_logging.getLogger("endpoint")
class _RoutedVariableEndpoint:
"""HTTP endpoint that gates per request via the operation
registry. The URL's ``kind`` parameter combined with a fixed
``registry_prefix`` yields the registry key; e.g. prefix
``flow-service`` and kind ``agent`` looks up
``flow-service:agent``.
Used for ``/api/v1/flow/{flow}/service/{kind}`` (per-flow
data-plane services). ``/api/v1/{kind}`` (workspace-level
global services) goes through ``RegistryRoutedVariableEndpoint``
which discriminates on body operation as well as URL kind."""
def __init__(self, endpoint_path, auth, dispatcher, registry_prefix):
self.path = endpoint_path
self.auth = auth
self.dispatcher = dispatcher
self._registry_prefix = registry_prefix
async def start(self):
pass
def add_routes(self, app):
app.add_routes([web.post(self.path, self.handle)])
async def handle(self, request):
kind = request.match_info.get("kind", "")
op = _registry_lookup(f"{self._registry_prefix}:{kind}")
if op is None:
return web.json_response(
{"error": "unknown kind"}, status=404,
)
identity = await self.auth.authenticate(request)
try:
data = await request.json()
ctx = RequestContext(
body=data if isinstance(data, dict) else {},
match_info=dict(request.match_info),
identity=identity,
)
resource = op.extract_resource(ctx)
parameters = op.extract_parameters(ctx)
await self.auth.authorise(
identity, op.capability, resource, parameters,
)
async def responder(x, fin):
pass
resp = await self.dispatcher.process(
data, responder, request.match_info,
)
return web.json_response(resp)
except web.HTTPException:
raise
except Exception as e:
_mgr_logger.error(f"Exception: {e}", exc_info=True)
return web.json_response({"error": str(e)})
class _RoutedSocketEndpoint:
"""WebSocket endpoint gated per request via the operation
registry. Like ``_RoutedVariableEndpoint`` but for the
streaming flow import / export socket paths."""
def __init__(self, endpoint_path, auth, dispatcher, registry_prefix):
self.path = endpoint_path
self.auth = auth
self.dispatcher = dispatcher
self._registry_prefix = registry_prefix
async def start(self):
pass
def add_routes(self, app):
app.add_routes([web.get(self.path, self.handle)])
async def handle(self, request):
kind = request.match_info.get("kind", "")
op = _registry_lookup(f"{self._registry_prefix}:{kind}")
if op is None:
return web.json_response(
{"error": "unknown kind"}, status=404,
)
token = request.query.get("token", "")
if not token:
return auth_failure()
from . socket import _QueryTokenRequest
try:
identity = await self.auth.authenticate(
_QueryTokenRequest(token)
)
except web.HTTPException as e:
return e
ctx = RequestContext(
body={},
match_info=dict(request.match_info),
identity=identity,
)
try:
resource = op.extract_resource(ctx)
parameters = op.extract_parameters(ctx)
await self.auth.authorise(
identity, op.capability, resource, parameters,
)
except web.HTTPException as e:
return e
# Delegate the websocket handling to a standalone SocketEndpoint
# with the resolved capability, bypassing the per-request mutation
# concern by instantiating fresh state.
ws_ep = SocketEndpoint(
endpoint_path=self.path,
auth=self.auth,
dispatcher=self.dispatcher,
capability=op.capability,
)
return await ws_ep.handle(request)
class EndpointManager:
def __init__(
self, dispatcher_manager, auth, prometheus_url, timeout=600
self, dispatcher_manager, auth, prometheus_url, timeout=600,
):
self.dispatcher_manager = dispatcher_manager
self.timeout = timeout
self.services = {
}
self.endpoints = [
# Auth surface — public / authenticated-any. Must come
# before the generic /api/v1/{kind} routes to win the
match for /api/v1/auth/* paths. aiohttp matches routes in
registration order, so these are registered first.
AuthEndpoints(
iam_dispatcher=dispatcher_manager.dispatch_auth_iam(),
auth=auth,
),
# /api/v1/iam — registry-driven IAM management. Per
# operation gating happens inside IamEndpoint via the
# operation registry; the dispatcher forwards verbatim
# to iam-svc once authorisation has succeeded. Listed
# before the generic /api/v1/{kind} route so it wins
# the match for "iam".
IamEndpoint(
endpoint_path="/api/v1/iam",
auth=auth,
dispatcher=dispatcher_manager.dispatch_auth_iam(),
),
I18nPackEndpoint(
endpoint_path = "/api/v1/i18n/packs/{lang}",
auth = auth,
endpoint_path="/api/v1/i18n/packs/{lang}",
auth=auth,
capability=PUBLIC,
),
MetricsEndpoint(
endpoint_path = "/api/metrics",
prometheus_url = prometheus_url,
auth = auth,
endpoint_path="/api/metrics",
prometheus_url=prometheus_url,
auth=auth,
capability="metrics:read",
),
VariableEndpoint(
endpoint_path = "/api/v1/{kind}", auth = auth,
dispatcher = dispatcher_manager.dispatch_global_service(),
# Global services: registry-driven per-operation gating.
# Each kind+op combination has a registry entry that
# declares its capability and resource shape. Listed
# after the IAM and auth-surface routes; aiohttp's
# path matcher prefers the more-specific path so this
# variable route doesn't shadow them.
RegistryRoutedVariableEndpoint(
endpoint_path="/api/v1/{kind}",
auth=auth,
dispatcher=dispatcher_manager.dispatch_global_service(),
),
# /api/v1/socket: WebSocket handshake accepts
# unconditionally; the Mux dispatcher runs the
# first-frame auth protocol. Handshake-time 401s break
# browser reconnection, so authentication is always
# in-band for this endpoint.
SocketEndpoint(
endpoint_path = "/api/v1/socket",
auth = auth,
dispatcher = dispatcher_manager.dispatch_socket()
endpoint_path="/api/v1/socket",
auth=auth,
dispatcher=dispatcher_manager.dispatch_socket(),
capability=AUTHENTICATED, # informational only; bypassed
in_band_auth=True,
),
VariableEndpoint(
endpoint_path = "/api/v1/flow/{flow}/service/{kind}",
auth = auth,
dispatcher = dispatcher_manager.dispatch_flow_service(),
# Per-flow request/response services — gated per
# ``flow-service:<kind>`` registry entry.
_RoutedVariableEndpoint(
endpoint_path="/api/v1/flow/{flow}/service/{kind}",
auth=auth,
dispatcher=dispatcher_manager.dispatch_flow_service(),
registry_prefix="flow-service",
),
SocketEndpoint(
endpoint_path = "/api/v1/flow/{flow}/import/{kind}",
auth = auth,
dispatcher = dispatcher_manager.dispatch_flow_import()
# Per-flow streaming import/export — gated per
# ``flow-import:<kind>`` / ``flow-export:<kind>`` registry
# entry.
_RoutedSocketEndpoint(
endpoint_path="/api/v1/flow/{flow}/import/{kind}",
auth=auth,
dispatcher=dispatcher_manager.dispatch_flow_import(),
registry_prefix="flow-import",
),
SocketEndpoint(
endpoint_path = "/api/v1/flow/{flow}/export/{kind}",
auth = auth,
dispatcher = dispatcher_manager.dispatch_flow_export()
_RoutedSocketEndpoint(
endpoint_path="/api/v1/flow/{flow}/export/{kind}",
auth=auth,
dispatcher=dispatcher_manager.dispatch_flow_export(),
registry_prefix="flow-export",
),
StreamEndpoint(
endpoint_path="/api/v1/import-core",
auth=auth,
method="POST",
dispatcher=dispatcher_manager.dispatch_core_import(),
# Cross-subject import — require the admin bundle via a
# single representative capability.
capability="users:admin",
),
StreamEndpoint(
endpoint_path = "/api/v1/import-core",
auth = auth,
method = "POST",
dispatcher = dispatcher_manager.dispatch_core_import(),
endpoint_path="/api/v1/export-core",
auth=auth,
method="GET",
dispatcher=dispatcher_manager.dispatch_core_export(),
capability="users:admin",
),
StreamEndpoint(
endpoint_path = "/api/v1/export-core",
auth = auth,
method = "GET",
dispatcher = dispatcher_manager.dispatch_core_export(),
),
StreamEndpoint(
endpoint_path = "/api/v1/document-stream",
auth = auth,
method = "GET",
dispatcher = dispatcher_manager.dispatch_document_stream(),
endpoint_path="/api/v1/document-stream",
auth=auth,
method="GET",
dispatcher=dispatcher_manager.dispatch_document_stream(),
capability="documents:read",
),
]
@ -84,4 +281,3 @@ class EndpointManager:
async def start(self):
for ep in self.endpoints:
await ep.start()

View file

@ -10,17 +10,19 @@ import asyncio
import uuid
import logging
from .. capabilities import enforce
logger = logging.getLogger("endpoint")
logger.setLevel(logging.INFO)
class MetricsEndpoint:
def __init__(self, prometheus_url, endpoint_path, auth):
def __init__(self, prometheus_url, endpoint_path, auth, capability):
self.prometheus_url = prometheus_url
self.path = endpoint_path
self.auth = auth
self.operation = "service"
self.capability = capability
async def start(self):
pass
@ -35,38 +37,39 @@ class MetricsEndpoint:
logger.debug(f"Processing metrics request: {request.path}")
try:
ht = request.headers["Authorization"]
tokens = ht.split(" ", 2)
if tokens[0] != "Bearer":
return web.HTTPUnauthorized()
token = tokens[1]
except:
token = ""
await enforce(request, self.auth, self.capability)
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
path = request.match_info["path"]
url = (
self.prometheus_url + "/api/v1/" + path + "?" +
request.query_string
)
try:
path = request.match_info["path"]
async with aiohttp.ClientSession() as session:
url = (
self.prometheus_url + "/api/v1/" + path + "?" +
request.query_string
)
async with session.get(url) as resp:
return web.Response(
status=resp.status,
text=await resp.text()
)
except aiohttp.ClientConnectionError as e:
# Upstream unreachable (connect refused, DNS failure,
# server disconnect). Distinguish from our own errors so
# callers know where the fault is.
logger.error(f"Metrics upstream {url} unreachable: {e}")
return web.Response(
status=502,
text=f"Bad Gateway: metrics upstream unreachable: {e}",
)
except Exception as e:
logging.error(f"Exception: {e}")
raise web.HTTPInternalServerError()
logger.error(f"Metrics proxy exception: {e}", exc_info=True)
return web.Response(
status=500,
text=f"Internal Server Error: {e}",
)

View file

@ -0,0 +1,123 @@
"""
Registry-driven dispatch for ``/api/v1/{kind}`` global services.
The body's ``operation`` field plus the URL's ``{kind}`` together
form the canonical operation name (``<kind>:<operation>``) that the
gateway looks up in ``registry.py``. The matched operation
declares its capability and resource shape; this endpoint asks the
IAM regime to authorise the call before forwarding the body
verbatim to the backend dispatcher.
The dispatcher is the same ``dispatch_global_service()`` factory the
old coarse path used; only the gating layer has changed.
Operations not present in the registry are rejected with 400
``unknown operation``; the endpoint fails closed.
"""
import logging
from aiohttp import web
from .. capabilities import (
PUBLIC, AUTHENTICATED, auth_failure,
)
from .. registry import lookup, RequestContext
logger = logging.getLogger("registry-endpoint")
logger.setLevel(logging.INFO)
class RegistryRoutedVariableEndpoint:
"""POST /api/v1/{kind} — kind comes from the URL, operation comes
from the body, both are joined as the registry key."""
def __init__(self, endpoint_path, auth, dispatcher):
self.path = endpoint_path
self.auth = auth
self.dispatcher = dispatcher
async def start(self):
pass
def add_routes(self, app):
app.add_routes([web.post(self.path, self.handle)])
async def handle(self, request):
kind = request.match_info.get("kind", "")
if not kind:
return web.json_response(
{"error": "missing kind"}, status=404,
)
try:
body = await request.json()
except Exception:
return web.json_response(
{"error": "invalid json"}, status=400,
)
if not isinstance(body, dict):
return web.json_response(
{"error": "body must be an object"}, status=400,
)
op_name = body.get("operation", "")
if not op_name:
return web.json_response(
{"error": "missing operation"}, status=400,
)
registry_key = f"{kind}:{op_name}"
op = lookup(registry_key)
if op is None:
return web.json_response(
{"error": "unknown operation"}, status=400,
)
identity = None
if op.capability != PUBLIC:
identity = await self.auth.authenticate(request)
if op.capability not in (PUBLIC, AUTHENTICATED):
ctx = RequestContext(
body=body,
match_info=dict(request.match_info),
identity=identity,
)
try:
resource = op.extract_resource(ctx)
parameters = op.extract_parameters(ctx)
except Exception as e:
logger.warning(
f"extractor failed for {registry_key!r}: "
f"{type(e).__name__}: {e}"
)
return web.json_response(
{"error": "bad request"}, status=400,
)
await self.auth.authorise(
identity, op.capability, resource, parameters,
)
# Default-fill workspace into the body so downstream
# dispatchers see the canonical resolved value. The
# extractor has already pulled the workspace out;
# mirror it back to the body for the verbatim forward.
if "workspace" in resource:
body["workspace"] = resource["workspace"]
async def responder(x, fin):
pass
try:
resp = await self.dispatcher.process(
body, responder, request.match_info,
)
except web.HTTPException:
raise
except Exception as e:
logger.error(f"Exception: {e}", exc_info=True)
return web.json_response({"error": str(e)})
return web.json_response(resp)
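
# --- Editor's sketch, not from this changeset: how the registry key is
# formed for this endpoint. Kind comes from the URL, operation from the
# body; the librarian kind and "add-document" operation named below are
# illustrative, not guaranteed registry entries.
from .. registry import lookup as _lookup

def resolve_capability(kind: str, body: dict):
    op = _lookup(f"{kind}:{body.get('operation', '')}")
    if op is None:
        return None       # maps to 400 "unknown operation": fail closed
    return op.capability  # what the regime is asked to authorise

# resolve_capability("librarian", {"operation": "add-document"})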

View file

@ -4,6 +4,9 @@ from aiohttp import web, WSMsgType
import logging
from .. running import Running
from .. capabilities import (
PUBLIC, AUTHENTICATED, auth_failure,
)
logger = logging.getLogger("socket")
logger.setLevel(logging.INFO)
@ -11,12 +14,25 @@ logger.setLevel(logging.INFO)
class SocketEndpoint:
def __init__(
self, endpoint_path, auth, dispatcher,
self, endpoint_path, auth, dispatcher, capability,
in_band_auth=False,
):
"""
``in_band_auth=True`` skips the handshake-time auth check.
The WebSocket handshake always succeeds; the dispatcher is
expected to gate itself via the first-frame auth protocol
(see ``Mux``).
This avoids the browser problem where a 401 on the handshake
is treated as permanent and prevents reconnection, and lets
long-lived sockets refresh their credential mid-session by
sending a new auth frame.
"""
self.path = endpoint_path
self.auth = auth
self.operation = "socket"
self.capability = capability
self.in_band_auth = in_band_auth
self.dispatcher = dispatcher
@ -61,15 +77,33 @@ class SocketEndpoint:
raise
async def handle(self, request):
"""Enhanced handler with better cleanup"""
try:
token = request.query['token']
except:
token = ""
"""Enhanced handler with better cleanup.
Auth: WebSocket clients pass the bearer token on the
``?token=...`` query string; we wrap it into a synthetic
Authorization header before delegating to the standard auth
path so the IAM-backed flow (JWT / API key) applies uniformly.
The first-frame auth protocol described in the IAM spec is
a future upgrade."""
if not self.in_band_auth and self.capability != PUBLIC:
token = request.query.get("token", "")
if not token:
return auth_failure()
try:
identity = await self.auth.authenticate(
_QueryTokenRequest(token)
)
except web.HTTPException as e:
return e
if self.capability != AUTHENTICATED:
try:
await self.auth.authorise(
identity, self.capability, {}, {},
)
except web.HTTPException as e:
return e
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
# 50MB max message size
ws = web.WebSocketResponse(max_msg_size=52428800)
@ -150,3 +184,11 @@ class SocketEndpoint:
web.get(self.path, self.handle),
])
class _QueryTokenRequest:
"""Minimal shim that exposes headers["Authorization"] to
IamAuth.authenticate(), derived from a query-string token."""
def __init__(self, token):
self.headers = {"Authorization": f"Bearer {token}"}
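
# --- Editor's sketch, not from this changeset: connecting to one of the
# query-token-gated socket paths. The host, flow name, and kind are
# illustrative; the ?token= convention comes from the handler above.
import asyncio
import websockets

async def stream_export(token, flow="default", kind="triples"):
    url = (
        f"ws://localhost:8088/api/v1/flow/{flow}/export/{kind}"
        f"?token={token}"
    )
    async with websockets.connect(url) as ws:
        async for message in ws:
            print(message)

# asyncio.run(stream_export("tg_example_key"))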

View file

@ -1,82 +1,64 @@
import asyncio
from aiohttp import web
import logging
from aiohttp import web
from .. capabilities import enforce
logger = logging.getLogger("endpoint")
logger.setLevel(logging.INFO)
class StreamEndpoint:
def __init__(self, endpoint_path, auth, dispatcher, method="POST"):
def __init__(
self, endpoint_path, auth, dispatcher, capability, method="POST",
):
self.path = endpoint_path
self.auth = auth
self.operation = "service"
self.capability = capability
self.method = method
self.dispatcher = dispatcher
async def start(self):
pass
def add_routes(self, app):
if self.method == "POST":
app.add_routes([
web.post(self.path, self.handle),
])
app.add_routes([web.post(self.path, self.handle)])
elif self.method == "GET":
app.add_routes([
web.get(self.path, self.handle),
])
app.add_routes([web.get(self.path, self.handle)])
else:
raise RuntimeError("Bad method" + self.method)
raise RuntimeError("Bad method " + self.method)
async def handle(self, request):
logger.debug(f"Processing request: {request.path}")
try:
ht = request.headers["Authorization"]
tokens = ht.split(" ", 2)
if tokens[0] != "Bearer":
return web.HTTPUnauthorized()
token = tokens[1]
except:
token = ""
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
await enforce(request, self.auth, self.capability)
try:
data = request.content
async def error(err):
return web.HTTPInternalServerError(text = err)
return web.HTTPInternalServerError(text=err)
async def ok(
status=200, reason="OK", type="application/octet-stream"
status=200, reason="OK",
type="application/octet-stream",
):
response = web.StreamResponse(
status = status, reason = reason,
headers = {"Content-Type": type}
status=status, reason=reason,
headers={"Content-Type": type},
)
await response.prepare(request)
return response
resp = await self.dispatcher.process(
data, error, ok, request
)
resp = await self.dispatcher.process(data, error, ok, request)
return resp
except web.HTTPException:
raise
except Exception as e:
logging.error(f"Exception: {e}")
return web.json_response(
{ "error": str(e) }
)
logger.error(f"Exception: {e}", exc_info=True)
return web.json_response({"error": str(e)})

View file

@ -1,27 +1,27 @@
import asyncio
from aiohttp import web
import logging
from aiohttp import web
from .. capabilities import enforce, enforce_workspace
logger = logging.getLogger("endpoint")
logger.setLevel(logging.INFO)
class VariableEndpoint:
def __init__(self, endpoint_path, auth, dispatcher):
def __init__(self, endpoint_path, auth, dispatcher, capability):
self.path = endpoint_path
self.auth = auth
self.operation = "service"
self.capability = capability
self.dispatcher = dispatcher
async def start(self):
pass
def add_routes(self, app):
app.add_routes([
web.post(self.path, self.handle),
])
@ -30,35 +30,25 @@ class VariableEndpoint:
logger.debug(f"Processing request: {request.path}")
try:
ht = request.headers["Authorization"]
tokens = ht.split(" ", 2)
if tokens[0] != "Bearer":
return web.HTTPUnauthorized()
token = tokens[1]
except:
token = ""
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
identity = await enforce(request, self.auth, self.capability)
try:
data = await request.json()
if identity is not None:
await enforce_workspace(data, identity, self.auth)
async def responder(x, fin):
pass
resp = await self.dispatcher.process(
data, responder, request.match_info
data, responder, request.match_info,
)
return web.json_response(resp)
except web.HTTPException:
raise
except Exception as e:
logging.error(f"Exception: {e}")
return web.json_response(
{ "error": str(e) }
)
logger.error(f"Exception: {e}", exc_info=True)
return web.json_response({"error": str(e)})

View file

@ -0,0 +1,533 @@
"""
Gateway operation registry.
Single declarative table mapping each operation the gateway
recognises to:
- The capability the IAM regime is asked to authorise against.
- The resource level (system / workspace / flow) determines the
shape of the resource identifier handed to ``authorise``.
- Extractors that build the resource and parameters from the
request context.
This is a gateway-internal concept. It is not part of the IAM
contract; the contract specifies what arguments ``authorise``
receives; the registry is how the gateway populates them.
See docs/tech-specs/iam-contract.md for the contract and
docs/tech-specs/iam.md for the request anatomy.
"""
from dataclasses import dataclass, field
from typing import Any, Callable
# Sentinels for operations that don't go through capability-based
# authorisation. Mirror the values used in capabilities.py so the
# gateway endpoint layer can recognise them uniformly.
PUBLIC = "__public__"
AUTHENTICATED = "__authenticated__"
class ResourceLevel:
"""Where the operation's resource lives.
``SYSTEM``: the operation acts on a deployment-level resource
(the user registry, the workspace registry, the signing key).
resource = {}. Workspace, if relevant, is a parameter, not an
address.
``WORKSPACE``: the operation acts on something within a workspace
(config, library, knowledge, collections, flow lifecycle).
resource = {workspace}.
``FLOW``: the operation acts on something within a flow within a
workspace (graph, agent, llm, etc.). resource = {workspace, flow}.
"""
SYSTEM = "system"
WORKSPACE = "workspace"
FLOW = "flow"
@dataclass
class RequestContext:
"""The bundle of inputs the registry's extractors operate on.
Assembled by the gateway from the incoming request after
authentication."""
# Parsed JSON body (HTTP) or inner request payload (WebSocket).
body: dict = field(default_factory=dict)
# URL path components (HTTP) or WebSocket envelope routing
# fields (id, service, workspace, flow).
match_info: dict = field(default_factory=dict)
# Authenticated identity for default-fill-in. Always present
# by the time extractors run, except for PUBLIC operations
# where it is None.
identity: Any = None
@dataclass
class Operation:
"""Declared operation the gateway can dispatch + authorise."""
# Canonical operation name (used for registry lookup, audit,
# debug logs). Mirrors the operation strings in the IAM
# service and other backends where applicable.
name: str
# Capability required to invoke this operation. Either a
# string from the capability vocabulary in capabilities.md, or
# the PUBLIC / AUTHENTICATED sentinel for operations that
# don't go through capability-based authorisation.
capability: str
# Where the operation's resource lives. Determines the
# shape of the resource argument passed to authorise.
resource_level: str
# Build the resource identifier from the request context.
# Returns a dict with the appropriate components for the
# resource level: {} for SYSTEM, {workspace} for WORKSPACE,
# {workspace, flow} for FLOW. Default-fill-in of workspace
# from identity.workspace happens here when applicable.
extract_resource: Callable[[RequestContext], dict]
# Build the parameters dict — decision-relevant fields the
# operation supplied that are not part of the resource
# address. E.g. workspace association on a system-level
# user-registry operation.
extract_parameters: Callable[[RequestContext], dict]
# ---------------------------------------------------------------------------
# Registry storage.
# ---------------------------------------------------------------------------
_REGISTRY: dict[str, Operation] = {}
def register(op: Operation) -> None:
if op.name in _REGISTRY:
raise RuntimeError(
f"operation {op.name!r} already registered"
)
_REGISTRY[op.name] = op
def lookup(name: str) -> Operation | None:
return _REGISTRY.get(name)
def all_operations() -> list[Operation]:
return list(_REGISTRY.values())
# ---------------------------------------------------------------------------
# Common extractor helpers.
# ---------------------------------------------------------------------------
def _empty_resource(_ctx: RequestContext) -> dict:
"""System-level resource: empty dict."""
return {}
def _workspace_from_body(ctx: RequestContext) -> dict:
"""Workspace-level resource sourced from the request body's
workspace field, defaulting to the caller's bound workspace."""
ws = (ctx.body.get("workspace") if isinstance(ctx.body, dict) else "")
if not ws and ctx.identity is not None:
ws = ctx.identity.workspace
return {"workspace": ws}
def _flow_from_match_info(ctx: RequestContext) -> dict:
"""Flow-level resource sourced from URL path components or WS
envelope fields. Both ``workspace`` and ``flow`` are required;
no default-fill-in (the address is the operation's identity)."""
return {
"workspace": ctx.match_info.get("workspace", ""),
"flow": ctx.match_info.get("flow", ""),
}
def _no_parameters(_ctx: RequestContext) -> dict:
return {}
def _body_as_parameters(ctx: RequestContext) -> dict:
"""All body fields are parameters — used when the operation's
body is small and uniformly decision-relevant (e.g. user-
registry ops where the body's user.workspace is what the
regime checks against the admin's scope)."""
return dict(ctx.body) if isinstance(ctx.body, dict) else {}
def _workspace_param_only(ctx: RequestContext) -> dict:
"""Parameters dict carrying only the workspace association.
Used by system-level operations (e.g. user-registry ops) where
the workspace isn't part of the resource address but is the
field the regime uses to scope the admin's authority.
Pulls the workspace from the inner ``user`` / ``workspace_record``
body field if present (create-user, create-workspace), then from
the top-level body, then from the caller's bound workspace."""
body = ctx.body if isinstance(ctx.body, dict) else {}
inner_user = body.get("user") if isinstance(body.get("user"), dict) else {}
inner_ws = (
body.get("workspace_record")
if isinstance(body.get("workspace_record"), dict) else {}
)
ws = (
inner_user.get("workspace")
or inner_ws.get("id")
or body.get("workspace")
)
if not ws and ctx.identity is not None:
ws = ctx.identity.workspace
return {"workspace": ws or ""}
# ---------------------------------------------------------------------------
# Operation registrations.
#
# The gateway looks operations up by their canonical name (the same
# string the request body / WS envelope carries in its ``operation``
# field where applicable). Auth-surface operations (login, bootstrap,
# change-password) are not listed here — they have their own routes
# in auth_endpoints.py and use PUBLIC / AUTHENTICATED sentinels
# directly. Pure gateway↔IAM internal operations (resolve-api-key,
# authorise, authorise-many, get-signing-key-public) are likewise
# excluded; they are never invoked over the public API.
# ---------------------------------------------------------------------------
# IAM management operations. All routed through /api/v1/iam, body
# carries ``operation`` plus operation-specific fields.
# User registry: SYSTEM-level resource (users are global, identified
# by handle). The admin's authority is scoped per workspace via the
# parameters {workspace} field — that's what the regime checks
# against the admin's role workspace_scope.
register(Operation(
name="create-user",
capability="users:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="list-users",
capability="users:read",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="get-user",
capability="users:read",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="update-user",
capability="users:write",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="disable-user",
capability="users:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="enable-user",
capability="users:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="delete-user",
capability="users:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="reset-password",
capability="users:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
# API keys: SYSTEM-level resource — like users, a key record exists
# in the deployment-wide keys registry. The workspace the key
# authenticates to is a property of the record, not a containment relationship;
# it appears as a parameter so the regime can scope the admin's
# authority to issue / list / revoke against it.
register(Operation(
name="create-api-key",
capability="keys:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="list-api-keys",
capability="keys:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
register(Operation(
name="revoke-api-key",
capability="keys:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_workspace_param_only,
))
# Workspace registry: SYSTEM-level resource (workspaces are the
# top-level addressable unit). No parameters — the workspace being
# acted on is identified by the body, not used as a scope cue.
register(Operation(
name="create-workspace",
capability="workspaces:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="list-workspaces",
capability="workspaces:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="get-workspace",
capability="workspaces:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="update-workspace",
capability="workspaces:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="disable-workspace",
capability="workspaces:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
# Signing key: SYSTEM-level operational op.
register(Operation(
name="rotate-signing-key",
capability="iam:admin",
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
# ---------------------------------------------------------------------------
# Auth-surface entries.
#
# Listed here so the registry is the one place the gateway looks for
# operation→capability mappings — including the sentinels for paths
# that don't go through capability-based authorisation. The actual
# routing is in auth_endpoints.py; these entries let the registry-
# driven dispatcher recognise the operation if it sees it on a
# generic path.
# ---------------------------------------------------------------------------
register(Operation(
name="login",
capability=PUBLIC,
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="bootstrap",
capability=PUBLIC,
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="bootstrap-status",
capability=PUBLIC,
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="change-password",
capability=AUTHENTICATED,
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
register(Operation(
name="whoami",
capability=AUTHENTICATED,
resource_level=ResourceLevel.SYSTEM,
extract_resource=_empty_resource,
extract_parameters=_no_parameters,
))
# ---------------------------------------------------------------------------
# Generic kind/operation entries.
#
# Names are ``<kind>:<operation>`` so the registry key is unique
# across dispatchers. All entries below are workspace-level
# resources (workspace defaulted from the caller's bound workspace
# if absent). Read/write distinction maps to the existing
# ``<subject>:read`` / ``<subject>:write`` capability vocabulary
# defined in capabilities.md.
# ---------------------------------------------------------------------------
def _register_kind_op(kind: str, op: str, capability: str) -> None:
"""Helper: register a workspace-level kind:op with the standard
extractors (workspace from body, no extra parameters)."""
register(Operation(
name=f"{kind}:{op}",
capability=capability,
resource_level=ResourceLevel.WORKSPACE,
extract_resource=_workspace_from_body,
extract_parameters=_no_parameters,
))
# config: KV-style workspace config service.
for _op in ("get", "list", "getvalues", "getvalues-all-ws", "config"):
_register_kind_op("config", _op, "config:read")
for _op in ("put", "delete"):
_register_kind_op("config", _op, "config:write")
# flow: flow-blueprint and flow-lifecycle service.
for _op in ("list-blueprints", "get-blueprint", "list-flows", "get-flow"):
_register_kind_op("flow", _op, "flows:read")
for _op in ("put-blueprint", "delete-blueprint", "start-flow", "stop-flow"):
_register_kind_op("flow", _op, "flows:write")
# librarian: document storage and processing service.
for _op in (
"get-document-metadata", "get-document-content",
"stream-document", "list-documents", "list-processing",
"get-upload-status", "list-uploads",
):
_register_kind_op("librarian", _op, "documents:read")
for _op in (
"add-document", "remove-document", "update-document",
"add-processing", "remove-processing",
"begin-upload", "upload-chunk", "complete-upload", "abort-upload",
):
_register_kind_op("librarian", _op, "documents:write")
# knowledge: knowledge-graph core service.
for _op in ("get-kg-core", "list-kg-cores"):
_register_kind_op("knowledge", _op, "knowledge:read")
for _op in ("put-kg-core", "delete-kg-core",
"load-kg-core", "unload-kg-core"):
_register_kind_op("knowledge", _op, "knowledge:write")
# collection-management: workspace collection lifecycle.
_register_kind_op("collection-management", "list-collections", "collections:read")
for _op in ("update-collection", "delete-collection"):
_register_kind_op("collection-management", _op, "collections:write")
# ---------------------------------------------------------------------------
# Per-flow data-plane services.
#
# /api/v1/flow/{flow}/service/{kind} and the streaming
# /api/v1/flow/{flow}/{import,export}/{kind} paths. No body-level
# ``operation`` discriminator — the URL kind is the operation
# identity. Resource is FLOW level (workspace + flow).
#
# Names: ``flow-service:<kind>``, ``flow-import:<kind>``,
# ``flow-export:<kind>``.
# ---------------------------------------------------------------------------
def _register_flow_kind(prefix: str, kind: str, capability: str) -> None:
register(Operation(
name=f"{prefix}:{kind}",
capability=capability,
resource_level=ResourceLevel.FLOW,
extract_resource=_flow_from_match_info,
extract_parameters=_no_parameters,
))
# Request/response services on /api/v1/flow/{flow}/service/{kind}.
_FLOW_SERVICES = {
"agent": "agent",
"text-completion": "llm",
"prompt": "llm",
"mcp-tool": "mcp",
"graph-rag": "graph:read",
"document-rag": "documents:read",
"embeddings": "embeddings",
"graph-embeddings": "graph:read",
"document-embeddings": "documents:read",
"triples": "graph:read",
"rows": "rows:read",
"nlp-query": "rows:read",
"structured-query": "rows:read",
"structured-diag": "rows:read",
"row-embeddings": "rows:read",
"sparql": "graph:read",
}
for _kind, _cap in _FLOW_SERVICES.items():
_register_flow_kind("flow-service", _kind, _cap)
# Streaming import socket endpoints.
_FLOW_IMPORTS = {
"triples": "graph:write",
"graph-embeddings": "graph:write",
"document-embeddings": "documents:write",
"entity-contexts": "documents:write",
"rows": "rows:write",
}
for _kind, _cap in _FLOW_IMPORTS.items():
_register_flow_kind("flow-import", _kind, _cap)
# Streaming export socket endpoints.
_FLOW_EXPORTS = {
"triples": "graph:read",
"graph-embeddings": "graph:read",
"document-embeddings": "documents:read",
"entity-contexts": "documents:read",
}
for _kind, _cap in _FLOW_EXPORTS.items():
_register_flow_kind("flow-export", _kind, _cap)

View file

@@ -12,7 +12,7 @@ import os
from trustgraph.base.logging import setup_logging, add_logging_args
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
from . auth import Authenticator
from . auth import IamAuth
from . config.receiver import ConfigReceiver
from . dispatch.manager import DispatcherManager
@@ -35,7 +35,6 @@ default_prometheus_url = os.getenv("PROMETHEUS_URL", "http://prometheus:9090")
default_pulsar_api_key = os.getenv("PULSAR_API_KEY", None)
default_timeout = 600
default_port = 8088
default_api_token = os.getenv("GATEWAY_SECRET", "")
class Api:
@@ -60,13 +59,14 @@ class Api:
if not self.prometheus_url.endswith("/"):
self.prometheus_url += "/"
api_token = config.get("api_token", default_api_token)
# Token not set, or token equal to the empty string, means no auth
if api_token:
self.auth = Authenticator(token=api_token)
else:
self.auth = Authenticator(allow_all=True)
# IAM-backed authentication. The legacy GATEWAY_SECRET
# shared-token path has been removed — there is no
# "open for everyone" fallback. The gateway cannot
# authenticate any request until IAM is reachable.
self.auth = IamAuth(
backend=self.pubsub_backend,
id=config.get("id", "api-gateway"),
)
self.config_receiver = ConfigReceiver(self.pubsub_backend)
@@ -118,6 +118,7 @@ class Api:
config_receiver = self.config_receiver,
prefix = "gateway",
queue_overrides = queue_overrides,
auth = self.auth,
)
self.endpoint_manager = EndpointManager(
@@ -132,12 +133,18 @@ class Api:
]
async def app_factory(self):
self.app = web.Application(
middlewares=[],
client_max_size=256 * 1024 * 1024
)
# Fetch IAM signing public key before accepting traffic.
# Blocks for a bounded retry window; the gateway starts even
# if IAM is still unreachable (JWT validation will 401 until
# the key is available).
await self.auth.start()
await self.config_receiver.start()
for ep in self.endpoints:
@@ -189,12 +196,6 @@ def run():
help=f'API request timeout in seconds (default: {default_timeout})',
)
parser.add_argument(
'--api-token',
default=default_api_token,
help=f'Secret API token (default: no auth)',
)
add_logging_args(parser)
parser.add_argument(

View file

@@ -0,0 +1 @@
from . service import *

View file

@@ -0,0 +1,4 @@
from . service import run
run()

File diff suppressed because it is too large

View file

@@ -0,0 +1,233 @@
"""
IAM service processor. Consumes the IAM request queue, forwards each
request to the IamService business logic, and returns the response on
the IAM response queue.
Shape mirrors trustgraph.config.service.
"""
import logging
import os
from trustgraph.schema import Error
from trustgraph.schema import IamRequest, IamResponse
from trustgraph.schema import iam_request_queue, iam_response_queue
from trustgraph.base import AsyncProcessor, Consumer, Producer
from trustgraph.base import ConsumerMetrics, ProducerMetrics
from trustgraph.base.cassandra_config import (
add_cassandra_args, resolve_cassandra_config,
)
from . iam import IamService
logger = logging.getLogger(__name__)
default_ident = "iam-svc"
default_iam_request_queue = iam_request_queue
default_iam_response_queue = iam_response_queue
# Environment variables consulted as a fallback when the
# corresponding params field is not set in the processor-group YAML
# or via CLI. Intended for K8s Secret / env-var injection so the
# bootstrap token never has to live in the YAML (and thus in git).
ENV_BOOTSTRAP_MODE = "IAM_BOOTSTRAP_MODE"
ENV_BOOTSTRAP_TOKEN = "IAM_BOOTSTRAP_TOKEN"
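# Illustrative sketch (assumption, not used by the class below): the
# resolution order the constructor applies -- an explicit params field
# (CLI / processor-group YAML) wins, then the environment variable,
# then None, which the fail-closed checks reject.
def _example_resolve_setting(params: dict, key: str, env_name: str):
    return params.get(key) or os.environ.get(env_name)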
class Processor(AsyncProcessor):
def __init__(self, **params):
iam_req_q = params.get(
"iam_request_queue", default_iam_request_queue,
)
iam_resp_q = params.get(
"iam_response_queue", default_iam_response_queue,
)
# Resolve bootstrap mode + token. Precedence: explicit
# params (CLI / processor-group YAML) → environment variable
# → unset (fail-closed). The env-var path is the K8s-native
# injection point: an `IAM_BOOTSTRAP_TOKEN` from a Secret
# never has to land in the YAML, and therefore never enters
# git history.
bootstrap_mode = (
params.get("bootstrap_mode")
or os.environ.get(ENV_BOOTSTRAP_MODE)
)
bootstrap_token = (
params.get("bootstrap_token")
or os.environ.get(ENV_BOOTSTRAP_TOKEN)
)
if bootstrap_mode not in ("token", "bootstrap"):
raise RuntimeError(
"iam-svc: bootstrap-mode is required. Set to 'token' "
"(with bootstrap-token) for production, or 'bootstrap' "
"to enable the explicit bootstrap operation over the "
"pub/sub bus (dev / quick-start only, not safe under "
"public exposure). Configurable via processor-group "
f"params or the {ENV_BOOTSTRAP_MODE} environment "
"variable. Refusing to start."
)
if bootstrap_mode == "token" and not bootstrap_token:
raise RuntimeError(
"iam-svc: bootstrap-mode=token requires bootstrap-token "
f"(or the {ENV_BOOTSTRAP_TOKEN} environment "
"variable). Refusing to start."
)
if bootstrap_mode == "bootstrap" and bootstrap_token:
raise RuntimeError(
"iam-svc: bootstrap-token is not accepted when "
"bootstrap-mode=bootstrap. Ambiguous intent. "
"Refusing to start."
)
self.bootstrap_mode = bootstrap_mode
self.bootstrap_token = bootstrap_token
cassandra_host = params.get("cassandra_host")
cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password")
hosts, username, password, keyspace = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password,
default_keyspace="iam",
)
self.cassandra_host = hosts
self.cassandra_username = username
self.cassandra_password = password
super().__init__(
**params | {
"iam_request_schema": IamRequest.__name__,
"iam_response_schema": IamResponse.__name__,
"cassandra_host": self.cassandra_host,
"cassandra_username": self.cassandra_username,
"cassandra_password": self.cassandra_password,
}
)
iam_request_metrics = ConsumerMetrics(
processor=self.id, flow=None, name="iam-request",
)
iam_response_metrics = ProducerMetrics(
processor=self.id, flow=None, name="iam-response",
)
self.iam_request_topic = iam_req_q
self.iam_request_consumer = Consumer(
taskgroup=self.taskgroup,
backend=self.pubsub,
flow=None,
topic=iam_req_q,
subscriber=self.id,
schema=IamRequest,
handler=self.on_iam_request,
metrics=iam_request_metrics,
)
self.iam_response_producer = Producer(
backend=self.pubsub,
topic=iam_resp_q,
schema=IamResponse,
metrics=iam_response_metrics,
)
self.iam = IamService(
host=self.cassandra_host,
username=self.cassandra_username,
password=self.cassandra_password,
keyspace=keyspace,
bootstrap_mode=self.bootstrap_mode,
bootstrap_token=self.bootstrap_token,
)
logger.info(
f"IAM service initialised (bootstrap-mode={self.bootstrap_mode})"
)
async def start(self):
await self.pubsub.ensure_topic(self.iam_request_topic)
# Token-mode auto-bootstrap runs before we accept requests so
# the first inbound call always sees a populated table.
await self.iam.auto_bootstrap_if_token_mode()
await self.iam_request_consumer.start()
async def on_iam_request(self, msg, consumer, flow):
id = None
try:
v = msg.value()
id = msg.properties()["id"]
logger.debug(
f"Handling IAM request {id} op={v.operation!r}"
)
resp = await self.iam.handle(v)
await self.iam_response_producer.send(
resp, properties={"id": id},
)
except Exception as e:
logger.error(
f"IAM request failed: {type(e).__name__}: {e}",
exc_info=True,
)
resp = IamResponse(
error=Error(type="internal-error", message=str(e)),
)
if id is not None:
await self.iam_response_producer.send(
resp, properties={"id": id},
)
@staticmethod
def add_args(parser):
AsyncProcessor.add_args(parser)
parser.add_argument(
"--iam-request-queue",
default=default_iam_request_queue,
help=f"IAM request queue (default: {default_iam_request_queue})",
)
parser.add_argument(
"--iam-response-queue",
default=default_iam_response_queue,
help=f"IAM response queue (default: {default_iam_response_queue})",
)
parser.add_argument(
"--bootstrap-mode",
default=None,
choices=["token", "bootstrap"],
help=(
"IAM bootstrap mode (required). "
"'token' = operator supplies the initial admin API "
"key via --bootstrap-token; auto-seeds on first start, "
"bootstrap operation refused. "
"'bootstrap' = bootstrap operation is live over the "
"bus until tables are populated; a token is generated "
"and returned by tg-bootstrap-iam. Unsafe to run "
"'bootstrap' mode with public exposure."
),
)
parser.add_argument(
"--bootstrap-token",
default=None,
help=(
"Initial admin API key plaintext, required when "
"--bootstrap-mode=token. Treat as a one-time "
"credential: the operator should rotate to a new key "
"and revoke this one after first use."
),
)
add_cassandra_args(parser)
def run():
Processor.launch(default_ident, __doc__)

View file

@@ -4,7 +4,7 @@ Simple LLM service, performs text prompt completion using an Ollama service.
Input is prompt, output is response.
"""
from ollama import Client
from ollama import AsyncClient
import os
import logging
@@ -38,23 +38,23 @@ class Processor(LlmService):
self.default_model = model
self.temperature = temperature
self.llm = Client(host=ollama)
self.llm = AsyncClient(host=ollama)
self._checked_models = set()
def _ensure_model(self, model_name):
async def _ensure_model(self, model_name):
"""Check if model exists locally, pull it if not."""
if model_name in self._checked_models:
return
try:
self.llm.show(model_name)
await self.llm.show(model_name)
self._checked_models.add(model_name)
except Exception as e:
status_code = getattr(e, 'status_code', None)
if status_code == 404 or "not found" in str(e).lower():
logger.info(f"Ollama model '{model_name}' not found locally. Pulling, this may take a while...")
try:
self.llm.pull(model_name)
await self.llm.pull(model_name)
self._checked_models.add(model_name)
logger.info(f"Successfully pulled Ollama model '{model_name}'.")
except Exception as pull_e:
@@ -66,9 +66,9 @@ class Processor(LlmService):
# Use provided model or fall back to default
model_name = model or self.default_model
# Ensure the model exists/is pulled
self._ensure_model(model_name)
await self._ensure_model(model_name)
# Use provided temperature or fall back to default
effective_temperature = temperature if temperature is not None else self.temperature
@@ -79,7 +79,7 @@ class Processor(LlmService):
try:
response = self.llm.generate(model_name, prompt, options={'temperature': effective_temperature})
response = await self.llm.generate(model_name, prompt, options={'temperature': effective_temperature})
response_text = response['response']
logger.debug("Sending response...")
@@ -113,7 +113,7 @@ class Processor(LlmService):
model_name = model or self.default_model
# Ensure the model exists/is pulled
self._ensure_model(model_name)
await self._ensure_model(model_name)
effective_temperature = temperature if temperature is not None else self.temperature
@@ -123,7 +123,7 @@ class Processor(LlmService):
prompt = system + "\n\n" + prompt
try:
stream = self.llm.generate(
stream = await self.llm.generate(
model_name,
prompt,
options={'temperature': effective_temperature},
@@ -133,7 +133,7 @@ class Processor(LlmService):
total_input_tokens = 0
total_output_tokens = 0
for chunk in stream:
async for chunk in stream:
if 'response' in chunk and chunk['response']:
yield LlmChunk(
text=chunk['response'],
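# Illustrative sketch (assumption, not part of the change above): minimal
# standalone use of the async Ollama client this diff switches to. The
# host, model name, and prompts are placeholders; the streaming call
# mirrors the generate()/async-for pattern shown in the diff.
import asyncio
from ollama import AsyncClient

async def demo():
    llm = AsyncClient(host="http://localhost:11434")
    # Non-streaming completion: a single response object.
    response = await llm.generate("gemma2", "Say hello in one word.",
                                  options={"temperature": 0.0})
    print(response["response"])
    # Streaming completion: an async iterator of chunks.
    stream = await llm.generate("gemma2", "Count to five.",
                                options={"temperature": 0.0}, stream=True)
    async for chunk in stream:
        if "response" in chunk and chunk["response"]:
            print(chunk["response"], end="", flush=True)

asyncio.run(demo())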

View file

@@ -0,0 +1,436 @@
"""
IAM Cassandra table store.
Tables:
- iam_workspaces (id primary key)
- iam_users (id primary key) + iam_users_by_username lookup table
(workspace, username) -> id
- iam_api_keys (key_hash primary key) with secondary index on user_id
- iam_signing_keys (kid primary key) RSA keypairs for JWT signing
See docs/tech-specs/iam-protocol.md for the wire-level context.
"""
import logging
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from ssl import SSLContext, PROTOCOL_TLSv1_2
from . cassandra_async import async_execute
logger = logging.getLogger(__name__)
class IamTableStore:
def __init__(
self,
cassandra_host, cassandra_username, cassandra_password,
keyspace,
):
self.keyspace = keyspace
logger.info("IAM: connecting to Cassandra...")
if isinstance(cassandra_host, str):
cassandra_host = [h.strip() for h in cassandra_host.split(",")]
if cassandra_username and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
auth_provider = PlainTextAuthProvider(
username=cassandra_username, password=cassandra_password,
)
self.cluster = Cluster(
cassandra_host,
auth_provider=auth_provider,
ssl_context=ssl_context,
)
else:
self.cluster = Cluster(cassandra_host)
self.cassandra = self.cluster.connect()
logger.info("IAM: connected.")
self._ensure_schema()
self._prepare_statements()
def _ensure_schema(self):
# FIXME: Replication factor should be configurable.
self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
}};
""")
self.cassandra.set_keyspace(self.keyspace)
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS iam_workspaces (
id text PRIMARY KEY,
name text,
enabled boolean,
created timestamp
);
""")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS iam_users (
id text PRIMARY KEY,
workspace text,
username text,
name text,
email text,
password_hash text,
roles set<text>,
enabled boolean,
must_change_password boolean,
created timestamp
);
""")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS iam_users_by_username (
workspace text,
username text,
user_id text,
PRIMARY KEY ((workspace), username)
);
""")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS iam_api_keys (
key_hash text PRIMARY KEY,
id text,
user_id text,
name text,
prefix text,
expires timestamp,
created timestamp,
last_used timestamp
);
""")
self.cassandra.execute("""
CREATE INDEX IF NOT EXISTS iam_api_keys_user_id_idx
ON iam_api_keys (user_id);
""")
self.cassandra.execute("""
CREATE INDEX IF NOT EXISTS iam_api_keys_id_idx
ON iam_api_keys (id);
""")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS iam_signing_keys (
kid text PRIMARY KEY,
private_pem text,
public_pem text,
created timestamp,
retired timestamp
);
""")
logger.info("IAM: Cassandra schema OK.")
def _prepare_statements(self):
c = self.cassandra
self.put_workspace_stmt = c.prepare("""
INSERT INTO iam_workspaces (id, name, enabled, created)
VALUES (?, ?, ?, ?)
""")
self.get_workspace_stmt = c.prepare("""
SELECT id, name, enabled, created FROM iam_workspaces
WHERE id = ?
""")
self.list_workspaces_stmt = c.prepare("""
SELECT id, name, enabled, created FROM iam_workspaces
""")
self.put_user_stmt = c.prepare("""
INSERT INTO iam_users (
id, workspace, username, name, email, password_hash,
roles, enabled, must_change_password, created
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""")
self.get_user_stmt = c.prepare("""
SELECT id, workspace, username, name, email, password_hash,
roles, enabled, must_change_password, created
FROM iam_users WHERE id = ?
""")
self.list_users_by_workspace_stmt = c.prepare("""
SELECT id, workspace, username, name, email, password_hash,
roles, enabled, must_change_password, created
FROM iam_users WHERE workspace = ? ALLOW FILTERING
""")
self.list_users_stmt = c.prepare("""
SELECT id, workspace, username, name, email, password_hash,
roles, enabled, must_change_password, created
FROM iam_users
""")
self.put_username_lookup_stmt = c.prepare("""
INSERT INTO iam_users_by_username (workspace, username, user_id)
VALUES (?, ?, ?)
""")
self.get_user_id_by_username_stmt = c.prepare("""
SELECT user_id FROM iam_users_by_username
WHERE workspace = ? AND username = ?
""")
self.delete_username_lookup_stmt = c.prepare("""
DELETE FROM iam_users_by_username
WHERE workspace = ? AND username = ?
""")
self.delete_user_stmt = c.prepare("""
DELETE FROM iam_users WHERE id = ?
""")
self.put_api_key_stmt = c.prepare("""
INSERT INTO iam_api_keys (
key_hash, id, user_id, name, prefix, expires,
created, last_used
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""")
self.get_api_key_by_hash_stmt = c.prepare("""
SELECT key_hash, id, user_id, name, prefix, expires,
created, last_used
FROM iam_api_keys WHERE key_hash = ?
""")
self.get_api_key_by_id_stmt = c.prepare("""
SELECT key_hash, id, user_id, name, prefix, expires,
created, last_used
FROM iam_api_keys WHERE id = ?
""")
self.list_api_keys_by_user_stmt = c.prepare("""
SELECT key_hash, id, user_id, name, prefix, expires,
created, last_used
FROM iam_api_keys WHERE user_id = ?
""")
self.delete_api_key_stmt = c.prepare("""
DELETE FROM iam_api_keys WHERE key_hash = ?
""")
self.put_signing_key_stmt = c.prepare("""
INSERT INTO iam_signing_keys (
kid, private_pem, public_pem, created, retired
)
VALUES (?, ?, ?, ?, ?)
""")
self.list_signing_keys_stmt = c.prepare("""
SELECT kid, private_pem, public_pem, created, retired
FROM iam_signing_keys
""")
self.retire_signing_key_stmt = c.prepare("""
UPDATE iam_signing_keys SET retired = ? WHERE kid = ?
""")
self.update_user_profile_stmt = c.prepare("""
UPDATE iam_users
SET name = ?, email = ?, roles = ?, enabled = ?,
must_change_password = ?
WHERE id = ?
""")
self.update_user_password_stmt = c.prepare("""
UPDATE iam_users
SET password_hash = ?, must_change_password = ?
WHERE id = ?
""")
self.update_user_enabled_stmt = c.prepare("""
UPDATE iam_users SET enabled = ? WHERE id = ?
""")
self.update_workspace_stmt = c.prepare("""
UPDATE iam_workspaces SET name = ?, enabled = ?
WHERE id = ?
""")
# ------------------------------------------------------------------
# Workspaces
# ------------------------------------------------------------------
async def put_workspace(self, id, name, enabled, created):
await async_execute(
self.cassandra, self.put_workspace_stmt,
(id, name, enabled, created),
)
async def get_workspace(self, id):
rows = await async_execute(
self.cassandra, self.get_workspace_stmt, (id,),
)
return rows[0] if rows else None
async def list_workspaces(self):
return await async_execute(
self.cassandra, self.list_workspaces_stmt,
)
# ------------------------------------------------------------------
# Users
# ------------------------------------------------------------------
async def put_user(
self, id, workspace, username, name, email, password_hash,
roles, enabled, must_change_password, created,
):
await async_execute(
self.cassandra, self.put_user_stmt,
(
id, workspace, username, name, email, password_hash,
set(roles) if roles else set(),
enabled, must_change_password, created,
),
)
await async_execute(
self.cassandra, self.put_username_lookup_stmt,
(workspace, username, id),
)
async def get_user(self, id):
rows = await async_execute(
self.cassandra, self.get_user_stmt, (id,),
)
return rows[0] if rows else None
async def get_user_id_by_username(self, workspace, username):
rows = await async_execute(
self.cassandra, self.get_user_id_by_username_stmt,
(workspace, username),
)
return rows[0][0] if rows else None
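    # Illustrative sketch (assumption, not part of this store): the
    # two-step lookup the iam_users_by_username table exists for --
    # resolve (workspace, username) to a user id, then fetch the full
    # record by its primary key.
    async def _example_get_user_by_username(self, workspace, username):
        user_id = await self.get_user_id_by_username(workspace, username)
        return await self.get_user(user_id) if user_id else None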
async def list_users_by_workspace(self, workspace):
return await async_execute(
self.cassandra, self.list_users_by_workspace_stmt, (workspace,),
)
async def list_users(self):
"""List every user across the deployment. Used by the
system-level list-users handler when no workspace filter is
supplied; the gateway has already authorised the call against
the caller's authority."""
return await async_execute(
self.cassandra, self.list_users_stmt, (),
)
async def delete_user(self, id):
await async_execute(
self.cassandra, self.delete_user_stmt, (id,),
)
async def delete_username_lookup(self, workspace, username):
await async_execute(
self.cassandra, self.delete_username_lookup_stmt,
(workspace, username),
)
# ------------------------------------------------------------------
# API keys
# ------------------------------------------------------------------
async def put_api_key(
self, key_hash, id, user_id, name, prefix, expires,
created, last_used,
):
await async_execute(
self.cassandra, self.put_api_key_stmt,
(key_hash, id, user_id, name, prefix, expires,
created, last_used),
)
async def get_api_key_by_hash(self, key_hash):
rows = await async_execute(
self.cassandra, self.get_api_key_by_hash_stmt, (key_hash,),
)
return rows[0] if rows else None
async def get_api_key_by_id(self, id):
rows = await async_execute(
self.cassandra, self.get_api_key_by_id_stmt, (id,),
)
return rows[0] if rows else None
async def list_api_keys_by_user(self, user_id):
return await async_execute(
self.cassandra, self.list_api_keys_by_user_stmt, (user_id,),
)
async def delete_api_key(self, key_hash):
await async_execute(
self.cassandra, self.delete_api_key_stmt, (key_hash,),
)
# ------------------------------------------------------------------
# Signing keys
# ------------------------------------------------------------------
async def put_signing_key(self, kid, private_pem, public_pem,
created, retired):
await async_execute(
self.cassandra, self.put_signing_key_stmt,
(kid, private_pem, public_pem, created, retired),
)
async def list_signing_keys(self):
return await async_execute(
self.cassandra, self.list_signing_keys_stmt,
)
async def retire_signing_key(self, kid, retired):
await async_execute(
self.cassandra, self.retire_signing_key_stmt,
(retired, kid),
)
# ------------------------------------------------------------------
# User partial updates
# ------------------------------------------------------------------
async def update_user_profile(
self, id, name, email, roles, enabled, must_change_password,
):
await async_execute(
self.cassandra, self.update_user_profile_stmt,
(
name, email,
set(roles) if roles else set(),
enabled, must_change_password, id,
),
)
async def update_user_password(
self, id, password_hash, must_change_password,
):
await async_execute(
self.cassandra, self.update_user_password_stmt,
(password_hash, must_change_password, id),
)
async def update_user_enabled(self, id, enabled):
await async_execute(
self.cassandra, self.update_user_enabled_stmt,
(enabled, id),
)
# ------------------------------------------------------------------
# Workspace updates
# ------------------------------------------------------------------
async def update_workspace(self, id, name, enabled):
await async_execute(
self.cassandra, self.update_workspace_stmt,
(name, enabled, id),
)
# ------------------------------------------------------------------
# Bootstrap helpers
# ------------------------------------------------------------------
async def any_workspace_exists(self):
rows = await self.list_workspaces()
return bool(rows)