Per-workspace queue routing for workspace-scoped services (#862)

Workspace identity is now determined by queue infrastructure instead of
message body fields, closing a privilege-escalation vector where a caller
could spoof the workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at
  startup, creates per-workspace consumers (queue:workspace), and manages
  consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for
  cross-workspace ops (getvalues-all-ws, bootstrapper writes to
  __workspaces__) and per-workspace queues for tenant-scoped ops, with
  workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest,
  LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and
  from DocumentMetadata / ProcessingMetadata — table stores now accept
  workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway
  serializers
- Gateway enforces workspace existence: requests targeting non-existent
  workspaces are rejected instead of being routed to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can
  react to workspace create/delete without subclassing WorkspaceProcessor
This commit is contained in:
cybermaggedon 2026-05-04 10:30:03 +01:00 committed by GitHub
parent 9be257ceee
commit 9f2bfbce0c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 1565 additions and 677 deletions

View file

@ -22,15 +22,15 @@ def dump_status(metrics_url, api_url, flow_id, token=None,
print()
print(f"Flow {flow_id}")
show_processors(metrics_url, flow_id)
show_processors(metrics_url, flow_id, token=token)
print()
print(f"Blueprint {blueprint_name}")
show_processors(metrics_url, blueprint_name)
show_processors(metrics_url, blueprint_name, token=token)
print()
def show_processors(metrics_url, flow_label):
def show_processors(metrics_url, flow_label, token=None):
url = f"{metrics_url}/query"
@ -40,7 +40,11 @@ def show_processors(metrics_url, flow_label):
"query": "consumer_state{" + expr + "}"
}
resp = requests.get(url, params=params)
headers = {}
if token:
headers["Authorization"] = f"Bearer {token}"
resp = requests.get(url, params=params, headers=headers)
obj = resp.json()

View file

@ -2,16 +2,22 @@
Dump out TrustGraph processor states.
"""
import os
import requests
import argparse
default_metrics_url = "http://localhost:8088/api/metrics"
DEFAULT_TOKEN = os.getenv("TRUSTGRAPH_TOKEN", None)
def dump_status(url):
def dump_status(metrics_url, token=None):
url = f"{url}/query?query=processor_info"
url = f"{metrics_url}/query?query=processor_info"
resp = requests.get(url)
headers = {}
if token:
headers["Authorization"] = f"Bearer {token}"
resp = requests.get(url, headers=headers)
obj = resp.json()
@ -39,11 +45,17 @@ def main():
help=f'Metrics URL (default: {default_metrics_url})',
)
parser.add_argument(
'-t', '--token',
default=DEFAULT_TOKEN,
help=f'Bearer token for authentication (default: TRUSTGRAPH_TOKEN env var)',
)
args = parser.parse_args()
try:
dump_status(args.metrics_url)
dump_status(args.metrics_url, args.token)
except Exception as e:

View file

@ -3,12 +3,14 @@ Dump out a stream of token rates, input, output and total. This is averaged
across the time since tg-show-token-rate is started.
"""
import os
import requests
import argparse
import json
import time
default_metrics_url = "http://localhost:8088/api/metrics"
DEFAULT_TOKEN = os.getenv("TRUSTGRAPH_TOKEN", None)
class Collate:
@ -36,16 +38,20 @@ class Collate:
return delta/time, self.total/self.time
def dump_status(metrics_url, number_samples, period):
def dump_status(metrics_url, number_samples, period, token=None):
input_url = f"{metrics_url}/query?query=input_tokens_total"
output_url = f"{metrics_url}/query?query=output_tokens_total"
resp = requests.get(input_url)
headers = {}
if token:
headers["Authorization"] = f"Bearer {token}"
resp = requests.get(input_url, headers=headers)
obj = resp.json()
input = Collate(obj)
resp = requests.get(output_url)
resp = requests.get(output_url, headers=headers)
obj = resp.json()
output = Collate(obj)
@ -56,20 +62,20 @@ def dump_status(metrics_url, number_samples, period):
time.sleep(period)
resp = requests.get(input_url)
resp = requests.get(input_url, headers=headers)
obj = resp.json()
inr, inl = input.record(obj, period)
resp = requests.get(output_url)
resp = requests.get(output_url, headers=headers)
obj = resp.json()
outr, outl = output.record(obj, period)
print(f"{inl:10.1f} {outl:10.1f} {inl+outl:10.1f}")
def main():
parser = argparse.ArgumentParser(
prog='tg-show-processor-state',
prog='tg-show-token-rate',
description=__doc__,
)
@ -93,6 +99,12 @@ def main():
help=f'Metrics period (default: 100)',
)
parser.add_argument(
'-t', '--token',
default=DEFAULT_TOKEN,
help=f'Bearer token for authentication (default: TRUSTGRAPH_TOKEN env var)',
)
args = parser.parse_args()
try: