mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-18 03:45:12 +02:00
feat: workspace-based multi-tenancy, replacing user as tenancy axis (#840)
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` is set at flow start time by the infrastructure;
it is no longer passed through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete a topic when
its blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Three parameterised scopes (plus the unparameterised literal case) fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
This commit is contained in:
parent
9332089b3d
commit
d35473f7f7
377 changed files with 6868 additions and 5785 deletions
|
|
@ -8,6 +8,7 @@ class Metadata:
|
|||
# Root document identifier (set by librarian, preserved through pipeline)
|
||||
root: str = ""
|
||||
|
||||
# Collection management
|
||||
user: str = ""
|
||||
# Collection the message belongs to. Workspace is NOT carried on the
|
||||
# message — consumers derive it from flow.workspace (the flow the
|
||||
# message arrived on), which is the trusted isolation boundary.
|
||||
collection: str = ""
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from .embeddings import GraphEmbeddings
|
|||
# <- (error)
|
||||
|
||||
# list-kg-cores
|
||||
# -> (user)
|
||||
# -> (workspace)
|
||||
# <- ()
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -27,8 +27,8 @@ class KnowledgeRequest:
|
|||
# load-kg-core, unload-kg-core
|
||||
operation: str = ""
|
||||
|
||||
# list-kg-cores, delete-kg-core, put-kg-core
|
||||
user: str = ""
|
||||
# Workspace the cores belong to. Partition / isolation boundary.
|
||||
workspace: str = ""
|
||||
|
||||
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
|
||||
# load-kg-core, unload-kg-core
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ class AgentStep:
|
|||
action: str = ""
|
||||
arguments: dict[str, str] = field(default_factory=dict)
|
||||
observation: str = ""
|
||||
user: str = "" # User context for the step
|
||||
step_type: str = "" # "react", "plan", "execute", "decompose", "synthesise"
|
||||
plan: list[PlanStep] = field(default_factory=list) # Plan steps (for plan-then-execute)
|
||||
subagent_results: dict[str, str] = field(default_factory=dict) # Subagent results keyed by goal
|
||||
|
|
@ -33,7 +32,6 @@ class AgentRequest:
|
|||
state: str = ""
|
||||
group: list[str] | None = None
|
||||
history: list[AgentStep] = field(default_factory=list)
|
||||
user: str = "" # User context for multi-tenancy
|
||||
collection: str = "default" # Collection for provenance traces
|
||||
streaming: bool = False # Enable streaming response delivery (default false)
|
||||
session_id: str = "" # For provenance tracking across iterations
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from ..core.topic import queue
|
|||
@dataclass
|
||||
class CollectionMetadata:
|
||||
"""Collection metadata record"""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
|
|
@ -23,11 +22,17 @@ class CollectionMetadata:
|
|||
|
||||
@dataclass
|
||||
class CollectionManagementRequest:
|
||||
"""Request for collection management operations"""
|
||||
"""Request for collection management operations.
|
||||
|
||||
Collection-management is a global (non-flow-scoped) service, so the
|
||||
workspace has to travel on the wire — it's the isolation boundary
|
||||
for which workspace's collections the request operates on.
|
||||
"""
|
||||
operation: str = "" # e.g., "delete-collection"
|
||||
|
||||
# For 'list-collections'
|
||||
user: str = ""
|
||||
# Workspace the collection belongs to.
|
||||
workspace: str = ""
|
||||
|
||||
collection: str = ""
|
||||
timestamp: str = "" # ISO timestamp
|
||||
name: str = ""
|
||||
|
|
|
|||
|
|
@ -7,12 +7,19 @@ from ..core.primitives import Error
|
|||
############################################################################
|
||||
|
||||
# Config service:
|
||||
# get(keys) -> (version, values)
|
||||
# list(type) -> (version, values)
|
||||
# getvalues(type) -> (version, values)
|
||||
# put(values) -> ()
|
||||
# delete(keys) -> ()
|
||||
# config() -> (version, config)
|
||||
# get(workspace, keys) -> (version, values)
|
||||
# list(workspace, type) -> (version, directory)
|
||||
# getvalues(workspace, type) -> (version, values)
|
||||
# getvalues-all-ws(type) -> (version, values with workspace field)
|
||||
# put(workspace, values) -> ()
|
||||
# delete(workspace, keys) -> ()
|
||||
# config(workspace) -> (version, config)
|
||||
#
|
||||
# Most operations are scoped to a workspace. The workspace field on the
|
||||
# request identifies which workspace's config to read or modify.
|
||||
# getvalues-all-ws returns values across all workspaces for a single
|
||||
# type — used by shared processors to load type-scoped config at startup.
|
||||
|
||||
@dataclass
|
||||
class ConfigKey:
|
||||
type: str = ""
|
||||
|
|
@ -23,16 +30,24 @@ class ConfigValue:
|
|||
type: str = ""
|
||||
key: str = ""
|
||||
value: str = ""
|
||||
# Populated by getvalues-all-ws responses so callers can identify
|
||||
# which workspace each value belongs to. Empty otherwise.
|
||||
workspace: str = ""
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
@dataclass
|
||||
class ConfigRequest:
|
||||
operation: str = "" # get, list, getvalues, delete, put, config
|
||||
# Operations: get, list, getvalues, getvalues-all-ws, delete, put,
|
||||
# config
|
||||
operation: str = ""
|
||||
|
||||
# Workspace scope — required on all operations except
|
||||
# getvalues-all-ws which spans all workspaces.
|
||||
workspace: str = ""
|
||||
|
||||
# get, delete
|
||||
keys: list[ConfigKey] = field(default_factory=list)
|
||||
|
||||
# list, getvalues
|
||||
# list, getvalues, getvalues-all-ws
|
||||
type: str = ""
|
||||
|
||||
# put
|
||||
|
|
@ -58,7 +73,12 @@ class ConfigResponse:
|
|||
@dataclass
|
||||
class ConfigPush:
|
||||
version: int = 0
|
||||
types: list[str] = field(default_factory=list)
|
||||
|
||||
# Dict of config type -> list of affected workspaces.
|
||||
# Handlers look up their registered type and get the list of
|
||||
# workspaces that need refreshing.
|
||||
# e.g. {"prompt": ["workspace-a", "workspace-b"], "schema": ["workspace-a"]}
|
||||
changes: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
config_request_queue = queue('config', cls='request')
|
||||
config_response_queue = queue('config', cls='response')
|
||||
|
|
|
|||
|
|
@ -17,12 +17,14 @@ from ..core.primitives import Error
|
|||
# start_flow(flowid, blueprintname) -> ()
|
||||
# stop_flow(flowid) -> ()
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
@dataclass
|
||||
class FlowRequest:
|
||||
operation: str = "" # list-blueprints, get-blueprint, put-blueprint, delete-blueprint
|
||||
# list-flows, get-flow, start-flow, stop-flow
|
||||
|
||||
# Workspace scope — all operations act within this workspace
|
||||
workspace: str = ""
|
||||
|
||||
# get_blueprint, put_blueprint, delete_blueprint, start_flow
|
||||
blueprint_name: str = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -43,12 +43,12 @@ from ..core.metadata import Metadata
|
|||
# <- (error)
|
||||
|
||||
# list-documents
|
||||
# -> (user, collection?)
|
||||
# -> (workspace, collection?)
|
||||
# <- (document_metadata[])
|
||||
# <- (error)
|
||||
|
||||
# list-processing
|
||||
# -> (user, collection?)
|
||||
# -> (workspace, collection?)
|
||||
# <- (processing_metadata[])
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ from ..core.metadata import Metadata
|
|||
# <- (error)
|
||||
|
||||
# list-uploads
|
||||
# -> (user)
|
||||
# -> (workspace)
|
||||
# <- (uploads[])
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ class DocumentMetadata:
|
|||
title: str = ""
|
||||
comments: str = ""
|
||||
metadata: list[Triple] = field(default_factory=list)
|
||||
user: str = ""
|
||||
workspace: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
# Child document support
|
||||
parent_id: str = "" # Empty for top-level docs, set for children
|
||||
|
|
@ -107,7 +107,7 @@ class ProcessingMetadata:
|
|||
document_id: str = ""
|
||||
time: int = 0
|
||||
flow: str = ""
|
||||
user: str = ""
|
||||
workspace: str = ""
|
||||
collection: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
|
|
@ -162,8 +162,8 @@ class LibrarianRequest:
|
|||
# add-document, upload-chunk
|
||||
content: bytes = b""
|
||||
|
||||
# list-documents, list-processing, list-uploads
|
||||
user: str = ""
|
||||
# Workspace scopes every library operation.
|
||||
workspace: str = ""
|
||||
|
||||
# list-documents?, list-processing?
|
||||
collection: str = ""
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from ..core.topic import queue
|
|||
class GraphEmbeddingsRequest:
|
||||
vector: list[float] = field(default_factory=list)
|
||||
limit: int = 0
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
@dataclass
|
||||
|
|
@ -31,7 +30,6 @@ class GraphEmbeddingsResponse:
|
|||
|
||||
@dataclass
|
||||
class TriplesQueryRequest:
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
s: Term | None = None
|
||||
p: Term | None = None
|
||||
|
|
@ -55,7 +53,6 @@ class TriplesQueryResponse:
|
|||
class DocumentEmbeddingsRequest:
|
||||
vector: list[float] = field(default_factory=list)
|
||||
limit: int = 0
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
@dataclass
|
||||
|
|
@ -89,7 +86,6 @@ class RowEmbeddingsRequest:
|
|||
"""Request for row embeddings semantic search"""
|
||||
vector: list[float] = field(default_factory=list) # Query vector
|
||||
limit: int = 10 # Max results to return
|
||||
user: str = "" # User/keyspace
|
||||
collection: str = "" # Collection name
|
||||
schema_name: str = "" # Schema name to search within
|
||||
index_name: str | None = None # Optional: filter to specific index
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from ..core.primitives import Error, Term, Triple
|
|||
@dataclass
|
||||
class GraphRagQuery:
|
||||
query: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
entity_limit: int = 0
|
||||
triple_limit: int = 0
|
||||
|
|
@ -40,7 +39,6 @@ class GraphRagResponse:
|
|||
@dataclass
|
||||
class DocumentRagQuery:
|
||||
query: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
doc_limit: int = 0
|
||||
streaming: bool = False
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ class GraphQLError:
|
|||
|
||||
@dataclass
|
||||
class RowsQueryRequest:
|
||||
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection: str = "" # Data collection identifier (required for partition key)
|
||||
query: str = "" # GraphQL query string
|
||||
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ class SparqlBinding:
|
|||
|
||||
@dataclass
|
||||
class SparqlQueryRequest:
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
query: str = "" # SPARQL query string
|
||||
limit: int = 10000 # Safety limit on results
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from ..core.primitives import Error
|
|||
@dataclass
|
||||
class StructuredQueryRequest:
|
||||
question: str = ""
|
||||
user: str = "" # Cassandra keyspace identifier
|
||||
collection: str = "" # Data collection identifier
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@ from ..core.primitives import Error
|
|||
@dataclass
|
||||
class ToolServiceRequest:
|
||||
"""Request to a dynamically configured tool service."""
|
||||
# User context for multi-tenancy
|
||||
user: str = ""
|
||||
# Config values (collection, etc.) as JSON
|
||||
config: str = ""
|
||||
# Arguments from LLM as JSON
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue