mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-06 22:02:37 +02:00
Per-workspace queue routing for workspace-scoped services (#862)
Workspace identity is now determined by queue infrastructure instead of message body fields, closing a privilege-escalation vector where a caller could spoof workspace in the request payload. - Add WorkspaceProcessor base class: discovers workspaces from config at startup, creates per-workspace consumers (queue:workspace), and manages consumer lifecycle on workspace create/delete events - Roll out to librarian, flow-svc, knowledge cores, and config-svc - Config service gets a dual-queue regime: a system queue for cross-workspace ops (getvalues-all-ws, bootstrapper writes to __workspaces__) and per-workspace queues for tenant-scoped ops, with workspace discovery from its own Cassandra store - Remove workspace field from request schemas (FlowRequest, LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and from DocumentMetadata / ProcessingMetadata — table stores now accept workspace as an explicit parameter - Strip workspace encode/decode from all message translators and gateway serializers - Gateway enforces workspace existence: reject requests targeting non-existent workspaces instead of routing to queues with no consumer - Config service provisions new workspaces from __template__ on creation - Add workspace lifecycle hooks to AsyncProcessor so any processor can react to workspace create/delete without subclassing WorkspaceProcessor
This commit is contained in:
parent
9be257ceee
commit
9f2bfbce0c
53 changed files with 1565 additions and 677 deletions
|
|
@ -217,7 +217,6 @@ class Library:
|
|||
"title": title,
|
||||
"comments": comments,
|
||||
"metadata": triples,
|
||||
"workspace": self.api.workspace,
|
||||
"tags": tags
|
||||
},
|
||||
"content": base64.b64encode(document).decode("utf-8"),
|
||||
|
|
@ -249,7 +248,6 @@ class Library:
|
|||
"kind": kind,
|
||||
"title": title,
|
||||
"comments": comments,
|
||||
"workspace": self.api.workspace,
|
||||
"tags": tags,
|
||||
},
|
||||
"total-size": total_size,
|
||||
|
|
@ -377,7 +375,6 @@ class Library:
|
|||
)
|
||||
for w in v["metadata"]
|
||||
],
|
||||
workspace = v.get("workspace", ""),
|
||||
tags = v["tags"],
|
||||
parent_id = v.get("parent-id", ""),
|
||||
document_type = v.get("document-type", "source"),
|
||||
|
|
@ -436,7 +433,6 @@ class Library:
|
|||
)
|
||||
for w in doc["metadata"]
|
||||
],
|
||||
workspace = doc.get("workspace", ""),
|
||||
tags = doc["tags"],
|
||||
parent_id = doc.get("parent-id", ""),
|
||||
document_type = doc.get("document-type", "source"),
|
||||
|
|
@ -485,7 +481,6 @@ class Library:
|
|||
"operation": "update-document",
|
||||
"workspace": self.api.workspace,
|
||||
"document-metadata": {
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": id,
|
||||
"time": metadata.time,
|
||||
"title": metadata.title,
|
||||
|
|
@ -599,7 +594,6 @@ class Library:
|
|||
"document-id": document_id,
|
||||
"time": int(time.time()),
|
||||
"flow": flow,
|
||||
"workspace": self.api.workspace,
|
||||
"collection": collection,
|
||||
"tags": tags,
|
||||
}
|
||||
|
|
@ -681,7 +675,6 @@ class Library:
|
|||
document_id = v["document-id"],
|
||||
time = datetime.datetime.fromtimestamp(v["time"]),
|
||||
flow = v["flow"],
|
||||
workspace = v.get("workspace", ""),
|
||||
collection = v["collection"],
|
||||
tags = v["tags"],
|
||||
)
|
||||
|
|
@ -945,7 +938,6 @@ class Library:
|
|||
"title": title,
|
||||
"comments": comments,
|
||||
"metadata": triples,
|
||||
"workspace": self.api.workspace,
|
||||
"tags": tags,
|
||||
"parent-id": parent_id,
|
||||
"document-type": "extracted",
|
||||
|
|
|
|||
|
|
@ -65,7 +65,6 @@ class DocumentMetadata:
|
|||
title: Document title
|
||||
comments: Additional comments or description
|
||||
metadata: List of RDF triples providing structured metadata
|
||||
workspace: Workspace the document belongs to
|
||||
tags: List of tags for categorization
|
||||
parent_id: Parent document ID for child documents (empty for top-level docs)
|
||||
document_type: "source" for uploaded documents, "extracted" for derived content
|
||||
|
|
@ -76,7 +75,6 @@ class DocumentMetadata:
|
|||
title : str
|
||||
comments : str
|
||||
metadata : List[Triple]
|
||||
workspace : str
|
||||
tags : List[str]
|
||||
parent_id : str = ""
|
||||
document_type : str = "source"
|
||||
|
|
@ -91,7 +89,6 @@ class ProcessingMetadata:
|
|||
document_id: ID of the document being processed
|
||||
time: Processing start timestamp
|
||||
flow: Flow instance handling the processing
|
||||
workspace: Workspace the processing job belongs to
|
||||
collection: Target collection for processed data
|
||||
tags: List of tags for categorization
|
||||
"""
|
||||
|
|
@ -99,7 +96,6 @@ class ProcessingMetadata:
|
|||
document_id : str
|
||||
time : datetime.datetime
|
||||
flow : str
|
||||
workspace : str
|
||||
collection : str
|
||||
tags : List[str]
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from . publisher import Publisher
|
|||
from . subscriber import Subscriber
|
||||
from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics, SubscriberMetrics
|
||||
from . logging import add_logging_args, setup_logging
|
||||
from . workspace_processor import WorkspaceProcessor
|
||||
from . flow_processor import FlowProcessor
|
||||
from . consumer_spec import ConsumerSpec
|
||||
from . parameter_spec import ParameterSpec
|
||||
|
|
|
|||
|
|
@ -71,6 +71,11 @@ class AsyncProcessor:
|
|||
# { "handler": async_fn, "types": set_or_none }
|
||||
self.config_handlers = []
|
||||
|
||||
# Workspace lifecycle handlers, called when workspaces are
|
||||
# created or deleted. Each entry is an async callable:
|
||||
# async def handler(workspace_changes: WorkspaceChanges)
|
||||
self.workspace_handlers = []
|
||||
|
||||
# Track the current config version for dedup
|
||||
self.config_version = 0
|
||||
|
||||
|
|
@ -251,6 +256,10 @@ class AsyncProcessor:
|
|||
"types": set(types) if types else None,
|
||||
})
|
||||
|
||||
# Register a handler for workspace lifecycle events
|
||||
def register_workspace_handler(self, handler: Callable[..., Any]) -> None:
    """Add *handler* to the workspace lifecycle handler list.

    Handlers are kept in registration order in ``self.workspace_handlers``.
    """
    registered = self.workspace_handlers
    registered.append(handler)
|
||||
|
||||
# Called when a config notify message arrives
|
||||
async def on_config_notify(self, message, consumer, flow):
|
||||
|
||||
|
|
@ -266,6 +275,16 @@ class AsyncProcessor:
|
|||
)
|
||||
return
|
||||
|
||||
# Dispatch workspace lifecycle events before config handlers
|
||||
if v.workspace_changes and self.workspace_handlers:
|
||||
for handler in self.workspace_handlers:
|
||||
try:
|
||||
await handler(v.workspace_changes)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Workspace handler failed: {e}", exc_info=True
|
||||
)
|
||||
|
||||
notify_types = set(changes.keys())
|
||||
|
||||
# Filter out handlers that don't care about any of the changed
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from .. schema import Error
|
|||
from .. schema import config_request_queue, config_response_queue
|
||||
from .. schema import config_push_queue
|
||||
from .. log_level import LogLevel
|
||||
from . async_processor import AsyncProcessor
|
||||
from . workspace_processor import WorkspaceProcessor
|
||||
from . flow import Flow
|
||||
|
||||
# Module logger
|
||||
|
|
@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# Parent class for configurable processors, configured with flows by
|
||||
# the config service
|
||||
class FlowProcessor(AsyncProcessor):
|
||||
class FlowProcessor(WorkspaceProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
@staticmethod
|
||||
def add_args(parser: ArgumentParser) -> None:
|
||||
|
||||
AsyncProcessor.add_args(parser)
|
||||
WorkspaceProcessor.add_args(parser)
|
||||
|
||||
# parser.add_argument(
|
||||
# '--rate-limit-retry',
|
||||
|
|
|
|||
|
|
@ -202,7 +202,6 @@ class LibrarianClient:
|
|||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
workspace=workspace,
|
||||
kind=kind,
|
||||
title=title or doc_id,
|
||||
parent_id=parent_id,
|
||||
|
|
@ -227,7 +226,6 @@ class LibrarianClient:
|
|||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
workspace=workspace,
|
||||
kind=kind,
|
||||
title=title or doc_id,
|
||||
document_type=document_type,
|
||||
|
|
|
|||
66
trustgraph-base/trustgraph/base/workspace_processor.py
Normal file
66
trustgraph-base/trustgraph/base/workspace_processor.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
import logging
|
||||
|
||||
from . async_processor import AsyncProcessor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
WORKSPACES_NAMESPACE = "__workspaces__"
|
||||
WORKSPACE_TYPE = "workspace"
|
||||
|
||||
|
||||
class WorkspaceProcessor(AsyncProcessor):
    """Processor base class that keeps track of the active workspaces.

    The set of workspaces is seeded once in ``start()`` and afterwards kept
    current through the workspace lifecycle handler registered in
    ``__init__``. Subclasses react to changes by overriding
    ``on_workspace_created`` and ``on_workspace_deleted``.
    """

    def __init__(self, **params):
        """Initialise the base processor and subscribe to workspace events."""
        super().__init__(**params)

        # Ids of the workspaces for which the creation hook has been fired
        # (and the deletion hook has not yet completed).
        self.active_workspaces = set()

        self.register_workspace_handler(self._handle_workspace_changes)

    async def _discover_workspaces(self):
        """Fetch the existing workspaces and fire the creation hook for each.

        A temporary config client is started for the lookup and is always
        stopped again, whether or not the lookup succeeds.
        """
        config_client = self._create_config_client()
        try:
            await config_client.start()
            by_namespace, _version = await self._fetch_type_all_workspaces(
                config_client, WORKSPACE_TYPE,
            )
            for namespace in by_namespace:
                # Only entries under the workspaces namespace are used;
                # everything else in the result is ignored.
                if namespace != WORKSPACES_NAMESPACE:
                    continue
                for ws_id in by_namespace[namespace]:
                    if ws_id in self.active_workspaces:
                        continue
                    # Record the id before awaiting the hook so the same
                    # workspace is not announced twice.
                    self.active_workspaces.add(ws_id)
                    await self.on_workspace_created(ws_id)
        finally:
            await config_client.stop()

    async def _handle_workspace_changes(self, workspace_changes):
        """Apply a batch of workspace create/delete events.

        Creations are processed before deletions. Workspaces that are
        already tracked are not re-created, and deletions of untracked
        workspaces are ignored.
        """
        for workspace_id in workspace_changes.created:
            if workspace_id in self.active_workspaces:
                continue
            self.active_workspaces.add(workspace_id)
            logger.info(f"Workspace created: {workspace_id}")
            await self.on_workspace_created(workspace_id)

        for workspace_id in workspace_changes.deleted:
            if workspace_id not in self.active_workspaces:
                continue
            logger.info(f"Workspace deleted: {workspace_id}")
            # The id is dropped only after the deletion hook has returned.
            await self.on_workspace_deleted(workspace_id)
            self.active_workspaces.discard(workspace_id)

    async def on_workspace_created(self, workspace):
        """Hook invoked for each newly tracked workspace; no-op by default."""

    async def on_workspace_deleted(self, workspace):
        """Hook invoked for each removed workspace; no-op by default."""

    async def start(self):
        """Start the base processor, then discover existing workspaces."""
        await super().start()
        await self._discover_workspaces()

    @staticmethod
    def add_args(parser: ArgumentParser) -> None:
        """Register command-line arguments by delegating to AsyncProcessor."""
        AsyncProcessor.add_args(parser)
|
||||
|
|
@ -9,7 +9,6 @@ class CollectionManagementRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> CollectionManagementRequest:
|
||||
return CollectionManagementRequest(
|
||||
operation=data.get("operation"),
|
||||
workspace=data.get("workspace", ""),
|
||||
collection=data.get("collection"),
|
||||
timestamp=data.get("timestamp"),
|
||||
name=data.get("name"),
|
||||
|
|
@ -24,8 +23,6 @@ class CollectionManagementRequestTranslator(MessageTranslator):
|
|||
|
||||
if obj.operation is not None:
|
||||
result["operation"] = obj.operation
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.collection is not None:
|
||||
result["collection"] = obj.collection
|
||||
if obj.timestamp is not None:
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ class FlowRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> FlowRequest:
|
||||
return FlowRequest(
|
||||
operation=data.get("operation"),
|
||||
workspace=data.get("workspace", ""),
|
||||
blueprint_name=data.get("blueprint-name"),
|
||||
blueprint_definition=data.get("blueprint-definition"),
|
||||
description=data.get("description"),
|
||||
|
|
@ -22,8 +21,6 @@ class FlowRequestTranslator(MessageTranslator):
|
|||
|
||||
if obj.operation is not None:
|
||||
result["operation"] = obj.operation
|
||||
if obj.workspace is not None:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.blueprint_name is not None:
|
||||
result["blueprint-name"] = obj.blueprint_name
|
||||
if obj.blueprint_definition is not None:
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
|
||||
return KnowledgeRequest(
|
||||
operation=data.get("operation"),
|
||||
workspace=data.get("workspace", ""),
|
||||
id=data.get("id"),
|
||||
flow=data.get("flow"),
|
||||
collection=data.get("collection"),
|
||||
|
|
@ -58,8 +57,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
|
||||
if obj.operation:
|
||||
result["operation"] = obj.operation
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.id:
|
||||
result["id"] = obj.id
|
||||
if obj.flow:
|
||||
|
|
|
|||
|
|
@ -49,7 +49,6 @@ class LibraryRequestTranslator(MessageTranslator):
|
|||
document_metadata=doc_metadata,
|
||||
processing_metadata=proc_metadata,
|
||||
content=content,
|
||||
workspace=data.get("workspace", ""),
|
||||
collection=data.get("collection", ""),
|
||||
criteria=criteria,
|
||||
# Chunked upload fields
|
||||
|
|
@ -76,8 +75,6 @@ class LibraryRequestTranslator(MessageTranslator):
|
|||
result["processing-metadata"] = self.proc_metadata_translator.encode(obj.processing_metadata)
|
||||
if obj.content:
|
||||
result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.collection:
|
||||
result["collection"] = obj.collection
|
||||
if obj.criteria is not None:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ class DocumentMetadataTranslator(Translator):
|
|||
title=data.get("title"),
|
||||
comments=data.get("comments"),
|
||||
metadata=self.subgraph_translator.decode(metadata) if metadata is not None else [],
|
||||
workspace=data.get("workspace"),
|
||||
tags=data.get("tags"),
|
||||
parent_id=data.get("parent-id", ""),
|
||||
document_type=data.get("document-type", "source"),
|
||||
|
|
@ -40,8 +39,6 @@ class DocumentMetadataTranslator(Translator):
|
|||
result["comments"] = obj.comments
|
||||
if obj.metadata is not None:
|
||||
result["metadata"] = self.subgraph_translator.encode(obj.metadata)
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.tags is not None:
|
||||
result["tags"] = obj.tags
|
||||
if obj.parent_id:
|
||||
|
|
@ -61,7 +58,6 @@ class ProcessingMetadataTranslator(Translator):
|
|||
document_id=data.get("document-id"),
|
||||
time=data.get("time"),
|
||||
flow=data.get("flow"),
|
||||
workspace=data.get("workspace"),
|
||||
collection=data.get("collection"),
|
||||
tags=data.get("tags")
|
||||
)
|
||||
|
|
@ -77,8 +73,6 @@ class ProcessingMetadataTranslator(Translator):
|
|||
result["time"] = obj.time
|
||||
if obj.flow:
|
||||
result["flow"] = obj.flow
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.collection:
|
||||
result["collection"] = obj.collection
|
||||
if obj.tags is not None:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,5 @@ class Metadata:
|
|||
# Root document identifier (set by librarian, preserved through pipeline)
|
||||
root: str = ""
|
||||
|
||||
# Collection the message belongs to. Workspace is NOT carried on the
|
||||
# message — consumers derive it from flow.workspace (the flow the
|
||||
# message arrived on), which is the trusted isolation boundary.
|
||||
# Collection the message belongs to.
|
||||
collection: str = ""
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from .embeddings import GraphEmbeddings
|
|||
# <- (error)
|
||||
|
||||
# list-kg-cores
|
||||
# -> (workspace)
|
||||
# -> ()
|
||||
# <- ()
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -27,9 +27,6 @@ class KnowledgeRequest:
|
|||
# load-kg-core, unload-kg-core
|
||||
operation: str = ""
|
||||
|
||||
# Workspace the cores belong to. Partition / isolation boundary.
|
||||
workspace: str = ""
|
||||
|
||||
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
|
||||
# load-kg-core, unload-kg-core
|
||||
id: str = ""
|
||||
|
|
|
|||
|
|
@ -22,17 +22,9 @@ class CollectionMetadata:
|
|||
|
||||
@dataclass
|
||||
class CollectionManagementRequest:
|
||||
"""Request for collection management operations.
|
||||
|
||||
Collection-management is a global (non-flow-scoped) service, so the
|
||||
workspace has to travel on the wire — it's the isolation boundary
|
||||
for which workspace's collections the request operates on.
|
||||
"""
|
||||
"""Request for collection management operations."""
|
||||
operation: str = "" # e.g., "delete-collection"
|
||||
|
||||
# Workspace the collection belongs to.
|
||||
workspace: str = ""
|
||||
|
||||
collection: str = ""
|
||||
timestamp: str = "" # ISO timestamp
|
||||
name: str = ""
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ class ConfigResponse:
|
|||
# Everything
|
||||
error: Error | None = None
|
||||
|
||||
@dataclass
class WorkspaceChanges:
    """Workspace lifecycle events: ids created and ids deleted."""

    # Ids of workspaces that were created.
    created: list[str] = field(default_factory=list)

    # Ids of workspaces that were deleted.
    deleted: list[str] = field(default_factory=list)
|
||||
|
||||
@dataclass
|
||||
class ConfigPush:
|
||||
version: int = 0
|
||||
|
|
@ -80,6 +85,10 @@ class ConfigPush:
|
|||
# e.g. {"prompt": ["workspace-a", "workspace-b"], "schema": ["workspace-a"]}
|
||||
changes: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
# Workspace lifecycle events. Populated when a workspace entry
|
||||
# is created or deleted in the __workspaces__ config namespace.
|
||||
workspace_changes: WorkspaceChanges | None = None
|
||||
|
||||
config_request_queue = queue('config', cls='request')
|
||||
config_response_queue = queue('config', cls='response')
|
||||
config_push_queue = queue('config', cls='notify')
|
||||
|
|
|
|||
|
|
@ -22,9 +22,6 @@ class FlowRequest:
|
|||
operation: str = "" # list-blueprints, get-blueprint, put-blueprint, delete-blueprint
|
||||
# list-flows, get-flow, start-flow, stop-flow
|
||||
|
||||
# Workspace scope — all operations act within this workspace
|
||||
workspace: str = ""
|
||||
|
||||
# get_blueprint, put_blueprint, delete_blueprint, start_flow
|
||||
blueprint_name: str = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -43,12 +43,12 @@ from ..core.metadata import Metadata
|
|||
# <- (error)
|
||||
|
||||
# list-documents
|
||||
# -> (workspace, collection?)
|
||||
# -> (collection?)
|
||||
# <- (document_metadata[])
|
||||
# <- (error)
|
||||
|
||||
# list-processing
|
||||
# -> (workspace, collection?)
|
||||
# -> (collection?)
|
||||
# <- (processing_metadata[])
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ from ..core.metadata import Metadata
|
|||
# <- (error)
|
||||
|
||||
# list-uploads
|
||||
# -> (workspace)
|
||||
# -> ()
|
||||
# <- (uploads[])
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -90,7 +90,6 @@ class DocumentMetadata:
|
|||
title: str = ""
|
||||
comments: str = ""
|
||||
metadata: list[Triple] = field(default_factory=list)
|
||||
workspace: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
# Child document support
|
||||
parent_id: str = "" # Empty for top-level docs, set for children
|
||||
|
|
@ -107,7 +106,6 @@ class ProcessingMetadata:
|
|||
document_id: str = ""
|
||||
time: int = 0
|
||||
flow: str = ""
|
||||
workspace: str = ""
|
||||
collection: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
|
|
@ -162,9 +160,6 @@ class LibrarianRequest:
|
|||
# add-document, upload-chunk
|
||||
content: bytes = b""
|
||||
|
||||
# Workspace scopes every library operation.
|
||||
workspace: str = ""
|
||||
|
||||
# list-documents?, list-processing?
|
||||
collection: str = ""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue