IAM tech spec: Auth and access management current state and proposed changes.

Support for separate workspaces

Addition of workspace CLI support for test purposes
This commit is contained in:
Cyber MacGeddon 2026-04-18 23:07:26 +01:00
parent 48da6c5f8b
commit db05427d0e
219 changed files with 4875 additions and 2616 deletions

View file

@ -50,7 +50,7 @@ class Api:
token: Optional bearer token for authentication
"""
def __init__(self, url="http://localhost:8088/", timeout=60, token: Optional[str] = None):
def __init__(self, url="http://localhost:8088/", timeout=60, token: Optional[str] = None, workspace: str = "default"):
"""
Initialize the TrustGraph API client.
@ -82,6 +82,7 @@ class Api:
self.timeout = timeout
self.token = token
self.workspace = workspace
# Lazy initialization for new clients
self._socket_client = None
@ -137,7 +138,7 @@ class Api:
config.put([ConfigValue(type="llm", key="model", value="gpt-4")])
```
"""
return Config(api=self)
return Config(api=self, workspace=self.workspace)
def knowledge(self):
"""
@ -191,6 +192,12 @@ class Api:
if self.token:
headers["Authorization"] = f"Bearer {self.token}"
# Ensure every REST request carries the workspace so services can
# scope their behaviour. Callers that already set workspace in the
# payload (e.g. Library client) take precedence.
if isinstance(request, dict) and "workspace" not in request:
request = {**request, "workspace": self.workspace}
# Invoke the API, input is passed as JSON
resp = requests.post(url, json=request, timeout=self.timeout, headers=headers)
@ -297,7 +304,10 @@ class Api:
from . socket_client import SocketClient
# Extract base URL (remove api/v1/ suffix)
base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/")
self._socket_client = SocketClient(base_url, self.timeout, self.token)
self._socket_client = SocketClient(
base_url, self.timeout, self.token,
workspace=self.workspace,
)
return self._socket_client
def bulk(self):
@ -417,7 +427,10 @@ class Api:
from . async_socket_client import AsyncSocketClient
# Extract base URL (remove api/v1/ suffix)
base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/")
self._async_socket_client = AsyncSocketClient(base_url, self.timeout, self.token)
self._async_socket_client = AsyncSocketClient(
base_url, self.timeout, self.token,
workspace=self.workspace,
)
return self._async_socket_client
def async_bulk(self):

View file

@ -22,10 +22,14 @@ class AsyncSocketClient:
Or call connect()/aclose() manually.
"""
def __init__(self, url: str, timeout: int, token: Optional[str]):
def __init__(
self, url: str, timeout: int, token: Optional[str],
workspace: str = "default",
):
self.url = self._convert_to_ws_url(url)
self.timeout = timeout
self.token = token
self.workspace = workspace
self._request_counter = 0
self._socket = None
self._connect_cm = None
@ -117,6 +121,7 @@ class AsyncSocketClient:
try:
message = {
"id": request_id,
"workspace": self.workspace,
"service": service,
"request": request
}
@ -149,6 +154,7 @@ class AsyncSocketClient:
try:
message = {
"id": request_id,
"workspace": self.workspace,
"service": service,
"request": request
}

View file

@ -2,11 +2,9 @@
TrustGraph Collection Management
This module provides interfaces for managing data collections in TrustGraph.
Collections provide logical grouping and isolation for documents and knowledge
graph data.
Collections provide logical grouping within a workspace.
"""
import datetime
import logging
from . types import CollectionMetadata
@ -18,10 +16,9 @@ class Collection:
"""
Collection management client.
Provides methods for managing data collections, including listing,
updating metadata, and deleting collections. Collections organize
documents and knowledge graph data into logical groupings for
isolation and access control.
Provides methods for managing data collections within the configured
workspace, including listing, updating metadata, and deleting
collections.
"""
def __init__(self, api):
@ -45,45 +42,20 @@ class Collection:
"""
return self.api.request(f"collection-management", request)
def list_collections(self, user, tag_filter=None):
def list_collections(self, tag_filter=None):
"""
List all collections for a user.
Retrieves metadata for all collections owned by the specified user,
with optional filtering by tags.
List all collections in this workspace.
Args:
user: User identifier
tag_filter: Optional list of tags to filter collections (default: None)
tag_filter: Optional list of tags to filter collections
Returns:
list[CollectionMetadata]: List of collection metadata objects
Raises:
ProtocolException: If response format is invalid
Example:
```python
collection = api.collection()
# List all collections
all_colls = collection.list_collections(user="trustgraph")
for coll in all_colls:
print(f"{coll.collection}: {coll.name}")
print(f" Description: {coll.description}")
print(f" Tags: {', '.join(coll.tags)}")
# List collections with specific tags
research_colls = collection.list_collections(
user="trustgraph",
tag_filter=["research", "published"]
)
```
"""
input = {
"operation": "list-collections",
"user": user,
"workspace": self.api.workspace,
}
if tag_filter:
@ -92,7 +64,6 @@ class Collection:
object = self.request(input)
try:
# Handle case where collections might be None or missing
if object is None or "collections" not in object:
return []
@ -102,7 +73,6 @@ class Collection:
return [
CollectionMetadata(
user = v["user"],
collection = v["collection"],
name = v["name"],
description = v["description"],
@ -114,15 +84,11 @@ class Collection:
logger.error("Failed to parse collection list response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def update_collection(self, user, collection, name=None, description=None, tags=None):
def update_collection(self, collection, name=None, description=None, tags=None):
"""
Update collection metadata.
Updates the name, description, and/or tags for an existing collection.
Only provided fields are updated; others remain unchanged.
Args:
user: User identifier
collection: Collection identifier
name: New collection name (optional)
description: New collection description (optional)
@ -130,35 +96,11 @@ class Collection:
Returns:
CollectionMetadata: Updated collection metadata, or None if not found
Raises:
ProtocolException: If response format is invalid
Example:
```python
collection_api = api.collection()
# Update collection metadata
updated = collection_api.update_collection(
user="trustgraph",
collection="default",
name="Default Collection",
description="Main data collection for general use",
tags=["default", "production"]
)
# Update only specific fields
updated = collection_api.update_collection(
user="trustgraph",
collection="research",
description="Updated description"
)
```
"""
input = {
"operation": "update-collection",
"user": user,
"workspace": self.api.workspace,
"collection": collection,
}
@ -175,7 +117,6 @@ class Collection:
if "collections" in object and object["collections"]:
v = object["collections"][0]
return CollectionMetadata(
user = v["user"],
collection = v["collection"],
name = v["name"],
description = v["description"],
@ -186,37 +127,23 @@ class Collection:
logger.error("Failed to parse collection update response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def delete_collection(self, user, collection):
def delete_collection(self, collection):
"""
Delete a collection.
Removes a collection and all its associated data from the system.
Args:
user: User identifier
collection: Collection identifier to delete
Returns:
dict: Empty response object
Example:
```python
collection_api = api.collection()
# Delete a collection
collection_api.delete_collection(
user="trustgraph",
collection="old-collection"
)
```
"""
input = {
"operation": "delete-collection",
"user": user,
"workspace": self.api.workspace,
"collection": collection,
}
object = self.request(input)
self.request(input)
return {}
return {}

View file

@ -21,14 +21,16 @@ class Config:
and list operations.
"""
def __init__(self, api):
def __init__(self, api, workspace="default"):
"""
Initialize Config client.
Args:
api: Parent Api instance for making requests
workspace: Workspace to scope all config operations to
"""
self.api = api
self.workspace = workspace
def request(self, request):
"""
@ -75,9 +77,9 @@ class Config:
```
"""
# The input consists of system and prompt strings
input = {
"operation": "get",
"workspace": self.workspace,
"keys": [
{ "type": k.type, "key": k.key }
for k in keys
@ -123,9 +125,9 @@ class Config:
```
"""
# The input consists of system and prompt strings
input = {
"operation": "put",
"workspace": self.workspace,
"values": [
{ "type": v.type, "key": v.key, "value": v.value }
for v in values
@ -157,9 +159,9 @@ class Config:
```
"""
# The input consists of system and prompt strings
input = {
"operation": "delete",
"workspace": self.workspace,
"keys": [
{ "type": v.type, "key": v.key }
for v in keys
@ -195,9 +197,9 @@ class Config:
```
"""
# The input consists of system and prompt strings
input = {
"operation": "list",
"workspace": self.workspace,
"type": type,
}
@ -235,9 +237,9 @@ class Config:
```
"""
# The input consists of system and prompt strings
input = {
"operation": "getvalues",
"workspace": self.workspace,
"type": type,
}
@ -255,6 +257,46 @@ class Config:
except:
raise ProtocolException(f"Response not formatted correctly")
def get_values_all_workspaces(self, type):
    """
    Get all configuration values of a given type across all workspaces.

    Unlike get_values(), this is not scoped to a single workspace; it
    returns every entry of the given type in the system. Each returned
    ConfigValue includes its workspace field. Used by shared processors
    to load type-scoped config at startup.

    Args:
        type: Configuration type (e.g. "prompt", "schema")

    Returns:
        list[ConfigValue]: Values across all workspaces; each has
            its workspace field populated ("" when the response entry
            carries no workspace).

    Raises:
        ProtocolException: If response format is invalid
    """

    # This method deliberately does not put a workspace in the payload.
    payload = {
        "operation": "getvalues-all-ws",
        "type": type,
    }

    response = self.request(payload)

    try:
        return [
            ConfigValue(
                type=entry["type"],
                key=entry["key"],
                value=entry["value"],
                workspace=entry.get("workspace", ""),
            )
            for entry in response["values"]
        ]
    except Exception as err:
        # Chain the original error so the malformed field is visible
        # in the traceback instead of being silently discarded.
        raise ProtocolException("Response not formatted correctly") from err
def all(self):
"""
Get complete configuration and version.
@ -279,9 +321,9 @@ class Config:
```
"""
# The input consists of system and prompt strings
input = {
"operation": "config"
"operation": "config",
"workspace": self.workspace,
}
object = self.request(input)

View file

@ -115,72 +115,32 @@ class Flow:
return FlowInstance(api=self, id=id)
def list_blueprints(self):
"""
List all available flow blueprints.
"""List blueprints in the current workspace."""
Returns:
list[str]: List of blueprint names
Example:
```python
blueprints = api.flow().list_blueprints()
print(blueprints) # ['default', 'custom-flow', ...]
```
"""
# The input consists of system and prompt strings
input = {
"operation": "list-blueprints",
"workspace": self.api.workspace,
}
return self.request(request = input)["blueprint-names"]
def get_blueprint(self, blueprint_name):
"""
Get a flow blueprint definition by name.
"""Get a flow blueprint definition by name."""
Args:
blueprint_name: Name of the blueprint to retrieve
Returns:
dict: Blueprint definition as a dictionary
Example:
```python
blueprint = api.flow().get_blueprint("default")
print(blueprint) # Blueprint configuration
```
"""
# The input consists of system and prompt strings
input = {
"operation": "get-blueprint",
"workspace": self.api.workspace,
"blueprint-name": blueprint_name,
}
return json.loads(self.request(request = input)["blueprint-definition"])
def put_blueprint(self, blueprint_name, definition):
"""
Create or update a flow blueprint.
"""Create or update a flow blueprint."""
Args:
blueprint_name: Name for the blueprint
definition: Blueprint definition dictionary
Example:
```python
definition = {
"services": ["text-completion", "graph-rag"],
"parameters": {"model": "gpt-4"}
}
api.flow().put_blueprint("my-blueprint", definition)
```
"""
# The input consists of system and prompt strings
input = {
"operation": "put-blueprint",
"workspace": self.api.workspace,
"blueprint-name": blueprint_name,
"blueprint-definition": json.dumps(definition),
}
@ -188,96 +148,43 @@ class Flow:
self.request(request = input)
def delete_blueprint(self, blueprint_name):
"""
Delete a flow blueprint.
"""Delete a flow blueprint."""
Args:
blueprint_name: Name of the blueprint to delete
Example:
```python
api.flow().delete_blueprint("old-blueprint")
```
"""
# The input consists of system and prompt strings
input = {
"operation": "delete-blueprint",
"workspace": self.api.workspace,
"blueprint-name": blueprint_name,
}
self.request(request = input)
def list(self):
"""
List all active flow instances.
"""List flow instances in the current workspace."""
Returns:
list[str]: List of flow instance IDs
Example:
```python
flows = api.flow().list()
print(flows) # ['default', 'flow-1', 'flow-2', ...]
```
"""
# The input consists of system and prompt strings
input = {
"operation": "list-flows",
"workspace": self.api.workspace,
}
return self.request(request = input)["flow-ids"]
def get(self, id):
"""
Get the definition of a running flow instance.
"""Get the definition of a flow instance."""
Args:
id: Flow instance ID
Returns:
dict: Flow instance definition
Example:
```python
flow_def = api.flow().get("default")
print(flow_def)
```
"""
# The input consists of system and prompt strings
input = {
"operation": "get-flow",
"workspace": self.api.workspace,
"flow-id": id,
}
return json.loads(self.request(request = input)["flow"])
def start(self, blueprint_name, id, description, parameters=None):
"""
Start a new flow instance from a blueprint.
"""Start a new flow instance from a blueprint."""
Args:
blueprint_name: Name of the blueprint to instantiate
id: Unique identifier for the flow instance
description: Human-readable description
parameters: Optional parameters dictionary
Example:
```python
api.flow().start(
blueprint_name="default",
id="my-flow",
description="My custom flow",
parameters={"model": "gpt-4"}
)
```
"""
# The input consists of system and prompt strings
input = {
"operation": "start-flow",
"workspace": self.api.workspace,
"flow-id": id,
"blueprint-name": blueprint_name,
"description": description,
@ -289,21 +196,11 @@ class Flow:
self.request(request = input)
def stop(self, id):
"""
Stop a running flow instance.
"""Stop a running flow instance."""
Args:
id: Flow instance ID to stop
Example:
```python
api.flow().stop("my-flow")
```
"""
# The input consists of system and prompt strings
input = {
"operation": "stop-flow",
"workspace": self.api.workspace,
"flow-id": id,
}
@ -349,6 +246,13 @@ class FlowInstance:
Returns:
dict: Service response
"""
# Inject workspace so the gateway can route to the right
# workspace's flow. If already present, keep the caller's value.
if isinstance(request, dict) and "workspace" not in request:
request = {
"workspace": self.api.api.workspace,
**request,
}
return self.api.request(path = f"{self.id}/{path}", request = request)
def text_completion(self, system, prompt):

View file

@ -63,105 +63,50 @@ class Knowledge:
"""
return self.api.request(f"knowledge", request)
def list_kg_cores(self, user="trustgraph"):
def list_kg_cores(self):
"""
List all available knowledge graph cores.
Retrieves the IDs of all KG cores available for the specified user.
Args:
user: User identifier (default: "trustgraph")
List all available knowledge graph cores in this workspace.
Returns:
list[str]: List of KG core identifiers
Example:
```python
knowledge = api.knowledge()
# List available KG cores
cores = knowledge.list_kg_cores(user="trustgraph")
print(f"Available KG cores: {cores}")
```
"""
# The input consists of system and prompt strings
input = {
"operation": "list-kg-cores",
"user": user,
"workspace": self.api.workspace,
}
return self.request(request = input)["ids"]
def delete_kg_core(self, id, user="trustgraph"):
def delete_kg_core(self, id):
"""
Delete a knowledge graph core.
Removes a KG core from storage. This does not affect currently loaded
cores in flows.
Delete a knowledge graph core in this workspace.
Args:
id: KG core identifier to delete
user: User identifier (default: "trustgraph")
Example:
```python
knowledge = api.knowledge()
# Delete a KG core
knowledge.delete_kg_core(id="medical-kb-v1", user="trustgraph")
```
"""
# The input consists of system and prompt strings
input = {
"operation": "delete-kg-core",
"user": user,
"workspace": self.api.workspace,
"id": id,
}
self.request(request = input)
def load_kg_core(self, id, user="trustgraph", flow="default",
collection="default"):
def load_kg_core(self, id, flow="default", collection="default"):
"""
Load a knowledge graph core into a flow.
Makes a KG core available for use in queries and RAG operations within
the specified flow and collection.
Args:
id: KG core identifier to load
user: User identifier (default: "trustgraph")
flow: Flow instance to load into (default: "default")
collection: Collection to associate with (default: "default")
Example:
```python
knowledge = api.knowledge()
# Load a medical knowledge base into the default flow
knowledge.load_kg_core(
id="medical-kb-v1",
user="trustgraph",
flow="default",
collection="medical"
)
# Now the flow can use this KG core for RAG queries
flow = api.flow().id("default")
response = flow.graph_rag(
query="What are the symptoms of diabetes?",
user="trustgraph",
collection="medical"
)
```
"""
# The input consists of system and prompt strings
input = {
"operation": "load-kg-core",
"user": user,
"workspace": self.api.workspace,
"id": id,
"flow": flow,
"collection": collection,
@ -169,35 +114,18 @@ class Knowledge:
self.request(request = input)
def unload_kg_core(self, id, user="trustgraph", flow="default"):
def unload_kg_core(self, id, flow="default"):
"""
Unload a knowledge graph core from a flow.
Removes a KG core from active use in the specified flow, freeing
resources while keeping the core available in storage.
Args:
id: KG core identifier to unload
user: User identifier (default: "trustgraph")
flow: Flow instance to unload from (default: "default")
Example:
```python
knowledge = api.knowledge()
# Unload a KG core when no longer needed
knowledge.unload_kg_core(
id="medical-kb-v1",
user="trustgraph",
flow="default"
)
```
"""
# The input consists of system and prompt strings
input = {
"operation": "unload-kg-core",
"user": user,
"workspace": self.api.workspace,
"id": id,
"flow": flow,
}

View file

@ -94,7 +94,7 @@ class Library:
return self.api.request(f"librarian", request)
def add_document(
self, document, id, metadata, user, title, comments,
self, document, id, metadata, title, comments,
kind="text/plain", tags=[], on_progress=None,
):
"""
@ -176,7 +176,6 @@ class Library:
document=document,
id=id,
metadata=metadata,
user=user,
title=title,
comments=comments,
kind=kind,
@ -213,6 +212,7 @@ class Library:
input = {
"operation": "add-document",
"workspace": self.api.workspace,
"document-metadata": {
"id": id,
"time": int(time.time()),
@ -220,7 +220,7 @@ class Library:
"title": title,
"comments": comments,
"metadata": triples,
"user": user,
"workspace": self.api.workspace,
"tags": tags
},
"content": base64.b64encode(document).decode("utf-8"),
@ -229,7 +229,7 @@ class Library:
return self.request(input)
def _add_document_chunked(
self, document, id, metadata, user, title, comments,
self, document, id, metadata, title, comments,
kind, tags, on_progress=None,
):
"""
@ -245,13 +245,14 @@ class Library:
# Begin upload session
begin_request = {
"operation": "begin-upload",
"workspace": self.api.workspace,
"document-metadata": {
"id": id,
"time": int(time.time()),
"kind": kind,
"title": title,
"comments": comments,
"user": user,
"workspace": self.api.workspace,
"tags": tags,
},
"total-size": total_size,
@ -279,10 +280,10 @@ class Library:
chunk_request = {
"operation": "upload-chunk",
"workspace": self.api.workspace,
"upload-id": upload_id,
"chunk-index": chunk_index,
"content": base64.b64encode(chunk_data).decode("utf-8"),
"user": user,
}
chunk_response = self.request(chunk_request)
@ -298,8 +299,8 @@ class Library:
# Complete upload
complete_request = {
"operation": "complete-upload",
"workspace": self.api.workspace,
"upload-id": upload_id,
"user": user,
}
complete_response = self.request(complete_request)
@ -314,8 +315,8 @@ class Library:
try:
abort_request = {
"operation": "abort-upload",
"workspace": self.api.workspace,
"upload-id": upload_id,
"user": user,
}
self.request(abort_request)
logger.info(f"Aborted failed upload {upload_id}")
@ -323,7 +324,7 @@ class Library:
logger.warning(f"Failed to abort upload: {abort_error}")
raise
def get_documents(self, user, include_children=False):
def get_documents(self, include_children=False):
"""
List all documents for a user.
@ -359,7 +360,7 @@ class Library:
input = {
"operation": "list-documents",
"user": user,
"workspace": self.api.workspace,
"include-children": include_children,
}
@ -381,7 +382,7 @@ class Library:
)
for w in v["metadata"]
],
user = v["user"],
workspace = v.get("workspace", ""),
tags = v["tags"],
parent_id = v.get("parent-id", ""),
document_type = v.get("document-type", "source"),
@ -392,7 +393,7 @@ class Library:
logger.error("Failed to parse document list response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def get_document(self, user, id):
def get_document(self, id):
"""
Get metadata for a specific document.
@ -419,7 +420,7 @@ class Library:
input = {
"operation": "get-document",
"user": user,
"workspace": self.api.workspace,
"document-id": id,
}
@ -441,7 +442,7 @@ class Library:
)
for w in doc["metadata"]
],
user = doc["user"],
workspace = doc.get("workspace", ""),
tags = doc["tags"],
parent_id = doc.get("parent-id", ""),
document_type = doc.get("document-type", "source"),
@ -450,7 +451,7 @@ class Library:
logger.error("Failed to parse document response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def update_document(self, user, id, metadata):
def update_document(self, id, metadata):
"""
Update document metadata.
@ -490,8 +491,9 @@ class Library:
input = {
"operation": "update-document",
"workspace": self.api.workspace,
"document-metadata": {
"user": user,
"workspace": self.api.workspace,
"document-id": id,
"time": metadata.time,
"title": metadata.title,
@ -526,14 +528,14 @@ class Library:
)
for w in doc["metadata"]
],
user = doc["user"],
workspace = doc.get("workspace", ""),
tags = doc["tags"]
)
except Exception as e:
logger.error("Failed to parse document update response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def remove_document(self, user, id):
def remove_document(self, id):
"""
Remove a document from the library.
@ -555,7 +557,7 @@ class Library:
input = {
"operation": "remove-document",
"user": user,
"workspace": self.api.workspace,
"document-id": id,
}
@ -565,7 +567,7 @@ class Library:
def start_processing(
self, id, document_id, flow="default",
user="trustgraph", collection="default", tags=[],
collection="default", tags=[],
):
"""
Start a document processing workflow.
@ -602,12 +604,13 @@ class Library:
input = {
"operation": "add-processing",
"workspace": self.api.workspace,
"processing-metadata": {
"id": id,
"document-id": document_id,
"time": int(time.time()),
"flow": flow,
"user": user,
"workspace": self.api.workspace,
"collection": collection,
"tags": tags,
}
@ -618,7 +621,7 @@ class Library:
return {}
def stop_processing(
self, id, user="trustgraph",
self, id,
):
"""
Stop a running document processing job.
@ -641,15 +644,15 @@ class Library:
input = {
"operation": "remove-processing",
"workspace": self.api.workspace,
"processing-id": id,
"user": user,
}
object = self.request(input)
return {}
def get_processings(self, user="trustgraph"):
def get_processings(self):
"""
List all active document processing jobs.
@ -681,7 +684,7 @@ class Library:
input = {
"operation": "list-processing",
"user": user,
"workspace": self.api.workspace,
}
object = self.request(input)
@ -693,7 +696,7 @@ class Library:
document_id = v["document-id"],
time = datetime.datetime.fromtimestamp(v["time"]),
flow = v["flow"],
user = v["user"],
workspace = v.get("workspace", ""),
collection = v["collection"],
tags = v["tags"],
)
@ -705,7 +708,7 @@ class Library:
# Chunked upload management methods
def get_pending_uploads(self, user):
def get_pending_uploads(self):
"""
List all pending (in-progress) uploads for a user.
@ -731,14 +734,14 @@ class Library:
"""
input = {
"operation": "list-uploads",
"user": user,
"workspace": self.api.workspace,
}
response = self.request(input)
return response.get("upload-sessions", [])
def get_upload_status(self, upload_id, user):
def get_upload_status(self, upload_id):
"""
Get the status of a specific upload.
@ -774,13 +777,13 @@ class Library:
"""
input = {
"operation": "get-upload-status",
"workspace": self.api.workspace,
"upload-id": upload_id,
"user": user,
}
return self.request(input)
def abort_upload(self, upload_id, user):
def abort_upload(self, upload_id):
"""
Abort an in-progress upload.
@ -801,13 +804,13 @@ class Library:
"""
input = {
"operation": "abort-upload",
"workspace": self.api.workspace,
"upload-id": upload_id,
"user": user,
}
return self.request(input)
def resume_upload(self, upload_id, document, user, on_progress=None):
def resume_upload(self, upload_id, document, on_progress=None):
"""
Resume an interrupted upload.
@ -844,7 +847,7 @@ class Library:
```
"""
# Get current status
status = self.get_upload_status(upload_id, user)
status = self.get_upload_status(upload_id)
if status.get("upload-state") == "expired":
raise RuntimeError("Upload session has expired, please start a new upload")
@ -867,10 +870,10 @@ class Library:
chunk_request = {
"operation": "upload-chunk",
"workspace": self.api.workspace,
"upload-id": upload_id,
"chunk-index": chunk_index,
"content": base64.b64encode(chunk_data).decode("utf-8"),
"user": user,
}
self.request(chunk_request)
@ -886,8 +889,8 @@ class Library:
# Complete upload
complete_request = {
"operation": "complete-upload",
"workspace": self.api.workspace,
"upload-id": upload_id,
"user": user,
}
return self.request(complete_request)
@ -895,7 +898,7 @@ class Library:
# Child document methods
def add_child_document(
self, document, id, parent_id, user, title, comments,
self, document, id, parent_id, title, comments,
kind="text/plain", tags=[], metadata=None,
):
"""
@ -964,6 +967,7 @@ class Library:
input = {
"operation": "add-child-document",
"workspace": self.api.workspace,
"document-metadata": {
"id": id,
"time": int(time.time()),
@ -971,7 +975,7 @@ class Library:
"title": title,
"comments": comments,
"metadata": triples,
"user": user,
"workspace": self.api.workspace,
"tags": tags,
"parent-id": parent_id,
"document-type": "extracted",
@ -981,7 +985,7 @@ class Library:
return self.request(input)
def list_children(self, document_id, user):
def list_children(self, document_id):
"""
List all child documents for a given parent document.
@ -1006,8 +1010,8 @@ class Library:
"""
input = {
"operation": "list-children",
"workspace": self.api.workspace,
"document-id": document_id,
"user": user,
}
response = self.request(input)
@ -1028,7 +1032,7 @@ class Library:
)
for w in v.get("metadata", [])
],
user=v["user"],
workspace=v.get("workspace", ""),
tags=v.get("tags", []),
parent_id=v.get("parent-id", ""),
document_type=v.get("document-type", "source"),
@ -1039,7 +1043,7 @@ class Library:
logger.error("Failed to parse children response", exc_info=True)
raise ProtocolException("Response not formatted correctly")
def get_document_content(self, user, id):
def get_document_content(self, id):
"""
Get the content of a document.
@ -1067,7 +1071,7 @@ class Library:
"""
input = {
"operation": "get-document-content",
"user": user,
"workspace": self.api.workspace,
"document-id": id,
}
@ -1076,7 +1080,7 @@ class Library:
return base64.b64decode(content_b64)
def stream_document_to_file(self, user, id, file_path, chunk_size=1024*1024, on_progress=None):
def stream_document_to_file(self, id, file_path, chunk_size=1024*1024, on_progress=None):
"""
Stream document content to a file.
@ -1116,7 +1120,7 @@ class Library:
while True:
input = {
"operation": "stream-document",
"user": user,
"workspace": self.api.workspace,
"document-id": id,
"chunk-index": chunk_index,
"chunk-size": chunk_size,

View file

@ -84,10 +84,14 @@ class SocketClient:
for streaming responses.
"""
def __init__(self, url: str, timeout: int, token: Optional[str]) -> None:
def __init__(
self, url: str, timeout: int, token: Optional[str],
workspace: str = "default",
) -> None:
self.url: str = self._convert_to_ws_url(url)
self.timeout: int = timeout
self.token: Optional[str] = token
self.workspace: str = workspace
self._request_counter: int = 0
self._lock: Lock = Lock()
self._loop: Optional[asyncio.AbstractEventLoop] = None
@ -251,6 +255,7 @@ class SocketClient:
try:
message = {
"id": request_id,
"workspace": self.workspace,
"service": service,
"request": request
}
@ -290,6 +295,7 @@ class SocketClient:
try:
message = {
"id": request_id,
"workspace": self.workspace,
"service": service,
"request": request
}
@ -328,6 +334,7 @@ class SocketClient:
try:
message = {
"id": request_id,
"workspace": self.workspace,
"service": service,
"request": request
}

View file

@ -45,10 +45,13 @@ class ConfigValue:
type: Configuration type/category
key: Specific configuration key
value: Configuration value as string
workspace: Workspace the value belongs to. Only populated for
responses to getvalues-all-ws; empty otherwise.
"""
type : str
key : str
value : str
workspace : str = ""
@dataclasses.dataclass
class DocumentMetadata:
@ -62,7 +65,7 @@ class DocumentMetadata:
title: Document title
comments: Additional comments or description
metadata: List of RDF triples providing structured metadata
user: User/owner identifier
workspace: Workspace the document belongs to
tags: List of tags for categorization
parent_id: Parent document ID for child documents (empty for top-level docs)
document_type: "source" for uploaded documents, "extracted" for derived content
@ -73,7 +76,7 @@ class DocumentMetadata:
title : str
comments : str
metadata : List[Triple]
user : str
workspace : str
tags : List[str]
parent_id : str = ""
document_type : str = "source"
@ -88,7 +91,7 @@ class ProcessingMetadata:
document_id: ID of the document being processed
time: Processing start timestamp
flow: Flow instance handling the processing
user: User identifier
workspace: Workspace the processing job belongs to
collection: Target collection for processed data
tags: List of tags for categorization
"""
@ -96,7 +99,7 @@ class ProcessingMetadata:
document_id : str
time : datetime.datetime
flow : str
user : str
workspace : str
collection : str
tags : List[str]
@ -105,17 +108,15 @@ class CollectionMetadata:
"""
Metadata for a data collection.
Collections provide logical grouping and isolation for documents and
knowledge graph data.
Collections provide logical grouping within a workspace for documents
and knowledge graph data.
Attributes:
user: User/owner identifier
collection: Collection identifier
name: Human-readable collection name
description: Collection description
tags: List of tags for categorization
"""
user : str
collection : str
name : str
description : str

View file

@ -125,21 +125,39 @@ class AsyncProcessor:
response_metrics = config_resp_metrics,
)
async def fetch_config(self):
"""Fetch full config from config service using a short-lived
request/response client. Returns (config, version) or raises."""
client = self._create_config_client()
try:
await client.start()
resp = await client.request(
ConfigRequest(operation="config"),
timeout=10,
)
if resp.error:
raise RuntimeError(f"Config error: {resp.error.message}")
return resp.config, resp.version
finally:
await client.stop()
async def _fetch_type_workspace(self, client, workspace, config_type):
    """Retrieve all config entries of one type for a single workspace.

    Args:
        client: Config client used to issue the request; it must expose
            an awaitable ``request(message, timeout=...)``.
        workspace: Workspace whose values are requested.
        config_type: Config type to fetch.

    Returns:
        A dict mapping each returned entry's key to its value.

    Raises:
        RuntimeError: If the response carries an error.
    """
    query = ConfigRequest(
        operation="getvalues",
        workspace=workspace,
        type=config_type,
    )
    reply = await client.request(query, timeout=10)

    if reply.error:
        raise RuntimeError(f"Config error: {reply.error.message}")

    # Later entries with a repeated key overwrite earlier ones.
    values = {}
    for item in reply.values:
        values[item.key] = item.value
    return values
async def _fetch_type_all_workspaces(self, client, config_type):
    """Fetch config values of a single type across all workspaces.

    Args:
        client: Config client used to issue the request; it must expose
            an awaitable ``request(message, timeout=...)``.
        config_type: Config type to fetch.

    Returns:
        A ``(grouped, version)`` tuple, where ``grouped`` is a dict of
        ``{workspace: {key: value}}`` built from the response values and
        ``version`` is the version reported on the response.

    Raises:
        RuntimeError: If the response carries an error.
    """
    resp = await client.request(
        ConfigRequest(
            operation="getvalues-all-ws",
            type=config_type,
        ),
        timeout=10,
    )
    if resp.error:
        raise RuntimeError(f"Config error: {resp.error.message}")

    # Bucket the flat list of values by the workspace each one carries.
    grouped = {}
    for v in resp.values:
        grouped.setdefault(v.workspace, {})[v.key] = v.value

    return grouped, resp.version
# This is called to start dynamic behaviour.
# Implements the subscribe-then-fetch pattern to avoid race conditions.
@ -155,21 +173,51 @@ class AsyncProcessor:
# processed by on_config_notify, which does the version check
async def fetch_and_apply_config(self):
"""Fetch full config from config service and apply to all handlers.
Retries until successful, since the config service may not be ready yet."""
"""Startup: for each registered handler, fetch config for all its
types across all workspaces and invoke the handler once per
workspace. Retries until successful, since the config service may
not be ready yet."""
while self.running:
try:
config, version = await self.fetch_config()
client = self._create_config_client()
try:
await client.start()
logger.info(f"Fetched config version {version}")
version = 0
self.config_version = version
for entry in self.config_handlers:
handler_types = entry["types"]
# Apply to all handlers (startup = invoke all)
for entry in self.config_handlers:
await entry["handler"](config, version)
# Handlers registered without types get nothing
# at startup (there is no "all types" fetch).
if not handler_types:
continue
# Group all registered types by workspace:
# {workspace: {type: {key: value}}}
per_ws = {}
for t in handler_types:
type_data, v = \
await self._fetch_type_all_workspaces(
client, t,
)
version = max(version, v)
for ws, kv in type_data.items():
per_ws.setdefault(ws, {})[t] = kv
# Call the handler once per workspace
for ws, config in per_ws.items():
await entry["handler"](ws, config, version)
logger.info(
f"Applied startup config version {version}"
)
self.config_version = version
finally:
await client.stop()
return
@ -204,8 +252,9 @@ class AsyncProcessor:
# Called when a config notify message arrives
async def on_config_notify(self, message, consumer, flow):
notify_version = message.value().version
notify_types = set(message.value().types)
v = message.value()
notify_version = v.version
changes = v.changes # dict of type -> [workspaces]
# Skip if we already have this version or newer
if notify_version <= self.config_version:
@ -215,41 +264,60 @@ class AsyncProcessor:
)
return
# Check if any handler cares about the affected types
if notify_types:
any_interested = False
for entry in self.config_handlers:
handler_types = entry["types"]
if handler_types is None or notify_types & handler_types:
any_interested = True
break
notify_types = set(changes.keys())
if not any_interested:
logger.debug(
f"Ignoring config notify v{notify_version}, "
f"no handlers for types {notify_types}"
)
self.config_version = notify_version
return
# Filter out handlers that don't care about any of the changed
# types. A handler registered without types never fires on
# notifications (nothing to scope to).
interested = []
for entry in self.config_handlers:
handler_types = entry["types"]
if handler_types and notify_types & handler_types:
interested.append(entry)
if not interested:
logger.debug(
f"Ignoring config notify v{notify_version}, "
f"no handlers for types {notify_types}"
)
self.config_version = notify_version
return
logger.info(
f"Config notify v{notify_version} types={list(notify_types)}, "
f"fetching config..."
f"Config notify v{notify_version} "
f"types={list(notify_types)}, fetching config..."
)
# Fetch full config using short-lived client
try:
config, version = await self.fetch_config()
client = self._create_config_client()
try:
await client.start()
self.config_version = version
for entry in interested:
handler_types = entry["types"]
# Invoke handlers that care about the affected types
for entry in self.config_handlers:
handler_types = entry["types"]
if handler_types is None:
await entry["handler"](config, version)
elif not notify_types or notify_types & handler_types:
await entry["handler"](config, version)
# Build {workspace: {type: {key: value}}} for types
# this handler cares about, where the workspace was
# affected for that type.
per_ws = {}
for t in handler_types:
if t not in changes:
continue
for ws in changes[t]:
kv = await self._fetch_type_workspace(
client, ws, t,
)
per_ws.setdefault(ws, {})[t] = kv
for ws, config in per_ws.items():
await entry["handler"](
ws, config, notify_version,
)
finally:
await client.stop()
self.config_version = notify_version
except Exception as e:
logger.error(

View file

@ -48,12 +48,13 @@ class ChunkingService(FlowProcessor):
await super(ChunkingService, self).start()
await self.librarian.start()
async def get_document_text(self, doc):
async def get_document_text(self, doc, workspace):
"""
Get text content from a TextDocument, fetching from librarian if needed.
Args:
doc: TextDocument with either inline text or document_id
workspace: Workspace for librarian lookup (from flow.workspace)
Returns:
str: The document text content
@ -62,7 +63,7 @@ class ChunkingService(FlowProcessor):
logger.info(f"Fetching document {doc.document_id} from librarian...")
text = await self.librarian.fetch_document_text(
document_id=doc.document_id,
user=doc.metadata.user,
workspace=workspace,
)
logger.info(f"Fetched {len(text)} characters from librarian")
return text

View file

@ -15,114 +15,139 @@ class CollectionConfigHandler:
Storage services should:
1. Inherit from this class along with their service base class
2. Call register_config_handler(self.on_collection_config) in __init__
3. Implement create_collection(user, collection, metadata) method
4. Implement delete_collection(user, collection) method
3. Implement create_collection(workspace, collection, metadata) method
4. Implement delete_collection(workspace, collection) method
"""
def __init__(self, **kwargs):
# Track known collections: {(user, collection): metadata_dict}
# Track known collections: {(workspace, collection): metadata_dict}
self.known_collections: Dict[tuple, dict] = {}
# Pass remaining kwargs up the inheritance chain
super().__init__(**kwargs)
async def on_collection_config(self, config: dict, version: int):
async def on_collection_config(
self, workspace: str, config: dict, version: int
):
"""
Handle config push messages and extract collection information
for a single workspace.
Args:
workspace: Workspace the config applies to
config: Configuration dictionary from ConfigPush message
version: Configuration version number
"""
logger.info(f"Processing collection configuration (version {version})")
logger.info(
f"Processing collection configuration "
f"(version {version}, workspace {workspace})"
)
# Extract collections from config (treat missing key as empty)
# Extract collections from config (treat missing key as empty).
# Each config key IS the collection name — config is already
# partitioned by workspace, so no workspace prefix is needed
# on the key.
collection_config = config.get("collection", {})
# Track which collections we've seen in this config
current_collections: Set[tuple] = set()
# Process each collection in the config
for key, value_json in collection_config.items():
for collection, value_json in collection_config.items():
try:
# Parse user:collection key
if ":" not in key:
logger.warning(f"Invalid collection key format (expected user:collection): {key}")
continue
current_collections.add((workspace, collection))
user, collection = key.split(":", 1)
current_collections.add((user, collection))
# Parse metadata
metadata = json.loads(value_json)
# Check if this is a new collection or updated
collection_key = (user, collection)
if collection_key not in self.known_collections:
logger.info(f"New collection detected: {user}/{collection}")
await self.create_collection(user, collection, metadata)
self.known_collections[collection_key] = metadata
key = (workspace, collection)
if key not in self.known_collections:
logger.info(
f"New collection detected: {workspace}/{collection}"
)
await self.create_collection(
workspace, collection, metadata
)
self.known_collections[key] = metadata
else:
# Collection already exists, update metadata if changed
if self.known_collections[collection_key] != metadata:
logger.info(f"Collection metadata updated: {user}/{collection}")
# Most storage services don't need to do anything for metadata updates
# They just need to know the collection exists
self.known_collections[collection_key] = metadata
if self.known_collections[key] != metadata:
logger.info(
f"Collection metadata updated: "
f"{workspace}/{collection}"
)
self.known_collections[key] = metadata
except Exception as e:
logger.error(f"Error processing collection config for key {key}: {e}", exc_info=True)
logger.error(
f"Error processing collection config for "
f"{workspace}/{collection}: {e}",
exc_info=True,
)
# Find collections that were deleted (in known but not in current)
deleted_collections = set(self.known_collections.keys()) - current_collections
for user, collection in deleted_collections:
logger.info(f"Collection deleted: {user}/{collection}")
# Find collections for THIS workspace that were deleted (in
# known but not in current). Only compare collections owned by
# this workspace — other workspaces' collections are not
# affected by this config update.
known_for_ws = {
(w, c) for (w, c) in self.known_collections.keys()
if w == workspace
}
deleted_collections = known_for_ws - current_collections
for ws, collection in deleted_collections:
logger.info(f"Collection deleted: {ws}/{collection}")
try:
# Remove from known_collections FIRST to immediately reject new writes
# This eliminates race condition with worker threads
del self.known_collections[(user, collection)]
# Physical deletion happens after - worker threads already rejecting writes
await self.delete_collection(user, collection)
# Remove from known_collections FIRST to immediately
# reject new writes
del self.known_collections[(ws, collection)]
await self.delete_collection(ws, collection)
except Exception as e:
logger.error(f"Error deleting collection {user}/{collection}: {e}", exc_info=True)
# If physical deletion failed, should we re-add to known_collections?
# For now, keep it removed - collection is logically deleted per config
logger.error(
f"Error deleting collection {ws}/{collection}: {e}",
exc_info=True,
)
logger.debug(f"Collection config processing complete. Known collections: {len(self.known_collections)}")
logger.debug(
f"Collection config processing complete. "
f"Known collections: {len(self.known_collections)}"
)
async def create_collection(self, user: str, collection: str, metadata: dict):
async def create_collection(
self, workspace: str, collection: str, metadata: dict,
):
"""
Create a collection in the storage backend.
Subclasses must implement this method.
Args:
user: User ID
workspace: Workspace ID
collection: Collection ID
metadata: Collection metadata dictionary
"""
raise NotImplementedError("Storage service must implement create_collection method")
raise NotImplementedError(
"Storage service must implement create_collection method"
)
async def delete_collection(self, user: str, collection: str):
async def delete_collection(self, workspace: str, collection: str):
"""
Delete a collection from the storage backend.
Subclasses must implement this method.
Args:
user: User ID
workspace: Workspace ID
collection: Collection ID
"""
raise NotImplementedError("Storage service must implement delete_collection method")
raise NotImplementedError(
"Storage service must implement delete_collection method"
)
def collection_exists(self, user: str, collection: str) -> bool:
def collection_exists(self, workspace: str, collection: str) -> bool:
"""
Check if a collection is known to exist
Check if a collection is known to exist.
Args:
user: User ID
workspace: Workspace ID
collection: Collection ID
Returns:
True if collection exists, False otherwise
"""
return (user, collection) in self.known_collections
return (workspace, collection) in self.known_collections

View file

@ -18,10 +18,11 @@ class ConfigClient(RequestResponse):
)
return resp
async def get(self, type, key, timeout=CONFIG_TIMEOUT):
async def get(self, workspace, type, key, timeout=CONFIG_TIMEOUT):
"""Get a single config value. Returns the value string or None."""
resp = await self._request(
operation="get",
workspace=workspace,
keys=[ConfigKey(type=type, key=key)],
timeout=timeout,
)
@ -29,19 +30,21 @@ class ConfigClient(RequestResponse):
return resp.values[0].value
return None
async def put(self, type, key, value, timeout=CONFIG_TIMEOUT):
async def put(self, workspace, type, key, value, timeout=CONFIG_TIMEOUT):
"""Put a single config value."""
await self._request(
operation="put",
workspace=workspace,
values=[ConfigValue(type=type, key=key, value=value)],
timeout=timeout,
)
async def put_many(self, values, timeout=CONFIG_TIMEOUT):
"""Put multiple config values in a single request.
values is a list of (type, key, value) tuples."""
async def put_many(self, workspace, values, timeout=CONFIG_TIMEOUT):
"""Put multiple config values in a single request within a
single workspace. values is a list of (type, key, value) tuples."""
await self._request(
operation="put",
workspace=workspace,
values=[
ConfigValue(type=t, key=k, value=v)
for t, k, v in values
@ -49,19 +52,21 @@ class ConfigClient(RequestResponse):
timeout=timeout,
)
async def delete(self, type, key, timeout=CONFIG_TIMEOUT):
async def delete(self, workspace, type, key, timeout=CONFIG_TIMEOUT):
"""Delete a single config key."""
await self._request(
operation="delete",
workspace=workspace,
keys=[ConfigKey(type=type, key=key)],
timeout=timeout,
)
async def delete_many(self, keys, timeout=CONFIG_TIMEOUT):
"""Delete multiple config keys in a single request.
keys is a list of (type, key) tuples."""
async def delete_many(self, workspace, keys, timeout=CONFIG_TIMEOUT):
"""Delete multiple config keys in a single request within a
single workspace. keys is a list of (type, key) tuples."""
await self._request(
operation="delete",
workspace=workspace,
keys=[
ConfigKey(type=t, key=k)
for t, k in keys
@ -69,15 +74,26 @@ class ConfigClient(RequestResponse):
timeout=timeout,
)
async def keys(self, type, timeout=CONFIG_TIMEOUT):
"""List all keys for a config type."""
async def keys(self, workspace, type, timeout=CONFIG_TIMEOUT):
"""List all keys for a config type within a workspace."""
resp = await self._request(
operation="list",
workspace=workspace,
type=type,
timeout=timeout,
)
return resp.directory
async def workspaces_for_type(self, type, timeout=CONFIG_TIMEOUT):
    """Collect the distinct workspaces that hold config of a given type.

    Issues a ``getvalues-all-ws`` request for ``type`` and gathers the
    workspace of every returned value, skipping values whose workspace
    is empty.

    Args:
        type: Config type to look up.
        timeout: Request timeout passed through to the request call.

    Returns:
        A set of workspace identifiers.
    """
    resp = await self._request(
        operation="getvalues-all-ws",
        type=type,
        timeout=timeout,
    )

    found = set()
    for entry in resp.values:
        # Values with no workspace set are ignored.
        if entry.workspace:
            found.add(entry.workspace)
    return found
class ConfigClientSpec(RequestResponseSpec):
def __init__(

View file

@ -24,7 +24,10 @@ class ConsumerSpec(Spec):
flow = flow,
backend = processor.pubsub,
topic = definition["topics"][self.name],
subscriber = processor.id + "--" + flow.name + "--" + self.name,
subscriber = (
processor.id + "--" + flow.workspace + "--" +
flow.name + "--" + self.name
),
schema = self.schema,
handler = self.handler,
metrics = consumer_metrics,

View file

@ -60,7 +60,9 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
logger.debug(f"Handling document embeddings query request {id}...")
docs = await self.query_document_embeddings(request)
docs = await self.query_document_embeddings(
flow.workspace, request,
)
logger.debug("Sending document embeddings query response...")
r = DocumentEmbeddingsResponse(chunks=docs, error=None)

View file

@ -41,7 +41,8 @@ class DocumentEmbeddingsStoreService(FlowProcessor):
request = msg.value()
await self.store_document_embeddings(request)
# Workspace comes from the flow the message arrived on.
await self.store_document_embeddings(flow.workspace, request)
except TooManyRequests as e:
raise e

View file

@ -4,15 +4,16 @@ import asyncio
class Flow:
"""
Runtime representation of a deployed flow process.
This class maintains internal processor states and orchestrates
lifecycles (start, stop) for inputs (consumers) and parameters
lifecycles (start, stop) for inputs (consumers) and parameters
that drive data flowing across linked nodes.
"""
def __init__(self, id, flow, processor, defn):
def __init__(self, id, flow, workspace, processor, defn):
self.id = id
self.name = flow
self.workspace = workspace
self.producer = {}

View file

@ -35,6 +35,8 @@ class FlowProcessor(AsyncProcessor):
)
# Initialise flow information state
# Keyed by (workspace, flow) tuples; each workspace has its own
# set of flow variants for this processor.
self.flows = {}
# These can be overridden by a derived class:
@ -48,23 +50,28 @@ class FlowProcessor(AsyncProcessor):
def register_specification(self, spec: Any) -> None:
self.specifications.append(spec)
# Start processing for a new flow
async def start_flow(self, flow, defn):
self.flows[flow] = Flow(self.id, flow, self, defn)
await self.flows[flow].start()
logger.info(f"Started flow: {flow}")
# Stop processing for a new flow
async def stop_flow(self, flow):
if flow in self.flows:
await self.flows[flow].stop()
del self.flows[flow]
logger.info(f"Stopped flow: {flow}")
# Start processing for a new flow within a workspace
async def start_flow(self, workspace, flow, defn):
    """Create, register and start a flow within a workspace.

    Args:
        workspace: Workspace the flow belongs to.
        flow: Flow name; with ``workspace`` it forms the key under
            which the flow is stored in ``self.flows``.
        defn: Flow definition handed to the ``Flow`` constructor.
    """
    instance = Flow(self.id, flow, workspace, self, defn)

    # Register before starting, so the entry exists while start() runs.
    self.flows[(workspace, flow)] = instance
    await instance.start()

    logger.info(f"Started flow: {workspace}/{flow}")
# Event handler - called for a configuration change
async def on_configure_flows(self, config, version):
# Stop processing for a flow within a workspace
async def stop_flow(self, workspace, flow):
# Stop the flow stored under the (workspace, flow) key in self.flows
# and drop it from that mapping.
#
# workspace: workspace the flow belongs to
# flow: flow name within that workspace
key = (workspace, flow)
# Only a flow currently present in self.flows is stopped.
if key in self.flows:
# stop() is awaited before the entry is removed, so the entry is
# still present if stop() raises.
await self.flows[key].stop()
del self.flows[key]
# NOTE(review): indentation is not preserved in this view, so whether
# this log line sits inside the `if` cannot be confirmed here.
logger.info(f"Stopped flow: {workspace}/{flow}")
logger.info(f"Got config version {version}")
# Event handler - called for a configuration change for a single
# workspace
async def on_configure_flows(self, workspace, config, version):
logger.info(
f"Got config version {version} for workspace {workspace}"
)
config_type = f"processor:{self.id}"
@ -76,26 +83,28 @@ class FlowProcessor(AsyncProcessor):
for k, v in config[config_type].items()
}
else:
logger.debug("No configuration settings for me.")
logger.debug(
f"No configuration settings for me in {workspace}."
)
flow_config = {}
# Get list of flows which should be running and are currently
# running
wanted_flows = flow_config.keys()
# This takes a copy, needed because dict gets modified by stop_flow
current_flows = list(self.flows.keys())
# Get list of flows which should be running in this workspace,
# and the list currently running in this workspace
wanted_flows = set(flow_config.keys())
current_flows = {
f for (ws, f) in self.flows.keys() if ws == workspace
}
# Start all the flows which arent currently running
for flow in wanted_flows:
if flow not in current_flows:
await self.start_flow(flow, flow_config[flow])
# Start all the flows which aren't currently running in this
# workspace
for flow in wanted_flows - current_flows:
await self.start_flow(workspace, flow, flow_config[flow])
# Stop all the unwanted flows which are due to be stopped
for flow in current_flows:
if flow not in wanted_flows:
await self.stop_flow(flow)
# Stop all the unwanted flows in this workspace
for flow in current_flows - wanted_flows:
await self.stop_flow(workspace, flow)
logger.info("Handled config update")
logger.info(f"Handled config update for workspace {workspace}")
# Start threads, just call parent
async def start(self):

View file

@ -60,7 +60,9 @@ class GraphEmbeddingsQueryService(FlowProcessor):
logger.debug(f"Handling graph embeddings query request {id}...")
entities = await self.query_graph_embeddings(request)
entities = await self.query_graph_embeddings(
flow.workspace, request,
)
logger.debug("Sending graph embeddings query response...")
r = GraphEmbeddingsResponse(entities=entities, error=None)

View file

@ -41,7 +41,8 @@ class GraphEmbeddingsStoreService(FlowProcessor):
request = msg.value()
await self.store_graph_embeddings(request)
# Workspace comes from the flow the message arrived on.
await self.store_graph_embeddings(flow.workspace, request)
except TooManyRequests as e:
raise e

View file

@ -150,7 +150,7 @@ class LibrarianClient:
finally:
self._streams.pop(request_id, None)
async def fetch_document_content(self, document_id, user, timeout=120):
async def fetch_document_content(self, document_id, workspace, timeout=120):
"""Fetch document content using streaming.
Returns base64-encoded content. Caller is responsible for decoding.
@ -158,7 +158,7 @@ class LibrarianClient:
req = LibrarianRequest(
operation="stream-document",
document_id=document_id,
user=user,
workspace=workspace,
)
chunks = await self.stream(req, timeout=timeout)
@ -176,24 +176,24 @@ class LibrarianClient:
return base64.b64encode(raw)
async def fetch_document_text(self, document_id, user, timeout=120):
async def fetch_document_text(self, document_id, workspace, timeout=120):
"""Fetch document content and decode as UTF-8 text."""
content = await self.fetch_document_content(
document_id, user, timeout=timeout,
document_id, workspace, timeout=timeout,
)
return base64.b64decode(content).decode("utf-8")
async def fetch_document_metadata(self, document_id, user, timeout=120):
async def fetch_document_metadata(self, document_id, workspace, timeout=120):
"""Fetch document metadata from the librarian."""
req = LibrarianRequest(
operation="get-document-metadata",
document_id=document_id,
user=user,
workspace=workspace,
)
response = await self.request(req, timeout=timeout)
return response.document_metadata
async def save_child_document(self, doc_id, parent_id, user, content,
async def save_child_document(self, doc_id, parent_id, workspace, content,
document_type="chunk", title=None,
kind="text/plain", timeout=120):
"""Save a child document to the librarian."""
@ -202,7 +202,7 @@ class LibrarianClient:
doc_metadata = DocumentMetadata(
id=doc_id,
user=user,
workspace=workspace,
kind=kind,
title=title or doc_id,
parent_id=parent_id,
@ -218,7 +218,7 @@ class LibrarianClient:
await self.request(req, timeout=timeout)
return doc_id
async def save_document(self, doc_id, user, content, title=None,
async def save_document(self, doc_id, workspace, content, title=None,
document_type="answer", kind="text/plain",
timeout=120):
"""Save a document to the librarian."""
@ -227,7 +227,7 @@ class LibrarianClient:
doc_metadata = DocumentMetadata(
id=doc_id,
user=user,
workspace=workspace,
kind=kind,
title=title or doc_id,
document_type=document_type,
@ -238,7 +238,7 @@ class LibrarianClient:
document_id=doc_id,
document_metadata=doc_metadata,
content=base64.b64encode(content).decode("utf-8"),
user=user,
workspace=workspace,
)
await self.request(req, timeout=timeout)

View file

@ -133,8 +133,9 @@ class RequestResponseSpec(Spec):
# Make subscription names unique, so that all subscribers get
# to see all response messages
subscription = (
processor.id + "--" + flow.name + "--" + self.request_name +
"--" + str(uuid.uuid4())
processor.id + "--" + flow.workspace + "--" +
flow.name + "--" + self.request_name + "--" +
str(uuid.uuid4())
),
consumer_name = flow.id,
request_topic = definition["topics"][self.request_name],

View file

@ -21,7 +21,7 @@ class SubscriberSpec(Spec):
subscriber = Subscriber(
backend = processor.pubsub,
topic = definition["topics"][self.name],
subscription = flow.id,
subscription = flow.id + "--" + flow.workspace + "--" + flow.name,
consumer_name = flow.id,
schema = self.schema,
metrics = subscriber_metrics,

View file

@ -64,6 +64,7 @@ class ToolService(FlowProcessor):
id = msg.properties()["id"]
response = await self.invoke_tool(
flow.workspace,
request.name,
json.loads(request.parameters) if request.parameters else {},
)

View file

@ -58,9 +58,13 @@ class TriplesQueryService(FlowProcessor):
logger.debug(f"Handling triples query request {id}...")
workspace = flow.workspace
if request.streaming:
# Streaming mode: send batches
async for batch, is_final in self.query_triples_stream(request):
async for batch, is_final in self.query_triples_stream(
workspace, request,
):
r = TriplesQueryResponse(
triples=batch,
error=None,
@ -70,7 +74,7 @@ class TriplesQueryService(FlowProcessor):
logger.debug("Triples query streaming completed")
else:
# Non-streaming mode: single response
triples = await self.query_triples(request)
triples = await self.query_triples(workspace, request)
logger.debug("Sending triples query response...")
r = TriplesQueryResponse(triples=triples, error=None)
await flow("response").send(r, properties={"id": id})
@ -92,13 +96,13 @@ class TriplesQueryService(FlowProcessor):
await flow("response").send(r, properties={"id": id})
async def query_triples_stream(self, request):
async def query_triples_stream(self, workspace, request):
"""
Streaming query - yields (batch, is_final) tuples.
Default implementation batches results from query_triples.
Override for true streaming from backend.
"""
triples = await self.query_triples(request)
triples = await self.query_triples(workspace, request)
batch_size = request.batch_size if request.batch_size > 0 else 20
for i in range(0, len(triples), batch_size):

View file

@ -45,7 +45,10 @@ class TriplesStoreService(FlowProcessor):
request = msg.value()
await self.store_triples(request)
# Workspace is derived from the flow the message arrived on,
# not from fields in the message payload. Topic routing is
# the isolation boundary.
await self.store_triples(flow.workspace, request)
except TooManyRequests as e:
raise e

View file

@ -33,6 +33,7 @@ class ConfigClient(BaseClient):
subscriber=None,
input_queue=None,
output_queue=None,
workspace="default",
**pubsub_config,
):
@ -51,10 +52,13 @@ class ConfigClient(BaseClient):
**pubsub_config,
)
self.workspace = workspace
def get(self, keys, timeout=300):
resp = self.call(
operation="get",
workspace=self.workspace,
keys=[
ConfigKey(
type = k["type"],
@ -78,6 +82,7 @@ class ConfigClient(BaseClient):
resp = self.call(
operation="list",
workspace=self.workspace,
type=type,
timeout=timeout
)
@ -88,6 +93,7 @@ class ConfigClient(BaseClient):
resp = self.call(
operation="getvalues",
workspace=self.workspace,
type=type,
timeout=timeout
)
@ -101,10 +107,31 @@ class ConfigClient(BaseClient):
for v in resp.values
]
def getvalues_all_ws(self, type, timeout=300):
    """Fetch every value of one config type across all workspaces.

    Args:
        type: Config type to fetch.
        timeout: Timeout passed through to the underlying call.

    Returns:
        A list with one dict per returned value, each holding the
        'workspace', 'type', 'key' and 'value' fields of that value.
    """
    resp = self.call(
        operation="getvalues-all-ws",
        type=type,
        timeout=timeout,
    )

    rows = []
    for entry in resp.values:
        rows.append(
            dict(
                workspace=entry.workspace,
                type=entry.type,
                key=entry.key,
                value=entry.value,
            )
        )
    return rows
def delete(self, keys, timeout=300):
resp = self.call(
operation="delete",
workspace=self.workspace,
keys=[
ConfigKey(
type = k["type"],
@ -121,6 +148,7 @@ class ConfigClient(BaseClient):
resp = self.call(
operation="put",
workspace=self.workspace,
values=[
ConfigValue(
type = v["type"],
@ -138,6 +166,7 @@ class ConfigClient(BaseClient):
resp = self.call(
operation="config",
workspace=self.workspace,
timeout=timeout
)

View file

@ -9,7 +9,7 @@ class CollectionManagementRequestTranslator(MessageTranslator):
def decode(self, data: Dict[str, Any]) -> CollectionManagementRequest:
return CollectionManagementRequest(
operation=data.get("operation"),
user=data.get("user"),
workspace=data.get("workspace", ""),
collection=data.get("collection"),
timestamp=data.get("timestamp"),
name=data.get("name"),
@ -24,8 +24,8 @@ class CollectionManagementRequestTranslator(MessageTranslator):
if obj.operation is not None:
result["operation"] = obj.operation
if obj.user is not None:
result["user"] = obj.user
if obj.workspace:
result["workspace"] = obj.workspace
if obj.collection is not None:
result["collection"] = obj.collection
if obj.timestamp is not None:
@ -63,7 +63,6 @@ class CollectionManagementResponseTranslator(MessageTranslator):
if "collections" in data:
for coll_data in data["collections"]:
collections.append(CollectionMetadata(
user=coll_data.get("user"),
collection=coll_data.get("collection"),
name=coll_data.get("name"),
description=coll_data.get("description"),
@ -91,7 +90,6 @@ class CollectionManagementResponseTranslator(MessageTranslator):
result["collections"] = []
for coll in obj.collections:
result["collections"].append({
"user": coll.user,
"collection": coll.collection,
"name": coll.name,
"description": coll.description,

View file

@ -23,13 +23,15 @@ class ConfigRequestTranslator(MessageTranslator):
ConfigValue(
type=v["type"],
key=v["key"],
value=v["value"]
value=v["value"],
workspace=v.get("workspace", ""),
)
for v in data["values"]
]
return ConfigRequest(
operation=data.get("operation"),
workspace=data.get("workspace", ""),
keys=keys,
type=data.get("type"),
values=values
@ -37,10 +39,13 @@ class ConfigRequestTranslator(MessageTranslator):
def encode(self, obj: ConfigRequest) -> Dict[str, Any]:
result = {}
if obj.operation is not None:
result["operation"] = obj.operation
if obj.workspace is not None:
result["workspace"] = obj.workspace
if obj.type is not None:
result["type"] = obj.type
@ -56,13 +61,14 @@ class ConfigRequestTranslator(MessageTranslator):
if obj.values is not None:
result["values"] = [
{
**({"workspace": v.workspace} if v.workspace else {}),
"type": v.type,
"key": v.key,
"value": v.value
"value": v.value,
}
for v in obj.values
]
return result
@ -81,13 +87,14 @@ class ConfigResponseTranslator(MessageTranslator):
if obj.values is not None:
result["values"] = [
{
**({"workspace": v.workspace} if v.workspace else {}),
"type": v.type,
"key": v.key,
"value": v.value
"value": v.value,
}
for v in obj.values
]
if obj.directory is not None:
result["directory"] = obj.directory

View file

@ -39,7 +39,6 @@ class DocumentTranslator(SendTranslator):
metadata=Metadata(
id=data.get("id"),
root=data.get("root", ""),
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default"),
),
data=base64.b64encode(doc).decode("utf-8")
@ -56,8 +55,6 @@ class DocumentTranslator(SendTranslator):
metadata_dict["id"] = obj.metadata.id
if obj.metadata.root:
metadata_dict["root"] = obj.metadata.root
if obj.metadata.user:
metadata_dict["user"] = obj.metadata.user
if obj.metadata.collection:
metadata_dict["collection"] = obj.metadata.collection
@ -79,7 +76,6 @@ class TextDocumentTranslator(SendTranslator):
metadata=Metadata(
id=data.get("id"),
root=data.get("root", ""),
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default"),
),
text=text.encode("utf-8")
@ -96,8 +92,6 @@ class TextDocumentTranslator(SendTranslator):
metadata_dict["id"] = obj.metadata.id
if obj.metadata.root:
metadata_dict["root"] = obj.metadata.root
if obj.metadata.user:
metadata_dict["user"] = obj.metadata.user
if obj.metadata.collection:
metadata_dict["collection"] = obj.metadata.collection
@ -115,7 +109,6 @@ class ChunkTranslator(SendTranslator):
metadata=Metadata(
id=data.get("id"),
root=data.get("root", ""),
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default"),
),
chunk=data["chunk"].encode("utf-8") if isinstance(data["chunk"], str) else data["chunk"]
@ -132,8 +125,6 @@ class ChunkTranslator(SendTranslator):
metadata_dict["id"] = obj.metadata.id
if obj.metadata.root:
metadata_dict["root"] = obj.metadata.root
if obj.metadata.user:
metadata_dict["user"] = obj.metadata.user
if obj.metadata.collection:
metadata_dict["collection"] = obj.metadata.collection
@ -161,7 +152,6 @@ class DocumentEmbeddingsTranslator(SendTranslator):
metadata=Metadata(
id=metadata.get("id"),
root=metadata.get("root", ""),
user=metadata.get("user", "trustgraph"),
collection=metadata.get("collection", "default"),
),
chunks=chunks
@ -184,8 +174,6 @@ class DocumentEmbeddingsTranslator(SendTranslator):
metadata_dict["id"] = obj.metadata.id
if obj.metadata.root:
metadata_dict["root"] = obj.metadata.root
if obj.metadata.user:
metadata_dict["user"] = obj.metadata.user
if obj.metadata.collection:
metadata_dict["collection"] = obj.metadata.collection

View file

@ -9,18 +9,21 @@ class FlowRequestTranslator(MessageTranslator):
# Decode a wire-format dictionary into a FlowRequest.
# Every key is read with dict.get, so a missing key yields None, except
# "workspace", which falls back to the empty string.
def decode(self, data: Dict[str, Any]) -> FlowRequest:
return FlowRequest(
operation=data.get("operation"),
workspace=data.get("workspace", ""),
# Wire keys are hyphenated; FlowRequest keyword names use underscores.
blueprint_name=data.get("blueprint-name"),
blueprint_definition=data.get("blueprint-definition"),
description=data.get("description"),
flow_id=data.get("flow-id"),
parameters=data.get("parameters")
)
def encode(self, obj: FlowRequest) -> Dict[str, Any]:
result = {}
if obj.operation is not None:
result["operation"] = obj.operation
if obj.workspace is not None:
result["workspace"] = obj.workspace
if obj.blueprint_name is not None:
result["blueprint-name"] = obj.blueprint_name
if obj.blueprint_definition is not None:

View file

@ -21,7 +21,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
metadata=Metadata(
id=data["triples"]["metadata"]["id"],
root=data["triples"]["metadata"].get("root", ""),
user=data["triples"]["metadata"]["user"],
collection=data["triples"]["metadata"]["collection"]
),
triples=self.subgraph_translator.decode(data["triples"]["triples"]),
@ -33,7 +32,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
metadata=Metadata(
id=data["graph-embeddings"]["metadata"]["id"],
root=data["graph-embeddings"]["metadata"].get("root", ""),
user=data["graph-embeddings"]["metadata"]["user"],
collection=data["graph-embeddings"]["metadata"]["collection"]
),
entities=[
@ -47,7 +45,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
return KnowledgeRequest(
operation=data.get("operation"),
user=data.get("user"),
workspace=data.get("workspace", ""),
id=data.get("id"),
flow=data.get("flow"),
collection=data.get("collection"),
@ -60,8 +58,8 @@ class KnowledgeRequestTranslator(MessageTranslator):
if obj.operation:
result["operation"] = obj.operation
if obj.user:
result["user"] = obj.user
if obj.workspace:
result["workspace"] = obj.workspace
if obj.id:
result["id"] = obj.id
if obj.flow:
@ -74,7 +72,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
"metadata": {
"id": obj.triples.metadata.id,
"root": obj.triples.metadata.root,
"user": obj.triples.metadata.user,
"collection": obj.triples.metadata.collection,
},
"triples": self.subgraph_translator.encode(obj.triples.triples),
@ -85,7 +82,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
"metadata": {
"id": obj.graph_embeddings.metadata.id,
"root": obj.graph_embeddings.metadata.root,
"user": obj.graph_embeddings.metadata.user,
"collection": obj.graph_embeddings.metadata.collection,
},
"entities": [
@ -122,7 +118,6 @@ class KnowledgeResponseTranslator(MessageTranslator):
"metadata": {
"id": obj.triples.metadata.id,
"root": obj.triples.metadata.root,
"user": obj.triples.metadata.user,
"collection": obj.triples.metadata.collection,
},
"triples": self.subgraph_translator.encode(obj.triples.triples),
@ -136,7 +131,6 @@ class KnowledgeResponseTranslator(MessageTranslator):
"metadata": {
"id": obj.graph_embeddings.metadata.id,
"root": obj.graph_embeddings.metadata.root,
"user": obj.graph_embeddings.metadata.user,
"collection": obj.graph_embeddings.metadata.collection,
},
"entities": [

View file

@ -49,7 +49,7 @@ class LibraryRequestTranslator(MessageTranslator):
document_metadata=doc_metadata,
processing_metadata=proc_metadata,
content=content,
user=data.get("user", ""),
workspace=data.get("workspace", ""),
collection=data.get("collection", ""),
criteria=criteria,
# Chunked upload fields
@ -76,8 +76,8 @@ class LibraryRequestTranslator(MessageTranslator):
result["processing-metadata"] = self.proc_metadata_translator.encode(obj.processing_metadata)
if obj.content:
result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content
if obj.user:
result["user"] = obj.user
if obj.workspace:
result["workspace"] = obj.workspace
if obj.collection:
result["collection"] = obj.collection
if obj.criteria is not None:

View file

@ -19,7 +19,7 @@ class DocumentMetadataTranslator(Translator):
title=data.get("title"),
comments=data.get("comments"),
metadata=self.subgraph_translator.decode(metadata) if metadata is not None else [],
user=data.get("user"),
workspace=data.get("workspace"),
tags=data.get("tags"),
parent_id=data.get("parent-id", ""),
document_type=data.get("document-type", "source"),
@ -40,8 +40,8 @@ class DocumentMetadataTranslator(Translator):
result["comments"] = obj.comments
if obj.metadata is not None:
result["metadata"] = self.subgraph_translator.encode(obj.metadata)
if obj.user:
result["user"] = obj.user
if obj.workspace:
result["workspace"] = obj.workspace
if obj.tags is not None:
result["tags"] = obj.tags
if obj.parent_id:
@ -61,7 +61,7 @@ class ProcessingMetadataTranslator(Translator):
document_id=data.get("document-id"),
time=data.get("time"),
flow=data.get("flow"),
user=data.get("user"),
workspace=data.get("workspace"),
collection=data.get("collection"),
tags=data.get("tags")
)
@ -77,8 +77,8 @@ class ProcessingMetadataTranslator(Translator):
result["time"] = obj.time
if obj.flow:
result["flow"] = obj.flow
if obj.user:
result["user"] = obj.user
if obj.workspace:
result["workspace"] = obj.workspace
if obj.collection:
result["collection"] = obj.collection
if obj.tags is not None:

View file

@ -8,6 +8,7 @@ class Metadata:
# Root document identifier (set by librarian, preserved through pipeline)
root: str = ""
# Collection management
user: str = ""
# Collection the message belongs to. Workspace is NOT carried on the
# message — consumers derive it from flow.workspace (the flow the
# message arrived on), which is the trusted isolation boundary.
collection: str = ""

View file

@ -17,7 +17,7 @@ from .embeddings import GraphEmbeddings
# <- (error)
# list-kg-cores
# -> (user)
# -> (workspace)
# <- ()
# <- (error)
@ -27,8 +27,8 @@ class KnowledgeRequest:
# load-kg-core, unload-kg-core
operation: str = ""
# list-kg-cores, delete-kg-core, put-kg-core
user: str = ""
# Workspace the cores belong to. Partition / isolation boundary.
workspace: str = ""
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
# load-kg-core, unload-kg-core

View file

@ -13,7 +13,6 @@ from ..core.topic import queue
@dataclass
class CollectionMetadata:
"""Collection metadata record"""
user: str = ""
collection: str = ""
name: str = ""
description: str = ""
@ -23,11 +22,17 @@ class CollectionMetadata:
@dataclass
class CollectionManagementRequest:
"""Request for collection management operations"""
"""Request for collection management operations.
Collection-management is a global (non-flow-scoped) service, so the
workspace has to travel on the wire — it's the isolation boundary
for which workspace's collections the request operates on.
"""
operation: str = "" # e.g., "delete-collection"
# For 'list-collections'
user: str = ""
# Workspace the collection belongs to.
workspace: str = ""
collection: str = ""
timestamp: str = "" # ISO timestamp
name: str = ""

View file

@ -7,12 +7,19 @@ from ..core.primitives import Error
############################################################################
# Config service:
# get(keys) -> (version, values)
# list(type) -> (version, values)
# getvalues(type) -> (version, values)
# put(values) -> ()
# delete(keys) -> ()
# config() -> (version, config)
# get(workspace, keys) -> (version, values)
# list(workspace, type) -> (version, directory)
# getvalues(workspace, type) -> (version, values)
# getvalues-all-ws(type) -> (version, values with workspace field)
# put(workspace, values) -> ()
# delete(workspace, keys) -> ()
# config(workspace) -> (version, config)
#
# Most operations are scoped to a workspace. The workspace field on the
# request identifies which workspace's config to read or modify.
# getvalues-all-ws returns values across all workspaces for a single
# type — used by shared processors to load type-scoped config at startup.
@dataclass
class ConfigKey:
type: str = ""
@ -23,16 +30,24 @@ class ConfigValue:
type: str = ""
key: str = ""
value: str = ""
# Populated by getvalues-all-ws responses so callers can identify
# which workspace each value belongs to. Empty otherwise.
workspace: str = ""
# Request message for the config service
@dataclass
class ConfigRequest:
operation: str = "" # get, list, getvalues, delete, put, config
# Operations: get, list, getvalues, getvalues-all-ws, delete, put,
# config
operation: str = ""
# Workspace scope — required on all operations except
# getvalues-all-ws which spans all workspaces.
workspace: str = ""
# get, delete
keys: list[ConfigKey] = field(default_factory=list)
# list, getvalues
# list, getvalues, getvalues-all-ws
type: str = ""
# put
@ -58,7 +73,12 @@ class ConfigResponse:
@dataclass
class ConfigPush:
version: int = 0
types: list[str] = field(default_factory=list)
# Dict of config type -> list of affected workspaces.
# Handlers look up their registered type and get the list of
# workspaces that need refreshing.
# e.g. {"prompt": ["workspace-a", "workspace-b"], "schema": ["workspace-a"]}
changes: dict[str, list[str]] = field(default_factory=dict)
config_request_queue = queue('config', cls='request')
config_response_queue = queue('config', cls='response')

View file

@ -17,12 +17,14 @@ from ..core.primitives import Error
# start_flow(flowid, blueprintname) -> ()
# stop_flow(flowid) -> ()
# Request message for flow and blueprint operations
@dataclass
class FlowRequest:
operation: str = "" # list-blueprints, get-blueprint, put-blueprint, delete-blueprint
# list-flows, get-flow, start-flow, stop-flow
# Workspace scope — all operations act within this workspace
workspace: str = ""
# get_blueprint, put_blueprint, delete_blueprint, start_flow
blueprint_name: str = ""

View file

@ -43,12 +43,12 @@ from ..core.metadata import Metadata
# <- (error)
# list-documents
# -> (user, collection?)
# -> (workspace, collection?)
# <- (document_metadata[])
# <- (error)
# list-processing
# -> (user, collection?)
# -> (workspace, collection?)
# <- (processing_metadata[])
# <- (error)
@ -78,7 +78,7 @@ from ..core.metadata import Metadata
# <- (error)
# list-uploads
# -> (user)
# -> (workspace)
# <- (uploads[])
# <- (error)
@ -90,7 +90,7 @@ class DocumentMetadata:
title: str = ""
comments: str = ""
metadata: list[Triple] = field(default_factory=list)
user: str = ""
workspace: str = ""
tags: list[str] = field(default_factory=list)
# Child document support
parent_id: str = "" # Empty for top-level docs, set for children
@ -107,7 +107,7 @@ class ProcessingMetadata:
document_id: str = ""
time: int = 0
flow: str = ""
user: str = ""
workspace: str = ""
collection: str = ""
tags: list[str] = field(default_factory=list)
@ -162,8 +162,8 @@ class LibrarianRequest:
# add-document, upload-chunk
content: bytes = b""
# list-documents, list-processing, list-uploads
user: str = ""
# Workspace scopes every library operation.
workspace: str = ""
# list-documents?, list-processing?
collection: str = ""