trustgraph/trustgraph-base/trustgraph/messaging/translators/library.py
cybermaggedon d35473f7f7
feat: workspace-based multi-tenancy, replacing user as tenancy axis (#840)
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.

Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
  proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
  captures the workspace/collection/flow hierarchy.

Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
  DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
  Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
  service layer.
- Translators updated to not serialise/deserialise user.

API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.

Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
  scoped by workspace. Config client API takes workspace as first
  positional arg.
- `flow.workspace` set at flow start time by the infrastructure;
  no longer pass-through from clients.
- Tool service drops user-personalisation passthrough.

CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
  library) drop user kwargs from every method signature.

MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
  keyed per user.

Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
  whose blueprint template was parameterised AND no remaining
  live flow (across all workspaces) still resolves to that topic.
  Three scopes fall out naturally from template analysis:
    * {id} -> per-flow, deleted on stop
    * {blueprint} -> per-blueprint, kept while any flow of the
      same blueprint exists
    * {workspace} -> per-workspace, kept while any flow in the
      workspace exists
    * literal -> global, never deleted (e.g. tg.request.librarian)
  Fixes a bug where stopping a flow silently destroyed the global
  librarian exchange, wedging all library operations until manual
  restart.

RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
  dead connections (broker restart, orphaned channels, network
  partitions) within ~2 heartbeat windows, so the consumer
  reconnects and re-binds its queue rather than sitting forever
  on a zombie connection.

Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
  ~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
2026-04-21 23:23:01 +01:00

177 lines
6.7 KiB
Python

from typing import Dict, Any, Tuple, Optional
from ...schema import LibrarianRequest, LibrarianResponse, DocumentMetadata, ProcessingMetadata, Criteria
from .base import MessageTranslator
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
class LibraryRequestTranslator(MessageTranslator):
"""Translator for LibrarianRequest schema objects"""
def __init__(self):
self.doc_metadata_translator = DocumentMetadataTranslator()
self.proc_metadata_translator = ProcessingMetadataTranslator()
def decode(self, data: Dict[str, Any]) -> LibrarianRequest:
# Document metadata
doc_metadata = None
if "document-metadata" in data:
doc_metadata = self.doc_metadata_translator.decode(data["document-metadata"])
# Processing metadata
proc_metadata = None
if "processing-metadata" in data:
proc_metadata = self.proc_metadata_translator.decode(data["processing-metadata"])
# Criteria
criteria = []
if "criteria" in data:
criteria = [
Criteria(
key=c["key"],
value=c["value"],
operator=c["operator"]
)
for c in data["criteria"]
]
# Content as bytes
content = None
if "content" in data:
if isinstance(data["content"], str):
content = data["content"].encode("utf-8")
else:
content = data["content"]
return LibrarianRequest(
operation=data.get("operation"),
document_id=data.get("document-id", ""),
processing_id=data.get("processing-id", ""),
document_metadata=doc_metadata,
processing_metadata=proc_metadata,
content=content,
workspace=data.get("workspace", ""),
collection=data.get("collection", ""),
criteria=criteria,
# Chunked upload fields
total_size=data.get("total-size", 0),
chunk_size=data.get("chunk-size", 0),
upload_id=data.get("upload-id", ""),
chunk_index=data.get("chunk-index", 0),
# List documents filtering
include_children=data.get("include-children", False),
)
def encode(self, obj: LibrarianRequest) -> Dict[str, Any]:
result = {}
if obj.operation:
result["operation"] = obj.operation
if obj.document_id:
result["document-id"] = obj.document_id
if obj.processing_id:
result["processing-id"] = obj.processing_id
if obj.document_metadata:
result["document-metadata"] = self.doc_metadata_translator.encode(obj.document_metadata)
if obj.processing_metadata:
result["processing-metadata"] = self.proc_metadata_translator.encode(obj.processing_metadata)
if obj.content:
result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content
if obj.workspace:
result["workspace"] = obj.workspace
if obj.collection:
result["collection"] = obj.collection
if obj.criteria is not None:
result["criteria"] = [
{
"key": c.key,
"value": c.value,
"operator": c.operator
}
for c in obj.criteria
]
return result
class LibraryResponseTranslator(MessageTranslator):
"""Translator for LibrarianResponse schema objects"""
def __init__(self):
self.doc_metadata_translator = DocumentMetadataTranslator()
self.proc_metadata_translator = ProcessingMetadataTranslator()
def decode(self, data: Dict[str, Any]) -> LibrarianResponse:
raise NotImplementedError("Response translation to Pulsar not typically needed")
def encode(self, obj: LibrarianResponse) -> Dict[str, Any]:
result = {}
if obj.error:
result["error"] = {
"type": obj.error.type,
"message": obj.error.message,
}
if obj.document_metadata:
result["document-metadata"] = self.doc_metadata_translator.encode(obj.document_metadata)
if obj.content:
result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content
if obj.document_metadatas is not None:
result["document-metadatas"] = [
self.doc_metadata_translator.encode(dm)
for dm in obj.document_metadatas
]
if obj.processing_metadatas is not None:
result["processing-metadatas"] = [
self.proc_metadata_translator.encode(pm)
for pm in obj.processing_metadatas
]
# Chunked upload response fields
if obj.upload_id:
result["upload-id"] = obj.upload_id
if obj.chunk_size:
result["chunk-size"] = obj.chunk_size
if obj.total_chunks:
result["total-chunks"] = obj.total_chunks
if obj.chunk_index:
result["chunk-index"] = obj.chunk_index
if obj.chunks_received:
result["chunks-received"] = obj.chunks_received
if obj.bytes_received:
result["bytes-received"] = obj.bytes_received
if obj.total_bytes:
result["total-bytes"] = obj.total_bytes
if obj.document_id:
result["document-id"] = obj.document_id
if obj.object_id:
result["object-id"] = obj.object_id
if obj.upload_state:
result["upload-state"] = obj.upload_state
if obj.received_chunks:
result["received-chunks"] = obj.received_chunks
if obj.missing_chunks:
result["missing-chunks"] = obj.missing_chunks
if obj.upload_sessions:
result["upload-sessions"] = [
{
"upload-id": s.upload_id,
"document-id": s.document_id,
"document-metadata-json": s.document_metadata_json,
"total-size": s.total_size,
"chunk-size": s.chunk_size,
"total-chunks": s.total_chunks,
"chunks-received": s.chunks_received,
"created-at": s.created_at,
}
for s in obj.upload_sessions
]
return result
def encode_with_completion(self, obj: LibrarianResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
return self.encode(obj), obj.is_final