mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-29 10:26:21 +02:00
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` set at flow start time by the infrastructure;
no longer pass-through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Four scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
177 lines
6.7 KiB
Python
from typing import Dict, Any, Tuple, Optional
|
|
from ...schema import LibrarianRequest, LibrarianResponse, DocumentMetadata, ProcessingMetadata, Criteria
|
|
from .base import MessageTranslator
|
|
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
|
|
|
|
|
|
class LibraryRequestTranslator(MessageTranslator):
    """Translate between wire-format dicts and LibrarianRequest objects.

    Wire format uses kebab-case keys; the schema object uses snake_case
    attributes. Nested document/processing metadata is delegated to the
    dedicated metadata translators.
    """

    def __init__(self):
        # Sub-translators for the nested metadata structures.
        self.doc_metadata_translator = DocumentMetadataTranslator()
        self.proc_metadata_translator = ProcessingMetadataTranslator()

    def decode(self, data: Dict[str, Any]) -> LibrarianRequest:
        """Build a LibrarianRequest from a wire-format dict.

        Missing optional keys fall back to None / "" / 0 / [] defaults;
        string content is converted to UTF-8 bytes for the schema.
        """
        doc_metadata = (
            self.doc_metadata_translator.decode(data["document-metadata"])
            if "document-metadata" in data
            else None
        )

        proc_metadata = (
            self.proc_metadata_translator.decode(data["processing-metadata"])
            if "processing-metadata" in data
            else None
        )

        criteria = [
            Criteria(key=c["key"], value=c["value"], operator=c["operator"])
            for c in data.get("criteria", [])
        ]

        # Content travels as a UTF-8 string on the wire; the schema wants
        # bytes. Anything already non-str is passed through untouched.
        content = data.get("content")
        if isinstance(content, str):
            content = content.encode("utf-8")

        return LibrarianRequest(
            operation=data.get("operation"),
            document_id=data.get("document-id", ""),
            processing_id=data.get("processing-id", ""),
            document_metadata=doc_metadata,
            processing_metadata=proc_metadata,
            content=content,
            workspace=data.get("workspace", ""),
            collection=data.get("collection", ""),
            criteria=criteria,
            # Chunked upload fields
            total_size=data.get("total-size", 0),
            chunk_size=data.get("chunk-size", 0),
            upload_id=data.get("upload-id", ""),
            chunk_index=data.get("chunk-index", 0),
            # List documents filtering
            include_children=data.get("include-children", False),
        )

    def encode(self, obj: LibrarianRequest) -> Dict[str, Any]:
        """Serialise a LibrarianRequest into a wire-format dict.

        Falsy fields are omitted entirely rather than emitted as
        null/empty, keeping messages minimal.
        """
        out: Dict[str, Any] = {}

        if obj.operation:
            out["operation"] = obj.operation
        if obj.document_id:
            out["document-id"] = obj.document_id
        if obj.processing_id:
            out["processing-id"] = obj.processing_id

        if obj.document_metadata:
            out["document-metadata"] = self.doc_metadata_translator.encode(
                obj.document_metadata)
        if obj.processing_metadata:
            out["processing-metadata"] = self.proc_metadata_translator.encode(
                obj.processing_metadata)

        if obj.content:
            # Mirror of decode(): bytes become UTF-8 text on the wire;
            # already-textual content passes through unchanged.
            raw = obj.content
            out["content"] = raw.decode("utf-8") if isinstance(raw, bytes) else raw

        if obj.workspace:
            out["workspace"] = obj.workspace
        if obj.collection:
            out["collection"] = obj.collection

        # Note: `is not None` here (not plain truthiness) so an empty
        # criteria list is still serialised explicitly.
        if obj.criteria is not None:
            out["criteria"] = [
                {"key": c.key, "value": c.value, "operator": c.operator}
                for c in obj.criteria
            ]

        return out
|
|
|
|
|
|
class LibraryResponseTranslator(MessageTranslator):
    """Translate LibrarianResponse schema objects into wire-format dicts.

    Only the encoding direction is implemented; responses are produced by
    the service and never parsed back from the wire here.
    """

    def __init__(self):
        # Sub-translators for nested metadata structures.
        self.doc_metadata_translator = DocumentMetadataTranslator()
        self.proc_metadata_translator = ProcessingMetadataTranslator()

    def decode(self, data: Dict[str, Any]) -> LibrarianResponse:
        """Not supported: responses are never decoded in this direction."""
        raise NotImplementedError("Response translation to Pulsar not typically needed")

    def encode(self, obj: LibrarianResponse) -> Dict[str, Any]:
        """Serialise a LibrarianResponse into a wire-format dict.

        Falsy scalar fields are omitted; the metadata list fields are
        emitted whenever they are non-None (even if empty).
        """
        out: Dict[str, Any] = {}

        if obj.error:
            out["error"] = {
                "type": obj.error.type,
                "message": obj.error.message,
            }

        if obj.document_metadata:
            out["document-metadata"] = self.doc_metadata_translator.encode(
                obj.document_metadata)

        if obj.content:
            # Bytes become UTF-8 text on the wire; textual content passes
            # through unchanged.
            raw = obj.content
            out["content"] = raw.decode("utf-8") if isinstance(raw, bytes) else raw

        # `is not None` so that empty result lists are still emitted.
        if obj.document_metadatas is not None:
            out["document-metadatas"] = [
                self.doc_metadata_translator.encode(dm)
                for dm in obj.document_metadatas
            ]

        if obj.processing_metadatas is not None:
            out["processing-metadatas"] = [
                self.proc_metadata_translator.encode(pm)
                for pm in obj.processing_metadatas
            ]

        # Chunked-upload response fields: all share the same
        # omit-when-falsy rule, so map them data-driven. The tuple order
        # matches the original emission order of the wire keys.
        chunk_fields = (
            ("upload-id", obj.upload_id),
            ("chunk-size", obj.chunk_size),
            ("total-chunks", obj.total_chunks),
            ("chunk-index", obj.chunk_index),
            ("chunks-received", obj.chunks_received),
            ("bytes-received", obj.bytes_received),
            ("total-bytes", obj.total_bytes),
            ("document-id", obj.document_id),
            ("object-id", obj.object_id),
            ("upload-state", obj.upload_state),
            ("received-chunks", obj.received_chunks),
            ("missing-chunks", obj.missing_chunks),
        )
        for key, value in chunk_fields:
            if value:
                out[key] = value

        if obj.upload_sessions:
            out["upload-sessions"] = [
                {
                    "upload-id": s.upload_id,
                    "document-id": s.document_id,
                    "document-metadata-json": s.document_metadata_json,
                    "total-size": s.total_size,
                    "chunk-size": s.chunk_size,
                    "total-chunks": s.total_chunks,
                    "chunks-received": s.chunks_received,
                    "created-at": s.created_at,
                }
                for s in obj.upload_sessions
            ]

        return out

    def encode_with_completion(self, obj: LibrarianResponse) -> Tuple[Dict[str, Any], bool]:
        """Returns (response_dict, is_final)"""
        encoded = self.encode(obj)
        return encoded, obj.is_final
|