mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` set at flow start time by the infrastructure;
no longer passed through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Four scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
217 lines
No EOL
6.5 KiB
Python
217 lines
No EOL
6.5 KiB
Python
"""
|
|
Contract test fixtures and configuration
|
|
|
|
This file provides common fixtures for contract testing, focusing on
|
|
message schema validation, API interface contracts, and service compatibility.
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
from typing import Dict, Any, Type
|
|
from pulsar.schema import Record
|
|
from unittest.mock import MagicMock
|
|
|
|
from trustgraph.schema import (
|
|
TextCompletionRequest, TextCompletionResponse,
|
|
DocumentRagQuery, DocumentRagResponse,
|
|
AgentRequest, AgentResponse, AgentStep,
|
|
Chunk, Triple, Triples, Term, Error,
|
|
EntityContext, EntityContexts,
|
|
GraphEmbeddings, EntityEmbeddings,
|
|
Metadata, IRI, LITERAL
|
|
)
|
|
|
|
|
|
@pytest.fixture
def schema_registry():
    """Registry of all Pulsar schemas used in TrustGraph"""
    # Grouped by service area for readability; merged into one flat
    # name -> schema-class mapping below.
    text_completion = {
        "TextCompletionRequest": TextCompletionRequest,
        "TextCompletionResponse": TextCompletionResponse,
    }
    document_rag = {
        "DocumentRagQuery": DocumentRagQuery,
        "DocumentRagResponse": DocumentRagResponse,
    }
    agent = {
        "AgentRequest": AgentRequest,
        "AgentResponse": AgentResponse,
        "AgentStep": AgentStep,
    }
    graph = {
        "Chunk": Chunk,
        "Triple": Triple,
        "Triples": Triples,
        "Term": Term,
        "Error": Error,
        "EntityContext": EntityContext,
        "EntityContexts": EntityContexts,
        "GraphEmbeddings": GraphEmbeddings,
        "EntityEmbeddings": EntityEmbeddings,
    }
    common = {
        "Metadata": Metadata,
    }
    return {**text_completion, **document_rag, **agent, **graph, **common}
|
|
|
|
|
|
@pytest.fixture
def sample_message_data():
    """Sample message data for contract testing"""
    # RDF terms for the Triple example, named after their role so the
    # subject/predicate/object intent is obvious.
    subject_term = Term(type=IRI, iri="http://example.com/subject")
    predicate_term = Term(type=IRI, iri="http://example.com/predicate")
    object_term = Term(type=LITERAL, value="Object value")

    return {
        "TextCompletionRequest": {
            "system": "You are a helpful assistant.",
            "prompt": "What is machine learning?",
        },
        "TextCompletionResponse": {
            "error": None,
            "response": "Machine learning is a subset of artificial intelligence.",
            "in_token": 50,
            "out_token": 100,
            "model": "gpt-3.5-turbo",
        },
        "DocumentRagQuery": {
            "query": "What is artificial intelligence?",
            "collection": "test_collection",
            "doc_limit": 10,
        },
        "DocumentRagResponse": {
            "error": None,
            "response": "Artificial intelligence is the simulation of human intelligence in machines.",
        },
        "AgentRequest": {
            "question": "What is machine learning?",
            "state": "",
            "group": [],
            "history": [],
        },
        "AgentResponse": {
            "message_type": "answer",
            "content": "Machine learning is a subset of AI.",
            "end_of_message": True,
            "end_of_dialog": True,
            "error": None,
        },
        "Metadata": {
            "id": "test-doc-123",
            "collection": "test_collection",
        },
        "Term": {
            "type": IRI,
            "iri": "http://example.com/entity",
        },
        "Triple": {
            "s": subject_term,
            "p": predicate_term,
            "o": object_term,
        },
    }
|
|
|
|
|
|
@pytest.fixture
def invalid_message_data():
    """Invalid message data for contract validation testing"""
    # Each entry is a list of payloads that should FAIL validation for the
    # named schema; the comment on each payload states why it is invalid.
    bad_text_completion_requests = [
        {"system": None, "prompt": "test"},   # Invalid system (None)
        {"system": "test", "prompt": None},   # Invalid prompt (None)
        {"system": 123, "prompt": "test"},    # Invalid system (not string)
        {},                                   # Missing required fields
    ]
    bad_document_rag_queries = [
        {"query": None, "collection": "test", "doc_limit": 10},    # Invalid query
        {"query": "test", "collection": "test", "doc_limit": -1},  # Invalid doc_limit
        {"query": "test"},                                         # Missing required fields
    ]
    bad_terms = [
        {"type": IRI, "iri": None},                 # Invalid iri (None)
        {"type": "invalid_type", "value": "test"},  # Invalid type
        {"type": LITERAL, "value": 123},            # Invalid value (not string)
    ]
    return {
        "TextCompletionRequest": bad_text_completion_requests,
        "DocumentRagQuery": bad_document_rag_queries,
        "Term": bad_terms,
    }
|
|
|
|
|
|
@pytest.fixture
def message_properties():
    """Standard message properties for contract testing"""
    # Fixed, deterministic values so contract assertions are reproducible
    # from run to run.
    return dict(
        id="test-message-123",
        routing_key="test.routing.key",
        timestamp="2024-01-01T00:00:00Z",
        source_service="test-service",
        correlation_id="correlation-123",
    )
|
|
|
|
|
|
@pytest.fixture
def schema_evolution_data():
    """Data for testing schema evolution and backward compatibility"""
    request_v1 = {
        "system": "You are helpful.",
        "prompt": "Test prompt",
    }
    # v2 request = v1 plus the later-added fields; deriving it keeps the
    # shared fields defined in exactly one place.
    request_v2 = {
        **request_v1,
        "temperature": 0.7,  # New field
        "max_tokens": 100,   # New field
    }
    response_v1 = {
        "error": None,
        "response": "Test response",
        "model": "gpt-3.5-turbo",
    }
    # v2 response spelled out in full to preserve its original field order
    # (token counts sit before "model").
    response_v2 = {
        "error": None,
        "response": "Test response",
        "in_token": 50,   # New field
        "out_token": 100, # New field
        "model": "gpt-3.5-turbo",
    }
    return {
        "TextCompletionRequest_v1": request_v1,
        "TextCompletionRequest_v2": request_v2,
        "TextCompletionResponse_v1": response_v1,
        "TextCompletionResponse_v2": response_v2,
    }
|
|
|
|
|
|
def validate_schema_contract(schema_class: "Type[Record]", data: "Dict[str, Any]") -> bool:
    """Return True if ``schema_class`` honours its contract for ``data``.

    The contract holds when an instance can be constructed via
    ``schema_class(**data)`` and every supplied field is then accessible
    as an attribute with the same value.

    Args:
        schema_class: Pulsar schema class (``pulsar.schema.Record`` subclass)
            under test. Annotations are quoted so the helper imports even
            when pulsar is absent at annotation-evaluation time.
        data: Field name -> expected value mapping used for construction
            and round-trip verification.

    Returns:
        True when construction and every field check succeed, False on any
        failure.
    """
    try:
        instance = schema_class(**data)
        # Explicit comparisons instead of `assert`: asserts are stripped
        # under `python -O`, which made the original version silently pass
        # every field check in optimized runs.
        for field_name, expected in data.items():
            if not hasattr(instance, field_name):
                return False
            if getattr(instance, field_name) != expected:
                return False
        return True
    except Exception:
        # Deliberate catch-all: any construction/access failure means the
        # contract is not satisfied.
        return False
|
|
|
|
|
|
def serialize_deserialize_test(schema_class: "Type[Record]", data: "Dict[str, Any]") -> bool:
    """Return True if ``schema_class`` constructed from ``data`` round-trips
    every field through attribute access.

    This would test actual Pulsar serialization if we had the client; for
    now it verifies schema construction and field access only.

    Args:
        schema_class: Pulsar schema class (``pulsar.schema.Record`` subclass)
            under test. Annotations are quoted so the helper imports even
            when pulsar is absent at annotation-evaluation time.
        data: Field name -> expected value mapping.

    Returns:
        True when construction succeeds and every field reads back equal to
        its input value, False otherwise.
    """
    try:
        instance = schema_class(**data)
        # Explicit comparisons instead of `assert`: asserts are stripped
        # under `python -O`, which made the original version silently pass
        # in optimized runs. A missing attribute raises AttributeError and
        # falls through to the False branch, as before.
        for field_name, expected in data.items():
            if getattr(instance, field_name) != expected:
                return False
        return True
    except Exception:
        # Deliberate catch-all: any failure means the round-trip failed.
        return False
|
|
|
|
|
|
# Test markers for contract tests: tags every test defined in this module as
# `contract`, so the suite can be selected with `pytest -m contract`.
pytestmark = pytest.mark.contract