# trustgraph/ai-context/trustgraph-templates/tests/validators/trustgraph.py
# (236 lines, 7.4 KiB, Python)

"""
TrustGraph configuration semantic validation.
"""
import json
from typing import Dict, Any, List, Tuple, Set
def validate_service_references(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that configured services reference valid modules.

    Every entry of the service list must be a dictionary with a non-empty
    string 'name'. Names that are not in the built-in list of known modules
    are accepted without complaint, because new modules may legitimately be
    missing from that list.

    Args:
        config: Parsed configuration in input format (a list of service
            dictionaries). Any other shape (for example the dict-based
            output format) produces no errors here, because there are no
            service entries to inspect.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    # Only the list-of-services format has entries to inspect. Iterating a
    # dict would walk its keys and flag each one as "not a dictionary".
    if not isinstance(config, (list, tuple)):
        return errors

    # Build set of known module names (this would need to be comprehensive)
    known_modules = {
        'pulsar', 'triple-store-cassandra', 'object-store-cassandra',
        'vector-store-qdrant', 'vector-store-milvus', 'vector-store-pinecone',
        'graph-rag', 'text-completion',
        'embeddings-hf', 'embeddings-fastembed', 'embeddings-openai',
        'openai', 'anthropic', 'ollama', 'bedrock', 'vertexai',
        'trustgraph-base', 'grafana', 'prometheus',
        'override-recursive-chunker', 'override-text-splitter',
        'neo4j', 'astra',
    }

    for idx, service in enumerate(config):
        if not isinstance(service, dict):
            errors.append(f"Configuration item {idx}: not a dictionary")
            continue

        name = service.get('name')
        if not name:
            errors.append(f"Configuration item {idx}: missing 'name' field")
        elif not isinstance(name, str):
            # Checked before the set lookup: an unhashable value such as a
            # list would otherwise raise TypeError instead of being reported.
            errors.append(f"Configuration item {idx}: 'name' must be a string")
        elif name not in known_modules:
            # Deliberately not an error: an unknown name may simply be a
            # module that has not been added to known_modules yet.
            continue

    return errors
def _is_plain_number(value: Any, *, integer_only: bool = False) -> bool:
    """Return True for int (or int/float) values, rejecting bool."""
    # bool is a subclass of int, so it has to be excluded explicitly.
    if isinstance(value, bool):
        return False
    return isinstance(value, int if integer_only else (int, float))


def validate_parameter_types(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that module parameters are reasonable.

    Parameters are matched by substring of their name, and a single
    parameter may trigger several checks:

    * 'chunk-size'         -> positive number
    * 'chunk-overlap'      -> non-negative number
    * 'max-output-tokens'  -> positive integer
    * 'temperature'        -> number between 0 and 2 inclusive

    Booleans are never accepted as numbers, and NaN fails every range check.

    Args:
        config: Parsed configuration in input format (a list of service
            dictionaries). Entries that are not dictionaries are skipped;
            a config that is not a list produces no errors here.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    if not isinstance(config, (list, tuple)):
        return errors

    # (name fragment, acceptance predicate, expectation used in the message).
    # Order matters: it fixes the order of messages for a single parameter.
    # Comparisons are written in the positive form (v > 0 rather than
    # not v <= 0) so that NaN, which compares False to everything, is rejected.
    checks = (
        ('chunk-size',
         lambda v: _is_plain_number(v) and v > 0,
         "should be positive number"),
        ('chunk-overlap',
         lambda v: _is_plain_number(v) and v >= 0,
         "should be non-negative number"),
        ('max-output-tokens',
         lambda v: _is_plain_number(v, integer_only=True) and v > 0,
         "should be positive integer"),
        ('temperature',
         lambda v: _is_plain_number(v) and 0 <= v <= 2,
         "should be between 0 and 2"),
    )

    for idx, service in enumerate(config):
        if not isinstance(service, dict):
            continue

        name = service.get('name', f'item-{idx}')
        parameters = service.get('parameters', {})

        if not isinstance(parameters, dict):
            errors.append(f"Service '{name}': parameters must be a dictionary")
            continue

        # Check for common parameter issues
        for param_name, param_value in parameters.items():
            # Keys are always strings when the config comes from JSON; skip
            # anything else rather than crash on the substring test.
            if not isinstance(param_name, str):
                continue
            for fragment, is_acceptable, expectation in checks:
                if fragment in param_name and not is_acceptable(param_value):
                    errors.append(
                        f"Service '{name}': parameter '{param_name}' {expectation}"
                    )

    return errors
def validate_storage_consistency(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that graph/object/vector stores are configured consistently.

    When a service named exactly 'graph-rag' is present, the configuration
    must also contain at least one service whose name contains
    'triple-store', one containing 'object-store' and one containing
    'vector-store'.

    Args:
        config: Parsed configuration in input format (a list of service
            dictionaries). Entries that are not dictionaries, or whose
            'name' is missing or not a string, are ignored. A config that
            is not a list produces no errors here.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    if not isinstance(config, (list, tuple)):
        return errors

    # Keep only string names: a missing name comes back as None, and the
    # substring tests below would raise TypeError on it.
    service_names = [
        service.get('name')
        for service in config
        if isinstance(service, dict) and isinstance(service.get('name'), str)
    ]

    # If using graph-rag, should have all three stores
    if 'graph-rag' not in service_names:
        return errors

    for store_kind in ('triple-store', 'object-store', 'vector-store'):
        if not any(store_kind in name for name in service_names):
            errors.append(
                f"Configuration uses 'graph-rag' but no {store_kind} is configured"
            )

    return errors
def validate_llm_configuration(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate LLM configuration is present and reasonable.

    The service list must contain at least one service named exactly after
    a supported LLM provider, and at least one service whose name contains
    'embeddings'.

    Args:
        config: Parsed configuration in input format (a list of service
            dictionaries). Entries that are not dictionaries, or whose
            'name' is missing or not a string, are ignored. A config that
            is not a list produces no errors here, since it has no service
            entries to check.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    if not isinstance(config, (list, tuple)):
        return errors

    # Keep only string names: None breaks the substring test below and an
    # unhashable value (e.g. a list) breaks the set membership test.
    service_names = [
        service.get('name')
        for service in config
        if isinstance(service, dict) and isinstance(service.get('name'), str)
    ]

    # Check for at least one LLM provider
    llm_providers = {
        'openai', 'anthropic', 'ollama', 'bedrock', 'vertexai',
        'vllm', 'llamacpp',
    }
    if not any(name in llm_providers for name in service_names):
        # Sorted so the message is identical on every run; set iteration
        # order for strings varies with hash randomization.
        errors.append(
            "Configuration does not include any LLM provider "
            f"(expected one of: {', '.join(sorted(llm_providers))})"
        )

    # Check for embeddings
    if not any('embeddings' in name for name in service_names):
        errors.append(
            "Configuration does not include any embeddings provider"
        )

    return errors
def validate_required_structure(config: Any) -> List[str]:
    """
    Check the top-level shape of a parsed configuration.

    Two shapes are accepted: the output format (a dict, which only has to
    be non-empty) and the input format (a list of service dictionaries,
    each of which must carry 'name' and 'parameters' keys).

    Returns:
        List of error messages (empty if valid)
    """
    # Output format: a dict only needs to contain something.
    if isinstance(config, dict):
        return [] if config else ["Configuration is empty"]

    # Anything that is neither dict nor list is rejected outright.
    if not isinstance(config, list):
        return ["Configuration must be a list or dict"]

    # Input format: an empty list is reported, then each entry is checked.
    problems = [] if config else ["Configuration is empty"]

    for position, entry in enumerate(config):
        if isinstance(entry, dict):
            if 'name' not in entry:
                problems.append(
                    f"Configuration item {position}: missing required field 'name'"
                )
            if 'parameters' not in entry:
                problems.append(
                    f"Configuration item {position}: missing required field 'parameters'"
                )
        else:
            problems.append(f"Configuration item {position}: must be a dictionary")

    return problems
def parse_trustgraph_config(json_content: str):
    """
    Decode a TrustGraph configuration from its JSON text.

    Args:
        json_content: JSON string

    Returns:
        The decoded value as produced by json.loads (dict or list
        depending on format)
    """
    parsed = json.loads(json_content)
    return parsed
def validate_trustgraph_config(json_content: str) -> Tuple[bool, List[str]]:
    """
    Comprehensive validation of TrustGraph configuration.

    The JSON is parsed and its top-level structure is checked first. The
    service-level validators (service references, parameter types, storage
    consistency, LLM configuration) run only when the configuration is a
    list of services. The dict-based output format has no service entries,
    so running them on it would produce false errors, and running them on
    a JSON scalar or null would raise instead of reporting.

    Args:
        json_content: JSON string of TrustGraph configuration

    Returns:
        Tuple of (is_valid, list_of_errors)
    """
    try:
        config = parse_trustgraph_config(json_content)
    except json.JSONDecodeError as e:
        return False, [f"JSON parsing error: {e}"]

    errors: List[str] = list(validate_required_structure(config))

    # A list with structural problems still goes through every validator so
    # that all issues are reported in one pass.
    if isinstance(config, list):
        service_validators = (
            validate_service_references,
            validate_parameter_types,
            validate_storage_consistency,
            validate_llm_configuration,
        )
        for validator in service_validators:
            errors.extend(validator(config))

    return not errors, errors