Squashed 'ai-context/trustgraph-templates/' content from commit 42a5fd1b

git-subtree-dir: ai-context/trustgraph-templates
git-subtree-split: 42a5fd1b678f32be378062e30451e2052ccb95dd
This commit is contained in:
elpresidank 2026-04-05 21:09:49 -05:00
commit 74cc8a4685
1216 changed files with 116347 additions and 0 deletions

View file

View file

@ -0,0 +1,242 @@
"""
Docker Compose manifest semantic validation.
"""
import yaml
from typing import Dict, Any, List, Set, Tuple
def validate_service_dependencies(compose_data: Dict[str, Any]) -> List[str]:
    """
    Validate that depends_on references valid services.

    Both ``depends_on`` forms are understood: a list of service names and a
    mapping keyed by service name.  A missing or null ``services`` section,
    service entries that are not mappings, and ``depends_on`` values of any
    other type are skipped instead of raising.

    Args:
        compose_data: Parsed Docker Compose document.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    services = compose_data.get('services') if isinstance(compose_data, dict) else None
    if not isinstance(services, dict):
        # `services:` with no value parses to None; nothing to cross-check.
        return errors

    for service_name, service_spec in services.items():
        if not isinstance(service_spec, dict):
            continue
        depends_on = service_spec.get('depends_on')
        if not isinstance(depends_on, (list, dict)):
            continue
        # Iterating a dict yields its keys, so both forms share one loop.
        for dep in depends_on:
            if dep not in services:
                errors.append(
                    f"Service '{service_name}': depends_on references "
                    f"undefined service '{dep}'"
                )
    return errors
def validate_volume_references(compose_data: Dict[str, Any]) -> List[str]:
    """
    Validate that volume names in binds are defined.

    Short-syntax entries (``"name:/path"``) are treated as named volumes
    unless the source starts with ``/``, ``.`` or ``~`` (host paths).
    Long-syntax entries are checked when ``type`` is ``volume`` and a
    ``source`` is given.  Null or malformed sections are skipped instead of
    raising.

    Args:
        compose_data: Parsed Docker Compose document.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(compose_data, dict):
        return errors
    services = compose_data.get('services')
    if not isinstance(services, dict):
        return errors
    # `volumes:` with no value parses to None, which means "none defined".
    top_level = compose_data.get('volumes')
    defined_volumes = set(top_level) if isinstance(top_level, dict) else set()

    for service_name, service_spec in services.items():
        if not isinstance(service_spec, dict):
            continue
        volumes = service_spec.get('volumes')
        if not isinstance(volumes, list):
            continue
        for volume in volumes:
            volume_name = None
            if isinstance(volume, str):
                source, separator, _ = volume.partition(':')
                # Without a ':' the entry is an anonymous volume (container path only).
                if separator and not source.startswith(('/', '.', '~')):
                    volume_name = source
            elif isinstance(volume, dict):
                source = volume.get('source')
                if volume.get('type') == 'volume' and isinstance(source, str) and source:
                    volume_name = source
            if volume_name is not None and volume_name not in defined_volumes:
                errors.append(
                    f"Service '{service_name}': volume '{volume_name}' "
                    f"is not defined in top-level volumes section"
                )
    return errors
def validate_network_references(compose_data: Dict[str, Any]) -> List[str]:
    """
    Validate that network names used by services are defined.

    The implicit ``default`` network is always considered defined.  A
    service's ``networks`` may be a list of names or a mapping keyed by
    name.  Null or malformed sections are skipped instead of raising.

    Args:
        compose_data: Parsed Docker Compose document.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(compose_data, dict):
        return errors
    services = compose_data.get('services')
    if not isinstance(services, dict):
        return errors
    # `networks:` with no value parses to None, which means "none defined".
    top_level = compose_data.get('networks')
    defined_networks = set(top_level) if isinstance(top_level, dict) else set()
    defined_networks.add('default')

    for service_name, service_spec in services.items():
        if not isinstance(service_spec, dict):
            continue
        networks = service_spec.get('networks')
        if not isinstance(networks, (list, dict)):
            continue
        # Iterating a dict yields its keys, so both forms share one loop.
        for network_name in networks:
            if network_name not in defined_networks:
                errors.append(
                    f"Service '{service_name}': network '{network_name}' "
                    f"is not defined in top-level networks section"
                )
    return errors
def _parse_host_binding(port: Any):
    """
    Extract the host-side binding from one ``ports`` entry.

    Returns:
        ``(host_ip, host_port, protocol)`` when the entry publishes a single
        fixed host port, otherwise ``None`` (container-only ports, ranges,
        ephemeral host ports and unrecognised values).
    """
    if isinstance(port, dict):
        # Long syntax: {target: 80, published: 8080, protocol: tcp, host_ip: ...}
        published = port.get('published')
        if isinstance(published, bool):
            return None
        if isinstance(published, str) and published.isdecimal():
            published = int(published)
        if not isinstance(published, int):
            return None
        host_port = published
        host_ip = str(port.get('host_ip') or '')
        protocol = str(port.get('protocol') or 'tcp').lower()
    elif isinstance(port, str):
        # Short syntax: [[IP:]HOST:]CONTAINER[/PROTOCOL]
        mapping, _, protocol = port.partition('/')
        protocol = protocol.lower() or 'tcp'
        # Split from the right so a host IP containing ':' (IPv6) stays intact.
        parts = mapping.rsplit(':', 2)
        if len(parts) < 2 or not parts[-2].isdecimal():
            # A lone "80" publishes the container port on an ephemeral host port.
            return None
        host_port = int(parts[-2])
        host_ip = parts[0].strip('[]') if len(parts) == 3 else ''
    else:
        # A bare integer is a container port, not a host port.
        return None
    if host_port <= 0:
        return None
    return host_ip, host_port, protocol


def _host_ips_overlap(first: str, second: str) -> bool:
    """Return True when two host IPs can claim the same port (equal, or either is a wildcard)."""
    wildcard = ('', '0.0.0.0', '::')
    return first == second or first in wildcard or second in wildcard


def validate_port_conflicts(compose_data: Dict[str, Any]) -> List[str]:
    """
    Validate that no duplicate host port bindings exist.

    Only entries that publish a fixed host port are considered
    (``"8080:80"``, ``"127.0.0.1:8080:80"``, ``"53:53/udp"`` or the long
    syntax with ``published``).  Two bindings conflict when they share host
    port and protocol and their host IPs overlap.

    Args:
        compose_data: Parsed Docker Compose document.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    services = compose_data.get('services') if isinstance(compose_data, dict) else None
    if not isinstance(services, dict):
        return errors

    # (host_port, protocol) -> [(host_ip, owning service), ...]
    bindings: Dict[Any, List[Any]] = {}
    for service_name, service_spec in services.items():
        if not isinstance(service_spec, dict):
            continue
        ports = service_spec.get('ports')
        if not isinstance(ports, list):
            continue
        for port in ports:
            parsed = _parse_host_binding(port)
            if parsed is None:
                continue
            host_ip, host_port, protocol = parsed
            claimed = bindings.setdefault((host_port, protocol), [])
            owner = next(
                (name for ip, name in claimed if _host_ips_overlap(ip, host_ip)),
                None,
            )
            if owner is not None:
                errors.append(
                    f"Port conflict: host port {host_port} is bound by both "
                    f"'{owner}' and '{service_name}'"
                )
            else:
                claimed.append((host_ip, service_name))
    return errors
def validate_required_fields(compose_data: Dict[str, Any]) -> List[str]:
    """
    Validate that required Docker Compose fields are present.

    Checks that a non-empty ``services`` mapping exists and that every
    service is a mapping containing ``image`` or ``build``.

    Args:
        compose_data: Parsed Docker Compose document.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(compose_data, dict) or 'services' not in compose_data:
        errors.append("Missing required 'services' field")
        return errors

    services = compose_data['services']
    if not services:
        # Covers both `services: {}` and `services:` (None); stop here because
        # there is nothing to iterate.
        errors.append("'services' section is empty")
        return errors
    if not isinstance(services, dict):
        errors.append("'services' section must be a mapping of service names to definitions")
        return errors

    for service_name, service_spec in services.items():
        if not isinstance(service_spec, dict):
            errors.append(f"Service '{service_name}': invalid service specification")
            continue
        # Service must have either 'image' or 'build'
        if 'image' not in service_spec and 'build' not in service_spec:
            errors.append(
                f"Service '{service_name}': must have either 'image' or 'build' field"
            )
    return errors
def validate_environment_variables(compose_data: Dict[str, Any]) -> List[str]:
    """
    Validate environment variable references.

    Walks every service's ``environment`` (mapping or ``KEY=value`` list)
    looking for ``${VAR}`` interpolation.  Such references may be
    intentional, so they are deliberately NOT reported: this function
    currently always returns an empty list.  Null or malformed sections are
    skipped instead of raising.

    Args:
        compose_data: Parsed Docker Compose document.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    services = compose_data.get('services') if isinstance(compose_data, dict) else None
    if not isinstance(services, dict):
        return errors

    for service_spec in services.values():
        if not isinstance(service_spec, dict):
            continue
        environment = service_spec.get('environment')
        if isinstance(environment, dict):
            values = list(environment.values())
        elif isinstance(environment, list):
            # "KEY=value" entries; entries without '=' carry no value to inspect.
            values = [
                entry.partition('=')[2]
                for entry in environment
                if isinstance(entry, str) and '=' in entry
            ]
        else:
            continue
        for value in values:
            if isinstance(value, str) and '${' in value and '}' in value:
                # Unresolved ${VAR} reference: would only ever be a warning,
                # and there is no warning channel, so nothing is recorded.
                continue
    return errors
def parse_docker_compose_yaml(yaml_content: str) -> Dict[str, Any]:
    """
    Load a Docker Compose document from YAML text.

    Args:
        yaml_content: YAML string

    Returns:
        Whatever ``yaml.safe_load`` produces for the text; for a Compose
        file this is the configuration dictionary.
    """
    compose_data = yaml.safe_load(yaml_content)
    return compose_data
def validate_docker_compose_manifest(yaml_content: str) -> Tuple[bool, List[str]]:
    """
    Comprehensive validation of Docker Compose manifest.

    Parses the YAML, rejects documents whose top level (or ``services``
    section) is not a mapping, then runs the required-field check followed
    by the dependency, volume, network, port and environment checks.

    Args:
        yaml_content: YAML string of Docker Compose configuration

    Returns:
        Tuple of (is_valid, list_of_errors)
    """
    try:
        compose_data = parse_docker_compose_yaml(yaml_content)
    except yaml.YAMLError as e:
        return False, [f"YAML parsing error: {e}"]

    if not compose_data:
        return False, ["Empty Docker Compose file"]
    if not isinstance(compose_data, dict):
        # A top-level list or scalar is valid YAML but not a Compose file.
        return False, ["Docker Compose file must be a mapping at the top level"]

    if 'services' in compose_data and not isinstance(compose_data['services'], dict):
        if compose_data['services']:
            return False, ["'services' section must be a mapping of service names to definitions"]
        return False, ["'services' section is empty"]

    errors: List[str] = []
    errors.extend(validate_required_fields(compose_data))

    # The cross-reference checks assume mappings throughout.  Malformed
    # service entries were already reported above, so hand the remaining
    # checks a view in which they (and null top-level sections) are empty.
    checked = dict(compose_data)
    checked['services'] = {
        name: spec if isinstance(spec, dict) else {}
        for name, spec in compose_data.get('services', {}).items()
    }
    for section in ('volumes', 'networks'):
        if not isinstance(checked.get(section), dict):
            checked[section] = {}

    errors.extend(validate_service_dependencies(checked))
    errors.extend(validate_volume_references(checked))
    errors.extend(validate_network_references(checked))
    errors.extend(validate_port_conflicts(checked))
    errors.extend(validate_environment_variables(checked))
    return len(errors) == 0, errors

View file

@ -0,0 +1,269 @@
"""
Kubernetes manifest semantic validation.
"""
import yaml
from typing import List, Dict, Any, Tuple
def validate_selector_labels_match(resources: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that Deployment selectors match pod template labels.

    Every ``spec.selector.matchLabels`` pair must appear with the same value
    in ``spec.template.metadata.labels``.  Keys that are present but null
    are treated as empty, and non-mapping resources are skipped.

    Args:
        resources: Parsed Kubernetes resources.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Deployment':
            continue
        # `.get(key, {})` returns None when the key exists with a null value,
        # hence the `or {}` at each level.
        name = (resource.get('metadata') or {}).get('name', 'unknown')
        spec = resource.get('spec') or {}
        selector = (spec.get('selector') or {}).get('matchLabels') or {}
        template_metadata = (spec.get('template') or {}).get('metadata') or {}
        pod_labels = template_metadata.get('labels') or {}
        for key, value in selector.items():
            if pod_labels.get(key) != value:
                errors.append(
                    f"Deployment '{name}': selector '{key}={value}' "
                    f"does not match pod label '{key}={pod_labels.get(key)}'"
                )
    return errors
def validate_service_selectors(resources: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that Service selectors match Deployment labels.

    A Service is compared against the Deployment that has the same
    ``metadata.name`` (Services without a same-named Deployment are not
    checked).  Every selector pair must appear in that Deployment's pod
    template labels.  A Deployment with no pod labels still counts as a
    match target, so a non-empty selector against it is reported.

    Args:
        resources: Parsed Kubernetes resources.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    # Deployment name -> pod template labels
    deployment_labels: Dict[str, Dict[str, Any]] = {}
    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Deployment':
            continue
        name = (resource.get('metadata') or {}).get('name')
        if name:
            template = (resource.get('spec') or {}).get('template') or {}
            deployment_labels[name] = (template.get('metadata') or {}).get('labels') or {}

    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Service':
            continue
        service_name = (resource.get('metadata') or {}).get('name', 'unknown')
        selector = (resource.get('spec') or {}).get('selector') or {}
        # Find matching deployment (assume service name matches deployment name)
        matching_deployment = deployment_labels.get(service_name)
        # Compare against None, not truthiness: an empty label set is still a
        # deployment whose pods the selector fails to match.
        if matching_deployment is None:
            continue
        for key, value in selector.items():
            if matching_deployment.get(key) != value:
                errors.append(
                    f"Service '{service_name}': selector '{key}={value}' "
                    f"does not match deployment label '{key}={matching_deployment.get(key)}'"
                )
    return errors
def validate_volume_references(resources: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that volumeMounts reference defined volumes.

    For each Deployment, every ``volumeMounts[].name`` of its containers and
    init containers must name an entry in the pod template's ``volumes``.
    Keys that are present but null are treated as empty.

    Args:
        resources: Parsed Kubernetes resources.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Deployment':
            continue
        name = (resource.get('metadata') or {}).get('name', 'unknown')
        template = (resource.get('spec') or {}).get('template') or {}
        pod_spec = template.get('spec') or {}

        volume_names = {
            volume.get('name')
            for volume in pod_spec.get('volumes') or []
            if isinstance(volume, dict) and volume.get('name')
        }

        # Init containers mount from the same pod-level volume list.
        containers = list(pod_spec.get('containers') or [])
        containers.extend(pod_spec.get('initContainers') or [])
        for container in containers:
            if not isinstance(container, dict):
                continue
            container_name = container.get('name', 'unknown')
            for mount in container.get('volumeMounts') or []:
                if not isinstance(mount, dict):
                    continue
                mount_name = mount.get('name')
                if mount_name and mount_name not in volume_names:
                    errors.append(
                        f"Deployment '{name}', container '{container_name}': "
                        f"volumeMount '{mount_name}' references undefined volume"
                    )
    return errors
def validate_configmap_references(resources: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that ConfigMap/Secret references exist in manifest.

    Only Deployment pod-template ``volumes`` are inspected
    (``configMap.name`` and ``secret.secretName``).  Sources marked
    ``optional: true`` are allowed to be absent.  Keys that are present but
    null are treated as empty.

    Args:
        resources: Parsed Kubernetes resources.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    # Names of every ConfigMap / Secret declared in this manifest
    configmaps = set()
    secrets = set()
    for resource in resources:
        if not isinstance(resource, dict):
            continue
        kind = resource.get('kind')
        name = (resource.get('metadata') or {}).get('name')
        if kind == 'ConfigMap' and name:
            configmaps.add(name)
        elif kind == 'Secret' and name:
            secrets.add(name)

    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Deployment':
            continue
        deployment_name = (resource.get('metadata') or {}).get('name', 'unknown')
        template = (resource.get('spec') or {}).get('template') or {}
        volumes = (template.get('spec') or {}).get('volumes') or []
        for volume in volumes:
            if not isinstance(volume, dict):
                continue
            configmap_source = volume.get('configMap') or {}
            configmap_ref = configmap_source.get('name')
            if (
                configmap_ref
                and not configmap_source.get('optional')
                and configmap_ref not in configmaps
            ):
                errors.append(
                    f"Deployment '{deployment_name}': "
                    f"references undefined ConfigMap '{configmap_ref}'"
                )
            secret_source = volume.get('secret') or {}
            secret_ref = secret_source.get('secretName')
            if (
                secret_ref
                and not secret_source.get('optional')
                and secret_ref not in secrets
            ):
                errors.append(
                    f"Deployment '{deployment_name}': "
                    f"references undefined Secret '{secret_ref}'"
                )
    return errors
def validate_port_consistency(resources: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that Service targetPorts match container ports.

    A Service is compared against the Deployment with the same
    ``metadata.name``.  Only integer ``targetPort`` values are checked, and
    only when that Deployment declares at least one ``containerPort``.
    Keys that are present but null are treated as empty.

    Args:
        resources: Parsed Kubernetes resources.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []

    # Deployment name -> declared containerPort values
    deployment_ports: Dict[str, List[Any]] = {}
    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Deployment':
            continue
        name = (resource.get('metadata') or {}).get('name')
        template = (resource.get('spec') or {}).get('template') or {}
        containers = (template.get('spec') or {}).get('containers') or []
        ports = [
            port['containerPort']
            for container in containers
            if isinstance(container, dict)
            for port in container.get('ports') or []
            if isinstance(port, dict) and port.get('containerPort')
        ]
        if name:
            deployment_ports[name] = ports

    for resource in resources:
        if not isinstance(resource, dict) or resource.get('kind') != 'Service':
            continue
        service_name = (resource.get('metadata') or {}).get('name', 'unknown')
        service_ports = (resource.get('spec') or {}).get('ports') or []
        # Assume service name matches deployment name
        deployment_port_list = deployment_ports.get(service_name, [])
        # Only validate port consistency if deployment explicitly lists ports
        if not deployment_port_list:
            continue
        for port_spec in service_ports:
            if not isinstance(port_spec, dict):
                continue
            target_port = port_spec.get('targetPort')
            # Named (string) target ports are not resolved here.
            if isinstance(target_port, int) and target_port not in deployment_port_list:
                errors.append(
                    f"Service '{service_name}': "
                    f"targetPort {target_port} not found in deployment container ports"
                )
    return errors
def validate_required_fields(resources: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that required Kubernetes fields are present.

    Each resource must be a mapping with non-empty ``apiVersion``, ``kind``
    and ``metadata``, and ``metadata`` must carry a ``name``.  Resources are
    identified in messages by their position in ``resources``.

    Args:
        resources: Parsed Kubernetes resources.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    for idx, resource in enumerate(resources):
        if not isinstance(resource, dict):
            # e.g. a YAML document that is a bare list or scalar
            errors.append(f"Resource {idx}: must be a mapping")
            continue
        if not resource.get('apiVersion'):
            errors.append(f"Resource {idx}: missing apiVersion")
        if not resource.get('kind'):
            errors.append(f"Resource {idx}: missing kind")
        metadata = resource.get('metadata')
        if not metadata:
            errors.append(f"Resource {idx}: missing metadata")
        elif not isinstance(metadata, dict) or not metadata.get('name'):
            errors.append(
                f"Resource {idx} ({resource.get('kind', 'unknown')}): missing metadata.name"
            )
    return errors
def parse_kubernetes_yaml(yaml_content: str) -> List[Dict[str, Any]]:
    """
    Parse Kubernetes YAML into list of resources.

    Empty documents are skipped.  A document with ``kind: List`` is replaced
    by its non-empty ``items``.  Documents that are not mappings are passed
    through unchanged so that validation can report them.

    Args:
        yaml_content: YAML string (may contain multiple documents)

    Returns:
        List of resource dictionaries
    """
    resources = []
    for doc in yaml.safe_load_all(yaml_content):
        if not doc:  # Skip empty documents
            continue
        # Only mappings have a `kind`; a bare list/scalar document must not
        # reach `.get`.
        if isinstance(doc, dict) and doc.get('kind') == 'List' and 'items' in doc:
            items = doc['items']
            # `items:` with no value parses to None, i.e. an empty List.
            if isinstance(items, list):
                resources.extend(item for item in items if item)
        else:
            resources.append(doc)
    return resources
def validate_kubernetes_manifest(yaml_content: str) -> Tuple[bool, List[str]]:
    """
    Run every enabled semantic check over a Kubernetes manifest.

    Args:
        yaml_content: YAML string of Kubernetes resources

    Returns:
        Tuple of (is_valid, list_of_errors)
    """
    try:
        parsed = parse_kubernetes_yaml(yaml_content)
    except yaml.YAMLError as exc:
        return False, [f"YAML parsing error: {exc}"]

    if not parsed:
        return False, ["No resources found in manifest"]

    problems = []
    problems += validate_required_fields(parsed)
    problems += validate_selector_labels_match(parsed)
    problems += validate_service_selectors(parsed)
    problems += validate_volume_references(parsed)
    problems += validate_configmap_references(parsed)
    # validate_port_consistency is deliberately not run: it is too strict
    # for generated configs.
    return not problems, problems

View file

@ -0,0 +1,235 @@
"""
TrustGraph configuration semantic validation.
"""
import json
from typing import Dict, Any, List, Tuple, Set
def validate_service_references(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that configured services reference valid modules.

    Each list item must be a dictionary with a non-empty ``name``.  Names
    outside the known-module set are tolerated (they may be new modules), so
    they produce no error.  Configurations that are not a list (for example
    the dict-shaped output format) have no per-service entries and yield no
    errors here.

    Args:
        config: Parsed TrustGraph configuration.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(config, list):
        # Enumerating a dict would walk its keys and misreport each one.
        return errors

    # Build set of known module names (this would need to be comprehensive)
    known_modules = {
        'pulsar', 'triple-store-cassandra', 'object-store-cassandra',
        'vector-store-qdrant', 'vector-store-milvus', 'vector-store-pinecone',
        'graph-rag', 'text-completion',
        'embeddings-hf', 'embeddings-fastembed', 'embeddings-openai',
        'openai', 'anthropic', 'ollama', 'bedrock', 'vertexai',
        'trustgraph-base', 'grafana', 'prometheus',
        'override-recursive-chunker', 'override-text-splitter',
        'neo4j', 'astra',
    }

    for idx, service in enumerate(config):
        if not isinstance(service, dict):
            errors.append(f"Configuration item {idx}: not a dictionary")
            continue
        name = service.get('name')
        if not name:
            errors.append(f"Configuration item {idx}: missing 'name' field")
        elif name not in known_modules:
            # Unknown names might be intentional (new modules); this would
            # only ever be a warning, so nothing is recorded.
            continue
    return errors
def validate_parameter_types(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that module parameters are reasonable.

    Parameters are matched by substring of their name:

    * ``chunk-size``: positive number
    * ``chunk-overlap``: non-negative number
    * ``max-output-tokens``: positive integer
    * ``temperature``: number between 0 and 2 inclusive

    Booleans are never accepted as numbers.  Non-dictionary items and
    non-list configurations are skipped.

    Args:
        config: Parsed TrustGraph configuration.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(config, list):
        return errors

    def is_number(value: Any, integral: bool = False) -> bool:
        # bool is a subclass of int, so `True` would otherwise pass as 1.
        if isinstance(value, bool):
            return False
        return isinstance(value, int if integral else (int, float))

    # (name fragment, integer required, range check, message suffix)
    rules = (
        ('chunk-size', False, lambda v: v > 0, "should be positive number"),
        ('chunk-overlap', False, lambda v: v >= 0, "should be non-negative number"),
        ('max-output-tokens', True, lambda v: v > 0, "should be positive integer"),
        ('temperature', False, lambda v: 0 <= v <= 2, "should be between 0 and 2"),
    )

    for idx, service in enumerate(config):
        if not isinstance(service, dict):
            continue
        name = service.get('name', f'item-{idx}')
        parameters = service.get('parameters', {})
        if not isinstance(parameters, dict):
            errors.append(f"Service '{name}': parameters must be a dictionary")
            continue
        for param_name, param_value in parameters.items():
            for fragment, integral, in_range, expectation in rules:
                if fragment not in param_name:
                    continue
                if not is_number(param_value, integral) or not in_range(param_value):
                    errors.append(
                        f"Service '{name}': parameter '{param_name}' {expectation}"
                    )
    return errors
def validate_storage_consistency(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate that graph/object/vector stores are configured consistently.

    When a service named ``graph-rag`` is present, at least one service name
    containing each of ``triple-store``, ``object-store`` and
    ``vector-store`` must also be present.  Items without a string ``name``
    are ignored, as are non-list configurations.

    Args:
        config: Parsed TrustGraph configuration.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(config, list):
        return errors

    # Keep only string names: a missing name is None, and `'x' in None`
    # raises TypeError.
    service_names = [
        s.get('name') for s in config
        if isinstance(s, dict) and isinstance(s.get('name'), str)
    ]

    # If using graph-rag, should have all three stores
    if 'graph-rag' not in service_names:
        return errors

    for store in ('triple-store', 'object-store', 'vector-store'):
        if not any(store in name for name in service_names):
            errors.append(
                f"Configuration uses 'graph-rag' but no {store} is configured"
            )
    return errors
def validate_llm_configuration(config: List[Dict[str, Any]]) -> List[str]:
    """
    Validate LLM configuration is present and reasonable.

    Requires at least one service whose name is a known LLM provider and at
    least one service whose name contains ``embeddings``.  Items without a
    string ``name`` are ignored; non-list configurations carry no service
    entries and yield no errors here.

    Args:
        config: Parsed TrustGraph configuration.

    Returns:
        List of error messages (empty if valid)
    """
    errors: List[str] = []
    if not isinstance(config, list):
        return errors

    # Keep only string names: a missing name is None, and `'x' in None`
    # raises TypeError.
    service_names = [
        s.get('name') for s in config
        if isinstance(s, dict) and isinstance(s.get('name'), str)
    ]

    # Check for at least one LLM provider
    llm_providers = {'openai', 'anthropic', 'ollama', 'bedrock', 'vertexai', 'vllm', 'llamacpp'}
    if not any(name in llm_providers for name in service_names):
        # Sorted so the message is stable; set order varies between runs.
        errors.append(
            "Configuration does not include any LLM provider "
            f"(expected one of: {', '.join(sorted(llm_providers))})"
        )

    # Check for embeddings
    if not any('embeddings' in name for name in service_names):
        errors.append(
            "Configuration does not include any embeddings provider"
        )
    return errors
def validate_required_structure(config: Any) -> List[str]:
    """
    Check the top-level shape of a TrustGraph configuration.

    Two shapes are accepted: a dict (output format), which only has to be
    non-empty, and a list of services (input format), where every item must
    be a dictionary carrying both 'name' and 'parameters'.

    Returns:
        List of error messages (empty if valid)
    """
    # Output format: any non-empty dict passes.
    if isinstance(config, dict):
        return [] if config else ["Configuration is empty"]

    # Anything that is neither dict nor list is rejected outright.
    if not isinstance(config, list):
        return ["Configuration must be a list or dict"]

    # Input format: list of service entries.
    problems = []
    if len(config) == 0:
        problems.append("Configuration is empty")

    for position, entry in enumerate(config):
        if isinstance(entry, dict):
            if 'name' not in entry:
                problems.append(f"Configuration item {position}: missing required field 'name'")
            if 'parameters' not in entry:
                problems.append(f"Configuration item {position}: missing required field 'parameters'")
        else:
            problems.append(f"Configuration item {position}: must be a dictionary")
    return problems
def parse_trustgraph_config(json_content: str):
    """
    Decode a TrustGraph configuration from JSON text.

    Args:
        json_content: JSON string

    Returns:
        The decoded configuration (dict or list depending on format)
    """
    config = json.loads(json_content)
    return config
def validate_trustgraph_config(json_content: str) -> Tuple[bool, List[str]]:
    """
    Comprehensive validation of TrustGraph configuration.

    The structural check runs on every configuration.  The per-service
    checks (service references, parameter types, storage consistency, LLM
    configuration) run only for the list-of-services input format; the
    dict-shaped output format has no service entries for them to inspect.

    Args:
        json_content: JSON string of TrustGraph configuration

    Returns:
        Tuple of (is_valid, list_of_errors)
    """
    try:
        config = parse_trustgraph_config(json_content)
    except json.JSONDecodeError as e:
        return False, [f"JSON parsing error: {e}"]

    errors: List[str] = []
    errors.extend(validate_required_structure(config))

    # The remaining validators iterate service entries.  Feeding them a dict
    # would walk its keys and flag a valid output-format config as invalid,
    # and a scalar would raise.
    if isinstance(config, list):
        errors.extend(validate_service_references(config))
        errors.extend(validate_parameter_types(config))
        errors.extend(validate_storage_consistency(config))
        errors.extend(validate_llm_configuration(config))
    return len(errors) == 0, errors