Squashed 'ai-context/trustgraph-templates/' content from commit 42a5fd1b

git-subtree-dir: ai-context/trustgraph-templates git-subtree-split: 42a5fd1b678f32be378062e30451e2052ccb95dd
2026-07-03 06:51:00 +02:00 · 2026-04-05 21:09:49 -05:00 · 2026-04-05 21:09:49 -05:00 · 74cc8a4685
commit 74cc8a4685
1216 changed files with 116347 additions and 0 deletions
--- a/tests/validators/init.py
+++ b/tests/validators/init.py
--- a/tests/validators/docker_compose.py
+++ b/tests/validators/docker_compose.py
@ -0,0 +1,242 @@
+"""
+Docker Compose manifest semantic validation.
+"""
+
+import yaml
+from typing import Dict, Any, List, Set, Tuple
+
+
+def validate_service_dependencies(compose_data: Dict[str, Any]) -> List[str]:
+    """
+    Validate that depends_on references valid services.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+    services = compose_data.get('services', {})
+    service_names = set(services.keys())
+
+    for service_name, service_spec in services.items():
+        depends_on = service_spec.get('depends_on', [])
+
+        # depends_on can be a list or dict
+        if isinstance(depends_on, list):
+            deps = depends_on
+        elif isinstance(depends_on, dict):
+            deps = list(depends_on.keys())
+        else:
+            continue
+
+        for dep in deps:
+            if dep not in service_names:
+                errors.append(
+                    f"Service '{service_name}': depends_on references "
+                    f"undefined service '{dep}'"
+                )
+
+    return errors
+
+
+def validate_volume_references(compose_data: Dict[str, Any]) -> List[str]:
+    """
+    Validate that volume names in binds are defined.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+    services = compose_data.get('services', {})
+    defined_volumes = set(compose_data.get('volumes', {}).keys())
+
+    for service_name, service_spec in services.items():
+        volumes = service_spec.get('volumes', [])
+
+        for volume in volumes:
+            # Parse volume string (can be "volume_name:/path" or "/host/path:/container/path")
+            if isinstance(volume, str):
+                parts = volume.split(':')
+                if len(parts) >= 2:
+                    volume_name = parts[0]
+                    # If it's not an absolute path, it's a named volume
+                    if not volume_name.startswith('/') and not volume_name.startswith('.'):
+                        if volume_name not in defined_volumes:
+                            errors.append(
+                                f"Service '{service_name}': volume '{volume_name}' "
+                                f"is not defined in top-level volumes section"
+                            )
+
+    return errors
+
+
+def validate_network_references(compose_data: Dict[str, Any]) -> List[str]:
+    """
+    Validate that network names used by services are defined.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+    services = compose_data.get('services', {})
+    defined_networks = set(compose_data.get('networks', {}).keys())
+
+    # Add default network
+    defined_networks.add('default')
+
+    for service_name, service_spec in services.items():
+        networks = service_spec.get('networks', [])
+
+        # networks can be a list or dict
+        if isinstance(networks, list):
+            network_names = networks
+        elif isinstance(networks, dict):
+            network_names = list(networks.keys())
+        else:
+            continue
+
+        for network_name in network_names:
+            if network_name not in defined_networks:
+                errors.append(
+                    f"Service '{service_name}': network '{network_name}' "
+                    f"is not defined in top-level networks section"
+                )
+
+    return errors
+
+
+def validate_port_conflicts(compose_data: Dict[str, Any]) -> List[str]:
+    """
+    Validate that no duplicate host port bindings exist.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+    services = compose_data.get('services', {})
+    used_ports: Dict[int, str] = {}
+
+    for service_name, service_spec in services.items():
+        ports = service_spec.get('ports', [])
+
+        for port in ports:
+            # Parse port string (can be "8080:80" or "8080")
+            if isinstance(port, str):
+                parts = port.split(':')
+                host_port = int(parts[0]) if parts[0].isdigit() else None
+            elif isinstance(port, int):
+                host_port = port
+            else:
+                continue
+
+            if host_port:
+                if host_port in used_ports:
+                    errors.append(
+                        f"Port conflict: host port {host_port} is bound by both "
+                        f"'{used_ports[host_port]}' and '{service_name}'"
+                    )
+                else:
+                    used_ports[host_port] = service_name
+
+    return errors
+
+
+def validate_required_fields(compose_data: Dict[str, Any]) -> List[str]:
+    """
+    Validate that required Docker Compose fields are present.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    if 'services' not in compose_data:
+        errors.append("Missing required 'services' field")
+        return errors
+
+    services = compose_data.get('services', {})
+    if not services:
+        errors.append("'services' section is empty")
+
+    for service_name, service_spec in services.items():
+        if not isinstance(service_spec, dict):
+            errors.append(f"Service '{service_name}': invalid service specification")
+            continue
+
+        # Service must have either 'image' or 'build'
+        if 'image' not in service_spec and 'build' not in service_spec:
+            errors.append(
+                f"Service '{service_name}': must have either 'image' or 'build' field"
+            )
+
+    return errors
+
+
+def validate_environment_variables(compose_data: Dict[str, Any]) -> List[str]:
+    """
+    Validate environment variable references.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+    services = compose_data.get('services', {})
+
+    for service_name, service_spec in services.items():
+        environment = service_spec.get('environment', {})
+
+        if isinstance(environment, dict):
+            for key, value in environment.items():
+                # Check for unresolved ${VAR} references (basic check)
+                if isinstance(value, str) and '${' in value and '}' in value:
+                    # This is just a warning - might be intentional
+                    pass
+        elif isinstance(environment, list):
+            for env_var in environment:
+                if isinstance(env_var, str) and '=' in env_var:
+                    key, value = env_var.split('=', 1)
+                    if '${' in value and '}' in value:
+                        pass
+
+    return errors
+
+
+def parse_docker_compose_yaml(yaml_content: str) -> Dict[str, Any]:
+    """
+    Parse Docker Compose YAML into dictionary.
+
+    Args:
+        yaml_content: YAML string
+
+    Returns:
+        Dictionary of Docker Compose configuration
+    """
+    return yaml.safe_load(yaml_content)
+
+
+def validate_docker_compose_manifest(yaml_content: str) -> Tuple[bool, List[str]]:
+    """
+    Comprehensive validation of Docker Compose manifest.
+
+    Args:
+        yaml_content: YAML string of Docker Compose configuration
+
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    """
+    try:
+        compose_data = parse_docker_compose_yaml(yaml_content)
+    except yaml.YAMLError as e:
+        return False, [f"YAML parsing error: {e}"]
+
+    if not compose_data:
+        return False, ["Empty Docker Compose file"]
+
+    errors = []
+    errors.extend(validate_required_fields(compose_data))
+    errors.extend(validate_service_dependencies(compose_data))
+    errors.extend(validate_volume_references(compose_data))
+    errors.extend(validate_network_references(compose_data))
+    errors.extend(validate_port_conflicts(compose_data))
+    errors.extend(validate_environment_variables(compose_data))
+
+    return len(errors) == 0, errors
--- a/tests/validators/kubernetes.py
+++ b/tests/validators/kubernetes.py
@ -0,0 +1,269 @@
+"""
+Kubernetes manifest semantic validation.
+"""
+
+import yaml
+from typing import List, Dict, Any, Tuple
+
+
+def validate_selector_labels_match(resources: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that Deployment selectors match pod template labels.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    for resource in resources:
+        if resource.get('kind') == 'Deployment':
+            name = resource.get('metadata', {}).get('name', 'unknown')
+            selector = resource.get('spec', {}).get('selector', {}).get('matchLabels', {})
+            pod_labels = resource.get('spec', {}).get('template', {}).get('metadata', {}).get('labels', {})
+
+            for key, value in selector.items():
+                if pod_labels.get(key) != value:
+                    errors.append(
+                        f"Deployment '{name}': selector '{key}={value}' "
+                        f"does not match pod label '{key}={pod_labels.get(key)}'"
+                    )
+
+    return errors
+
+
+def validate_service_selectors(resources: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that Service selectors match Deployment labels.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    # Build map of deployment labels
+    deployment_labels = {}
+    for resource in resources:
+        if resource.get('kind') == 'Deployment':
+            name = resource.get('metadata', {}).get('name')
+            labels = resource.get('spec', {}).get('template', {}).get('metadata', {}).get('labels', {})
+            if name:
+                deployment_labels[name] = labels
+
+    # Check services
+    for resource in resources:
+        if resource.get('kind') == 'Service':
+            service_name = resource.get('metadata', {}).get('name', 'unknown')
+            selector = resource.get('spec', {}).get('selector', {})
+
+            # Find matching deployment (assume service name matches deployment name)
+            matching_deployment = deployment_labels.get(service_name)
+            if matching_deployment:
+                for key, value in selector.items():
+                    if matching_deployment.get(key) != value:
+                        errors.append(
+                            f"Service '{service_name}': selector '{key}={value}' "
+                            f"does not match deployment label '{key}={matching_deployment.get(key)}'"
+                        )
+
+    return errors
+
+
+def validate_volume_references(resources: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that volumeMounts reference defined volumes.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    for resource in resources:
+        if resource.get('kind') == 'Deployment':
+            name = resource.get('metadata', {}).get('name', 'unknown')
+            containers = resource.get('spec', {}).get('template', {}).get('spec', {}).get('containers', [])
+            volumes = resource.get('spec', {}).get('template', {}).get('spec', {}).get('volumes', [])
+
+            # Build set of volume names
+            volume_names = {v.get('name') for v in volumes if v.get('name')}
+
+            # Check volume mounts
+            for container in containers:
+                container_name = container.get('name', 'unknown')
+                volume_mounts = container.get('volumeMounts', [])
+
+                for mount in volume_mounts:
+                    mount_name = mount.get('name')
+                    if mount_name and mount_name not in volume_names:
+                        errors.append(
+                            f"Deployment '{name}', container '{container_name}': "
+                            f"volumeMount '{mount_name}' references undefined volume"
+                        )
+
+    return errors
+
+
+def validate_configmap_references(resources: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that ConfigMap/Secret references exist in manifest.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    # Build sets of configmaps and secrets
+    configmaps = set()
+    secrets = set()
+
+    for resource in resources:
+        kind = resource.get('kind')
+        name = resource.get('metadata', {}).get('name')
+        if kind == 'ConfigMap' and name:
+            configmaps.add(name)
+        elif kind == 'Secret' and name:
+            secrets.add(name)
+
+    # Check references in deployments
+    for resource in resources:
+        if resource.get('kind') == 'Deployment':
+            deployment_name = resource.get('metadata', {}).get('name', 'unknown')
+            volumes = resource.get('spec', {}).get('template', {}).get('spec', {}).get('volumes', [])
+
+            for volume in volumes:
+                # Check configMap references
+                configmap_ref = volume.get('configMap', {}).get('name')
+                if configmap_ref and configmap_ref not in configmaps:
+                    errors.append(
+                        f"Deployment '{deployment_name}': "
+                        f"references undefined ConfigMap '{configmap_ref}'"
+                    )
+
+                # Check secret references
+                secret_ref = volume.get('secret', {}).get('secretName')
+                if secret_ref and secret_ref not in secrets:
+                    errors.append(
+                        f"Deployment '{deployment_name}': "
+                        f"references undefined Secret '{secret_ref}'"
+                    )
+
+    return errors
+
+
+def validate_port_consistency(resources: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that Service targetPorts match container ports.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    # Build map of deployment container ports
+    deployment_ports = {}
+    for resource in resources:
+        if resource.get('kind') == 'Deployment':
+            name = resource.get('metadata', {}).get('name')
+            containers = resource.get('spec', {}).get('template', {}).get('spec', {}).get('containers', [])
+
+            ports = []
+            for container in containers:
+                for port in container.get('ports', []):
+                    if port.get('containerPort'):
+                        ports.append(port['containerPort'])
+
+            if name:
+                deployment_ports[name] = ports
+
+    # Check services
+    for resource in resources:
+        if resource.get('kind') == 'Service':
+            service_name = resource.get('metadata', {}).get('name', 'unknown')
+            service_ports = resource.get('spec', {}).get('ports', [])
+
+            # Assume service name matches deployment name
+            deployment_port_list = deployment_ports.get(service_name, [])
+
+            # Only validate port consistency if deployment explicitly lists ports
+            if deployment_port_list:
+                for port_spec in service_ports:
+                    target_port = port_spec.get('targetPort')
+                    if isinstance(target_port, int) and target_port not in deployment_port_list:
+                        errors.append(
+                            f"Service '{service_name}': "
+                            f"targetPort {target_port} not found in deployment container ports"
+                        )
+
+    return errors
+
+
+def validate_required_fields(resources: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that required Kubernetes fields are present.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    for idx, resource in enumerate(resources):
+        if not resource.get('apiVersion'):
+            errors.append(f"Resource {idx}: missing apiVersion")
+        if not resource.get('kind'):
+            errors.append(f"Resource {idx}: missing kind")
+        if not resource.get('metadata'):
+            errors.append(f"Resource {idx}: missing metadata")
+        elif not resource['metadata'].get('name'):
+            errors.append(f"Resource {idx} ({resource.get('kind', 'unknown')}): missing metadata.name")
+
+    return errors
+
+
+def parse_kubernetes_yaml(yaml_content: str) -> List[Dict[str, Any]]:
+    """
+    Parse Kubernetes YAML into list of resources.
+
+    Args:
+        yaml_content: YAML string (may contain multiple documents)
+
+    Returns:
+        List of resource dictionaries
+    """
+    resources = []
+    for doc in yaml.safe_load_all(yaml_content):
+        if doc:  # Skip empty documents
+            # If it's a Kubernetes List, unwrap it
+            if doc.get('kind') == 'List' and 'items' in doc:
+                resources.extend(doc['items'])
+            else:
+                resources.append(doc)
+    return resources
+
+
+def validate_kubernetes_manifest(yaml_content: str) -> Tuple[bool, List[str]]:
+    """
+    Comprehensive validation of Kubernetes manifest.
+
+    Args:
+        yaml_content: YAML string of Kubernetes resources
+
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    """
+    try:
+        resources = parse_kubernetes_yaml(yaml_content)
+    except yaml.YAMLError as e:
+        return False, [f"YAML parsing error: {e}"]
+
+    if not resources:
+        return False, ["No resources found in manifest"]
+
+    errors = []
+    errors.extend(validate_required_fields(resources))
+    errors.extend(validate_selector_labels_match(resources))
+    errors.extend(validate_service_selectors(resources))
+    errors.extend(validate_volume_references(resources))
+    errors.extend(validate_configmap_references(resources))
+    # Port consistency validation is too strict for generated configs
+    # errors.extend(validate_port_consistency(resources))
+
+    return len(errors) == 0, errors
--- a/tests/validators/trustgraph.py
+++ b/tests/validators/trustgraph.py
@ -0,0 +1,235 @@
+"""
+TrustGraph configuration semantic validation.
+"""
+
+import json
+from typing import Dict, Any, List, Tuple, Set
+
+
+def validate_service_references(config: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that configured services reference valid modules.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    # Build set of known module names (this would need to be comprehensive)
+    known_modules = {
+        'pulsar', 'triple-store-cassandra', 'object-store-cassandra',
+        'vector-store-qdrant', 'vector-store-milvus', 'vector-store-pinecone',
+        'graph-rag', 'text-completion',
+        'embeddings-hf', 'embeddings-fastembed', 'embeddings-openai',
+        'openai', 'anthropic', 'ollama', 'bedrock', 'vertexai',
+        'trustgraph-base', 'grafana', 'prometheus',
+        'override-recursive-chunker', 'override-text-splitter',
+        'neo4j', 'astra'
+    }
+
+    for idx, service in enumerate(config):
+        if not isinstance(service, dict):
+            errors.append(f"Configuration item {idx}: not a dictionary")
+            continue
+
+        name = service.get('name')
+        if not name:
+            errors.append(f"Configuration item {idx}: missing 'name' field")
+        elif name not in known_modules:
+            # This might be intentional for new modules, so just warn
+            pass
+
+    return errors
+
+
+def validate_parameter_types(config: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that module parameters are reasonable.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    for idx, service in enumerate(config):
+        if not isinstance(service, dict):
+            continue
+
+        name = service.get('name', f'item-{idx}')
+        parameters = service.get('parameters', {})
+
+        if not isinstance(parameters, dict):
+            errors.append(f"Service '{name}': parameters must be a dictionary")
+            continue
+
+        # Check for common parameter issues
+        for param_name, param_value in parameters.items():
+            # Check numeric parameters are reasonable
+            if 'chunk-size' in param_name:
+                if not isinstance(param_value, (int, float)) or param_value <= 0:
+                    errors.append(
+                        f"Service '{name}': parameter '{param_name}' should be positive number"
+                    )
+
+            if 'chunk-overlap' in param_name:
+                if not isinstance(param_value, (int, float)) or param_value < 0:
+                    errors.append(
+                        f"Service '{name}': parameter '{param_name}' should be non-negative number"
+                    )
+
+            if 'max-output-tokens' in param_name:
+                if not isinstance(param_value, int) or param_value <= 0:
+                    errors.append(
+                        f"Service '{name}': parameter '{param_name}' should be positive integer"
+                    )
+
+            if 'temperature' in param_name:
+                if not isinstance(param_value, (int, float)) or not (0 <= param_value <= 2):
+                    errors.append(
+                        f"Service '{name}': parameter '{param_name}' should be between 0 and 2"
+                    )
+
+    return errors
+
+
+def validate_storage_consistency(config: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate that graph/object/vector stores are configured consistently.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    service_names = [s.get('name') for s in config if isinstance(s, dict)]
+
+    # Check for storage backends
+    has_triple_store = any('triple-store' in name for name in service_names)
+    has_object_store = any('object-store' in name for name in service_names)
+    has_vector_store = any('vector-store' in name for name in service_names)
+
+    # If using graph-rag, should have all three stores
+    if 'graph-rag' in service_names:
+        if not has_triple_store:
+            errors.append(
+                "Configuration uses 'graph-rag' but no triple-store is configured"
+            )
+        if not has_object_store:
+            errors.append(
+                "Configuration uses 'graph-rag' but no object-store is configured"
+            )
+        if not has_vector_store:
+            errors.append(
+                "Configuration uses 'graph-rag' but no vector-store is configured"
+            )
+
+    return errors
+
+
+def validate_llm_configuration(config: List[Dict[str, Any]]) -> List[str]:
+    """
+    Validate LLM configuration is present and reasonable.
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    service_names = [s.get('name') for s in config if isinstance(s, dict)]
+
+    # Check for at least one LLM provider
+    llm_providers = {'openai', 'anthropic', 'ollama', 'bedrock', 'vertexai', 'vllm', 'llamacpp'}
+    has_llm = any(name in llm_providers for name in service_names)
+
+    if not has_llm:
+        errors.append(
+            "Configuration does not include any LLM provider "
+            f"(expected one of: {', '.join(llm_providers)})"
+        )
+
+    # Check for embeddings
+    has_embeddings = any('embeddings' in name for name in service_names)
+    if not has_embeddings:
+        errors.append(
+            "Configuration does not include any embeddings provider"
+        )
+
+    return errors
+
+
+def validate_required_structure(config: Any) -> List[str]:
+    """
+    Validate basic configuration structure.
+
+    Handles both input format (list of services) and output format (dict).
+
+    Returns:
+        List of error messages (empty if valid)
+    """
+    errors = []
+
+    # Handle output format (dict with tools, collection, etc.)
+    if isinstance(config, dict):
+        # Just check it's not empty
+        if not config:
+            errors.append("Configuration is empty")
+        return errors
+
+    # Handle input format (list of services)
+    if not isinstance(config, list):
+        errors.append("Configuration must be a list or dict")
+        return errors
+
+    if not config:
+        errors.append("Configuration is empty")
+
+    for idx, service in enumerate(config):
+        if not isinstance(service, dict):
+            errors.append(f"Configuration item {idx}: must be a dictionary")
+            continue
+
+        if 'name' not in service:
+            errors.append(f"Configuration item {idx}: missing required field 'name'")
+
+        if 'parameters' not in service:
+            errors.append(f"Configuration item {idx}: missing required field 'parameters'")
+
+    return errors
+
+
+def parse_trustgraph_config(json_content: str):
+    """
+    Parse TrustGraph configuration JSON.
+
+    Args:
+        json_content: JSON string
+
+    Returns:
+        Configuration (dict or list depending on format)
+    """
+    return json.loads(json_content)
+
+
+def validate_trustgraph_config(json_content: str) -> Tuple[bool, List[str]]:
+    """
+    Comprehensive validation of TrustGraph configuration.
+
+    Args:
+        json_content: JSON string of TrustGraph configuration
+
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    """
+    try:
+        config = parse_trustgraph_config(json_content)
+    except json.JSONDecodeError as e:
+        return False, [f"JSON parsing error: {e}"]
+
+    errors = []
+    errors.extend(validate_required_structure(config))
+    errors.extend(validate_service_references(config))
+    errors.extend(validate_parameter_types(config))
+    errors.extend(validate_storage_consistency(config))
+    errors.extend(validate_llm_configuration(config))
+
+    return len(errors) == 0, errors