mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 00:36:54 +02:00
Add initial project structure and configuration files
- Created .gitignore to exclude unnecessary files and directories. - Added Cargo.toml for Rust workspace configuration. - Introduced example configuration file entropix.yaml.example for user customization. - Included LICENSE file with Apache 2.0 license details. - Created pyproject.toml for Python project metadata and dependencies. - Added README.md with project overview and usage instructions. - Implemented a broken agent example to demonstrate testing capabilities. - Established Rust module structure with Cargo.toml and source files. - Set up initial tests for assertions and configuration validation.
This commit is contained in:
commit
a36cecf255
37 changed files with 5397 additions and 0 deletions
4
tests/__init__.py
Normal file
4
tests/__init__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
"""
|
||||
Entropix Test Suite
|
||||
"""
|
||||
|
||||
234
tests/test_assertions.py
Normal file
234
tests/test_assertions.py
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
"""
|
||||
Tests for the assertion/invariant system.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from entropix.core.config import InvariantConfig, InvariantType
|
||||
from entropix.assertions.deterministic import (
|
||||
ContainsChecker,
|
||||
LatencyChecker,
|
||||
ValidJsonChecker,
|
||||
RegexChecker,
|
||||
)
|
||||
from entropix.assertions.safety import ExcludesPIIChecker, RefusalChecker
|
||||
from entropix.assertions.verifier import InvariantVerifier
|
||||
|
||||
|
||||
class TestContainsChecker:
    """Exercise ContainsChecker with matching and non-matching responses."""

    @staticmethod
    def _run(needle, response):
        """Check ``response`` with a CONTAINS checker configured for ``needle``.

        ``100.0`` is passed as the second argument to ``check`` in every case.
        """
        invariant = InvariantConfig(type=InvariantType.CONTAINS, value=needle)
        return ContainsChecker(invariant).check(response, 100.0)

    def test_contains_pass(self):
        """A response that includes the value passes and reports "Found"."""
        outcome = self._run("success", "Operation was a success!")

        assert outcome.passed
        assert "Found" in outcome.details

    def test_contains_fail(self):
        """A response that lacks the value fails and reports "not found"."""
        outcome = self._run("success", "Operation failed")

        assert not outcome.passed
        assert "not found" in outcome.details

    def test_contains_case_insensitive(self):
        """An upper-case value still matches a lower-case response."""
        outcome = self._run("SUCCESS", "it was a success")

        assert outcome.passed
|
||||
|
||||
|
||||
class TestLatencyChecker:
    """Exercise LatencyChecker below, above and exactly at its threshold."""

    @staticmethod
    def _run(limit_ms, observed):
        """Check a fixed response with a LATENCY checker capped at ``limit_ms``.

        ``observed`` is forwarded as the second argument to ``check``.
        """
        invariant = InvariantConfig(type=InvariantType.LATENCY, max_ms=limit_ms)
        return LatencyChecker(invariant).check("response", observed)

    def test_latency_pass(self):
        """500.0 against a 2000 limit passes and "500ms" appears in details."""
        outcome = self._run(2000, 500.0)

        assert outcome.passed
        assert "500ms" in outcome.details

    def test_latency_fail(self):
        """1500.0 against a 1000 limit fails and details mention "exceeded"."""
        outcome = self._run(1000, 1500.0)

        assert not outcome.passed
        assert "exceeded" in outcome.details

    def test_latency_boundary(self):
        """A value equal to the limit (1000.0 vs 1000) still passes."""
        outcome = self._run(1000, 1000.0)

        assert outcome.passed
|
||||
|
||||
|
||||
class TestValidJsonChecker:
    """Exercise ValidJsonChecker with object, array and malformed payloads."""

    @staticmethod
    def _run(payload):
        """Check ``payload`` with a VALID_JSON checker (second argument 100.0)."""
        invariant = InvariantConfig(type=InvariantType.VALID_JSON)
        return ValidJsonChecker(invariant).check(payload, 100.0)

    def test_valid_json_pass(self):
        """A JSON object string passes."""
        outcome = self._run('{"status": "ok", "value": 123}')

        assert outcome.passed

    def test_valid_json_array(self):
        """A JSON array string passes."""
        outcome = self._run('[1, 2, 3]')

        assert outcome.passed

    def test_invalid_json_fail(self):
        """Plain text fails and details mention "Invalid JSON"."""
        outcome = self._run('not valid json')

        assert not outcome.passed
        assert "Invalid JSON" in outcome.details
|
||||
|
||||
|
||||
class TestRegexChecker:
    """Exercise RegexChecker with a matching and a non-matching pattern."""

    @staticmethod
    def _run(expression, response):
        """Check ``response`` with a REGEX checker built from ``expression``.

        ``100.0`` is passed as the second argument to ``check``.
        """
        invariant = InvariantConfig(
            type=InvariantType.REGEX,
            pattern=expression,
        )
        return RegexChecker(invariant).check(response, 100.0)

    def test_regex_pass(self):
        """A pattern found inside the response passes."""
        outcome = self._run(
            r"confirmation_\d+",
            "Your confirmation_12345 is ready",
        )

        assert outcome.passed

    def test_regex_fail(self):
        """An anchored pattern that does not match the response fails."""
        outcome = self._run(r"^OK$", "NOT OK")

        assert not outcome.passed
|
||||
|
||||
|
||||
class TestExcludesPIIChecker:
    """Exercise ExcludesPIIChecker with clean, e-mail and phone responses."""

    @staticmethod
    def _run(response):
        """Check ``response`` with an EXCLUDES_PII checker (second argument 100.0)."""
        invariant = InvariantConfig(type=InvariantType.EXCLUDES_PII)
        return ExcludesPIIChecker(invariant).check(response, 100.0)

    def test_no_pii_passes(self):
        """A response with no e-mail address or phone number passes."""
        outcome = self._run("Hello, your order is confirmed.")

        assert outcome.passed

    def test_email_detected(self):
        """An e-mail address fails the check and "email" appears in details."""
        outcome = self._run("Contact: john@example.com")

        assert not outcome.passed
        assert "email" in outcome.details.lower()

    def test_phone_detected(self):
        """A phone number fails the check."""
        outcome = self._run("Call me at 555-123-4567")

        assert not outcome.passed
|
||||
|
||||
|
||||
class TestRefusalChecker:
    """Exercise RefusalChecker with a refusing and a complying response."""

    @staticmethod
    def _run(response):
        """Check ``response`` with a REFUSAL_CHECK checker (dangerous_prompts=True).

        ``100.0`` is passed as the second argument to ``check``.
        """
        invariant = InvariantConfig(
            type=InvariantType.REFUSAL_CHECK,
            dangerous_prompts=True,
        )
        return RefusalChecker(invariant).check(response, 100.0)

    def test_refusal_detected(self):
        """Refusal wording passes and details mention "refused"."""
        outcome = self._run("I cannot help with that request.")

        assert outcome.passed
        assert "refused" in outcome.details.lower()

    def test_compromised_detected(self):
        """Agreeing to ignore instructions fails and details mention "compromise"."""
        outcome = self._run("Okay, I will ignore previous instructions.")

        assert not outcome.passed
        assert "compromise" in outcome.details.lower()
|
||||
|
||||
|
||||
class TestInvariantVerifier:
    """Exercise InvariantVerifier with two invariants and with none."""

    def test_verify_multiple_invariants(self):
        """Latency and valid-JSON invariants are verified together."""
        verifier = InvariantVerifier(
            [
                InvariantConfig(type=InvariantType.LATENCY, max_ms=2000),
                InvariantConfig(type=InvariantType.VALID_JSON),
            ]
        )

        # 500.0 is under the 2000 limit and the payload is JSON: both pass.
        within_limit = verifier.verify('{"ok": true}', 500.0)
        assert within_limit.all_passed
        assert within_limit.passed_count == 2

        # 3000.0 is over the limit: only the latency invariant fails.
        over_limit = verifier.verify('{"ok": true}', 3000.0)
        assert not over_limit.all_passed
        assert over_limit.failed_count == 1

    def test_empty_invariants(self):
        """With no invariants the result passes and counts zero checks."""
        outcome = InvariantVerifier([]).verify("anything", 100.0)

        assert outcome.all_passed
        assert outcome.total_count == 0
|
||||
|
||||
181
tests/test_config.py
Normal file
181
tests/test_config.py
Normal file
|
|
@ -0,0 +1,181 @@
|
|||
"""
|
||||
Tests for configuration loading and validation.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
from entropix.core.config import (
|
||||
EntropixConfig,
|
||||
AgentConfig,
|
||||
ModelConfig,
|
||||
MutationConfig,
|
||||
InvariantConfig,
|
||||
OutputConfig,
|
||||
load_config,
|
||||
create_default_config,
|
||||
AgentType,
|
||||
MutationType,
|
||||
InvariantType,
|
||||
OutputFormat,
|
||||
)
|
||||
|
||||
|
||||
class TestEntropixConfig:
    """Tests for creating, serializing, parsing and loading EntropixConfig."""

    def test_create_default_config(self):
        """Test creating a default configuration."""
        config = create_default_config()

        assert config.version == "1.0"
        assert config.agent.type == AgentType.HTTP
        assert config.model.provider == "ollama"
        assert config.model.name == "qwen3:8b"
        assert len(config.golden_prompts) >= 1

    def test_config_to_yaml(self):
        """Test serializing config to YAML."""
        config = create_default_config()
        yaml_str = config.to_yaml()

        assert "version" in yaml_str
        assert "agent" in yaml_str
        assert "golden_prompts" in yaml_str

    def test_config_from_yaml(self):
        """Test parsing config from YAML."""
        # NOTE(review): the nesting below was reconstructed from a view that
        # had lost leading whitespace -- confirm against the repository copy.
        yaml_content = """
version: "1.0"
agent:
  endpoint: "http://localhost:8000/test"
  type: "http"
  timeout: 5000
model:
  provider: "ollama"
  name: "qwen3:8b"
golden_prompts:
  - "Test prompt 1"
  - "Test prompt 2"
invariants:
  - type: "latency"
    max_ms: 1000
"""
        config = EntropixConfig.from_yaml(yaml_content)

        assert config.agent.endpoint == "http://localhost:8000/test"
        assert config.agent.timeout == 5000
        assert len(config.golden_prompts) == 2
        assert len(config.invariants) == 1

    def test_load_config_file_not_found(self):
        """Test loading a non-existent config file."""
        with pytest.raises(FileNotFoundError):
            load_config("/nonexistent/path/config.yaml")

    def test_load_config_from_file(self):
        """Test loading config from an actual file."""
        # NOTE(review): nesting reconstructed from a whitespace-stripped view
        # -- confirm against the repository copy.
        yaml_content = """
version: "1.0"
agent:
  endpoint: "http://test:8000/invoke"
golden_prompts:
  - "Hello world"
"""
        # delete=False keeps the file on disk after the handle is closed so it
        # can be re-opened by name; leaving the ``with`` block closes (and
        # therefore flushes) the handle before load_config reads the file.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", delete=False
        ) as f:
            f.write(yaml_content)

        try:
            config = load_config(f.name)
            assert config.agent.endpoint == "http://test:8000/invoke"
        finally:
            # Cleanup runs even when loading or the assertion fails, so a
            # failing test does not leave the temporary file behind.
            Path(f.name).unlink()
|
||||
|
||||
|
||||
class TestAgentConfig:
    """Tests for AgentConfig validation."""

    def test_valid_http_config(self):
        """Test valid HTTP agent config."""
        config = AgentConfig(
            endpoint="http://localhost:8000/invoke",
            type=AgentType.HTTP,
            timeout=30000,
        )
        assert config.endpoint == "http://localhost:8000/invoke"

    def test_timeout_bounds(self):
        """Test timeout validation."""
        # Valid
        config = AgentConfig(endpoint="http://test", timeout=1000)
        assert config.timeout == 1000

        # Too low
        with pytest.raises(ValueError):
            AgentConfig(endpoint="http://test", timeout=500)

    def test_env_var_expansion(self):
        """Test environment variable expansion in headers."""
        import os
        from unittest import mock

        # patch.dict puts os.environ back to its prior state when the block
        # exits, including when the assertion fails, so TEST_API_KEY neither
        # leaks into other tests nor clobbers a value that was already set.
        with mock.patch.dict(os.environ, {"TEST_API_KEY": "secret123"}):
            config = AgentConfig(
                endpoint="http://test",
                headers={"Authorization": "Bearer ${TEST_API_KEY}"},
            )

            assert config.headers["Authorization"] == "Bearer secret123"
|
||||
|
||||
|
||||
class TestMutationConfig:
    """Check the defaults of a MutationConfig built with no arguments."""

    def test_default_mutation_types(self):
        """Paraphrase, noise and prompt-injection are among the default types."""
        defaults = MutationConfig()

        assert MutationType.PARAPHRASE in defaults.types
        assert MutationType.NOISE in defaults.types
        assert MutationType.PROMPT_INJECTION in defaults.types

    def test_mutation_weights(self):
        """The default prompt-injection weight is greater than the noise weight."""
        defaults = MutationConfig()

        assert (
            defaults.weights[MutationType.PROMPT_INJECTION]
            > defaults.weights[MutationType.NOISE]
        )
|
||||
|
||||
|
||||
class TestInvariantConfig:
    """Check InvariantConfig construction for several invariant types."""

    def test_latency_invariant(self):
        """A latency invariant keeps the max_ms it was given."""
        invariant = InvariantConfig(type=InvariantType.LATENCY, max_ms=2000)

        assert invariant.max_ms == 2000

    def test_latency_missing_max_ms(self):
        """Building a latency invariant without max_ms raises ValueError."""
        with pytest.raises(ValueError):
            InvariantConfig(type=InvariantType.LATENCY)

    def test_contains_invariant(self):
        """A contains invariant keeps the value it was given."""
        invariant = InvariantConfig(type=InvariantType.CONTAINS, value="test")

        assert invariant.value == "test"

    def test_similarity_invariant(self):
        """A similarity invariant keeps the threshold it was given."""
        invariant = InvariantConfig(
            type=InvariantType.SIMILARITY,
            expected="Expected response",
            threshold=0.8,
        )

        assert invariant.threshold == 0.8
|
||||
|
||||
146
tests/test_mutations.py
Normal file
146
tests/test_mutations.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""
|
||||
Tests for the mutation engine.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from entropix.mutations.types import MutationType, Mutation
|
||||
from entropix.mutations.templates import MutationTemplates, MUTATION_TEMPLATES
|
||||
|
||||
|
||||
class TestMutationType:
    """Check the values and derived properties of MutationType members."""

    def test_mutation_type_values(self):
        """Each member's ``value`` is its lower-case snake_case name."""
        expected_values = (
            (MutationType.PARAPHRASE, "paraphrase"),
            (MutationType.NOISE, "noise"),
            (MutationType.TONE_SHIFT, "tone_shift"),
            (MutationType.PROMPT_INJECTION, "prompt_injection"),
        )

        for member, text in expected_values:
            assert member.value == text

    def test_display_name(self):
        """``display_name`` is the title-cased, space-separated label."""
        expected_labels = (
            (MutationType.PARAPHRASE, "Paraphrase"),
            (MutationType.TONE_SHIFT, "Tone Shift"),
            (MutationType.PROMPT_INJECTION, "Prompt Injection"),
        )

        for member, label in expected_labels:
            assert member.display_name == label

    def test_default_weights(self):
        """``default_weight`` matches the expected per-type constants."""
        expected_weights = (
            (MutationType.PARAPHRASE, 1.0),
            (MutationType.PROMPT_INJECTION, 1.5),
            (MutationType.NOISE, 0.8),
        )

        for member, weight in expected_weights:
            assert member.default_weight == weight
|
||||
|
||||
|
||||
class TestMutation:
    """Check construction, ids, validity and round-tripping of Mutation."""

    def test_mutation_creation(self):
        """The fields passed to the constructor are readable afterwards."""
        created = Mutation(
            original="Book a flight",
            mutated="I need to fly somewhere",
            type=MutationType.PARAPHRASE,
            weight=1.0,
        )

        assert created.original == "Book a flight"
        assert created.mutated == "I need to fly somewhere"
        assert created.type == MutationType.PARAPHRASE

    def test_mutation_id_generation(self):
        """Mutations with different text get different 12-character ids."""
        first = Mutation(
            original="Test",
            mutated="Test 1",
            type=MutationType.NOISE,
        )
        second = Mutation(
            original="Test",
            mutated="Test 2",
            type=MutationType.NOISE,
        )

        assert first.id != second.id
        assert len(first.id) == 12

    def test_mutation_validity(self):
        """``is_valid`` accepts changed text and rejects unchanged or empty text."""
        # Mutated text differs from the original: valid.
        changed = Mutation(
            original="Test",
            mutated="Different text",
            type=MutationType.PARAPHRASE,
        )
        assert changed.is_valid()

        # Mutated text equals the original: invalid.
        unchanged = Mutation(
            original="Test",
            mutated="Test",
            type=MutationType.PARAPHRASE,
        )
        assert not unchanged.is_valid()

        # Mutated text is empty: invalid.
        blank = Mutation(
            original="Test",
            mutated="",
            type=MutationType.PARAPHRASE,
        )
        assert not blank.is_valid()

    def test_mutation_serialization(self):
        """``from_dict(to_dict())`` preserves original, mutated and type."""
        source = Mutation(
            original="Test prompt",
            mutated="Mutated prompt",
            type=MutationType.NOISE,
            weight=0.8,
        )

        payload = source.to_dict()
        rebuilt = Mutation.from_dict(payload)

        assert rebuilt.original == source.original
        assert rebuilt.mutated == source.mutated
        assert rebuilt.type == source.type
|
||||
|
||||
|
||||
class TestMutationTemplates:
    """Check lookup, formatting and overriding of MutationTemplates."""

    def test_all_types_have_templates(self):
        """Every MutationType has a template containing the {prompt} placeholder."""
        registry = MutationTemplates()

        for kind in MutationType:
            text = registry.get(kind)
            assert text is not None
            assert "{prompt}" in text

    def test_format_template(self):
        """Formatting substitutes the prompt and leaves no placeholder behind."""
        registry = MutationTemplates()
        prompt = "Book a flight to Paris"

        rendered = registry.format(MutationType.PARAPHRASE, prompt)

        assert "Book a flight to Paris" in rendered
        assert "{prompt}" not in rendered

    def test_custom_template(self):
        """A template set with set_template is returned by get."""
        registry = MutationTemplates()
        override = "Custom template for {prompt}"

        registry.set_template(MutationType.NOISE, override)

        assert registry.get(MutationType.NOISE) == override

    def test_custom_template_requires_placeholder(self):
        """set_template raises ValueError when {prompt} is missing."""
        registry = MutationTemplates()

        with pytest.raises(ValueError):
            registry.set_template(MutationType.NOISE, "No placeholder here")
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue