# flakestorm/tests/test_reports.py
#
# 510 lines
# 15 KiB
# Python
# Raw Normal View History

"""Tests for report generation."""
import json
import tempfile
from datetime import datetime
from pathlib import Path
import pytest
from flakestorm.mutations.types import Mutation, MutationType
class TestCheckResult:
    """Exercise the CheckResult report model."""

    def test_check_result_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        from flakestorm.reports.models import CheckResult

        check = CheckResult(
            details="Found expected substring",
            passed=True,
            check_type="contains",
        )

        assert check.check_type == "contains"
        assert check.passed is True
        assert check.details == "Found expected substring"

    def test_check_result_to_dict(self):
        """to_dict() exposes check_type, passed and details under those keys."""
        from flakestorm.reports.models import CheckResult

        fields = {
            "check_type": "latency",
            "passed": False,
            "details": "Exceeded 5000ms",
        }
        as_dict = CheckResult(**fields).to_dict()

        assert as_dict["check_type"] == "latency"
        assert as_dict["passed"] is False
        assert as_dict["details"] == "Exceeded 5000ms"
class TestMutationResult:
    """Exercise the MutationResult report model."""

    @pytest.fixture
    def sample_mutation(self):
        """Provide a PARAPHRASE mutation of a weather question."""
        return Mutation(
            type=MutationType.PARAPHRASE,
            original="What is the weather?",
            mutated="Tell me about today's weather conditions",
        )

    def test_mutation_result_creation(self, sample_mutation):
        """Response, verdict and latency given to the constructor are readable back."""
        from flakestorm.reports.models import MutationResult

        outcome = MutationResult(
            mutation=sample_mutation,
            original_prompt="What is the weather?",
            response="It's sunny today",
            passed=True,
            latency_ms=100.0,
        )

        assert outcome.response == "It's sunny today"
        assert outcome.passed is True
        assert outcome.latency_ms == 100.0

    def test_mutation_result_with_checks(self, sample_mutation):
        """Checks handed to the constructor are kept in the order given."""
        from flakestorm.reports.models import CheckResult, MutationResult

        passing = CheckResult(
            check_type="contains", passed=True, details="Found 'weather'"
        )
        failing = CheckResult(check_type="latency", passed=False, details="Too slow")

        outcome = MutationResult(
            mutation=sample_mutation,
            original_prompt="What is the weather?",
            response="Test",
            passed=False,
            latency_ms=200.0,
            checks=[passing, failing],
        )

        assert len(outcome.checks) == 2
        assert outcome.checks[0].passed is True
        assert outcome.checks[1].passed is False

    def test_mutation_result_failed_checks(self, sample_mutation):
        """failed_checks yields two entries when two of three checks did not pass."""
        from flakestorm.reports.models import CheckResult, MutationResult

        # (check_type, passed, details) for one passing and two failing checks.
        specs = (
            ("contains", True, "OK"),
            ("latency", False, "Too slow"),
            ("safety", False, "PII detected"),
        )
        all_checks = [
            CheckResult(check_type=kind, passed=ok, details=note)
            for kind, ok, note in specs
        ]

        outcome = MutationResult(
            mutation=sample_mutation,
            original_prompt="Test",
            response="Test",
            passed=False,
            latency_ms=200.0,
            checks=all_checks,
        )

        assert len(outcome.failed_checks) == 2
class TestTypeStatistics:
    """Exercise the TypeStatistics report model."""

    def test_type_statistics_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        from flakestorm.reports.models import TypeStatistics

        per_type = TypeStatistics(
            pass_rate=0.85,
            passed=85,
            total=100,
            mutation_type="paraphrase",
        )

        assert per_type.mutation_type == "paraphrase"
        assert per_type.total == 100
        assert per_type.passed == 85
        assert per_type.pass_rate == 0.85

    def test_type_statistics_to_dict(self):
        """to_dict() reports the mutation type and a 'failed' count of 10 for 40/50."""
        from flakestorm.reports.models import TypeStatistics

        fields = {
            "mutation_type": "noise",
            "total": 50,
            "passed": 40,
            "pass_rate": 0.8,
        }
        as_dict = TypeStatistics(**fields).to_dict()

        assert as_dict["mutation_type"] == "noise"
        assert as_dict["failed"] == 10
class TestTestStatistics:
    """Tests for the TestStatistics data model."""

    def test_statistics_creation(self):
        """TestStatistics stores the values it was constructed with."""
        from flakestorm.reports.models import TestStatistics

        stats = TestStatistics(
            total_mutations=100,
            passed_mutations=85,
            failed_mutations=15,
            robustness_score=0.85,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )

        # These are stored values read straight back, so exact equality is fine.
        assert stats.total_mutations == 100
        assert stats.passed_mutations == 85
        assert stats.robustness_score == 0.85

    def test_statistics_pass_rate(self):
        """pass_rate is 0.8 for 80 passed out of 100 mutations."""
        from flakestorm.reports.models import TestStatistics

        stats = TestStatistics(
            total_mutations=100,
            passed_mutations=80,
            failed_mutations=20,
            robustness_score=0.85,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )

        # pass_rate is not a constructor argument, so it is a derived float;
        # compare with a tolerance rather than bit-for-bit equality.
        assert stats.pass_rate == pytest.approx(0.8)

    def test_statistics_zero_total(self):
        """pass_rate is 0.0 when there are no mutations at all."""
        from flakestorm.reports.models import TestStatistics

        stats = TestStatistics(
            total_mutations=0,
            passed_mutations=0,
            failed_mutations=0,
            robustness_score=0.0,
            avg_latency_ms=0.0,
            p50_latency_ms=0.0,
            p95_latency_ms=0.0,
            p99_latency_ms=0.0,
        )

        # Exact comparison kept on purpose: the expected value is the literal 0.0.
        assert stats.pass_rate == 0.0
class TestTestResults:
    """Exercise the TestResults report model."""

    @pytest.fixture
    def sample_config(self):
        """Provide a config with one HTTP agent, one golden prompt and no invariants."""
        from flakestorm.core.config import (
            AgentConfig,
            AgentType,
            FlakeStormConfig,
        )

        http_agent = AgentConfig(
            type=AgentType.HTTP,
            endpoint="http://localhost:8000/chat",
        )
        return FlakeStormConfig(
            agent=http_agent,
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Provide statistics for a 10-mutation run with 8 passes."""
        from flakestorm.reports.models import TestStatistics

        counts = {
            "total_mutations": 10,
            "passed_mutations": 8,
            "failed_mutations": 2,
        }
        latencies = {
            "avg_latency_ms": 150.0,
            "p50_latency_ms": 120.0,
            "p95_latency_ms": 300.0,
            "p99_latency_ms": 450.0,
        }
        return TestStatistics(robustness_score=0.8, **counts, **latencies)

    def test_results_creation(self, sample_config, sample_statistics):
        """Config and statistics given to the constructor are readable back."""
        from flakestorm.reports.models import TestResults

        # One timestamp serves as both start and completion time.
        stamp = datetime.now()
        run = TestResults(
            statistics=sample_statistics,
            mutations=[],
            completed_at=stamp,
            started_at=stamp,
            config=sample_config,
        )

        assert run.config == sample_config
        assert run.statistics.robustness_score == 0.8
class TestHTMLReportGenerator:
    """Tests for HTML report generation."""

    @pytest.fixture
    def sample_config(self):
        """Create a config with one HTTP agent, one golden prompt and no invariants."""
        from flakestorm.core.config import (
            AgentConfig,
            AgentType,
            FlakeStormConfig,
        )

        return FlakeStormConfig(
            agent=AgentConfig(
                endpoint="http://localhost:8000/chat",
                type=AgentType.HTTP,
            ),
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Create statistics for a 10-mutation run with 8 passes."""
        from flakestorm.reports.models import TestStatistics

        return TestStatistics(
            total_mutations=10,
            passed_mutations=8,
            failed_mutations=2,
            robustness_score=0.8,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )

    @pytest.fixture
    def sample_results(self, sample_config, sample_statistics):
        """Create test results with no mutations and identical start/end times."""
        from flakestorm.reports.models import TestResults

        now = datetime.now()
        return TestResults(
            config=sample_config,
            started_at=now,
            completed_at=now,
            mutations=[],
            statistics=sample_statistics,
        )

    def test_generator_creation(self, sample_results):
        """Generator can be constructed from a TestResults object."""
        from flakestorm.reports.html import HTMLReportGenerator

        generator = HTMLReportGenerator(sample_results)
        assert generator is not None

    def test_generate_returns_string(self, sample_results):
        """generate() returns a non-empty string."""
        from flakestorm.reports.html import HTMLReportGenerator

        html = HTMLReportGenerator(sample_results).generate()
        assert isinstance(html, str)
        assert len(html) > 0

    def test_generate_valid_html_structure(self, sample_results):
        """Generated output opens with a doctype or <html tag and closes </html>."""
        from flakestorm.reports.html import HTMLReportGenerator

        html = HTMLReportGenerator(sample_results).generate()
        assert "<!DOCTYPE html>" in html or "<html" in html
        assert "</html>" in html

    def test_contains_robustness_score(self, sample_results):
        """Report contains the robustness score."""
        from flakestorm.reports.html import HTMLReportGenerator

        html = HTMLReportGenerator(sample_results).generate()
        # Score should appear in some form (0.8 or 80%).
        assert "0.8" in html or "80" in html

    def test_save_creates_file(self, sample_results, tmp_path):
        """save() writes the report to disk and returns the written path."""
        from flakestorm.reports.html import HTMLReportGenerator

        # tmp_path is pytest's per-test temporary directory; pytest manages
        # its lifetime, so no manual cleanup context manager is needed.
        generator = HTMLReportGenerator(sample_results)
        path = generator.save(tmp_path / "report.html")

        assert path.exists()
        # NOTE(review): read_text() uses the platform default encoding; this
        # presumably matches what save() writes with — confirm against save().
        content = path.read_text()
        assert "html" in content.lower()
class TestJSONReportGenerator:
    """Tests for JSON report generation."""

    @pytest.fixture
    def sample_config(self):
        """Create a config with one HTTP agent, one golden prompt and no invariants."""
        from flakestorm.core.config import (
            AgentConfig,
            AgentType,
            FlakeStormConfig,
        )

        return FlakeStormConfig(
            agent=AgentConfig(
                endpoint="http://localhost:8000/chat",
                type=AgentType.HTTP,
            ),
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Create statistics for a 10-mutation run with 8 passes."""
        from flakestorm.reports.models import TestStatistics

        return TestStatistics(
            total_mutations=10,
            passed_mutations=8,
            failed_mutations=2,
            robustness_score=0.8,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )

    @pytest.fixture
    def sample_results(self, sample_config, sample_statistics):
        """Create test results with no mutations and a fixed timestamp."""
        from flakestorm.reports.models import TestResults

        # A fixed timestamp is used for both start and completion.
        ts = datetime(2024, 1, 15, 12, 0, 0)
        return TestResults(
            config=sample_config,
            started_at=ts,
            completed_at=ts,
            mutations=[],
            statistics=sample_statistics,
        )

    def test_generator_creation(self, sample_results):
        """Generator can be constructed from a TestResults object."""
        from flakestorm.reports.json_export import JSONReportGenerator

        generator = JSONReportGenerator(sample_results)
        assert generator is not None

    def test_generate_valid_json(self, sample_results):
        """generate() output parses as JSON with an object at the top level."""
        from flakestorm.reports.json_export import JSONReportGenerator

        json_str = JSONReportGenerator(sample_results).generate()
        # json.loads raises on malformed input, which fails the test.
        data = json.loads(json_str)
        assert isinstance(data, dict)

    def test_contains_statistics(self, sample_results):
        """JSON has a 'statistics' section carrying the robustness score."""
        from flakestorm.reports.json_export import JSONReportGenerator

        data = json.loads(JSONReportGenerator(sample_results).generate())
        assert "statistics" in data
        assert data["statistics"]["robustness_score"] == 0.8

    def test_save_creates_file(self, sample_results, tmp_path):
        """save() writes parseable JSON to disk and returns the written path."""
        from flakestorm.reports.json_export import JSONReportGenerator

        # tmp_path is pytest's per-test temporary directory; pytest manages
        # its lifetime, so no manual cleanup context manager is needed.
        generator = JSONReportGenerator(sample_results)
        path = generator.save(tmp_path / "report.json")

        assert path.exists()
        data = json.loads(path.read_text())
        assert "statistics" in data
class TestTerminalReporter:
    """Exercise the terminal reporter's construction and public surface."""

    @pytest.fixture
    def sample_config(self):
        """Provide a config with one HTTP agent, one golden prompt and no invariants."""
        from flakestorm.core.config import (
            AgentConfig,
            AgentType,
            FlakeStormConfig,
        )

        http_agent = AgentConfig(
            type=AgentType.HTTP,
            endpoint="http://localhost:8000/chat",
        )
        return FlakeStormConfig(
            agent=http_agent,
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Provide statistics for a 10-mutation run with 8 passes."""
        from flakestorm.reports.models import TestStatistics

        counts = {
            "total_mutations": 10,
            "passed_mutations": 8,
            "failed_mutations": 2,
        }
        latencies = {
            "avg_latency_ms": 150.0,
            "p50_latency_ms": 120.0,
            "p95_latency_ms": 300.0,
            "p99_latency_ms": 450.0,
        }
        return TestStatistics(robustness_score=0.8, **counts, **latencies)

    @pytest.fixture
    def sample_results(self, sample_config, sample_statistics):
        """Provide results with no mutations and identical start/end times."""
        from flakestorm.reports.models import TestResults

        stamp = datetime.now()
        return TestResults(
            statistics=sample_statistics,
            mutations=[],
            completed_at=stamp,
            started_at=stamp,
            config=sample_config,
        )

    def test_reporter_creation(self, sample_results):
        """A reporter can be constructed from a TestResults object."""
        from flakestorm.reports.terminal import TerminalReporter

        assert TerminalReporter(sample_results) is not None

    def test_reporter_has_print_methods(self, sample_results):
        """The reporter exposes print_summary and print_full_report."""
        from flakestorm.reports.terminal import TerminalReporter

        reporter = TerminalReporter(sample_results)
        for attr_name in ("print_summary", "print_full_report"):
            assert hasattr(reporter, attr_name)