mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 00:36:54 +02:00
Implement Open Source edition limits and feature restrictions
- Add 5 mutation types (paraphrase, noise, tone_shift, prompt_injection, custom) - Cap mutations at 50 per test run - Force sequential execution only - Disable GitHub Actions integration (Cloud feature) - Add upgrade prompts throughout CLI - Update README with feature comparison - Add limits.py module for centralized limit management - Add cloud and limits CLI commands - Update all documentation with Cloud upgrade messaging
This commit is contained in:
parent
2016be238d
commit
7b75fc9530
47 changed files with 3560 additions and 1012 deletions
509
tests/test_reports.py
Normal file
509
tests/test_reports.py
Normal file
|
|
@ -0,0 +1,509 @@
|
|||
"""Tests for report generation."""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from entropix.mutations.types import Mutation, MutationType
|
||||
|
||||
|
||||
class TestCheckResult:
    """Exercise the ``CheckResult`` report model."""

    def test_check_result_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        from entropix.reports.models import CheckResult

        fields = {
            "check_type": "contains",
            "passed": True,
            "details": "Found expected substring",
        }
        check = CheckResult(**fields)

        assert check.check_type == "contains"
        assert check.passed is True
        assert check.details == "Found expected substring"

    def test_check_result_to_dict(self):
        """``to_dict()`` exposes check_type, passed and details by key."""
        from entropix.reports.models import CheckResult

        payload = CheckResult(
            check_type="latency",
            passed=False,
            details="Exceeded 5000ms",
        ).to_dict()

        assert payload["check_type"] == "latency"
        assert payload["passed"] is False
        assert payload["details"] == "Exceeded 5000ms"
|
||||
|
||||
|
||||
class TestMutationResult:
    """Exercise the ``MutationResult`` report model."""

    @pytest.fixture
    def sample_mutation(self):
        """Provide a paraphrase ``Mutation`` used by the tests in this class."""
        mutation = Mutation(
            original="What is the weather?",
            mutated="Tell me about today's weather conditions",
            type=MutationType.PARAPHRASE,
        )
        return mutation

    def test_mutation_result_creation(self, sample_mutation):
        """A result built without checks exposes response, passed and latency."""
        from entropix.reports.models import MutationResult

        outcome = MutationResult(
            original_prompt="What is the weather?",
            mutation=sample_mutation,
            response="It's sunny today",
            latency_ms=100.0,
            passed=True,
        )

        assert outcome.response == "It's sunny today"
        assert outcome.passed is True
        assert outcome.latency_ms == 100.0

    def test_mutation_result_with_checks(self, sample_mutation):
        """Checks passed to the constructor are kept, in order."""
        from entropix.reports.models import CheckResult, MutationResult

        passing = CheckResult(
            check_type="contains", passed=True, details="Found 'weather'"
        )
        failing = CheckResult(check_type="latency", passed=False, details="Too slow")

        outcome = MutationResult(
            original_prompt="What is the weather?",
            mutation=sample_mutation,
            response="Test",
            latency_ms=200.0,
            passed=False,
            checks=[passing, failing],
        )

        assert len(outcome.checks) == 2
        assert outcome.checks[0].passed is True
        assert outcome.checks[1].passed is False

    def test_mutation_result_failed_checks(self, sample_mutation):
        """``failed_checks`` has two entries when two of three checks failed."""
        from entropix.reports.models import CheckResult, MutationResult

        # One passing check followed by two failing ones.
        mixed_checks = [
            CheckResult(check_type="contains", passed=True, details="OK"),
            CheckResult(check_type="latency", passed=False, details="Too slow"),
            CheckResult(check_type="safety", passed=False, details="PII detected"),
        ]
        outcome = MutationResult(
            original_prompt="Test",
            mutation=sample_mutation,
            response="Test",
            latency_ms=200.0,
            passed=False,
            checks=mixed_checks,
        )

        assert len(outcome.failed_checks) == 2
|
||||
|
||||
|
||||
class TestTypeStatistics:
    """Exercise the ``TypeStatistics`` report model."""

    def test_type_statistics_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        from entropix.reports.models import TypeStatistics

        per_type = TypeStatistics(
            mutation_type="paraphrase",
            total=100,
            passed=85,
            pass_rate=0.85,
        )

        assert per_type.mutation_type == "paraphrase"
        assert per_type.total == 100
        assert per_type.passed == 85
        # pass_rate is a stored value here, so exact comparison is safe.
        assert per_type.pass_rate == 0.85

    def test_type_statistics_to_dict(self):
        """``to_dict()`` carries the mutation type and a ``failed`` count."""
        from entropix.reports.models import TypeStatistics

        payload = TypeStatistics(
            mutation_type="noise",
            total=50,
            passed=40,
            pass_rate=0.8,
        ).to_dict()

        assert payload["mutation_type"] == "noise"
        # "failed" is not passed in; presumably derived as total - passed
        # (50 - 40) by the model -- confirm against TypeStatistics.to_dict.
        assert payload["failed"] == 10
|
||||
|
||||
|
||||
class TestTestStatistics:
    """Tests for the ``TestStatistics`` report model.

    ``TestStatistics`` is imported inside each test rather than at module
    level, matching the rest of this file.
    """

    def test_statistics_creation(self):
        """TestStatistics can be created and exposes its stored fields."""
        from entropix.reports.models import TestStatistics

        stats = TestStatistics(
            total_mutations=100,
            passed_mutations=85,
            failed_mutations=15,
            robustness_score=0.85,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )

        assert stats.total_mutations == 100
        assert stats.passed_mutations == 85
        # Stored value passed straight through, so exact equality is fine.
        assert stats.robustness_score == 0.85

    def test_statistics_pass_rate(self):
        """Statistics calculates pass_rate correctly (80 of 100 -> 0.8)."""
        from entropix.reports.models import TestStatistics

        stats = TestStatistics(
            total_mutations=100,
            passed_mutations=80,
            failed_mutations=20,
            robustness_score=0.85,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )

        # pass_rate is not a constructor argument, so it is computed by the
        # model. Compare with a tolerance so the test does not depend on the
        # exact floating-point arithmetic used to derive it.
        assert stats.pass_rate == pytest.approx(0.8)

    def test_statistics_zero_total(self):
        """Statistics handles zero total without error and reports 0.0."""
        from entropix.reports.models import TestStatistics

        stats = TestStatistics(
            total_mutations=0,
            passed_mutations=0,
            failed_mutations=0,
            robustness_score=0.0,
            avg_latency_ms=0.0,
            p50_latency_ms=0.0,
            p95_latency_ms=0.0,
            p99_latency_ms=0.0,
        )

        # Kept exact: an empty run is expected to yield precisely 0.0.
        assert stats.pass_rate == 0.0
|
||||
|
||||
|
||||
class TestTestResults:
    """Exercise the ``TestResults`` report model."""

    @pytest.fixture
    def sample_config(self):
        """Provide a minimal HTTP-agent config with one golden prompt."""
        from entropix.core.config import (
            AgentConfig,
            AgentType,
            EntropixConfig,
        )

        agent = AgentConfig(
            endpoint="http://localhost:8000/chat",
            type=AgentType.HTTP,
        )
        return EntropixConfig(
            agent=agent,
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Provide statistics for a 10-mutation run with 8 passes."""
        from entropix.reports.models import TestStatistics

        statistics = TestStatistics(
            total_mutations=10,
            passed_mutations=8,
            failed_mutations=2,
            robustness_score=0.8,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )
        return statistics

    def test_results_creation(self, sample_config, sample_statistics):
        """A ``TestResults`` keeps the config and statistics it was given."""
        from entropix.reports.models import TestResults

        # The same instant is used for both start and completion.
        timestamp = datetime.now()
        run = TestResults(
            config=sample_config,
            started_at=timestamp,
            completed_at=timestamp,
            mutations=[],
            statistics=sample_statistics,
        )

        assert run.config == sample_config
        assert run.statistics.robustness_score == 0.8
|
||||
|
||||
|
||||
class TestHTMLReportGenerator:
    """Exercise ``HTMLReportGenerator`` against an empty-mutation result set."""

    @pytest.fixture
    def sample_config(self):
        """Provide a minimal HTTP-agent config with one golden prompt."""
        from entropix.core.config import (
            AgentConfig,
            AgentType,
            EntropixConfig,
        )

        agent = AgentConfig(
            endpoint="http://localhost:8000/chat",
            type=AgentType.HTTP,
        )
        return EntropixConfig(
            agent=agent,
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Provide statistics for a 10-mutation run with 8 passes."""
        from entropix.reports.models import TestStatistics

        statistics = TestStatistics(
            total_mutations=10,
            passed_mutations=8,
            failed_mutations=2,
            robustness_score=0.8,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )
        return statistics

    @pytest.fixture
    def sample_results(self, sample_config, sample_statistics):
        """Bundle the config and statistics fixtures into a ``TestResults``."""
        from entropix.reports.models import TestResults

        timestamp = datetime.now()
        return TestResults(
            config=sample_config,
            started_at=timestamp,
            completed_at=timestamp,
            mutations=[],
            statistics=sample_statistics,
        )

    def test_generator_creation(self, sample_results):
        """Constructing the generator from results yields an object."""
        from entropix.reports.html import HTMLReportGenerator

        assert HTMLReportGenerator(sample_results) is not None

    def test_generate_returns_string(self, sample_results):
        """``generate()`` returns a non-empty ``str``."""
        from entropix.reports.html import HTMLReportGenerator

        markup = HTMLReportGenerator(sample_results).generate()

        assert isinstance(markup, str)
        assert markup  # non-empty

    def test_generate_valid_html_structure(self, sample_results):
        """Output opens with a doctype or ``<html`` tag and closes ``</html>``."""
        from entropix.reports.html import HTMLReportGenerator

        markup = HTMLReportGenerator(sample_results).generate()

        opening_markers = ("<!DOCTYPE html>", "<html")
        assert any(marker in markup for marker in opening_markers)
        assert "</html>" in markup

    def test_contains_robustness_score(self, sample_results):
        """The rendered report mentions the robustness score."""
        from entropix.reports.html import HTMLReportGenerator

        markup = HTMLReportGenerator(sample_results).generate()

        # Accept either the fractional form (0.8) or the percentage form (80).
        assert any(token in markup for token in ("0.8", "80"))

    def test_save_creates_file(self, sample_results):
        """``save()`` writes a file whose content mentions "html"."""
        from entropix.reports.html import HTMLReportGenerator

        with tempfile.TemporaryDirectory() as scratch_dir:
            target = Path(scratch_dir) / "report.html"
            written = HTMLReportGenerator(sample_results).save(target)

            # Checked inside the ``with`` block: the directory is removed on exit.
            assert written.exists()
            assert "html" in written.read_text().lower()
|
||||
|
||||
|
||||
class TestJSONReportGenerator:
    """Exercise ``JSONReportGenerator`` against an empty-mutation result set."""

    @pytest.fixture
    def sample_config(self):
        """Provide a minimal HTTP-agent config with one golden prompt."""
        from entropix.core.config import (
            AgentConfig,
            AgentType,
            EntropixConfig,
        )

        agent = AgentConfig(
            endpoint="http://localhost:8000/chat",
            type=AgentType.HTTP,
        )
        return EntropixConfig(
            agent=agent,
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Provide statistics for a 10-mutation run with 8 passes."""
        from entropix.reports.models import TestStatistics

        statistics = TestStatistics(
            total_mutations=10,
            passed_mutations=8,
            failed_mutations=2,
            robustness_score=0.8,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )
        return statistics

    @pytest.fixture
    def sample_results(self, sample_config, sample_statistics):
        """Bundle the fixtures into a ``TestResults`` with a fixed timestamp."""
        from entropix.reports.models import TestResults

        # Fixed (not ``now()``) timestamp, used for both start and completion.
        fixed_time = datetime(2024, 1, 15, 12, 0, 0)
        return TestResults(
            config=sample_config,
            started_at=fixed_time,
            completed_at=fixed_time,
            mutations=[],
            statistics=sample_statistics,
        )

    def test_generator_creation(self, sample_results):
        """Constructing the generator from results yields an object."""
        from entropix.reports.json_export import JSONReportGenerator

        assert JSONReportGenerator(sample_results) is not None

    def test_generate_valid_json(self, sample_results):
        """``generate()`` output parses as a JSON object."""
        from entropix.reports.json_export import JSONReportGenerator

        serialized = JSONReportGenerator(sample_results).generate()

        # json.loads raises on malformed input, which would fail the test.
        parsed = json.loads(serialized)
        assert isinstance(parsed, dict)

    def test_contains_statistics(self, sample_results):
        """The JSON document has a ``statistics`` section with the score."""
        from entropix.reports.json_export import JSONReportGenerator

        serialized = JSONReportGenerator(sample_results).generate()
        parsed = json.loads(serialized)

        assert "statistics" in parsed
        assert parsed["statistics"]["robustness_score"] == 0.8

    def test_save_creates_file(self, sample_results):
        """``save()`` writes a JSON file containing a ``statistics`` key."""
        from entropix.reports.json_export import JSONReportGenerator

        with tempfile.TemporaryDirectory() as scratch_dir:
            target = Path(scratch_dir) / "report.json"
            written = JSONReportGenerator(sample_results).save(target)

            # Checked inside the ``with`` block: the directory is removed on exit.
            assert written.exists()
            parsed = json.loads(written.read_text())
            assert "statistics" in parsed
|
||||
|
||||
|
||||
class TestTerminalReporter:
    """Exercise construction and the public surface of ``TerminalReporter``."""

    @pytest.fixture
    def sample_config(self):
        """Provide a minimal HTTP-agent config with one golden prompt."""
        from entropix.core.config import (
            AgentConfig,
            AgentType,
            EntropixConfig,
        )

        agent = AgentConfig(
            endpoint="http://localhost:8000/chat",
            type=AgentType.HTTP,
        )
        return EntropixConfig(
            agent=agent,
            golden_prompts=["Test"],
            invariants=[],
        )

    @pytest.fixture
    def sample_statistics(self):
        """Provide statistics for a 10-mutation run with 8 passes."""
        from entropix.reports.models import TestStatistics

        statistics = TestStatistics(
            total_mutations=10,
            passed_mutations=8,
            failed_mutations=2,
            robustness_score=0.8,
            avg_latency_ms=150.0,
            p50_latency_ms=120.0,
            p95_latency_ms=300.0,
            p99_latency_ms=450.0,
        )
        return statistics

    @pytest.fixture
    def sample_results(self, sample_config, sample_statistics):
        """Bundle the config and statistics fixtures into a ``TestResults``."""
        from entropix.reports.models import TestResults

        timestamp = datetime.now()
        return TestResults(
            config=sample_config,
            started_at=timestamp,
            completed_at=timestamp,
            mutations=[],
            statistics=sample_statistics,
        )

    def test_reporter_creation(self, sample_results):
        """Constructing the reporter from results yields an object."""
        from entropix.reports.terminal import TerminalReporter

        assert TerminalReporter(sample_results) is not None

    def test_reporter_has_print_methods(self, sample_results):
        """The reporter exposes ``print_summary`` and ``print_full_report``."""
        from entropix.reports.terminal import TerminalReporter

        reporter = TerminalReporter(sample_results)

        # Only attribute presence is checked; nothing is printed here.
        for method_name in ("print_summary", "print_full_report"):
            assert hasattr(reporter, method_name)
|
||||
Loading…
Add table
Add a link
Reference in a new issue