2025-12-28 21:55:01 +08:00
|
|
|
"""
|
|
|
|
|
Tests for the mutation engine.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import pytest
|
2025-12-29 00:11:02 +08:00
|
|
|
|
2025-12-29 11:15:18 +08:00
|
|
|
from flakestorm.mutations.templates import MutationTemplates
|
|
|
|
|
from flakestorm.mutations.types import Mutation, MutationType
|
2025-12-28 21:55:01 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestMutationType:
    """Checks the MutationType enum: raw values, display names and weights."""

    def test_mutation_type_values(self):
        """Each listed member exposes the expected string ``value`` (24 pairs)."""
        expected_values = (
            # Core prompt-level attacks (8)
            (MutationType.PARAPHRASE, "paraphrase"),
            (MutationType.NOISE, "noise"),
            (MutationType.TONE_SHIFT, "tone_shift"),
            (MutationType.PROMPT_INJECTION, "prompt_injection"),
            (MutationType.ENCODING_ATTACKS, "encoding_attacks"),
            (MutationType.CONTEXT_MANIPULATION, "context_manipulation"),
            (MutationType.LENGTH_EXTREMES, "length_extremes"),
            (MutationType.CUSTOM, "custom"),
            # Advanced prompt-level attacks (7)
            (MutationType.MULTI_TURN_ATTACK, "multi_turn_attack"),
            (MutationType.ADVANCED_JAILBREAK, "advanced_jailbreak"),
            (MutationType.SEMANTIC_SIMILARITY_ATTACK, "semantic_similarity_attack"),
            (MutationType.FORMAT_POISONING, "format_poisoning"),
            (MutationType.LANGUAGE_MIXING, "language_mixing"),
            (MutationType.TOKEN_MANIPULATION, "token_manipulation"),
            (MutationType.TEMPORAL_ATTACK, "temporal_attack"),
            # System/Network-level attacks (9)
            (MutationType.HTTP_HEADER_INJECTION, "http_header_injection"),
            (MutationType.PAYLOAD_SIZE_ATTACK, "payload_size_attack"),
            (MutationType.CONTENT_TYPE_CONFUSION, "content_type_confusion"),
            (MutationType.QUERY_PARAMETER_POISONING, "query_parameter_poisoning"),
            (MutationType.REQUEST_METHOD_ATTACK, "request_method_attack"),
            (MutationType.PROTOCOL_LEVEL_ATTACK, "protocol_level_attack"),
            (MutationType.RESOURCE_EXHAUSTION, "resource_exhaustion"),
            (MutationType.CONCURRENT_REQUEST_PATTERN, "concurrent_request_pattern"),
            (MutationType.TIMEOUT_MANIPULATION, "timeout_manipulation"),
        )

        # Pairs are checked in declaration order, one assertion per member.
        for member, raw_value in expected_values:
            assert member.value == raw_value

    def test_display_name(self):
        """``display_name`` yields the expected title-cased label per member."""
        expected_labels = (
            # Core types
            (MutationType.PARAPHRASE, "Paraphrase"),
            (MutationType.TONE_SHIFT, "Tone Shift"),
            (MutationType.PROMPT_INJECTION, "Prompt Injection"),
            (MutationType.ENCODING_ATTACKS, "Encoding Attacks"),
            (MutationType.CONTEXT_MANIPULATION, "Context Manipulation"),
            (MutationType.LENGTH_EXTREMES, "Length Extremes"),
            # Advanced types
            (MutationType.MULTI_TURN_ATTACK, "Multi Turn Attack"),
            (MutationType.ADVANCED_JAILBREAK, "Advanced Jailbreak"),
            (MutationType.SEMANTIC_SIMILARITY_ATTACK, "Semantic Similarity Attack"),
            (MutationType.FORMAT_POISONING, "Format Poisoning"),
            (MutationType.LANGUAGE_MIXING, "Language Mixing"),
            (MutationType.TOKEN_MANIPULATION, "Token Manipulation"),
            (MutationType.TEMPORAL_ATTACK, "Temporal Attack"),
            # System/Network types
            (MutationType.HTTP_HEADER_INJECTION, "Http Header Injection"),
            (MutationType.PAYLOAD_SIZE_ATTACK, "Payload Size Attack"),
            (MutationType.CONTENT_TYPE_CONFUSION, "Content Type Confusion"),
            (MutationType.QUERY_PARAMETER_POISONING, "Query Parameter Poisoning"),
            (MutationType.REQUEST_METHOD_ATTACK, "Request Method Attack"),
            (MutationType.PROTOCOL_LEVEL_ATTACK, "Protocol Level Attack"),
            (MutationType.RESOURCE_EXHAUSTION, "Resource Exhaustion"),
            (MutationType.CONCURRENT_REQUEST_PATTERN, "Concurrent Request Pattern"),
            (MutationType.TIMEOUT_MANIPULATION, "Timeout Manipulation"),
        )

        for member, label in expected_labels:
            assert member.display_name == label

    def test_default_weights(self):
        """``default_weight`` matches the expected number for every member."""
        expected_weights = (
            # Core types
            (MutationType.PARAPHRASE, 1.0),
            (MutationType.PROMPT_INJECTION, 1.5),
            (MutationType.NOISE, 0.8),
            (MutationType.ENCODING_ATTACKS, 1.3),
            (MutationType.CONTEXT_MANIPULATION, 1.1),
            (MutationType.LENGTH_EXTREMES, 1.2),
            (MutationType.TONE_SHIFT, 0.9),
            (MutationType.CUSTOM, 1.0),
            # Advanced types
            (MutationType.MULTI_TURN_ATTACK, 1.4),
            (MutationType.ADVANCED_JAILBREAK, 2.0),
            (MutationType.SEMANTIC_SIMILARITY_ATTACK, 1.3),
            (MutationType.FORMAT_POISONING, 1.6),
            (MutationType.LANGUAGE_MIXING, 1.2),
            (MutationType.TOKEN_MANIPULATION, 1.5),
            (MutationType.TEMPORAL_ATTACK, 1.1),
            # System/Network types
            (MutationType.HTTP_HEADER_INJECTION, 1.7),
            (MutationType.PAYLOAD_SIZE_ATTACK, 1.4),
            (MutationType.CONTENT_TYPE_CONFUSION, 1.5),
            (MutationType.QUERY_PARAMETER_POISONING, 1.6),
            (MutationType.REQUEST_METHOD_ATTACK, 1.3),
            (MutationType.PROTOCOL_LEVEL_ATTACK, 1.8),
            (MutationType.RESOURCE_EXHAUSTION, 1.5),
            (MutationType.CONCURRENT_REQUEST_PATTERN, 1.4),
            (MutationType.TIMEOUT_MANIPULATION, 1.3),
        )

        for member, weight in expected_weights:
            assert member.default_weight == weight
|
2025-12-28 21:55:01 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestMutation:
    """Exercises the Mutation dataclass: fields, ids, validity, round-trip."""

    def test_mutation_creation(self):
        """A freshly built Mutation reports the values it was given."""
        created = Mutation(
            original="Book a flight",
            mutated="I need to fly somewhere",
            type=MutationType.PARAPHRASE,
            weight=1.0,
        )

        assert created.original == "Book a flight"
        assert created.mutated == "I need to fly somewhere"
        assert created.type == MutationType.PARAPHRASE

    def test_mutation_id_generation(self):
        """Two mutations with different mutated text get different 12-char ids."""
        first, second = (
            Mutation(original="Test", mutated=text, type=MutationType.NOISE)
            for text in ("Test 1", "Test 2")
        )

        assert first.id != second.id
        assert len(first.id) == 12

    def test_mutation_validity(self):
        """``is_valid()`` gives the expected verdict for each scenario below."""
        base_prompt = "Test prompt"
        oversized = "x" * (len(base_prompt) * 5)

        # (original, mutated, type, expected truthiness of is_valid())
        scenarios = (
            # Valid mutation (mutated must be different and <= 3x original length)
            (
                "What is the weather today?",
                "Tell me about the weather",
                MutationType.PARAPHRASE,
                True,
            ),
            # Invalid: same as original
            (base_prompt, "Test prompt", MutationType.PARAPHRASE, False),
            # Invalid: empty mutated (for non-LENGTH_EXTREMES types)
            (base_prompt, "", MutationType.PARAPHRASE, False),
            # Valid: empty mutated for LENGTH_EXTREMES (edge case testing)
            (base_prompt, "", MutationType.LENGTH_EXTREMES, True),
            # Valid: very long mutated for LENGTH_EXTREMES
            (base_prompt, oversized, MutationType.LENGTH_EXTREMES, True),
        )

        for source, changed, kind, should_pass in scenarios:
            candidate = Mutation(original=source, mutated=changed, type=kind)
            if should_pass:
                assert candidate.is_valid()
            else:
                assert not candidate.is_valid()

    def test_mutation_serialization(self):
        """``from_dict(to_dict())`` preserves original, mutated and type."""
        source = Mutation(
            original="Test prompt",
            mutated="Mutated prompt",
            type=MutationType.NOISE,
            weight=0.8,
        )

        round_tripped = Mutation.from_dict(source.to_dict())

        for field_name in ("original", "mutated", "type"):
            assert getattr(round_tripped, field_name) == getattr(source, field_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestMutationTemplates:
    """Tests for MutationTemplates."""

    def test_all_types_have_templates(self):
        """Test that all 24 mutation types have a template with ``{prompt}``.

        The explicit list documents the expected catalogue. It is also
        compared against the ``MutationType`` enum itself, so that a member
        added to the enum but not to this list (and therefore never checked
        for a template) fails the test instead of being skipped silently.
        """
        templates = MutationTemplates()

        # All 24 mutation types
        expected_types = [
            # Core prompt-level attacks (8)
            MutationType.PARAPHRASE,
            MutationType.NOISE,
            MutationType.TONE_SHIFT,
            MutationType.PROMPT_INJECTION,
            MutationType.ENCODING_ATTACKS,
            MutationType.CONTEXT_MANIPULATION,
            MutationType.LENGTH_EXTREMES,
            MutationType.CUSTOM,
            # Advanced prompt-level attacks (7)
            MutationType.MULTI_TURN_ATTACK,
            MutationType.ADVANCED_JAILBREAK,
            MutationType.SEMANTIC_SIMILARITY_ATTACK,
            MutationType.FORMAT_POISONING,
            MutationType.LANGUAGE_MIXING,
            MutationType.TOKEN_MANIPULATION,
            MutationType.TEMPORAL_ATTACK,
            # System/Network-level attacks (9)
            MutationType.HTTP_HEADER_INJECTION,
            MutationType.PAYLOAD_SIZE_ATTACK,
            MutationType.CONTENT_TYPE_CONFUSION,
            MutationType.QUERY_PARAMETER_POISONING,
            MutationType.REQUEST_METHOD_ATTACK,
            MutationType.PROTOCOL_LEVEL_ATTACK,
            MutationType.RESOURCE_EXHAUSTION,
            MutationType.CONCURRENT_REQUEST_PATTERN,
            MutationType.TIMEOUT_MANIPULATION,
        ]

        assert len(expected_types) == 24, "Should have exactly 24 mutation types"

        # The length check above only validates the literal list. Compare it
        # with the enum so the list cannot drift out of sync unnoticed.
        unlisted = [member for member in MutationType if member not in expected_types]
        assert not unlisted, (
            "MutationType members not covered by this test: "
            f"{[member.value for member in unlisted]}"
        )

        for mutation_type in expected_types:
            template = templates.get(mutation_type)
            assert template is not None, f"Template missing for {mutation_type.value}"
            assert (
                "{prompt}" in template
            ), f"Template for {mutation_type.value} missing {{prompt}} placeholder"

    def test_format_template(self):
        """Test formatting a template with a prompt.

        The formatted text must contain the supplied prompt and must no
        longer contain the literal ``{prompt}`` placeholder.
        """
        templates = MutationTemplates()

        formatted = templates.format(MutationType.PARAPHRASE, "Book a flight to Paris")

        assert "Book a flight to Paris" in formatted
        assert "{prompt}" not in formatted

    def test_custom_template(self):
        """Test that a template set via ``set_template`` is returned by ``get``."""
        templates = MutationTemplates()
        custom = "Custom template for {prompt}"

        templates.set_template(MutationType.NOISE, custom)

        assert templates.get(MutationType.NOISE) == custom

    def test_custom_template_requires_placeholder(self):
        """Test that custom templates must have {prompt} placeholder.

        ``set_template`` is expected to raise ``ValueError`` when the given
        text does not contain the placeholder.
        """
        templates = MutationTemplates()

        with pytest.raises(ValueError):
            templates.set_template(MutationType.NOISE, "No placeholder here")
|