diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md new file mode 100644 index 0000000..745102e --- /dev/null +++ b/RELEASE_NOTES.md @@ -0,0 +1,105 @@ +# Release Notes + +## Version 0.9.1 - 24 Mutation Types Update + +### 🎯 Major Update: Comprehensive Mutation Coverage + +Flakestorm now supports **24 mutation types** for comprehensive robustness testing, expanding from the original 8 core types to cover advanced prompt-level attacks and system/network-level vulnerabilities. + +### ✨ What's New + +#### Expanded Mutation Types (24 Total) + +**Core Prompt-Level Attacks (8 types):** +- Paraphrase - Semantic rewrites preserving intent +- Noise - Typos and spelling errors +- Tone Shift - Aggressive/impatient phrasing +- Prompt Injection - Basic adversarial attacks +- Encoding Attacks - Base64, Unicode, URL encoding +- Context Manipulation - Adding/removing/reordering context +- Length Extremes - Empty, minimal, or very long inputs +- Custom - User-defined mutation templates + +**Advanced Prompt-Level Attacks (7 new types):** +- Multi-Turn Attack - Fake conversation history with contradictory turns +- Advanced Jailbreak - Sophisticated injection techniques (DAN, role-playing, hypothetical scenarios) +- Semantic Similarity Attack - Adversarial examples that look similar but have different meanings +- Format Poisoning - Structured data injection (JSON, XML, markdown, YAML) +- Language Mixing - Multilingual inputs, code-switching, mixed scripts +- Token Manipulation - Tokenizer edge cases, special tokens, boundary attacks +- Temporal Attack - Impossible dates, outdated references, temporal confusion + +**System/Network-Level Attacks (9 new types):** +- HTTP Header Injection - Header manipulation and injection attacks +- Payload Size Attack - Extremely large payloads, memory exhaustion +- Content-Type Confusion - MIME type manipulation and format confusion +- Query Parameter Poisoning - Parameter pollution and query-based injection +- Request Method Attack - HTTP method 
confusion and manipulation +- Protocol-Level Attack - Request smuggling, chunked encoding, protocol confusion +- Resource Exhaustion - CPU/memory exhaustion, infinite loops, DoS patterns +- Concurrent Request Pattern - Race conditions, concurrent state manipulation +- Timeout Manipulation - Slow processing, timeout-inducing patterns + +### 🔧 Improvements + +- **Comprehensive Testing Coverage**: All 24 mutation types are fully implemented with templates and default weights +- **Updated Documentation**: README and Usage Guide now reflect all 24 mutation types +- **Enhanced Test Suite**: Test coverage expanded to validate all 24 mutation types +- **Production Status**: Updated the package's development status classifier from Alpha to Production/Stable + +### 📚 Documentation Updates + +- README.md updated to reflect 24 mutation types with clear categorization +- Usage Guide includes detailed explanations of all mutation types +- Test suite (`tests/test_mutations.py`) now validates all 24 types + +### 🐛 Bug Fixes + +- Fixed mutation type count inconsistencies in documentation +- Updated test assertions to cover all mutation types + +### 📦 Technical Details + +- All 24 mutation types have: + - Complete template definitions in `src/flakestorm/mutations/templates.py` + - Default weights configured in `src/flakestorm/mutations/types.py` + - Display names and descriptions + - Full test coverage + +### 🚀 Migration Guide + +No breaking changes. Existing configurations continue to work. The default mutation types remain the original 8 core types. To use the new advanced and system/network-level types, add them to your `flakestorm.yaml`: + +```yaml +mutations: + types: + - paraphrase + - noise + - tone_shift + - prompt_injection + - encoding_attacks + - context_manipulation + - length_extremes + - custom + # Add new types as needed: + - multi_turn_attack + - advanced_jailbreak + - semantic_similarity_attack + # ...
and more +``` + +### 📊 Impact + +This update significantly expands Flakestorm's ability to test agent robustness across: +- **Security vulnerabilities** (advanced jailbreaks, protocol attacks) +- **Input parsing edge cases** (format poisoning, token manipulation) +- **System-level attacks** (resource exhaustion, timeout manipulation) +- **Internationalization** (language mixing, character set handling) + +### 🙏 Acknowledgments + +Thank you to all contributors and users who have helped shape Flakestorm into a comprehensive chaos engineering tool for AI agents. + +--- + +**Full Changelog**: See [GitHub Releases](https://github.com/flakestorm/flakestorm/releases) for detailed commit history. diff --git a/pyproject.toml b/pyproject.toml index 20d1b3a..db018d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "flakestorm" -version = "0.9.0" +version = "0.9.1" description = "The Agent Reliability Engine - Chaos Engineering for AI Agents" readme = "README.md" license = "Apache-2.0" @@ -23,7 +23,7 @@ keywords = [ "adversarial-testing" ] classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 5 - Production/Stable", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", diff --git a/src/flakestorm/mutations/types.py b/src/flakestorm/mutations/types.py index 9384463..be45527 100644 --- a/src/flakestorm/mutations/types.py +++ b/src/flakestorm/mutations/types.py @@ -63,7 +63,7 @@ class MutationType(str, Enum): LANGUAGE_MIXING = "language_mixing" """Multilingual inputs, code-switching, and character set handling.""" - TOKEN_MANIPULATION = "token_manipulation" + TOKEN_MANIPULATION = "token_manipulation" # nosec B105 """Tokenizer edge cases, special tokens, and token boundary attacks.""" TEMPORAL_ATTACK = "temporal_attack" @@ -268,7 +268,7 @@ class Mutation: # Allow up to 10x original length for length extremes testing if 
len(self.mutated) > len(self.original) * 10: return True # Very long is valid for this type - + # For other types, empty strings are invalid if not self.mutated or not self.mutated.strip(): return False diff --git a/tests/test_mutations.py b/tests/test_mutations.py index b135556..acacdc0 100644 --- a/tests/test_mutations.py +++ b/tests/test_mutations.py @@ -12,7 +12,8 @@ class TestMutationType: """Tests for MutationType enum.""" def test_mutation_type_values(self): - """Test mutation type string values.""" + """Test mutation type string values for all 24 types.""" + # Core prompt-level attacks (8) assert MutationType.PARAPHRASE.value == "paraphrase" assert MutationType.NOISE.value == "noise" assert MutationType.TONE_SHIFT.value == "tone_shift" @@ -22,8 +23,37 @@ class TestMutationType: assert MutationType.LENGTH_EXTREMES.value == "length_extremes" assert MutationType.CUSTOM.value == "custom" + # Advanced prompt-level attacks (7) + assert MutationType.MULTI_TURN_ATTACK.value == "multi_turn_attack" + assert MutationType.ADVANCED_JAILBREAK.value == "advanced_jailbreak" + assert ( + MutationType.SEMANTIC_SIMILARITY_ATTACK.value + == "semantic_similarity_attack" + ) + assert MutationType.FORMAT_POISONING.value == "format_poisoning" + assert MutationType.LANGUAGE_MIXING.value == "language_mixing" + assert MutationType.TOKEN_MANIPULATION.value == "token_manipulation" + assert MutationType.TEMPORAL_ATTACK.value == "temporal_attack" + + # System/Network-level attacks (9) + assert MutationType.HTTP_HEADER_INJECTION.value == "http_header_injection" + assert MutationType.PAYLOAD_SIZE_ATTACK.value == "payload_size_attack" + assert MutationType.CONTENT_TYPE_CONFUSION.value == "content_type_confusion" + assert ( + MutationType.QUERY_PARAMETER_POISONING.value == "query_parameter_poisoning" + ) + assert MutationType.REQUEST_METHOD_ATTACK.value == "request_method_attack" + assert MutationType.PROTOCOL_LEVEL_ATTACK.value == "protocol_level_attack" + assert 
MutationType.RESOURCE_EXHAUSTION.value == "resource_exhaustion" + assert ( + MutationType.CONCURRENT_REQUEST_PATTERN.value + == "concurrent_request_pattern" + ) + assert MutationType.TIMEOUT_MANIPULATION.value == "timeout_manipulation" + def test_display_name(self): - """Test display name generation.""" + """Test display name generation for all mutation types.""" + # Core types assert MutationType.PARAPHRASE.display_name == "Paraphrase" assert MutationType.TONE_SHIFT.display_name == "Tone Shift" assert MutationType.PROMPT_INJECTION.display_name == "Prompt Injection" @@ -31,14 +61,74 @@ class TestMutationType: assert MutationType.CONTEXT_MANIPULATION.display_name == "Context Manipulation" assert MutationType.LENGTH_EXTREMES.display_name == "Length Extremes" + # Advanced types + assert MutationType.MULTI_TURN_ATTACK.display_name == "Multi Turn Attack" + assert MutationType.ADVANCED_JAILBREAK.display_name == "Advanced Jailbreak" + assert ( + MutationType.SEMANTIC_SIMILARITY_ATTACK.display_name + == "Semantic Similarity Attack" + ) + assert MutationType.FORMAT_POISONING.display_name == "Format Poisoning" + assert MutationType.LANGUAGE_MIXING.display_name == "Language Mixing" + assert MutationType.TOKEN_MANIPULATION.display_name == "Token Manipulation" + assert MutationType.TEMPORAL_ATTACK.display_name == "Temporal Attack" + + # System/Network types + assert ( + MutationType.HTTP_HEADER_INJECTION.display_name == "Http Header Injection" + ) + assert MutationType.PAYLOAD_SIZE_ATTACK.display_name == "Payload Size Attack" + assert ( + MutationType.CONTENT_TYPE_CONFUSION.display_name == "Content Type Confusion" + ) + assert ( + MutationType.QUERY_PARAMETER_POISONING.display_name + == "Query Parameter Poisoning" + ) + assert ( + MutationType.REQUEST_METHOD_ATTACK.display_name == "Request Method Attack" + ) + assert ( + MutationType.PROTOCOL_LEVEL_ATTACK.display_name == "Protocol Level Attack" + ) + assert MutationType.RESOURCE_EXHAUSTION.display_name == "Resource Exhaustion" 
+ assert ( + MutationType.CONCURRENT_REQUEST_PATTERN.display_name + == "Concurrent Request Pattern" + ) + assert MutationType.TIMEOUT_MANIPULATION.display_name == "Timeout Manipulation" + def test_default_weights(self): - """Test default weights are assigned.""" + """Test default weights are assigned for all mutation types.""" + # Core types assert MutationType.PARAPHRASE.default_weight == 1.0 assert MutationType.PROMPT_INJECTION.default_weight == 1.5 assert MutationType.NOISE.default_weight == 0.8 assert MutationType.ENCODING_ATTACKS.default_weight == 1.3 assert MutationType.CONTEXT_MANIPULATION.default_weight == 1.1 assert MutationType.LENGTH_EXTREMES.default_weight == 1.2 + assert MutationType.TONE_SHIFT.default_weight == 0.9 + assert MutationType.CUSTOM.default_weight == 1.0 + + # Advanced types + assert MutationType.MULTI_TURN_ATTACK.default_weight == 1.4 + assert MutationType.ADVANCED_JAILBREAK.default_weight == 2.0 + assert MutationType.SEMANTIC_SIMILARITY_ATTACK.default_weight == 1.3 + assert MutationType.FORMAT_POISONING.default_weight == 1.6 + assert MutationType.LANGUAGE_MIXING.default_weight == 1.2 + assert MutationType.TOKEN_MANIPULATION.default_weight == 1.5 + assert MutationType.TEMPORAL_ATTACK.default_weight == 1.1 + + # System/Network types + assert MutationType.HTTP_HEADER_INJECTION.default_weight == 1.7 + assert MutationType.PAYLOAD_SIZE_ATTACK.default_weight == 1.4 + assert MutationType.CONTENT_TYPE_CONFUSION.default_weight == 1.5 + assert MutationType.QUERY_PARAMETER_POISONING.default_weight == 1.6 + assert MutationType.REQUEST_METHOD_ATTACK.default_weight == 1.3 + assert MutationType.PROTOCOL_LEVEL_ATTACK.default_weight == 1.8 + assert MutationType.RESOURCE_EXHAUSTION.default_weight == 1.5 + assert MutationType.CONCURRENT_REQUEST_PATTERN.default_weight == 1.4 + assert MutationType.TIMEOUT_MANIPULATION.default_weight == 1.3 class TestMutation: @@ -137,11 +227,12 @@ class TestMutationTemplates: """Tests for MutationTemplates.""" def 
test_all_types_have_templates(self): - """Test that all mutation types have templates.""" + """Test that all 24 mutation types have templates.""" templates = MutationTemplates() - # Test all 8 mutation types + # All 24 mutation types expected_types = [ + # Core prompt-level attacks (8) MutationType.PARAPHRASE, MutationType.NOISE, MutationType.TONE_SHIFT, @@ -150,12 +241,34 @@ class TestMutationTemplates: MutationType.CONTEXT_MANIPULATION, MutationType.LENGTH_EXTREMES, MutationType.CUSTOM, + # Advanced prompt-level attacks (7) + MutationType.MULTI_TURN_ATTACK, + MutationType.ADVANCED_JAILBREAK, + MutationType.SEMANTIC_SIMILARITY_ATTACK, + MutationType.FORMAT_POISONING, + MutationType.LANGUAGE_MIXING, + MutationType.TOKEN_MANIPULATION, + MutationType.TEMPORAL_ATTACK, + # System/Network-level attacks (9) + MutationType.HTTP_HEADER_INJECTION, + MutationType.PAYLOAD_SIZE_ATTACK, + MutationType.CONTENT_TYPE_CONFUSION, + MutationType.QUERY_PARAMETER_POISONING, + MutationType.REQUEST_METHOD_ATTACK, + MutationType.PROTOCOL_LEVEL_ATTACK, + MutationType.RESOURCE_EXHAUSTION, + MutationType.CONCURRENT_REQUEST_PATTERN, + MutationType.TIMEOUT_MANIPULATION, ] + assert len(expected_types) == 24, "Should have exactly 24 mutation types" + for mutation_type in expected_types: template = templates.get(mutation_type) - assert template is not None - assert "{prompt}" in template + assert template is not None, f"Template missing for {mutation_type.value}" + assert ( + "{prompt}" in template + ), f"Template for {mutation_type.value} missing {{prompt}} placeholder" def test_format_template(self): """Test formatting a template with a prompt."""