diff --git a/README.md b/README.md index 4b962b9..e66040d 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,20 @@ Instead of running one test case, Flakestorm takes a single "Golden Prompt", gen - ✅ **Local-First**: Uses Ollama with Qwen 3 8B for free testing - ✅ **Beautiful Reports**: Interactive HTML reports with pass/fail matrices +## Demo + +### flakestorm in Action + +![flakestorm Demo](flakestorm_demo.gif) + +*Watch flakestorm generate mutations and test your agent in real-time* + +### Test Report + +![flakestorm Test Report](flakestorm_test_reporting.gif) + +*Interactive HTML reports with detailed failure analysis and recommendations* + ## Quick Start ### Installation Order @@ -97,7 +111,11 @@ sudo apt install ollama # Windows: Starts automatically as a service # In another terminal, pull the model -ollama pull qwen3:8b +# Choose based on your RAM: +# - 8GB RAM: ollama pull tinyllama:1.1b or gemma2:2b +# - 16GB RAM: ollama pull qwen2.5:3b (recommended) +# - 32GB+ RAM: ollama pull qwen2.5-coder:7b (best quality) +ollama pull qwen2.5:3b ``` **Troubleshooting:** If you get `syntax error: ` or `command not found` when running `ollama` commands: @@ -194,7 +212,9 @@ agent: model: provider: "ollama" - name: "qwen3:8b" + # Choose model based on your RAM: 8GB (tinyllama:1.1b), 16GB (qwen2.5:3b), 32GB+ (qwen2.5-coder:7b) + # See docs/USAGE_GUIDE.md for full model recommendations + name: "qwen2.5:3b" base_url: "http://localhost:11434" mutations: diff --git a/docs/USAGE_GUIDE.md b/docs/USAGE_GUIDE.md index 876dfc0..8c7aadf 100644 --- a/docs/USAGE_GUIDE.md +++ b/docs/USAGE_GUIDE.md @@ -258,6 +258,57 @@ ollama pull qwen2.5-coder:7b ollama run qwen2.5-coder:7b "Hello, world!" ``` +### Choosing the Right Model for Your System + +FlakeStorm uses local LLMs to generate mutations. 
Choose a model that fits your system's RAM and performance requirements: + +| System RAM | Recommended Model | Model Size | Speed | Quality | Use Case | +|------------|-------------------|------------|-------|---------|----------| +| **4-8 GB** | `tinyllama:1.1b` | ~700 MB | ⚡⚡⚡ Very Fast | ⭐⭐ Basic | Quick testing, CI/CD | +| **8-16 GB** | `gemma2:2b` | ~1.4 GB | ⚡⚡ Fast | ⭐⭐⭐ Good | Balanced performance | +| **8-16 GB** | `phi3:mini` | ~2.3 GB | ⚡⚡ Fast | ⭐⭐⭐ Good | Microsoft's efficient model | +| **16-32 GB** | `qwen2.5:3b` | ~2.0 GB | ⚡⚡ Fast | ⭐⭐⭐⭐ Very Good | Recommended for most users | +| **16-32 GB** | `gemma2:9b` | ~5.4 GB | ⚡ Moderate | ⭐⭐⭐⭐ Very Good | Better quality mutations | +| **32+ GB** | `qwen2.5-coder:7b` | ~4.4 GB | ⚡ Moderate | ⭐⭐⭐⭐⭐ Excellent | Best for code/structured prompts | +| **32+ GB** | `qwen2.5:7b` | ~4.4 GB | ⚡ Moderate | ⭐⭐⭐⭐⭐ Excellent | Best overall quality | +| **64+ GB** | `qwen2.5:14b` | ~8.9 GB | 🐌 Slower | ⭐⭐⭐⭐⭐ Excellent | Maximum quality (overkill for most) | + +**Quick Recommendations:** + +- **Minimum viable (8GB RAM)**: `tinyllama:1.1b` or `gemma2:2b` +- **Recommended (16GB+ RAM)**: `qwen2.5:3b` or `gemma2:9b` +- **Best quality (32GB+ RAM)**: `qwen2.5-coder:7b` or `qwen2.5:7b` + +**Pull your chosen model:** + +```bash +# For 8GB RAM systems +ollama pull tinyllama:1.1b +# or +ollama pull gemma2:2b + +# For 16GB RAM systems (recommended) +ollama pull qwen2.5:3b +# or +ollama pull gemma2:9b + +# For 32GB+ RAM systems (best quality) +ollama pull qwen2.5-coder:7b +# or +ollama pull qwen2.5:7b +``` + +**Update your `flakestorm.yaml` to use your chosen model:** + +```yaml +model: + provider: "ollama" + name: "qwen2.5:3b" # Change to your chosen model + base_url: "http://localhost:11434" +``` + +**Note:** Smaller models are faster but may produce less diverse mutations. Larger models produce higher quality mutations but require more RAM and are slower. For most users, `qwen2.5:3b` or `gemma2:9b` provides the best balance. 
+ ### Step 3: Create Virtual Environment and Install flakestorm **CRITICAL: Python 3.10+ Required!** @@ -375,10 +426,22 @@ maturin build --release # 4. Remove any old wheels (if they exist) rm -f ../target/wheels/entropix_rust-*.whl # Remove old wheels with wrong name -# 5. Install the new wheel (use specific pattern to avoid old wheels) -pip install ../target/wheels/flakestorm_rust-*.whl +# 5. List available wheel files to get the exact filename +# On Linux/macOS: +ls ../target/wheels/flakestorm_rust-*.whl +# On Windows (PowerShell): +# Get-ChildItem ..\target\wheels\flakestorm_rust-*.whl -# 6. Verify installation +# 6. Install the wheel using the FULL filename (wildcard pattern may not work) +# Copy the exact filename from step 5 and use it here: +# Example for Windows: +# pip install ../target/wheels/flakestorm_rust-0.1.0-cp311-cp311-win_amd64.whl +# Example for Linux: +# pip install ../target/wheels/flakestorm_rust-0.1.0-cp311-cp311-manylinux_2_34_x86_64.whl +# Example for macOS: +# pip install ../target/wheels/flakestorm_rust-0.1.0-cp311-cp311-macosx_10_9_x86_64.whl + +# 7. 
Verify installation python -c "import flakestorm_rust; print('Rust extension installed successfully!')" ``` @@ -994,17 +1057,22 @@ mutations: length_extremes: 1.2 # ============================================================================= -# LLM CONFIGURATION (for mutation generation) +# MODEL CONFIGURATION (for mutation generation) # ============================================================================= -llm: - # Ollama model to use - model: "qwen2.5-coder:7b" +model: + # Model provider: "ollama" (default) + provider: "ollama" + + # Model name (must be pulled in Ollama first) + # See "Choosing the Right Model for Your System" section above for recommendations + # based on your RAM: 8GB (tinyllama:1.1b), 16GB (qwen2.5:3b), 32GB+ (qwen2.5-coder:7b) + name: "qwen2.5-coder:7b" # Ollama server URL - host: "http://localhost:11434" + base_url: "http://localhost:11434" - # Generation temperature (higher = more creative mutations) - temperature: 0.8 + # Optional: Generation temperature (higher = more creative mutations) + # temperature: 0.8 # ============================================================================= # INVARIANTS (ASSERTIONS) diff --git a/flakestorm-generate-search-queries.yaml b/flakestorm-generate-search-queries.yaml index 4c9b406..14c0a37 100644 --- a/flakestorm-generate-search-queries.yaml +++ b/flakestorm-generate-search-queries.yaml @@ -34,7 +34,7 @@ agent: # Recommended for 8GB RAM: qwen2.5:1.5b (fastest), tinyllama (smallest), or phi3:mini (best quality) model: provider: "ollama" - name: "tinyllama" # Small, fast model optimized for 8GB RAM + name: "gemma3:1b" # Small, fast model optimized for 8GB RAM base_url: "http://localhost:11434" # ============================================================================= @@ -42,7 +42,7 @@ model: # ============================================================================= mutations: # Number of mutations to generate per golden prompt - count: 3 + count: 20 # Types of mutations to apply 
types: diff --git a/flakestorm_demo.gif b/flakestorm_demo.gif new file mode 100644 index 0000000..b4e61f1 Binary files /dev/null and b/flakestorm_demo.gif differ diff --git a/flakestorm_test_reporting.gif b/flakestorm_test_reporting.gif new file mode 100644 index 0000000..7ff86c7 Binary files /dev/null and b/flakestorm_test_reporting.gif differ diff --git a/src/flakestorm/core/orchestrator.py b/src/flakestorm/core/orchestrator.py index 5ac12d1..3025dc4 100644 --- a/src/flakestorm/core/orchestrator.py +++ b/src/flakestorm/core/orchestrator.py @@ -26,7 +26,7 @@ from rich.progress import ( ) # Configuration limits for local hardware constraints -MAX_MUTATIONS_PER_RUN = 50 +MAX_MUTATIONS_PER_RUN = 200 PARALLEL_EXECUTION_ENABLED = False # Sequential execution for local hardware if TYPE_CHECKING: diff --git a/src/flakestorm/reports/html.py b/src/flakestorm/reports/html.py index 8abe178..1dc8be2 100644 --- a/src/flakestorm/reports/html.py +++ b/src/flakestorm/reports/html.py @@ -13,12 +13,12 @@ from __future__ import annotations import json from datetime import datetime from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from jinja2 import Template if TYPE_CHECKING: - from flakestorm.reports.models import TestResults + from flakestorm.reports.models import MutationResult, TestResults HTML_TEMPLATE = """ @@ -461,6 +461,77 @@ HTML_TEMPLATE = """ + {% if summary.total_failures > 0 %} +
+

📋 Executive Summary &amp; Action Items

+
+
+

Overall Assessment

+

+ Your agent has a {{ score_percent }}% robustness score with + {{ failed_mutations }} failures out of {{ total_mutations }} tests. + {% if score_percent < 70 %} + ⚠️ This indicates significant vulnerabilities that need immediate attention. + {% elif score_percent < 85 %} + ⚠️ Your agent needs improvement before production deployment. + {% else %} + ✓ Your agent shows good robustness, but there's room for improvement. + {% endif %} +

+
+ + {% if summary.recommendations %} +
+

Priority Action Items

+
+ {% for rec in summary.recommendations %} +
+
+
+ + {{ rec.priority }} Priority + +

{{ rec.issue }}

+
+ + {{ rec.count }} occurrence{{ 's' if rec.count != 1 else '' }} + +
+

{{ rec.action }}

+
+ {% endfor %} +
+
+ {% endif %} + + {% if summary.top_issues %} +
+

Top Failure Types

+
+ {% for issue in summary.top_issues %} +
+
+ {{ issue.type.replace('_', ' ').title() }} +
+
{{ issue.count }}
+
+ {% endfor %} +
+
+ {% endif %} +
+
+ {% endif %} +

📊 By Mutation Type

@@ -512,24 +583,38 @@ HTML_TEMPLATE = """