Fix .gitignore to exclude root reports folder while tracking src/flakestorm/reports source code - Change reports/ to /reports/ to only ignore root reports directory - Add !src/flakestorm/reports/ to explicitly include source code module - Add reports module source files to repository

2026-04-25 00:36:54 +02:00 · 2026-01-02 18:21:08 +08:00 · 2026-01-02 18:21:08 +08:00 · 8fc291d186
commit 8fc291d186
parent 661445c7b8
6 changed files with 1180 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -80,10 +80,12 @@ Cargo.lock
 # =============================================================================
 # Project-specific
 # =============================================================================
-# Generated reports
-reports/
+# Generated reports (root only, not src/flakestorm/reports/)
+/reports/
 *.html
 !docs/*.html
+# Explicitly include source code reports module
+!src/flakestorm/reports/

 # Local configuration (may contain secrets)
 flakestorm.yaml
--- a/src/flakestorm/reports/__init__.py
+++ b/src/flakestorm/reports/__init__.py
@ -0,0 +1,30 @@
+"""
+flakestorm Reports Module
+
+Provides report generation in multiple formats:
+- Interactive HTML reports
+- JSON exports
+- Terminal output
+"""
+
+from flakestorm.reports.html import HTMLReportGenerator
+from flakestorm.reports.json_export import JSONReportGenerator
+from flakestorm.reports.models import (
+    CheckResult,
+    MutationResult,
+    TestResults,
+    TestStatistics,
+    TypeStatistics,
+)
+from flakestorm.reports.terminal import TerminalReporter
+
+__all__ = [
+    "TestResults",
+    "TestStatistics",
+    "MutationResult",
+    "CheckResult",
+    "TypeStatistics",
+    "HTMLReportGenerator",
+    "JSONReportGenerator",
+    "TerminalReporter",
+]
--- a/src/flakestorm/reports/html.py
+++ b/src/flakestorm/reports/html.py
@ -0,0 +1,655 @@
+"""
+HTML Report Generator
+
+Generates interactive HTML reports with:
+- Robustness score visualization
+- Pass/fail matrix grid
+- Drill-down into failed mutations
+- Latency charts
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from jinja2 import Template
+
+if TYPE_CHECKING:
+    from flakestorm.reports.models import TestResults
+
+
+HTML_TEMPLATE = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>flakestorm Report - {{ report_date }}</title>
+    <style>
+        :root {
+            --bg-primary: #0a0a0f;
+            --bg-secondary: #12121a;
+            --bg-card: #1a1a24;
+            --text-primary: #e8e8ed;
+            --text-secondary: #8b8b9e;
+            --accent: #6366f1;
+            --accent-light: #818cf8;
+            --success: #22c55e;
+            --danger: #ef4444;
+            --warning: #f59e0b;
+            --border: #2a2a3a;
+        }
+
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            background: var(--bg-primary);
+            color: var(--text-primary);
+            line-height: 1.6;
+            min-height: 100vh;
+        }
+
+        .container {
+            max-width: 1400px;
+            margin: 0 auto;
+            padding: 2rem;
+        }
+
+        header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 2rem;
+            padding-bottom: 1rem;
+            border-bottom: 1px solid var(--border);
+        }
+
+        .logo {
+            display: flex;
+            align-items: center;
+            gap: 0.75rem;
+        }
+
+        .logo-icon {
+            width: 40px;
+            height: 40px;
+            background: linear-gradient(135deg, var(--accent), var(--accent-light));
+            border-radius: 10px;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            font-weight: bold;
+            font-size: 1.25rem;
+        }
+
+        .logo-text {
+            font-size: 1.5rem;
+            font-weight: 600;
+        }
+
+        .report-meta {
+            text-align: right;
+            color: var(--text-secondary);
+            font-size: 0.875rem;
+        }
+
+        .score-section {
+            display: grid;
+            grid-template-columns: 1fr 2fr;
+            gap: 2rem;
+            margin-bottom: 2rem;
+        }
+
+        .score-card {
+            background: var(--bg-card);
+            border-radius: 16px;
+            padding: 2rem;
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+        }
+
+        .score-ring {
+            position: relative;
+            width: 180px;
+            height: 180px;
+        }
+
+        .score-ring svg {
+            transform: rotate(-90deg);
+        }
+
+        .score-ring circle {
+            fill: none;
+            stroke-width: 12;
+        }
+
+        .score-ring .bg {
+            stroke: var(--border);
+        }
+
+        .score-ring .progress {
+            stroke: var(--accent);
+            stroke-linecap: round;
+            transition: stroke-dashoffset 1s ease-out;
+        }
+
+        .score-value {
+            position: absolute;
+            top: 50%;
+            left: 50%;
+            transform: translate(-50%, -50%);
+            font-size: 2.5rem;
+            font-weight: 700;
+        }
+
+        .score-label {
+            margin-top: 1rem;
+            font-size: 1.125rem;
+            color: var(--text-secondary);
+        }
+
+        .stats-grid {
+            display: grid;
+            grid-template-columns: repeat(2, 1fr);
+            gap: 1rem;
+        }
+
+        .stat-card {
+            background: var(--bg-card);
+            border-radius: 12px;
+            padding: 1.25rem;
+        }
+
+        .stat-label {
+            font-size: 0.875rem;
+            color: var(--text-secondary);
+            margin-bottom: 0.5rem;
+        }
+
+        .stat-value {
+            font-size: 1.5rem;
+            font-weight: 600;
+        }
+
+        .stat-value.success { color: var(--success); }
+        .stat-value.danger { color: var(--danger); }
+
+        .section {
+            margin-bottom: 2rem;
+        }
+
+        .section-title {
+            font-size: 1.25rem;
+            font-weight: 600;
+            margin-bottom: 1rem;
+            display: flex;
+            align-items: center;
+            gap: 0.5rem;
+        }
+
+        .matrix-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
+            gap: 1rem;
+        }
+
+        .matrix-cell {
+            background: var(--bg-card);
+            border-radius: 12px;
+            padding: 1rem;
+            cursor: pointer;
+            transition: transform 0.2s, box-shadow 0.2s;
+        }
+
+        .matrix-cell:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3);
+        }
+
+        .matrix-cell.passed {
+            border-left: 4px solid var(--success);
+        }
+
+        .matrix-cell.failed {
+            border-left: 4px solid var(--danger);
+        }
+
+        .mutation-type {
+            font-size: 0.75rem;
+            text-transform: uppercase;
+            letter-spacing: 0.05em;
+            color: var(--text-secondary);
+            margin-bottom: 0.5rem;
+        }
+
+        .mutation-text {
+            font-size: 0.875rem;
+            line-height: 1.4;
+            overflow: hidden;
+            text-overflow: ellipsis;
+            display: -webkit-box;
+            -webkit-line-clamp: 2;
+            -webkit-box-orient: vertical;
+        }
+
+        .mutation-meta {
+            display: flex;
+            justify-content: space-between;
+            margin-top: 0.75rem;
+            font-size: 0.75rem;
+            color: var(--text-secondary);
+        }
+
+        .type-breakdown {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 1rem;
+        }
+
+        .type-card {
+            background: var(--bg-card);
+            border-radius: 12px;
+            padding: 1.25rem;
+        }
+
+        .type-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 1rem;
+        }
+
+        .type-name {
+            font-weight: 600;
+            text-transform: capitalize;
+        }
+
+        .type-rate {
+            font-size: 1.125rem;
+            font-weight: 600;
+        }
+
+        .progress-bar {
+            height: 8px;
+            background: var(--border);
+            border-radius: 4px;
+            overflow: hidden;
+        }
+
+        .progress-fill {
+            height: 100%;
+            background: linear-gradient(90deg, var(--accent), var(--accent-light));
+            border-radius: 4px;
+            transition: width 0.5s ease-out;
+        }
+
+        .modal {
+            display: none;
+            position: fixed;
+            inset: 0;
+            background: rgba(0, 0, 0, 0.8);
+            z-index: 1000;
+            align-items: center;
+            justify-content: center;
+            padding: 2rem;
+        }
+
+        .modal.active {
+            display: flex;
+        }
+
+        .modal-content {
+            background: var(--bg-secondary);
+            border-radius: 16px;
+            max-width: 800px;
+            width: 100%;
+            max-height: 80vh;
+            overflow-y: auto;
+            padding: 2rem;
+        }
+
+        .modal-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 1.5rem;
+        }
+
+        .modal-close {
+            background: none;
+            border: none;
+            color: var(--text-secondary);
+            font-size: 1.5rem;
+            cursor: pointer;
+        }
+
+        .detail-section {
+            margin-bottom: 1.5rem;
+        }
+
+        .detail-label {
+            font-size: 0.875rem;
+            color: var(--text-secondary);
+            margin-bottom: 0.5rem;
+        }
+
+        .detail-content {
+            background: var(--bg-card);
+            border-radius: 8px;
+            padding: 1rem;
+            font-family: 'SF Mono', 'Fira Code', monospace;
+            font-size: 0.875rem;
+            white-space: pre-wrap;
+            word-break: break-word;
+        }
+
+        .check-list {
+            list-style: none;
+        }
+
+        .check-item {
+            display: flex;
+            align-items: flex-start;
+            gap: 0.75rem;
+            padding: 0.75rem;
+            background: var(--bg-card);
+            border-radius: 8px;
+            margin-bottom: 0.5rem;
+        }
+
+        .check-icon {
+            width: 20px;
+            height: 20px;
+            border-radius: 50%;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            flex-shrink: 0;
+            font-size: 0.75rem;
+        }
+
+        .check-icon.passed {
+            background: var(--success);
+            color: white;
+        }
+
+        .check-icon.failed {
+            background: var(--danger);
+            color: white;
+        }
+
+        .check-details {
+            flex: 1;
+        }
+
+        .check-type {
+            font-weight: 600;
+            text-transform: capitalize;
+        }
+
+        .check-message {
+            font-size: 0.875rem;
+            color: var(--text-secondary);
+        }
+
+        @media (max-width: 768px) {
+            .score-section {
+                grid-template-columns: 1fr;
+            }
+
+            .stats-grid {
+                grid-template-columns: 1fr;
+            }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <header>
+            <div class="logo">
+                <div class="logo-icon">E</div>
+                <span class="logo-text">flakestorm</span>
+            </div>
+            <div class="report-meta">
+                <div>{{ report_date }}</div>
+                <div>Duration: {{ duration }}s</div>
+            </div>
+        </header>
+
+        <div class="score-section">
+            <div class="score-card">
+                <div class="score-ring">
+                    <svg width="180" height="180">
+                        <circle class="bg" cx="90" cy="90" r="78"></circle>
+                        <circle class="progress" cx="90" cy="90" r="78"
+                            stroke-dasharray="{{ circumference }}"
+                            stroke-dashoffset="{{ score_offset }}">
+                        </circle>
+                    </svg>
+                    <div class="score-value">{{ score_percent }}%</div>
+                </div>
+                <div class="score-label">Robustness Score</div>
+            </div>
+
+            <div class="stats-grid">
+                <div class="stat-card">
+                    <div class="stat-label">Total Mutations</div>
+                    <div class="stat-value">{{ total_mutations }}</div>
+                </div>
+                <div class="stat-card">
+                    <div class="stat-label">Passed</div>
+                    <div class="stat-value success">{{ passed_mutations }}</div>
+                </div>
+                <div class="stat-card">
+                    <div class="stat-label">Failed</div>
+                    <div class="stat-value danger">{{ failed_mutations }}</div>
+                </div>
+                <div class="stat-card">
+                    <div class="stat-label">Avg Latency</div>
+                    <div class="stat-value">{{ avg_latency }}ms</div>
+                </div>
+            </div>
+        </div>
+
+        <div class="section">
+            <h2 class="section-title">📊 By Mutation Type</h2>
+            <div class="type-breakdown">
+                {% for type_stat in type_stats %}
+                <div class="type-card">
+                    <div class="type-header">
+                        <span class="type-name">{{ type_stat.mutation_type }}</span>
+                        <span class="type-rate">{{ type_stat.pass_rate_percent }}%</span>
+                    </div>
+                    <div class="progress-bar">
+                        <div class="progress-fill" style="width: {{ type_stat.pass_rate_percent }}%"></div>
+                    </div>
+                    <div style="margin-top: 0.5rem; font-size: 0.875rem; color: var(--text-secondary);">
+                        {{ type_stat.passed }}/{{ type_stat.total }} passed
+                    </div>
+                </div>
+                {% endfor %}
+            </div>
+        </div>
+
+        <div class="section">
+            <h2 class="section-title">🔬 Mutation Results</h2>
+            <div class="matrix-grid">
+                {% for result in mutations %}
+                <div class="matrix-cell {{ 'passed' if result.passed else 'failed' }}"
+                     onclick="showDetail({{ loop.index0 }})">
+                    <div class="mutation-type">{{ result.mutation.type }}</div>
+                    <div class="mutation-text">{{ result.mutation.mutated[:100] }}...</div>
+                    <div class="mutation-meta">
+                        <span>{{ result.latency_ms|round(0)|int }}ms</span>
+                        <span>{{ '✓' if result.passed else '✗' }}</span>
+                    </div>
+                </div>
+                {% endfor %}
+            </div>
+        </div>
+    </div>
+
+    <div class="modal" id="detail-modal">
+        <div class="modal-content">
+            <div class="modal-header">
+                <h3>Mutation Details</h3>
+                <button class="modal-close" onclick="closeModal()">×</button>
+            </div>
+            <div id="modal-body"></div>
+        </div>
+    </div>
+
+    <script>
+        const mutations = {{ mutations_json|safe }};
+
+        // Populate and open the detail modal for the mutation at `index`
+        // in the embedded `mutations` array.
+        function showDetail(index) {
+            const m = mutations[index];
+            const modal = document.getElementById('detail-modal');
+            const body = document.getElementById('modal-body');
+
+            // Prompts, agent responses and check details are untrusted text
+            // and may contain markup. Each value is HTML-escaped (via a
+            // detached element's textContent) before it is interpolated
+            // into innerHTML, so it is displayed rather than executed.
+            const esc = (value) => {
+                const holder = document.createElement('div');
+                holder.textContent = String(value);
+                return holder.innerHTML;
+            };
+
+            body.innerHTML = `
+                <div class="detail-section">
+                    <div class="detail-label">Original Prompt</div>
+                    <div class="detail-content">${esc(m.original_prompt)}</div>
+                </div>
+                <div class="detail-section">
+                    <div class="detail-label">Mutated (${esc(m.mutation.type)})</div>
+                    <div class="detail-content">${esc(m.mutation.mutated)}</div>
+                </div>
+                <div class="detail-section">
+                    <div class="detail-label">Agent Response</div>
+                    <div class="detail-content">${esc(m.response || '(empty)')}</div>
+                </div>
+                <div class="detail-section">
+                    <div class="detail-label">Invariant Checks</div>
+                    <ul class="check-list">
+                        ${m.checks.map(c => `
+                            <li class="check-item">
+                                <div class="check-icon ${c.passed ? 'passed' : 'failed'}">
+                                    ${c.passed ? '✓' : '✗'}
+                                </div>
+                                <div class="check-details">
+                                    <div class="check-type">${esc(c.check_type)}</div>
+                                    <div class="check-message">${esc(c.details)}</div>
+                                </div>
+                            </li>
+                        `).join('')}
+                    </ul>
+                </div>
+            `;
+
+            modal.classList.add('active');
+        }
+
+        function closeModal() {
+            document.getElementById('detail-modal').classList.remove('active');
+        }
+
+        document.getElementById('detail-modal').addEventListener('click', (e) => {
+            if (e.target.id === 'detail-modal') closeModal();
+        });
+
+        document.addEventListener('keydown', (e) => {
+            if (e.key === 'Escape') closeModal();
+        });
+    </script>
+</body>
+</html>
+"""
+
+
+class HTMLReportGenerator:
+    """
+    Generates interactive HTML reports from test results.
+
+    Creates a single-file HTML report with embedded CSS and JavaScript
+    for easy sharing and viewing.
+
+    Values rendered into the page (prompts, mutated prompts, mutation type
+    names) go through Jinja2 autoescaping, and the JSON payload embedded in
+    the page's ``<script>`` element is escaped so that text such as
+    ``</script>`` inside a response cannot terminate the element early.
+    """
+
+    def __init__(self, results: TestResults):
+        """
+        Initialize the generator.
+
+        Args:
+            results: Test results to generate report from
+        """
+        self.results = results
+        # autoescape=True HTML-escapes every ``{{ ... }}`` expression except
+        # the ones the template explicitly marks with ``|safe``.
+        self.template = Template(HTML_TEMPLATE, autoescape=True)
+
+    @staticmethod
+    def _script_safe_json(data: object) -> str:
+        """
+        Serialize ``data`` to JSON that is safe inside a ``<script>`` element.
+
+        ``<``, ``>`` and ``&`` only ever occur inside JSON string literals, so
+        replacing them with their ``\\uXXXX`` escapes keeps the decoded value
+        identical while removing any ``</script>`` or ``<!--`` sequence from
+        the emitted text.
+
+        Args:
+            data: JSON-serializable object
+
+        Returns:
+            JSON text containing no ``<``, ``>`` or ``&`` characters
+        """
+        return (
+            json.dumps(data)
+            .replace("<", "\\u003c")
+            .replace(">", "\\u003e")
+            .replace("&", "\\u0026")
+        )
+
+    def generate(self) -> str:
+        """
+        Generate the HTML report.
+
+        Returns:
+            Complete HTML document as a string
+        """
+        stats = self.results.statistics
+
+        # Calculate score ring values. 78 is the radius of the SVG circles
+        # in HTML_TEMPLATE (r="78"); the dash offset hides the unscored part
+        # of the ring.
+        circumference = 2 * 3.14159 * 78
+        score_offset = circumference * (1 - stats.robustness_score)
+
+        # Prepare type stats
+        type_stats = [
+            {
+                "mutation_type": t.mutation_type.replace("_", " "),
+                "total": t.total,
+                "passed": t.passed,
+                "pass_rate_percent": round(t.pass_rate * 100, 1),
+            }
+            for t in stats.by_type
+        ]
+
+        # Prepare mutations data
+        mutations_data = [m.to_dict() for m in self.results.mutations]
+
+        return self.template.render(
+            report_date=self.results.started_at.strftime("%Y-%m-%d %H:%M:%S"),
+            duration=round(self.results.duration, 1),
+            circumference=circumference,
+            score_offset=score_offset,
+            score_percent=round(stats.robustness_score * 100, 1),
+            total_mutations=stats.total_mutations,
+            passed_mutations=stats.passed_mutations,
+            failed_mutations=stats.failed_mutations,
+            avg_latency=round(stats.avg_latency_ms),
+            type_stats=type_stats,
+            mutations=self.results.mutations,
+            # Rendered with ``|safe`` inside <script>, so it must already be
+            # free of characters that could close the element.
+            mutations_json=self._script_safe_json(mutations_data),
+        )
+
+    def save(self, path: str | Path | None = None) -> Path:
+        """
+        Save the HTML report to a file.
+
+        Args:
+            path: Output path (default: auto-generated in reports dir)
+
+        Returns:
+            Path to the saved file
+        """
+        if path is None:
+            output_dir = Path(self.results.config.output.path)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+            filename = f"flakestorm-{timestamp}.html"
+            path = output_dir / filename
+        else:
+            path = Path(path)
+            path.parent.mkdir(parents=True, exist_ok=True)
+
+        html = self.generate()
+        path.write_text(html, encoding="utf-8")
+
+        return path
--- a/src/flakestorm/reports/json_export.py
+++ b/src/flakestorm/reports/json_export.py
@ -0,0 +1,115 @@
+"""
+JSON Report Generator
+
+Exports test results to JSON format for programmatic consumption
+and integration with other tools.
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from flakestorm.reports.models import TestResults
+
+
+class JSONReportGenerator:
+    """
+    Serializes test results to JSON.
+
+    The output is intended for:
+    - CI/CD pipeline consumption
+    - Data analysis tools
+    - Dashboard integrations
+    """
+
+    def __init__(self, results: TestResults):
+        """
+        Keep a reference to the results that will be exported.
+
+        Args:
+            results: Test results to generate report from
+        """
+        self.results = results
+
+    def generate(self, pretty: bool = True) -> str:
+        """
+        Render the full results as a JSON document.
+
+        Args:
+            pretty: Indent the output by two spaces when true; emit the
+                compact single-line form otherwise
+
+        Returns:
+            JSON string
+        """
+        payload = self.results.to_dict()
+        # Values json cannot encode natively are stringified via default=str.
+        indent = 2 if pretty else None
+        return json.dumps(payload, indent=indent, default=str)
+
+    def generate_summary(self) -> dict[str, Any]:
+        """
+        Build the summary-only report (aggregate numbers, no mutation details).
+
+        Useful for quick status checks in CI/CD.
+
+        Returns:
+            Dictionary of run timing, aggregate statistics and a per-type
+            breakdown keyed by mutation type
+        """
+        run = self.results
+        stats = run.statistics
+
+        summary: dict[str, Any] = {
+            "version": "1.0",
+            "started_at": run.started_at.isoformat(),
+            "completed_at": run.completed_at.isoformat(),
+            "duration_seconds": run.duration,
+            "robustness_score": stats.robustness_score,
+            "pass_rate": stats.pass_rate,
+            "total_mutations": stats.total_mutations,
+            "passed_mutations": stats.passed_mutations,
+            "failed_mutations": stats.failed_mutations,
+            "avg_latency_ms": stats.avg_latency_ms,
+            "p95_latency_ms": stats.p95_latency_ms,
+        }
+
+        per_type: dict[str, Any] = {}
+        for entry in stats.by_type:
+            per_type[entry.mutation_type] = {
+                "total": entry.total,
+                "passed": entry.passed,
+                "pass_rate": entry.pass_rate,
+            }
+        summary["by_type"] = per_type
+
+        return summary
+
+    def save(self, path: str | Path | None = None, summary_only: bool = False) -> Path:
+        """
+        Write the JSON report to disk, creating parent directories as needed.
+
+        Args:
+            path: Output path (default: auto-generated in reports dir)
+            summary_only: Only include summary, no mutation details
+
+        Returns:
+            Path to the saved file
+        """
+        if path is not None:
+            target = Path(path)
+            target.parent.mkdir(parents=True, exist_ok=True)
+        else:
+            # No explicit path: place a timestamped file in the configured
+            # output directory.
+            report_dir = Path(self.results.config.output.path)
+            report_dir.mkdir(parents=True, exist_ok=True)
+
+            stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+            tag = "-summary" if summary_only else ""
+            target = report_dir / f"flakestorm-{stamp}{tag}.json"
+
+        if summary_only:
+            text = json.dumps(self.generate_summary(), indent=2, default=str)
+        else:
+            text = self.generate()
+
+        target.write_text(text, encoding="utf-8")
+
+        return target
--- a/src/flakestorm/reports/models.py
+++ b/src/flakestorm/reports/models.py
@ -0,0 +1,220 @@
+"""
+Report Data Models
+
+Data structures for representing test results and statistics.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from flakestorm.core.config import FlakeStormConfig
+    from flakestorm.mutations.types import Mutation
+
+
+@dataclass
+class CheckResult:
+    """Outcome of one invariant check."""
+
+    check_type: str
+    """Kind of check that was run (e.g., 'latency', 'contains')."""
+
+    passed: bool
+    """True when the check succeeded."""
+
+    details: str
+    """Human-readable explanation of the outcome."""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dictionary for serialization."""
+        exported = ("check_type", "passed", "details")
+        return {name: getattr(self, name) for name in exported}
+
+
+@dataclass
+class TypeStatistics:
+    """Pass/fail counts for one mutation type."""
+
+    mutation_type: str
+    """Name of the mutation type."""
+
+    total: int
+    """How many tests of this type were run."""
+
+    passed: int
+    """How many of those tests passed."""
+
+    pass_rate: float
+    """Pass rate as a decimal (0.0 to 1.0)."""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields, plus a derived ``failed`` count, as a dictionary."""
+        failed = self.total - self.passed
+        return dict(
+            mutation_type=self.mutation_type,
+            total=self.total,
+            passed=self.passed,
+            failed=failed,
+            pass_rate=self.pass_rate,
+        )
+
+
+@dataclass
+class TestStatistics:
+    """Aggregate statistics for a test run."""
+
+    # The class name matches pytest's default ``Test*`` collection pattern.
+    # Marking it as a non-test stops pytest from trying to collect it (and
+    # emitting PytestCollectionWarning) when it is imported into test
+    # modules. Unannotated, so it is a plain class attribute, not a field.
+    __test__ = False
+
+    total_mutations: int
+    """Total number of mutations tested."""
+
+    passed_mutations: int
+    """Number of mutations that passed all checks."""
+
+    failed_mutations: int
+    """Number of mutations that failed one or more checks."""
+
+    robustness_score: float
+    """Weighted robustness score (0.0 to 1.0)."""
+
+    avg_latency_ms: float
+    """Average response latency in milliseconds."""
+
+    p50_latency_ms: float
+    """50th percentile (median) latency."""
+
+    p95_latency_ms: float
+    """95th percentile latency."""
+
+    p99_latency_ms: float
+    """99th percentile latency."""
+
+    by_type: list[TypeStatistics] = field(default_factory=list)
+    """Statistics broken down by mutation type."""
+
+    duration_seconds: float = 0.0
+    """Total test duration in seconds."""
+
+    @property
+    def pass_rate(self) -> float:
+        """Simple pass rate (passed / total); 0.0 when nothing was tested."""
+        if self.total_mutations == 0:
+            # Avoid ZeroDivisionError for an empty run.
+            return 0.0
+        return self.passed_mutations / self.total_mutations
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization.
+
+        Includes the derived ``pass_rate`` and the per-type breakdown, each
+        entry serialized through its own ``to_dict()``.
+        """
+        return {
+            "total_mutations": self.total_mutations,
+            "passed_mutations": self.passed_mutations,
+            "failed_mutations": self.failed_mutations,
+            "robustness_score": self.robustness_score,
+            "pass_rate": self.pass_rate,
+            "avg_latency_ms": self.avg_latency_ms,
+            "p50_latency_ms": self.p50_latency_ms,
+            "p95_latency_ms": self.p95_latency_ms,
+            "p99_latency_ms": self.p99_latency_ms,
+            "duration_seconds": self.duration_seconds,
+            "by_type": [t.to_dict() for t in self.by_type],
+        }
+
+
+@dataclass
+class MutationResult:
+    """Outcome of running one mutated prompt."""
+
+    original_prompt: str
+    """The golden prompt the mutation was derived from."""
+
+    mutation: Mutation
+    """The mutation that was tested."""
+
+    response: str
+    """The agent's response."""
+
+    latency_ms: float
+    """Response latency in milliseconds."""
+
+    passed: bool
+    """Whether all invariant checks passed."""
+
+    checks: list[CheckResult] = field(default_factory=list)
+    """Individual check results."""
+
+    error: str | None = None
+    """Error message if the agent call failed."""
+
+    @property
+    def failed_checks(self) -> list[CheckResult]:
+        """Return the checks whose ``passed`` flag is false, in order."""
+        failures = []
+        for check in self.checks:
+            if not check.passed:
+                failures.append(check)
+        return failures
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return a serializable dictionary; nested objects use their own ``to_dict()``."""
+        mutation_payload = self.mutation.to_dict()
+        check_payloads = [check.to_dict() for check in self.checks]
+        return dict(
+            original_prompt=self.original_prompt,
+            mutation=mutation_payload,
+            response=self.response,
+            latency_ms=self.latency_ms,
+            passed=self.passed,
+            checks=check_payloads,
+            error=self.error,
+        )
+
+
+@dataclass
+class TestResults:
+    """Complete results from a test run."""
+
+    # The class name matches pytest's default ``Test*`` collection pattern.
+    # Marking it as a non-test stops pytest from trying to collect it (and
+    # emitting PytestCollectionWarning) when it is imported into test
+    # modules. Unannotated, so it is a plain class attribute, not a field.
+    __test__ = False
+
+    config: FlakeStormConfig
+    """Configuration used for the test."""
+
+    started_at: datetime
+    """When the test started."""
+
+    completed_at: datetime
+    """When the test completed."""
+
+    mutations: list[MutationResult]
+    """Results for each mutation."""
+
+    statistics: TestStatistics
+    """Aggregate statistics."""
+
+    @property
+    def duration(self) -> float:
+        """Test duration in seconds (``completed_at - started_at``)."""
+        return (self.completed_at - self.started_at).total_seconds()
+
+    @property
+    def passed_mutations(self) -> list[MutationResult]:
+        """Get mutations that passed."""
+        return [m for m in self.mutations if m.passed]
+
+    @property
+    def failed_mutations(self) -> list[MutationResult]:
+        """Get mutations that failed."""
+        return [m for m in self.mutations if not m.passed]
+
+    def get_by_type(self, mutation_type: str) -> list[MutationResult]:
+        """Get mutations of a specific type.
+
+        Args:
+            mutation_type: Compared against ``mutation.type.value`` of each
+                result
+        """
+        return [m for m in self.mutations if m.mutation.type.value == mutation_type]
+
+    def get_by_prompt(self, prompt: str) -> list[MutationResult]:
+        """Get mutations for a specific golden prompt (exact string match)."""
+        return [m for m in self.mutations if m.original_prompt == prompt]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization.
+
+        Timestamps are emitted in ISO 8601 form; statistics and mutations
+        are serialized through their own ``to_dict()`` methods.
+        """
+        return {
+            "version": "1.0",
+            "started_at": self.started_at.isoformat(),
+            "completed_at": self.completed_at.isoformat(),
+            "duration_seconds": self.duration,
+            "statistics": self.statistics.to_dict(),
+            "mutations": [m.to_dict() for m in self.mutations],
+            "golden_prompts": self.config.golden_prompts,
+        }
--- a/src/flakestorm/reports/terminal.py
+++ b/src/flakestorm/reports/terminal.py
@ -0,0 +1,156 @@
+"""
+Terminal Report Generator
+
+Displays test results directly in the terminal using rich formatting.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from rich.console import Console
+from rich.markup import escape
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+if TYPE_CHECKING:
+    from flakestorm.reports.models import TestResults
+
+
+class TerminalReporter:
+    """
+    Displays test results in the terminal using rich formatting.
+
+    Provides colorful, informative output for interactive use. Text taken
+    from the test data (prompts, mutated prompts, check details) is escaped
+    before it is embedded in console markup, so square brackets in that data
+    are printed literally instead of being parsed as style tags.
+    """
+
+    # Maximum number of prompt characters shown per failure before truncating.
+    _PREVIEW_WIDTH = 50
+    # Width, in characters, of the text progress bar in the type breakdown.
+    _BAR_WIDTH = 15
+
+    def __init__(self, results: TestResults, console: Console | None = None):
+        """
+        Initialize the reporter.
+
+        Args:
+            results: Test results to display
+            console: Rich console (default: new console)
+        """
+        self.results = results
+        self.console = console or Console()
+
+    @staticmethod
+    def _preview(text: str, width: int = 50) -> str:
+        """
+        Return a markup-safe preview of ``text``.
+
+        The text is cut to ``width`` characters and "..." is appended only
+        when something was actually cut off. Truncation happens before
+        escaping so an escape sequence is never split in half.
+        """
+        if len(text) > width:
+            text = text[:width] + "..."
+        return escape(text)
+
+    def print_summary(self) -> None:
+        """Print a summary panel: robustness score, pass/fail counts, timings."""
+        stats = self.results.statistics
+
+        # Robustness score with color
+        score = stats.robustness_score
+        if score >= 0.9:
+            score_style = "bold green"
+            score_emoji = "🎉"
+        elif score >= 0.7:
+            score_style = "bold yellow"
+            score_emoji = "⚠️"
+        else:
+            score_style = "bold red"
+            score_emoji = "❌"
+
+        score_text = Text()
+        score_text.append(f"{score_emoji} Robustness Score: ", style="bold")
+        score_text.append(f"{score:.1%}", style=score_style)
+
+        # Every line is a Text object so per-span styles survive the join.
+        # Joining via str() would reduce each Text to its plain characters
+        # and silently drop the score / pass / fail colors.
+        summary_lines = [
+            score_text,
+            Text(),
+            Text(f"Total Mutations: {stats.total_mutations}"),
+            Text.assemble(
+                ("Passed: ", ""),
+                (str(stats.passed_mutations), "green"),
+                (" | Failed: ", ""),
+                (str(stats.failed_mutations), "red"),
+            ),
+            Text(),
+            Text(f"Avg Latency: {stats.avg_latency_ms:.0f}ms"),
+            Text(f"P95 Latency: {stats.p95_latency_ms:.0f}ms"),
+            Text(f"Duration: {self.results.duration:.1f}s"),
+        ]
+
+        panel_content = Text("\n").join(summary_lines)
+
+        self.console.print(
+            Panel(
+                panel_content,
+                title="flakestorm Results",
+                border_style="blue",
+            )
+        )
+
+    def print_type_breakdown(self) -> None:
+        """Print a table with pass/fail counts and pass rate per mutation type."""
+        stats = self.results.statistics
+
+        table = Table(title="By Mutation Type", show_header=True)
+        table.add_column("Type", style="cyan")
+        table.add_column("Passed", justify="right", style="green")
+        table.add_column("Failed", justify="right", style="red")
+        table.add_column("Pass Rate", justify="right")
+        table.add_column("Progress", width=20)
+
+        bar_width = self._BAR_WIDTH
+        for type_stat in stats.by_type:
+            # Simple text-based progress bar; the fill is clamped so an
+            # out-of-range pass rate cannot produce a bar of the wrong width.
+            filled = max(0, min(bar_width, int(type_stat.pass_rate * bar_width)))
+            bar = "█" * filled + "░" * (bar_width - filled)
+
+            table.add_row(
+                type_stat.mutation_type.replace("_", " ").title(),
+                str(type_stat.passed),
+                str(type_stat.total - type_stat.passed),
+                f"{type_stat.pass_rate:.1%}",
+                bar,
+            )
+
+        self.console.print(table)
+
+    def print_failures(self, limit: int = 10) -> None:
+        """
+        Print details of failed mutations.
+
+        Args:
+            limit: Maximum number of failures to show. Negative values are
+                treated as 0 (only the "more failures" hint is printed).
+        """
+        failed = self.results.failed_mutations
+
+        if not failed:
+            self.console.print("[green]✓ No failures![/green]")
+            return
+
+        # A negative limit would otherwise slice from the end of the list
+        # and make the "more failures" count wrong.
+        limit = max(limit, 0)
+
+        self.console.print(
+            f"\n[bold red]Failed Mutations ({len(failed)} total):[/bold red]"
+        )
+
+        for index, result in enumerate(failed[:limit], start=1):
+            self.console.print(
+                f"\n[bold]#{index} - {result.mutation.type.value}[/bold]"
+            )
+            original = self._preview(result.original_prompt, self._PREVIEW_WIDTH)
+            mutated = self._preview(result.mutation.mutated, self._PREVIEW_WIDTH)
+            self.console.print(f"  [dim]Original:[/dim] {original}")
+            self.console.print(f"  [dim]Mutated:[/dim] {mutated}")
+
+            for check in result.failed_checks:
+                # Check output is data, not markup: escape it before embedding.
+                check_type = escape(f"{check.check_type}")
+                details = escape(f"{check.details}")
+                self.console.print(f"  [red]✗ {check_type}:[/red] {details}")
+
+        if len(failed) > limit:
+            self.console.print(
+                f"\n[dim]...and {len(failed) - limit} more failures. "
+                "See HTML report for details.[/dim]"
+            )
+
+    def print_full_report(self) -> None:
+        """Print the summary, the per-type breakdown and the failure details."""
+        self.console.print()
+        self.print_summary()
+        self.console.print()
+        self.print_type_breakdown()
+        self.print_failures()
+        self.console.print()