mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 00:36:54 +02:00
Fix .gitignore to exclude the root reports folder while tracking the src/flakestorm/reports source code: change reports/ to /reports/ so that only the root reports directory is ignored; add !src/flakestorm/reports/ to explicitly include the source code module; add the reports module source files to the repository.
This commit is contained in:
parent
661445c7b8
commit
8fc291d186
6 changed files with 1180 additions and 2 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -80,10 +80,12 @@ Cargo.lock
|
|||
# =============================================================================
|
||||
# Project-specific
|
||||
# =============================================================================
|
||||
# Generated reports
|
||||
reports/
|
||||
# Generated reports (root only, not src/flakestorm/reports/)
|
||||
/reports/
|
||||
*.html
|
||||
!docs/*.html
|
||||
# Explicitly include source code reports module
|
||||
!src/flakestorm/reports/
|
||||
|
||||
# Local configuration (may contain secrets)
|
||||
flakestorm.yaml
|
||||
|
|
|
|||
30
src/flakestorm/reports/__init__.py
Normal file
30
src/flakestorm/reports/__init__.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""
|
||||
flakestorm Reports Module
|
||||
|
||||
Provides report generation in multiple formats:
|
||||
- Interactive HTML reports
|
||||
- JSON exports
|
||||
- Terminal output
|
||||
"""
|
||||
|
||||
from flakestorm.reports.html import HTMLReportGenerator
|
||||
from flakestorm.reports.json_export import JSONReportGenerator
|
||||
from flakestorm.reports.models import (
|
||||
CheckResult,
|
||||
MutationResult,
|
||||
TestResults,
|
||||
TestStatistics,
|
||||
TypeStatistics,
|
||||
)
|
||||
from flakestorm.reports.terminal import TerminalReporter
|
||||
|
||||
# Public names re-exported by the reports package: the result data models
# first, followed by the HTML, JSON and terminal report generators.
__all__ = [
    "TestResults",
    "TestStatistics",
    "MutationResult",
    "CheckResult",
    "TypeStatistics",
    "HTMLReportGenerator",
    "JSONReportGenerator",
    "TerminalReporter",
]
|
||||
655
src/flakestorm/reports/html.py
Normal file
655
src/flakestorm/reports/html.py
Normal file
|
|
@ -0,0 +1,655 @@
|
|||
"""
|
||||
HTML Report Generator
|
||||
|
||||
Generates interactive HTML reports with:
|
||||
- Robustness score visualization
|
||||
- Pass/fail matrix grid
|
||||
- Drill-down into failed mutations
|
||||
- Latency charts
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from jinja2 import Template
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from flakestorm.reports.models import TestResults
|
||||
|
||||
|
||||
HTML_TEMPLATE = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>flakestorm Report - {{ report_date }}</title>
|
||||
<style>
|
||||
:root {
|
||||
--bg-primary: #0a0a0f;
|
||||
--bg-secondary: #12121a;
|
||||
--bg-card: #1a1a24;
|
||||
--text-primary: #e8e8ed;
|
||||
--text-secondary: #8b8b9e;
|
||||
--accent: #6366f1;
|
||||
--accent-light: #818cf8;
|
||||
--success: #22c55e;
|
||||
--danger: #ef4444;
|
||||
--warning: #f59e0b;
|
||||
--border: #2a2a3a;
|
||||
}
|
||||
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
background: var(--bg-primary);
|
||||
color: var(--text-primary);
|
||||
line-height: 1.6;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1400px;
|
||||
margin: 0 auto;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 2rem;
|
||||
padding-bottom: 1rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.logo {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.logo-icon {
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
background: linear-gradient(135deg, var(--accent), var(--accent-light));
|
||||
border-radius: 10px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-weight: bold;
|
||||
font-size: 1.25rem;
|
||||
}
|
||||
|
||||
.logo-text {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.report-meta {
|
||||
text-align: right;
|
||||
color: var(--text-secondary);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.score-section {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 2fr;
|
||||
gap: 2rem;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.score-card {
|
||||
background: var(--bg-card);
|
||||
border-radius: 16px;
|
||||
padding: 2rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.score-ring {
|
||||
position: relative;
|
||||
width: 180px;
|
||||
height: 180px;
|
||||
}
|
||||
|
||||
.score-ring svg {
|
||||
transform: rotate(-90deg);
|
||||
}
|
||||
|
||||
.score-ring circle {
|
||||
fill: none;
|
||||
stroke-width: 12;
|
||||
}
|
||||
|
||||
.score-ring .bg {
|
||||
stroke: var(--border);
|
||||
}
|
||||
|
||||
.score-ring .progress {
|
||||
stroke: var(--accent);
|
||||
stroke-linecap: round;
|
||||
transition: stroke-dashoffset 1s ease-out;
|
||||
}
|
||||
|
||||
.score-value {
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
font-size: 2.5rem;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.score-label {
|
||||
margin-top: 1rem;
|
||||
font-size: 1.125rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.stats-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.stat-card {
|
||||
background: var(--bg-card);
|
||||
border-radius: 12px;
|
||||
padding: 1.25rem;
|
||||
}
|
||||
|
||||
.stat-label {
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.stat-value {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.stat-value.success { color: var(--success); }
|
||||
.stat-value.danger { color: var(--danger); }
|
||||
|
||||
.section {
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.section-title {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
margin-bottom: 1rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.matrix-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.matrix-cell {
|
||||
background: var(--bg-card);
|
||||
border-radius: 12px;
|
||||
padding: 1rem;
|
||||
cursor: pointer;
|
||||
transition: transform 0.2s, box-shadow 0.2s;
|
||||
}
|
||||
|
||||
.matrix-cell:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
.matrix-cell.passed {
|
||||
border-left: 4px solid var(--success);
|
||||
}
|
||||
|
||||
.matrix-cell.failed {
|
||||
border-left: 4px solid var(--danger);
|
||||
}
|
||||
|
||||
.mutation-type {
|
||||
font-size: 0.75rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.mutation-text {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.4;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
display: -webkit-box;
|
||||
-webkit-line-clamp: 2;
|
||||
-webkit-box-orient: vertical;
|
||||
}
|
||||
|
||||
.mutation-meta {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
margin-top: 0.75rem;
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.type-breakdown {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.type-card {
|
||||
background: var(--bg-card);
|
||||
border-radius: 12px;
|
||||
padding: 1.25rem;
|
||||
}
|
||||
|
||||
.type-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.type-name {
|
||||
font-weight: 600;
|
||||
text-transform: capitalize;
|
||||
}
|
||||
|
||||
.type-rate {
|
||||
font-size: 1.125rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.progress-bar {
|
||||
height: 8px;
|
||||
background: var(--border);
|
||||
border-radius: 4px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background: linear-gradient(90deg, var(--accent), var(--accent-light));
|
||||
border-radius: 4px;
|
||||
transition: width 0.5s ease-out;
|
||||
}
|
||||
|
||||
.modal {
|
||||
display: none;
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.8);
|
||||
z-index: 1000;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.modal.active {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.modal-content {
|
||||
background: var(--bg-secondary);
|
||||
border-radius: 16px;
|
||||
max-width: 800px;
|
||||
width: 100%;
|
||||
max-height: 80vh;
|
||||
overflow-y: auto;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.modal-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.modal-close {
|
||||
background: none;
|
||||
border: none;
|
||||
color: var(--text-secondary);
|
||||
font-size: 1.5rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.detail-section {
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.detail-label {
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.detail-content {
|
||||
background: var(--bg-card);
|
||||
border-radius: 8px;
|
||||
padding: 1rem;
|
||||
font-family: 'SF Mono', 'Fira Code', monospace;
|
||||
font-size: 0.875rem;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.check-list {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.check-item {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
gap: 0.75rem;
|
||||
padding: 0.75rem;
|
||||
background: var(--bg-card);
|
||||
border-radius: 8px;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.check-icon {
|
||||
width: 20px;
|
||||
height: 20px;
|
||||
border-radius: 50%;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
flex-shrink: 0;
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
.check-icon.passed {
|
||||
background: var(--success);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.check-icon.failed {
|
||||
background: var(--danger);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.check-details {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.check-type {
|
||||
font-weight: 600;
|
||||
text-transform: capitalize;
|
||||
}
|
||||
|
||||
.check-message {
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.score-section {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.stats-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<div class="logo">
|
||||
<div class="logo-icon">E</div>
|
||||
<span class="logo-text">flakestorm</span>
|
||||
</div>
|
||||
<div class="report-meta">
|
||||
<div>{{ report_date }}</div>
|
||||
<div>Duration: {{ duration }}s</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<div class="score-section">
|
||||
<div class="score-card">
|
||||
<div class="score-ring">
|
||||
<svg width="180" height="180">
|
||||
<circle class="bg" cx="90" cy="90" r="78"></circle>
|
||||
<circle class="progress" cx="90" cy="90" r="78"
|
||||
stroke-dasharray="{{ circumference }}"
|
||||
stroke-dashoffset="{{ score_offset }}">
|
||||
</circle>
|
||||
</svg>
|
||||
<div class="score-value">{{ score_percent }}%</div>
|
||||
</div>
|
||||
<div class="score-label">Robustness Score</div>
|
||||
</div>
|
||||
|
||||
<div class="stats-grid">
|
||||
<div class="stat-card">
|
||||
<div class="stat-label">Total Mutations</div>
|
||||
<div class="stat-value">{{ total_mutations }}</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-label">Passed</div>
|
||||
<div class="stat-value success">{{ passed_mutations }}</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-label">Failed</div>
|
||||
<div class="stat-value danger">{{ failed_mutations }}</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-label">Avg Latency</div>
|
||||
<div class="stat-value">{{ avg_latency }}ms</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2 class="section-title">📊 By Mutation Type</h2>
|
||||
<div class="type-breakdown">
|
||||
{% for type_stat in type_stats %}
|
||||
<div class="type-card">
|
||||
<div class="type-header">
|
||||
<span class="type-name">{{ type_stat.mutation_type }}</span>
|
||||
<span class="type-rate">{{ type_stat.pass_rate_percent }}%</span>
|
||||
</div>
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" style="width: {{ type_stat.pass_rate_percent }}%"></div>
|
||||
</div>
|
||||
<div style="margin-top: 0.5rem; font-size: 0.875rem; color: var(--text-secondary);">
|
||||
{{ type_stat.passed }}/{{ type_stat.total }} passed
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2 class="section-title">🔬 Mutation Results</h2>
|
||||
<div class="matrix-grid">
|
||||
{% for result in mutations %}
|
||||
<div class="matrix-cell {{ 'passed' if result.passed else 'failed' }}"
|
||||
onclick="showDetail({{ loop.index0 }})">
|
||||
<div class="mutation-type">{{ result.mutation.type }}</div>
|
||||
<div class="mutation-text">{{ result.mutation.mutated[:100] }}...</div>
|
||||
<div class="mutation-meta">
|
||||
<span>{{ result.latency_ms|round(0)|int }}ms</span>
|
||||
<span>{{ '✓' if result.passed else '✗' }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="modal" id="detail-modal">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h3>Mutation Details</h3>
|
||||
<button class="modal-close" onclick="closeModal()">×</button>
|
||||
</div>
|
||||
<div id="modal-body"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
const mutations = {{ mutations_json|safe }};
|
||||
|
||||
function showDetail(index) {
|
||||
const m = mutations[index];
|
||||
const modal = document.getElementById('detail-modal');
|
||||
const body = document.getElementById('modal-body');
|
||||
|
||||
body.innerHTML = `
|
||||
<div class="detail-section">
|
||||
<div class="detail-label">Original Prompt</div>
|
||||
<div class="detail-content">${m.original_prompt}</div>
|
||||
</div>
|
||||
<div class="detail-section">
|
||||
<div class="detail-label">Mutated (${m.mutation.type})</div>
|
||||
<div class="detail-content">${m.mutation.mutated}</div>
|
||||
</div>
|
||||
<div class="detail-section">
|
||||
<div class="detail-label">Agent Response</div>
|
||||
<div class="detail-content">${m.response || '(empty)'}</div>
|
||||
</div>
|
||||
<div class="detail-section">
|
||||
<div class="detail-label">Invariant Checks</div>
|
||||
<ul class="check-list">
|
||||
${m.checks.map(c => `
|
||||
<li class="check-item">
|
||||
<div class="check-icon ${c.passed ? 'passed' : 'failed'}">
|
||||
${c.passed ? '✓' : '✗'}
|
||||
</div>
|
||||
<div class="check-details">
|
||||
<div class="check-type">${c.check_type}</div>
|
||||
<div class="check-message">${c.details}</div>
|
||||
</div>
|
||||
</li>
|
||||
`).join('')}
|
||||
</ul>
|
||||
</div>
|
||||
`;
|
||||
|
||||
modal.classList.add('active');
|
||||
}
|
||||
|
||||
function closeModal() {
|
||||
document.getElementById('detail-modal').classList.remove('active');
|
||||
}
|
||||
|
||||
document.getElementById('detail-modal').addEventListener('click', (e) => {
|
||||
if (e.target.id === 'detail-modal') closeModal();
|
||||
});
|
||||
|
||||
document.addEventListener('keydown', (e) => {
|
||||
if (e.key === 'Escape') closeModal();
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class HTMLReportGenerator:
    """
    Generates interactive HTML reports from test results.

    Creates a single-file HTML report with embedded CSS and JavaScript
    for easy sharing and viewing. Prompts, mutated prompts, agent responses
    and check details are treated as untrusted text: they are escaped both
    in the server-rendered markup and in the JSON payload used by the
    detail modal.
    """

    # Radius of the score ring; matches the r="78" circles in HTML_TEMPLATE.
    _RING_RADIUS = 78

    def __init__(self, results: TestResults):
        """
        Initialize the generator.

        Args:
            results: Test results to generate report from
        """
        self.results = results
        # jinja2.Template does not escape by default. Autoescaping keeps
        # mutated prompts (which may contain markup) from being rendered as
        # HTML; the `|safe` JSON payload is unaffected by this setting.
        self.template = Template(HTML_TEMPLATE, autoescape=True)

    @staticmethod
    def _escape_strings(value: object) -> object:
        """
        Return a copy of *value* with every string value HTML-escaped.

        Dicts and lists/tuples are walked recursively (tuples come back as
        lists); dict keys and non-string scalars are returned unchanged. The
        input is never mutated.
        """
        import html  # stdlib; function-scoped so the module imports stay as-is

        def walk(node: object) -> object:
            if isinstance(node, str):
                return html.escape(node)
            if isinstance(node, dict):
                return {key: walk(item) for key, item in node.items()}
            if isinstance(node, (list, tuple)):
                return [walk(item) for item in node]
            return node

        return walk(value)

    @classmethod
    def _script_json(cls, data: object) -> str:
        """
        Serialize *data* to JSON that is safe to inline in a <script> block.

        String values are HTML-escaped because the template's modal inserts
        them with ``innerHTML``. Any remaining ``<`` (e.g. in keys) is emitted
        as ``\\u003c`` so the payload can never close the script element.
        """
        return json.dumps(cls._escape_strings(data)).replace("<", "\\u003c")

    def generate(self) -> str:
        """
        Generate the HTML report.

        Returns:
            Complete HTML document as a string
        """
        import math

        stats = self.results.statistics

        # Score ring geometry: the dash offset hides the un-earned part of
        # the circle. The fraction is clamped so an out-of-range score cannot
        # produce a negative or over-long dash offset.
        circumference = 2 * math.pi * self._RING_RADIUS
        ring_fraction = min(max(stats.robustness_score, 0.0), 1.0)
        score_offset = circumference * (1 - ring_fraction)

        # Per-type rows for the "By Mutation Type" section
        type_stats = [
            {
                "mutation_type": t.mutation_type.replace("_", " "),
                "total": t.total,
                "passed": t.passed,
                "pass_rate_percent": round(t.pass_rate * 100, 1),
            }
            for t in stats.by_type
        ]

        # Plain-dict form of every mutation, consumed by the modal script
        mutations_data = [m.to_dict() for m in self.results.mutations]

        return self.template.render(
            report_date=self.results.started_at.strftime("%Y-%m-%d %H:%M:%S"),
            duration=round(self.results.duration, 1),
            circumference=circumference,
            score_offset=score_offset,
            score_percent=round(stats.robustness_score * 100, 1),
            total_mutations=stats.total_mutations,
            passed_mutations=stats.passed_mutations,
            failed_mutations=stats.failed_mutations,
            avg_latency=round(stats.avg_latency_ms),
            type_stats=type_stats,
            mutations=self.results.mutations,
            mutations_json=self._script_json(mutations_data),
        )

    def save(self, path: str | Path | None = None) -> Path:
        """
        Save the HTML report to a file.

        Args:
            path: Output path (default: auto-generated in reports dir)

        Returns:
            Path to the saved file
        """
        if path is None:
            # Default location: timestamped file in the configured output dir
            output_dir = Path(self.results.config.output.path)
            output_dir.mkdir(parents=True, exist_ok=True)

            timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
            filename = f"flakestorm-{timestamp}.html"
            path = output_dir / filename
        else:
            path = Path(path)
            path.parent.mkdir(parents=True, exist_ok=True)

        html = self.generate()
        path.write_text(html, encoding="utf-8")

        return path
|
||||
115
src/flakestorm/reports/json_export.py
Normal file
115
src/flakestorm/reports/json_export.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
"""
|
||||
JSON Report Generator
|
||||
|
||||
Exports test results to JSON format for programmatic consumption
|
||||
and integration with other tools.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from flakestorm.reports.models import TestResults
|
||||
|
||||
|
||||
class JSONReportGenerator:
    """
    Serializes test results to JSON.

    The output is meant for machine consumers such as:
    - CI/CD pipelines
    - Data analysis tools
    - Dashboard integrations
    """

    def __init__(self, results: TestResults):
        """
        Store the results that will be exported.

        Args:
            results: Test results to generate report from
        """
        self.results = results

    def generate(self, pretty: bool = True) -> str:
        """
        Render the full results as a JSON document.

        Args:
            pretty: Indent the output by two spaces when true; emit the
                default single-line form otherwise

        Returns:
            JSON string
        """
        # indent=None is json.dumps' default, i.e. the non-indented form.
        indent = 2 if pretty else None
        return json.dumps(self.results.to_dict(), indent=indent, default=str)

    def generate_summary(self) -> dict[str, Any]:
        """
        Build the headline figures only, leaving out per-mutation details.

        Useful for quick status checks in CI/CD.
        """
        results = self.results
        stats = results.statistics

        # One entry per mutation type, keyed by the type name
        per_type: dict[str, Any] = {}
        for entry in stats.by_type:
            per_type[entry.mutation_type] = {
                "total": entry.total,
                "passed": entry.passed,
                "pass_rate": entry.pass_rate,
            }

        summary: dict[str, Any] = {
            "version": "1.0",
            "started_at": results.started_at.isoformat(),
            "completed_at": results.completed_at.isoformat(),
            "duration_seconds": results.duration,
            "robustness_score": stats.robustness_score,
            "pass_rate": stats.pass_rate,
            "total_mutations": stats.total_mutations,
            "passed_mutations": stats.passed_mutations,
            "failed_mutations": stats.failed_mutations,
            "avg_latency_ms": stats.avg_latency_ms,
            "p95_latency_ms": stats.p95_latency_ms,
            "by_type": per_type,
        }
        return summary

    def save(self, path: str | Path | None = None, summary_only: bool = False) -> Path:
        """
        Write the JSON report to disk.

        Args:
            path: Output path (default: auto-generated in reports dir)
            summary_only: Only include summary, no mutation details

        Returns:
            Path to the saved file
        """
        if path is not None:
            target = Path(path)
            target.parent.mkdir(parents=True, exist_ok=True)
        else:
            # No explicit path: timestamped file in the configured output dir
            reports_dir = Path(self.results.config.output.path)
            reports_dir.mkdir(parents=True, exist_ok=True)

            stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
            suffix = "-summary" if summary_only else ""
            target = reports_dir / f"flakestorm-{stamp}{suffix}.json"

        content = (
            json.dumps(self.generate_summary(), indent=2, default=str)
            if summary_only
            else self.generate()
        )
        target.write_text(content, encoding="utf-8")

        return target
|
||||
220
src/flakestorm/reports/models.py
Normal file
220
src/flakestorm/reports/models.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""
|
||||
Report Data Models
|
||||
|
||||
Data structures for representing test results and statistics.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from flakestorm.core.config import FlakeStormConfig
|
||||
from flakestorm.mutations.types import Mutation
|
||||
|
||||
|
||||
@dataclass
class CheckResult:
    """Outcome of one invariant check."""

    # Type of the check (e.g., 'latency', 'contains').
    check_type: str

    # Whether the check passed.
    passed: bool

    # Human-readable details about the check result.
    details: str

    def to_dict(self) -> dict[str, Any]:
        """Return the three fields as a plain dictionary for serialization."""
        return dict(
            check_type=self.check_type,
            passed=self.passed,
            details=self.details,
        )
|
||||
|
||||
|
||||
@dataclass
class TypeStatistics:
    """Pass/fail figures for a single mutation type."""

    # Name of the mutation type.
    mutation_type: str

    # Total number of tests of this type.
    total: int

    # Number of tests that passed.
    passed: int

    # Pass rate as a decimal (0.0 to 1.0).
    pass_rate: float

    def to_dict(self) -> dict[str, Any]:
        """Return the fields, plus a derived ``failed`` count, as a dictionary."""
        failed_count = self.total - self.passed
        return dict(
            mutation_type=self.mutation_type,
            total=self.total,
            passed=self.passed,
            failed=failed_count,
            pass_rate=self.pass_rate,
        )
|
||||
|
||||
|
||||
@dataclass
class TestStatistics:
    """Run-wide aggregate figures."""

    # Total number of mutations tested.
    total_mutations: int

    # Number of mutations that passed all checks.
    passed_mutations: int

    # Number of mutations that failed one or more checks.
    failed_mutations: int

    # Weighted robustness score (0.0 to 1.0).
    robustness_score: float

    # Average response latency in milliseconds.
    avg_latency_ms: float

    # 50th percentile (median) latency.
    p50_latency_ms: float

    # 95th percentile latency.
    p95_latency_ms: float

    # 99th percentile latency.
    p99_latency_ms: float

    # Statistics broken down by mutation type.
    by_type: list[TypeStatistics] = field(default_factory=list)

    # Total test duration in seconds.
    duration_seconds: float = 0.0

    @property
    def pass_rate(self) -> float:
        """Fraction of mutations that passed; 0.0 when nothing was tested."""
        if self.total_mutations != 0:
            return self.passed_mutations / self.total_mutations
        return 0.0

    def to_dict(self) -> dict[str, Any]:
        """Return every figure, including the derived pass rate, as a dictionary."""
        payload: dict[str, Any] = dict(
            total_mutations=self.total_mutations,
            passed_mutations=self.passed_mutations,
            failed_mutations=self.failed_mutations,
            robustness_score=self.robustness_score,
            pass_rate=self.pass_rate,
            avg_latency_ms=self.avg_latency_ms,
            p50_latency_ms=self.p50_latency_ms,
            p95_latency_ms=self.p95_latency_ms,
            p99_latency_ms=self.p99_latency_ms,
            duration_seconds=self.duration_seconds,
        )
        # Added last so the key order matches the serialized layout.
        payload["by_type"] = [entry.to_dict() for entry in self.by_type]
        return payload
|
||||
|
||||
|
||||
@dataclass
class MutationResult:
    """Everything recorded about one tested mutation."""

    # The original golden prompt.
    original_prompt: str

    # The mutation that was tested.
    mutation: Mutation

    # The agent's response.
    response: str

    # Response latency in milliseconds.
    latency_ms: float

    # Whether all invariant checks passed.
    passed: bool

    # Individual check results.
    checks: list[CheckResult] = field(default_factory=list)

    # Error message if the agent call failed.
    error: str | None = None

    @property
    def failed_checks(self) -> list[CheckResult]:
        """Checks whose ``passed`` flag is false, in their original order."""
        return list(filter(lambda check: not check.passed, self.checks))

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary form, delegating to the mutation's and checks' ``to_dict``."""
        serialized_mutation = self.mutation.to_dict()
        serialized_checks = [check.to_dict() for check in self.checks]
        return dict(
            original_prompt=self.original_prompt,
            mutation=serialized_mutation,
            response=self.response,
            latency_ms=self.latency_ms,
            passed=self.passed,
            checks=serialized_checks,
            error=self.error,
        )
|
||||
|
||||
|
||||
@dataclass
class TestResults:
    """Full outcome of one test run."""

    # Configuration used for the test.
    config: FlakeStormConfig

    # When the test started.
    started_at: datetime

    # When the test completed.
    completed_at: datetime

    # Results for each mutation.
    mutations: list[MutationResult]

    # Aggregate statistics.
    statistics: TestStatistics

    @property
    def duration(self) -> float:
        """Elapsed time between start and completion, in seconds."""
        elapsed = self.completed_at - self.started_at
        return elapsed.total_seconds()

    @property
    def passed_mutations(self) -> list[MutationResult]:
        """Mutation results whose ``passed`` flag is true."""
        return list(filter(lambda item: item.passed, self.mutations))

    @property
    def failed_mutations(self) -> list[MutationResult]:
        """Mutation results whose ``passed`` flag is false."""
        return list(filter(lambda item: not item.passed, self.mutations))

    def get_by_type(self, mutation_type: str) -> list[MutationResult]:
        """Return results whose mutation type value equals *mutation_type*."""
        selected = []
        for item in self.mutations:
            if item.mutation.type.value == mutation_type:
                selected.append(item)
        return selected

    def get_by_prompt(self, prompt: str) -> list[MutationResult]:
        """Return results whose original prompt equals *prompt*."""
        selected = []
        for item in self.mutations:
            if item.original_prompt == prompt:
                selected.append(item)
        return selected

    def to_dict(self) -> dict[str, Any]:
        """Return the run as a dictionary with ISO-formatted timestamps."""
        return dict(
            version="1.0",
            started_at=self.started_at.isoformat(),
            completed_at=self.completed_at.isoformat(),
            duration_seconds=self.duration,
            statistics=self.statistics.to_dict(),
            mutations=[item.to_dict() for item in self.mutations],
            golden_prompts=self.config.golden_prompts,
        )
|
||||
156
src/flakestorm/reports/terminal.py
Normal file
156
src/flakestorm/reports/terminal.py
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
"""
|
||||
Terminal Report Generator
|
||||
|
||||
Displays test results directly in the terminal using rich formatting.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from flakestorm.reports.models import TestResults
|
||||
|
||||
|
||||
class TerminalReporter:
    """
    Displays test results in the terminal using rich formatting.

    Provides colorful, informative output for interactive use. Text that
    originates from prompts, mutations or checks is escaped before it is
    embedded in Rich console markup.
    """

    def __init__(self, results: TestResults, console: Console | None = None):
        """
        Initialize the reporter.

        Args:
            results: Test results to display
            console: Rich console (default: new console)
        """
        self.results = results
        self.console = console or Console()

    @staticmethod
    def _preview(text: str, limit: int = 50) -> str:
        """Return *text* cut to *limit* characters, adding '...' only when cut."""
        if len(text) <= limit:
            return text
        return f"{text[:limit]}..."

    @staticmethod
    def _progress_bar(pass_rate: float, width: int = 15) -> str:
        """Return a *width*-character bar filled in proportion to *pass_rate*."""
        # Clamp so an out-of-range rate cannot make the bar longer than width.
        clamped = min(max(pass_rate, 0.0), 1.0)
        filled = int(clamped * width)
        return "█" * filled + "░" * (width - filled)

    def print_summary(self) -> None:
        """Print a summary panel with the score, counts, latency and duration."""
        stats = self.results.statistics

        # Robustness score with color
        score = stats.robustness_score
        if score >= 0.9:
            score_style = "bold green"
            score_emoji = "🎉"
        elif score >= 0.7:
            score_style = "bold yellow"
            score_emoji = "⚠️"
        else:
            score_style = "bold red"
            score_emoji = "❌"

        score_text = Text()
        score_text.append(f"{score_emoji} Robustness Score: ", style="bold")
        score_text.append(f"{score:.1%}", style=score_style)

        summary_lines = [
            score_text,
            Text(),
            Text(f"Total Mutations: {stats.total_mutations}"),
            Text.assemble(
                ("Passed: ", ""),
                (str(stats.passed_mutations), "green"),
                (" | Failed: ", ""),
                (str(stats.failed_mutations), "red"),
            ),
            Text(),
            Text(f"Avg Latency: {stats.avg_latency_ms:.0f}ms"),
            Text(f"P95 Latency: {stats.p95_latency_ms:.0f}ms"),
            Text(f"Duration: {self.results.duration:.1f}s"),
        ]

        # Join as Text rather than str: str(Text) keeps only the plain
        # characters, which would discard the score and pass/fail colors.
        panel_content = Text("\n").join(summary_lines)

        self.console.print(
            Panel(
                panel_content,
                title="flakestorm Results",
                border_style="blue",
            )
        )

    def print_type_breakdown(self) -> None:
        """Print a table with one row of pass/fail figures per mutation type."""
        stats = self.results.statistics

        table = Table(title="By Mutation Type", show_header=True)
        table.add_column("Type", style="cyan")
        table.add_column("Passed", justify="right", style="green")
        table.add_column("Failed", justify="right", style="red")
        table.add_column("Pass Rate", justify="right")
        table.add_column("Progress", width=20)

        for type_stat in stats.by_type:
            table.add_row(
                type_stat.mutation_type.replace("_", " ").title(),
                str(type_stat.passed),
                str(type_stat.total - type_stat.passed),
                f"{type_stat.pass_rate:.1%}",
                self._progress_bar(type_stat.pass_rate),
            )

        self.console.print(table)

    def print_failures(self, limit: int = 10) -> None:
        """
        Print details of failed mutations.

        Args:
            limit: Maximum number of failures to show
        """
        # Prompts and check details may contain '[', which Rich would parse
        # as markup tags; escape them before interpolating into markup.
        from rich.markup import escape

        failed = self.results.failed_mutations

        if not failed:
            self.console.print("[green]✓ No failures![/green]")
            return

        self.console.print(
            f"\n[bold red]Failed Mutations ({len(failed)} total):[/bold red]"
        )

        for number, result in enumerate(failed[:limit], start=1):
            type_label = escape(str(result.mutation.type.value))
            self.console.print(f"\n[bold]#{number} - {type_label}[/bold]")
            self.console.print(
                f" [dim]Original:[/dim] {escape(self._preview(result.original_prompt))}"
            )
            self.console.print(
                f" [dim]Mutated:[/dim] {escape(self._preview(result.mutation.mutated))}"
            )

            for check in result.failed_checks:
                self.console.print(
                    f" [red]✗ {escape(str(check.check_type))}:[/red] "
                    f"{escape(str(check.details))}"
                )

        if len(failed) > limit:
            self.console.print(
                f"\n[dim]...and {len(failed) - limit} more failures. "
                "See HTML report for details.[/dim]"
            )

    def print_full_report(self) -> None:
        """Print the summary, the per-type breakdown and the failures in order."""
        self.console.print()
        self.print_summary()
        self.console.print()
        self.print_type_breakdown()
        self.print_failures()
        self.console.print()
|
||||
Loading…
Add table
Add a link
Reference in a new issue