package resilience

import (
	"context"
	"fmt"
	"math"
	"testing"
	"time"
)

// --- MetricsDB Tests ---

// TestRingBuffer_AddAndAll adds three points to a buffer created with size 5
// and checks that Len and All report them in insertion order.
func TestRingBuffer_AddAndAll(t *testing.T) {
	rb := newRingBuffer(5)
	now := time.Now()

	for i := 0; i < 3; i++ {
		rb.Add(DataPoint{Timestamp: now.Add(time.Duration(i) * time.Second), Value: float64(i)})
	}

	if rb.Len() != 3 {
		t.Fatalf("expected 3, got %d", rb.Len())
	}

	all := rb.All()
	if len(all) != 3 {
		t.Fatalf("expected 3 points, got %d", len(all))
	}
	for i, dp := range all {
		if dp.Value != float64(i) {
			t.Errorf("point %d: expected %f, got %f", i, float64(i), dp.Value)
		}
	}
}

// TestRingBuffer_Wrap adds five points to a buffer created with size 3 and
// checks that only the three most recent values remain, oldest first.
func TestRingBuffer_Wrap(t *testing.T) {
	rb := newRingBuffer(3)
	now := time.Now()

	for i := 0; i < 5; i++ {
		rb.Add(DataPoint{Timestamp: now.Add(time.Duration(i) * time.Second), Value: float64(i)})
	}

	if rb.Len() != 3 {
		t.Fatalf("expected 3 (buffer size), got %d", rb.Len())
	}

	all := rb.All()
	// Should contain values 2, 3, 4 (oldest 0, 1 overwritten).
	expected := []float64{2, 3, 4}
	// Guard the indexing below: without this check a short result would pass
	// silently and a long one would panic with an index out of range.
	if len(all) != len(expected) {
		t.Fatalf("expected %d points, got %d", len(expected), len(all))
	}
	for i, dp := range all {
		if dp.Value != expected[i] {
			t.Errorf("point %d: expected %f, got %f", i, expected[i], dp.Value)
		}
	}
}

// TestMetricsDB_AddAndBaseline stores 20 values cycling through 30..34 and
// checks the count, mean range, and non-zero spread of the resulting baseline.
func TestMetricsDB_AddAndBaseline(t *testing.T) {
	db := NewMetricsDB(time.Hour, 100)

	for i := 0; i < 20; i++ {
		db.AddDataPoint("soc-ingest", "cpu", 30.0+float64(i%5))
	}

	baseline := db.GetBaseline("soc-ingest", "cpu", time.Hour)
	if baseline.Count != 20 {
		t.Fatalf("expected 20 points, got %d", baseline.Count)
	}
	if baseline.Mean < 30 || baseline.Mean > 35 {
		t.Errorf("mean out of expected range: %f", baseline.Mean)
	}
	if baseline.StdDev == 0 {
		t.Error("expected non-zero stddev")
	}
}

// TestMetricsDB_EmptyBaseline checks that asking for a baseline of a
// component that never reported data yields a zero Count.
func TestMetricsDB_EmptyBaseline(t *testing.T) {
	db := NewMetricsDB(time.Hour, 100)

	baseline := db.GetBaseline("nonexistent", "cpu", time.Hour)
	if baseline.Count != 0 {
		t.Errorf("expected 0 count for nonexistent, got %d", baseline.Count)
	}
}

// TestCalculateZScore checks Z-scores against a baseline with mean 30 and
// standard deviation 5, plus the case where the baseline has too few samples.
func TestCalculateZScore(t *testing.T) {
	baseline := Baseline{Mean: 30.0, StdDev: 5.0, Count: 100}

	// Normal value (Z = 1.0).
	z := CalculateZScore(35.0, baseline)
	if math.Abs(z-1.0) > 0.01 {
		t.Errorf("expected Z≈1.0, got %f", z)
	}

	// Anomalous value (Z = 4.0).
	z = CalculateZScore(50.0, baseline)
	if math.Abs(z-4.0) > 0.01 {
		t.Errorf("expected Z≈4.0, got %f", z)
	}

	// Insufficient data → 0.
	z = CalculateZScore(50.0, Baseline{Mean: 30, StdDev: 5, Count: 5})
	if z != 0 {
		t.Errorf("expected 0 for insufficient data, got %f", z)
	}
}

// TestIsAnomaly checks that IsAnomaly with a threshold of 3.0 flags values
// four standard deviations away on either side of the mean, but not one.
func TestIsAnomaly(t *testing.T) {
	baseline := Baseline{Mean: 30.0, StdDev: 5.0, Count: 100}

	if IsAnomaly(35.0, baseline, 3.0) {
		t.Error("35 should not be anomaly (Z=1.0)")
	}
	if !IsAnomaly(50.0, baseline, 3.0) {
		t.Error("50 should be anomaly (Z=4.0)")
	}
	if !IsAnomaly(10.0, baseline, 3.0) {
		t.Error("10 should be anomaly (Z=-4.0)")
	}
}

// TestMetricsDB_Purge writes one point, waits past the 100ms window passed to
// NewMetricsDB, writes a second point, and expects Purge to remove exactly one.
func TestMetricsDB_Purge(t *testing.T) {
	db := NewMetricsDB(100*time.Millisecond, 100)

	db.AddDataPoint("comp", "cpu", 50)
	// Sleep longer than the 100ms window so the first point has aged out.
	time.Sleep(150 * time.Millisecond)
	db.AddDataPoint("comp", "cpu", 60)

	removed := db.Purge()
	if removed != 1 {
		t.Errorf("expected 1 purged, got %d", removed)
	}
}

// TestMetricsDB_GetRecent stores the values 0, 10, ..., 90 and checks that
// asking for the 3 most recent points returns 70, 80, 90 in that order.
func TestMetricsDB_GetRecent(t *testing.T) {
	db := NewMetricsDB(time.Hour, 100)

	for i := 0; i < 10; i++ {
		db.AddDataPoint("comp", "mem", float64(i*10))
	}

	recent := db.GetRecent("comp", "mem", 3)
	if len(recent) != 3 {
		t.Fatalf("expected 3 recent, got %d", len(recent))
	}
	// Should be last 3: 70, 80, 90. All three positions are checked so a
	// wrong middle element cannot slip through.
	if recent[0].Value != 70 || recent[1].Value != 80 || recent[2].Value != 90 {
		t.Errorf("unexpected recent values: %v", recent)
	}
}

// --- MockCollector for HealthMonitor tests ---

// mockCollector is a canned metrics source for tests. results maps a
// component name to the metrics Collect returns for it; errors maps a
// component name to the error Collect returns instead.
type mockCollector struct {
	results map[string]map[string]float64
	errors  map[string]error
}

// Collect returns the configured error for component if one is set, otherwise
// the configured metrics, otherwise an empty (non-nil) metrics map. The
// context argument is ignored.
func (m *mockCollector) Collect(_ context.Context, component string) (map[string]float64, error) {
	// A missing key (or a nil map) yields a nil error, so one nil check
	// covers both "not configured" and "configured as nil".
	if err := m.errors[component]; err != nil {
		return nil, err
	}
	if metrics, ok := m.results[component]; ok {
		return metrics, nil
	}
	return map[string]float64{}, nil
}

// --- HealthMonitor Tests ---

// HM-01: Normal health check — all HEALTHY.
func TestHealthMonitor_HM01_AllHealthy(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 10) registerTestComponents(hm, 6) health := hm.GetHealth() if health.OverallStatus != OverallHealthy { t.Errorf("expected HEALTHY, got %s", health.OverallStatus) } if !health.QuorumValid { t.Error("expected quorum valid") } if len(health.Components) != 6 { t.Errorf("expected 6 components, got %d", len(health.Components)) } } // HM-02: Single component DEGRADED. func TestHealthMonitor_HM02_SingleDegraded(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 10) registerTestComponents(hm, 6) hm.SetComponentStatus("comp-0", StatusDegraded) health := hm.GetHealth() if health.OverallStatus != OverallDegraded { t.Errorf("expected DEGRADED, got %s", health.OverallStatus) } if !health.QuorumValid { t.Error("expected quorum still valid with 5/6 healthy") } } // HM-03: Multiple components CRITICAL → quorum lost. func TestHealthMonitor_HM03_MultipleCritical(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 10) registerTestComponents(hm, 6) hm.SetComponentStatus("comp-0", StatusCritical) hm.SetComponentStatus("comp-1", StatusCritical) hm.SetComponentStatus("comp-2", StatusCritical) health := hm.GetHealth() if health.OverallStatus != OverallCritical { t.Errorf("expected CRITICAL, got %s", health.OverallStatus) } if health.QuorumValid { t.Error("expected quorum INVALID with 3/6 critical") } } // HM-04: Anomaly detection (CPU spike). func TestHealthMonitor_HM04_CPUAnomaly(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 100) hm.RegisterComponent(ComponentConfig{ Name: "soc-ingest", Type: "go_binary", Thresholds: map[string]float64{"cpu": 80}, ThresholdIsMax: map[string]bool{"cpu": true}, }) // Build baseline of normal CPU (30%). for i := 0; i < 50; i++ { hm.metricsDB.AddDataPoint("soc-ingest", "cpu", 30.0) } // Spike to 95%. hm.UpdateMetrics("soc-ingest", map[string]float64{"cpu": 95.0}) hm.checkHealth() // Should have alert(s). 
select { case alert := <-hm.alertBus: if alert.Component != "soc-ingest" { t.Errorf("expected soc-ingest, got %s", alert.Component) } if alert.Metric != "cpu" { t.Errorf("expected cpu metric, got %s", alert.Metric) } default: t.Error("expected alert for CPU spike") } } // HM-05: Memory leak detection. func TestHealthMonitor_HM05_MemoryLeak(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 100) hm.RegisterComponent(ComponentConfig{ Name: "soc-correlate", Type: "go_binary", Thresholds: map[string]float64{"memory": 90}, ThresholdIsMax: map[string]bool{"memory": true}, }) // Build baseline of normal memory (40%). for i := 0; i < 50; i++ { hm.metricsDB.AddDataPoint("soc-correlate", "memory", 40.0) } // Memory spike to 95%. hm.UpdateMetrics("soc-correlate", map[string]float64{"memory": 95.0}) hm.checkHealth() select { case alert := <-hm.alertBus: if alert.Metric != "memory" { t.Errorf("expected memory metric, got %s", alert.Metric) } default: t.Error("expected alert for memory spike") } } // HM-06: Quorum validation failure. func TestHealthMonitor_HM06_QuorumFailure(t *testing.T) { statuses := map[string]ComponentStatus{ "a": StatusOffline, "b": StatusOffline, "c": StatusOffline, "d": StatusOffline, "e": StatusHealthy, "f": StatusHealthy, } if ValidateQuorum(statuses) { t.Error("expected quorum invalid with 4/6 offline") } } // HM-06b: Quorum validation success (edge case: exactly 2/3). func TestHealthMonitor_HM06b_QuorumEdge(t *testing.T) { statuses := map[string]ComponentStatus{ "a": StatusHealthy, "b": StatusHealthy, "c": StatusCritical, } if !ValidateQuorum(statuses) { t.Error("expected quorum valid with 2/3 healthy (exact threshold)") } } // HM-06c: Empty quorum. func TestHealthMonitor_HM06c_EmptyQuorum(t *testing.T) { if ValidateQuorum(map[string]ComponentStatus{}) { t.Error("expected quorum invalid with 0 components") } } // HM-07: Metrics collection (no data loss). 
func TestHealthMonitor_HM07_MetricsCollection(t *testing.T) { collector := &mockCollector{ results: map[string]map[string]float64{ "comp-0": {"cpu": 25, "memory": 40}, }, } hm := NewHealthMonitor(collector, 10) hm.RegisterComponent(ComponentConfig{Name: "comp-0", Type: "go_binary"}) hm.collectMetrics(context.Background()) hm.mu.RLock() comp := hm.components["comp-0"] hm.mu.RUnlock() if comp.Metrics["cpu"] != 25 { t.Errorf("expected cpu=25, got %f", comp.Metrics["cpu"]) } if comp.Metrics["memory"] != 40 { t.Errorf("expected memory=40, got %f", comp.Metrics["memory"]) } } // HM-07b: Collection error increments consecutive failures. func TestHealthMonitor_HM07b_CollectionError(t *testing.T) { collector := &mockCollector{ errors: map[string]error{ "comp-0": fmt.Errorf("connection refused"), }, } hm := NewHealthMonitor(collector, 10) hm.RegisterComponent(ComponentConfig{Name: "comp-0", Type: "go_binary"}) hm.collectMetrics(context.Background()) hm.mu.RLock() comp := hm.components["comp-0"] hm.mu.RUnlock() if comp.Consecutive != 1 { t.Errorf("expected 1 consecutive failure, got %d", comp.Consecutive) } } // HM-08: Alert bus fan-out (non-blocking). func TestHealthMonitor_HM08_AlertBusFanOut(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 5) hm.RegisterComponent(ComponentConfig{ Name: "comp", Type: "go_binary", Thresholds: map[string]float64{"cpu": 50}, ThresholdIsMax: map[string]bool{"cpu": true}, }) // Fill alert bus. for i := 0; i < 5; i++ { hm.alertBus <- HealthAlert{Component: fmt.Sprintf("test-%d", i)} } // Emit one more — should be dropped (non-blocking). hm.emitAlert(HealthAlert{Component: "overflow"}) // No panic = success. } // Test GetHealth returns a deep copy. 
func TestHealthMonitor_GetHealthDeepCopy(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 10) hm.RegisterComponent(ComponentConfig{Name: "test", Type: "go_binary"}) hm.UpdateMetrics("test", map[string]float64{"cpu": 50}) health := hm.GetHealth() health.Components[0].Metrics["cpu"] = 999 // Original should be unchanged. hm.mu.RLock() original := hm.components["test"].Metrics["cpu"] hm.mu.RUnlock() if original != 50 { t.Errorf("deep copy failed: original modified to %f", original) } } // Test threshold breach transitions status to DEGRADED then CRITICAL. func TestHealthMonitor_StatusTransitions(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 100) hm.RegisterComponent(ComponentConfig{ Name: "comp", Type: "go_binary", Thresholds: map[string]float64{"error_rate": 5}, ThresholdIsMax: map[string]bool{"error_rate": true}, }) // Breach once → DEGRADED. hm.UpdateMetrics("comp", map[string]float64{"error_rate": 10}) hm.checkHealth() hm.mu.RLock() status := hm.components["comp"].Status hm.mu.RUnlock() if status != StatusDegraded { t.Errorf("expected DEGRADED after 1 breach, got %s", status) } // Breach 3× → CRITICAL. for i := 0; i < 3; i++ { hm.checkHealth() } hm.mu.RLock() status = hm.components["comp"].Status hm.mu.RUnlock() if status != StatusCritical { t.Errorf("expected CRITICAL after repeated breaches, got %s", status) } } // Test lower-bound threshold (ThresholdIsMax=false). func TestHealthMonitor_LowerBoundThreshold(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 100) hm.RegisterComponent(ComponentConfig{ Name: "immune", Type: "c_kernel_module", Thresholds: map[string]float64{"hooks_active": 10}, ThresholdIsMax: map[string]bool{"hooks_active": false}, }) // hooks_active = 5 (below threshold of 10) → warning. 
hm.UpdateMetrics("immune", map[string]float64{"hooks_active": 5}) hm.checkHealth() select { case alert := <-hm.alertBus: if alert.Component != "immune" || alert.Metric != "hooks_active" { t.Errorf("unexpected alert: %+v", alert) } default: t.Error("expected alert for hooks_active below threshold") } } // Test ComponentCount. func TestHealthMonitor_ComponentCount(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 10) if hm.ComponentCount() != 0 { t.Error("expected 0 initially") } registerTestComponents(hm, 4) if hm.ComponentCount() != 4 { t.Errorf("expected 4, got %d", hm.ComponentCount()) } } // Test Start/Stop lifecycle. func TestHealthMonitor_StartStop(t *testing.T) { hm := NewHealthMonitor(&mockCollector{}, 10) registerTestComponents(hm, 2) ctx, cancel := context.WithCancel(context.Background()) done := make(chan struct{}) go func() { hm.Start(ctx) close(done) }() // Let it run briefly. time.Sleep(50 * time.Millisecond) cancel() select { case <-done: // Clean shutdown. case <-time.After(time.Second): t.Fatal("Start() did not return after context cancellation") } } // --- Helpers --- func registerTestComponents(hm *HealthMonitor, n int) { for i := 0; i < n; i++ { hm.RegisterComponent(ComponentConfig{ Name: fmt.Sprintf("comp-%d", i), Type: "go_binary", }) } }