mirror of
https://github.com/syntrex-lab/gomcp.git
synced 2026-05-09 19:42:37 +02:00
249 lines
6 KiB
Go
249 lines
6 KiB
Go
package watchdog
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestRegisterPeer(t *testing.T) {
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("immune", "http://localhost:9760/health")
|
|
m.RegisterPeer("sidecar", "http://localhost:9770/health")
|
|
|
|
peers := m.AllPeers()
|
|
if len(peers) != 2 {
|
|
t.Fatalf("peer count = %d, want 2", len(peers))
|
|
}
|
|
}
|
|
|
|
func TestHealthyPeer(t *testing.T) {
|
|
// Create a mock healthy peer.
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("healthy-peer", srv.URL+"/health")
|
|
|
|
// Run one check cycle.
|
|
ctx := context.Background()
|
|
m.checkAllPeers(ctx)
|
|
|
|
peer, ok := m.GetPeerStatus("healthy-peer")
|
|
if !ok {
|
|
t.Fatal("peer not found")
|
|
}
|
|
if peer.Status != StatusHealthy {
|
|
t.Errorf("status = %s, want HEALTHY", peer.Status)
|
|
}
|
|
if peer.MissedCount != 0 {
|
|
t.Errorf("missed = %d, want 0", peer.MissedCount)
|
|
}
|
|
}
|
|
|
|
func TestUnhealthyPeerDegraded(t *testing.T) {
|
|
// Peer that's down (no server listening).
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("dead-peer", "http://127.0.0.1:19999/health")
|
|
|
|
ctx := context.Background()
|
|
|
|
// One miss → DEGRADED.
|
|
m.checkAllPeers(ctx)
|
|
|
|
peer, _ := m.GetPeerStatus("dead-peer")
|
|
if peer.Status != StatusDegraded {
|
|
t.Errorf("status = %s, want DEGRADED", peer.Status)
|
|
}
|
|
if peer.MissedCount != 1 {
|
|
t.Errorf("missed = %d, want 1", peer.MissedCount)
|
|
}
|
|
}
|
|
|
|
func TestEscalationToRestart(t *testing.T) {
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("flaky-peer", "http://127.0.0.1:19999/health")
|
|
|
|
var escalations []EscalationAction
|
|
m.OnEscalation(func(a EscalationAction) {
|
|
escalations = append(escalations, a)
|
|
})
|
|
|
|
ctx := context.Background()
|
|
|
|
// Miss 3 heartbeats → should trigger restart.
|
|
for i := 0; i < MaxMissedBeforeRestart; i++ {
|
|
m.checkAllPeers(ctx)
|
|
}
|
|
|
|
peer, _ := m.GetPeerStatus("flaky-peer")
|
|
if peer.Status != StatusOffline {
|
|
t.Errorf("status = %s, want OFFLINE", peer.Status)
|
|
}
|
|
if peer.RestartCount != 1 {
|
|
t.Errorf("restart_count = %d, want 1", peer.RestartCount)
|
|
}
|
|
|
|
// Check that escalation was fired.
|
|
found := false
|
|
for _, e := range escalations {
|
|
if e.Action == "restart" {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
t.Error("expected 'restart' escalation, got none")
|
|
}
|
|
}
|
|
|
|
func TestEscalationToIsolate(t *testing.T) {
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("broken-peer", "http://127.0.0.1:19999/health")
|
|
|
|
var escalations []EscalationAction
|
|
m.OnEscalation(func(a EscalationAction) {
|
|
escalations = append(escalations, a)
|
|
})
|
|
|
|
ctx := context.Background()
|
|
|
|
// Trigger MaxRestartsBeforeIsolate restart cycles.
|
|
for r := 0; r < MaxRestartsBeforeIsolate; r++ {
|
|
for i := 0; i < MaxMissedBeforeRestart; i++ {
|
|
m.checkAllPeers(ctx)
|
|
}
|
|
}
|
|
|
|
// Now one more miss cycle should trigger isolation.
|
|
for i := 0; i < MaxMissedBeforeRestart; i++ {
|
|
m.checkAllPeers(ctx)
|
|
}
|
|
|
|
peer, _ := m.GetPeerStatus("broken-peer")
|
|
if peer.Status != StatusIsolated {
|
|
t.Errorf("status = %s, want ISOLATED", peer.Status)
|
|
}
|
|
|
|
// Check for isolate escalation.
|
|
found := false
|
|
for _, e := range escalations {
|
|
if e.Action == "isolate" {
|
|
found = true
|
|
if e.Severity != "CRITICAL" {
|
|
t.Errorf("isolate severity = %s, want CRITICAL", e.Severity)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
t.Error("expected 'isolate' escalation, got none")
|
|
}
|
|
}
|
|
|
|
func TestRecoveryAfterRestart(t *testing.T) {
|
|
// Peer goes down, gets restarted (simulated), then comes back.
|
|
healthy := true
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if healthy {
|
|
w.WriteHeader(http.StatusOK)
|
|
} else {
|
|
w.WriteHeader(http.StatusServiceUnavailable)
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("recovering-peer", srv.URL+"/health")
|
|
|
|
ctx := context.Background()
|
|
|
|
// Initially healthy.
|
|
m.checkAllPeers(ctx)
|
|
peer, _ := m.GetPeerStatus("recovering-peer")
|
|
if peer.Status != StatusHealthy {
|
|
t.Fatalf("initial status = %s, want HEALTHY", peer.Status)
|
|
}
|
|
|
|
// Goes down.
|
|
healthy = false
|
|
m.checkAllPeers(ctx)
|
|
peer, _ = m.GetPeerStatus("recovering-peer")
|
|
if peer.Status != StatusDegraded {
|
|
t.Fatalf("down status = %s, want DEGRADED", peer.Status)
|
|
}
|
|
|
|
// Comes back.
|
|
healthy = true
|
|
m.checkAllPeers(ctx)
|
|
peer, _ = m.GetPeerStatus("recovering-peer")
|
|
if peer.Status != StatusHealthy {
|
|
t.Errorf("recovered status = %s, want HEALTHY", peer.Status)
|
|
}
|
|
if peer.MissedCount != 0 {
|
|
t.Errorf("missed after recovery = %d, want 0", peer.MissedCount)
|
|
}
|
|
}
|
|
|
|
func TestStats(t *testing.T) {
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("p1", "http://127.0.0.1:19999/health")
|
|
|
|
ctx := context.Background()
|
|
m.checkAllPeers(ctx)
|
|
m.checkAllPeers(ctx)
|
|
|
|
stats := m.Stats()
|
|
if stats.TotalChecks != 2 {
|
|
t.Errorf("total_checks = %d, want 2", stats.TotalChecks)
|
|
}
|
|
if stats.TotalMisses != 2 {
|
|
t.Errorf("total_misses = %d, want 2", stats.TotalMisses)
|
|
}
|
|
if stats.PeerCount != 1 {
|
|
t.Errorf("peer_count = %d, want 1", stats.PeerCount)
|
|
}
|
|
}
|
|
|
|
func TestServeHTTP(t *testing.T) {
|
|
m := NewMonitor("test-self")
|
|
m.RegisterPeer("p1", "http://localhost:9760/health")
|
|
|
|
w := httptest.NewRecorder()
|
|
r := httptest.NewRequest(http.MethodGet, "/mesh", nil)
|
|
|
|
m.ServeHTTP(w, r)
|
|
|
|
if w.Code != http.StatusOK {
|
|
t.Errorf("status = %d, want 200", w.Code)
|
|
}
|
|
if ct := w.Header().Get("Content-Type"); ct != "application/json" {
|
|
t.Errorf("content-type = %s, want application/json", ct)
|
|
}
|
|
}
|
|
|
|
func TestMonitorStartStop(t *testing.T) {
|
|
m := NewMonitor("test-self")
|
|
m.interval = 50 * time.Millisecond // Fast for tests.
|
|
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
m.RegisterPeer("fast-peer", srv.URL+"/health")
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
|
defer cancel()
|
|
|
|
m.Start(ctx) // Blocks until context expires.
|
|
|
|
stats := m.Stats()
|
|
if stats.TotalChecks < 2 {
|
|
t.Errorf("expected at least 2 checks in 200ms, got %d", stats.TotalChecks)
|
|
}
|
|
}
|