mirror of
https://github.com/syntrex-lab/gomcp.git
synced 2026-04-24 20:06:21 +02:00
322 lines
8.9 KiB
Go
322 lines
8.9 KiB
Go
// Copyright 2026 Syntrex Lab. All rights reserved.
|
|
// Use of this source code is governed by an Apache-2.0 license
|
|
// that can be found in the LICENSE file.
|
|
|
|
package resilience
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// --- Mock playbook executor ---
|
|
|
|
type mockPlaybookExecutor struct {
|
|
failSteps map[string]bool
|
|
callCount int
|
|
}
|
|
|
|
func newMockPlaybookExecutor() *mockPlaybookExecutor {
|
|
return &mockPlaybookExecutor{failSteps: make(map[string]bool)}
|
|
}
|
|
|
|
func (m *mockPlaybookExecutor) execute(_ context.Context, step PlaybookStep, _ string) (string, error) {
|
|
m.callCount++
|
|
if m.failSteps[step.ID] {
|
|
return "", fmt.Errorf("step %s failed", step.ID)
|
|
}
|
|
return fmt.Sprintf("step %s completed", step.ID), nil
|
|
}
|
|
|
|
// --- Recovery Playbook Tests ---
|
|
|
|
// AR-01: Component resurrection (success).
|
|
func TestPlaybook_AR01_ResurrectionSuccess(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
execID, err := rpe.Execute(context.Background(), "component-resurrection", "soc-ingest")
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
exec, ok := rpe.GetExecution(execID)
|
|
if !ok {
|
|
t.Fatal("execution not found")
|
|
}
|
|
if exec.Status != PlaybookSucceeded {
|
|
t.Errorf("expected SUCCEEDED, got %s", exec.Status)
|
|
}
|
|
if len(exec.StepsRun) == 0 {
|
|
t.Error("expected steps to be recorded")
|
|
}
|
|
}
|
|
|
|
// AR-02: Component resurrection (failure → rollback).
|
|
func TestPlaybook_AR02_ResurrectionFailure(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
mock.failSteps["restart-component"] = true
|
|
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "component-resurrection", "soc-ingest")
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
|
|
execs := rpe.RecentExecutions(10)
|
|
if len(execs) == 0 {
|
|
t.Fatal("expected execution")
|
|
}
|
|
if execs[0].Status != PlaybookRolledBack {
|
|
t.Errorf("expected ROLLED_BACK, got %s", execs[0].Status)
|
|
}
|
|
}
|
|
|
|
// AR-03: Consensus recovery (success).
|
|
func TestPlaybook_AR03_ConsensusSuccess(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ConsensusRecoveryPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "consensus-recovery", "cluster")
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
// AR-04: Consensus recovery (failure → readonly maintained).
|
|
func TestPlaybook_AR04_ConsensusFailure(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
mock.failSteps["elect-leader"] = true
|
|
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ConsensusRecoveryPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "consensus-recovery", "cluster")
|
|
if err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
|
|
execs := rpe.RecentExecutions(10)
|
|
if execs[0].Status != PlaybookRolledBack {
|
|
t.Errorf("expected ROLLED_BACK, got %s", execs[0].Status)
|
|
}
|
|
}
|
|
|
|
// AR-05: Crypto key rotation (success).
|
|
func TestPlaybook_AR05_CryptoSuccess(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(CryptoRotationPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "crypto-rotation", "system")
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
// AR-06: Crypto rotation (emergency — cert rotation fails → rollback).
|
|
func TestPlaybook_AR06_CryptoRollback(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
mock.failSteps["rotate-certs"] = true
|
|
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(CryptoRotationPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "crypto-rotation", "system")
|
|
if err == nil {
|
|
t.Fatal("expected error on cert rotation failure")
|
|
}
|
|
|
|
execs := rpe.RecentExecutions(10)
|
|
// Should have run rollback (revert keys).
|
|
found := false
|
|
for _, s := range execs[0].StepsRun {
|
|
if s.StepID == "rb-revert-keys" {
|
|
found = true
|
|
}
|
|
}
|
|
if !found {
|
|
t.Error("expected rollback step rb-revert-keys")
|
|
}
|
|
}
|
|
|
|
// AR-07: Forensic capture (all steps recorded).
|
|
func TestPlaybook_AR07_ForensicCapture(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
execID, _ := rpe.Execute(context.Background(), "component-resurrection", "comp")
|
|
exec, _ := rpe.GetExecution(execID)
|
|
|
|
for _, step := range exec.StepsRun {
|
|
if step.StepID == "" {
|
|
t.Error("step missing ID")
|
|
}
|
|
if step.StepName == "" {
|
|
t.Errorf("step %s has empty name", step.StepID)
|
|
}
|
|
}
|
|
}
|
|
|
|
// AR-08: Rollback execution on action failure.
|
|
func TestPlaybook_AR08_RollbackExecution(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
mock.failSteps["sync-state"] = true // Sync fails → rollback trigger.
|
|
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ConsensusRecoveryPlaybook())
|
|
|
|
rpe.Execute(context.Background(), "consensus-recovery", "cluster")
|
|
|
|
execs := rpe.RecentExecutions(10)
|
|
if execs[0].Status != PlaybookRolledBack {
|
|
t.Errorf("expected ROLLED_BACK, got %s", execs[0].Status)
|
|
}
|
|
}
|
|
|
|
// AR-09: Step retries.
|
|
func TestPlaybook_AR09_StepRetries(t *testing.T) {
|
|
callCount := 0
|
|
executor := func(_ context.Context, step PlaybookStep, _ string) (string, error) {
|
|
callCount++
|
|
if step.ID == "verify-health" && callCount <= 2 {
|
|
return "", fmt.Errorf("not healthy yet")
|
|
}
|
|
return "ok", nil
|
|
}
|
|
|
|
rpe := NewRecoveryPlaybookEngine(executor)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "component-resurrection", "comp")
|
|
if err != nil {
|
|
t.Fatalf("expected success after retries: %v", err)
|
|
}
|
|
}
|
|
|
|
// AR-10: Playbook not found.
|
|
func TestPlaybook_AR10_NotFound(t *testing.T) {
|
|
rpe := NewRecoveryPlaybookEngine(nil)
|
|
_, err := rpe.Execute(context.Background(), "nonexistent", "comp")
|
|
if err == nil {
|
|
t.Fatal("expected error for nonexistent playbook")
|
|
}
|
|
}
|
|
|
|
// AR-11: Audit logging (all step timestamps).
|
|
func TestPlaybook_AR11_AuditTimestamps(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
execID, _ := rpe.Execute(context.Background(), "component-resurrection", "comp")
|
|
exec, _ := rpe.GetExecution(execID)
|
|
|
|
if exec.StartedAt.IsZero() {
|
|
t.Error("missing started_at")
|
|
}
|
|
if exec.CompletedAt.IsZero() {
|
|
t.Error("missing completed_at")
|
|
}
|
|
}
|
|
|
|
// AR-12: OnError=continue skips non-critical failures.
|
|
func TestPlaybook_AR12_ContinueOnError(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
mock.failSteps["capture-forensics"] = true // OnError=continue.
|
|
mock.failSteps["notify-success"] = true // OnError=continue.
|
|
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
_, err := rpe.Execute(context.Background(), "component-resurrection", "comp")
|
|
if err != nil {
|
|
t.Fatalf("expected success despite continue-on-error steps: %v", err)
|
|
}
|
|
}
|
|
|
|
// AR-13: Context cancellation.
|
|
func TestPlaybook_AR13_ContextCancel(t *testing.T) {
|
|
executor := func(ctx context.Context, _ PlaybookStep, _ string) (string, error) {
|
|
select {
|
|
case <-ctx.Done():
|
|
return "", ctx.Err()
|
|
case <-time.After(10 * time.Millisecond):
|
|
return "ok", nil
|
|
}
|
|
}
|
|
|
|
rpe := NewRecoveryPlaybookEngine(executor)
|
|
rpe.RegisterPlaybook(ComponentResurrectionPlaybook())
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel immediately.
|
|
|
|
_, err := rpe.Execute(ctx, "component-resurrection", "comp")
|
|
// May or may not error depending on timing, but should not hang.
|
|
_ = err
|
|
}
|
|
|
|
// AR-14: DefaultPlaybooks returns 3.
|
|
func TestPlaybook_AR14_DefaultPlaybooks(t *testing.T) {
|
|
pbs := DefaultPlaybooks()
|
|
if len(pbs) != 3 {
|
|
t.Errorf("expected 3 playbooks, got %d", len(pbs))
|
|
}
|
|
|
|
ids := map[string]bool{}
|
|
for _, pb := range pbs {
|
|
if ids[pb.ID] {
|
|
t.Errorf("duplicate playbook ID: %s", pb.ID)
|
|
}
|
|
ids[pb.ID] = true
|
|
|
|
if len(pb.Actions) == 0 {
|
|
t.Errorf("playbook %s has no actions", pb.ID)
|
|
}
|
|
if len(pb.SuccessCriteria) == 0 {
|
|
t.Errorf("playbook %s has no success criteria", pb.ID)
|
|
}
|
|
}
|
|
}
|
|
|
|
// AR-15: PlaybookCount and RecentExecutions.
|
|
func TestPlaybook_AR15_CountsAndRecent(t *testing.T) {
|
|
mock := newMockPlaybookExecutor()
|
|
rpe := NewRecoveryPlaybookEngine(mock.execute)
|
|
|
|
if rpe.PlaybookCount() != 0 {
|
|
t.Error("expected 0")
|
|
}
|
|
|
|
for _, pb := range DefaultPlaybooks() {
|
|
rpe.RegisterPlaybook(pb)
|
|
}
|
|
if rpe.PlaybookCount() != 3 {
|
|
t.Errorf("expected 3, got %d", rpe.PlaybookCount())
|
|
}
|
|
|
|
// Run two playbooks.
|
|
rpe.Execute(context.Background(), "component-resurrection", "comp1")
|
|
rpe.Execute(context.Background(), "crypto-rotation", "comp2")
|
|
|
|
recent := rpe.RecentExecutions(1)
|
|
if len(recent) != 1 {
|
|
t.Errorf("expected 1 recent, got %d", len(recent))
|
|
}
|
|
if recent[0].PlaybookID != "crypto-rotation" {
|
|
t.Errorf("expected crypto-rotation, got %s", recent[0].PlaybookID)
|
|
}
|
|
|
|
all := rpe.RecentExecutions(100)
|
|
if len(all) != 2 {
|
|
t.Errorf("expected 2 total, got %d", len(all))
|
|
}
|
|
}
|