Update version to 2.0.0 and enhance chaos engineering features in Flakestorm. Added support for environment chaos, behavioral contracts, and replay regression. Expanded documentation and improved scoring mechanisms. Updated .gitignore to include new documentation files.

2026-05-11 16:52:42 +02:00 · 2026-03-06 23:33:21 +08:00 · 2026-03-06 23:33:21 +08:00 · 9c3450a75d
commit 9c3450a75d
parent 59cca61f3c
63 changed files with 4147 additions and 134 deletions
--- a/examples/v2_research_agent/flakestorm.yaml
+++ b/examples/v2_research_agent/flakestorm.yaml
@@ -0,0 +1,129 @@
+# Flakestorm v2.0 — Research Assistant Example
+# Demonstrates: mutation testing, chaos, behavioral contract, replay, ci
+
+version: "2.0"  # quoted so YAML keeps it a string instead of the float 2.0
+
+# -----------------------------------------------------------------------------
+# Agent (HTTP). Start with: python agent.py  (or uvicorn agent:app --port 8790)
+# -----------------------------------------------------------------------------
+agent:
+  endpoint: "http://localhost:8790/invoke"  # same port as the uvicorn command above
+  type: "http"
+  method: "POST"
+  request_template: '{"input": "{prompt}"}'  # presumably {prompt} is substituted per request — confirm
+  response_path: "result"  # presumably the field read from the JSON response — confirm
+  timeout: 15000  # presumably milliseconds, like the max_ms invariants — confirm
+  reset_endpoint: "http://localhost:8790/reset"  # NOTE(review): when this is called is not shown here
+
+# -----------------------------------------------------------------------------
+# Model (used for mutation generation only)
+# -----------------------------------------------------------------------------
+model:
+  provider: "ollama"
+  name: "gemma3:1b"
+  base_url: "http://localhost:11434"  # 11434 is Ollama's default listen port
+
+# -----------------------------------------------------------------------------
+# Mutations
+# -----------------------------------------------------------------------------
+mutations:
+  count: 5  # presumably mutations generated per golden prompt — confirm
+  types:
+    - paraphrase
+    - noise
+    - tone_shift
+    - prompt_injection
+
+# -----------------------------------------------------------------------------
+# Golden prompts
+# -----------------------------------------------------------------------------
+golden_prompts:  # presumably the baseline inputs that mutations are derived from — confirm
+  - "What is the capital of France?"
+  - "Summarize the benefits of renewable energy."
+
+# -----------------------------------------------------------------------------
+# Invariants (run invariants; contract invariants are listed under contract.invariants)
+# -----------------------------------------------------------------------------
+invariants:
+  - type: latency
+    max_ms: 30000  # NOTE(review): above agent.timeout (15000) — confirm both are ms
+  - type: contains
+    value: "source"  # NOTE(review): case sensitivity not shown; the contract regex uses (?i)
+  - type: output_not_empty
+
+# -----------------------------------------------------------------------------
+# V2: Environment Chaos (tool/LLM faults)
+# For an HTTP agent, tool_faults with tool "*" apply to the single request sent to the endpoint.
+# -----------------------------------------------------------------------------
+chaos:
+  tool_faults:
+    - tool: "*"  # must stay quoted: a bare * starts a YAML alias
+      mode: error
+      error_code: 503
+      message: "Service Unavailable"
+      probability: 0.3  # presumably the chance of injecting this fault per request — confirm
+  llm_faults:
+    - mode: truncated_response
+      max_tokens: 5
+      probability: 0.2
+
+# -----------------------------------------------------------------------------
+# V2: Behavioral Contract + Chaos Matrix
+# -----------------------------------------------------------------------------
+contract:
+  name: "Research Agent Contract"
+  description: "Must cite source and complete under chaos"
+  invariants:
+    - id: always-cite-source
+      type: regex
+      pattern: "(?i)(source|according to)"  # (?i) is the inline case-insensitive flag
+      severity: critical
+      when: always
+      description: "Must cite a source"
+    - id: completes
+      type: completes
+      severity: high
+      when: always
+      description: "Must return a response"
+    - id: max-latency
+      type: latency
+      max_ms: 60000  # twice the max_ms of the top-level latency invariant (30000)
+      severity: medium
+      when: always
+  chaos_matrix:  # named scenarios; presumably each is checked against the invariants above — confirm
+    - name: "no-chaos"
+      tool_faults: []  # explicit empty lists: no faults configured for this scenario
+      llm_faults: []
+    - name: "api-outage"  # NOTE(review): llm_faults is omitted here; presumably defaults to none
+      tool_faults:
+        - tool: "*"
+          mode: error
+          error_code: 503
+          message: "Service Unavailable"  # NOTE(review): no probability set, unlike chaos.tool_faults — confirm default
+
+# -----------------------------------------------------------------------------
+# V2: Replay regression (each session can reference a file or be defined inline)
+# -----------------------------------------------------------------------------
+replays:
+  sessions:
+    - file: "replays/incident_001.yaml"  # file-reference form; presumably relative to this config — confirm
+
+# -----------------------------------------------------------------------------
+# V2: Scoring weights (overall = mutation*0.2 + chaos*0.35 + contract*0.35 + replay*0.1)
+# -----------------------------------------------------------------------------
+scoring:  # the four weights below sum to 1.0
+  mutation: 0.20
+  chaos: 0.35
+  contract: 0.35
+  replay: 0.10
+
+# -----------------------------------------------------------------------------
+# Output (report format and destination)
+# -----------------------------------------------------------------------------
+output:
+  format: "html"
+  path: "./reports"  # presumably resolved relative to the working directory — confirm
+
+advanced:
+  concurrency: 5  # presumably the max number of parallel agent requests — confirm
+  retries: 2  # presumably retry attempts per failed request — confirm