mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-05-11 16:52:42 +02:00
Update version to 2.0.0 and enhance chaos engineering features in Flakestorm. Added support for environment chaos, behavioral contracts, and replay regression. Expanded documentation and improved scoring mechanisms. Updated .gitignore to include new documentation files.
This commit is contained in:
parent
59cca61f3c
commit
9c3450a75d
63 changed files with 4147 additions and 134 deletions
129
examples/v2_research_agent/flakestorm.yaml
Normal file
129
examples/v2_research_agent/flakestorm.yaml
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
# Flakestorm v2.0 — Research Assistant Example
|
||||
# Demonstrates: mutation testing, chaos, behavioral contract, replay, and CI
|
||||
|
||||
# Quoted so the value stays the string "2.0" instead of being parsed as the float 2.0.
version: "2.0"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Agent (HTTP). Start with: python agent.py (or uvicorn agent:app --port 8790)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Connection settings for the HTTP agent under test.
agent:
  endpoint: "http://localhost:8790/invoke"
  type: "http"
  method: "POST"
  # Single-quoted so the embedded JSON double quotes need no escaping.
  # NOTE(review): {prompt} is presumably the placeholder for the prompt text — confirm.
  request_template: '{"input": "{prompt}"}'
  response_path: "result"
  # NOTE(review): unit is presumably milliseconds — confirm against the config schema.
  timeout: 15000
  reset_endpoint: "http://localhost:8790/reset"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Model (for mutation generation only)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Model settings (per the section header: used for mutation generation only).
model:
  provider: "ollama"
  # Quoted because the value contains a colon.
  name: "gemma3:1b"
  base_url: "http://localhost:11434"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Mutations
|
||||
# -----------------------------------------------------------------------------
|
||||
# Mutation settings: a count plus the list of mutation types to apply.
# NOTE(review): whether count is per golden prompt or a total is not visible here — confirm.
mutations:
  count: 5
  types:
    - paraphrase
    - noise
    - tone_shift
    - prompt_injection
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Golden prompts
|
||||
# -----------------------------------------------------------------------------
|
||||
# Baseline prompts; quoted because they contain punctuation such as "?".
golden_prompts:
  - "What is the capital of France?"
  - "Summarize the benefits of renewable energy."
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Invariants (run invariants)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Each list item is one check, selected by its `type` key.
invariants:
  # NOTE(review): 30000 is larger than agent.timeout (15000); if both are in
  # milliseconds this bound may never be reached — confirm it is intentional.
  - type: latency
    max_ms: 30000
  - type: contains
    value: "source"
  - type: output_not_empty
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# V2: Environment Chaos (tool/LLM faults)
|
||||
# For an HTTP agent, tool_faults with tool "*" apply to the single request sent to the endpoint.
|
||||
# -----------------------------------------------------------------------------
|
||||
# Fault injection settings: one list of tool faults and one list of LLM faults.
chaos:
  tool_faults:
    # "*" must stay quoted: a bare * would be read as a YAML alias indicator.
    - tool: "*"
      mode: error
      error_code: 503
      message: "Service Unavailable"
      probability: 0.3
  llm_faults:
    - mode: truncated_response
      max_tokens: 5
      probability: 0.2
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# V2: Behavioral Contract + Chaos Matrix
|
||||
# -----------------------------------------------------------------------------
|
||||
# Behavioral contract: named invariants plus a matrix of chaos scenarios.
# NOTE(review): chaos_matrix is nested under contract based on the section
# header ("Behavioral Contract + Chaos Matrix") — confirm against the schema.
contract:
  name: "Research Agent Contract"
  description: "Must cite source and complete under chaos"
  invariants:
    - id: always-cite-source
      type: regex
      # (?i) makes the match case-insensitive; either alternative satisfies it.
      pattern: "(?i)(source|according to)"
      severity: critical
      when: always
      description: "Must cite a source"
    - id: completes
      type: completes
      severity: high
      when: always
      description: "Must return a response"
    - id: max-latency
      type: latency
      max_ms: 60000
      severity: medium
      when: always
  chaos_matrix:
    # Scenario with explicitly empty fault lists.
    - name: "no-chaos"
      tool_faults: []
      llm_faults: []
    # Scenario in which every tool ("*") is configured with a 503 error fault.
    - name: "api-outage"
      tool_faults:
        - tool: "*"
          mode: error
          error_code: 503
          message: "Service Unavailable"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# V2: Replay regression (sessions can reference file or be inline)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Replay sessions; this example references one session file by path.
# NOTE(review): the path is presumably resolved relative to this config file — confirm.
replays:
  sessions:
    - file: "replays/incident_001.yaml"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# V2: Scoring weights (overall = mutation*0.2 + chaos*0.35 + contract*0.35 + replay*0.1)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Per-component weights for the overall score; the four values sum to 1.00.
scoring:
  mutation: 0.20
  chaos: 0.35
  contract: 0.35
  replay: 0.10
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Output
|
||||
# -----------------------------------------------------------------------------
|
||||
# Report settings: output format and destination path.
output:
  format: "html"
  path: "./reports"
|
||||
|
||||
# Execution tuning.
# NOTE(review): concurrency presumably caps parallel requests and retries the
# re-attempts per request — confirm against the config schema.
advanced:
  concurrency: 5
  retries: 2
|
||||
Loading…
Add table
Add a link
Reference in a new issue