flakestorm/examples/v2_research_agent/replays/incident_001.yaml

9 lines
370 B
YAML

# Replay session: a production incident captured for regression testing.
# Run with: flakestorm replay run replays/incident_001.yaml -c flakestorm.yaml

# Session identity.
id: incident-001
name: 'Research agent incident - missing source'
# NOTE(review): presumably marks this session as hand-authored rather than
# imported from an external source - confirm against the replay schema.
source: manual

# Input for the replay.
input: 'What is the capital of France?'
# Explicit empty list (not null): this session carries no tool responses.
tool_responses: []

# Description of the failure this session is expected to exhibit.
expected_failure: 'Agent returned response without citing source'
# NOTE(review): presumably has to match a contract name defined in the
# config passed with -c (flakestorm.yaml) - verify.
contract: 'Research Agent Contract'