mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-06-02 16:15:13 +02:00
76 lines
2.5 KiB
Python
76 lines
2.5 KiB
Python
"""
Replay runner: run replay sessions and verify against contract.

For HTTP agents, deterministic tool response injection is not possible
(we only see one request). We send session.input and verify the response
against the resolved contract.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
from flakestorm.core.protocol import AgentResponse, BaseAgentAdapter
|
|
|
|
from flakestorm.core.config import ContractConfig, ReplaySessionConfig
|
|
|
|
|
|
@dataclass
class ReplayResult:
    """Outcome of one replay run, including the contract verification verdict.

    Attributes:
        response: The agent response obtained for the replayed input.
        passed: Overall verdict for the replay; defaults to ``True``.
        verification_details: Human-readable notes about the verification;
            a fresh empty list per instance when not supplied.
    """

    response: AgentResponse
    passed: bool = True
    # default_factory gives each result its own list instead of a shared one.
    verification_details: list[str] = field(default_factory=list)
|
|
|
|
|
|
class ReplayRunner:
    """Run a single replay session and verify the response against a contract.

    The runner sends ``session.input`` to the agent once and, when a contract
    is available, checks the response against that contract's invariants.
    """

    def __init__(
        self,
        agent: BaseAgentAdapter,
        contract: ContractConfig | None = None,
        verifier=None,
    ):
        """Store the agent and the optional default contract.

        Args:
            agent: Adapter whose ``invoke`` coroutine is awaited by ``run``.
            contract: Default contract used when ``run`` is not given one.
            verifier: Stored on the instance for later use.
        """
        self._agent = agent
        self._contract = contract
        # NOTE(review): this value is stored but never read by run(), which
        # always builds its own InvariantVerifier -- confirm the intended use.
        self._verifier = verifier

    async def run(
        self,
        session: ReplaySessionConfig,
        contract: ContractConfig | None = None,
    ) -> ReplayResult:
        """Replay the session and verify the agent response.

        Sends ``session.input`` to the agent and verifies the response
        against the contract invariants. The contract can be passed in
        directly; otherwise the one given at construction time is used.

        Args:
            session: Replay session whose ``input`` is sent to the agent.
            contract: Contract to verify against; overrides the runner's
                default contract when not ``None``.

        Returns:
            A ``ReplayResult`` carrying the agent response, the overall
            verdict, and one ``"<type>: pass|fail"`` entry per check when
            invariants were verified.
        """
        # Fall back to the constructor's contract only when none was passed;
        # an explicit None check avoids depending on the object's truthiness.
        if contract is None:
            contract = self._contract

        response = await self._agent.invoke(session.input)

        if contract is None:
            # Nothing to verify against: the agent's own status decides.
            return ReplayResult(response=response, passed=response.success)

        # Verify against contract invariants.
        # Imported at call time (presumably to avoid an import cycle at
        # module load -- TODO confirm).
        from flakestorm.contracts.engine import _contract_invariant_to_invariant_config
        from flakestorm.assertions.verifier import InvariantVerifier

        invariant_configs = [
            _contract_invariant_to_invariant_config(inv)
            for inv in contract.invariants
        ]
        if not invariant_configs:
            # NOTE(review): this path judges by `not response.error` while the
            # no-contract path uses `response.success` -- confirm they agree.
            return ReplayResult(response=response, passed=not response.error)

        verifier = InvariantVerifier(invariant_configs)
        result = verifier.verify(
            response.output or "",  # a missing output is verified as empty text
            response.latency_ms,
        )
        details = [
            f"{check.type.value}: {'pass' if check.passed else 'fail'}"
            for check in result.checks
        ]
        return ReplayResult(
            response=response,
            # An agent-side error fails the replay even if every check passed.
            passed=result.all_passed and not response.error,
            verification_details=details,
        )
|