flakestorm/examples/v2_research_agent/agent.py

109 lines
3.7 KiB
Python
Raw Normal View History

"""
V2 Research Assistant Agent — Working example for Flakestorm v2.
An HTTP agent that calls a real LLM (Ollama) to answer queries. It uses a
system prompt so responses tend to cite a source (behavioral contract).
Supports /reset for contract matrix isolation. Demonstrates:
- flakestorm run (mutation testing)
- flakestorm run --chaos / --chaos-profile (environment chaos)
- flakestorm contract run (behavioral contract × chaos matrix)
- flakestorm replay run (replay regression)
- flakestorm ci (unified run with overall score)
Requires: Ollama running with the same model as in flakestorm.yaml (e.g. gemma3:1b).
"""
import os
import time
from fastapi import FastAPI
from pydantic import BaseModel
# FastAPI application object; the /reset, /invoke and /health handlers in this
# module are registered on it, and it is what uvicorn serves when run as a script.
app = FastAPI(title="V2 Research Assistant Agent")
# Mutable in-memory agent state; the /reset endpoint zeroes it for contract isolation.
_state = dict(calls=0)

# Ollama settings. Each value can be overridden through the environment
# (OLLAMA_BASE_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT) and should match flakestorm.yaml.
# Trailing slashes are stripped from the base URL so paths can be appended with "/".
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma3:1b")
# Passed as the httpx client timeout when calling Ollama.
OLLAMA_TIMEOUT = float(os.getenv("OLLAMA_TIMEOUT", "60"))

# Instruction text prepended to every user query; it asks the model to cite a
# source in each answer (the behavioral contract this example demonstrates).
SYSTEM_PROMPT = (
    "You are a research assistant. "
    'For every answer, you must cite a source using phrases like "According to ...", "Source: ...", or "Per ...". '
    "Keep answers concise (2-4 sentences). "
    "If you don't know, say so and still cite that you couldn't find a source."
)
class InvokeRequest(BaseModel):
    """Body of a POST /invoke call.

    The question may arrive under any of three optional keys. The /invoke
    handler uses the first truthy one, checked in the order ``input``,
    ``prompt``, ``query``.
    """

    input: str | None = None
    prompt: str | None = None
    query: str | None = None
class InvokeResponse(BaseModel):
    """Payload returned by /invoke: the answer text plus optional metadata."""

    # Answer text sent back to the caller.
    result: str
    # Origin of the answer; the /invoke handler sets "ollama", "fallback" or "none".
    source: str = "ollama"
    # Duration of the Ollama call in milliseconds; None when no call completed.
    latency_ms: float | None = None
def _call_ollama(prompt: str) -> tuple[str, float]:
    """Send *prompt* to the Ollama ``/api/generate`` endpoint and time the call.

    The system prompt is prepended to the user text and the request is made
    with streaming disabled; the reply body is parsed as one JSON document.

    Returns:
        ``(response_text, latency_ms)``. When the ``response`` field is
        missing or empty after stripping, the placeholder
        ``"(No response from model)"`` is returned instead.

    Raises:
        Any exception raised by ``httpx`` while sending the request, by
        ``raise_for_status`` for an error status, or while decoding the JSON.
    """
    import httpx  # imported at call time rather than at module import time

    # The timer starts before the payload is built, so latency covers the
    # whole call including client setup and teardown.
    started = time.perf_counter()
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": f"{SYSTEM_PROMPT}\n\nUser: {prompt}\n\nAssistant:",
        "stream": False,
    }
    with httpx.Client(timeout=OLLAMA_TIMEOUT) as http:
        reply = http.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload)
        reply.raise_for_status()
        parsed = reply.json()
    latency_ms = 1000 * (time.perf_counter() - started)

    answer = (parsed.get("response") or "").strip()
    if not answer:
        answer = "(No response from model)"
    return answer, latency_ms
@app.post("/reset")
def reset():
    """Zero the in-memory call counter.

    Flakestorm calls this endpoint before each contract matrix cell so that
    every cell starts from the same state.
    """
    _state.update(calls=0)
    return dict(ok=True)
@app.post("/invoke", response_model=InvokeResponse)
def invoke(req: InvokeRequest):
    """Answer one user query by forwarding it to Ollama.

    The question is the first truthy value among ``req.input``, ``req.prompt``
    and ``req.query``, stripped of surrounding whitespace. Every call,
    including one with no question, increments the in-memory call counter.

    Returns:
        An ``InvokeResponse`` whose ``source`` is ``"none"`` when no question
        was supplied, ``"ollama"`` on success (``latency_ms`` rounded to two
        decimals), or ``"fallback"`` when the Ollama call raised.
    """
    _state["calls"] = _state["calls"] + 1

    candidates = (req.input, req.prompt, req.query)
    question = next((value for value in candidates if value), "").strip()
    if question == "":
        return InvokeResponse(
            result="I didn't receive a question. Please ask something.",
            source="none",
        )

    try:
        answer, elapsed = _call_ollama(question)
        reply = InvokeResponse(
            result=answer,
            source="ollama",
            latency_ms=round(elapsed, 2),
        )
    except Exception as exc:
        # Deliberate catch-all: under injected chaos the agent must still
        # answer, so the "completes" invariant can pass.
        reply = InvokeResponse(
            result=f"According to [source: system], I couldn't reach the knowledge base right now ({type(exc).__name__}). Please try again.",
            source="fallback",
            latency_ms=None,
        )
    return reply
@app.get("/health")
def health():
    """Health-check endpoint; always returns ``{"status": "ok"}``."""
    return dict(status="ok")
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    # The port is read from the PORT environment variable (default 8790).
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8790")))