"""
|
||
V2 Research Assistant Agent — Working example for Flakestorm v2.
|
||
|
||
An HTTP agent that calls a real LLM (Ollama) to answer queries. It uses a
|
||
system prompt so responses tend to cite a source (behavioral contract).
|
||
Supports /reset for contract matrix isolation. Demonstrates:
|
||
- flakestorm run (mutation testing)
|
||
- flakestorm run --chaos / --chaos-profile (environment chaos)
|
||
- flakestorm contract run (behavioral contract × chaos matrix)
|
||
- flakestorm replay run (replay regression)
|
||
- flakestorm ci (unified run with overall score)
|
||
|
||
Requires: Ollama running with the same model as in flakestorm.yaml (e.g. gemma3:1b).
|
||
"""

import os
import time
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI(title="V2 Research Assistant Agent")

# In-memory state (cleared by /reset for contract isolation)
_state = {"calls": 0}

# Ollama config (match flakestorm.yaml or set OLLAMA_BASE_URL, OLLAMA_MODEL)
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma3:1b")
OLLAMA_TIMEOUT = float(os.environ.get("OLLAMA_TIMEOUT", "60"))
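
# Example override (values are illustrative):
#   OLLAMA_BASE_URL=http://127.0.0.1:11434 OLLAMA_MODEL=gemma3:1b OLLAMA_TIMEOUT=120 python research_agent.py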

SYSTEM_PROMPT = """You are a research assistant. For every answer, you must cite a source using phrases like "According to ...", "Source: ...", or "Per ...". Keep answers concise (2-4 sentences). If you don't know, say so and still cite that you couldn't find a source."""


class InvokeRequest(BaseModel):
    """Request body: input, prompt, or query (any one field is used)."""
    input: str | None = None
    prompt: str | None = None
    query: str | None = None


class InvokeResponse(BaseModel):
    """Response with result and optional metadata."""
    result: str
    source: str = "ollama"
    latency_ms: float | None = None
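
# Example wire format (illustrative values), assuming the defaults above:
#   POST /invoke   {"input": "Who wrote 'Attention Is All You Need'?"}
#   -> 200         {"result": "According to ...", "source": "ollama", "latency_ms": 812.4}
# On error the handler below falls back to source="fallback" instead of raising.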


def _call_ollama(prompt: str) -> tuple[str, float]:
    """Call Ollama generate API. Returns (response_text, latency_ms). Raises on failure."""
    import httpx
    start = time.perf_counter()
    url = f"{OLLAMA_BASE_URL}/api/generate"
    body = {
        "model": OLLAMA_MODEL,
        "prompt": f"{SYSTEM_PROMPT}\n\nUser: {prompt}\n\nAssistant:",
        "stream": False,
    }
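    # With "stream": False, Ollama's /api/generate returns one JSON object whose
    # "response" field holds the full completion (read via data.get("response") below).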
    with httpx.Client(timeout=OLLAMA_TIMEOUT) as client:
        r = client.post(url, json=body)
        r.raise_for_status()
        data = r.json()
    elapsed_ms = (time.perf_counter() - start) * 1000
    text = (data.get("response") or "").strip()
    return text or "(No response from model)", elapsed_ms


@app.post("/reset")
def reset():
    """Reset agent state. Called by Flakestorm before each contract matrix cell."""
    _state["calls"] = 0
    return {"ok": True}


@app.post("/invoke", response_model=InvokeResponse)
def invoke(req: InvokeRequest):
    """Handle a single user query. Calls Ollama and returns the model response."""
    _state["calls"] += 1
    text = (req.input or req.prompt or req.query or "").strip()
    if not text:
        return InvokeResponse(
            result="I didn't receive a question. Please ask something.",
            source="none",
        )
    try:
        response, latency_ms = _call_ollama(text)
        return InvokeResponse(
            result=response,
            source="ollama",
            latency_ms=round(latency_ms, 2),
        )
    except Exception as e:
        # Graceful fallback so "completes" invariant can still pass under chaos
        return InvokeResponse(
            result=f"According to [source: system], I couldn't reach the knowledge base right now ({type(e).__name__}). Please try again.",
            source="fallback",
            latency_ms=None,
        )


@app.get("/health")
def health():
    return {"status": "ok"}


if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", "8790"))
    uvicorn.run(app, host="0.0.0.0", port=port)