nomyo-router/test/test_api_integration.py

304 lines
12 KiB
Python

"""
Integration tests against the real backend at 192.168.0.50:12434.

Run with:
    pytest test/test_api_integration.py -v -m integration

All tests in this file are marked @pytest.mark.integration.
They require the test server to be reachable and to have at least one
chat model and one embedding model available.

Env vars to pin specific models:
    NOMYO_TEST_MODEL_CHAT   e.g. qwen2.5:1.5b
    NOMYO_TEST_EMBED_MODEL  e.g. nomic-embed-text:latest
"""
import json
import pytest
# Module-level ``pytestmark``: pytest applies this marker to every test in the file,
# which is what lets ``-m integration`` select (or deselect) the whole module.
pytestmark = pytest.mark.integration
# ── Health / discovery routes ─────────────────────────────────────────────────
class TestDiscoveryRoutes:
    """Smoke tests for the GET-only health and discovery endpoints."""

    @staticmethod
    async def _fetch_json(client, path, ok_statuses=(200,)):
        """GET *path*, assert the status is in *ok_statuses*, return the JSON body.

        Not collected by pytest (name does not start with ``test``).
        """
        response = await client.get(path)
        assert response.status_code in ok_statuses
        return response.json()

    async def test_version(self, integration_client):
        """``/api/version`` reports a string under ``version``."""
        payload = await self._fetch_json(integration_client, "/api/version")
        assert "version" in payload
        assert isinstance(payload["version"], str)

    async def test_tags_returns_models(self, integration_client):
        """``/api/tags`` returns a non-empty ``models`` list."""
        payload = await self._fetch_json(integration_client, "/api/tags")
        assert "models" in payload
        models = payload["models"]
        assert isinstance(models, list)
        assert len(models) > 0

    async def test_ps_returns_list(self, integration_client):
        """``/api/ps`` returns a ``models`` list (possibly empty)."""
        payload = await self._fetch_json(integration_client, "/api/ps")
        assert "models" in payload
        assert isinstance(payload["models"], list)

    async def test_v1_models_returns_data(self, integration_client):
        """``/v1/models`` returns a ``data`` list."""
        payload = await self._fetch_json(integration_client, "/v1/models")
        assert "data" in payload
        assert isinstance(payload["data"], list)

    async def test_usage_returns_counts(self, integration_client):
        """``/api/usage`` exposes both usage counter maps."""
        payload = await self._fetch_json(integration_client, "/api/usage")
        for expected_key in ("usage_counts", "token_usage_counts"):
            assert expected_key in payload

    async def test_config_returns_endpoints(self, integration_client):
        """``/api/config`` includes an ``endpoints`` entry."""
        payload = await self._fetch_json(integration_client, "/api/config")
        assert "endpoints" in payload

    async def test_hostname(self, integration_client):
        """``/api/hostname`` includes a ``hostname`` entry."""
        payload = await self._fetch_json(integration_client, "/api/hostname")
        assert "hostname" in payload

    async def test_health(self, integration_client):
        """``/health`` answers 200 or 503 with a status and endpoint details."""
        payload = await self._fetch_json(
            integration_client, "/health", ok_statuses=(200, 503)
        )
        assert payload["status"] in ("ok", "error")
        assert "endpoints" in payload

    async def test_cache_stats(self, integration_client):
        """``/api/cache/stats`` reports at least the ``enabled`` flag."""
        payload = await self._fetch_json(integration_client, "/api/cache/stats")
        assert "enabled" in payload
# ── /api/chat ─────────────────────────────────────────────────────────────────
class TestApiChat:
    """Tests for the ``/api/chat`` route in streaming and non-streaming mode."""

    @staticmethod
    async def _post_chat(client, model, messages, *, stream, num_predict):
        """POST a chat request to ``/api/chat`` and return the response.

        Asserts HTTP 200 before returning so each test can go straight to
        inspecting the body. Not collected by pytest (no ``test`` prefix).
        """
        resp = await client.post(
            "/api/chat",
            json={
                "model": model,
                "stream": stream,
                "messages": messages,
                "options": {"num_predict": num_predict},
            },
        )
        assert resp.status_code == 200
        return resp

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming reply contains ``message.content``."""
        resp = await self._post_chat(
            integration_client,
            chat_model,
            [{"role": "user", "content": "Reply with exactly: OK"}],
            stream=False,
            num_predict=10,
        )
        data = resp.json()
        assert "message" in data
        assert "content" in data["message"]

    async def test_streaming_ndjson(self, integration_client, chat_model):
        """A streaming reply is newline-delimited JSON; every object has ``model``."""
        resp = await self._post_chat(
            integration_client,
            chat_model,
            [{"role": "user", "content": "Say hi"}],
            stream=True,
            num_predict=5,
        )
        # Split on "\n" only: str.splitlines() would also break on separators
        # that may legally appear inside a JSON string.
        lines = [line for line in resp.text.strip().split("\n") if line.strip()]
        assert len(lines) >= 1
        for line in lines:
            obj = json.loads(line)
            assert "model" in obj

    async def test_non_streaming_has_token_counts(self, integration_client, chat_model):
        """The final (only) chunk is marked done and actually reports token counts."""
        resp = await self._post_chat(
            integration_client,
            chat_model,
            [{"role": "user", "content": "Count to 3"}],
            stream=False,
            num_predict=20,
        )
        data = resp.json()
        assert data.get("done") is True
        # Presence must be checked explicitly: a ``.get(..., 0) >= 0`` test is
        # satisfied by a missing field and therefore verifies nothing.
        # Either counter is accepted because a backend may omit a zero-valued
        # one -- TODO confirm against the router's response schema.
        counts = {
            field: data[field]
            for field in ("prompt_eval_count", "eval_count")
            if field in data
        }
        assert counts, "response carries no token counts"
        for field, value in counts.items():
            assert isinstance(value, int) and value >= 0, f"{field}={value!r}"

    async def test_system_message_honoured(self, integration_client, chat_model):
        """A request with a system message still yields non-empty string content.

        Only the shape of the reply is checked; the model's wording is not.
        """
        resp = await self._post_chat(
            integration_client,
            chat_model,
            [
                {"role": "system", "content": "You are a helpful assistant. Always reply with exactly: PONG"},
                {"role": "user", "content": "PING"},
            ],
            stream=False,
            num_predict=10,
        )
        content = resp.json()["message"]["content"]
        assert isinstance(content, str)
        assert len(content) > 0
# ── /api/generate ─────────────────────────────────────────────────────────────
class TestApiGenerate:
    """Tests for the ``/api/generate`` completion route."""

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming completion carries a ``response`` field."""
        request_body = {
            "model": chat_model,
            "prompt": "Complete: The sky is",
            "stream": False,
            "options": {"num_predict": 5},
        }
        response = await integration_client.post("/api/generate", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "response" in payload

    async def test_streaming(self, integration_client, chat_model):
        """A streaming completion yields at least one non-blank line."""
        request_body = {
            "model": chat_model,
            "prompt": "One plus one equals",
            "stream": True,
            "options": {"num_predict": 5},
        }
        response = await integration_client.post("/api/generate", json=request_body)
        assert response.status_code == 200
        non_blank = [
            chunk for chunk in response.text.strip().split("\n") if chunk.strip()
        ]
        assert len(non_blank) >= 1
# ── /api/embed ────────────────────────────────────────────────────────────────
class TestApiEmbed:
    """Tests for the ``/api/embed`` route with string and list inputs."""

    async def test_embed_single_string(self, integration_client, embed_model):
        """One input string produces exactly one non-empty embedding."""
        request_body = {"model": embed_model, "input": "The quick brown fox"}
        response = await integration_client.post("/api/embed", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "embeddings" in payload
        vectors = payload["embeddings"]
        assert isinstance(vectors, list)
        assert len(vectors) == 1
        assert len(vectors[0]) > 0

    async def test_embed_multiple_inputs(self, integration_client, embed_model):
        """Two input strings produce two embeddings."""
        request_body = {
            "model": embed_model,
            "input": ["sentence one", "sentence two"],
        }
        response = await integration_client.post("/api/embed", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "embeddings" in payload
        assert len(payload["embeddings"]) == 2
# ── /v1/chat/completions ──────────────────────────────────────────────────────
class TestOpenAIChatCompletions:
    """Tests for the ``/v1/chat/completions`` route."""

    @staticmethod
    def _without_latest_tag(name):
        """Return *name* with every ``:latest`` occurrence removed.

        ``str.replace`` leaves the string unchanged when the tag is absent.
        Presumably the /v1 routes are addressed by untagged name -- confirm.
        """
        return name.replace(":latest", "")

    async def _complete(self, client, chat_model, prompt, *, max_tokens, stream):
        """POST one user *prompt* to ``/v1/chat/completions``; assert 200; return the response."""
        response = await client.post(
            "/v1/chat/completions",
            json={
                "model": self._without_latest_tag(chat_model),
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "stream": stream,
            },
        )
        assert response.status_code == 200
        return response

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming completion has at least one choice with a ``message``."""
        response = await self._complete(
            integration_client, chat_model, "Reply OK", max_tokens=10, stream=False
        )
        payload = response.json()
        assert "choices" in payload
        choices = payload["choices"]
        assert len(choices) > 0
        assert "message" in choices[0]

    async def test_streaming_sse(self, integration_client, chat_model):
        """A streaming completion body contains an SSE marker."""
        response = await self._complete(
            integration_client, chat_model, "Hi", max_tokens=5, stream=True
        )
        # Either a "data:" frame prefix or the "[DONE]" sentinel is accepted.
        body = response.text
        assert any(marker in body for marker in ("data:", "[DONE]"))

    async def test_non_streaming_has_usage(self, integration_client, chat_model):
        """When a usage block is returned, ``prompt_tokens`` is non-negative.

        NOTE(review): this passes when ``usage`` is missing or empty, and when
        ``prompt_tokens`` itself is absent -- confirm that leniency is intended.
        """
        response = await self._complete(
            integration_client, chat_model, "Say yes", max_tokens=5, stream=False
        )
        usage = response.json().get("usage")
        if usage:
            assert usage.get("prompt_tokens", 0) >= 0
# ── /v1/embeddings ────────────────────────────────────────────────────────────
class TestOpenAIEmbeddings:
    """Tests for the ``/v1/embeddings`` route."""

    async def test_single_input(self, integration_client, embed_model):
        """One input string yields a ``data`` entry with a non-empty embedding list."""
        # str.replace is a no-op when the ":latest" tag is not present.
        untagged = embed_model.replace(":latest", "")
        request_body = {"model": untagged, "input": "Test sentence"}
        response = await integration_client.post("/v1/embeddings", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "data" in payload
        entries = payload["data"]
        assert len(entries) > 0
        vector = entries[0].get("embedding")
        assert isinstance(vector, list)
        assert len(vector) > 0
# ── Token counts (database-backed) ───────────────────────────────────────────
class TestTokenCounts:
    """Tests for the ``/api/token_counts`` route."""

    async def test_token_counts_endpoint(self, integration_client):
        """The endpoint answers 200 with ``total_tokens`` and ``breakdown`` keys."""
        response = await integration_client.get("/api/token_counts")
        assert response.status_code == 200
        payload = response.json()
        for expected_key in ("total_tokens", "breakdown"):
            assert expected_key in payload
# ── ps_details (extended ps) ─────────────────────────────────────────────────
class TestPsDetails:
    """Tests for the ``/api/ps_details`` route."""

    async def test_ps_details_returns_models(self, integration_client):
        """The endpoint answers 200 with a ``models`` list."""
        response = await integration_client.get("/api/ps_details")
        assert response.status_code == 200
        payload = response.json()
        assert "models" in payload
        models = payload["models"]
        assert isinstance(models, list)