304 lines
12 KiB
Python
304 lines
12 KiB
Python
"""
Integration tests against the real backend at 192.168.0.50:12434.

Run with:
    pytest test/test_api_integration.py -v -m integration

All tests in this file are marked @pytest.mark.integration.
They require the test server to be reachable and to have at least one
chat model and one embedding model available.

Env vars to pin specific models:
    NOMYO_TEST_MODEL_CHAT   e.g. qwen2.5:1.5b
    NOMYO_TEST_EMBED_MODEL  e.g. nomic-embed-text:latest
"""
|
|
import json
|
|
|
|
import pytest
|
|
|
|
|
|
# Module-level mark: tags every test in this file as `integration`, which is
# what the `-m integration` selector in the run command filters on.
pytestmark = pytest.mark.integration
|
|
|
|
|
|
# ── Health / discovery routes ─────────────────────────────────────────────────
|
|
|
|
class TestDiscoveryRoutes:
    """GET-only checks of the health and discovery endpoints."""

    async def test_version(self, integration_client):
        """/api/version answers 200 with a string ``version`` field."""
        response = await integration_client.get("/api/version")
        assert response.status_code == 200
        payload = response.json()
        assert "version" in payload
        assert isinstance(payload["version"], str)

    async def test_tags_returns_models(self, integration_client):
        """/api/tags answers 200 with a non-empty ``models`` list."""
        response = await integration_client.get("/api/tags")
        assert response.status_code == 200
        payload = response.json()
        assert "models" in payload
        models = payload["models"]
        assert isinstance(models, list)
        assert len(models) > 0

    async def test_ps_returns_list(self, integration_client):
        """/api/ps answers 200 with a ``models`` list (may be empty)."""
        response = await integration_client.get("/api/ps")
        assert response.status_code == 200
        payload = response.json()
        assert "models" in payload
        assert isinstance(payload["models"], list)

    async def test_v1_models_returns_data(self, integration_client):
        """/v1/models answers 200 with a ``data`` list."""
        response = await integration_client.get("/v1/models")
        assert response.status_code == 200
        payload = response.json()
        assert "data" in payload
        assert isinstance(payload["data"], list)

    async def test_usage_returns_counts(self, integration_client):
        """/api/usage answers 200 and exposes both usage counters."""
        response = await integration_client.get("/api/usage")
        assert response.status_code == 200
        payload = response.json()
        assert "usage_counts" in payload
        assert "token_usage_counts" in payload

    async def test_config_returns_endpoints(self, integration_client):
        """/api/config answers 200 with an ``endpoints`` entry."""
        response = await integration_client.get("/api/config")
        assert response.status_code == 200
        payload = response.json()
        assert "endpoints" in payload

    async def test_hostname(self, integration_client):
        """/api/hostname answers 200 with a ``hostname`` entry."""
        response = await integration_client.get("/api/hostname")
        assert response.status_code == 200
        payload = response.json()
        assert "hostname" in payload

    async def test_health(self, integration_client):
        """/health answers 200 or 503 and reports ``status`` plus ``endpoints``."""
        response = await integration_client.get("/health")
        # Both status codes are accepted here; only the body shape is checked.
        assert response.status_code in (200, 503)
        payload = response.json()
        assert payload["status"] in ("ok", "error")
        assert "endpoints" in payload

    async def test_cache_stats(self, integration_client):
        """/api/cache/stats answers 200 with an ``enabled`` entry."""
        response = await integration_client.get("/api/cache/stats")
        assert response.status_code == 200
        payload = response.json()
        assert "enabled" in payload
|
|
|
|
|
|
# ── /api/chat ─────────────────────────────────────────────────────────────────
|
|
|
|
class TestApiChat:
    """POST /api/chat in non-streaming and streaming (NDJSON) modes."""

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming chat answers 200 with a ``message`` holding ``content``."""
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": False,
                "messages": [{"role": "user", "content": "Reply with exactly: OK"}],
                "options": {"num_predict": 10},
            },
        )
        assert resp.status_code == 200
        data = resp.json()
        assert "message" in data
        assert "content" in data["message"]

    async def test_streaming_ndjson(self, integration_client, chat_model):
        """A streaming chat answers 200 and every non-blank line is JSON with ``model``."""
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": True,
                "messages": [{"role": "user", "content": "Say hi"}],
                "options": {"num_predict": 5},
            },
        )
        assert resp.status_code == 200
        # Split on "\n" only: str.splitlines() also breaks on other line-boundary
        # characters, which could cut a JSON record in half.
        lines = [line for line in resp.text.strip().split("\n") if line.strip()]
        assert len(lines) >= 1
        for line in lines:
            obj = json.loads(line)
            assert "model" in obj

    async def test_non_streaming_has_token_counts(self, integration_client, chat_model):
        """A non-streaming chat is marked ``done`` and reports sane token counts."""
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": False,
                "messages": [{"role": "user", "content": "Count to 3"}],
                "options": {"num_predict": 20},
            },
        )
        assert resp.status_code == 200
        data = resp.json()
        assert data.get("done") is True
        # An absent count is tolerated (treated as 0), but any count that is
        # reported must be a non-negative integer.
        for key in ("prompt_eval_count", "eval_count"):
            count = data.get(key, 0)
            assert isinstance(count, int), key
            assert count >= 0, key

    async def test_system_message_honoured(self, integration_client, chat_model):
        """A chat with a system message answers 200 with non-empty string content.

        The exact reply text is not asserted; only that some content came back.
        """
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": False,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant. Always reply with exactly: PONG"},
                    {"role": "user", "content": "PING"},
                ],
                "options": {"num_predict": 10},
            },
        )
        assert resp.status_code == 200
        content = resp.json()["message"]["content"]
        assert isinstance(content, str)
        assert len(content) > 0
|
|
|
|
|
|
# ── /api/generate ─────────────────────────────────────────────────────────────
|
|
|
|
class TestApiGenerate:
    """POST /api/generate in non-streaming and streaming modes."""

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming generate answers 200 with a ``response`` entry."""
        request_body = {
            "model": chat_model,
            "prompt": "Complete: The sky is",
            "stream": False,
            "options": {"num_predict": 5},
        }
        response = await integration_client.post("/api/generate", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "response" in payload

    async def test_streaming(self, integration_client, chat_model):
        """A streaming generate answers 200 with at least one non-blank line."""
        request_body = {
            "model": chat_model,
            "prompt": "One plus one equals",
            "stream": True,
            "options": {"num_predict": 5},
        }
        response = await integration_client.post("/api/generate", json=request_body)
        assert response.status_code == 200
        raw_lines = response.text.strip().split("\n")
        lines = [chunk for chunk in raw_lines if chunk.strip()]
        assert len(lines) >= 1
|
|
|
|
|
|
# ── /api/embed ────────────────────────────────────────────────────────────────
|
|
|
|
class TestApiEmbed:
    """POST /api/embed with a single string and with a list of strings."""

    async def test_embed_single_string(self, integration_client, embed_model):
        """One input string yields exactly one non-empty embedding."""
        request_body = {"model": embed_model, "input": "The quick brown fox"}
        response = await integration_client.post("/api/embed", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "embeddings" in payload
        embeddings = payload["embeddings"]
        assert isinstance(embeddings, list)
        assert len(embeddings) == 1
        assert len(embeddings[0]) > 0

    async def test_embed_multiple_inputs(self, integration_client, embed_model):
        """Two input strings yield two embeddings."""
        request_body = {"model": embed_model, "input": ["sentence one", "sentence two"]}
        response = await integration_client.post("/api/embed", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "embeddings" in payload
        assert len(payload["embeddings"]) == 2
|
|
|
|
|
|
# ── /v1/chat/completions ──────────────────────────────────────────────────────
|
|
|
|
class TestOpenAIChatCompletions:
    """POST /v1/chat/completions in non-streaming and streaming (SSE) modes."""

    @staticmethod
    def _without_latest_tag(model_name):
        """Return *model_name* with every ``:latest`` occurrence removed.

        ``str.replace`` already returns the string unchanged when the tag is
        absent, so no membership check is needed beforehand.
        """
        return model_name.replace(":latest", "")

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming completion answers 200 with a ``message`` in the first choice."""
        model = self._without_latest_tag(chat_model)
        resp = await integration_client.post(
            "/v1/chat/completions",
            json={
                "model": model,
                "messages": [{"role": "user", "content": "Reply OK"}],
                "max_tokens": 10,
                "stream": False,
            },
        )
        assert resp.status_code == 200
        data = resp.json()
        assert "choices" in data
        assert len(data["choices"]) > 0
        assert "message" in data["choices"][0]

    async def test_streaming_sse(self, integration_client, chat_model):
        """A streaming completion answers 200 with an SSE-looking body."""
        model = self._without_latest_tag(chat_model)
        resp = await integration_client.post(
            "/v1/chat/completions",
            json={
                "model": model,
                "messages": [{"role": "user", "content": "Hi"}],
                "max_tokens": 5,
                "stream": True,
            },
        )
        assert resp.status_code == 200
        # Response should be SSE format
        assert "data:" in resp.text or "[DONE]" in resp.text

    async def test_non_streaming_has_usage(self, integration_client, chat_model):
        """A non-streaming completion answers 200; a reported usage has sane prompt tokens."""
        model = self._without_latest_tag(chat_model)
        resp = await integration_client.post(
            "/v1/chat/completions",
            json={
                "model": model,
                "messages": [{"role": "user", "content": "Say yes"}],
                "max_tokens": 5,
                "stream": False,
            },
        )
        assert resp.status_code == 200
        data = resp.json()
        # A missing or empty usage block is tolerated; it is only checked when present.
        usage = data.get("usage")
        if usage:
            assert usage.get("prompt_tokens", 0) >= 0
|
|
|
|
|
|
# ── /v1/embeddings ────────────────────────────────────────────────────────────
|
|
|
|
class TestOpenAIEmbeddings:
    """POST /v1/embeddings with a single input string."""

    async def test_single_input(self, integration_client, embed_model):
        """One input string answers 200 with a non-empty ``embedding`` list."""
        # str.replace is a no-op when ":latest" is absent, so no guard is needed.
        model = embed_model.replace(":latest", "")
        resp = await integration_client.post(
            "/v1/embeddings",
            json={"model": model, "input": "Test sentence"},
        )
        assert resp.status_code == 200
        data = resp.json()
        assert "data" in data
        assert len(data["data"]) > 0
        embedding = data["data"][0].get("embedding")
        assert isinstance(embedding, list)
        assert len(embedding) > 0
|
|
|
|
|
|
# ── Token counts (database-backed) ───────────────────────────────────────────
|
|
|
|
class TestTokenCounts:
    """GET check of the /api/token_counts endpoint."""

    async def test_token_counts_endpoint(self, integration_client):
        """/api/token_counts answers 200 with ``total_tokens`` and ``breakdown``."""
        response = await integration_client.get("/api/token_counts")
        assert response.status_code == 200
        payload = response.json()
        assert "total_tokens" in payload
        assert "breakdown" in payload
|
|
|
|
|
|
# ── ps_details (extended ps) ─────────────────────────────────────────────────
|
|
|
|
class TestPsDetails:
    """GET check of the /api/ps_details endpoint."""

    async def test_ps_details_returns_models(self, integration_client):
        """/api/ps_details answers 200 with a ``models`` list."""
        response = await integration_client.get("/api/ps_details")
        assert response.status_code == 200
        payload = response.json()
        assert "models" in payload
        assert isinstance(payload["models"], list)
|