# nomyo-router/test/test_api_integration.py

"""
Integration tests against the real backend at 192.168.0.50:12434.

Run with:
    pytest test/test_api_integration.py -v -m integration

All tests in this file are marked @pytest.mark.integration.
They require the test server to be reachable and to have at least one
chat model and one embedding model available.

Env vars to pin specific models:
    NOMYO_TEST_MODEL_CHAT   e.g. qwen2.5:1.5b
    NOMYO_TEST_EMBED_MODEL  e.g. nomic-embed-text:latest
"""
import json

import pytest


# Module-level pytest marker: tags every test in this file as ``integration``
# so the suite can be selected with ``-m integration``.
pytestmark = pytest.mark.integration


# ── Health / discovery routes ─────────────────────────────────────────────────

class TestDiscoveryRoutes:
    """Checks for the read-only discovery and health endpoints.

    Every test issues a single GET and inspects only the status code and
    the top-level keys of the JSON body.
    """

    @staticmethod
    async def _get_ok_json(client, path):
        """GET *path*, require HTTP 200, and return the decoded JSON body."""
        response = await client.get(path)
        assert response.status_code == 200
        return response.json()

    async def test_version(self, integration_client):
        """``/api/version`` exposes a string ``version`` field."""
        payload = await self._get_ok_json(integration_client, "/api/version")
        assert "version" in payload
        assert isinstance(payload["version"], str)

    async def test_tags_returns_models(self, integration_client):
        """``/api/tags`` returns a non-empty ``models`` list."""
        payload = await self._get_ok_json(integration_client, "/api/tags")
        assert "models" in payload
        models = payload["models"]
        assert isinstance(models, list)
        assert len(models) > 0

    async def test_ps_returns_list(self, integration_client):
        """``/api/ps`` returns a ``models`` list (it may be empty)."""
        payload = await self._get_ok_json(integration_client, "/api/ps")
        assert "models" in payload
        assert isinstance(payload["models"], list)

    async def test_v1_models_returns_data(self, integration_client):
        """``/v1/models`` returns a ``data`` list."""
        payload = await self._get_ok_json(integration_client, "/v1/models")
        assert "data" in payload
        assert isinstance(payload["data"], list)

    async def test_usage_returns_counts(self, integration_client):
        """``/api/usage`` carries both usage and token-usage counters."""
        payload = await self._get_ok_json(integration_client, "/api/usage")
        assert "usage_counts" in payload
        assert "token_usage_counts" in payload

    async def test_config_returns_endpoints(self, integration_client):
        """``/api/config`` includes an ``endpoints`` entry."""
        payload = await self._get_ok_json(integration_client, "/api/config")
        assert "endpoints" in payload

    async def test_hostname(self, integration_client):
        """``/api/hostname`` includes a ``hostname`` entry."""
        payload = await self._get_ok_json(integration_client, "/api/hostname")
        assert "hostname" in payload

    async def test_health(self, integration_client):
        """``/health`` answers 200 or 503 with a status and an endpoint report."""
        # Not routed through the helper: 503 is an accepted status here.
        response = await integration_client.get("/health")
        assert response.status_code in (200, 503)
        payload = response.json()
        assert payload["status"] in ("ok", "error")
        assert "endpoints" in payload

    async def test_cache_stats(self, integration_client):
        """``/api/cache/stats`` reports an ``enabled`` flag."""
        payload = await self._get_ok_json(integration_client, "/api/cache/stats")
        assert "enabled" in payload


# ── /api/chat ─────────────────────────────────────────────────────────────────

class TestApiChat:
    """Integration tests for ``POST /api/chat``.

    All requests cap generation with a small ``num_predict`` so the tests
    stay quick; only the response structure is checked, not the wording of
    the model output.
    """

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming request returns one JSON object with message content."""
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": False,
                "messages": [{"role": "user", "content": "Reply with exactly: OK"}],
                "options": {"num_predict": 10},
            },
        )
        assert resp.status_code == 200
        data = resp.json()
        assert "message" in data
        assert "content" in data["message"]

    async def test_streaming_ndjson(self, integration_client, chat_model):
        """A streaming request yields newline-delimited JSON objects."""
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": True,
                "messages": [{"role": "user", "content": "Say hi"}],
                "options": {"num_predict": 5},
            },
        )
        assert resp.status_code == 200
        # Split on "\n" only: every non-blank line must be a standalone JSON
        # document.
        lines = [line for line in resp.text.strip().split("\n") if line.strip()]
        assert len(lines) >= 1
        for line in lines:
            obj = json.loads(line)
            assert "model" in obj

    async def test_non_streaming_has_token_counts(self, integration_client, chat_model):
        """A completed non-streaming response reports ``prompt_eval_count``."""
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": False,
                "messages": [{"role": "user", "content": "Count to 3"}],
                "options": {"num_predict": 20},
            },
        )
        assert resp.status_code == 200
        data = resp.json()
        assert data.get("done") is True
        # Token counts should be present in the final chunk.  Check presence
        # explicitly: ``data.get("prompt_eval_count", 0) >= 0`` would also
        # pass when the field is missing, which defeats the test.
        assert "prompt_eval_count" in data
        prompt_tokens = data["prompt_eval_count"]
        assert isinstance(prompt_tokens, int)
        assert prompt_tokens >= 0

    async def test_system_message_honoured(self, integration_client, chat_model):
        """A request with a system message still produces non-empty text.

        Only the presence of string content is asserted; the reply text
        itself is not compared against the system instruction.
        """
        resp = await integration_client.post(
            "/api/chat",
            json={
                "model": chat_model,
                "stream": False,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant. Always reply with exactly: PONG"},
                    {"role": "user", "content": "PING"},
                ],
                "options": {"num_predict": 10},
            },
        )
        assert resp.status_code == 200
        content = resp.json()["message"]["content"]
        assert isinstance(content, str)
        assert len(content) > 0


# ── /api/generate ─────────────────────────────────────────────────────────────

class TestApiGenerate:
    """Integration tests for ``POST /api/generate``."""

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming completion returns a body with a ``response`` key."""
        request_body = {
            "model": chat_model,
            "prompt": "Complete: The sky is",
            "stream": False,
            "options": {"num_predict": 5},
        }
        response = await integration_client.post("/api/generate", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "response" in payload

    async def test_streaming(self, integration_client, chat_model):
        """A streaming completion produces at least one non-blank line."""
        request_body = {
            "model": chat_model,
            "prompt": "One plus one equals",
            "stream": True,
            "options": {"num_predict": 5},
        }
        response = await integration_client.post("/api/generate", json=request_body)
        assert response.status_code == 200
        # Only the line count is checked; the chunks are not parsed.
        chunks = [row for row in response.text.strip().split("\n") if row.strip()]
        assert len(chunks) >= 1


# ── /api/embed ────────────────────────────────────────────────────────────────

class TestApiEmbed:
    """Integration tests for ``POST /api/embed``."""

    async def test_embed_single_string(self, integration_client, embed_model):
        """A single string input yields exactly one non-empty embedding."""
        request_body = {"model": embed_model, "input": "The quick brown fox"}
        response = await integration_client.post("/api/embed", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "embeddings" in payload
        vectors = payload["embeddings"]
        assert isinstance(vectors, list)
        assert len(vectors) == 1
        assert len(vectors[0]) > 0

    async def test_embed_multiple_inputs(self, integration_client, embed_model):
        """A list of two inputs yields two embeddings."""
        request_body = {"model": embed_model, "input": ["sentence one", "sentence two"]}
        response = await integration_client.post("/api/embed", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "embeddings" in payload
        assert len(payload["embeddings"]) == 2


# ── /v1/chat/completions ──────────────────────────────────────────────────────

class TestOpenAIChatCompletions:
    """Integration tests for ``POST /v1/chat/completions``.

    Each test sends the chat model name with any ``:latest`` text removed.
    """

    @staticmethod
    def _without_latest(model_name):
        """Return *model_name* with every ``:latest`` occurrence removed."""
        return model_name.replace(":latest", "")

    async def test_non_streaming(self, integration_client, chat_model):
        """A non-streaming call returns at least one choice holding a message."""
        request_body = {
            "model": self._without_latest(chat_model),
            "messages": [{"role": "user", "content": "Reply OK"}],
            "max_tokens": 10,
            "stream": False,
        }
        response = await integration_client.post("/v1/chat/completions", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "choices" in payload
        choices = payload["choices"]
        assert len(choices) > 0
        assert "message" in choices[0]

    async def test_streaming_sse(self, integration_client, chat_model):
        """A streaming call returns a body that looks like server-sent events."""
        request_body = {
            "model": self._without_latest(chat_model),
            "messages": [{"role": "user", "content": "Hi"}],
            "max_tokens": 5,
            "stream": True,
        }
        response = await integration_client.post("/v1/chat/completions", json=request_body)
        assert response.status_code == 200
        # Response should be SSE format: accept either marker in the raw text.
        body = response.text
        assert any(marker in body for marker in ("data:", "[DONE]"))

    async def test_non_streaming_has_usage(self, integration_client, chat_model):
        """When a non-empty ``usage`` block is returned, ``prompt_tokens`` is >= 0.

        NOTE(review): a missing or empty ``usage`` block is tolerated, so this
        test also passes when no usage data is reported at all.
        """
        request_body = {
            "model": self._without_latest(chat_model),
            "messages": [{"role": "user", "content": "Say yes"}],
            "max_tokens": 5,
            "stream": False,
        }
        response = await integration_client.post("/v1/chat/completions", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        usage_reported = "usage" in payload and payload["usage"]
        if usage_reported:
            assert payload["usage"].get("prompt_tokens", 0) >= 0


# ── /v1/embeddings ────────────────────────────────────────────────────────────

class TestOpenAIEmbeddings:
    """Integration tests for ``POST /v1/embeddings``."""

    async def test_single_input(self, integration_client, embed_model):
        """A single input string yields a non-empty embedding list."""
        # The model name is sent with any ":latest" text removed.
        request_body = {
            "model": embed_model.replace(":latest", ""),
            "input": "Test sentence",
        }
        response = await integration_client.post("/v1/embeddings", json=request_body)
        assert response.status_code == 200
        payload = response.json()
        assert "data" in payload
        entries = payload["data"]
        assert len(entries) > 0
        vector = entries[0].get("embedding")
        assert isinstance(vector, list)
        assert len(vector) > 0


# ── Token counts (database-backed) ───────────────────────────────────────────

class TestTokenCounts:
    """Integration test for the ``/api/token_counts`` endpoint."""

    async def test_token_counts_endpoint(self, integration_client):
        """The endpoint answers 200 with ``total_tokens`` and ``breakdown`` keys."""
        response = await integration_client.get("/api/token_counts")
        assert response.status_code == 200
        payload = response.json()
        for required_key in ("total_tokens", "breakdown"):
            assert required_key in payload


# ── ps_details (extended ps) ─────────────────────────────────────────────────

class TestPsDetails:
    """Integration test for the ``/api/ps_details`` endpoint."""

    async def test_ps_details_returns_models(self, integration_client):
        """The endpoint answers 200 with a ``models`` list (it may be empty)."""
        response = await integration_client.get("/api/ps_details")
        assert response.status_code == 200
        payload = response.json()
        assert "models" in payload
        models = payload["models"]
        assert isinstance(models, list)