# nomyo-router/test/test_cache.py

"""Unit tests for cache.LLMCache in exact-match mode (no sentence-transformers needed)."""
import tempfile
from pathlib import Path
from types import SimpleNamespace

import orjson
import pytest

import cache as cache_mod
from cache import (
    LLMCache,
    _bm25_weighted_text,
    get_llm_cache,
    init_llm_cache,
    openai_nonstream_to_sse,
)

# Value used for ``cache_db_path`` in the test config; located in the system temp dir.
_CACHE_DB_PATH = str(Path(tempfile.gettempdir()).joinpath("nomyo_test_cache.db"))


def _exact_cfg(backend: str = "memory") -> SimpleNamespace:
    """Build a cache config that forces exact-match mode.

    ``cache_similarity`` is pinned to 1.0 so that no embedding dependency is
    needed.

    Args:
        backend: Value stored in ``cache_backend``.

    Returns:
        A ``SimpleNamespace`` carrying the cache settings as attributes.
    """
    settings = {
        "cache_enabled": True,
        "cache_backend": backend,
        "cache_similarity": 1.0,
        "cache_history_weight": 0.3,
        "cache_ttl": 300,
        "cache_db_path": _CACHE_DB_PATH,
        "cache_redis_url": "redis://localhost:6379",
    }
    return SimpleNamespace(**settings)


# ──────────────────────────────────────────────────────────────────────────────
# Pure helpers
# ──────────────────────────────────────────────────────────────────────────────

class TestBM25WeightedText:
    """Checks for the ``_bm25_weighted_text`` history helper."""

    def test_empty_history(self):
        weighted = _bm25_weighted_text([])
        assert weighted == ""

    def test_history_without_content(self):
        # Messages that carry a role but no "content" key.
        contentless = [{"role": role} for role in ("user", "assistant")]
        assert _bm25_weighted_text(contentless) == ""

    def test_repeats_high_idf_terms(self):
        turns = (
            ("user", "Tell me about quantum entanglement"),
            ("assistant", "Quantum entanglement is a phenomenon"),
            ("user", "How does entanglement work?"),
        )
        history = [{"role": role, "content": text} for role, text in turns]
        weighted = _bm25_weighted_text(history)
        # The rare/domain term ("entanglement") should survive, while short
        # stopwords (<=2 chars) should be dropped.
        assert "entanglement" in weighted
        assert "is" not in weighted.split()


# ──────────────────────────────────────────────────────────────────────────────
# openai_nonstream_to_sse
# ──────────────────────────────────────────────────────────────────────────────

class TestOpenAINonstreamToSSE:
    """Checks for ``openai_nonstream_to_sse``."""

    def test_valid_chat_completion(self):
        assistant_message = {"role": "assistant", "content": "hello"}
        token_usage = {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}
        payload = orjson.dumps({
            "id": "x1",
            "created": 123,
            "model": "gpt-4o",
            "choices": [{"message": assistant_message}],
            "usage": token_usage,
        })
        sse_text = openai_nonstream_to_sse(payload, "gpt-4o").decode()

        prefix = "data: "
        assert sse_text.startswith(prefix)
        assert sse_text.endswith("data: [DONE]\n\n")

        # The first event must carry the original content and usage.
        first_event, _sep, _remainder = sse_text.partition("\n\n")
        chunk = orjson.loads(first_event[len(prefix):])
        assert chunk["choices"][0]["delta"]["content"] == "hello"
        assert chunk["usage"]["total_tokens"] == 3

    def test_corrupt_bytes_return_done_only(self):
        # Input that is not JSON should yield only the terminating event.
        sse_bytes = openai_nonstream_to_sse(b"not-json", "m")
        assert sse_bytes == b"data: [DONE]\n\n"


# ──────────────────────────────────────────────────────────────────────────────
# LLMCache internal helpers
# ──────────────────────────────────────────────────────────────────────────────

class TestLLMCacheParsing:
    """Checks for LLMCache's private ``_namespace`` and ``_parse_messages`` helpers."""

    def test_namespace_is_stable_and_isolated(self):
        llm_cache = LLMCache(_exact_cfg())
        baseline = llm_cache._namespace("chat", "m1", "system A")
        # Identical inputs must produce the identical namespace.
        repeated = llm_cache._namespace("chat", "m1", "system A")
        assert baseline == repeated
        # A different system prompt or a different route must change it.
        other_system = llm_cache._namespace("chat", "m1", "system B")
        assert other_system != baseline
        other_route = llm_cache._namespace("generate", "m1", "system A")
        assert other_route != baseline
        assert len(baseline) == 16

    def test_parse_messages_flat_strings(self):
        llm_cache = LLMCache(_exact_cfg())
        turns = [
            ("system", "be helpful"),
            ("user", "hi"),
            ("assistant", "hello"),
            ("user", "what is 2+2?"),
        ]
        conversation = [{"role": role, "content": text} for role, text in turns]
        system_text, history, final_user = llm_cache._parse_messages(conversation)
        assert system_text == "be helpful"
        assert final_user == "what is 2+2?"
        # History is everything between the system prompt and the last user turn.
        assert history == [
            {"role": role, "content": text} for role, text in turns[1:3]
        ]

    def test_parse_messages_multimodal_content(self):
        llm_cache = LLMCache(_exact_cfg())
        multimodal_parts = [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "data:..."}},
        ]
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": multimodal_parts},
        ]
        system_text, _history, final_user = llm_cache._parse_messages(conversation)
        assert system_text == "sys"
        # Only the text part of the list-valued content is expected back.
        assert final_user == "describe"

    def test_parse_messages_no_user_message(self):
        llm_cache = LLMCache(_exact_cfg())
        system_only = [{"role": "system", "content": "sys only"}]
        system_text, history, final_user = llm_cache._parse_messages(system_only)
        assert system_text == "sys only"
        assert final_user == ""
        assert history == []


class TestPersonalTokenExtraction:
    """Checks for ``LLMCache._extract_personal_tokens``."""

    def test_email_extracted(self):
        llm_cache = LLMCache(_exact_cfg())
        system_prompt = "Reach me at alice@example.com please"
        tokens = llm_cache._extract_personal_tokens(system_prompt)
        assert "alice@example.com" in tokens

    def test_numeric_id_after_keyword(self):
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens("User id: 123456")
        assert "123456" in tokens

    def test_identity_tag_names_extracted(self):
        llm_cache = LLMCache(_exact_cfg())
        system_prompt = "[Tags: identity] User's name is Andreas Schwibbe"
        tokens = llm_cache._extract_personal_tokens(system_prompt)
        # Both name tokens should be extracted lowercased.
        for expected in ("andreas", "schwibbe"):
            assert expected in tokens
        # Stopwords are dropped ("name" is in _IDENTITY_STOPWORDS).
        for dropped in ("name", "user"):
            assert dropped not in tokens

    def test_empty_system_returns_empty_set(self):
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens("")
        assert tokens == frozenset()


class TestResponseIsPersonalized:
    """Checks for ``LLMCache._response_is_personalized``."""

    def _resp(self, content: str) -> bytes:
        """Return a serialized ``choices``/``message`` body wrapping ``content``."""
        message = {"content": content}
        return orjson.dumps({"choices": [{"message": message}]})

    def test_email_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("contact bob@x.com")
        assert llm_cache._response_is_personalized(body, "")

    def test_uuid_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        sample_uuid = "550e8400-e29b-41d4-a716-446655440000"
        body = self._resp(f"id={sample_uuid}")
        assert llm_cache._response_is_personalized(body, "")

    def test_long_numeric_id_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("account 12345678")
        assert llm_cache._response_is_personalized(body, "")

    def test_identity_token_from_system_echoed_in_response(self):
        llm_cache = LLMCache(_exact_cfg())
        system_prompt = "[Tags: identity] Andreas works here"
        body = self._resp("Yes, Andreas is logged in")
        assert llm_cache._response_is_personalized(body, system_prompt)

    def test_generic_response_not_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("The capital of France is Paris.")
        flagged = llm_cache._response_is_personalized(body, "be helpful")
        assert not flagged

    def test_ollama_message_format_parsed(self):
        llm_cache = LLMCache(_exact_cfg())
        # Top-level "message" key instead of a "choices" list.
        ollama_body = orjson.dumps({"message": {"content": "alice@example.com"}})
        assert llm_cache._response_is_personalized(ollama_body, "")

    def test_unparseable_body_with_bytes_is_conservative(self):
        llm_cache = LLMCache(_exact_cfg())
        # An unparseable body is expected to be reported as personalized
        # (erring on the side of privacy).
        flagged = llm_cache._response_is_personalized(b"binary-junk", "")
        assert flagged

    def test_empty_response_not_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        flagged = llm_cache._response_is_personalized(b"", "anything")
        assert not flagged


# ──────────────────────────────────────────────────────────────────────────────
# End-to-end exact-match cache with the memory backend
# ──────────────────────────────────────────────────────────────────────────────

@pytest.fixture
async def memcache():
    """Provide an initialized LLMCache using the ``memory`` backend in exact-match mode."""
    instance = LLMCache(_exact_cfg("memory"))
    await instance.init()
    return instance


class TestExactMatchCache:
    """Get/set round trips through the cache supplied by the ``memcache`` fixture."""

    @staticmethod
    def _completion(content: str) -> bytes:
        """Return a serialized ``choices``/``message`` body wrapping ``content``."""
        return orjson.dumps({"choices": [{"message": {"content": content}}]})

    async def test_miss_then_set_then_hit(self, memcache):
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        body = self._completion("4")

        before = await memcache.get_chat("chat", "m1", conversation)
        assert before is None
        await memcache.set_chat("chat", "m1", conversation, body)
        after = await memcache.get_chat("chat", "m1", conversation)
        assert after == body

    async def test_namespace_isolation_by_system(self, memcache):
        body = self._completion("ok")
        question = "same question"
        under_system_a = [
            {"role": "system", "content": "system A"},
            {"role": "user", "content": question},
        ]
        under_system_b = [
            {"role": "system", "content": "system B"},
            {"role": "user", "content": question},
        ]
        await memcache.set_chat("chat", "m", under_system_a, body)
        # Same question + different system prompt = different namespace = miss
        lookup = await memcache.get_chat("chat", "m", under_system_b)
        assert lookup is None

    async def test_namespace_isolation_by_route(self, memcache):
        body = self._completion("ok")
        conversation = [{"role": "user", "content": "ping"}]
        await memcache.set_chat("chat", "m", conversation, body)
        lookup = await memcache.get_chat("openai_chat", "m", conversation)
        assert lookup is None

    async def test_no_user_message_is_noop(self, memcache):
        conversation = [{"role": "system", "content": "sys only"}]
        body = self._completion("x")
        # Both get and set should silently no-op
        assert await memcache.get_chat("chat", "m", conversation) is None
        await memcache.set_chat("chat", "m", conversation, body)
        assert await memcache.get_chat("chat", "m", conversation) is None

    async def test_personalized_response_generic_system_not_stored(self, memcache):
        generic_system = {"role": "system", "content": "be helpful"}
        conversation = [
            generic_system,
            {"role": "user", "content": "give me an email"},
        ]
        # The response contains an email; storing it would leak across users
        # sharing the generic namespace, so it must NOT be stored at all.
        body = self._completion("bob@x.com")
        await memcache.set_chat("chat", "m", conversation, body)
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup is None

    async def test_personalized_response_user_specific_system_stored(self, memcache):
        conversation = [
            {"role": "system", "content": "User id: 998877 prefers concise answers"},
            {"role": "user", "content": "what is my id?"},
        ]
        body = self._completion("Your id is 998877")
        await memcache.set_chat("chat", "m", conversation, body)
        # User-specific namespace → exact-match within this user is OK
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup == body

    async def test_generate_convenience_wrappers(self, memcache):
        prompt = "what color is the sky?"
        body = orjson.dumps({"response": "blue"})
        await memcache.set_generate("m", prompt, "", body)
        lookup = await memcache.get_generate("m", prompt)
        assert lookup == body


class TestStatsAndClear:
    """Checks for the cache's ``stats()`` counters and ``clear()``."""

    async def test_stats_tracks_hits_and_misses(self, memcache):
        conversation = [{"role": "user", "content": "hello"}]
        # First lookup happens before anything is stored: one miss.
        await memcache.get_chat("chat", "m", conversation)
        body = orjson.dumps({"choices": [{"message": {"content": "hi"}}]})
        await memcache.set_chat("chat", "m", conversation, body)
        # Second lookup happens after the store: one hit.
        await memcache.get_chat("chat", "m", conversation)

        snapshot = memcache.stats()
        assert snapshot["hits"] == 1
        assert snapshot["misses"] == 1
        assert snapshot["hit_rate"] == 0.5
        assert snapshot["semantic"] is False
        assert snapshot["backend"] == "memory"

    async def test_clear_resets_counters_and_storage(self, memcache):
        conversation = [{"role": "user", "content": "hi"}]
        body = orjson.dumps({"choices": [{"message": {"content": "ok"}}]})
        await memcache.set_chat("chat", "m", conversation, body)
        await memcache.get_chat("chat", "m", conversation)

        await memcache.clear()

        snapshot = memcache.stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        # The stored entry must be gone as well.
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup is None


# ──────────────────────────────────────────────────────────────────────────────
# Module-level helpers
# ──────────────────────────────────────────────────────────────────────────────

class TestInitLLMCache:
    """Checks for the module-level ``init_llm_cache`` / ``get_llm_cache`` helpers."""

    async def test_disabled_returns_none(self):
        disabled_cfg = _exact_cfg()
        disabled_cfg.cache_enabled = False
        outcome = await init_llm_cache(disabled_cfg)
        assert outcome is None

    async def test_enabled_returns_initialized_cache(self):
        enabled_cfg = _exact_cfg()
        try:
            created = await init_llm_cache(enabled_cfg)
            assert created is not None
            # The accessor must hand back the very same object.
            assert get_llm_cache() is created
        finally:
            # Reset the module-level singleton so later tests start clean.
            cache_mod._cache = None