"""Unit tests for cache.LLMCache in exact-match mode (no sentence-transformers needed)."""
import tempfile
from pathlib import Path
from types import SimpleNamespace

import orjson
import pytest

import cache as cache_mod
from cache import (
    LLMCache,
    _bm25_weighted_text,
    get_llm_cache,
    init_llm_cache,
    openai_nonstream_to_sse,
)

_CACHE_DB_PATH = str(Path(tempfile.gettempdir()) / "nomyo_test_cache.db")


def _exact_cfg(backend: str = "memory") -> SimpleNamespace:
    """Return a cache config pinned to exact-match mode.

    ``cache_similarity`` is fixed at 1.0 so that no embedding dependencies
    are needed; *backend* selects the value stored in ``cache_backend``.
    """
    settings = {
        "cache_enabled": True,
        "cache_backend": backend,
        "cache_similarity": 1.0,
        "cache_history_weight": 0.3,
        "cache_ttl": 300,
        "cache_db_path": _CACHE_DB_PATH,
        "cache_redis_url": "redis://localhost:6379",
    }
    return SimpleNamespace(**settings)


def _chat_body(content: str) -> bytes:
    """Serialize a minimal ``choices[0].message.content`` response body."""
    return orjson.dumps({"choices": [{"message": {"content": content}}]})


# ──────────────────────────────────────────────────────────────────────────────
# Pure helpers
# ──────────────────────────────────────────────────────────────────────────────

class TestBM25WeightedText:
    """Checks for the ``_bm25_weighted_text`` history helper."""

    def test_empty_history(self):
        weighted = _bm25_weighted_text([])
        assert weighted == ""

    def test_history_without_content(self):
        contentless = [{"role": "user"}, {"role": "assistant"}]
        assert _bm25_weighted_text(contentless) == ""

    def test_repeats_high_idf_terms(self):
        turns = [
            {"role": "user", "content": "Tell me about quantum entanglement"},
            {"role": "assistant", "content": "Quantum entanglement is a phenomenon"},
            {"role": "user", "content": "How does entanglement work?"},
        ]
        weighted = _bm25_weighted_text(turns)
        # The rare domain term ("entanglement") has to survive, whereas short
        # stopwords (two characters or fewer) are expected to be dropped.
        assert "entanglement" in weighted
        assert "is" not in weighted.split()


# ──────────────────────────────────────────────────────────────────────────────
# openai_nonstream_to_sse
# ──────────────────────────────────────────────────────────────────────────────

class TestOpenAINonstreamToSSE:
    """Checks for converting a non-streaming completion into SSE bytes."""

    def test_valid_chat_completion(self):
        completion = {
            "id": "x1",
            "created": 123,
            "model": "gpt-4o",
            "choices": [{"message": {"role": "assistant", "content": "hello"}}],
            "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
        }
        sse_bytes = openai_nonstream_to_sse(orjson.dumps(completion), "gpt-4o")
        sse_text = sse_bytes.decode()
        assert sse_text.startswith("data: ")
        assert sse_text.endswith("data: [DONE]\n\n")
        # The first event carries the original content and the usage figures.
        first_event, _, _ = sse_text.partition("\n\n")
        first_chunk = orjson.loads(first_event[len("data: "):])
        assert first_chunk["choices"][0]["delta"]["content"] == "hello"
        assert first_chunk["usage"]["total_tokens"] == 3

    def test_corrupt_bytes_return_done_only(self):
        assert openai_nonstream_to_sse(b"not-json", "m") == b"data: [DONE]\n\n"


# ──────────────────────────────────────────────────────────────────────────────
# LLMCache internal helpers
# ──────────────────────────────────────────────────────────────────────────────

class TestLLMCacheParsing:
    """Checks for ``LLMCache._namespace`` and ``LLMCache._parse_messages``."""

    def test_namespace_is_stable_and_isolated(self):
        llm_cache = LLMCache(_exact_cfg())
        baseline = llm_cache._namespace("chat", "m1", "system A")
        repeated = llm_cache._namespace("chat", "m1", "system A")
        assert baseline == repeated
        assert llm_cache._namespace("chat", "m1", "system B") != baseline
        assert llm_cache._namespace("generate", "m1", "system A") != baseline
        assert len(baseline) == 16

    def test_parse_messages_flat_strings(self):
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        llm_cache = LLMCache(_exact_cfg())
        system_text, history, last_user = llm_cache._parse_messages(conversation)
        assert system_text == "be helpful"
        assert last_user == "what is 2+2?"
        assert history == [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]

    def test_parse_messages_multimodal_content(self):
        multimodal_parts = [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "data:..."}},
        ]
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": multimodal_parts},
        ]
        llm_cache = LLMCache(_exact_cfg())
        system_text, _history, last_user = llm_cache._parse_messages(conversation)
        assert system_text == "sys"
        assert last_user == "describe"

    def test_parse_messages_no_user_message(self):
        llm_cache = LLMCache(_exact_cfg())
        system_text, history, last_user = llm_cache._parse_messages(
            [{"role": "system", "content": "sys only"}]
        )
        assert system_text == "sys only"
        assert last_user == ""
        assert history == []


class TestPersonalTokenExtraction:
    """Checks for ``LLMCache._extract_personal_tokens``."""

    def test_email_extracted(self):
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens(
            "Reach me at alice@example.com please"
        )
        assert "alice@example.com" in tokens

    def test_numeric_id_after_keyword(self):
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens("User id: 123456")
        assert "123456" in tokens

    def test_identity_tag_names_extracted(self):
        system_prompt = "[Tags: identity] User's name is Andreas Schwibbe"
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens(system_prompt)
        # Both parts of the name come back lowercased ...
        assert "andreas" in tokens
        assert "schwibbe" in tokens
        # ... while stopwords are filtered out ("name" is in _IDENTITY_STOPWORDS).
        assert "name" not in tokens
        assert "user" not in tokens

    def test_empty_system_returns_empty_set(self):
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens("")
        assert tokens == frozenset()


class TestResponseIsPersonalized:
    """Checks for ``LLMCache._response_is_personalized``."""

    def _resp(self, content: str) -> bytes:
        """Wrap *content* in a serialized ``choices``-style response body."""
        return _chat_body(content)

    def test_email_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("contact bob@x.com")
        assert llm_cache._response_is_personalized(body, "")

    def test_uuid_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        sample_uuid = "550e8400-e29b-41d4-a716-446655440000"
        body = self._resp(f"id={sample_uuid}")
        assert llm_cache._response_is_personalized(body, "")

    def test_long_numeric_id_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("account 12345678")
        assert llm_cache._response_is_personalized(body, "")

    def test_identity_token_from_system_echoed_in_response(self):
        llm_cache = LLMCache(_exact_cfg())
        system_prompt = "[Tags: identity] Andreas works here"
        body = self._resp("Yes, Andreas is logged in")
        assert llm_cache._response_is_personalized(body, system_prompt)

    def test_generic_response_not_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("The capital of France is Paris.")
        assert not llm_cache._response_is_personalized(body, "be helpful")

    def test_ollama_message_format_parsed(self):
        llm_cache = LLMCache(_exact_cfg())
        ollama_body = orjson.dumps({"message": {"content": "alice@example.com"}})
        assert llm_cache._response_is_personalized(ollama_body, "")

    def test_unparseable_body_with_bytes_is_conservative(self):
        llm_cache = LLMCache(_exact_cfg())
        # An unparseable body is reported as personalized (True), erring on
        # the side of privacy.
        assert llm_cache._response_is_personalized(b"binary-junk", "")

    def test_empty_response_not_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        assert not llm_cache._response_is_personalized(b"", "anything")


# ──────────────────────────────────────────────────────────────────────────────
# End-to-end exact-match cache with the memory backend
# ──────────────────────────────────────────────────────────────────────────────

# NOTE(review): the coroutine fixture and tests below carry no explicit asyncio
# marker — presumably the project runs pytest-asyncio in auto mode; confirm.
@pytest.fixture
async def memcache():
    """Provide an initialized LLMCache on the in-memory backend (no external deps)."""
    instance = LLMCache(_exact_cfg("memory"))
    await instance.init()
    return instance


class TestExactMatchCache:
    """Round-trip behaviour of ``get_chat`` / ``set_chat`` and the generate wrappers."""

    async def test_miss_then_set_then_hit(self, memcache):
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        answer = _chat_body("4")
        assert await memcache.get_chat("chat", "m1", conversation) is None
        await memcache.set_chat("chat", "m1", conversation, answer)
        assert await memcache.get_chat("chat", "m1", conversation) == answer

    async def test_namespace_isolation_by_system(self, memcache):
        under_system_a = [
            {"role": "system", "content": "system A"},
            {"role": "user", "content": "same question"},
        ]
        under_system_b = [
            {"role": "system", "content": "system B"},
            {"role": "user", "content": "same question"},
        ]
        answer = _chat_body("ok")
        await memcache.set_chat("chat", "m", under_system_a, answer)
        # Identical question under another system prompt lands in a different
        # namespace, so it has to miss.
        assert await memcache.get_chat("chat", "m", under_system_b) is None

    async def test_namespace_isolation_by_route(self, memcache):
        conversation = [{"role": "user", "content": "ping"}]
        answer = _chat_body("ok")
        await memcache.set_chat("chat", "m", conversation, answer)
        assert await memcache.get_chat("openai_chat", "m", conversation) is None

    async def test_no_user_message_is_noop(self, memcache):
        system_only = [{"role": "system", "content": "sys only"}]
        answer = _chat_body("x")
        # Neither the lookup nor the store may do anything without a user turn.
        assert await memcache.get_chat("chat", "m", system_only) is None
        await memcache.set_chat("chat", "m", system_only, answer)
        assert await memcache.get_chat("chat", "m", system_only) is None

    async def test_personalized_response_generic_system_not_stored(self, memcache):
        conversation = [
            {"role": "system", "content": "be helpful"},  # generic
            {"role": "user", "content": "give me an email"},
        ]
        # The answer holds an email address; storing it under the shared
        # generic namespace would leak it across users, so it must be skipped.
        answer = _chat_body("bob@x.com")
        await memcache.set_chat("chat", "m", conversation, answer)
        assert await memcache.get_chat("chat", "m", conversation) is None

    async def test_personalized_response_user_specific_system_stored(self, memcache):
        conversation = [
            {"role": "system", "content": "User id: 998877 prefers concise answers"},
            {"role": "user", "content": "what is my id?"},
        ]
        answer = _chat_body("Your id is 998877")
        await memcache.set_chat("chat", "m", conversation, answer)
        # The namespace is user-specific, so an exact match for this user is fine.
        cached = await memcache.get_chat("chat", "m", conversation)
        assert cached == answer

    async def test_generate_convenience_wrappers(self, memcache):
        prompt = "what color is the sky?"
        answer = orjson.dumps({"response": "blue"})
        await memcache.set_generate("m", prompt, "", answer)
        assert await memcache.get_generate("m", prompt) == answer


class TestStatsAndClear:
    """Checks for ``LLMCache.stats`` and ``LLMCache.clear``."""

    async def test_stats_tracks_hits_and_misses(self, memcache):
        conversation = [{"role": "user", "content": "hello"}]
        await memcache.get_chat("chat", "m", conversation)  # miss
        answer = _chat_body("hi")
        await memcache.set_chat("chat", "m", conversation, answer)
        await memcache.get_chat("chat", "m", conversation)  # hit

        snapshot = memcache.stats()
        assert snapshot["hits"] == 1
        assert snapshot["misses"] == 1
        assert snapshot["hit_rate"] == 0.5
        assert snapshot["semantic"] is False
        assert snapshot["backend"] == "memory"

    async def test_clear_resets_counters_and_storage(self, memcache):
        conversation = [{"role": "user", "content": "hi"}]
        answer = _chat_body("ok")
        await memcache.set_chat("chat", "m", conversation, answer)
        await memcache.get_chat("chat", "m", conversation)
        await memcache.clear()

        snapshot = memcache.stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        assert await memcache.get_chat("chat", "m", conversation) is None


# ──────────────────────────────────────────────────────────────────────────────
# Module-level helpers
# ──────────────────────────────────────────────────────────────────────────────

class TestInitLLMCache:
    """Checks for the module-level ``init_llm_cache`` / ``get_llm_cache`` pair."""

    async def test_disabled_returns_none(self):
        disabled_cfg = _exact_cfg()
        disabled_cfg.cache_enabled = False
        outcome = await init_llm_cache(disabled_cfg)
        assert outcome is None

    async def test_enabled_returns_initialized_cache(self):
        enabled_cfg = _exact_cfg()
        try:
            created = await init_llm_cache(enabled_cfg)
            assert created is not None
            assert get_llm_cache() is created
        finally:
            # Drop the module singleton so later tests start clean.
            cache_mod._cache = None