nomyo-router/test/test_cache.py
alpha nerd 0b64a84e96
All checks were successful
PR Tests / test (pull_request) Successful in 1m0s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m24s
fix: replace hardcoded tokendb path
2026-05-17 10:53:33 +02:00

333 lines
14 KiB
Python

"""Unit tests for cache.LLMCache in exact-match mode (no sentence-transformers needed)."""
import tempfile
from pathlib import Path
from types import SimpleNamespace
import orjson
import pytest
import cache as cache_mod
from cache import (
LLMCache,
_bm25_weighted_text,
get_llm_cache,
init_llm_cache,
openai_nonstream_to_sse,
)
# Database file location handed to every config built by this module; it lives
# in the platform temp directory rather than at a hardcoded path.
_CACHE_DB_PATH = str(Path(tempfile.gettempdir()).joinpath("nomyo_test_cache.db"))
def _exact_cfg(backend: str = "memory") -> SimpleNamespace:
    """Build a cache config for exact-match mode.

    ``cache_similarity`` is pinned to 1.0, which avoids embedding deps.

    Args:
        backend: Value stored in the ``cache_backend`` attribute.

    Returns:
        A new ``SimpleNamespace`` carrying the cache settings.
    """
    settings = {
        "cache_enabled": True,
        "cache_backend": backend,
        "cache_similarity": 1.0,
        "cache_history_weight": 0.3,
        "cache_ttl": 300,
        "cache_db_path": _CACHE_DB_PATH,
        "cache_redis_url": "redis://localhost:6379",
    }
    return SimpleNamespace(**settings)
# ──────────────────────────────────────────────────────────────────────────────
# Pure helpers
# ──────────────────────────────────────────────────────────────────────────────
class TestBM25WeightedText:
    """Checks for the ``_bm25_weighted_text`` helper."""

    def test_empty_history(self):
        """An empty history produces an empty string."""
        weighted = _bm25_weighted_text([])
        assert weighted == ""

    def test_history_without_content(self):
        """Messages that carry no ``content`` key produce an empty string."""
        contentless = [{"role": "user"}, {"role": "assistant"}]
        assert _bm25_weighted_text(contentless) == ""

    def test_repeats_high_idf_terms(self):
        """The domain term is kept while the two-letter word "is" is absent."""
        turns = [
            {"role": "user", "content": "Tell me about quantum entanglement"},
            {"role": "assistant", "content": "Quantum entanglement is a phenomenon"},
            {"role": "user", "content": "How does entanglement work?"},
        ]
        weighted = _bm25_weighted_text(turns)
        # Rare/domain term ("entanglement") should appear; short stopwords
        # (<=2 chars) are expected to be dropped.
        assert "entanglement" in weighted
        words = weighted.split()
        assert "is" not in words
# ──────────────────────────────────────────────────────────────────────────────
# openai_nonstream_to_sse
# ──────────────────────────────────────────────────────────────────────────────
class TestOpenAINonstreamToSSE:
    """Checks for ``openai_nonstream_to_sse``."""

    def test_valid_chat_completion(self):
        """A valid completion becomes SSE text whose first event holds the content."""
        completion = {
            "id": "x1",
            "created": 123,
            "model": "gpt-4o",
            "choices": [{"message": {"role": "assistant", "content": "hello"}}],
            "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
        }
        raw = openai_nonstream_to_sse(orjson.dumps(completion), "gpt-4o")
        sse_text = raw.decode()
        assert sse_text.startswith("data: ")
        assert sse_text.endswith("data: [DONE]\n\n")
        # The first event (everything before the first blank line) carries
        # the original content; strip its "data: " prefix before parsing.
        first_event, _, _ = sse_text.partition("\n\n")
        payload = orjson.loads(first_event[len("data: "):])
        delta = payload["choices"][0]["delta"]
        assert delta["content"] == "hello"
        assert payload["usage"]["total_tokens"] == 3

    def test_corrupt_bytes_return_done_only(self):
        """Unparseable input yields only the terminating ``[DONE]`` event."""
        assert openai_nonstream_to_sse(b"not-json", "m") == b"data: [DONE]\n\n"
# ──────────────────────────────────────────────────────────────────────────────
# LLMCache internal helpers
# ──────────────────────────────────────────────────────────────────────────────
class TestLLMCacheParsing:
    """Checks for ``LLMCache._namespace`` and ``LLMCache._parse_messages``."""

    def test_namespace_is_stable_and_isolated(self):
        """Equal inputs give equal namespaces; a different route or system differs."""
        cache = LLMCache(_exact_cfg())
        first = cache._namespace("chat", "m1", "system A")
        second = cache._namespace("chat", "m1", "system A")
        assert first == second
        other_system = cache._namespace("chat", "m1", "system B")
        assert other_system != first
        other_route = cache._namespace("generate", "m1", "system A")
        assert other_route != first
        assert len(first) == 16

    def test_parse_messages_flat_strings(self):
        """Plain-string messages split into system text, history and last user turn."""
        cache = LLMCache(_exact_cfg())
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        system_text, history, last_user = cache._parse_messages(conversation)
        assert system_text == "be helpful"
        assert last_user == "what is 2+2?"
        expected_history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        assert history == expected_history

    def test_parse_messages_multimodal_content(self):
        """For list-style content, the text part becomes the last user turn."""
        cache = LLMCache(_exact_cfg())
        user_parts = [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "data:..."}},
        ]
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": user_parts},
        ]
        system_text, _history, last_user = cache._parse_messages(conversation)
        assert system_text == "sys"
        assert last_user == "describe"

    def test_parse_messages_no_user_message(self):
        """With only a system message, last user text is empty and history is empty."""
        cache = LLMCache(_exact_cfg())
        conversation = [{"role": "system", "content": "sys only"}]
        system_text, history, last_user = cache._parse_messages(conversation)
        assert system_text == "sys only"
        assert last_user == ""
        assert history == []
class TestPersonalTokenExtraction:
    """Checks for ``LLMCache._extract_personal_tokens``."""

    def test_email_extracted(self):
        """An e-mail address in the text is among the extracted tokens."""
        cache = LLMCache(_exact_cfg())
        tokens = cache._extract_personal_tokens("Reach me at alice@example.com please")
        assert "alice@example.com" in tokens

    def test_numeric_id_after_keyword(self):
        """A number following "id:" is among the extracted tokens."""
        cache = LLMCache(_exact_cfg())
        tokens = cache._extract_personal_tokens("User id: 123456")
        assert "123456" in tokens

    def test_identity_tag_names_extracted(self):
        """Names after an identity tag are extracted lowercased, without stopwords."""
        cache = LLMCache(_exact_cfg())
        tagged_text = "[Tags: identity] User's name is Andreas Schwibbe"
        tokens = cache._extract_personal_tokens(tagged_text)
        # Both name tokens should be extracted lowercased; stopwords dropped
        for expected in ("andreas", "schwibbe"):
            assert expected in tokens
        assert "name" not in tokens  # in _IDENTITY_STOPWORDS
        assert "user" not in tokens

    def test_empty_system_returns_empty_set(self):
        """Empty input compares equal to an empty ``frozenset``."""
        cache = LLMCache(_exact_cfg())
        tokens = cache._extract_personal_tokens("")
        assert tokens == frozenset()
class TestResponseIsPersonalized:
    """Checks for ``LLMCache._response_is_personalized``."""

    def _resp(self, content: str) -> bytes:
        """Serialise a one-choice chat completion whose message holds ``content``."""
        message = {"content": content}
        return orjson.dumps({"choices": [{"message": message}]})

    def test_email_in_response_is_personalized(self):
        """A response containing an e-mail address is flagged."""
        cache = LLMCache(_exact_cfg())
        body = self._resp("contact bob@x.com")
        assert cache._response_is_personalized(body, "")

    def test_uuid_in_response_is_personalized(self):
        """A response containing a UUID is flagged."""
        cache = LLMCache(_exact_cfg())
        identifier = "550e8400-e29b-41d4-a716-446655440000"
        body = self._resp(f"id={identifier}")
        assert cache._response_is_personalized(body, "")

    def test_long_numeric_id_in_response_is_personalized(self):
        """A response containing an eight-digit number is flagged."""
        cache = LLMCache(_exact_cfg())
        body = self._resp("account 12345678")
        assert cache._response_is_personalized(body, "")

    def test_identity_token_from_system_echoed_in_response(self):
        """A name from an identity-tagged system prompt echoed back is flagged."""
        cache = LLMCache(_exact_cfg())
        system_prompt = "[Tags: identity] Andreas works here"
        body = self._resp("Yes, Andreas is logged in")
        assert cache._response_is_personalized(body, system_prompt)

    def test_generic_response_not_personalized(self):
        """A plain factual answer under a generic system prompt is not flagged."""
        cache = LLMCache(_exact_cfg())
        body = self._resp("The capital of France is Paris.")
        flagged = cache._response_is_personalized(body, "be helpful")
        assert not flagged

    def test_ollama_message_format_parsed(self):
        """A top-level ``message`` object is inspected as well."""
        cache = LLMCache(_exact_cfg())
        ollama_body = orjson.dumps({"message": {"content": "alice@example.com"}})
        assert cache._response_is_personalized(ollama_body, "")

    def test_unparseable_body_with_bytes_is_conservative(self):
        """Non-JSON bytes are flagged."""
        cache = LLMCache(_exact_cfg())
        # Can't parse → returns True (err on the side of privacy)
        assert cache._response_is_personalized(b"binary-junk", "")

    def test_empty_response_not_personalized(self):
        """An empty body is not flagged."""
        cache = LLMCache(_exact_cfg())
        flagged = cache._response_is_personalized(b"", "anything")
        assert not flagged
# ──────────────────────────────────────────────────────────────────────────────
# End-to-end exact-match cache with the memory backend
# ──────────────────────────────────────────────────────────────────────────────
@pytest.fixture
async def memcache():
    """Provide an initialised ``LLMCache`` built from the "memory" backend config.

    The in-memory backend needs no external dependencies.
    """
    cache = LLMCache(_exact_cfg("memory"))
    await cache.init()
    return cache
class TestExactMatchCache:
    """Round-trip checks for the chat and generate cache entry points."""

    @staticmethod
    def _chat_body(content: str) -> bytes:
        """Serialise a one-choice chat completion whose message holds ``content``."""
        return orjson.dumps({"choices": [{"message": {"content": content}}]})

    async def test_miss_then_set_then_hit(self, memcache):
        """A lookup misses, then hits with the stored bytes after ``set_chat``."""
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        payload = self._chat_body("4")
        before = await memcache.get_chat("chat", "m1", conversation)
        assert before is None
        await memcache.set_chat("chat", "m1", conversation, payload)
        after = await memcache.get_chat("chat", "m1", conversation)
        assert after == payload

    async def test_namespace_isolation_by_system(self, memcache):
        """An entry stored under one system prompt is not served for another."""
        payload = self._chat_body("ok")
        under_system_a, under_system_b = (
            [
                {"role": "system", "content": system_text},
                {"role": "user", "content": "same question"},
            ]
            for system_text in ("system A", "system B")
        )
        await memcache.set_chat("chat", "m", under_system_a, payload)
        # Same question + different system prompt = different namespace = miss
        lookup = await memcache.get_chat("chat", "m", under_system_b)
        assert lookup is None

    async def test_namespace_isolation_by_route(self, memcache):
        """An entry stored under one route is not served for another route."""
        payload = self._chat_body("ok")
        conversation = [{"role": "user", "content": "ping"}]
        await memcache.set_chat("chat", "m", conversation, payload)
        lookup = await memcache.get_chat("openai_chat", "m", conversation)
        assert lookup is None

    async def test_no_user_message_is_noop(self, memcache):
        """Without a user message, lookups return ``None`` before and after a set."""
        conversation = [{"role": "system", "content": "sys only"}]
        payload = self._chat_body("x")
        # Both get and set should silently no-op
        before = await memcache.get_chat("chat", "m", conversation)
        assert before is None
        await memcache.set_chat("chat", "m", conversation, payload)
        after = await memcache.get_chat("chat", "m", conversation)
        assert after is None

    async def test_personalized_response_generic_system_not_stored(self, memcache):
        """A response holding an e-mail is not cached under a generic system prompt."""
        conversation = [
            {"role": "system", "content": "be helpful"},  # generic
            {"role": "user", "content": "give me an email"},
        ]
        # Response contains an email → would leak across users sharing the
        # generic namespace → must NOT be stored at all
        payload = self._chat_body("bob@x.com")
        await memcache.set_chat("chat", "m", conversation, payload)
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup is None

    async def test_personalized_response_user_specific_system_stored(self, memcache):
        """A personalised response is cached when the system prompt is user-specific."""
        conversation = [
            {"role": "system", "content": "User id: 998877 prefers concise answers"},
            {"role": "user", "content": "what is my id?"},
        ]
        payload = self._chat_body("Your id is 998877")
        await memcache.set_chat("chat", "m", conversation, payload)
        # User-specific namespace → exact-match within this user is OK
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup == payload

    async def test_generate_convenience_wrappers(self, memcache):
        """``set_generate`` followed by ``get_generate`` returns the stored bytes."""
        prompt = "what color is the sky?"
        payload = orjson.dumps({"response": "blue"})
        await memcache.set_generate("m", prompt, "", payload)
        lookup = await memcache.get_generate("m", prompt)
        assert lookup == payload
class TestStatsAndClear:
    """Checks for ``LLMCache.stats`` and ``LLMCache.clear``."""

    @staticmethod
    def _chat_body(content: str) -> bytes:
        """Serialise a one-choice chat completion whose message holds ``content``."""
        return orjson.dumps({"choices": [{"message": {"content": content}}]})

    async def test_stats_tracks_hits_and_misses(self, memcache):
        """One miss followed by one hit is reflected in the stats mapping."""
        conversation = [{"role": "user", "content": "hello"}]
        await memcache.get_chat("chat", "m", conversation)  # miss
        payload = self._chat_body("hi")
        await memcache.set_chat("chat", "m", conversation, payload)
        await memcache.get_chat("chat", "m", conversation)  # hit
        snapshot = memcache.stats()
        assert snapshot["hits"] == 1
        assert snapshot["misses"] == 1
        assert snapshot["hit_rate"] == 0.5
        assert snapshot["semantic"] is False
        assert snapshot["backend"] == "memory"

    async def test_clear_resets_counters_and_storage(self, memcache):
        """After ``clear`` the counters are zero and the stored entry is gone."""
        conversation = [{"role": "user", "content": "hi"}]
        payload = self._chat_body("ok")
        await memcache.set_chat("chat", "m", conversation, payload)
        await memcache.get_chat("chat", "m", conversation)
        await memcache.clear()
        snapshot = memcache.stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup is None
# ──────────────────────────────────────────────────────────────────────────────
# Module-level helpers
# ──────────────────────────────────────────────────────────────────────────────
class TestInitLLMCache:
    """Checks for the module-level ``init_llm_cache`` / ``get_llm_cache`` helpers."""

    async def test_disabled_returns_none(self):
        """With ``cache_enabled`` set to False, ``init_llm_cache`` returns ``None``."""
        disabled_cfg = _exact_cfg()
        disabled_cfg.cache_enabled = False
        outcome = await init_llm_cache(disabled_cfg)
        assert outcome is None

    async def test_enabled_returns_initialized_cache(self):
        """With caching enabled, the returned object is what ``get_llm_cache`` yields."""
        enabled_cfg = _exact_cfg()
        try:
            created = await init_llm_cache(enabled_cfg)
            assert created is not None
            assert get_llm_cache() is created
        finally:
            # Reset singleton between tests
            cache_mod._cache = None