333 lines
14 KiB
Python
333 lines
14 KiB
Python
"""Unit tests for cache.LLMCache in exact-match mode (no sentence-transformers needed)."""
|
|
import tempfile
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
|
|
import orjson
|
|
import pytest
|
|
|
|
import cache as cache_mod
|
|
from cache import (
|
|
LLMCache,
|
|
_bm25_weighted_text,
|
|
get_llm_cache,
|
|
init_llm_cache,
|
|
openai_nonstream_to_sse,
|
|
)
|
|
|
|
# Database file path inside the platform temp directory; _exact_cfg() passes
# it along as ``cache_db_path``.
_CACHE_DB_PATH = str(Path(tempfile.gettempdir()).joinpath("nomyo_test_cache.db"))
|
|
|
|
|
|
def _exact_cfg(backend: str = "memory") -> SimpleNamespace:
    """Build a cache config for exact-match mode.

    ``cache_similarity`` is pinned to 1.0, which avoids embedding deps.

    Args:
        backend: Value stored on the config as ``cache_backend``.

    Returns:
        A ``SimpleNamespace`` carrying the ``cache_*`` settings.
    """
    settings = {
        "cache_enabled": True,
        "cache_backend": backend,
        "cache_similarity": 1.0,
        "cache_history_weight": 0.3,
        "cache_ttl": 300,
        "cache_db_path": _CACHE_DB_PATH,
        "cache_redis_url": "redis://localhost:6379",
    }
    return SimpleNamespace(**settings)
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Pure helpers
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestBM25WeightedText:
    """Checks on the text produced by ``_bm25_weighted_text``."""

    def test_empty_history(self):
        """An empty history yields an empty string."""
        weighted = _bm25_weighted_text([])
        assert weighted == ""

    def test_history_without_content(self):
        """Messages lacking a ``content`` key yield an empty string."""
        contentless = [{"role": "user"}, {"role": "assistant"}]
        assert _bm25_weighted_text(contentless) == ""

    def test_repeats_high_idf_terms(self):
        """A domain term survives weighting while a two-letter word does not."""
        turns = [
            {"role": "user", "content": "Tell me about quantum entanglement"},
            {"role": "assistant", "content": "Quantum entanglement is a phenomenon"},
            {"role": "user", "content": "How does entanglement work?"},
        ]
        weighted = _bm25_weighted_text(turns)
        tokens = weighted.split()
        # Rare/domain term ("entanglement") should appear; short stopwords
        # (<=2 chars) are expected to be dropped.
        assert "entanglement" in weighted
        assert "is" not in tokens
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# openai_nonstream_to_sse
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestOpenAINonstreamToSSE:
    """Checks on the SSE bytes returned by ``openai_nonstream_to_sse``."""

    def test_valid_chat_completion(self):
        """A serialized chat completion becomes ``data:`` events ending in ``[DONE]``."""
        completion = {
            "id": "x1",
            "created": 123,
            "model": "gpt-4o",
            "choices": [{"message": {"role": "assistant", "content": "hello"}}],
            "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
        }
        sse_bytes = openai_nonstream_to_sse(orjson.dumps(completion), "gpt-4o")
        sse_text = sse_bytes.decode()
        assert sse_text.startswith("data: ")
        assert sse_text.endswith("data: [DONE]\n\n")

        # The first event (everything before the first blank line) carries
        # the original content; strip its "data: " prefix before decoding.
        first_event, _, _ = sse_text.partition("\n\n")
        first_chunk = orjson.loads(first_event[len("data: "):])
        assert first_chunk["choices"][0]["delta"]["content"] == "hello"
        assert first_chunk["usage"]["total_tokens"] == 3

    def test_corrupt_bytes_return_done_only(self):
        """Input that is not JSON produces only the ``[DONE]`` terminator."""
        sse_bytes = openai_nonstream_to_sse(b"not-json", "m")
        assert sse_bytes == b"data: [DONE]\n\n"
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# LLMCache internal helpers
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestLLMCacheParsing:
    """Checks on ``LLMCache._namespace`` and ``LLMCache._parse_messages``."""

    def test_namespace_is_stable_and_isolated(self):
        """Same inputs give the same namespace; changing system or route changes it."""
        llm_cache = LLMCache(_exact_cfg())
        first = llm_cache._namespace("chat", "m1", "system A")
        repeat = llm_cache._namespace("chat", "m1", "system A")
        assert first == repeat
        other_system = llm_cache._namespace("chat", "m1", "system B")
        assert other_system != first
        other_route = llm_cache._namespace("generate", "m1", "system A")
        assert other_route != first
        assert len(first) == 16

    def test_parse_messages_flat_strings(self):
        """Plain-string messages split into system, history and last user text."""
        llm_cache = LLMCache(_exact_cfg())
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        system_prompt, history, final_user = llm_cache._parse_messages(conversation)
        assert system_prompt == "be helpful"
        assert final_user == "what is 2+2?"
        expected_history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        assert history == expected_history

    def test_parse_messages_multimodal_content(self):
        """List-style content yields the text part as the last user text."""
        llm_cache = LLMCache(_exact_cfg())
        user_parts = [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "data:..."}},
        ]
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": user_parts},
        ]
        system_prompt, _, final_user = llm_cache._parse_messages(conversation)
        assert system_prompt == "sys"
        assert final_user == "describe"

    def test_parse_messages_no_user_message(self):
        """With only a system message, last user text and history are empty."""
        llm_cache = LLMCache(_exact_cfg())
        conversation = [{"role": "system", "content": "sys only"}]
        system_prompt, history, final_user = llm_cache._parse_messages(conversation)
        assert system_prompt == "sys only"
        assert final_user == ""
        assert history == []
|
|
|
|
|
|
class TestPersonalTokenExtraction:
    """Checks on ``LLMCache._extract_personal_tokens``."""

    def test_email_extracted(self):
        """An e-mail address in the text appears among the tokens."""
        llm_cache = LLMCache(_exact_cfg())
        text = "Reach me at alice@example.com please"
        tokens = llm_cache._extract_personal_tokens(text)
        assert "alice@example.com" in tokens

    def test_numeric_id_after_keyword(self):
        """A number following ``id:`` appears among the tokens."""
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens("User id: 123456")
        assert "123456" in tokens

    def test_identity_tag_names_extracted(self):
        """Names after an identity tag are extracted; stopwords are not."""
        llm_cache = LLMCache(_exact_cfg())
        tagged = "[Tags: identity] User's name is Andreas Schwibbe"
        tokens = llm_cache._extract_personal_tokens(tagged)
        # Both name tokens should be extracted lowercased; stopwords dropped
        assert "andreas" in tokens
        assert "schwibbe" in tokens
        assert "name" not in tokens  # in _IDENTITY_STOPWORDS
        assert "user" not in tokens

    def test_empty_system_returns_empty_set(self):
        """Empty input gives an empty ``frozenset``."""
        llm_cache = LLMCache(_exact_cfg())
        tokens = llm_cache._extract_personal_tokens("")
        assert tokens == frozenset()
|
|
|
|
|
|
class TestResponseIsPersonalized:
    """Checks on ``LLMCache._response_is_personalized``."""

    def _resp(self, content: str) -> bytes:
        """Serialize ``content`` as a ``choices[0].message.content`` response body."""
        body = {"choices": [{"message": {"content": content}}]}
        return orjson.dumps(body)

    def test_email_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("contact bob@x.com")
        assert llm_cache._response_is_personalized(body, "")

    def test_uuid_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        uuid_text = "550e8400-e29b-41d4-a716-446655440000"
        body = self._resp(f"id={uuid_text}")
        assert llm_cache._response_is_personalized(body, "")

    def test_long_numeric_id_in_response_is_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("account 12345678")
        assert llm_cache._response_is_personalized(body, "")

    def test_identity_token_from_system_echoed_in_response(self):
        """A name from an identity-tagged system prompt echoed in the reply counts."""
        llm_cache = LLMCache(_exact_cfg())
        system_prompt = "[Tags: identity] Andreas works here"
        body = self._resp("Yes, Andreas is logged in")
        assert llm_cache._response_is_personalized(body, system_prompt)

    def test_generic_response_not_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        body = self._resp("The capital of France is Paris.")
        assert not llm_cache._response_is_personalized(body, "be helpful")

    def test_ollama_message_format_parsed(self):
        """A top-level ``message.content`` body is inspected as well."""
        llm_cache = LLMCache(_exact_cfg())
        ollama_body = orjson.dumps({"message": {"content": "alice@example.com"}})
        assert llm_cache._response_is_personalized(ollama_body, "")

    def test_unparseable_body_with_bytes_is_conservative(self):
        llm_cache = LLMCache(_exact_cfg())
        # Can't parse → returns True (err on the side of privacy)
        verdict = llm_cache._response_is_personalized(b"binary-junk", "")
        assert verdict

    def test_empty_response_not_personalized(self):
        llm_cache = LLMCache(_exact_cfg())
        verdict = llm_cache._response_is_personalized(b"", "anything")
        assert not verdict
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# End-to-end exact-match cache with the memory backend
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
@pytest.fixture
async def memcache():
    """Provide an initialized LLMCache on the in-memory backend (no external deps)."""
    memory_cfg = _exact_cfg("memory")
    instance = LLMCache(memory_cfg)
    await instance.init()
    return instance
|
|
|
|
|
|
class TestExactMatchCache:
    """Store/lookup round trips through the ``memcache`` fixture."""

    async def test_miss_then_set_then_hit(self, memcache):
        """A lookup misses before ``set_chat`` and returns the stored bytes after."""
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "what is 2+2?"},
        ]
        payload = orjson.dumps({"choices": [{"message": {"content": "4"}}]})

        before = await memcache.get_chat("chat", "m1", conversation)
        assert before is None
        await memcache.set_chat("chat", "m1", conversation, payload)
        after = await memcache.get_chat("chat", "m1", conversation)
        assert after == payload

    async def test_namespace_isolation_by_system(self, memcache):
        """An entry stored under one system prompt is not served for another."""

        def conversation(system_prompt):
            # Fresh dicts on every call so the two conversations share nothing.
            return [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "same question"},
            ]

        payload = orjson.dumps({"choices": [{"message": {"content": "ok"}}]})
        under_a = conversation("system A")
        under_b = conversation("system B")
        await memcache.set_chat("chat", "m", under_a, payload)
        # Same question + different system prompt = different namespace = miss
        lookup = await memcache.get_chat("chat", "m", under_b)
        assert lookup is None

    async def test_namespace_isolation_by_route(self, memcache):
        """An entry stored under one route is not served for another route."""
        payload = orjson.dumps({"choices": [{"message": {"content": "ok"}}]})
        conversation = [{"role": "user", "content": "ping"}]
        await memcache.set_chat("chat", "m", conversation, payload)
        lookup = await memcache.get_chat("openai_chat", "m", conversation)
        assert lookup is None

    async def test_no_user_message_is_noop(self, memcache):
        """Without a user message, lookups miss both before and after a set."""
        conversation = [{"role": "system", "content": "sys only"}]
        payload = orjson.dumps({"choices": [{"message": {"content": "x"}}]})
        # Both get and set should silently no-op
        before = await memcache.get_chat("chat", "m", conversation)
        assert before is None
        await memcache.set_chat("chat", "m", conversation, payload)
        after = await memcache.get_chat("chat", "m", conversation)
        assert after is None

    async def test_personalized_response_generic_system_not_stored(self, memcache):
        """A reply containing an e-mail is not retrievable under a generic system prompt."""
        conversation = [
            {"role": "system", "content": "be helpful"},  # generic
            {"role": "user", "content": "give me an email"},
        ]
        # Response contains an email → would leak across users sharing the
        # generic namespace → must NOT be stored at all
        payload = orjson.dumps({"choices": [{"message": {"content": "bob@x.com"}}]})
        await memcache.set_chat("chat", "m", conversation, payload)
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup is None

    async def test_personalized_response_user_specific_system_stored(self, memcache):
        """A reply echoing the id from a user-specific system prompt is retrievable."""
        conversation = [
            {"role": "system", "content": "User id: 998877 prefers concise answers"},
            {"role": "user", "content": "what is my id?"},
        ]
        payload = orjson.dumps({"choices": [{"message": {"content": "Your id is 998877"}}]})
        await memcache.set_chat("chat", "m", conversation, payload)
        # User-specific namespace → exact-match within this user is OK
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup == payload

    async def test_generate_convenience_wrappers(self, memcache):
        """``set_generate`` followed by ``get_generate`` returns the stored bytes."""
        payload = orjson.dumps({"response": "blue"})
        prompt = "what color is the sky?"
        await memcache.set_generate("m", prompt, "", payload)
        lookup = await memcache.get_generate("m", prompt)
        assert lookup == payload
|
|
|
|
|
|
class TestStatsAndClear:
    """Checks on ``stats()`` counters and on ``clear()``."""

    async def test_stats_tracks_hits_and_misses(self, memcache):
        """One miss followed by one hit is reflected in the stats dict."""
        conversation = [{"role": "user", "content": "hello"}]
        await memcache.get_chat("chat", "m", conversation)  # miss
        payload = orjson.dumps({"choices": [{"message": {"content": "hi"}}]})
        await memcache.set_chat("chat", "m", conversation, payload)
        await memcache.get_chat("chat", "m", conversation)  # hit

        snapshot = memcache.stats()
        for key, expected in (("hits", 1), ("misses", 1), ("hit_rate", 0.5)):
            assert snapshot[key] == expected
        assert snapshot["semantic"] is False
        assert snapshot["backend"] == "memory"

    async def test_clear_resets_counters_and_storage(self, memcache):
        """After ``clear()`` both counters read zero and the entry is gone."""
        conversation = [{"role": "user", "content": "hi"}]
        payload = orjson.dumps({"choices": [{"message": {"content": "ok"}}]})
        await memcache.set_chat("chat", "m", conversation, payload)
        await memcache.get_chat("chat", "m", conversation)
        await memcache.clear()

        snapshot = memcache.stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        lookup = await memcache.get_chat("chat", "m", conversation)
        assert lookup is None
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Module-level helpers
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestInitLLMCache:
    """Checks on the module-level ``init_llm_cache`` / ``get_llm_cache`` helpers."""

    async def test_disabled_returns_none(self):
        """With ``cache_enabled`` off, ``init_llm_cache`` returns ``None``."""
        disabled_cfg = _exact_cfg()
        disabled_cfg.cache_enabled = False
        outcome = await init_llm_cache(disabled_cfg)
        assert outcome is None

    async def test_enabled_returns_initialized_cache(self):
        """With the cache enabled, the returned object is what ``get_llm_cache`` serves."""
        enabled_cfg = _exact_cfg()
        try:
            created = await init_llm_cache(enabled_cfg)
            assert created is not None
            assert get_llm_cache() is created
        finally:
            # Reset singleton between tests
            cache_mod._cache = None
|