More LLM param test coverage (#535)

* More LLM tests * Fixing tests
2026-06-24 06:08:05 +02:00 · 2025-09-26 01:00:30 +01:00 · 2025-09-26 01:00:30 +01:00 · 43cfcb18a0
commit 43cfcb18a0
parent b0a3716b0e
18 changed files with 3563 additions and 0 deletions
--- a/tests/unit/test_text_completion/test_parameter_caching.py
+++ b/tests/unit/test_text_completion/test_parameter_caching.py
@ -0,0 +1,186 @@
+"""
+Unit tests for Parameter-Based Caching in LLM Processors
+Testing processors that cache based on temperature parameters (Bedrock, GoogleAIStudio)
+"""
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+from unittest import IsolatedAsyncioTestCase
+
+from trustgraph.model.text_completion.googleaistudio.llm import Processor as GoogleAIProcessor
+from trustgraph.base import LlmResult
+
+
+class TestParameterCaching(IsolatedAsyncioTestCase):
+    """Test parameter-based caching functionality"""
+
+    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
+    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
+    @patch('trustgraph.base.llm_service.LlmService.__init__')
+    async def test_googleai_temperature_cache_keys(self, mock_llm_init, mock_async_init, mock_genai):
+        """Test that GoogleAI processor creates separate cache entries for different temperatures"""
+        # Arrange
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.text = "Generated response"
+        mock_response.usage_metadata.prompt_token_count = 10
+        mock_response.usage_metadata.candidates_token_count = 5
+        mock_client.models.generate_content.return_value = mock_response
+
+        mock_async_init.return_value = None
+        mock_llm_init.return_value = None
+
+        config = {
+            'model': 'gemini-2.0-flash-001',
+            'api_key': 'test-api-key',
+            'temperature': 0.0,  # Default temperature
+            'max_output': 1024,
+            'concurrency': 1,
+            'taskgroup': AsyncMock(),
+            'id': 'test-processor'
+        }
+
+        processor = GoogleAIProcessor(**config)
+
+        # Act - Call with different temperatures
+        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.0)
+        await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.5)
+        await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=1.0)
+
+        # Assert - Should have 3 different cache entries
+        cache_keys = list(processor.generation_configs.keys())
+
+        assert len(cache_keys) == 3
+        assert "gemini-2.0-flash-001:0.0" in cache_keys
+        assert "gemini-2.0-flash-001:0.5" in cache_keys
+        assert "gemini-2.0-flash-001:1.0" in cache_keys
+
+        # Verify each cached config has the correct temperature
+        assert processor.generation_configs["gemini-2.0-flash-001:0.0"].temperature == 0.0
+        assert processor.generation_configs["gemini-2.0-flash-001:0.5"].temperature == 0.5
+        assert processor.generation_configs["gemini-2.0-flash-001:1.0"].temperature == 1.0
+
+    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
+    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
+    @patch('trustgraph.base.llm_service.LlmService.__init__')
+    async def test_googleai_cache_reuse_same_parameters(self, mock_llm_init, mock_async_init, mock_genai):
+        """Test that GoogleAI processor reuses cache for identical model+temperature combinations"""
+        # Arrange
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.text = "Generated response"
+        mock_response.usage_metadata.prompt_token_count = 10
+        mock_response.usage_metadata.candidates_token_count = 5
+        mock_client.models.generate_content.return_value = mock_response
+
+        mock_async_init.return_value = None
+        mock_llm_init.return_value = None
+
+        config = {
+            'model': 'gemini-2.0-flash-001',
+            'api_key': 'test-api-key',
+            'temperature': 0.0,
+            'max_output': 1024,
+            'concurrency': 1,
+            'taskgroup': AsyncMock(),
+            'id': 'test-processor'
+        }
+
+        processor = GoogleAIProcessor(**config)
+
+        # Act - Call multiple times with same parameters
+        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.7)
+        await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.7)
+        await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=0.7)
+
+        # Assert - Should have only 1 cache entry for the repeated parameters
+        cache_keys = list(processor.generation_configs.keys())
+        assert len(cache_keys) == 1
+        assert "gemini-2.0-flash-001:0.7" in cache_keys
+
+        # The same config object should be reused
+        config_obj = processor.generation_configs["gemini-2.0-flash-001:0.7"]
+        assert config_obj.temperature == 0.7
+
+    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
+    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
+    @patch('trustgraph.base.llm_service.LlmService.__init__')
+    async def test_googleai_different_models_separate_caches(self, mock_llm_init, mock_async_init, mock_genai):
+        """Test that different models create separate cache entries even with same temperature"""
+        # Arrange
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.text = "Generated response"
+        mock_response.usage_metadata.prompt_token_count = 10
+        mock_response.usage_metadata.candidates_token_count = 5
+        mock_client.models.generate_content.return_value = mock_response
+
+        mock_async_init.return_value = None
+        mock_llm_init.return_value = None
+
+        config = {
+            'model': 'gemini-2.0-flash-001',
+            'api_key': 'test-api-key',
+            'temperature': 0.0,
+            'max_output': 1024,
+            'concurrency': 1,
+            'taskgroup': AsyncMock(),
+            'id': 'test-processor'
+        }
+
+        processor = GoogleAIProcessor(**config)
+
+        # Act - Call with different models, same temperature
+        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.5)
+        await processor.generate_content("System", "Prompt 2", model="gemini-1.5-flash-001", temperature=0.5)
+
+        # Assert - Should have separate cache entries for different models
+        cache_keys = list(processor.generation_configs.keys())
+        assert len(cache_keys) == 2
+        assert "gemini-2.0-flash-001:0.5" in cache_keys
+        assert "gemini-1.5-flash-001:0.5" in cache_keys
+
+    # Note: Bedrock tests would be similar but testing the Bedrock processor's caching behavior
+    # The Bedrock processor caches model variants with temperature in the cache key
+
+    async def test_bedrock_temperature_cache_keys(self):
+        """Test Bedrock processor temperature-aware caching"""
+        # This would test the Bedrock processor's _get_or_create_variant method
+        # with different temperature values to ensure proper cache key generation
+
+        # Implementation would follow similar pattern to GoogleAI tests above
+        # but using the Bedrock processor and testing model_variants cache
+        pass
+
+    async def test_bedrock_cache_isolation_different_temperatures(self):
+        """Test that Bedrock processor isolates cache entries by temperature"""
+        pass
+
+    async def test_cache_memory_efficiency(self):
+        """Test that caches don't grow unbounded with many different parameter combinations"""
+        # This could test cache size limits or cleanup behavior if implemented
+        pass
+
+
+class TestCachePerformance(IsolatedAsyncioTestCase):
+    """Test caching performance characteristics"""
+
+    async def test_cache_hit_performance(self):
+        """Test that cache hits are faster than cache misses"""
+        # This would measure timing differences between cache hits and misses
+        pass
+
+    async def test_concurrent_cache_access(self):
+        """Test concurrent access to cached configurations"""
+        # This would test thread-safety of cache access
+        pass
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])