"""
|
|
Unit tests for Parameter-Based Caching in LLM Processors
|
|
Testing processors that cache based on temperature parameters (Bedrock, GoogleAIStudio)
|
|
"""

import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from unittest import IsolatedAsyncioTestCase

from trustgraph.model.text_completion.googleaistudio.llm import Processor as GoogleAIProcessor
from trustgraph.base import LlmResult


class TestParameterCaching(IsolatedAsyncioTestCase):
    """Test parameter-based caching functionality"""

    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_googleai_temperature_cache_keys(self, mock_llm_init, mock_async_init, mock_genai):
        """Test that GoogleAI processor creates separate cache entries for different temperatures"""
        # Arrange
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        config = {
            'model': 'gemini-2.0-flash-001',
            'api_key': 'test-api-key',
            'temperature': 0.0,  # Default temperature
            'max_output': 1024,
            'concurrency': 1,
            'taskgroup': AsyncMock(),
            'id': 'test-processor'
        }

        processor = GoogleAIProcessor(**config)

        # Act - call with three different temperatures
        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.0)
        await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.5)
        await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=1.0)

        # Assert - should have three distinct cache entries
        cache_keys = list(processor.generation_configs.keys())

        assert len(cache_keys) == 3
        assert "gemini-2.0-flash-001:0.0" in cache_keys
        assert "gemini-2.0-flash-001:0.5" in cache_keys
        assert "gemini-2.0-flash-001:1.0" in cache_keys

        # Verify each cached config has the correct temperature
        assert processor.generation_configs["gemini-2.0-flash-001:0.0"].temperature == 0.0
        assert processor.generation_configs["gemini-2.0-flash-001:0.5"].temperature == 0.5
        assert processor.generation_configs["gemini-2.0-flash-001:1.0"].temperature == 1.0
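
        # Only generation configs are cached, not responses: assuming each
        # generate_content call invokes the mocked client once, three calls
        # should be recorded.
        assert mock_client.models.generate_content.call_count == 3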

    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_googleai_cache_reuse_same_parameters(self, mock_llm_init, mock_async_init, mock_genai):
        """Test that GoogleAI processor reuses the cache for identical model+temperature combinations"""
        # Arrange
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        config = {
            'model': 'gemini-2.0-flash-001',
            'api_key': 'test-api-key',
            'temperature': 0.0,
            'max_output': 1024,
            'concurrency': 1,
            'taskgroup': AsyncMock(),
            'id': 'test-processor'
        }

        processor = GoogleAIProcessor(**config)

        # Act - call multiple times with the same parameters
        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.7)
        await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.7)
        await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=0.7)

        # Assert - only one cache entry for the repeated parameters
        cache_keys = list(processor.generation_configs.keys())
        assert len(cache_keys) == 1
        assert "gemini-2.0-flash-001:0.7" in cache_keys

        # The same config object should be reused
        config_obj = processor.generation_configs["gemini-2.0-flash-001:0.7"]
        assert config_obj.temperature == 0.7
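
        # Assuming a cache hit returns the stored object, a further call with
        # the same parameters should leave the cache unchanged and keep the
        # identical config object, which is what "reused" means here.
        await processor.generate_content("System", "Prompt 4", model="gemini-2.0-flash-001", temperature=0.7)
        assert processor.generation_configs["gemini-2.0-flash-001:0.7"] is config_obj
        assert len(processor.generation_configs) == 1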

    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_googleai_different_models_separate_caches(self, mock_llm_init, mock_async_init, mock_genai):
        """Test that different models create separate cache entries even with the same temperature"""
        # Arrange
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        config = {
            'model': 'gemini-2.0-flash-001',
            'api_key': 'test-api-key',
            'temperature': 0.0,
            'max_output': 1024,
            'concurrency': 1,
            'taskgroup': AsyncMock(),
            'id': 'test-processor'
        }

        processor = GoogleAIProcessor(**config)

        # Act - call with different models, same temperature
        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.5)
        await processor.generate_content("System", "Prompt 2", model="gemini-1.5-flash-001", temperature=0.5)

        # Assert - separate cache entries for the two models
        cache_keys = list(processor.generation_configs.keys())
        assert len(cache_keys) == 2
        assert "gemini-2.0-flash-001:0.5" in cache_keys
        assert "gemini-1.5-flash-001:0.5" in cache_keys

    # Note: Bedrock tests would be similar, but exercise the Bedrock
    # processor's caching behaviour. The Bedrock processor caches model
    # variants with the temperature embedded in the cache key.

    async def test_bedrock_temperature_cache_keys(self):
        """Test Bedrock processor temperature-aware caching"""
        # This would exercise the Bedrock processor's _get_or_create_variant
        # method with different temperature values, to ensure proper cache
        # key generation.

        # An implementation would follow a similar pattern to the GoogleAI
        # tests above, but use the Bedrock processor and inspect its
        # model_variants cache; see the sketch after this method.
        pass
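
    # Minimal sketch of the Bedrock test described above. The import path,
    # the _get_or_create_variant signature, and the bare-instance setup are
    # assumptions, not verified against the Bedrock processor, so the test
    # stays skipped until they are confirmed.
    @pytest.mark.skip(reason="sketch only - confirm the Bedrock processor API first")
    async def test_bedrock_temperature_cache_keys_sketch(self):
        # Assumed module location for the Bedrock processor.
        from trustgraph.model.text_completion.bedrock.llm import (
            Processor as BedrockProcessor,
        )

        # Bypass __init__ so no AWS credentials or network access are needed.
        processor = BedrockProcessor.__new__(BedrockProcessor)
        processor.model_variants = {}

        # Distinct temperatures should create distinct variants; repeating a
        # (model, temperature) pair should reuse the existing entry.
        processor._get_or_create_variant("model-a", temperature=0.0)
        processor._get_or_create_variant("model-a", temperature=0.5)
        processor._get_or_create_variant("model-a", temperature=0.5)

        assert len(processor.model_variants) == 2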

    async def test_bedrock_cache_isolation_different_temperatures(self):
        """Test that Bedrock processor isolates cache entries by temperature"""
        pass

    async def test_cache_memory_efficiency(self):
        """Test that caches don't grow unbounded with many different parameter combinations"""
        # This could test cache size limits or cleanup behaviour, if implemented
        pass


class TestCachePerformance(IsolatedAsyncioTestCase):
    """Test caching performance characteristics"""

    async def test_cache_hit_performance(self):
        """Test that cache hits are faster than cache misses"""
        # This would measure timing differences between cache hits and misses
        pass

    async def test_concurrent_cache_access(self):
        """Test concurrent access to cached configurations"""
        # This would test thread-safety of cache access
        pass
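
    # Minimal sketch of the concurrency test, reusing the GoogleAI mock
    # setup from TestParameterCaching above. Running on a single event loop
    # exercises interleaved async access, not true multi-threaded safety.
    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_concurrent_cache_access_sketch(self, mock_llm_init, mock_async_init, mock_genai):
        import asyncio  # local import; only this sketch needs it

        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        processor = GoogleAIProcessor(
            model='gemini-2.0-flash-001',
            api_key='test-api-key',
            temperature=0.0,
            max_output=1024,
            concurrency=1,
            taskgroup=AsyncMock(),
            id='test-processor',
        )

        # Fire ten requests with identical parameters concurrently; the
        # cache should still end up with exactly one entry.
        await asyncio.gather(*[
            processor.generate_content(
                "System", f"Prompt {i}",
                model="gemini-2.0-flash-001", temperature=0.3,
            )
            for i in range(10)
        ])

        assert list(processor.generation_configs.keys()) == ["gemini-2.0-flash-001:0.3"]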


if __name__ == '__main__':
    pytest.main([__file__])