trustgraph/tests/unit/test_text_completion/test_parameter_caching.py

"""
Unit tests for Parameter-Based Caching in LLM Processors
Testing processors that cache based on temperature parameters (Bedrock, GoogleAIStudio)
"""

import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from unittest import IsolatedAsyncioTestCase

from trustgraph.model.text_completion.googleaistudio.llm import Processor as GoogleAIProcessor
from trustgraph.base import LlmResult


class TestParameterCaching(IsolatedAsyncioTestCase):
    """Test parameter-based caching functionality"""

    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_googleai_temperature_cache_keys(self, mock_llm_init, mock_async_init, mock_genai):
        """Test that GoogleAI processor creates separate cache entries for different temperatures"""
        # Arrange
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        config = {
            'model': 'gemini-2.0-flash-001',
            'api_key': 'test-api-key',
            'temperature': 0.0,  # Default temperature
            'max_output': 1024,
            'concurrency': 1,
            'taskgroup': AsyncMock(),
            'id': 'test-processor'
        }

        processor = GoogleAIProcessor(**config)

        # Act - Call with different temperatures
        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.0)
        await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.5)
        await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=1.0)

        # Assert - Should have 3 different cache entries
        cache_keys = list(processor.generation_configs.keys())

        assert len(cache_keys) == 3
        assert "gemini-2.0-flash-001:0.0" in cache_keys
        assert "gemini-2.0-flash-001:0.5" in cache_keys
        assert "gemini-2.0-flash-001:1.0" in cache_keys

        # Verify each cached config has the correct temperature
        assert processor.generation_configs["gemini-2.0-flash-001:0.0"].temperature == 0.0
        assert processor.generation_configs["gemini-2.0-flash-001:0.5"].temperature == 0.5
        assert processor.generation_configs["gemini-2.0-flash-001:1.0"].temperature == 1.0

    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_googleai_cache_reuse_same_parameters(self, mock_llm_init, mock_async_init, mock_genai):
        """Test that GoogleAI processor reuses cache for identical model+temperature combinations"""
        # Arrange
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        config = {
            'model': 'gemini-2.0-flash-001',
            'api_key': 'test-api-key',
            'temperature': 0.0,
            'max_output': 1024,
            'concurrency': 1,
            'taskgroup': AsyncMock(),
            'id': 'test-processor'
        }

        processor = GoogleAIProcessor(**config)

        # Act - Call multiple times with same parameters
        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.7)
        await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.7)
        await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=0.7)

        # Assert - Should have only 1 cache entry for the repeated parameters
        cache_keys = list(processor.generation_configs.keys())
        assert len(cache_keys) == 1
        assert "gemini-2.0-flash-001:0.7" in cache_keys

        # The same config object should be reused
        config_obj = processor.generation_configs["gemini-2.0-flash-001:0.7"]
        assert config_obj.temperature == 0.7

    @patch('trustgraph.model.text_completion.googleaistudio.llm.genai')
    @patch('trustgraph.base.async_processor.AsyncProcessor.__init__')
    @patch('trustgraph.base.llm_service.LlmService.__init__')
    async def test_googleai_different_models_separate_caches(self, mock_llm_init, mock_async_init, mock_genai):
        """Test that different models create separate cache entries even with same temperature"""
        # Arrange
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        mock_response = MagicMock()
        mock_response.text = "Generated response"
        mock_response.usage_metadata.prompt_token_count = 10
        mock_response.usage_metadata.candidates_token_count = 5
        mock_client.models.generate_content.return_value = mock_response

        mock_async_init.return_value = None
        mock_llm_init.return_value = None

        config = {
            'model': 'gemini-2.0-flash-001',
            'api_key': 'test-api-key',
            'temperature': 0.0,
            'max_output': 1024,
            'concurrency': 1,
            'taskgroup': AsyncMock(),
            'id': 'test-processor'
        }

        processor = GoogleAIProcessor(**config)

        # Act - Call with different models, same temperature
        await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.5)
        await processor.generate_content("System", "Prompt 2", model="gemini-1.5-flash-001", temperature=0.5)

        # Assert - Should have separate cache entries for different models
        cache_keys = list(processor.generation_configs.keys())
        assert len(cache_keys) == 2
        assert "gemini-2.0-flash-001:0.5" in cache_keys
        assert "gemini-1.5-flash-001:0.5" in cache_keys

    # Note: Bedrock tests would be similar but testing the Bedrock processor's caching behavior
    # The Bedrock processor caches model variants with temperature in the cache key

    async def test_bedrock_temperature_cache_keys(self):
        """Test Bedrock processor temperature-aware caching"""
        # This would test the Bedrock processor's _get_or_create_variant method
        # with different temperature values to ensure proper cache key generation

        # Implementation would follow similar pattern to GoogleAI tests above
        # but using the Bedrock processor and testing model_variants cache
        pass

    async def test_bedrock_cache_isolation_different_temperatures(self):
        """Test that Bedrock processor isolates cache entries by temperature"""
        pass

    async def test_cache_memory_efficiency(self):
        """Test that caches don't grow unbounded with many different parameter combinations"""
        # This could test cache size limits or cleanup behavior if implemented
        pass


class TestCachePerformance(IsolatedAsyncioTestCase):
    """Test caching performance characteristics"""

    async def test_cache_hit_performance(self):
        """Test that cache hits are faster than cache misses"""
        # This would measure timing differences between cache hits and misses
        pass

    async def test_concurrent_cache_access(self):
        """Test concurrent access to cached configurations"""
        # This would test thread-safety of cache access
        pass


if __name__ == '__main__':
    pytest.main([__file__])
More LLM param test coverage (#535) * More LLM tests * Fixing tests 2025-09-26 01:00:30 +01:00			`"""`
			`Unit tests for Parameter-Based Caching in LLM Processors`
			`Testing processors that cache based on temperature parameters (Bedrock, GoogleAIStudio)`
			`"""`

			`import pytest`
			`from unittest.mock import AsyncMock, MagicMock, patch`
			`from unittest import IsolatedAsyncioTestCase`

			`from trustgraph.model.text_completion.googleaistudio.llm import Processor as GoogleAIProcessor`
			`from trustgraph.base import LlmResult`


			`class TestParameterCaching(IsolatedAsyncioTestCase):`
			`"""Test parameter-based caching functionality"""`

			`@patch('trustgraph.model.text_completion.googleaistudio.llm.genai')`
			`@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')`
			`@patch('trustgraph.base.llm_service.LlmService.__init__')`
			`async def test_googleai_temperature_cache_keys(self, mock_llm_init, mock_async_init, mock_genai):`
			`"""Test that GoogleAI processor creates separate cache entries for different temperatures"""`
			`# Arrange`
			`mock_client = MagicMock()`
			`mock_genai.Client.return_value = mock_client`

			`mock_response = MagicMock()`
			`mock_response.text = "Generated response"`
			`mock_response.usage_metadata.prompt_token_count = 10`
			`mock_response.usage_metadata.candidates_token_count = 5`
			`mock_client.models.generate_content.return_value = mock_response`

			`mock_async_init.return_value = None`
			`mock_llm_init.return_value = None`

			`config = {`
			`'model': 'gemini-2.0-flash-001',`
			`'api_key': 'test-api-key',`
			`'temperature': 0.0, # Default temperature`
			`'max_output': 1024,`
			`'concurrency': 1,`
			`'taskgroup': AsyncMock(),`
			`'id': 'test-processor'`
			`}`

			`processor = GoogleAIProcessor(**config)`

			`# Act - Call with different temperatures`
			`await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.0)`
			`await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.5)`
			`await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=1.0)`

			`# Assert - Should have 3 different cache entries`
			`cache_keys = list(processor.generation_configs.keys())`

			`assert len(cache_keys) == 3`
			`assert "gemini-2.0-flash-001:0.0" in cache_keys`
			`assert "gemini-2.0-flash-001:0.5" in cache_keys`
			`assert "gemini-2.0-flash-001:1.0" in cache_keys`

			`# Verify each cached config has the correct temperature`
			`assert processor.generation_configs["gemini-2.0-flash-001:0.0"].temperature == 0.0`
			`assert processor.generation_configs["gemini-2.0-flash-001:0.5"].temperature == 0.5`
			`assert processor.generation_configs["gemini-2.0-flash-001:1.0"].temperature == 1.0`

			`@patch('trustgraph.model.text_completion.googleaistudio.llm.genai')`
			`@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')`
			`@patch('trustgraph.base.llm_service.LlmService.__init__')`
			`async def test_googleai_cache_reuse_same_parameters(self, mock_llm_init, mock_async_init, mock_genai):`
			`"""Test that GoogleAI processor reuses cache for identical model+temperature combinations"""`
			`# Arrange`
			`mock_client = MagicMock()`
			`mock_genai.Client.return_value = mock_client`

			`mock_response = MagicMock()`
			`mock_response.text = "Generated response"`
			`mock_response.usage_metadata.prompt_token_count = 10`
			`mock_response.usage_metadata.candidates_token_count = 5`
			`mock_client.models.generate_content.return_value = mock_response`

			`mock_async_init.return_value = None`
			`mock_llm_init.return_value = None`

			`config = {`
			`'model': 'gemini-2.0-flash-001',`
			`'api_key': 'test-api-key',`
			`'temperature': 0.0,`
			`'max_output': 1024,`
			`'concurrency': 1,`
			`'taskgroup': AsyncMock(),`
			`'id': 'test-processor'`
			`}`

			`processor = GoogleAIProcessor(**config)`

			`# Act - Call multiple times with same parameters`
			`await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.7)`
			`await processor.generate_content("System", "Prompt 2", model="gemini-2.0-flash-001", temperature=0.7)`
			`await processor.generate_content("System", "Prompt 3", model="gemini-2.0-flash-001", temperature=0.7)`

			`# Assert - Should have only 1 cache entry for the repeated parameters`
			`cache_keys = list(processor.generation_configs.keys())`
			`assert len(cache_keys) == 1`
			`assert "gemini-2.0-flash-001:0.7" in cache_keys`

			`# The same config object should be reused`
			`config_obj = processor.generation_configs["gemini-2.0-flash-001:0.7"]`
			`assert config_obj.temperature == 0.7`

			`@patch('trustgraph.model.text_completion.googleaistudio.llm.genai')`
			`@patch('trustgraph.base.async_processor.AsyncProcessor.__init__')`
			`@patch('trustgraph.base.llm_service.LlmService.__init__')`
			`async def test_googleai_different_models_separate_caches(self, mock_llm_init, mock_async_init, mock_genai):`
			`"""Test that different models create separate cache entries even with same temperature"""`
			`# Arrange`
			`mock_client = MagicMock()`
			`mock_genai.Client.return_value = mock_client`

			`mock_response = MagicMock()`
			`mock_response.text = "Generated response"`
			`mock_response.usage_metadata.prompt_token_count = 10`
			`mock_response.usage_metadata.candidates_token_count = 5`
			`mock_client.models.generate_content.return_value = mock_response`

			`mock_async_init.return_value = None`
			`mock_llm_init.return_value = None`

			`config = {`
			`'model': 'gemini-2.0-flash-001',`
			`'api_key': 'test-api-key',`
			`'temperature': 0.0,`
			`'max_output': 1024,`
			`'concurrency': 1,`
			`'taskgroup': AsyncMock(),`
			`'id': 'test-processor'`
			`}`

			`processor = GoogleAIProcessor(**config)`

			`# Act - Call with different models, same temperature`
			`await processor.generate_content("System", "Prompt 1", model="gemini-2.0-flash-001", temperature=0.5)`
			`await processor.generate_content("System", "Prompt 2", model="gemini-1.5-flash-001", temperature=0.5)`

			`# Assert - Should have separate cache entries for different models`
			`cache_keys = list(processor.generation_configs.keys())`
			`assert len(cache_keys) == 2`
			`assert "gemini-2.0-flash-001:0.5" in cache_keys`
			`assert "gemini-1.5-flash-001:0.5" in cache_keys`

			`# Note: Bedrock tests would be similar but testing the Bedrock processor's caching behavior`
			`# The Bedrock processor caches model variants with temperature in the cache key`

			`async def test_bedrock_temperature_cache_keys(self):`
			`"""Test Bedrock processor temperature-aware caching"""`
			`# This would test the Bedrock processor's _get_or_create_variant method`
			`# with different temperature values to ensure proper cache key generation`

			`# Implementation would follow similar pattern to GoogleAI tests above`
			`# but using the Bedrock processor and testing model_variants cache`
			`pass`

			`async def test_bedrock_cache_isolation_different_temperatures(self):`
			`"""Test that Bedrock processor isolates cache entries by temperature"""`
			`pass`

			`async def test_cache_memory_efficiency(self):`
			`"""Test that caches don't grow unbounded with many different parameter combinations"""`
			`# This could test cache size limits or cleanup behavior if implemented`
			`pass`


			`class TestCachePerformance(IsolatedAsyncioTestCase):`
			`"""Test caching performance characteristics"""`

			`async def test_cache_hit_performance(self):`
			`"""Test that cache hits are faster than cache misses"""`
			`# This would measure timing differences between cache hits and misses`
			`pass`

			`async def test_concurrent_cache_access(self):`
			`"""Test concurrent access to cached configurations"""`
			`# This would test thread-safety of cache access`
			`pass`


			`if __name__ == '__main__':`
			`pytest.main([__file__])`