async-semantic-llm-cache/semantic_llm_cache/config.py

"""Configuration management for prompt-cache."""

from dataclasses import dataclass
from typing import Any, Callable, Optional


@dataclass
class CacheConfig:
    """Settings that govern how the cache matches, stores and prices entries.

    Values are range-checked once, immediately after construction (see
    ``__post_init__``); later attribute assignments are not re-validated.
    """

    # --- Matching and lifetime ---
    # 1.0 means exact match only; lower values allow semantic matches.
    similarity_threshold: float = 1.0
    # Entry lifetime in seconds; None keeps entries forever.
    ttl: Optional[int] = 3600
    # Keeps different use cases isolated from one another.
    namespace: str = "default"
    # Switch for enabling/disabling caching.
    enabled: bool = True
    # Optional custom cache key function.
    key_func: Optional[Callable[[Any], str]] = None

    # --- Cost estimation for statistics (USD per 1K tokens) ---
    # Default is roughly $1 per 1M tokens, i.e. cheaper models.
    input_cost_per_1k: float = 0.001
    # Default is roughly $2 per 1M tokens, i.e. cheaper models.
    output_cost_per_1k: float = 0.002

    # --- Performance settings ---
    # When set, LRU eviction applies.
    max_cache_size: Optional[int] = None
    # Default sentence-transformer model.
    embedding_model: str = "all-MiniLM-L6-v2"

    def __post_init__(self) -> None:
        """Range-check the settings that have numeric constraints.

        Raises:
            ValueError: if ``similarity_threshold`` is not within
                [0.0, 1.0], or if ``ttl`` / ``max_cache_size`` is set
                (not None) but is zero or negative.
        """
        threshold = self.similarity_threshold
        # Phrased as "not inside the range" so that values which compare
        # false against both bounds (e.g. NaN) are rejected as well.
        threshold_ok = 0.0 <= threshold <= 1.0
        if not threshold_ok:
            raise ValueError("similarity_threshold must be between 0.0 and 1.0")

        lifetime = self.ttl
        if lifetime is not None and lifetime <= 0:
            raise ValueError("ttl must be positive or None")

        capacity = self.max_cache_size
        if capacity is not None and capacity <= 0:
            raise ValueError("max_cache_size must be positive or None")


@dataclass
class CacheEntry:
    """One stored prompt/response pair together with its metadata."""

    prompt: str
    response: Any
    # Normalized embedding vector, when one is attached.
    embedding: Optional[list[float]] = None
    # Creation time as a Unix timestamp.
    created_at: float = 0.0
    # Lifetime in seconds; None disables expiry for this entry.
    ttl: Optional[int] = None
    namespace: str = "default"
    hit_count: int = 0

    # Approximate token counts, used only for cost estimation.
    input_tokens: int = 0
    output_tokens: int = 0

    def is_expired(self, current_time: float) -> bool:
        """Tell whether the entry has outlived its TTL at ``current_time``.

        Args:
            current_time: Timestamp to compare against ``created_at``.

        Returns:
            False when ``ttl`` is None; otherwise True only if the entry's
            age is strictly greater than ``ttl`` (an age equal to ``ttl``
            is still considered fresh).
        """
        if self.ttl is not None:
            age = current_time - self.created_at
            return age > self.ttl
        return False

    def estimate_cost(self, input_cost: float, output_cost: float) -> float:
        """Estimate the USD amount this entry's tokens represent.

        Args:
            input_cost: Price per 1000 input tokens.
            output_cost: Price per 1000 output tokens.

        Returns:
            The input-token share plus the output-token share.
        """
        return (
            (self.input_tokens / 1000) * input_cost
            + (self.output_tokens / 1000) * output_cost
        )
Add files via upload initial commit 2026-03-06 15:54:47 +01:00			`"""Configuration management for prompt-cache."""`

			`from dataclasses import dataclass`
			`from typing import Any, Callable, Optional`


			`@dataclass`
			`class CacheConfig:`
			`"""Configuration for cache behavior."""`

			`similarity_threshold: float = 1.0 # 1.0 = exact match, lower = semantic`
			`ttl: Optional[int] = 3600 # Time to live in seconds, None = forever`
			`namespace: str = "default" # Isolate different use cases`
			`enabled: bool = True # Enable/disable caching`
			`key_func: Optional[Callable[[Any], str]] = None # Custom cache key function`

			`# Cost estimation for statistics (USD per 1K tokens)`
			`input_cost_per_1k: float = 0.001 # Default ~$1/1M for cheaper models`
			`output_cost_per_1k: float = 0.002 # Default ~$2/1M for cheaper models`

			`# Performance settings`
			`max_cache_size: Optional[int] = None # LRU eviction when set`
			`embedding_model: str = "all-MiniLM-L6-v2" # Default sentence-transformer model`

			`def __post_init__(self) -> None:`
			`"""Validate configuration."""`
			`if not 0.0 <= self.similarity_threshold <= 1.0:`
			`raise ValueError("similarity_threshold must be between 0.0 and 1.0")`
			`if self.ttl is not None and self.ttl <= 0:`
			`raise ValueError("ttl must be positive or None")`
			`if self.max_cache_size is not None and self.max_cache_size <= 0:`
			`raise ValueError("max_cache_size must be positive or None")`


			`@dataclass`
			`class CacheEntry:`
			`"""A cached response with metadata."""`

			`prompt: str`
			`response: Any`
			`embedding: Optional[list[float]] = None # Normalized embedding vector`
			`created_at: float = 0.0 # Unix timestamp`
			`ttl: Optional[int] = None # Time to live in seconds`
			`namespace: str = "default"`
			`hit_count: int = 0`

			`# Approximate token counts for cost estimation`
			`input_tokens: int = 0`
			`output_tokens: int = 0`

			`def is_expired(self, current_time: float) -> bool:`
			`"""Check if entry has expired based on TTL."""`
			`if self.ttl is None:`
			`return False`
			`return (current_time - self.created_at) > self.ttl`

			`def estimate_cost(self, input_cost: float, output_cost: float) -> float:`
			`"""Estimate cost savings in USD."""`
			`input_savings = (self.input_tokens / 1000) * input_cost`
			`output_savings = (self.output_tokens / 1000) * output_cost`
			`return input_savings + output_savings`