# nomyo-router/config.py

"""Router configuration loader.

Pydantic ``BaseSettings`` model populated from YAML (path resolved via
``_config_path_from_env``) with ``${VAR}`` expansion, plus env-var overrides
under the ``NOMYO_ROUTER_`` prefix.
"""
import os
import re
from pathlib import Path
from typing import Dict, List, Optional

import yaml
from pydantic import Field
from pydantic_settings import BaseSettings


class Config(BaseSettings):
    """Router settings model.

    Instances are normally built with :meth:`from_yaml`, which reads a YAML
    mapping, expands ``${VAR}`` placeholders and normalises the router API key
    before handing the values to the model constructor. Fields that are not
    supplied fall back to the defaults declared below.
    """

    # List of Ollama endpoints
    endpoints: List[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint-model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)

    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard.
    # NOTE(review): ``env=`` is the pydantic v1 spelling; under pydantic-settings it is
    # presumably not honoured, which would explain why from_yaml() reads
    # NOMYO_ROUTER_API_KEY explicitly — confirm before removing either.
    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")

    # Database configuration
    # The default is computed once, when this class body is evaluated: it is the value
    # of NOMYO_ROUTER_DB_PATH at that moment, or "token_counts.db" if unset.
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Recursively replace strings of the exact form ``${VAR}`` with ``os.getenv('VAR')``.

        Dicts and lists are rebuilt with their values/items expanded (dict keys
        are left untouched). A string is substituted only when the *whole*
        string is a single ``${VAR}`` reference; strings that merely contain
        one are returned unchanged. An unset variable expands to ``""``.
        Any other value is returned as-is.
        """
        if isinstance(obj, dict):
            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(v) for v in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}
            m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
            if m:
                return os.getenv(m.group(1), "")
        return obj

    @classmethod
    def _resolve_router_api_key(cls, data: Dict) -> Dict:
        """Fold every accepted spelling of the router API key into ``router_api_key``.

        ``data`` is modified in place and returned. All alias keys are removed
        so none of them reaches the model constructor as an unknown keyword;
        when several are present, the first one in the alias order below wins.
        If the resulting value is missing or empty, the
        ``NOMYO_ROUTER_API_KEY`` environment variable is used when it is set
        to a non-empty value.
        """
        # Order is significant: earlier entries take precedence.
        aliases = (
            # canonical field name
            "router_api_key",
            # lowercase, hyphen/underscore variants
            "nomyo-router-api-key",
            "nomyo_router_api_key",
            "nomyo-router_api_key",
            "nomyo_router-api_key",
            # uppercase env-style variants
            "NOMYO-ROUTER_API_KEY",
            "NOMYO_ROUTER_API_KEY",
        )
        supplied = []
        for alias in aliases:
            if alias in data:
                supplied.append(data.pop(alias))
        if supplied:
            data["router_api_key"] = supplied[0]

        # If not present (or empty), fall back to the env var explicitly.
        if not data.get("router_api_key"):
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                data["router_api_key"] = env_key
        return data

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Build a Config from the YAML file at ``path``.

        When ``path`` does not exist, the instance is built from defaults. In
        both cases the router API key is normalised (see
        ``_resolve_router_api_key``), so the ``NOMYO_ROUTER_API_KEY``
        fallback applies whether or not a config file is present.

        Raises:
            TypeError: if the YAML document's top level is not a mapping.
        """
        data: Dict = {}
        if path.exists():
            # Keep the file open only for the read itself.
            with path.open("r", encoding="utf-8") as fp:
                loaded = yaml.safe_load(fp) or {}
            expanded = cls._expand_env_refs(loaded)
            if not isinstance(expanded, dict):
                raise TypeError(
                    f"Top-level YAML content of {path} must be a mapping, "
                    f"got {type(expanded).__name__}"
                )
            data = expanded
        return cls(**cls._resolve_router_api_key(data))


def _config_path_from_env() -> Path:
    """Return the path of the configuration file to load.

    A non-empty ``NOMYO_ROUTER_CONFIG_PATH`` environment variable takes
    precedence and has a leading ``~`` expanded; otherwise the relative path
    ``config.yaml`` (resolved against the current working directory when
    opened) is returned.
    """
    override = os.environ.get("NOMYO_ROUTER_CONFIG_PATH")
    return Path(override).expanduser() if override else Path("config.yaml")


# ------------------------------------------------------------------
# Shared config accessor
# ------------------------------------------------------------------
# Submodules read config at call time via get_config() instead of importing
# a bound name. The single source of truth is ``router.config`` — the lazy
# import below resolves it after router.py has finished loading, and lets
# tests that ``patch.object(router, "config", cfg)`` flow through.
def get_config() -> "Config":
    """Look up and return ``router.config`` as it is at call time."""
    # Imported inside the function (not at module top) to avoid a circular
    # import while the modules are loading; the attribute is read on every
    # call, so a replaced ``router.config`` is picked up.
    import router as _router_module
    active = _router_module.config
    return active