"""Router configuration loader.

Pydantic ``BaseSettings`` model populated from YAML (path resolved via
``_config_path_from_env``) with ``${VAR}`` expansion, plus env-var overrides
under the ``NOMYO_ROUTER_`` prefix.
"""

import os
import re
from pathlib import Path
from typing import Dict, List, Optional

import yaml
from pydantic import Field
from pydantic_settings import BaseSettings

# A string is expanded only when it is *exactly* one ``${VAR}`` reference.
# Kept at module level (not on the model class) so it is compiled once and is
# not picked up by pydantic as a model attribute.
_ENV_REF_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}")

# Accepted YAML spellings of the router API key, in priority order.
# The canonical field name comes first so it always wins over any alias.
_ROUTER_API_KEY_ALIASES = (
    # canonical field name
    "router_api_key",
    # lowercase, hyphen/underscore variants
    "nomyo-router-api-key",
    "nomyo_router_api_key",
    "nomyo-router_api_key",
    "nomyo_router-api_key",
    # uppercase env-style variants
    "NOMYO-ROUTER_API_KEY",
    "NOMYO_ROUTER_API_KEY",
)


class Config(BaseSettings):
    """Router settings, loadable from YAML via :meth:`from_yaml`."""

    # List of Ollama endpoints
    endpoints: List[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint-model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)
    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard
    # NOTE(review): whether the ``env=`` kwarg is honoured depends on the installed
    # pydantic version — confirm. ``from_yaml`` reads NOMYO_ROUTER_API_KEY explicitly
    # as a fallback, so the YAML loading path does not rely on it.
    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")

    # Database configuration
    # NOTE: the os.getenv() default is evaluated once, when this module is imported.
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Recursively replace ``${VAR}`` strings with ``os.getenv('VAR')``.

        Dict values and list items are walked recursively; dict keys are left
        untouched. A string is replaced only when it is exactly one ``${VAR}``
        reference, and an unset variable expands to ``""``. Any other value is
        returned unchanged.
        """
        if isinstance(obj, dict):
            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(v) for v in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}
            m = _ENV_REF_PATTERN.fullmatch(obj)
            if m:
                return os.getenv(m.group(1), "")
        return obj

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Load the YAML file at *path* and create the ``Config`` instance.

        Returns ``cls()`` (defaults plus whatever the settings class reads on
        its own) when *path* does not exist. An empty document is treated as
        an empty mapping.

        Raises:
            TypeError: if the YAML document's top-level node is not a mapping.
        """
        if not path.exists():
            return cls()

        with path.open("r", encoding="utf-8") as fp:
            data = yaml.safe_load(fp) or {}

        cleaned = cls._expand_env_refs(data)
        if not isinstance(cleaned, dict):
            # Fail with an actionable message instead of the opaque
            # "argument after ** must be a mapping" raised by cls(**cleaned).
            raise TypeError(
                f"Top-level YAML node in {path} must be a mapping, "
                f"got {type(cleaned).__name__}"
            )

        # Accept hyphenated/prefixed config keys and map them to the field name.
        # Every alias is removed so that a stray duplicate spelling is not
        # forwarded to the model as an unknown field; the first one found in
        # priority order supplies the value.
        found = [
            cleaned.pop(alias)
            for alias in _ROUTER_API_KEY_ALIASES
            if alias in cleaned
        ]
        if found:
            cleaned["router_api_key"] = found[0]

        # If not present in YAML (or empty), fall back to env var explicitly
        if not cleaned.get("router_api_key"):
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                cleaned["router_api_key"] = env_key

        return cls(**cleaned)


def _config_path_from_env() -> Path:
    """Resolve the configuration file path.

    Returns the value of ``NOMYO_ROUTER_CONFIG_PATH`` (with ``~`` expanded)
    when that variable is set and non-empty; otherwise the relative path
    ``config.yaml``.
    """
    candidate = os.getenv("NOMYO_ROUTER_CONFIG_PATH")
    if candidate:
        return Path(candidate).expanduser()
    return Path("config.yaml")