2026-05-19 11:00:50 +02:00
|
|
|
|
"""Router configuration loader.
|
|
|
|
|
|
|
|
|
|
|
|
Pydantic ``BaseSettings`` model populated from YAML (path resolved via
|
|
|
|
|
|
``_config_path_from_env``) with ``${VAR}`` expansion, plus env-var overrides
|
|
|
|
|
|
under the ``NOMYO_ROUTER_`` prefix.
|
|
|
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
|
|
|
import re
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
|
|
import yaml
|
|
|
|
|
|
from pydantic import Field
|
|
|
|
|
|
from pydantic_settings import BaseSettings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Config(BaseSettings):
    """Router settings built from a YAML file plus environment variables.

    ``from_yaml`` is the intended constructor: it reads the YAML document,
    expands ``${VAR}`` placeholders, normalises the router API key and passes
    the result to the model as keyword arguments. Environment variables use
    the ``NOMYO_ROUTER_`` prefix (see the nested ``Config`` class).
    """

    # List of Ollama endpoints
    endpoints: list[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint-model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)

    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard
    # NOTE(review): pydantic-settings v2 appears to ignore the ``env=`` keyword on
    # Field (and the prefix would map this field to NOMYO_ROUTER_ROUTER_API_KEY),
    # which is why from_yaml() reads NOMYO_ROUTER_API_KEY explicitly — confirm.
    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")

    # Database configuration
    # The default is resolved once, when the class body is evaluated.
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Recursively replace `${VAR}` with os.getenv('VAR').

        Dicts and lists are rebuilt (the input is not mutated). A string is
        replaced only when the *whole* string is a single ``${VAR}``
        placeholder; an unset variable expands to the empty string. Every
        other value is returned unchanged.
        """
        if isinstance(obj, dict):
            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(v) for v in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}
            m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
            if m:
                return os.getenv(m.group(1), "")
        return obj

    @staticmethod
    def _normalize_router_api_key(settings: dict) -> dict:
        """Fold every accepted spelling of the router key into ``router_api_key``.

        All known spellings are removed from ``settings`` so that a duplicate
        spelling is never forwarded to the model constructor as an unknown
        field. The first spelling present wins (the canonical name is checked
        first). When no spelling is present, or its value is empty, the
        ``NOMYO_ROUTER_API_KEY`` environment variable is used if it is set.

        ``settings`` is modified in place and returned.
        """
        aliases = (
            # canonical field name
            "router_api_key",
            # lowercase, hyphen/underscore variants
            "nomyo-router-api-key",
            "nomyo_router_api_key",
            "nomyo-router_api_key",
            "nomyo_router-api_key",
            # uppercase env-style variants
            "NOMYO-ROUTER_API_KEY",
            "NOMYO_ROUTER_API_KEY",
        )
        found = [settings.pop(alias) for alias in aliases if alias in settings]
        value = found[0] if found else None

        # If not present in YAML (or empty), fall back to env var explicitly
        if not value:
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                value = env_key

        # Leave the key out entirely when nothing supplied it, so the field
        # default applies exactly as it would for a bare ``cls()``.
        if found or value:
            settings["router_api_key"] = value
        return settings

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Load the YAML file and create the Config instance.

        Args:
            path: Location of the YAML config. A missing file is not an error;
                it is treated as an empty document.

        Returns:
            A ``Config`` built from the YAML mapping after ``${VAR}``
            expansion and router-API-key normalisation. The
            ``NOMYO_ROUTER_API_KEY`` fallback is applied whether or not the
            file exists.

        Raises:
            TypeError: If the YAML document's top level is not a mapping.
        """
        raw = {}
        if path.exists():
            with path.open("r", encoding="utf-8") as fp:
                raw = yaml.safe_load(fp) or {}

        cleaned = cls._expand_env_refs(raw)
        if not isinstance(cleaned, dict):
            # Same exception type ``cls(**cleaned)`` would raise, but with a
            # message that points at the config file instead of at ``**``.
            raise TypeError(
                f"Config file {path} must contain a YAML mapping at the top "
                f"level, got {type(cleaned).__name__}"
            )
        return cls(**cls._normalize_router_api_key(cleaned))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _config_path_from_env() -> Path:
    """Return the path of the configuration file to load.

    When the ``NOMYO_ROUTER_CONFIG_PATH`` environment variable holds a
    non-empty value, that value is used with a leading ``~`` expanded.
    Otherwise the relative path ``config.yaml`` is returned.
    """
    override = os.environ.get("NOMYO_ROUTER_CONFIG_PATH")
    if not override:
        # Unset or empty: fall back to the default file name.
        return Path("config.yaml")
    return Path(override).expanduser()
|
2026-05-19 12:05:51 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# Shared config accessor
|
|
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# Submodules read config at call time via get_config() instead of importing
|
|
|
|
|
|
# a bound name. The single source of truth is ``router.config`` — the lazy
|
|
|
|
|
|
# import below resolves it after router.py has finished loading, and lets
|
|
|
|
|
|
# tests that ``patch.object(router, "config", cfg)`` flow through.
|
|
|
|
|
|
def get_config() -> "Config":
    """Look up and return ``router.config`` at call time.

    The ``router`` module is imported inside the function rather than at
    module load, which avoids a circular import and means the attribute is
    read afresh on every call.
    """
    import router as router_module  # lazy to avoid module-load circular import

    return router_module.config
|