refac: modularize config II

This commit is contained in:
Alpha Nerd 2026-05-19 11:00:50 +02:00
parent 90b6868f5a
commit d2b31b6c7b
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
2 changed files with 127 additions and 112 deletions

126
config.py Normal file
View file

@ -0,0 +1,126 @@
"""Router configuration loader.
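Pydantic ``BaseSettings`` model populated from YAML (path resolved via
``_config_path_from_env``) with ``${VAR}`` expansion. Environment variables
under the ``NOMYO_ROUTER_`` prefix supply values for fields the YAML file
does not set; values read from YAML are passed to the constructor and
therefore take precedence over the environment.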
"""
import os
import re
from pathlib import Path
from typing import Dict, List, Optional
import yaml
from pydantic import Field
from pydantic_settings import BaseSettings
class Config(BaseSettings):
    """Router settings model.

    Instances are normally built with :meth:`from_yaml`, which reads a YAML
    file, expands ``${VAR}`` placeholders and passes the result to the
    constructor. Fields absent from the YAML keep the defaults declared below
    or are resolved by ``BaseSettings`` using the ``NOMYO_ROUTER_`` prefix.
    """

    # List of Ollama endpoints
    endpoints: list[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint/model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)
    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard.
    # NOTE(review): ``env=`` is the pydantic v1 spelling and does not appear to be
    # honoured by pydantic-settings v2 — from_yaml() therefore reads
    # NOMYO_ROUTER_API_KEY explicitly. Confirm before relying on the kwarg.
    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")

    # Database configuration
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Return a copy of *obj* with ``${VAR}`` placeholders expanded.

        Dict values and list items are processed recursively; dict keys are
        left untouched. A string is replaced only when it consists of exactly
        one ``${VAR}`` placeholder, in which case it becomes the value of the
        environment variable ``VAR`` (or ``""`` when that variable is unset).
        Every other value is returned unchanged.
        """
        if isinstance(obj, dict):
            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(v) for v in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}; embedded placeholders stay literal.
            m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
            if m:
                return os.getenv(m.group(1), "")
        return obj

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Build a Config from the YAML file at *path*.

        When *path* exists, its content is parsed with ``yaml.safe_load``,
        ``${VAR}`` placeholders are expanded and the accepted spellings of the
        router API key are folded into ``router_api_key``. When it does not
        exist, no YAML values are used. In both cases a missing or empty
        ``router_api_key`` falls back to the ``NOMYO_ROUTER_API_KEY``
        environment variable.

        Args:
            path: Location of the YAML configuration file.

        Returns:
            The populated Config instance.

        Raises:
            TypeError: If the top-level value of the YAML document is not a
                mapping.
        """
        cleaned = {}
        if path.exists():
            with path.open("r", encoding="utf-8") as fp:
                data = yaml.safe_load(fp) or {}
            cleaned = cls._expand_env_refs(data)
            if not isinstance(cleaned, dict):
                # ``cls(**cleaned)`` would raise TypeError anyway; fail with a
                # message that points at the actual problem instead.
                raise TypeError(
                    f"{path}: top-level YAML value must be a mapping, "
                    f"got {type(cleaned).__name__}"
                )
            # Accept hyphenated config key and map it to the field name
            key_aliases = [
                # canonical field name
                "router_api_key",
                # lowercase, hyphen/underscore variants
                "nomyo-router-api-key",
                "nomyo_router_api_key",
                "nomyo-router_api_key",
                "nomyo_router-api_key",
                # uppercase env-style variants
                "NOMYO-ROUTER_API_KEY",
                "NOMYO_ROUTER_API_KEY",
            ]
            for alias in key_aliases:
                if alias in cleaned:
                    # The canonical name is checked first, so the first
                    # spelling found is always the one that wins.
                    cleaned["router_api_key"] = cleaned.pop(alias)
                    break
        # If not present in YAML (or empty, or there is no YAML file at all),
        # fall back to the env var explicitly.
        if not cleaned.get("router_api_key"):
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                cleaned["router_api_key"] = env_key
        return cls(**cleaned)
def _config_path_from_env() -> Path:
    """Return the path of the router configuration file.

    ``NOMYO_ROUTER_CONFIG_PATH`` is used when it is set to a non-empty value
    (a leading ``~`` is expanded). Otherwise the relative path ``config.yaml``
    is returned, which resolves against the current working directory.
    """
    override = os.getenv("NOMYO_ROUTER_CONFIG_PATH")
    return Path(override).expanduser() if override else Path("config.yaml")

113
router.py
View file

@ -107,118 +107,7 @@ buffer_lock = asyncio.Lock()
# Configuration for periodic flushing
# NOTE(review): the code that consumes this interval is outside this hunk —
# presumably it drains the buffer guarded by ``buffer_lock``; confirm.
FLUSH_INTERVAL = 10 # seconds
# -------------------------------------------------------------
# 1. Configuration loader
# -------------------------------------------------------------
class Config(BaseSettings):
    """Router settings model.

    Instances are normally built with :meth:`from_yaml`, which reads a YAML
    file, expands ``${VAR}`` placeholders and passes the result to the
    constructor. Fields absent from the YAML keep the defaults declared below
    or are resolved by ``BaseSettings`` using the ``NOMYO_ROUTER_`` prefix.
    """

    # List of Ollama endpoints
    endpoints: list[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint/model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)
    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard.
    # NOTE(review): ``env=`` is the pydantic v1 spelling and does not appear to be
    # honoured by pydantic-settings v2 — from_yaml() therefore reads
    # NOMYO_ROUTER_API_KEY explicitly. Confirm before relying on the kwarg.
    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")

    # Database configuration
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Return a copy of *obj* with ``${VAR}`` placeholders expanded.

        Dict values and list items are processed recursively; dict keys are
        left untouched. A string is replaced only when it consists of exactly
        one ``${VAR}`` placeholder, in which case it becomes the value of the
        environment variable ``VAR`` (or ``""`` when that variable is unset).
        Every other value is returned unchanged.
        """
        if isinstance(obj, dict):
            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(v) for v in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}; embedded placeholders stay literal.
            m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
            if m:
                return os.getenv(m.group(1), "")
        return obj

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Build a Config from the YAML file at *path*.

        When *path* exists, its content is parsed with ``yaml.safe_load``,
        ``${VAR}`` placeholders are expanded and the accepted spellings of the
        router API key are folded into ``router_api_key``. When it does not
        exist, no YAML values are used. In both cases a missing or empty
        ``router_api_key`` falls back to the ``NOMYO_ROUTER_API_KEY``
        environment variable.

        Args:
            path: Location of the YAML configuration file.

        Returns:
            The populated Config instance.

        Raises:
            TypeError: If the top-level value of the YAML document is not a
                mapping.
        """
        cleaned = {}
        if path.exists():
            with path.open("r", encoding="utf-8") as fp:
                data = yaml.safe_load(fp) or {}
            cleaned = cls._expand_env_refs(data)
            if not isinstance(cleaned, dict):
                # ``cls(**cleaned)`` would raise TypeError anyway; fail with a
                # message that points at the actual problem instead.
                raise TypeError(
                    f"{path}: top-level YAML value must be a mapping, "
                    f"got {type(cleaned).__name__}"
                )
            # Accept hyphenated config key and map it to the field name
            key_aliases = [
                # canonical field name
                "router_api_key",
                # lowercase, hyphen/underscore variants
                "nomyo-router-api-key",
                "nomyo_router_api_key",
                "nomyo-router_api_key",
                "nomyo_router-api_key",
                # uppercase env-style variants
                "NOMYO-ROUTER_API_KEY",
                "NOMYO_ROUTER_API_KEY",
            ]
            for alias in key_aliases:
                if alias in cleaned:
                    # The canonical name is checked first, so the first
                    # spelling found is always the one that wins.
                    cleaned["router_api_key"] = cleaned.pop(alias)
                    break
        # If not present in YAML (or empty, or there is no YAML file at all),
        # fall back to the env var explicitly.
        if not cleaned.get("router_api_key"):
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                cleaned["router_api_key"] = env_key
        return cls(**cleaned)
def _config_path_from_env() -> Path:
    """Return the path of the router configuration file.

    ``NOMYO_ROUTER_CONFIG_PATH`` is used when it is set to a non-empty value
    (a leading ``~`` is expanded). Otherwise the relative path ``config.yaml``
    is returned, which resolves against the current working directory.
    """
    override = os.getenv("NOMYO_ROUTER_CONFIG_PATH")
    return Path(override).expanduser() if override else Path("config.yaml")
from config import Config, _config_path_from_env
from ollama._types import TokenLogprob, Logprob
from db import TokenDatabase