refac: modularize config II
This commit is contained in:
parent
90b6868f5a
commit
d2b31b6c7b
2 changed files with 127 additions and 112 deletions
126
config.py
Normal file
126
config.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Router configuration loader.
|
||||
|
||||
Pydantic ``BaseSettings`` model populated from YAML (path resolved via
|
||||
``_config_path_from_env``) with ``${VAR}`` expansion, plus env-var overrides
|
||||
under the ``NOMYO_ROUTER_`` prefix.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Config(BaseSettings):
    """Router settings model.

    Instances are normally created through :meth:`from_yaml`, which reads a
    YAML file, expands ``${VAR}`` references and passes the resulting mapping
    to the constructor as keyword arguments. Environment variables carrying
    the ``NOMYO_ROUTER_`` prefix are handled by the settings base class.
    """

    # List of Ollama endpoints
    endpoints: List[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint-model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)

    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard.
    # The NOMYO_ROUTER_API_KEY environment variable is resolved explicitly in
    # from_yaml(); the former ``Field(env=...)`` argument is not honoured by
    # pydantic-settings v2, so it has been dropped here.
    router_api_key: Optional[str] = Field(default=None)

    # Database configuration (default is read from the environment at import time)
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        # NOTE(review): pydantic v2 prefers ``model_config = SettingsConfigDict(...)``;
        # the nested class is kept to avoid touching the file's imports.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Recursively replace string values of the exact form ``${VAR}``.

        Dict values and list items are processed recursively (dict keys are
        left untouched). A string is replaced only when the *whole* string is
        ``${VAR}``; an unset variable expands to the empty string. Any other
        value is returned unchanged.
        """
        if isinstance(obj, dict):
            return {key: cls._expand_env_refs(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(item) for item in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}
            match = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
            if match:
                return os.getenv(match.group(1), "")
        return obj

    @classmethod
    def _resolve_router_api_key(cls, settings: dict) -> dict:
        """Return a copy of *settings* with ``router_api_key`` normalised.

        All accepted spellings of the key are removed from the mapping so no
        unknown key reaches the constructor; the first spelling present, in
        the priority order below, supplies the value. When the resulting value
        is missing or empty, ``NOMYO_ROUTER_API_KEY`` from the environment is
        used if it is set to a non-empty string.
        """
        key_aliases = (
            # canonical field name
            "router_api_key",
            # lowercase, hyphen/underscore variants
            "nomyo-router-api-key",
            "nomyo_router_api_key",
            "nomyo-router_api_key",
            "nomyo_router-api_key",
            # uppercase env-style variants
            "NOMYO-ROUTER_API_KEY",
            "NOMYO_ROUTER_API_KEY",
        )
        resolved = dict(settings)
        # Pop every alias (not just the first hit) so leftovers cannot be
        # passed on as unexpected constructor arguments.
        present = [resolved.pop(alias) for alias in key_aliases if alias in resolved]
        if present:
            resolved["router_api_key"] = present[0]
        # If not present in YAML (or empty), fall back to env var explicitly
        if not resolved.get("router_api_key"):
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                resolved["router_api_key"] = env_key
        return resolved

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Build a ``Config`` from the YAML file at *path*.

        When the file does not exist, or is empty, no YAML values are passed
        to the constructor. In every case the router API key is resolved via
        :meth:`_resolve_router_api_key`, so ``NOMYO_ROUTER_API_KEY`` is
        honoured even without a config file.

        Raises:
            TypeError: if the top-level YAML value is not a mapping.
        """
        settings: dict = {}
        if path.exists():
            with path.open("r", encoding="utf-8") as fp:
                loaded = yaml.safe_load(fp) or {}
            settings = cls._expand_env_refs(loaded)
            if not isinstance(settings, dict):
                raise TypeError(
                    f"{path}: top-level YAML value must be a mapping, "
                    f"got {type(settings).__name__}"
                )
        return cls(**cls._resolve_router_api_key(settings))
|
||||
|
||||
|
||||
def _config_path_from_env() -> Path:
    """Resolve the configuration file path.

    Returns the value of ``NOMYO_ROUTER_CONFIG_PATH`` (with surrounding
    whitespace removed and ``~`` expanded) when it is set to a non-blank
    string; otherwise the relative path ``config.yaml``, which resolves
    against the current working directory.
    """
    # Strip so a blank or padded value (e.g. from an env file) is not
    # turned into a bogus path.
    candidate = os.getenv("NOMYO_ROUTER_CONFIG_PATH", "").strip()
    if candidate:
        return Path(candidate).expanduser()
    return Path("config.yaml")
|
||||
113
router.py
113
router.py
|
|
@ -107,118 +107,7 @@ buffer_lock = asyncio.Lock()
|
|||
# Configuration for periodic flushing
|
||||
FLUSH_INTERVAL = 10 # seconds
|
||||
|
||||
# -------------------------------------------------------------
|
||||
# 1. Configuration loader
|
||||
# -------------------------------------------------------------
|
||||
class Config(BaseSettings):
    """Router settings model.

    Instances are normally created through :meth:`from_yaml`, which reads a
    YAML file, expands ``${VAR}`` references and passes the resulting mapping
    to the constructor as keyword arguments. Environment variables carrying
    the ``NOMYO_ROUTER_`` prefix are handled by the settings base class.
    """

    # List of Ollama endpoints
    endpoints: List[str] = Field(
        default_factory=lambda: [
            "http://localhost:11434",
        ]
    )
    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
    llama_server_endpoints: List[str] = Field(default_factory=list)
    # Max concurrent connections per endpoint-model pair, see OLLAMA_NUM_PARALLEL
    max_concurrent_connections: int = 1
    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
    # When True, config order = priority; routes by utilization ratio + config index (WRR)
    priority_routing: bool = Field(default=False)

    # Conversation affinity: route the same conversation back to the endpoint that
    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
    # Soft preference — falls back to the standard algorithm when the affine endpoint
    # is saturated or no longer has the model loaded.
    conversation_affinity: bool = Field(default=False)
    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
    # if the backend has already evicted the model, the KV cache is cold anyway.
    conversation_affinity_ttl: int = Field(default=300)

    api_keys: Dict[str, str] = Field(default_factory=dict)
    # Optional router-level API key used to gate access to this service and dashboard.
    # The NOMYO_ROUTER_API_KEY environment variable is resolved explicitly in
    # from_yaml(); the former ``Field(env=...)`` argument is not honoured by
    # pydantic-settings v2, so it has been dropped here.
    router_api_key: Optional[str] = Field(default=None)

    # Database configuration (default is read from the environment at import time)
    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))

    # Semantic LLM Cache configuration
    cache_enabled: bool = Field(default=False)
    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
    cache_backend: str = Field(default="memory")
    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
    cache_similarity: float = Field(default=1.0)
    # TTL in seconds; None = cache forever
    cache_ttl: Optional[int] = Field(default=3600)
    # SQLite backend: path to cache database file
    cache_db_path: str = Field(default="llm_cache.db")
    # Redis backend: connection URL
    cache_redis_url: str = Field(default="redis://localhost:6379/0")
    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
    # 0.3 = 30% history context signal, 70% question signal
    cache_history_weight: float = Field(default=0.3)

    class Config:
        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
        # NOTE(review): pydantic v2 prefers ``model_config = SettingsConfigDict(...)``;
        # the nested class is kept to avoid touching the file's imports.
        env_prefix = "NOMYO_ROUTER_"

    @classmethod
    def _expand_env_refs(cls, obj):
        """Recursively replace string values of the exact form ``${VAR}``.

        Dict values and list items are processed recursively (dict keys are
        left untouched). A string is replaced only when the *whole* string is
        ``${VAR}``; an unset variable expands to the empty string. Any other
        value is returned unchanged.
        """
        if isinstance(obj, dict):
            return {key: cls._expand_env_refs(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [cls._expand_env_refs(item) for item in obj]
        if isinstance(obj, str):
            # Only expand if it is exactly ${VAR}
            match = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
            if match:
                return os.getenv(match.group(1), "")
        return obj

    @classmethod
    def _resolve_router_api_key(cls, settings: dict) -> dict:
        """Return a copy of *settings* with ``router_api_key`` normalised.

        All accepted spellings of the key are removed from the mapping so no
        unknown key reaches the constructor; the first spelling present, in
        the priority order below, supplies the value. When the resulting value
        is missing or empty, ``NOMYO_ROUTER_API_KEY`` from the environment is
        used if it is set to a non-empty string.
        """
        key_aliases = (
            # canonical field name
            "router_api_key",
            # lowercase, hyphen/underscore variants
            "nomyo-router-api-key",
            "nomyo_router_api_key",
            "nomyo-router_api_key",
            "nomyo_router-api_key",
            # uppercase env-style variants
            "NOMYO-ROUTER_API_KEY",
            "NOMYO_ROUTER_API_KEY",
        )
        resolved = dict(settings)
        # Pop every alias (not just the first hit) so leftovers cannot be
        # passed on as unexpected constructor arguments.
        present = [resolved.pop(alias) for alias in key_aliases if alias in resolved]
        if present:
            resolved["router_api_key"] = present[0]
        # If not present in YAML (or empty), fall back to env var explicitly
        if not resolved.get("router_api_key"):
            env_key = os.getenv("NOMYO_ROUTER_API_KEY")
            if env_key:
                resolved["router_api_key"] = env_key
        return resolved

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Build a ``Config`` from the YAML file at *path*.

        When the file does not exist, or is empty, no YAML values are passed
        to the constructor. In every case the router API key is resolved via
        :meth:`_resolve_router_api_key`, so ``NOMYO_ROUTER_API_KEY`` is
        honoured even without a config file.

        Raises:
            TypeError: if the top-level YAML value is not a mapping.
        """
        settings: dict = {}
        if path.exists():
            with path.open("r", encoding="utf-8") as fp:
                loaded = yaml.safe_load(fp) or {}
            settings = cls._expand_env_refs(loaded)
            if not isinstance(settings, dict):
                raise TypeError(
                    f"{path}: top-level YAML value must be a mapping, "
                    f"got {type(settings).__name__}"
                )
        return cls(**cls._resolve_router_api_key(settings))
|
||||
|
||||
def _config_path_from_env() -> Path:
    """Resolve the configuration file path.

    Returns the value of ``NOMYO_ROUTER_CONFIG_PATH`` (with surrounding
    whitespace removed and ``~`` expanded) when it is set to a non-blank
    string; otherwise the relative path ``config.yaml``, which resolves
    against the current working directory.
    """
    # Strip so a blank or padded value (e.g. from an env file) is not
    # turned into a bogus path.
    candidate = os.getenv("NOMYO_ROUTER_CONFIG_PATH", "").strip()
    if candidate:
        return Path(candidate).expanduser()
    return Path("config.yaml")
|
||||
from config import Config, _config_path_from_env
|
||||
|
||||
from ollama._types import TokenLogprob, Logprob
|
||||
from db import TokenDatabase
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue