diff --git a/config.py b/config.py
new file mode 100644
index 0000000..e3b5ee6
--- /dev/null
+++ b/config.py
@@ -0,0 +1,126 @@
+"""Router configuration loader.
+
+Pydantic ``BaseSettings`` model populated from YAML (path resolved via
+``_config_path_from_env``) with ``${VAR}`` expansion, plus env-var overrides
+under the ``NOMYO_ROUTER_`` prefix.
+"""
+import os
+import re
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import yaml
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class Config(BaseSettings):
+    """Router settings, built from YAML via ``from_yaml``; env vars use the ``NOMYO_ROUTER_`` prefix."""
+
+    # List of Ollama endpoints
+    endpoints: list[str] = Field(
+        default_factory=lambda: [
+            "http://localhost:11434",
+        ]
+    )
+    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
+    llama_server_endpoints: List[str] = Field(default_factory=list)
+    # Max concurrent connections per endpoint‑model pair, see OLLAMA_NUM_PARALLEL
+    max_concurrent_connections: int = 1
+    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
+    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
+    # When True, config order = priority; routes by utilization ratio + config index (WRR)
+    priority_routing: bool = Field(default=False)
+
+    # Conversation affinity: route the same conversation back to the endpoint that
+    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
+    # Soft preference — falls back to the standard algorithm when the affine endpoint
+    # is saturated or no longer has the model loaded.
+    conversation_affinity: bool = Field(default=False)
+    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
+    # if the backend has already evicted the model, the KV cache is cold anyway.
+    conversation_affinity_ttl: int = Field(default=300)
+
+    api_keys: Dict[str, str] = Field(default_factory=dict)
+    # Optional router-level API key used to gate access to this service and dashboard
+    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")
+
+    # Database configuration
+    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))
+
+    # Semantic LLM Cache configuration
+    cache_enabled: bool = Field(default=False)
+    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
+    cache_backend: str = Field(default="memory")
+    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
+    cache_similarity: float = Field(default=1.0)
+    # TTL in seconds; None = cache forever
+    cache_ttl: Optional[int] = Field(default=3600)
+    # SQLite backend: path to cache database file
+    cache_db_path: str = Field(default="llm_cache.db")
+    # Redis backend: connection URL
+    cache_redis_url: str = Field(default="redis://localhost:6379/0")
+    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
+    # 0.3 = 30% history context signal, 70% question signal
+    cache_history_weight: float = Field(default=0.3)
+
+    class Config:
+        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
+        env_prefix = "NOMYO_ROUTER_"
+
+    @classmethod
+    def _expand_env_refs(cls, obj):
+        """Recursively replace string values that are exactly `${VAR}` with os.getenv('VAR', '')."""
+        if isinstance(obj, dict):
+            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [cls._expand_env_refs(v) for v in obj]
+        if isinstance(obj, str):
+            # Whole-string match only: `${VAR}` inside longer text is left as-is; dict keys are never expanded
+            m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
+            if m:
+                return os.getenv(m.group(1), "")
+        return obj
+
+    @classmethod
+    def from_yaml(cls, path: Path) -> "Config":
+        """Build a Config from the YAML file at ``path`` (all defaults if it is missing).
+
+        ``${VAR}`` values are expanded first.  The router API key is accepted under
+        several spellings and, if still unset, taken from NOMYO_ROUTER_API_KEY.
+        Raises TypeError if the YAML top level is not a mapping.
+        """
+        cleaned: dict = {}
+        if path.exists():
+            with path.open("r", encoding="utf-8") as fp:
+                cleaned = cls._expand_env_refs(yaml.safe_load(fp) or {})
+            if not isinstance(cleaned, dict):
+                raise TypeError(f"{path}: YAML top level must be a mapping, not {type(cleaned).__name__}")
+            # Accept alternate spellings of the key; the first one present wins.
+            for alias in (
+                "router_api_key", "nomyo-router-api-key", "nomyo_router_api_key",
+                "nomyo-router_api_key", "nomyo_router-api_key",
+                "NOMYO-ROUTER_API_KEY", "NOMYO_ROUTER_API_KEY",
+            ):
+                if alias in cleaned:
+                    cleaned["router_api_key"] = cleaned.pop(alias)
+                    break
+        # Explicit NOMYO_ROUTER_API_KEY fallback, applied even when no config file exists.
+        # NOTE(review): with env_prefix "NOMYO_ROUTER_" the settings loader presumably looks
+        # for NOMYO_ROUTER_ROUTER_API_KEY instead — confirm against pydantic-settings docs.
+        env_key = os.getenv("NOMYO_ROUTER_API_KEY")
+        if env_key and not cleaned.get("router_api_key"):
+            cleaned["router_api_key"] = env_key
+        return cls(**cleaned)
+
+
+def _config_path_from_env() -> Path:
+    """Return the path of the router configuration file.
+
+    A non-empty ``NOMYO_ROUTER_CONFIG_PATH`` wins (leading ``~`` expanded);
+    otherwise the relative path ``config.yaml`` (current working directory).
+    """
+    override = os.getenv("NOMYO_ROUTER_CONFIG_PATH")
+    if not override:
+        return Path("config.yaml")
+    return Path(override).expanduser()
diff --git a/router.py b/router.py
index c5a0336..825bafc 100644
--- a/router.py
+++ b/router.py
@@ -107,118 +107,7 @@ buffer_lock = asyncio.Lock()
 # Configuration for periodic flushing
 FLUSH_INTERVAL = 10  # seconds
 
-# -------------------------------------------------------------
-# 1. Configuration loader
-# -------------------------------------------------------------
-class Config(BaseSettings):
-    # List of Ollama endpoints
-    endpoints: list[str] = Field(
-        default_factory=lambda: [
-            "http://localhost:11434",
-        ]
-    )
-    # List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
-    llama_server_endpoints: List[str] = Field(default_factory=list)
-    # Max concurrent connections per endpoint‑model pair, see OLLAMA_NUM_PARALLEL
-    max_concurrent_connections: int = 1
-    # Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
-    endpoint_config: Dict[str, Dict] = Field(default_factory=dict)
-    # When True, config order = priority; routes by utilization ratio + config index (WRR)
-    priority_routing: bool = Field(default=False)
-
-    # Conversation affinity: route the same conversation back to the endpoint that
-    # previously served it, to keep the llama.cpp / Ollama prompt cache (KV cache) warm.
-    # Soft preference — falls back to the standard algorithm when the affine endpoint
-    # is saturated or no longer has the model loaded.
-    conversation_affinity: bool = Field(default=False)
-    # TTL (seconds) for affinity entries. Defaults to Ollama's default keep_alive (5 min):
-    # if the backend has already evicted the model, the KV cache is cold anyway.
-    conversation_affinity_ttl: int = Field(default=300)
-
-    api_keys: Dict[str, str] = Field(default_factory=dict)
-    # Optional router-level API key used to gate access to this service and dashboard
-    router_api_key: Optional[str] = Field(default=None, env="NOMYO_ROUTER_API_KEY")
-
-    # Database configuration
-    db_path: str = Field(default=os.getenv("NOMYO_ROUTER_DB_PATH", "token_counts.db"))
-
-    # Semantic LLM Cache configuration
-    cache_enabled: bool = Field(default=False)
-    # Backend: "memory" (default, in-process), "sqlite" (persistent), "redis" (distributed)
-    cache_backend: str = Field(default="memory")
-    # Cosine similarity threshold: 1.0 = exact match only, <1.0 = semantic (requires :semantic image)
-    cache_similarity: float = Field(default=1.0)
-    # TTL in seconds; None = cache forever
-    cache_ttl: Optional[int] = Field(default=3600)
-    # SQLite backend: path to cache database file
-    cache_db_path: str = Field(default="llm_cache.db")
-    # Redis backend: connection URL
-    cache_redis_url: str = Field(default="redis://localhost:6379/0")
-    # Weight of BM25-weighted chat-history embedding vs last-user-message embedding
-    # 0.3 = 30% history context signal, 70% question signal
-    cache_history_weight: float = Field(default=0.3)
-
-    class Config:
-        # YAML loading is handled manually via Config.from_yaml(); env vars use this prefix.
-        env_prefix = "NOMYO_ROUTER_"
-
-    @classmethod
-    def _expand_env_refs(cls, obj):
-        """Recursively replace `${VAR}` with os.getenv('VAR')."""
-        if isinstance(obj, dict):
-            return {k: cls._expand_env_refs(v) for k, v in obj.items()}
-        if isinstance(obj, list):
-            return [cls._expand_env_refs(v) for v in obj]
-        if isinstance(obj, str):
-            # Only expand if it is exactly ${VAR}
-            m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj)
-            if m:
-                return os.getenv(m.group(1), "")
-        return obj
-
-    @classmethod
-    def from_yaml(cls, path: Path) -> "Config":
-        """Load the YAML file and create the Config instance."""
-        if path.exists():
-            with path.open("r", encoding="utf-8") as fp:
-                data = yaml.safe_load(fp) or {}
-                cleaned = cls._expand_env_refs(data)
-                if isinstance(cleaned, dict):
-                    # Accept hyphenated config key and map it to the field name
-                    key_aliases = [
-                        # canonical field name
-                        "router_api_key",
-                        # lowercase, hyphen/underscore variants
-                        "nomyo-router-api-key",
-                        "nomyo_router_api_key",
-                        "nomyo-router_api_key",
-                        "nomyo_router-api_key",
-                        # uppercase env-style variants
-                        "NOMYO-ROUTER_API_KEY",
-                        "NOMYO_ROUTER_API_KEY",
-                    ]
-                    for alias in key_aliases:
-                        if alias in cleaned:
-                            cleaned["router_api_key"] = cleaned.get("router_api_key", cleaned.pop(alias))
-                            break
-                    # If not present in YAML (or empty), fall back to env var explicitly
-                    if not cleaned.get("router_api_key"):
-                        env_key = os.getenv("NOMYO_ROUTER_API_KEY")
-                        if env_key:
-                            cleaned["router_api_key"] = env_key
-            return cls(**cleaned)
-        return cls()
-
-def _config_path_from_env() -> Path:
-    """
-    Resolve the configuration file path. Defaults to `config.yaml`
-    in the current working directory unless NOMYO_ROUTER_CONFIG_PATH
-    is set.
-    """
-    candidate = os.getenv("NOMYO_ROUTER_CONFIG_PATH")
-    if candidate:
-        return Path(candidate).expanduser()
-    return Path("config.yaml")
+from config import Config, _config_path_from_env
 
 from ollama._types import TokenLogprob, Logprob
 from db import TokenDatabase