diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py index 4bce79a43..d624ff56c 100644 --- a/surfsense_backend/app/services/llm_router_service.py +++ b/surfsense_backend/app/services/llm_router_service.py @@ -207,6 +207,12 @@ class LLMRouterService: """ Initialize the router with global LLM configurations. + Configs with ``router_pool_eligible=False`` are skipped so that + dynamic OpenRouter entries stay out of the shared router pool used + by title-gen / sub-agent ``model="auto"`` flows. Those dynamic + entries are still available for user-facing Auto-mode thread pinning + via ``auto_model_pin_service``. + Args: global_configs: List of global LLM config dictionaries from YAML router_settings: Optional router settings (routing_strategy, num_retries, etc.) @@ -220,6 +226,8 @@ class LLMRouterService: model_list = [] premium_models: set[str] = set() for config in global_configs: + if config.get("router_pool_eligible") is False: + continue deployment = cls._config_to_deployment(config) if deployment: model_list.append(deployment) @@ -308,10 +316,45 @@ class LLMRouterService: logger.error(f"Failed to initialize LLM Router: {e}") instance._router = None + @classmethod + def rebuild( + cls, + global_configs: list[dict], + router_settings: dict | None = None, + ) -> None: + """Reset the router and re-run ``initialize`` with fresh configs. + + ``initialize`` short-circuits once it has run to avoid re-creating the + LiteLLM Router on every request; ``rebuild`` deliberately clears + ``_initialized`` so a caller (e.g. background OpenRouter refresh) + can force the pool to be rebuilt after catalogue changes. + """ + instance = cls.get_instance() + instance._initialized = False + instance._router = None + instance._model_list = [] + instance._premium_model_strings = set() + cls.initialize(global_configs, router_settings) + @classmethod def is_premium_model(cls, model_string: str) -> bool: - """Return True if *model_string* (as reported by LiteLLM) belongs to a - premium-tier deployment in the router pool.""" + """Return True if *model_string* belongs to a premium-tier deployment + in the LiteLLM router pool. + + Scope: only covers configs with ``router_pool_eligible`` truthy. That + includes static YAML premium configs AND dynamic OpenRouter *premium* + entries (which opt in at generation time). Dynamic OpenRouter *free* + entries and the virtual ``openrouter/free`` router are deliberately + kept out of the router pool — OpenRouter enforces free-tier limits + globally per account, so per-deployment router accounting can't + represent them correctly — and therefore return ``False`` here, which + matches their ``billing_tier="free"`` (no premium quota). + + For per-request premium checks on an arbitrary config (static or + dynamic, pool or non-pool), read ``agent_config.is_premium`` instead; + that reflects the per-config ``billing_tier`` directly and is what + user-facing Auto-mode thread pinning uses to bill correctly. + """ instance = cls.get_instance() return model_string in instance._premium_model_strings