diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml
index d62b4a4a5..79cbe1e51 100644
--- a/surfsense_backend/app/config/global_llm_config.example.yaml
+++ b/surfsense_backend/app/config/global_llm_config.example.yaml
@@ -283,19 +283,15 @@ openrouter_integration:
   tpm: 1000000

   # Rate limits for FREE OpenRouter models. Informational only: free OR
-  # models and openrouter/free are intentionally kept OUT of the LiteLLM
-  # Router pool, because OpenRouter enforces free-tier limits globally per
-  # account (~20 RPM + 50-1000 daily requests across every ":free" model
-  # combined) — per-deployment router accounting can't represent a shared
-  # bucket correctly. Free OR models stay fully available in the model
-  # selector and for user-facing Auto thread pinning.
+  # models are intentionally kept OUT of the LiteLLM Router pool, because
+  # OpenRouter enforces free-tier limits globally per account (~20 RPM +
+  # 50-1000 daily requests across every ":free" model combined) —
+  # per-deployment router accounting can't represent a shared bucket
+  # correctly. Free OR models stay fully available in the model selector
+  # and for user-facing Auto thread pinning.
   free_rpm: 20
   free_tpm: 100000

-  # Expose openrouter/free as a single virtual "Free (Auto-Select)" entry.
-  # Recommended: keep true. OpenRouter picks a capable free model per request.
-  free_router_enabled: true
-
   litellm_params:
     max_tokens: 16384
   system_instructions: ""
diff --git a/surfsense_backend/app/services/openrouter_integration_service.py b/surfsense_backend/app/services/openrouter_integration_service.py
index 2d6a42337..06b7becdc 100644
--- a/surfsense_backend/app/services/openrouter_integration_service.py
+++ b/surfsense_backend/app/services/openrouter_integration_service.py
@@ -26,11 +26,6 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
 # dynamic OpenRouter entries from hand-written YAML entries during refresh.
 _OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__"

-# Fixed negative ID for the virtual ``openrouter/free`` auto-select entry.
-# Chosen to sit far below any reasonable ``id_offset`` so it never collides
-# with per-model stable IDs.
-_FREE_ROUTER_ID = -9_999_999
-
 # Width of the hash space used by ``_stable_config_id``. 9_000_000 provides
 # enough headroom to avoid frequent collisions for OpenRouter's catalogue
 # (~300 models) while keeping IDs comfortably within Postgres INTEGER range.
@@ -107,6 +102,11 @@ _EXCLUDED_MODEL_IDS: set[str] = {
     # Deep-research models reject standard params (temperature, etc.)
     "openai/o3-deep-research",
     "openai/o4-mini-deep-research",
+    # OpenRouter's own meta-router over free models. We already enumerate every
+    # concrete ``:free`` model into GLOBAL_LLM_CONFIGS and Auto-mode thread
+    # pinning handles churn via the repair path, so exposing an additional
+    # indirection layer would only duplicate the capability with an opaque slug.
+    "openrouter/free",
 }

 _EXCLUDED_MODEL_SUFFIXES: tuple[str, ...] = ("-deep-research",)
@@ -160,43 +160,6 @@ async def _fetch_models_async() -> list[dict] | None:
         return None


-def _build_free_router_config(settings: dict[str, Any]) -> dict[str, Any]:
-    """Build the virtual ``openrouter/free`` auto-select config entry.
-
-    This exposes OpenRouter's Free Models Router as a single selectable
-    option. LiteLLM forwards ``openrouter/openrouter/free`` and OpenRouter
-    picks a capable free model per request (availability varies, account-wide
-    rate limit is ~20 req/min).
-    """
-    return {
-        "id": _FREE_ROUTER_ID,
-        "name": "OpenRouter Free (Auto-Select)",
-        "description": (
-            "OpenRouter picks a capable free model per request. "
-            "~20 req/min account-wide; availability varies."
-        ),
-        "provider": "OPENROUTER",
-        "model_name": "openrouter/free",
-        "api_key": settings.get("api_key", ""),
-        "api_base": "",
-        "billing_tier": "free",
-        "rpm": settings.get("free_rpm", 20),
-        "tpm": settings.get("free_tpm", 100_000),
-        "anonymous_enabled": settings.get("anonymous_enabled_free", False),
-        "seo_enabled": False,
-        "seo_slug": None,
-        "quota_reserve_tokens": settings.get("quota_reserve_tokens", 4000),
-        "litellm_params": dict(settings.get("litellm_params") or {}),
-        "system_instructions": settings.get("system_instructions", ""),
-        "use_default_system_instructions": settings.get(
-            "use_default_system_instructions", True
-        ),
-        "citations_enabled": settings.get("citations_enabled", True),
-        "router_pool_eligible": False,
-        _OPENROUTER_DYNAMIC_MARKER: True,
-    }
-
-
 def _generate_configs(
     raw_models: list[dict],
     settings: dict[str, Any],
@@ -213,13 +176,18 @@ def _generate_configs(
     - Premium OR models join the LiteLLM router pool (``router_pool_eligible=True``)
       so sub-agent ``model="auto"`` flows benefit from load balancing and failover
       across the curated YAML configs and the OR premium passthrough.
-    - Free OR models and the virtual ``openrouter/free`` entry stay excluded
-      (``router_pool_eligible=False``). LiteLLM Router tracks rate limits per
-      deployment, but OpenRouter enforces a single global free-tier quota
-      (~20 RPM + 50-1000 daily requests account-wide across every ``:free``
-      model), so rotating across many free deployments would only burn the
-      shared bucket faster. Free OR models remain fully available for user-
-      facing Auto-mode thread pinning via ``auto_model_pin_service``.
+    - Free OR models stay excluded (``router_pool_eligible=False``). LiteLLM
+      Router tracks rate limits per deployment, but OpenRouter enforces a
+      single global free-tier quota (~20 RPM + 50-1000 daily requests
+      account-wide across every ``:free`` model), so rotating across many
+      free deployments would only burn the shared bucket faster. Free OR
+      models remain fully available for user-facing Auto-mode thread pinning
+      via ``auto_model_pin_service``.
+
+    OpenRouter's own ``openrouter/free`` meta-router is filtered out upstream
+    via ``_EXCLUDED_MODEL_IDS``; we don't expose a redundant auto-select layer
+    because our own Auto (Fastest) pin + 24 h refresh + repair logic already
+    cover the catalogue-churn case.
     """
     id_offset: int = settings.get("id_offset", -10000)
     api_key: str = settings.get("api_key", "")
@@ -248,13 +216,7 @@
     ]

     configs: list[dict] = []
-
-    if settings.get("free_router_enabled", True) and api_key:
-        configs.append(_build_free_router_config(settings))
-
     taken: set[int] = set()
-    if configs:
-        taken.add(_FREE_ROUTER_ID)

     for model in text_models:
         model_id: str = model["id"]
@@ -382,9 +344,9 @@ class OpenRouterIntegrationService:
         )

         # Rebuild the LiteLLM router so freshly fetched configs flow through
-        # (the router filters dynamic OR entries out of its pool, but a
-        # refresh still needs to pick up any static-config edits and reset
-        # cached context-window profiles).
+        # (dynamic OR premium entries now opt into the pool, free ones stay
+        # out; a refresh also needs to pick up any static-config edits and
+        # reset cached context-window profiles).
         try:
             from app.config import config as _app_config
             from app.services.llm_router_service import LLMRouterService
diff --git a/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py b/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
index 618edc23c..d3921729d 100644
--- a/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
+++ b/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
@@ -5,9 +5,7 @@ from __future__ import annotations
 import pytest

 from app.services.openrouter_integration_service import (
-    _FREE_ROUTER_ID,
     _OPENROUTER_DYNAMIC_MARKER,
-    _build_free_router_config,
     _generate_configs,
     _openrouter_tier,
     _stable_config_id,
 )
@@ -135,7 +133,6 @@ _SETTINGS_BASE: dict = {
     "anonymous_enabled_paid": False,
     "anonymous_enabled_free": True,
     "quota_reserve_tokens": 4000,
-    "free_router_enabled": False,
 }


@@ -172,33 +169,26 @@ def test_generate_configs_respects_tier():
     assert free["router_pool_eligible"] is False


-def test_generate_configs_includes_free_router_when_enabled():
-    raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
-    settings = {**_SETTINGS_BASE, "free_router_enabled": True}
-    cfgs = _generate_configs(raw, settings)
-    free_router = next(
-        (c for c in cfgs if c["model_name"] == "openrouter/free"), None
-    )
-    assert free_router is not None
-    assert free_router["id"] == _FREE_ROUTER_ID
-    assert free_router["billing_tier"] == "free"
-    assert free_router["router_pool_eligible"] is False
-    assert free_router["anonymous_enabled"] is True
+def test_generate_configs_excludes_upstream_openrouter_free_router():
+    """OpenRouter's own ``openrouter/free`` meta-router must never become a card.

-
-def test_generate_configs_excludes_free_router_when_disabled():
-    raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
-    settings = {**_SETTINGS_BASE, "free_router_enabled": False}
-    cfgs = _generate_configs(raw, settings)
-    assert not any(c["model_name"] == "openrouter/free" for c in cfgs)
-
-
-def test_generate_configs_excludes_free_router_without_api_key():
-    """Without an API key the free-router entry is useless; skip it."""
-    raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
-    settings = {**_SETTINGS_BASE, "free_router_enabled": True, "api_key": ""}
-    cfgs = _generate_configs(raw, settings)
-    assert not any(c["model_name"] == "openrouter/free" for c in cfgs)
+    The upstream API returns this as a first-class zero-priced model, so
+    without an explicit blocklist entry it would slip through every other
+    filter (text output, tool calling, 200k context, non-Amazon) and land
+    in the selector as a duplicate of the concrete ``:free`` cards. The
+    exclusion in ``_EXCLUDED_MODEL_IDS`` prevents that.
+    """
+    raw = [
+        _minimal_openrouter_model(model_id="openai/gpt-4o"),
+        _minimal_openrouter_model(
+            model_id="openrouter/free",
+            pricing={"prompt": "0", "completion": "0"},
+        ),
+    ]
+    cfgs = _generate_configs(raw, dict(_SETTINGS_BASE))
+    model_names = {c["model_name"] for c in cfgs}
+    assert "openrouter/free" not in model_names
+    assert "openai/gpt-4o" in model_names


 def test_generate_configs_drops_non_text_and_non_tool_models():
@@ -226,11 +216,3 @@
     assert "openai/completion-only" not in model_names


-def test_build_free_router_config_shape():
-    cfg = _build_free_router_config(dict(_SETTINGS_BASE))
-    assert cfg["provider"] == "OPENROUTER"
-    assert cfg["model_name"] == "openrouter/free"
-    assert cfg["id"] == _FREE_ROUTER_ID
-    assert cfg["billing_tier"] == "free"
-    assert cfg["router_pool_eligible"] is False
-    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True