refactor(openrouter): remove virtual openrouter/free auto-select entry

2026-05-21 18:55:16 +02:00 · 2026-05-01 18:16:47 +05:30 · 2026-05-01 18:16:47 +05:30 · 680a1c1c38
commit 680a1c1c38
parent 4d34b56c4d
3 changed files with 45 additions and 105 deletions
--- a/surfsense_backend/app/config/global_llm_config.example.yaml
+++ b/surfsense_backend/app/config/global_llm_config.example.yaml
@ -283,19 +283,15 @@ openrouter_integration:
  tpm: 1000000

  # Rate limits for FREE OpenRouter models. Informational only: free OR
-  # models and openrouter/free are intentionally kept OUT of the LiteLLM
-  # Router pool, because OpenRouter enforces free-tier limits globally per
-  # account (~20 RPM + 50-1000 daily requests across every ":free" model
-  # combined) — per-deployment router accounting can't represent a shared
-  # bucket correctly. Free OR models stay fully available in the model
-  # selector and for user-facing Auto thread pinning.
+  # models are intentionally kept OUT of the LiteLLM Router pool, because
+  # OpenRouter enforces free-tier limits globally per account (~20 RPM +
+  # 50-1000 daily requests across every ":free" model combined) —
+  # per-deployment router accounting can't represent a shared bucket
+  # correctly. Free OR models stay fully available in the model selector
+  # and for user-facing Auto thread pinning.
  free_rpm: 20
  free_tpm: 100000

-  # Expose openrouter/free as a single virtual "Free (Auto-Select)" entry.
-  # Recommended: keep true. OpenRouter picks a capable free model per request.
-  free_router_enabled: true
-
  litellm_params:
    max_tokens: 16384
  system_instructions: ""
--- a/surfsense_backend/app/services/openrouter_integration_service.py
+++ b/surfsense_backend/app/services/openrouter_integration_service.py
@ -26,11 +26,6 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
 # dynamic OpenRouter entries from hand-written YAML entries during refresh.
 _OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__"

-# Fixed negative ID for the virtual ``openrouter/free`` auto-select entry.
-# Chosen to sit far below any reasonable ``id_offset`` so it never collides
-# with per-model stable IDs.
-_FREE_ROUTER_ID = -9_999_999
-
 # Width of the hash space used by ``_stable_config_id``. 9_000_000 provides
 # enough headroom to avoid frequent collisions for OpenRouter's catalogue
 # (~300 models) while keeping IDs comfortably within Postgres INTEGER range.
@ -107,6 +102,11 @@ _EXCLUDED_MODEL_IDS: set[str] = {
    # Deep-research models reject standard params (temperature, etc.)
    "openai/o3-deep-research",
    "openai/o4-mini-deep-research",
+    # OpenRouter's own meta-router over free models. We already enumerate every
+    # concrete ``:free`` model into GLOBAL_LLM_CONFIGS and Auto-mode thread
+    # pinning handles churn via the repair path, so exposing an additional
+    # indirection layer would only duplicate the capability with an opaque slug.
+    "openrouter/free",
 }

 _EXCLUDED_MODEL_SUFFIXES: tuple[str, ...] = ("-deep-research",)
@ -160,43 +160,6 @@ async def _fetch_models_async() -> list[dict] | None:
        return None


-def _build_free_router_config(settings: dict[str, Any]) -> dict[str, Any]:
-    """Build the virtual ``openrouter/free`` auto-select config entry.
-
-    This exposes OpenRouter's Free Models Router as a single selectable
-    option. LiteLLM forwards ``openrouter/openrouter/free`` and OpenRouter
-    picks a capable free model per request (availability varies, account-wide
-    rate limit is ~20 req/min).
-    """
-    return {
-        "id": _FREE_ROUTER_ID,
-        "name": "OpenRouter Free (Auto-Select)",
-        "description": (
-            "OpenRouter picks a capable free model per request. "
-            "~20 req/min account-wide; availability varies."
-        ),
-        "provider": "OPENROUTER",
-        "model_name": "openrouter/free",
-        "api_key": settings.get("api_key", ""),
-        "api_base": "",
-        "billing_tier": "free",
-        "rpm": settings.get("free_rpm", 20),
-        "tpm": settings.get("free_tpm", 100_000),
-        "anonymous_enabled": settings.get("anonymous_enabled_free", False),
-        "seo_enabled": False,
-        "seo_slug": None,
-        "quota_reserve_tokens": settings.get("quota_reserve_tokens", 4000),
-        "litellm_params": dict(settings.get("litellm_params") or {}),
-        "system_instructions": settings.get("system_instructions", ""),
-        "use_default_system_instructions": settings.get(
-            "use_default_system_instructions", True
-        ),
-        "citations_enabled": settings.get("citations_enabled", True),
-        "router_pool_eligible": False,
-        _OPENROUTER_DYNAMIC_MARKER: True,
-    }
-
-
 def _generate_configs(
    raw_models: list[dict],
    settings: dict[str, Any],
@ -213,13 +176,18 @@ def _generate_configs(
    - Premium OR models join the LiteLLM router pool (``router_pool_eligible=True``)
      so sub-agent ``model="auto"`` flows benefit from load balancing and
      failover across the curated YAML configs and the OR premium passthrough.
-    - Free OR models and the virtual ``openrouter/free`` entry stay excluded
-      (``router_pool_eligible=False``). LiteLLM Router tracks rate limits per
-      deployment, but OpenRouter enforces a single global free-tier quota
-      (~20 RPM + 50-1000 daily requests account-wide across every ``:free``
-      model), so rotating across many free deployments would only burn the
-      shared bucket faster. Free OR models remain fully available for user-
-      facing Auto-mode thread pinning via ``auto_model_pin_service``.
+    - Free OR models stay excluded (``router_pool_eligible=False``). LiteLLM
+      Router tracks rate limits per deployment, but OpenRouter enforces a
+      single global free-tier quota (~20 RPM + 50-1000 daily requests
+      account-wide across every ``:free`` model), so rotating across many
+      free deployments would only burn the shared bucket faster. Free OR
+      models remain fully available for user-facing Auto-mode thread pinning
+      via ``auto_model_pin_service``.
+
+    OpenRouter's own ``openrouter/free`` meta-router is filtered out of the raw
+    model list via ``_EXCLUDED_MODEL_IDS``; we don't expose a redundant
+    auto-select layer because our own Auto (Fastest) pin + 24 h refresh +
+    repair logic already cover the catalogue-churn case.
    """
    id_offset: int = settings.get("id_offset", -10000)
    api_key: str = settings.get("api_key", "")
@ -248,13 +216,7 @@ def _generate_configs(
    ]

    configs: list[dict] = []
-
-    if settings.get("free_router_enabled", True) and api_key:
-        configs.append(_build_free_router_config(settings))
-
    taken: set[int] = set()
-    if configs:
-        taken.add(_FREE_ROUTER_ID)

    for model in text_models:
        model_id: str = model["id"]
@ -382,9 +344,9 @@ class OpenRouterIntegrationService:
        )

        # Rebuild the LiteLLM router so freshly fetched configs flow through
-        # (the router filters dynamic OR entries out of its pool, but a
-        # refresh still needs to pick up any static-config edits and reset
-        # cached context-window profiles).
+        # (dynamic OR premium entries join the router pool while free ones
+        # stay out; a refresh also needs to pick up any static-config edits
+        # and reset cached context-window profiles).
        try:
            from app.config import config as _app_config
            from app.services.llm_router_service import LLMRouterService
--- a/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
+++ b/surfsense_backend/tests/unit/services/test_openrouter_integration_service.py
@ -5,9 +5,7 @@ from __future__ import annotations
 import pytest

 from app.services.openrouter_integration_service import (
-    _FREE_ROUTER_ID,
    _OPENROUTER_DYNAMIC_MARKER,
-    _build_free_router_config,
    _generate_configs,
    _openrouter_tier,
    _stable_config_id,
@ -135,7 +133,6 @@ _SETTINGS_BASE: dict = {
    "anonymous_enabled_paid": False,
    "anonymous_enabled_free": True,
    "quota_reserve_tokens": 4000,
-    "free_router_enabled": False,
 }


@ -172,33 +169,26 @@ def test_generate_configs_respects_tier():
    assert free["router_pool_eligible"] is False


-def test_generate_configs_includes_free_router_when_enabled():
-    raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
-    settings = {**_SETTINGS_BASE, "free_router_enabled": True}
-    cfgs = _generate_configs(raw, settings)
-    free_router = next(
-        (c for c in cfgs if c["model_name"] == "openrouter/free"), None
-    )
-    assert free_router is not None
-    assert free_router["id"] == _FREE_ROUTER_ID
-    assert free_router["billing_tier"] == "free"
-    assert free_router["router_pool_eligible"] is False
-    assert free_router["anonymous_enabled"] is True
+def test_generate_configs_excludes_upstream_openrouter_free_router():
+    """OpenRouter's own ``openrouter/free`` meta-router must never become a card.

-
-def test_generate_configs_excludes_free_router_when_disabled():
-    raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
-    settings = {**_SETTINGS_BASE, "free_router_enabled": False}
-    cfgs = _generate_configs(raw, settings)
-    assert not any(c["model_name"] == "openrouter/free" for c in cfgs)
-
-
-def test_generate_configs_excludes_free_router_without_api_key():
-    """Without an API key the free-router entry is useless; skip it."""
-    raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
-    settings = {**_SETTINGS_BASE, "free_router_enabled": True, "api_key": ""}
-    cfgs = _generate_configs(raw, settings)
-    assert not any(c["model_name"] == "openrouter/free" for c in cfgs)
+    The upstream API returns this as a first-class zero-priced model, so
+    without an explicit blocklist entry it would slip through every other
+    filter (text output, tool calling, 200k context, non-Amazon) and land
+    in the selector as a duplicate of the concrete ``:free`` cards. The
+    exclusion in ``_EXCLUDED_MODEL_IDS`` prevents that.
+    """
+    raw = [
+        _minimal_openrouter_model(model_id="openai/gpt-4o"),
+        _minimal_openrouter_model(
+            model_id="openrouter/free",
+            pricing={"prompt": "0", "completion": "0"},
+        ),
+    ]
+    cfgs = _generate_configs(raw, dict(_SETTINGS_BASE))
+    model_names = {c["model_name"] for c in cfgs}
+    assert "openrouter/free" not in model_names
+    assert "openai/gpt-4o" in model_names


 def test_generate_configs_drops_non_text_and_non_tool_models():
@ -226,11 +216,3 @@ def test_generate_configs_drops_non_text_and_non_tool_models():
    assert "openai/completion-only" not in model_names


-def test_build_free_router_config_shape():
-    cfg = _build_free_router_config(dict(_SETTINGS_BASE))
-    assert cfg["provider"] == "OPENROUTER"
-    assert cfg["model_name"] == "openrouter/free"
-    assert cfg["id"] == _FREE_ROUTER_ID
-    assert cfg["billing_tier"] == "free"
-    assert cfg["router_pool_eligible"] is False
-    assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True