feat(openrouter): derive billing tier per-model and stabilize config IDs

2026-06-16 21:05:20 +02:00 · 2026-05-01 17:42:21 +05:30 · 2026-05-01 17:42:21 +05:30 · ccd7caf99f
commit ccd7caf99f
parent 5dd45a5740
1 changed files with 173 additions and 18 deletions
--- a/surfsense_backend/app/services/openrouter_integration_service.py
+++ b/surfsense_backend/app/services/openrouter_integration_service.py
@ -11,6 +11,7 @@ this service only manages the catalogue, not the inference path.
 """

 import asyncio
+import hashlib
 import logging
 import threading
 from typing import Any
@ -25,6 +26,56 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
 # dynamic OpenRouter entries from hand-written YAML entries during refresh.
 _OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__"

+# Fixed negative ID for the virtual ``openrouter/free`` auto-select entry.
+# With the default ``id_offset`` it sits below the per-model hash range, and
+# ``_generate_configs`` reserves it in ``taken`` so stable IDs never reuse it.
+_FREE_ROUTER_ID = -9_999_999
+
+# Width of the hash space used by ``_stable_config_id``. 9_000_000 provides
+# enough headroom to avoid frequent collisions for OpenRouter's catalogue
+# (~300 models) while keeping IDs comfortably within Postgres INTEGER range.
+_STABLE_ID_HASH_WIDTH = 9_000_000
+
+
+def _stable_config_id(model_id: str, offset: int, taken: set[int]) -> int:
+    """Derive a deterministic negative config ID from ``model_id``.
+
+    The same ``model_id`` always hashes to the same base value so thread pins
+    survive catalogue churn (models appearing/disappearing/reordering between
+    refreshes). On collision we decrement until we find an unused slot, so
+    whichever colliding model is processed first keeps its base ID and only
+    the later ones shift — still far less disruptive than the legacy
+    index-based scheme that reshuffled every ID when the catalogue changed.
+    """
+    digest = hashlib.blake2b(model_id.encode("utf-8"), digest_size=6).digest()
+    base = offset - (int.from_bytes(digest, "big") % _STABLE_ID_HASH_WIDTH)
+    cid = base
+    while cid in taken:
+        cid -= 1
+    taken.add(cid)
+    return cid
+
+
+def _openrouter_tier(model: dict) -> str:
+    """Classify an OpenRouter model as ``"free"`` or ``"premium"``.
+
+    Per OpenRouter's API contract, a model is free if:
+    - Its id ends with ``:free`` (OpenRouter's own free-variant convention), or
+    - Both ``pricing.prompt`` and ``pricing.completion`` stringify to ``"0"``.
+
+    Anything else (missing pricing, non-zero pricing, other zero spellings
+    like ``"0.0"``) is ``"premium"`` so we never under-charge users. This runs
+    off the already-cached /api/v1/models payload, so it adds no network cost.
+    """
+    if model.get("id", "").endswith(":free"):
+        return "free"
+    pricing = model.get("pricing") or {}
+    prompt = str(pricing.get("prompt", "")).strip()
+    completion = str(pricing.get("completion", "")).strip()
+    if prompt == "0" and completion == "0":
+        return "free"
+    return "premium"
+

 def _is_text_output_model(model: dict) -> bool:
    """Return True if the model produces text output only (skip image/audio generators)."""
@ -109,24 +160,77 @@ async def _fetch_models_async() -> list[dict] | None:
        return None


+def _build_free_router_config(settings: dict[str, Any]) -> dict[str, Any]:
+    """Build the virtual ``openrouter/free`` auto-select config entry.
+
+    This exposes OpenRouter's Free Models Router as a single selectable
+    option. LiteLLM forwards ``openrouter/openrouter/free`` and OpenRouter
+    picks a capable free model per request (availability varies, account-wide
+    rate limit is ~20 req/min).
+    """
+    return {
+        "id": _FREE_ROUTER_ID,
+        "name": "OpenRouter Free (Auto-Select)",
+        "description": (
+            "OpenRouter picks a capable free model per request. "
+            "~20 req/min account-wide; availability varies."
+        ),
+        "provider": "OPENROUTER",
+        "model_name": "openrouter/free",
+        "api_key": settings.get("api_key", ""),
+        "api_base": "",
+        "billing_tier": "free",
+        "rpm": settings.get("free_rpm", 20),
+        "tpm": settings.get("free_tpm", 100_000),
+        "anonymous_enabled": settings.get("anonymous_enabled_free", False),
+        "seo_enabled": False,
+        "seo_slug": None,
+        "quota_reserve_tokens": settings.get("quota_reserve_tokens", 4000),
+        "litellm_params": dict(settings.get("litellm_params") or {}),
+        "system_instructions": settings.get("system_instructions", ""),
+        "use_default_system_instructions": settings.get(
+            "use_default_system_instructions", True
+        ),
+        "citations_enabled": settings.get("citations_enabled", True),
+        "router_pool_eligible": False,
+        _OPENROUTER_DYNAMIC_MARKER: True,
+    }
+
+
 def _generate_configs(
    raw_models: list[dict],
    settings: dict[str, Any],
 ) -> list[dict]:
-    """
-    Convert raw OpenRouter model entries into global LLM config dicts.
+    """Convert raw OpenRouter model entries into global LLM config dicts.

-    Models are sorted by ID for deterministic, stable ID assignment across
-    restarts and refreshes.
+    Tier (``billing_tier``) is derived per-model from OpenRouter's own API
+    signals via ``_openrouter_tier`` — there is no longer a uniform YAML
+    override. Config IDs are derived via ``_stable_config_id`` so they
+    survive catalogue churn across refreshes.
+
+    Router-pool membership is tier-aware:
+
+    - Premium OR models join the LiteLLM router pool (``router_pool_eligible=True``)
+      so sub-agent ``model="auto"`` flows benefit from load balancing and
+      failover across the curated YAML configs and the OR premium passthrough.
+    - Free OR models and the virtual ``openrouter/free`` entry stay excluded
+      (``router_pool_eligible=False``). LiteLLM Router tracks rate limits per
+      deployment, but OpenRouter enforces a single global free-tier quota
+      (~20 RPM + 50-1000 daily requests account-wide across every ``:free``
+      model), so rotating across many free deployments would only burn the
+      shared bucket faster. Free OR models remain fully available for user-
+      facing Auto-mode thread pinning via ``auto_model_pin_service``.
    """
    id_offset: int = settings.get("id_offset", -10000)
    api_key: str = settings.get("api_key", "")
-    billing_tier: str = settings.get("billing_tier", "premium")
-    anonymous_enabled: bool = settings.get("anonymous_enabled", False)
    seo_enabled: bool = settings.get("seo_enabled", False)
    quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000)
    rpm: int = settings.get("rpm", 200)
-    tpm: int = settings.get("tpm", 1000000)
+    tpm: int = settings.get("tpm", 1_000_000)
+    free_rpm: int = settings.get("free_rpm", 20)
+    free_tpm: int = settings.get("free_tpm", 100_000)
+    anon_paid: bool = settings.get("anonymous_enabled_paid", False)
+    anon_free: bool = settings.get("anonymous_enabled_free", False)
    litellm_params: dict = settings.get("litellm_params") or {}
    system_instructions: str = settings.get("system_instructions", "")
    use_default: bool = settings.get("use_default_system_instructions", True)
@ -142,19 +246,27 @@ def _generate_configs(
        and _is_allowed_model(m)
        and "/" in m.get("id", "")
    ]
-    text_models.sort(key=lambda m: m["id"])

    configs: list[dict] = []
-    for idx, model in enumerate(text_models):
+
+    if settings.get("free_router_enabled", True) and api_key:
+        configs.append(_build_free_router_config(settings))
+
+    taken: set[int] = set()
+    if configs:
+        taken.add(_FREE_ROUTER_ID)
+
+    for model in text_models:
        model_id: str = model["id"]
        name: str = model.get("name", model_id)
+        tier = _openrouter_tier(model)

        cfg: dict[str, Any] = {
-            "id": id_offset - idx,
+            "id": _stable_config_id(model_id, id_offset, taken),
            "name": name,
            "description": f"{name} via OpenRouter",
-            "billing_tier": billing_tier,
-            "anonymous_enabled": anonymous_enabled,
+            "billing_tier": tier,
+            "anonymous_enabled": anon_free if tier == "free" else anon_paid,
            "seo_enabled": seo_enabled,
            "seo_slug": None,
            "quota_reserve_tokens": quota_reserve_tokens,
@ -162,12 +274,18 @@ def _generate_configs(
            "model_name": model_id,
            "api_key": api_key,
            "api_base": "",
-            "rpm": rpm,
-            "tpm": tpm,
+            "rpm": free_rpm if tier == "free" else rpm,
+            "tpm": free_tpm if tier == "free" else tpm,
            "litellm_params": dict(litellm_params),
            "system_instructions": system_instructions,
            "use_default_system_instructions": use_default,
            "citations_enabled": citations_enabled,
+            # Premium OR deployments join the LiteLLM router pool so sub-agent
+            # model="auto" flows can load-balance / fail over across them.
+            # Free OR deployments stay out: OpenRouter's free tier is a single
+            # account-wide quota, so per-deployment routing can't spread load
+            # there — it just drains the shared bucket faster.
+            "router_pool_eligible": tier == "premium",
            _OPENROUTER_DYNAMIC_MARKER: True,
        }
        configs.append(cfg)
@ -220,11 +338,12 @@ class OpenRouterIntegrationService:
        self._configs_by_id = {c["id"]: c for c in self._configs}
        self._initialized = True

+        tier_counts = self._tier_counts(self._configs)
        logger.info(
-            "OpenRouter integration: loaded %d models (IDs %d to %d)",
+            "OpenRouter integration: loaded %d models (free=%d, premium=%d)",
            len(self._configs),
-            self._configs[0]["id"] if self._configs else 0,
-            self._configs[-1]["id"] if self._configs else 0,
+            tier_counts["free"],
+            tier_counts["premium"],
        )
        return self._configs

@ -254,7 +373,43 @@ class OpenRouterIntegrationService:
        self._configs = new_configs
        self._configs_by_id = new_by_id

-        logger.info("OpenRouter refresh: updated to %d models", len(new_configs))
+        tier_counts = self._tier_counts(new_configs)
+        logger.info(
+            "OpenRouter refresh: updated to %d models (free=%d, premium=%d)",
+            len(new_configs),
+            tier_counts["free"],
+            tier_counts["premium"],
+        )
+
+        # Rebuild the LiteLLM router so freshly fetched configs flow through
+        # (premium OR entries are ``router_pool_eligible``; free ones are not,
+        # but a refresh still needs to pick up any static-config edits and
+        # reset cached context-window profiles).
+        try:
+            from app.config import config as _app_config
+            from app.services.llm_router_service import LLMRouterService
+            from app.services.llm_router_service import (
+                _router_instance_cache as _chat_router_cache,
+            )
+
+            LLMRouterService.rebuild(
+                _app_config.GLOBAL_LLM_CONFIGS,
+                getattr(_app_config, "ROUTER_SETTINGS", None),
+            )
+            _chat_router_cache.clear()
+        except Exception as exc:
+            logger.warning(
+                "OpenRouter refresh: router rebuild skipped (%s)", exc
+            )
+
+    @staticmethod
+    def _tier_counts(configs: list[dict]) -> dict[str, int]:
+        counts = {"free": 0, "premium": 0}
+        for cfg in configs:
+            tier = str(cfg.get("billing_tier", "")).lower()
+            if tier in counts:
+                counts[tier] += 1
+        return counts

    async def _refresh_loop(self, interval_hours: float) -> None:
        interval_sec = interval_hours * 3600