From ccd7caf99f14411dffe5067cd3171357ab690808 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 1 May 2026 17:42:21 +0530 Subject: [PATCH] feat(openrouter): derive billing tier per-model and stabilize config IDs --- .../openrouter_integration_service.py | 191 ++++++++++++++++-- 1 file changed, 173 insertions(+), 18 deletions(-) diff --git a/surfsense_backend/app/services/openrouter_integration_service.py b/surfsense_backend/app/services/openrouter_integration_service.py index 1245f73aa..2d6a42337 100644 --- a/surfsense_backend/app/services/openrouter_integration_service.py +++ b/surfsense_backend/app/services/openrouter_integration_service.py @@ -11,6 +11,7 @@ this service only manages the catalogue, not the inference path. """ import asyncio +import hashlib import logging import threading from typing import Any @@ -25,6 +26,56 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models" # dynamic OpenRouter entries from hand-written YAML entries during refresh. _OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__" +# Fixed negative ID for the virtual ``openrouter/free`` auto-select entry. +# Sits below the hashed ID range (``id_offset - _STABLE_ID_HASH_WIDTH``) for +# offsets above roughly -1_000_000, so it stays clear of per-model stable IDs. +_FREE_ROUTER_ID = -9_999_999 + +# Width of the hash space used by ``_stable_config_id``. 9_000_000 provides +# enough headroom to avoid frequent collisions for OpenRouter's catalogue +# (~300 models) while keeping IDs comfortably within Postgres INTEGER range. +_STABLE_ID_HASH_WIDTH = 9_000_000 + + +def _stable_config_id(model_id: str, offset: int, taken: set[int]) -> int: + """Derive a deterministic negative config ID from ``model_id``. + + The same ``model_id`` always hashes to the same base value so thread pins + survive catalogue churn (models appearing/disappearing/reordering between + refreshes).
On collision we decrement until we find an unused slot; this + keeps the mapping stable for the first config (in call order) to claim a slot and + only shifts collisions, which is much less disruptive than the legacy + index-based scheme that reshuffled every ID when the catalogue changed. + """ + digest = hashlib.blake2b(model_id.encode("utf-8"), digest_size=6).digest() + base = offset - (int.from_bytes(digest, "big") % _STABLE_ID_HASH_WIDTH) + cid = base + while cid in taken: + cid -= 1 + taken.add(cid) + return cid + + +def _openrouter_tier(model: dict) -> str: + """Classify an OpenRouter model as ``"free"`` or ``"premium"``. + + Per OpenRouter's API contract, a model is free if: + - Its id ends with ``:free`` (OpenRouter's own free-variant convention), or + - Both ``pricing.prompt`` and ``pricing.completion`` stringify to exactly ``"0"``. + + Anything else (missing pricing, non-zero pricing) falls through to + ``"premium"`` so we never under-charge users. This derivation runs off the + already-cached /api/v1/models payload, so it adds no network cost. + """ + if model.get("id", "").endswith(":free"): + return "free" + pricing = model.get("pricing") or {} + prompt = str(pricing.get("prompt", "")).strip() + completion = str(pricing.get("completion", "")).strip() + if prompt == "0" and completion == "0": + return "free" + return "premium" + def _is_text_output_model(model: dict) -> bool: """Return True if the model produces text output only (skip image/audio generators).""" @@ -109,24 +160,77 @@ async def _fetch_models_async() -> list[dict] | None: return None +def _build_free_router_config(settings: dict[str, Any]) -> dict[str, Any]: + """Build the virtual ``openrouter/free`` auto-select config entry. + + This exposes OpenRouter's Free Models Router as a single selectable + option. LiteLLM forwards ``openrouter/openrouter/free`` and OpenRouter + picks a capable free model per request (availability varies, account-wide + rate limit is ~20 req/min).
+ """ + return { + "id": _FREE_ROUTER_ID, + "name": "OpenRouter Free (Auto-Select)", + "description": ( + "OpenRouter picks a capable free model per request. " + "~20 req/min account-wide; availability varies." + ), + "provider": "OPENROUTER", + "model_name": "openrouter/free", + "api_key": settings.get("api_key", ""), + "api_base": "", + "billing_tier": "free", + "rpm": settings.get("free_rpm", 20), + "tpm": settings.get("free_tpm", 100_000), + "anonymous_enabled": settings.get("anonymous_enabled_free", False), + "seo_enabled": False, + "seo_slug": None, + "quota_reserve_tokens": settings.get("quota_reserve_tokens", 4000), + "litellm_params": dict(settings.get("litellm_params") or {}), + "system_instructions": settings.get("system_instructions", ""), + "use_default_system_instructions": settings.get( + "use_default_system_instructions", True + ), + "citations_enabled": settings.get("citations_enabled", True), + "router_pool_eligible": False, + _OPENROUTER_DYNAMIC_MARKER: True, + } + + def _generate_configs( raw_models: list[dict], settings: dict[str, Any], ) -> list[dict]: - """ - Convert raw OpenRouter model entries into global LLM config dicts. + """Convert raw OpenRouter model entries into global LLM config dicts. - Models are sorted by ID for deterministic, stable ID assignment across - restarts and refreshes. + Tier (``billing_tier``) is derived per-model from OpenRouter's own API + signals via ``_openrouter_tier`` — there is no longer a uniform YAML + override. Config IDs are derived via ``_stable_config_id`` so they + survive catalogue churn across refreshes. + + Router-pool membership is tier-aware: + + - Premium OR models join the LiteLLM router pool (``router_pool_eligible=True``) + so sub-agent ``model="auto"`` flows benefit from load balancing and + failover across the curated YAML configs and the OR premium passthrough. + - Free OR models and the virtual ``openrouter/free`` entry stay excluded + (``router_pool_eligible=False``).
LiteLLM Router tracks rate limits per + deployment, but OpenRouter enforces a single global free-tier quota + (~20 RPM + 50-1000 daily requests account-wide across every ``:free`` + model), so rotating across many free deployments would only burn the + shared bucket faster. Free OR models remain fully available for user- + facing Auto-mode thread pinning via ``auto_model_pin_service``. """ id_offset: int = settings.get("id_offset", -10000) api_key: str = settings.get("api_key", "") - billing_tier: str = settings.get("billing_tier", "premium") - anonymous_enabled: bool = settings.get("anonymous_enabled", False) seo_enabled: bool = settings.get("seo_enabled", False) quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000) rpm: int = settings.get("rpm", 200) - tpm: int = settings.get("tpm", 1000000) + tpm: int = settings.get("tpm", 1_000_000) + free_rpm: int = settings.get("free_rpm", 20) + free_tpm: int = settings.get("free_tpm", 100_000) + anon_paid: bool = settings.get("anonymous_enabled_paid", False) + anon_free: bool = settings.get("anonymous_enabled_free", False) litellm_params: dict = settings.get("litellm_params") or {} system_instructions: str = settings.get("system_instructions", "") use_default: bool = settings.get("use_default_system_instructions", True) @@ -142,19 +246,27 @@ def _generate_configs( and _is_allowed_model(m) and "/" in m.get("id", "") ] - text_models.sort(key=lambda m: m["id"]) configs: list[dict] = [] - for idx, model in enumerate(text_models): + + if settings.get("free_router_enabled", True) and api_key: + configs.append(_build_free_router_config(settings)) + + taken: set[int] = set() + if configs: + taken.add(_FREE_ROUTER_ID) + + for model in text_models: model_id: str = model["id"] name: str = model.get("name", model_id) + tier = _openrouter_tier(model) cfg: dict[str, Any] = { - "id": id_offset - idx, + "id": _stable_config_id(model_id, id_offset, taken), "name": name, "description": f"{name} via OpenRouter", -
"billing_tier": billing_tier, - "anonymous_enabled": anonymous_enabled, + "billing_tier": tier, + "anonymous_enabled": anon_free if tier == "free" else anon_paid, "seo_enabled": seo_enabled, "seo_slug": None, "quota_reserve_tokens": quota_reserve_tokens, @@ -162,12 +274,18 @@ def _generate_configs( "model_name": model_id, "api_key": api_key, "api_base": "", - "rpm": rpm, - "tpm": tpm, + "rpm": free_rpm if tier == "free" else rpm, + "tpm": free_tpm if tier == "free" else tpm, "litellm_params": dict(litellm_params), "system_instructions": system_instructions, "use_default_system_instructions": use_default, "citations_enabled": citations_enabled, + # Premium OR deployments join the LiteLLM router pool so sub-agent + # model="auto" flows can load-balance / fail over across them. + # Free OR deployments stay out: OpenRouter's free tier is a single + # account-wide quota, so per-deployment routing can't spread load + # there — it just drains the shared bucket faster. + "router_pool_eligible": tier == "premium", _OPENROUTER_DYNAMIC_MARKER: True, } configs.append(cfg) @@ -220,11 +338,12 @@ class OpenRouterIntegrationService: self._configs_by_id = {c["id"]: c for c in self._configs} self._initialized = True + tier_counts = self._tier_counts(self._configs) logger.info( - "OpenRouter integration: loaded %d models (IDs %d to %d)", + "OpenRouter integration: loaded %d models (free=%d, premium=%d)", len(self._configs), - self._configs[0]["id"] if self._configs else 0, - self._configs[-1]["id"] if self._configs else 0, + tier_counts["free"], + tier_counts["premium"], ) return self._configs @@ -254,7 +373,43 @@ class OpenRouterIntegrationService: self._configs = new_configs self._configs_by_id = new_by_id - logger.info("OpenRouter refresh: updated to %d models", len(new_configs)) + tier_counts = self._tier_counts(new_configs) + logger.info( + "OpenRouter refresh: updated to %d models (free=%d, premium=%d)", + len(new_configs), + tier_counts["free"], + tier_counts["premium"], + ) +
+ # Rebuild the LiteLLM router so freshly fetched configs flow through + # (only entries marked ``router_pool_eligible`` are meant for its pool; a + # refresh must also pick up any static-config edits and reset + # cached context-window profiles). + try: + from app.config import config as _app_config + from app.services.llm_router_service import LLMRouterService + from app.services.llm_router_service import ( + _router_instance_cache as _chat_router_cache, + ) + + LLMRouterService.rebuild( + _app_config.GLOBAL_LLM_CONFIGS, + getattr(_app_config, "ROUTER_SETTINGS", None), + ) + _chat_router_cache.clear() + except Exception as exc: + logger.warning( + "OpenRouter refresh: router rebuild skipped (%s)", exc + ) + + @staticmethod + def _tier_counts(configs: list[dict]) -> dict[str, int]: + counts = {"free": 0, "premium": 0} + for cfg in configs: + tier = str(cfg.get("billing_tier", "")).lower() + if tier in counts: + counts[tier] += 1 + return counts async def _refresh_loop(self, interval_hours: float) -> None: interval_sec = interval_hours * 3600