mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-16 21:05:20 +02:00
feat(openrouter): derive billing tier per-model and stabilize config IDs
This commit is contained in:
parent
5dd45a5740
commit
ccd7caf99f
1 changed files with 173 additions and 18 deletions
|
|
@ -11,6 +11,7 @@ this service only manages the catalogue, not the inference path.
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import threading
|
||||
from typing import Any
|
||||
|
|
@ -25,6 +26,56 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
|
|||
# dynamic OpenRouter entries from hand-written YAML entries during refresh.
|
||||
_OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__"
|
||||
|
||||
# Fixed negative ID for the virtual ``openrouter/free`` auto-select entry.
|
||||
# Chosen to sit far below any reasonable ``id_offset`` so it never collides
|
||||
# with per-model stable IDs.
|
||||
_FREE_ROUTER_ID = -9_999_999
|
||||
|
||||
# Width of the hash space used by ``_stable_config_id``. 9_000_000 provides
|
||||
# enough headroom to avoid frequent collisions for OpenRouter's catalogue
|
||||
# (~300 models) while keeping IDs comfortably within Postgres INTEGER range.
|
||||
_STABLE_ID_HASH_WIDTH = 9_000_000
|
||||
|
||||
|
||||
def _stable_config_id(model_id: str, offset: int, taken: set[int]) -> int:
|
||||
"""Derive a deterministic negative config ID from ``model_id``.
|
||||
|
||||
The same ``model_id`` always hashes to the same base value so thread pins
|
||||
survive catalogue churn (models appearing/disappearing/reordering between
|
||||
refreshes). On collision we decrement until we find an unused slot; this
|
||||
keeps the mapping stable for the first config that claimed a slot and
|
||||
only shifts collisions, which is much less disruptive than the legacy
|
||||
index-based scheme that reshuffled every ID when the catalogue changed.
|
||||
"""
|
||||
digest = hashlib.blake2b(model_id.encode("utf-8"), digest_size=6).digest()
|
||||
base = offset - (int.from_bytes(digest, "big") % _STABLE_ID_HASH_WIDTH)
|
||||
cid = base
|
||||
while cid in taken:
|
||||
cid -= 1
|
||||
taken.add(cid)
|
||||
return cid
|
||||
|
||||
|
||||
def _openrouter_tier(model: dict) -> str:
|
||||
"""Classify an OpenRouter model as ``"free"`` or ``"premium"``.
|
||||
|
||||
Per OpenRouter's API contract, a model is free if:
|
||||
- Its id ends with ``:free`` (OpenRouter's own free-variant convention), or
|
||||
- Both ``pricing.prompt`` and ``pricing.completion`` are zero strings.
|
||||
|
||||
Anything else (missing pricing, non-zero pricing) falls through to
|
||||
``"premium"`` so we never under-charge users. This derivation runs off the
|
||||
already-cached /api/v1/models payload, so it adds no network cost.
|
||||
"""
|
||||
if model.get("id", "").endswith(":free"):
|
||||
return "free"
|
||||
pricing = model.get("pricing") or {}
|
||||
prompt = str(pricing.get("prompt", "")).strip()
|
||||
completion = str(pricing.get("completion", "")).strip()
|
||||
if prompt == "0" and completion == "0":
|
||||
return "free"
|
||||
return "premium"
|
||||
|
||||
|
||||
def _is_text_output_model(model: dict) -> bool:
|
||||
"""Return True if the model produces text output only (skip image/audio generators)."""
|
||||
|
|
@ -109,24 +160,77 @@ async def _fetch_models_async() -> list[dict] | None:
|
|||
return None
|
||||
|
||||
|
||||
def _build_free_router_config(settings: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Build the virtual ``openrouter/free`` auto-select config entry.
|
||||
|
||||
This exposes OpenRouter's Free Models Router as a single selectable
|
||||
option. LiteLLM forwards ``openrouter/openrouter/free`` and OpenRouter
|
||||
picks a capable free model per request (availability varies, account-wide
|
||||
rate limit is ~20 req/min).
|
||||
"""
|
||||
return {
|
||||
"id": _FREE_ROUTER_ID,
|
||||
"name": "OpenRouter Free (Auto-Select)",
|
||||
"description": (
|
||||
"OpenRouter picks a capable free model per request. "
|
||||
"~20 req/min account-wide; availability varies."
|
||||
),
|
||||
"provider": "OPENROUTER",
|
||||
"model_name": "openrouter/free",
|
||||
"api_key": settings.get("api_key", ""),
|
||||
"api_base": "",
|
||||
"billing_tier": "free",
|
||||
"rpm": settings.get("free_rpm", 20),
|
||||
"tpm": settings.get("free_tpm", 100_000),
|
||||
"anonymous_enabled": settings.get("anonymous_enabled_free", False),
|
||||
"seo_enabled": False,
|
||||
"seo_slug": None,
|
||||
"quota_reserve_tokens": settings.get("quota_reserve_tokens", 4000),
|
||||
"litellm_params": dict(settings.get("litellm_params") or {}),
|
||||
"system_instructions": settings.get("system_instructions", ""),
|
||||
"use_default_system_instructions": settings.get(
|
||||
"use_default_system_instructions", True
|
||||
),
|
||||
"citations_enabled": settings.get("citations_enabled", True),
|
||||
"router_pool_eligible": False,
|
||||
_OPENROUTER_DYNAMIC_MARKER: True,
|
||||
}
|
||||
|
||||
|
||||
def _generate_configs(
|
||||
raw_models: list[dict],
|
||||
settings: dict[str, Any],
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Convert raw OpenRouter model entries into global LLM config dicts.
|
||||
"""Convert raw OpenRouter model entries into global LLM config dicts.
|
||||
|
||||
Models are sorted by ID for deterministic, stable ID assignment across
|
||||
restarts and refreshes.
|
||||
Tier (``billing_tier``) is derived per-model from OpenRouter's own API
|
||||
signals via ``_openrouter_tier`` — there is no longer a uniform YAML
|
||||
override. Config IDs are derived via ``_stable_config_id`` so they
|
||||
survive catalogue churn across refreshes.
|
||||
|
||||
Router-pool membership is tier-aware:
|
||||
|
||||
- Premium OR models join the LiteLLM router pool (``router_pool_eligible=True``)
|
||||
so sub-agent ``model="auto"`` flows benefit from load balancing and
|
||||
failover across the curated YAML configs and the OR premium passthrough.
|
||||
- Free OR models and the virtual ``openrouter/free`` entry stay excluded
|
||||
(``router_pool_eligible=False``). LiteLLM Router tracks rate limits per
|
||||
deployment, but OpenRouter enforces a single global free-tier quota
|
||||
(~20 RPM + 50-1000 daily requests account-wide across every ``:free``
|
||||
model), so rotating across many free deployments would only burn the
|
||||
shared bucket faster. Free OR models remain fully available for user-
|
||||
facing Auto-mode thread pinning via ``auto_model_pin_service``.
|
||||
"""
|
||||
id_offset: int = settings.get("id_offset", -10000)
|
||||
api_key: str = settings.get("api_key", "")
|
||||
billing_tier: str = settings.get("billing_tier", "premium")
|
||||
anonymous_enabled: bool = settings.get("anonymous_enabled", False)
|
||||
seo_enabled: bool = settings.get("seo_enabled", False)
|
||||
quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000)
|
||||
rpm: int = settings.get("rpm", 200)
|
||||
tpm: int = settings.get("tpm", 1000000)
|
||||
tpm: int = settings.get("tpm", 1_000_000)
|
||||
free_rpm: int = settings.get("free_rpm", 20)
|
||||
free_tpm: int = settings.get("free_tpm", 100_000)
|
||||
anon_paid: bool = settings.get("anonymous_enabled_paid", False)
|
||||
anon_free: bool = settings.get("anonymous_enabled_free", False)
|
||||
litellm_params: dict = settings.get("litellm_params") or {}
|
||||
system_instructions: str = settings.get("system_instructions", "")
|
||||
use_default: bool = settings.get("use_default_system_instructions", True)
|
||||
|
|
@ -142,19 +246,27 @@ def _generate_configs(
|
|||
and _is_allowed_model(m)
|
||||
and "/" in m.get("id", "")
|
||||
]
|
||||
text_models.sort(key=lambda m: m["id"])
|
||||
|
||||
configs: list[dict] = []
|
||||
for idx, model in enumerate(text_models):
|
||||
|
||||
if settings.get("free_router_enabled", True) and api_key:
|
||||
configs.append(_build_free_router_config(settings))
|
||||
|
||||
taken: set[int] = set()
|
||||
if configs:
|
||||
taken.add(_FREE_ROUTER_ID)
|
||||
|
||||
for model in text_models:
|
||||
model_id: str = model["id"]
|
||||
name: str = model.get("name", model_id)
|
||||
tier = _openrouter_tier(model)
|
||||
|
||||
cfg: dict[str, Any] = {
|
||||
"id": id_offset - idx,
|
||||
"id": _stable_config_id(model_id, id_offset, taken),
|
||||
"name": name,
|
||||
"description": f"{name} via OpenRouter",
|
||||
"billing_tier": billing_tier,
|
||||
"anonymous_enabled": anonymous_enabled,
|
||||
"billing_tier": tier,
|
||||
"anonymous_enabled": anon_free if tier == "free" else anon_paid,
|
||||
"seo_enabled": seo_enabled,
|
||||
"seo_slug": None,
|
||||
"quota_reserve_tokens": quota_reserve_tokens,
|
||||
|
|
@ -162,12 +274,18 @@ def _generate_configs(
|
|||
"model_name": model_id,
|
||||
"api_key": api_key,
|
||||
"api_base": "",
|
||||
"rpm": rpm,
|
||||
"tpm": tpm,
|
||||
"rpm": free_rpm if tier == "free" else rpm,
|
||||
"tpm": free_tpm if tier == "free" else tpm,
|
||||
"litellm_params": dict(litellm_params),
|
||||
"system_instructions": system_instructions,
|
||||
"use_default_system_instructions": use_default,
|
||||
"citations_enabled": citations_enabled,
|
||||
# Premium OR deployments join the LiteLLM router pool so sub-agent
|
||||
# model="auto" flows can load-balance / fail over across them.
|
||||
# Free OR deployments stay out: OpenRouter's free tier is a single
|
||||
# account-wide quota, so per-deployment routing can't spread load
|
||||
# there — it just drains the shared bucket faster.
|
||||
"router_pool_eligible": tier == "premium",
|
||||
_OPENROUTER_DYNAMIC_MARKER: True,
|
||||
}
|
||||
configs.append(cfg)
|
||||
|
|
@ -220,11 +338,12 @@ class OpenRouterIntegrationService:
|
|||
self._configs_by_id = {c["id"]: c for c in self._configs}
|
||||
self._initialized = True
|
||||
|
||||
tier_counts = self._tier_counts(self._configs)
|
||||
logger.info(
|
||||
"OpenRouter integration: loaded %d models (IDs %d to %d)",
|
||||
"OpenRouter integration: loaded %d models (free=%d, premium=%d)",
|
||||
len(self._configs),
|
||||
self._configs[0]["id"] if self._configs else 0,
|
||||
self._configs[-1]["id"] if self._configs else 0,
|
||||
tier_counts["free"],
|
||||
tier_counts["premium"],
|
||||
)
|
||||
return self._configs
|
||||
|
||||
|
|
@ -254,7 +373,43 @@ class OpenRouterIntegrationService:
|
|||
self._configs = new_configs
|
||||
self._configs_by_id = new_by_id
|
||||
|
||||
logger.info("OpenRouter refresh: updated to %d models", len(new_configs))
|
||||
tier_counts = self._tier_counts(new_configs)
|
||||
logger.info(
|
||||
"OpenRouter refresh: updated to %d models (free=%d, premium=%d)",
|
||||
len(new_configs),
|
||||
tier_counts["free"],
|
||||
tier_counts["premium"],
|
||||
)
|
||||
|
||||
# Rebuild the LiteLLM router so freshly fetched configs flow through
|
||||
# (the router filters dynamic OR entries out of its pool, but a
|
||||
# refresh still needs to pick up any static-config edits and reset
|
||||
# cached context-window profiles).
|
||||
try:
|
||||
from app.config import config as _app_config
|
||||
from app.services.llm_router_service import LLMRouterService
|
||||
from app.services.llm_router_service import (
|
||||
_router_instance_cache as _chat_router_cache,
|
||||
)
|
||||
|
||||
LLMRouterService.rebuild(
|
||||
_app_config.GLOBAL_LLM_CONFIGS,
|
||||
getattr(_app_config, "ROUTER_SETTINGS", None),
|
||||
)
|
||||
_chat_router_cache.clear()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"OpenRouter refresh: router rebuild skipped (%s)", exc
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _tier_counts(configs: list[dict]) -> dict[str, int]:
|
||||
counts = {"free": 0, "premium": 0}
|
||||
for cfg in configs:
|
||||
tier = str(cfg.get("billing_tier", "")).lower()
|
||||
if tier in counts:
|
||||
counts[tier] += 1
|
||||
return counts
|
||||
|
||||
async def _refresh_loop(self, interval_hours: float) -> None:
|
||||
interval_sec = interval_hours * 3600
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue