mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-21 18:55:16 +02:00
refactor(openrouter): remove virtual openrouter/free auto-select entry
This commit is contained in:
parent
4d34b56c4d
commit
680a1c1c38
3 changed files with 45 additions and 105 deletions
|
|
@@ -283,19 +283,15 @@ openrouter_integration:
|
|||
tpm: 1000000
|
||||
|
||||
# Rate limits for FREE OpenRouter models. Informational only: free OR
|
||||
# models and openrouter/free are intentionally kept OUT of the LiteLLM
|
||||
# Router pool, because OpenRouter enforces free-tier limits globally per
|
||||
# account (~20 RPM + 50-1000 daily requests across every ":free" model
|
||||
# combined) — per-deployment router accounting can't represent a shared
|
||||
# bucket correctly. Free OR models stay fully available in the model
|
||||
# selector and for user-facing Auto thread pinning.
|
||||
# models are intentionally kept OUT of the LiteLLM Router pool, because
|
||||
# OpenRouter enforces free-tier limits globally per account (~20 RPM +
|
||||
# 50-1000 daily requests across every ":free" model combined) —
|
||||
# per-deployment router accounting can't represent a shared bucket
|
||||
# correctly. Free OR models stay fully available in the model selector
|
||||
# and for user-facing Auto thread pinning.
|
||||
free_rpm: 20
|
||||
free_tpm: 100000
|
||||
|
||||
# Expose openrouter/free as a single virtual "Free (Auto-Select)" entry.
|
||||
# Recommended: keep true. OpenRouter picks a capable free model per request.
|
||||
free_router_enabled: true
|
||||
|
||||
litellm_params:
|
||||
max_tokens: 16384
|
||||
system_instructions: ""
|
||||
|
|
|
|||
|
|
@@ -26,11 +26,6 @@ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
|
|||
# dynamic OpenRouter entries from hand-written YAML entries during refresh.
|
||||
_OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__"
|
||||
|
||||
# Fixed negative ID for the virtual ``openrouter/free`` auto-select entry.
|
||||
# Chosen to sit far below any reasonable ``id_offset`` so it never collides
|
||||
# with per-model stable IDs.
|
||||
_FREE_ROUTER_ID = -9_999_999
|
||||
|
||||
# Width of the hash space used by ``_stable_config_id``. 9_000_000 provides
|
||||
# enough headroom to avoid frequent collisions for OpenRouter's catalogue
|
||||
# (~300 models) while keeping IDs comfortably within Postgres INTEGER range.
|
||||
|
|
@@ -107,6 +102,11 @@ _EXCLUDED_MODEL_IDS: set[str] = {
|
|||
# Deep-research models reject standard params (temperature, etc.)
|
||||
"openai/o3-deep-research",
|
||||
"openai/o4-mini-deep-research",
|
||||
# OpenRouter's own meta-router over free models. We already enumerate every
|
||||
# concrete ``:free`` model into GLOBAL_LLM_CONFIGS and Auto-mode thread
|
||||
# pinning handles churn via the repair path, so exposing an additional
|
||||
# indirection layer would only duplicate the capability with an opaque slug.
|
||||
"openrouter/free",
|
||||
}
|
||||
|
||||
_EXCLUDED_MODEL_SUFFIXES: tuple[str, ...] = ("-deep-research",)
|
||||
|
|
@@ -160,43 +160,6 @@ async def _fetch_models_async() -> list[dict] | None:
|
|||
return None
|
||||
|
||||
|
||||
def _build_free_router_config(settings: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Build the virtual ``openrouter/free`` auto-select config entry.
|
||||
|
||||
This exposes OpenRouter's Free Models Router as a single selectable
|
||||
option. LiteLLM forwards ``openrouter/openrouter/free`` and OpenRouter
|
||||
picks a capable free model per request (availability varies, account-wide
|
||||
rate limit is ~20 req/min).
|
||||
"""
|
||||
return {
|
||||
"id": _FREE_ROUTER_ID,
|
||||
"name": "OpenRouter Free (Auto-Select)",
|
||||
"description": (
|
||||
"OpenRouter picks a capable free model per request. "
|
||||
"~20 req/min account-wide; availability varies."
|
||||
),
|
||||
"provider": "OPENROUTER",
|
||||
"model_name": "openrouter/free",
|
||||
"api_key": settings.get("api_key", ""),
|
||||
"api_base": "",
|
||||
"billing_tier": "free",
|
||||
"rpm": settings.get("free_rpm", 20),
|
||||
"tpm": settings.get("free_tpm", 100_000),
|
||||
"anonymous_enabled": settings.get("anonymous_enabled_free", False),
|
||||
"seo_enabled": False,
|
||||
"seo_slug": None,
|
||||
"quota_reserve_tokens": settings.get("quota_reserve_tokens", 4000),
|
||||
"litellm_params": dict(settings.get("litellm_params") or {}),
|
||||
"system_instructions": settings.get("system_instructions", ""),
|
||||
"use_default_system_instructions": settings.get(
|
||||
"use_default_system_instructions", True
|
||||
),
|
||||
"citations_enabled": settings.get("citations_enabled", True),
|
||||
"router_pool_eligible": False,
|
||||
_OPENROUTER_DYNAMIC_MARKER: True,
|
||||
}
|
||||
|
||||
|
||||
def _generate_configs(
|
||||
raw_models: list[dict],
|
||||
settings: dict[str, Any],
|
||||
|
|
@@ -213,13 +176,18 @@ def _generate_configs(
|
|||
- Premium OR models join the LiteLLM router pool (``router_pool_eligible=True``)
|
||||
so sub-agent ``model="auto"`` flows benefit from load balancing and
|
||||
failover across the curated YAML configs and the OR premium passthrough.
|
||||
- Free OR models and the virtual ``openrouter/free`` entry stay excluded
|
||||
(``router_pool_eligible=False``). LiteLLM Router tracks rate limits per
|
||||
deployment, but OpenRouter enforces a single global free-tier quota
|
||||
(~20 RPM + 50-1000 daily requests account-wide across every ``:free``
|
||||
model), so rotating across many free deployments would only burn the
|
||||
shared bucket faster. Free OR models remain fully available for user-
|
||||
facing Auto-mode thread pinning via ``auto_model_pin_service``.
|
||||
- Free OR models stay excluded (``router_pool_eligible=False``). LiteLLM
|
||||
Router tracks rate limits per deployment, but OpenRouter enforces a
|
||||
single global free-tier quota (~20 RPM + 50-1000 daily requests
|
||||
account-wide across every ``:free`` model), so rotating across many
|
||||
free deployments would only burn the shared bucket faster. Free OR
|
||||
models remain fully available for user-facing Auto-mode thread pinning
|
||||
via ``auto_model_pin_service``.
|
||||
|
||||
OpenRouter's own ``openrouter/free`` meta-router is filtered out upstream
|
||||
via ``_EXCLUDED_MODEL_IDS``; we don't expose a redundant auto-select layer
|
||||
because our own Auto (Fastest) pin + 24 h refresh + repair logic already
|
||||
cover the catalogue-churn case.
|
||||
"""
|
||||
id_offset: int = settings.get("id_offset", -10000)
|
||||
api_key: str = settings.get("api_key", "")
|
||||
|
|
@@ -248,13 +216,7 @@ def _generate_configs(
|
|||
]
|
||||
|
||||
configs: list[dict] = []
|
||||
|
||||
if settings.get("free_router_enabled", True) and api_key:
|
||||
configs.append(_build_free_router_config(settings))
|
||||
|
||||
taken: set[int] = set()
|
||||
if configs:
|
||||
taken.add(_FREE_ROUTER_ID)
|
||||
|
||||
for model in text_models:
|
||||
model_id: str = model["id"]
|
||||
|
|
@@ -382,9 +344,9 @@ class OpenRouterIntegrationService:
|
|||
)
|
||||
|
||||
# Rebuild the LiteLLM router so freshly fetched configs flow through
|
||||
# (the router filters dynamic OR entries out of its pool, but a
|
||||
# refresh still needs to pick up any static-config edits and reset
|
||||
# cached context-window profiles).
|
||||
# (dynamic OR premium entries now opt into the pool, free ones stay
|
||||
# out; a refresh also needs to pick up any static-config edits and
|
||||
# reset cached context-window profiles).
|
||||
try:
|
||||
from app.config import config as _app_config
|
||||
from app.services.llm_router_service import LLMRouterService
|
||||
|
|
|
|||
|
|
@@ -5,9 +5,7 @@ from __future__ import annotations
|
|||
import pytest
|
||||
|
||||
from app.services.openrouter_integration_service import (
|
||||
_FREE_ROUTER_ID,
|
||||
_OPENROUTER_DYNAMIC_MARKER,
|
||||
_build_free_router_config,
|
||||
_generate_configs,
|
||||
_openrouter_tier,
|
||||
_stable_config_id,
|
||||
|
|
@@ -135,7 +133,6 @@ _SETTINGS_BASE: dict = {
|
|||
"anonymous_enabled_paid": False,
|
||||
"anonymous_enabled_free": True,
|
||||
"quota_reserve_tokens": 4000,
|
||||
"free_router_enabled": False,
|
||||
}
|
||||
|
||||
|
||||
|
|
@@ -172,33 +169,26 @@ def test_generate_configs_respects_tier():
|
|||
assert free["router_pool_eligible"] is False
|
||||
|
||||
|
||||
def test_generate_configs_includes_free_router_when_enabled():
|
||||
raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
|
||||
settings = {**_SETTINGS_BASE, "free_router_enabled": True}
|
||||
cfgs = _generate_configs(raw, settings)
|
||||
free_router = next(
|
||||
(c for c in cfgs if c["model_name"] == "openrouter/free"), None
|
||||
)
|
||||
assert free_router is not None
|
||||
assert free_router["id"] == _FREE_ROUTER_ID
|
||||
assert free_router["billing_tier"] == "free"
|
||||
assert free_router["router_pool_eligible"] is False
|
||||
assert free_router["anonymous_enabled"] is True
|
||||
def test_generate_configs_excludes_upstream_openrouter_free_router():
|
||||
"""OpenRouter's own ``openrouter/free`` meta-router must never become a card.
|
||||
|
||||
|
||||
def test_generate_configs_excludes_free_router_when_disabled():
|
||||
raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
|
||||
settings = {**_SETTINGS_BASE, "free_router_enabled": False}
|
||||
cfgs = _generate_configs(raw, settings)
|
||||
assert not any(c["model_name"] == "openrouter/free" for c in cfgs)
|
||||
|
||||
|
||||
def test_generate_configs_excludes_free_router_without_api_key():
|
||||
"""Without an API key the free-router entry is useless; skip it."""
|
||||
raw = [_minimal_openrouter_model(model_id="openai/gpt-4o")]
|
||||
settings = {**_SETTINGS_BASE, "free_router_enabled": True, "api_key": ""}
|
||||
cfgs = _generate_configs(raw, settings)
|
||||
assert not any(c["model_name"] == "openrouter/free" for c in cfgs)
|
||||
The upstream API returns this as a first-class zero-priced model, so
|
||||
without an explicit blocklist entry it would slip through every other
|
||||
filter (text output, tool calling, 200k context, non-Amazon) and land
|
||||
in the selector as a duplicate of the concrete ``:free`` cards. The
|
||||
exclusion in ``_EXCLUDED_MODEL_IDS`` prevents that.
|
||||
"""
|
||||
raw = [
|
||||
_minimal_openrouter_model(model_id="openai/gpt-4o"),
|
||||
_minimal_openrouter_model(
|
||||
model_id="openrouter/free",
|
||||
pricing={"prompt": "0", "completion": "0"},
|
||||
),
|
||||
]
|
||||
cfgs = _generate_configs(raw, dict(_SETTINGS_BASE))
|
||||
model_names = {c["model_name"] for c in cfgs}
|
||||
assert "openrouter/free" not in model_names
|
||||
assert "openai/gpt-4o" in model_names
|
||||
|
||||
|
||||
def test_generate_configs_drops_non_text_and_non_tool_models():
|
||||
|
|
@@ -226,11 +216,3 @@ def test_generate_configs_drops_non_text_and_non_tool_models():
|
|||
assert "openai/completion-only" not in model_names
|
||||
|
||||
|
||||
def test_build_free_router_config_shape():
|
||||
cfg = _build_free_router_config(dict(_SETTINGS_BASE))
|
||||
assert cfg["provider"] == "OPENROUTER"
|
||||
assert cfg["model_name"] == "openrouter/free"
|
||||
assert cfg["id"] == _FREE_ROUTER_ID
|
||||
assert cfg["billing_tier"] == "free"
|
||||
assert cfg["router_pool_eligible"] is False
|
||||
assert cfg[_OPENROUTER_DYNAMIC_MARKER] is True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue