diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py
index bd97d2bb1..11cbe24a7 100644
--- a/surfsense_backend/app/config/__init__.py
+++ b/surfsense_backend/app/config/__init__.py
@@ -194,6 +194,9 @@ def load_openrouter_integration_settings() -> dict | None:
     """
     Load OpenRouter integration settings from the YAML config.
 
+    Emits startup warnings for deprecated keys (``billing_tier``,
+    ``anonymous_enabled``) and seeds their replacements for back-compat.
+
     Returns:
         dict with settings if present and enabled, None otherwise
     """
@@ -206,9 +209,31 @@ def load_openrouter_integration_settings() -> dict | None:
         with open(global_config_file, encoding="utf-8") as f:
             data = yaml.safe_load(f)
         settings = data.get("openrouter_integration")
-        if settings and settings.get("enabled"):
-            return settings
-        return None
+        if not settings or not settings.get("enabled"):
+            return None
+
+        if "billing_tier" in settings:
+            print(
+                "Warning: openrouter_integration.billing_tier is deprecated; "
+                "tier is now derived per model from OpenRouter data "
+                "(':free' suffix or zero pricing). Remove this key."
+            )
+
+        if "anonymous_enabled" in settings:
+            print(
+                "Warning: openrouter_integration.anonymous_enabled is "
+                "deprecated; use anonymous_enabled_paid and/or "
+                "anonymous_enabled_free instead. Both new flags have been "
+                "seeded from the legacy value for back-compat."
+            )
+            settings.setdefault(
+                "anonymous_enabled_paid", settings["anonymous_enabled"]
+            )
+            settings.setdefault(
+                "anonymous_enabled_free", settings["anonymous_enabled"]
+            )
+
+        return settings
     except Exception as e:
         print(f"Warning: Failed to load OpenRouter integration settings: {e}")
         return None
@@ -217,9 +242,14 @@ def load_openrouter_integration_settings() -> dict | None:
 def initialize_openrouter_integration():
     """
     If enabled, fetch all OpenRouter models and append them to
-    config.GLOBAL_LLM_CONFIGS as dynamic premium entries.
-    Should be called BEFORE initialize_llm_router() so the router
-    correctly excludes premium models from Auto mode.
+    config.GLOBAL_LLM_CONFIGS as dynamic entries. Each model's ``billing_tier``
+    is derived per-model from OpenRouter's API signals (``:free`` suffix or
+    zero pricing), so free OpenRouter models correctly skip premium quota.
+
+    Should be called BEFORE initialize_llm_router(). Dynamic entries are
+    tagged ``router_pool_eligible=False`` so the LiteLLM Router pool (used
+    by title-gen / sub-agent flows) remains scoped to curated YAML configs,
+    while user-facing Auto-mode thread pinning still considers them.
     """
     settings = load_openrouter_integration_settings()
     if not settings:
         return
@@ -235,9 +265,15 @@ def initialize_openrouter_integration():
 
     if new_configs:
         config.GLOBAL_LLM_CONFIGS.extend(new_configs)
+        free_count = sum(
+            1 for c in new_configs if c.get("billing_tier") == "free"
+        )
+        premium_count = sum(
+            1 for c in new_configs if c.get("billing_tier") == "premium"
+        )
         print(
             f"Info: OpenRouter integration added {len(new_configs)} models "
-            f"(billing_tier={settings.get('billing_tier', 'premium')})"
+            f"(free={free_count}, premium={premium_count})"
         )
     else:
         print("Info: OpenRouter integration enabled but no models fetched")
diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml
index 9aca0f022..d62b4a4a5 100644
--- a/surfsense_backend/app/config/global_llm_config.example.yaml
+++ b/surfsense_backend/app/config/global_llm_config.example.yaml
@@ -245,31 +245,57 @@ global_llm_configs:
 # =============================================================================
 # When enabled, dynamically fetches ALL available models from the OpenRouter API
 # and injects them as global configs. This gives premium users access to any model
-# on OpenRouter (Claude, Gemini, Llama, Mistral, etc.) via their premium token quota.
+# on OpenRouter (Claude, Gemini, Llama, Mistral, etc.) via their premium token quota,
+# while free-tier OpenRouter models show up with a green Free badge and do NOT
+# consume premium quota.
 # Models are fetched at startup and refreshed periodically in the background.
 # All calls go through LiteLLM with the openrouter/ prefix.
 openrouter_integration:
   enabled: false
   api_key: "sk-or-your-openrouter-api-key"
-  # billing_tier: "premium" or "free". Controls whether users need premium tokens.
-  billing_tier: "premium"
-  # anonymous_enabled: set true to also show OpenRouter models to no-login users
-  anonymous_enabled: false
+
+  # Tier is derived PER MODEL from OpenRouter's own API signals:
+  #   - id ends with ":free" -> billing_tier=free
+  #   - pricing.prompt AND pricing.completion == "0" -> billing_tier=free
+  #   - otherwise -> billing_tier=premium
+  # No global billing_tier knob is honored; any legacy value emits a startup warning.
+
+  # Anonymous access is split by tier so operators can expose only free
+  # models to no-login users without leaking paid inference.
+  anonymous_enabled_paid: false
+  anonymous_enabled_free: false
+
   seo_enabled: false
   # quota_reserve_tokens: tokens reserved per call for quota enforcement
   quota_reserve_tokens: 4000
-  # id_offset: starting negative ID for dynamically generated configs.
-  # Must not overlap with your static global_llm_configs IDs above.
+  # id_offset: base negative ID for dynamically generated configs.
+  # Model IDs are derived deterministically via BLAKE2b so they survive
+  # catalogue churn. Must not overlap with your static global_llm_configs IDs.
   id_offset: -10000
   # refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only)
   refresh_interval_hours: 24
-  # rpm/tpm: Applied uniformly to all OpenRouter models for LiteLLM Router load balancing.
-  # OpenRouter doesn't expose per-model rate limits via API; actual throttling is handled
-  # upstream by OpenRouter itself (your account limits are at https://openrouter.ai/settings/limits).
-  # These values only matter if you set billing_tier to "free" (adding them to Auto mode).
-  # For premium-only models they are cosmetic. Set conservatively or match your account tier.
+
+  # Rate limits for PAID OpenRouter models. These are used by LiteLLM Router
+  # for per-deployment accounting when OR premium models participate in the
+  # shared sub-agent "auto" pool. They do NOT cap OpenRouter itself — your
+  # real account limits live at https://openrouter.ai/settings/limits.
   rpm: 200
   tpm: 1000000
+
+  # Rate limits for FREE OpenRouter models. Informational only: free OR
+  # models and openrouter/free are intentionally kept OUT of the LiteLLM
+  # Router pool, because OpenRouter enforces free-tier limits globally per
+  # account (~20 RPM + 50-1000 daily requests across every ":free" model
+  # combined) — per-deployment router accounting can't represent a shared
+  # bucket correctly. Free OR models stay fully available in the model
+  # selector and for user-facing Auto thread pinning.
+  free_rpm: 20
+  free_tpm: 100000
+
+  # Expose openrouter/free as a single virtual "Free (Auto-Select)" entry.
+  # Recommended: keep true. OpenRouter picks a capable free model per request.
+  free_router_enabled: true
+
   litellm_params:
     max_tokens: 16384
   system_instructions: ""