chore(config): deprecate billing_tier / anonymous_enabled, split anon flags

This commit is contained in:
Anish Sarkar 2026-05-01 17:42:44 +05:30
parent ccd7caf99f
commit 925c33abd1
2 changed files with 81 additions and 19 deletions

View file

@ -194,6 +194,9 @@ def load_openrouter_integration_settings() -> dict | None:
"""
Load OpenRouter integration settings from the YAML config.
Emits startup warnings for deprecated keys (``billing_tier``,
``anonymous_enabled``) and seeds their replacements for back-compat.
Returns:
dict with settings if present and enabled, None otherwise
"""
@ -206,9 +209,31 @@ def load_openrouter_integration_settings() -> dict | None:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
settings = data.get("openrouter_integration")
if settings and settings.get("enabled"):
return settings
return None
if not settings or not settings.get("enabled"):
return None
if "billing_tier" in settings:
print(
"Warning: openrouter_integration.billing_tier is deprecated; "
"tier is now derived per model from OpenRouter data "
"(':free' suffix or zero pricing). Remove this key."
)
if "anonymous_enabled" in settings:
print(
"Warning: openrouter_integration.anonymous_enabled is "
"deprecated; use anonymous_enabled_paid and/or "
"anonymous_enabled_free instead. Both new flags have been "
"seeded from the legacy value for back-compat."
)
settings.setdefault(
"anonymous_enabled_paid", settings["anonymous_enabled"]
)
settings.setdefault(
"anonymous_enabled_free", settings["anonymous_enabled"]
)
return settings
except Exception as e:
print(f"Warning: Failed to load OpenRouter integration settings: {e}")
return None
@ -217,9 +242,14 @@ def load_openrouter_integration_settings() -> dict | None:
def initialize_openrouter_integration():
"""
If enabled, fetch all OpenRouter models and append them to
config.GLOBAL_LLM_CONFIGS as dynamic premium entries.
Should be called BEFORE initialize_llm_router() so the router
correctly excludes premium models from Auto mode.
config.GLOBAL_LLM_CONFIGS as dynamic entries. Each model's ``billing_tier``
is derived per-model from OpenRouter's API signals (``:free`` suffix or
zero pricing), so free OpenRouter models correctly skip premium quota.
Should be called BEFORE initialize_llm_router(). Dynamic entries are
tagged ``router_pool_eligible=False`` so the LiteLLM Router pool (used
by title-gen / sub-agent flows) remains scoped to curated YAML configs,
while user-facing Auto-mode thread pinning still considers them.
"""
settings = load_openrouter_integration_settings()
if not settings:
@ -235,9 +265,15 @@ def initialize_openrouter_integration():
if new_configs:
config.GLOBAL_LLM_CONFIGS.extend(new_configs)
free_count = sum(
1 for c in new_configs if c.get("billing_tier") == "free"
)
premium_count = sum(
1 for c in new_configs if c.get("billing_tier") == "premium"
)
print(
f"Info: OpenRouter integration added {len(new_configs)} models "
f"(billing_tier={settings.get('billing_tier', 'premium')})"
f"(free={free_count}, premium={premium_count})"
)
else:
print("Info: OpenRouter integration enabled but no models fetched")

View file

@ -245,31 +245,57 @@ global_llm_configs:
# =============================================================================
# When enabled, dynamically fetches ALL available models from the OpenRouter API
# and injects them as global configs. This gives premium users access to any model
# on OpenRouter (Claude, Gemini, Llama, Mistral, etc.) via their premium token quota.
# on OpenRouter (Claude, Gemini, Llama, Mistral, etc.) via their premium token quota,
# while free-tier OpenRouter models show up with a green Free badge and do NOT
# consume premium quota.
# Models are fetched at startup and refreshed periodically in the background.
# All calls go through LiteLLM with the openrouter/ prefix.
openrouter_integration:
enabled: false
api_key: "sk-or-your-openrouter-api-key"
# billing_tier: "premium" or "free". Controls whether users need premium tokens.
billing_tier: "premium"
# anonymous_enabled: set true to also show OpenRouter models to no-login users
anonymous_enabled: false
# Tier is derived PER MODEL from OpenRouter's own API signals:
# - id ends with ":free" -> billing_tier=free
# - pricing.prompt == "0" AND pricing.completion == "0" -> billing_tier=free
# - otherwise -> billing_tier=premium
# No global billing_tier knob is honored; any legacy value emits a startup warning.
# Anonymous access is split by tier so operators can expose only free
# models to no-login users without leaking paid inference.
anonymous_enabled_paid: false
anonymous_enabled_free: false
seo_enabled: false
# quota_reserve_tokens: tokens reserved per call for quota enforcement
quota_reserve_tokens: 4000
# id_offset: starting negative ID for dynamically generated configs.
# Must not overlap with your static global_llm_configs IDs above.
# id_offset: base negative ID for dynamically generated configs.
# Model IDs are derived deterministically via BLAKE2b so they survive
# catalogue churn. Must not overlap with your static global_llm_configs IDs.
id_offset: -10000
# refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only)
refresh_interval_hours: 24
# rpm/tpm: Applied uniformly to all OpenRouter models for LiteLLM Router load balancing.
# OpenRouter doesn't expose per-model rate limits via API; actual throttling is handled
# upstream by OpenRouter itself (your account limits are at https://openrouter.ai/settings/limits).
# These values only matter if you set billing_tier to "free" (adding them to Auto mode).
# For premium-only models they are cosmetic. Set conservatively or match your account tier.
# Rate limits for PAID OpenRouter models. These are used by LiteLLM Router
# for per-deployment accounting when premium OpenRouter models participate in
# the shared sub-agent "auto" pool. They do NOT cap OpenRouter itself — your
# real account limits live at https://openrouter.ai/settings/limits.
rpm: 200
tpm: 1000000
# Rate limits for FREE OpenRouter models. Informational only: free OpenRouter
# models and openrouter/free are intentionally kept OUT of the LiteLLM
# Router pool, because OpenRouter enforces free-tier limits globally per
# account (~20 RPM + 50-1000 daily requests across every ":free" model
# combined) — per-deployment router accounting can't represent a shared
# bucket correctly. Free OpenRouter models stay fully available in the model
# selector and for user-facing Auto thread pinning.
free_rpm: 20
free_tpm: 100000
# Expose openrouter/free as a single virtual "Free (Auto-Select)" entry.
# Recommended: keep true. OpenRouter picks a capable free model per request.
free_router_enabled: true
litellm_params:
max_tokens: 16384
system_instructions: ""