mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-29 10:56:24 +02:00
cloud: added openrouter integration with global configs
This commit is contained in:
parent
ff4e0f9b62
commit
4a51ccdc2c
26 changed files with 911 additions and 178 deletions
|
|
@ -187,24 +187,82 @@ def load_image_gen_router_settings():
|
|||
return default_settings
|
||||
|
||||
|
||||
def load_openrouter_integration_settings() -> dict | None:
    """
    Load OpenRouter integration settings from the YAML config.

    Reads the ``openrouter_integration`` section of
    ``app/config/global_llm_config.yaml`` under ``BASE_DIR``.

    Returns:
        dict with settings if present and enabled, None otherwise
        (missing file, disabled section, empty file, or parse error).
    """
    global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"

    if not global_config_file.exists():
        return None

    try:
        with open(global_config_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # safe_load returns None for an empty/comment-only file; treat that
        # as "no settings" instead of letting data.get() raise AttributeError
        # and print a spurious "Failed to load" warning below.
        settings = (data or {}).get("openrouter_integration")
        if settings and settings.get("enabled"):
            return settings
        return None
    except Exception as e:
        # Best-effort loader: a malformed config must not break startup.
        print(f"Warning: Failed to load OpenRouter integration settings: {e}")
        return None
|
||||
|
||||
|
||||
def initialize_openrouter_integration():
    """
    If enabled, fetch all OpenRouter models and append them to
    config.GLOBAL_LLM_CONFIGS as dynamic premium entries.

    Should be called BEFORE initialize_llm_router() so the router
    correctly excludes premium models from Auto mode.
    """
    integration_settings = load_openrouter_integration_settings()
    if not integration_settings:
        # Integration is disabled or not configured — nothing to do.
        return

    try:
        # Imported lazily inside the try so a missing or broken service
        # module degrades to a startup warning rather than a crash.
        from app.services.openrouter_integration_service import (
            OpenRouterIntegrationService,
        )

        integration_service = OpenRouterIntegrationService.get_instance()
        dynamic_configs = integration_service.initialize(integration_settings)

        if not dynamic_configs:
            print("Info: OpenRouter integration enabled but no models fetched")
            return

        # Inject the fetched models into the in-memory global config list.
        config.GLOBAL_LLM_CONFIGS.extend(dynamic_configs)
        print(
            f"Info: OpenRouter integration added {len(dynamic_configs)} models "
            f"(billing_tier={integration_settings.get('billing_tier', 'premium')})"
        )
    except Exception as e:
        # Startup must survive OpenRouter being unreachable or misconfigured.
        print(f"Warning: Failed to initialize OpenRouter integration: {e}")
|
||||
|
||||
|
||||
def initialize_llm_router():
|
||||
"""
|
||||
Initialize the LLM Router service for Auto mode.
|
||||
This should be called during application startup.
|
||||
This should be called during application startup, AFTER
|
||||
initialize_openrouter_integration() so dynamic models are included.
|
||||
Uses config.GLOBAL_LLM_CONFIGS (in-memory) which includes both
|
||||
static YAML configs and dynamic OpenRouter models.
|
||||
"""
|
||||
global_configs = load_global_llm_configs()
|
||||
all_configs = config.GLOBAL_LLM_CONFIGS
|
||||
router_settings = load_router_settings()
|
||||
|
||||
if not global_configs:
|
||||
if not all_configs:
|
||||
print("Info: No global LLM configs found, Auto mode will not be available")
|
||||
return
|
||||
|
||||
try:
|
||||
from app.services.llm_router_service import LLMRouterService
|
||||
|
||||
LLMRouterService.initialize(global_configs, router_settings)
|
||||
LLMRouterService.initialize(all_configs, router_settings)
|
||||
print(
|
||||
f"Info: LLM Router initialized with {len(global_configs)} models "
|
||||
f"Info: LLM Router initialized with {len(all_configs)} models "
|
||||
f"(strategy: {router_settings.get('routing_strategy', 'usage-based-routing')})"
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
@ -326,7 +384,7 @@ class Config:
|
|||
)
|
||||
|
||||
# Premium token quota settings
|
||||
PREMIUM_TOKEN_LIMIT = int(os.getenv("PREMIUM_TOKEN_LIMIT", "5000000"))
|
||||
PREMIUM_TOKEN_LIMIT = int(os.getenv("PREMIUM_TOKEN_LIMIT", "3000000"))
|
||||
STRIPE_PREMIUM_TOKEN_PRICE_ID = os.getenv("STRIPE_PREMIUM_TOKEN_PRICE_ID")
|
||||
STRIPE_TOKENS_PER_UNIT = int(os.getenv("STRIPE_TOKENS_PER_UNIT", "1000000"))
|
||||
STRIPE_TOKEN_BUYING_ENABLED = (
|
||||
|
|
@ -335,9 +393,9 @@ class Config:
|
|||
|
||||
# Anonymous / no-login mode settings
|
||||
NOLOGIN_MODE_ENABLED = os.getenv("NOLOGIN_MODE_ENABLED", "FALSE").upper() == "TRUE"
|
||||
ANON_TOKEN_LIMIT = int(os.getenv("ANON_TOKEN_LIMIT", "1000000"))
|
||||
ANON_TOKEN_LIMIT = int(os.getenv("ANON_TOKEN_LIMIT", "500000"))
|
||||
ANON_TOKEN_WARNING_THRESHOLD = int(
|
||||
os.getenv("ANON_TOKEN_WARNING_THRESHOLD", "800000")
|
||||
os.getenv("ANON_TOKEN_WARNING_THRESHOLD", "400000")
|
||||
)
|
||||
ANON_TOKEN_QUOTA_TTL_DAYS = int(os.getenv("ANON_TOKEN_QUOTA_TTL_DAYS", "30"))
|
||||
ANON_MAX_UPLOAD_SIZE_MB = int(os.getenv("ANON_MAX_UPLOAD_SIZE_MB", "5"))
|
||||
|
|
@ -450,6 +508,9 @@ class Config:
|
|||
# Router settings for Vision LLM Auto mode
|
||||
VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
|
||||
|
||||
# OpenRouter Integration settings (optional)
|
||||
OPENROUTER_INTEGRATION_SETTINGS = load_openrouter_integration_settings()
|
||||
|
||||
# Chonkie Configuration | Edit this to your needs
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
|
||||
# Azure OpenAI credentials from environment variables
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# Global LLM Configuration
|
||||
#
|
||||
#
|
||||
# SETUP INSTRUCTIONS:
|
||||
# 1. For production: Copy this file to global_llm_config.yaml and add your real API keys
|
||||
# 2. For testing: The system will use this example file automatically if global_llm_config.yaml doesn't exist
|
||||
|
|
@ -29,16 +29,16 @@ router_settings:
|
|||
# - "least-busy": Routes to least busy deployment
|
||||
# - "latency-based-routing": Routes based on response latency
|
||||
routing_strategy: "usage-based-routing"
|
||||
|
||||
|
||||
# Number of retries before failing
|
||||
num_retries: 3
|
||||
|
||||
|
||||
# Number of failures allowed before cooling down a deployment
|
||||
allowed_fails: 3
|
||||
|
||||
|
||||
# Cooldown time in seconds after allowed_fails is exceeded
|
||||
cooldown_time: 60
|
||||
|
||||
|
||||
# Fallback models (optional) - when primary fails, try these
|
||||
# Format: [{"primary_model": ["fallback1", "fallback2"]}]
|
||||
# fallbacks: []
|
||||
|
|
@ -58,13 +58,13 @@ global_llm_configs:
|
|||
api_key: "sk-your-openai-api-key-here"
|
||||
api_base: ""
|
||||
# Rate limits for load balancing (requests/tokens per minute)
|
||||
rpm: 500 # Requests per minute
|
||||
tpm: 100000 # Tokens per minute
|
||||
rpm: 500 # Requests per minute
|
||||
tpm: 100000 # Tokens per minute
|
||||
litellm_params:
|
||||
temperature: 0.7
|
||||
max_tokens: 4000
|
||||
# Prompt Configuration
|
||||
system_instructions: "" # Empty = use default SURFSENSE_SYSTEM_INSTRUCTIONS
|
||||
system_instructions: "" # Empty = use default SURFSENSE_SYSTEM_INSTRUCTIONS
|
||||
use_default_system_instructions: true
|
||||
citations_enabled: true
|
||||
|
||||
|
|
@ -103,14 +103,14 @@ global_llm_configs:
|
|||
model_name: "gpt-3.5-turbo"
|
||||
api_key: "sk-your-openai-api-key-here"
|
||||
api_base: ""
|
||||
rpm: 3500 # GPT-3.5 has higher rate limits
|
||||
rpm: 3500 # GPT-3.5 has higher rate limits
|
||||
tpm: 200000
|
||||
litellm_params:
|
||||
temperature: 0.5
|
||||
max_tokens: 2000
|
||||
system_instructions: ""
|
||||
use_default_system_instructions: true
|
||||
citations_enabled: false # Disabled for faster responses
|
||||
citations_enabled: false # Disabled for faster responses
|
||||
|
||||
# Example: Chinese LLM - DeepSeek with custom instructions
|
||||
- id: -4
|
||||
|
|
@ -134,9 +134,9 @@ global_llm_configs:
|
|||
system_instructions: |
|
||||
<system_instruction>
|
||||
You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base.
|
||||
|
||||
|
||||
Today's date (UTC): {resolved_today}
|
||||
|
||||
|
||||
IMPORTANT: Please respond in Chinese (简体中文) unless the user specifically requests another language.
|
||||
</system_instruction>
|
||||
use_default_system_instructions: false
|
||||
|
|
@ -158,7 +158,7 @@ global_llm_configs:
|
|||
model_name: "azure/gpt-4o-deployment"
|
||||
api_key: "your-azure-api-key-here"
|
||||
api_base: "https://your-resource.openai.azure.com"
|
||||
api_version: "2024-02-15-preview" # Azure API version
|
||||
api_version: "2024-02-15-preview" # Azure API version
|
||||
rpm: 1000
|
||||
tpm: 150000
|
||||
litellm_params:
|
||||
|
|
@ -191,7 +191,7 @@ global_llm_configs:
|
|||
litellm_params:
|
||||
temperature: 0.7
|
||||
max_tokens: 4000
|
||||
base_model: "gpt-4-turbo" # Maps to gpt-4-turbo-preview
|
||||
base_model: "gpt-4-turbo" # Maps to gpt-4-turbo-preview
|
||||
system_instructions: ""
|
||||
use_default_system_instructions: true
|
||||
citations_enabled: true
|
||||
|
|
@ -209,7 +209,7 @@ global_llm_configs:
|
|||
model_name: "llama3-70b-8192"
|
||||
api_key: "your-groq-api-key-here"
|
||||
api_base: ""
|
||||
rpm: 30 # Groq has lower rate limits on free tier
|
||||
rpm: 30 # Groq has lower rate limits on free tier
|
||||
tpm: 14400
|
||||
litellm_params:
|
||||
temperature: 0.7
|
||||
|
|
@ -234,12 +234,48 @@ global_llm_configs:
|
|||
rpm: 60
|
||||
tpm: 100000
|
||||
litellm_params:
|
||||
temperature: 1.0 # MiniMax requires temperature in (0.0, 1.0], cannot be 0
|
||||
temperature: 1.0 # MiniMax requires temperature in (0.0, 1.0], cannot be 0
|
||||
max_tokens: 4000
|
||||
system_instructions: ""
|
||||
use_default_system_instructions: true
|
||||
citations_enabled: true
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Integration
|
||||
# =============================================================================
|
||||
# When enabled, dynamically fetches ALL available models from the OpenRouter API
|
||||
# and injects them as global configs. This gives premium users access to any model
|
||||
# on OpenRouter (Claude, Gemini, Llama, Mistral, etc.) via their premium token quota.
|
||||
# Models are fetched at startup and refreshed periodically in the background.
|
||||
# All calls go through LiteLLM with the openrouter/ prefix.
|
||||
openrouter_integration:
|
||||
enabled: false
|
||||
api_key: "sk-or-your-openrouter-api-key"
|
||||
# billing_tier: "premium" or "free". Controls whether users need premium tokens.
|
||||
billing_tier: "premium"
|
||||
# anonymous_enabled: set true to also show OpenRouter models to no-login users
|
||||
anonymous_enabled: false
|
||||
seo_enabled: false
|
||||
# quota_reserve_tokens: tokens reserved per call for quota enforcement
|
||||
quota_reserve_tokens: 4000
|
||||
# id_offset: starting negative ID for dynamically generated configs.
|
||||
# Must not overlap with your static global_llm_configs IDs above.
|
||||
id_offset: -10000
|
||||
# refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only)
|
||||
refresh_interval_hours: 24
|
||||
# rpm/tpm: Applied uniformly to all OpenRouter models for LiteLLM Router load balancing.
|
||||
# OpenRouter doesn't expose per-model rate limits via API; actual throttling is handled
|
||||
# upstream by OpenRouter itself (your account limits are at https://openrouter.ai/settings/limits).
|
||||
# These values only matter if you set billing_tier to "free" (adding them to Auto mode).
|
||||
# For premium-only models they are cosmetic. Set conservatively or match your account tier.
|
||||
rpm: 200
|
||||
tpm: 1000000
|
||||
litellm_params:
|
||||
max_tokens: 16384
|
||||
system_instructions: ""
|
||||
use_default_system_instructions: true
|
||||
citations_enabled: true
|
||||
|
||||
# =============================================================================
|
||||
# Image Generation Configuration
|
||||
# =============================================================================
|
||||
|
|
@ -265,7 +301,7 @@ global_image_generation_configs:
|
|||
model_name: "dall-e-3"
|
||||
api_key: "sk-your-openai-api-key-here"
|
||||
api_base: ""
|
||||
rpm: 50 # Requests per minute (image gen is rate-limited by RPM, not tokens)
|
||||
rpm: 50 # Requests per minute (image gen is rate-limited by RPM, not tokens)
|
||||
litellm_params: {}
|
||||
|
||||
# Example: OpenAI GPT Image 1
|
||||
|
|
@ -394,7 +430,7 @@ global_vision_llm_configs:
|
|||
#
|
||||
# IMAGE GENERATION NOTES:
|
||||
# - Image generation configs use the same ID scheme as LLM configs (negative for global)
|
||||
# - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure),
|
||||
# - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure),
|
||||
# bedrock/* (AWS), vertex_ai/* (Google), recraft/* (Recraft), openrouter/* (OpenRouter)
|
||||
# - The router uses litellm.aimage_generation() for async image generation
|
||||
# - Only RPM (requests per minute) is relevant for image generation rate limiting.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue