---
# Global LLM Configuration
#
# SETUP INSTRUCTIONS:
# 1. For production: Copy this file to global_llm_config.yaml and add your real API keys
# 2. For testing: The system will use this example file automatically if
#    global_llm_config.yaml doesn't exist
#
# NOTE: The example API keys below are placeholders and won't work.
# Replace them with your actual API keys to enable global configurations.
# Keep the copy that contains real keys out of version control.
#
# These configurations will be available to all users as a convenient option.
# Users can choose to use these global configs or add their own.
#
# AUTO MODE (Recommended):
# - Auto mode (ID: 0) uses LiteLLM Router to automatically load balance across all global configs
# - This helps avoid rate limits by distributing requests across multiple providers
# - New users are automatically assigned Auto mode by default
# - Configure router_settings below to customize the load balancing behavior
#
# Structure matches NewLLMConfig:
# - LLM model configuration (provider, model_name, api_key, etc.)
# - Prompt configuration (system_instructions, citations_enabled)

# Router Settings for Auto Mode
# These settings control how the LiteLLM Router distributes requests across models
router_settings:
  # Routing strategy options:
  # - "usage-based-routing": Routes to deployment with lowest current usage
  #   (recommended for rate limits)
  # - "simple-shuffle": Random distribution with optional RPM/TPM weighting
  # - "least-busy": Routes to least busy deployment
  # - "latency-based-routing": Routes based on response latency
  routing_strategy: "usage-based-routing"

  # Number of retries before failing
  num_retries: 3

  # Number of failures allowed before cooling down a deployment
  allowed_fails: 3

  # Cooldown time in seconds after allowed_fails is exceeded
  cooldown_time: 60

  # Fallback models (optional) - when primary fails, try these
  # Format: [{"primary_model": ["fallback1", "fallback2"]}]
  # fallbacks: []

global_llm_configs:
  # Example: OpenAI GPT-4 Turbo with citations enabled
  - id: -1
    name: "Global GPT-4 Turbo"
    description: "OpenAI's GPT-4 Turbo with default prompts and citations"
    provider: "OPENAI"
    model_name: "gpt-4-turbo-preview"
    api_key: "sk-your-openai-api-key-here"
    api_base: ""
    # Rate limits for load balancing (requests/tokens per minute)
    rpm: 500  # Requests per minute
    tpm: 100000  # Tokens per minute
    litellm_params:
      temperature: 0.7
      max_tokens: 4000
    # Prompt Configuration
    system_instructions: ""  # Empty = use default SURFSENSE_SYSTEM_INSTRUCTIONS
    use_default_system_instructions: true
    citations_enabled: true

  # Example: Anthropic Claude 3 Opus
  - id: -2
    name: "Global Claude 3 Opus"
    description: "Anthropic's most capable model with citations"
    provider: "ANTHROPIC"
    model_name: "claude-3-opus-20240229"
    api_key: "sk-ant-your-anthropic-api-key-here"
    api_base: ""
    rpm: 1000
    tpm: 100000
    litellm_params:
      temperature: 0.7
      max_tokens: 4000
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: true

  # Example: Fast model - GPT-3.5 Turbo (citations disabled for speed)
  - id: -3
    name: "Global GPT-3.5 Turbo (Fast)"
    description: "Fast responses without citations for quick queries"
    provider: "OPENAI"
    model_name: "gpt-3.5-turbo"
    api_key: "sk-your-openai-api-key-here"
    api_base: ""
    rpm: 3500  # GPT-3.5 has higher rate limits
    tpm: 200000
    litellm_params:
      temperature: 0.5
      max_tokens: 2000
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: false  # Disabled for faster responses

  # Example: Chinese LLM - DeepSeek with custom instructions
  - id: -4
    name: "Global DeepSeek Chat (Chinese)"
    description: "DeepSeek optimized for Chinese language responses"
    provider: "DEEPSEEK"
    model_name: "deepseek-chat"
    api_key: "your-deepseek-api-key-here"
    api_base: "https://api.deepseek.com/v1"
    rpm: 60
    tpm: 100000
    litellm_params:
      temperature: 0.7
      max_tokens: 4000
    # Custom system instructions for Chinese responses.
    # {resolved_today} is a placeholder left verbatim in the prompt text;
    # presumably the application substitutes the current date - confirm.
    system_instructions: |
      You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base.

      Today's date (UTC): {resolved_today}

      IMPORTANT: Please respond in Chinese (简体中文) unless the user specifically requests another language.
    use_default_system_instructions: false
    citations_enabled: true

  # Example: Groq - Fast inference
  - id: -5
    name: "Global Groq Llama 3"
    description: "Ultra-fast Llama 3 70B via Groq"
    provider: "GROQ"
    model_name: "llama3-70b-8192"
    api_key: "your-groq-api-key-here"
    api_base: ""
    rpm: 30  # Groq has lower rate limits on free tier
    # NOTE(review): 14400 looks like a requests-per-day figure rather than a
    # tokens-per-minute limit - confirm against your Groq account's limits.
    tpm: 14400
    litellm_params:
      temperature: 0.7
      # The model name indicates an 8192-token context window. A completion
      # cap close to that leaves almost no room for the system prompt and the
      # user's messages, so keep it in line with the other configs.
      max_tokens: 4000
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: true

# Notes:
# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB)
# - IDs should be unique and sequential (e.g., -1, -2, -3, etc.)
# - The 'api_key' field will not be exposed to users via API
# - system_instructions: Custom prompt or empty string to use defaults
# - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS
#   when system_instructions is empty
# - citations_enabled: true = include citation instructions,
#   false = include anti-citation instructions
# - All standard LiteLLM providers are supported
# - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute)
#   These help the router distribute load evenly and avoid rate limit errors