# Mirror of https://github.com/MODSetter/SurfSense.git
# Synced 2026-06-18 21:15:16 +02:00 (320 lines, 12 KiB, YAML)
# Global LLM Configuration
#
# SETUP INSTRUCTIONS:
# 1. Copy this file to global_llm_config.yaml.
# 2. Replace placeholder credentials, endpoints, deployment names, and pricing
#    with values from your own provider accounts.
#
# This file is intentionally safe to commit. Do not put real API keys in this
# example file.
#
# These YAML entries are materialized at startup as server-owned GLOBAL
# connections and models:
#
#   global_llm_configs              -> GLOBAL chat models
#   global_image_generation_configs -> GLOBAL image generation models
#
# Do not add global_connections or global_models sections here. They are
# runtime-derived metadata exposed through the model-connections APIs.
#
# Static config shape:
#   - Connection fields: provider, api_key, api_base, api_version
#   - Model fields: model_name, billing_tier, rpm/tpm, capabilities, litellm_params
#   - Public no-login SEO metadata: seo_title, seo_description
#   - Prompt defaults: system_instructions, use_default_system_instructions,
#     citations_enabled
#
# Provider notes:
#   - Set provider to the canonical provider adapter name (for example azure,
#     openai, anthropic, openrouter).
#   - For Azure, use the bare deployment name in model_name, for example
#     model_name: "gpt-5.1". The resolver prefixes the LiteLLM model string from
#     provider: "azure".
#
# GLOBAL ID namespace:
#   - ID 0 is reserved for Auto mode.
#   - Negative IDs are server-owned GLOBAL models.
#   - Positive IDs are user/BYOK database models.
#   - Keep static IDs unique across chat and image generation.
#   - Suggested static ranges: chat -1..-999, image -2001..-2999.
#   - Vision is not a separate config/table. Chat models that accept images use
#     supports_image_input: true.
#
# COST-BASED PREMIUM CREDITS:
# Each premium model bills the user's USD-credit balance based on provider cost
# reported by LiteLLM. For custom Azure deployments or any model LiteLLM does
# not know, declare per-token costs inline:
#
#   litellm_params:
#     base_model: "my-custom-deployment"
#     # USD per token; 0.00000125 == $1.25 per million input tokens.
#     input_cost_per_token: 0.00000125
#     output_cost_per_token: 0.00001
#
# OpenRouter dynamic chat models pull pricing automatically from OpenRouter's
# API. Models without resolvable pricing debit $0 and log a warning.

# =============================================================================
# Chat Auto Mode Router Settings
# =============================================================================
# These settings control how the LiteLLM Router distributes Auto-mode requests
# across curated router-eligible GLOBAL chat deployments.
router_settings:
  # Routing strategy options:
  #   - "usage-based-routing": Routes to deployment with lowest current usage.
  #   - "simple-shuffle": Random distribution with optional RPM/TPM weighting.
  #   - "least-busy": Routes to least busy deployment.
  #   - "latency-based-routing": Routes based on response latency.
  routing_strategy: "usage-based-routing"

  # Retry/cooldown knobs. NOTE(review): the key names match LiteLLM Router
  # options; confirm exact semantics and units (cooldown_time is presumably
  # seconds) against the LiteLLM routing documentation.
  num_retries: 3
  allowed_fails: 3
  cooldown_time: 60

  # Optional fallback map (uncomment both lines to enable):
  # fallbacks:
  #   - {"azure/gpt-5.1": ["azure/gpt-5.4-mini"]}

# =============================================================================
# Static GLOBAL Chat Models
# =============================================================================
# Each list item is one server-owned chat model. Keys at item level describe
# the connection, catalog/access flags and prompt defaults; keys nested under
# litellm_params carry the base_model and optional inline pricing.
global_llm_configs:
  # Premium Azure chat model with image input support and explicit custom
  # pricing. This is the current shape to use for hosted GPT 5.x deployments.
  - id: -1
    name: "Azure GPT 5.1"
    billing_tier: "premium"
    anonymous_enabled: false
    seo_enabled: false
    seo_slug: "azure-gpt-5-1"
    quota_reserve_tokens: 4000
    provider: "azure"
    model_name: "gpt-5.1"
    supports_image_input: true
    supports_tools: true
    max_input_tokens: 400000
    api_key: "your-azure-api-key-here"
    api_base: "https://your-resource.openai.azure.com"
    # api_version is optional. Include it if your Azure deployment requires a
    # specific API version.
    # api_version: "2025-04-01-preview"
    rpm: 47500
    tpm: 14750000
    litellm_params:
      max_tokens: 16384
      base_model: "gpt-5.1"
      # USD per token: $1.25 per million input, $10 per million output.
      input_cost_per_token: 0.00000125
      output_cost_per_token: 0.00001
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: true

  # Larger premium chat model. If your provider prices long-context traffic
  # differently, choose a conservative flat price or document the limitation
  # next to the inline pricing.
  - id: -2
    name: "Azure GPT 5.4"
    billing_tier: "premium"
    anonymous_enabled: false
    seo_enabled: false
    seo_slug: "azure-gpt-5-4"
    quota_reserve_tokens: 4000
    provider: "azure"
    model_name: "gpt-5.4"
    supports_image_input: true
    supports_tools: true
    max_input_tokens: 400000
    api_key: "your-azure-api-key-here"
    api_base: "https://your-resource.openai.azure.com"
    rpm: 150000
    tpm: 15000000
    litellm_params:
      max_tokens: 16384
      base_model: "gpt-5.4"
      # USD per token: $2.50 per million input, $15 per million output.
      input_cost_per_token: 0.0000025
      output_cost_per_token: 0.000015
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: true

  # Free/no-login hosted model. Free models are visible to users when
  # anonymous_enabled/seo_enabled are true but do not debit premium credits.
  - id: -3
    name: "Azure GPT 5.4 Mini"
    billing_tier: "free"
    anonymous_enabled: true
    seo_enabled: true
    seo_slug: "gpt-5-4-mini-no-login"
    seo_title: "Free GPT 5.4 Mini Chat"
    seo_description: "Chat with a hosted GPT 5.4 Mini model without signing in."
    quota_reserve_tokens: 4000
    provider: "azure"
    model_name: "gpt-5.4-mini"
    supports_image_input: false
    supports_tools: true
    max_input_tokens: 128000
    api_key: "your-azure-api-key-here"
    api_base: "https://your-resource.openai.azure.com"
    rpm: 15000
    tpm: 15000000
    litellm_params:
      max_tokens: 16384
      base_model: "gpt-5.4-mini"
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: true

  # Planner LLM. This is operator-only and is not shown in the user-facing
  # model selector. Only one global_llm_configs entry should set is_planner.
  - id: -9
    name: "Azure GPT 5.x Nano Planner"
    is_planner: true
    billing_tier: "free"
    anonymous_enabled: false
    seo_enabled: false
    quota_reserve_tokens: 1000
    provider: "azure"
    model_name: "gpt-5.4-nano"
    supports_image_input: false
    supports_tools: false
    # Keeps the planner out of the shared Auto-mode router pool.
    router_pool_eligible: false
    api_key: "your-azure-api-key-here"
    api_base: "https://your-resource.openai.azure.com"
    rpm: 20000
    tpm: 4000000
    litellm_params:
      temperature: 0
      max_tokens: 1000
      base_model: "gpt-5.4-nano"
    system_instructions: ""
    use_default_system_instructions: true
    citations_enabled: false

# =============================================================================
# OpenRouter Dynamic Model Integration
# =============================================================================
# When enabled, SurfSense fetches the OpenRouter catalog at startup and injects
# supported models as GLOBAL chat and optionally image-generation models.
# Tier is derived per model from OpenRouter data:
#   - model id ends with ":free" -> billing_tier=free
#   - prompt and completion pricing are zero -> billing_tier=free
#   - otherwise -> billing_tier=premium
#
# Do not use deprecated openrouter_integration.billing_tier or
# openrouter_integration.anonymous_enabled. Use the tier-specific anonymous
# switches below.
openrouter_integration:
  enabled: false
  api_key: "sk-or-your-openrouter-api-key"

  # Tier-specific anonymous (no-login) switches for paid and free models.
  anonymous_enabled_paid: false
  anonymous_enabled_free: false
  seo_enabled: false
  quota_reserve_tokens: 4000

  # Base negative ID namespace for dynamic chat models. IDs are derived
  # deterministically so they survive catalog churn. Do not overlap static IDs.
  id_offset: -10000

  # Separate base negative ID namespace for dynamic image-generation models.
  image_id_offset: -20000

  # How often to refresh the OpenRouter catalog. 0 means startup only.
  refresh_interval_hours: 24

  # Paid OpenRouter models may join curated router pools when eligible.
  rpm: 200
  tpm: 1000000

  # Free OpenRouter models are available for user-facing selection/pinning but
  # should be treated as a shared-account bucket, not normal router capacity.
  free_rpm: 20
  free_tpm: 100000

  # Image generation is opt-in to avoid injecting a large image catalog during
  # upgrades. Vision-capable chat models are represented with
  # supports_image_input: true.
  image_generation_enabled: false
  # NOTE(review): vision_enabled is not described in this file's field notes;
  # confirm the loader still reads it before relying on it.
  vision_enabled: false

  litellm_params:
    max_tokens: 16384
  system_instructions: ""
  use_default_system_instructions: true
  citations_enabled: true

# =============================================================================
# Image Generation Auto Mode Router Settings
# =============================================================================
# Uses the same keys as router_settings, applied to image-generation models.
image_generation_router_settings:
  routing_strategy: "usage-based-routing"
  num_retries: 3
  allowed_fails: 3
  cooldown_time: 60

# =============================================================================
# Static GLOBAL Image Generation Models
# =============================================================================
# Each list item is one server-owned image-generation model. IDs share the
# GLOBAL namespace with chat models, so keep them unique across both lists.
global_image_generation_configs:
  - id: -2001
    name: "Azure GPT Image 1.5"
    billing_tier: "premium"
    provider: "azure"
    model_name: "gpt-image-1.5"
    api_key: "your-azure-api-key-here"
    api_base: "https://your-resource.openai.azure.com"
    # api_version: "2025-04-01-preview"
    rpm: 60
    litellm_params:
      base_model: "gpt-image-1.5"

  - id: -2002
    name: "Azure GPT Image 1 Mini"
    billing_tier: "free"
    provider: "azure"
    model_name: "gpt-image-1-mini"
    api_key: "your-azure-api-key-here"
    api_base: "https://your-resource.openai.azure.com"
    # api_version: "2025-04-01-preview"
    rpm: 120
    litellm_params:
      base_model: "gpt-image-1-mini"

# =============================================================================
# Field Notes
# =============================================================================
# Common chat/image fields:
# - provider: Canonical provider adapter name. Example: azure, openai,
#   anthropic, openrouter, groq, bedrock.
# - model_name: Provider model or deployment id. For Azure, use the bare
#   deployment name. The resolver prefixes LiteLLM model strings from provider.
# - api_base: Provider endpoint/root URL. For OpenAI-compatible providers, the
#   resolver adds /v1 when needed.
# - api_version: Optional provider-specific API version, stored on the
#   materialized connection extra metadata.
# - litellm_params: Passed to LiteLLM when invoking the model. Also used for
#   base_model and inline pricing registration.
#
# Chat model fields:
# - supports_image_input: true when the chat model can consume image inputs.
# - supports_tools: true when the model can use tools/function calling.
# - max_input_tokens: Optional UI/catalog metadata for context size.
# - router_pool_eligible: false keeps a model out of shared router pools while
#   still allowing direct selection/pinning.
# - is_planner: true marks the internal-only planner model. Only one config
#   should set this flag.
#
# Catalog and access fields:
# - billing_tier: "free" or "premium".
# - anonymous_enabled: Whether the model appears in the public no-login catalog.
# - seo_enabled: Whether a /free/<seo_slug> landing page is generated.
# - seo_slug: Stable URL slug for SEO pages. Keep unique and do not change once
#   public.
# - seo_title / seo_description: Optional SEO metadata overrides.
# - quota_reserve_tokens: Tokens reserved before each chat LLM call.
# - rpm / tpm: Optional rate limits for router accounting and load balancing.
#
# Image generation notes:
# - Image-generation configs use the same GLOBAL ID namespace as chat models.
# - Only RPM is relevant for most image-generation APIs.
# - The runtime uses litellm.aimage_generation().
# - Image billing currently uses billing_tier and model catalog metadata. Keep
#   quota reserve tuning in code/catalog unless the materializer copies a YAML
#   key for image quota reservation.