Mirror of https://github.com/MODSetter/SurfSense.git, synced 2026-04-26 17:26:23 +02:00
feat(story-3.5): add cloud-mode LLM model selection with token quota enforcement
Implement a system-managed model catalog, subscription tier enforcement, atomic token quota tracking, and frontend cloud/self-hosted conditional rendering. Apply all 20 BMAD code-review patches, including security fixes (cross-user API key hijack), race-condition mitigation (atomic SQL UPDATE), and SSE mid-stream quota error handling.

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
parent e7382b26de
commit c1776b3ec8

19 changed files with 1003 additions and 34 deletions
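The "atomic token quota tracking" in the commit message means the quota check and the decrement happen in a single SQL statement rather than a read-modify-write sequence. A minimal sketch of that pattern, assuming a SQLAlchemy async session; the table and column names (user_token_quota, tokens_remaining) are hypothetical, not taken from the repo:

# Hedged sketch (not the actual SurfSense code): an atomic quota decrement
# that avoids the read-then-write race the commit message describes.
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession


async def try_consume_tokens(session: AsyncSession, user_id: int, tokens: int) -> bool:
    """Atomically deduct `tokens` from the user's remaining quota.

    The WHERE clause folds the balance check into the UPDATE itself, so two
    concurrent requests cannot both pass a stale balance check.
    """
    result = await session.execute(
        text(
            "UPDATE user_token_quota "
            "SET tokens_remaining = tokens_remaining - :tokens "
            "WHERE user_id = :user_id AND tokens_remaining >= :tokens"
        ),
        {"tokens": tokens, "user_id": user_id},
    )
    await session.commit()
    return result.rowcount == 1  # 0 rows updated => quota exhausted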
@@ -52,6 +52,9 @@ global_llm_configs:
     model_name: "gpt-4-turbo-preview"
     api_key: "sk-your-openai-api-key-here"
     api_base: ""
+    tier_required: "pro" # free | pro | enterprise
+    cost_per_1k_input_tokens: 0.01
+    cost_per_1k_output_tokens: 0.03
     # Rate limits for load balancing (requests/tokens per minute)
     rpm: 500 # Requests per minute
     tpm: 100000 # Tokens per minute
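The tier_required field added above is what the backend compares against the user's subscription tier. A minimal sketch of that gate; TIER_RANK and ensure_tier_allows_model are hypothetical names, not taken from the repo (SurfSense's backend is FastAPI, so an HTTPException is a plausible failure mode):

# Hedged sketch: tier gating driven by the catalog's tier_required field.
from fastapi import HTTPException

TIER_RANK = {"free": 0, "pro": 1, "enterprise": 2}


def ensure_tier_allows_model(user_tier: str, model_config: dict) -> None:
    """Reject the request if the user's tier is below the model's requirement."""
    required = model_config.get("tier_required", "free")
    if TIER_RANK[user_tier] < TIER_RANK[required]:
        raise HTTPException(
            status_code=403,
            detail=f"Model requires '{required}' tier; your tier is '{user_tier}'.",
        )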
@@ -71,6 +74,9 @@ global_llm_configs:
     model_name: "claude-3-opus-20240229"
     api_key: "sk-ant-your-anthropic-api-key-here"
     api_base: ""
+    tier_required: "pro"
+    cost_per_1k_input_tokens: 0.015
+    cost_per_1k_output_tokens: 0.075
     rpm: 1000
     tpm: 100000
     litellm_params:
@@ -88,6 +94,9 @@ global_llm_configs:
     model_name: "gpt-3.5-turbo"
     api_key: "sk-your-openai-api-key-here"
     api_base: ""
+    tier_required: "free"
+    cost_per_1k_input_tokens: 0.0005
+    cost_per_1k_output_tokens: 0.0015
     rpm: 3500 # GPT-3.5 has higher rate limits
     tpm: 200000
     litellm_params:
@@ -105,6 +114,9 @@ global_llm_configs:
     model_name: "deepseek-chat"
     api_key: "your-deepseek-api-key-here"
     api_base: "https://api.deepseek.com/v1"
+    tier_required: "free"
+    cost_per_1k_input_tokens: 0.0001
+    cost_per_1k_output_tokens: 0.0002
     rpm: 60
     tpm: 100000
     litellm_params:
@@ -134,6 +146,9 @@ global_llm_configs:
     api_key: "your-azure-api-key-here"
     api_base: "https://your-resource.openai.azure.com"
     api_version: "2024-02-15-preview" # Azure API version
+    tier_required: "pro"
+    cost_per_1k_input_tokens: 0.005
+    cost_per_1k_output_tokens: 0.015
     rpm: 1000
     tpm: 150000
     litellm_params:
@@ -156,6 +171,9 @@ global_llm_configs:
     api_key: "your-azure-api-key-here"
     api_base: "https://your-resource.openai.azure.com"
     api_version: "2024-02-15-preview"
+    tier_required: "pro"
+    cost_per_1k_input_tokens: 0.01
+    cost_per_1k_output_tokens: 0.03
     rpm: 500
     tpm: 100000
     litellm_params:
@@ -174,6 +192,9 @@ global_llm_configs:
     model_name: "llama3-70b-8192"
     api_key: "your-groq-api-key-here"
     api_base: ""
+    tier_required: "pro"
+    cost_per_1k_input_tokens: 0.00059
+    cost_per_1k_output_tokens: 0.00079
     rpm: 30 # Groq has lower rate limits on free tier
     tpm: 14400
     litellm_params:
@@ -191,6 +212,9 @@ global_llm_configs:
     model_name: "MiniMax-M2.5"
     api_key: "your-minimax-api-key-here"
     api_base: "https://api.minimax.io/v1"
+    tier_required: "free"
+    cost_per_1k_input_tokens: 0.001
+    cost_per_1k_output_tokens: 0.003
     rpm: 60
     tpm: 100000
     litellm_params:
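The commit message also calls out SSE mid-stream quota error handling: once a streaming response has started, the server can no longer change the HTTP status, so quota exhaustion has to arrive as an in-stream event. A sketch of that pattern; QuotaExceededError and the event payload shapes are hypothetical, not repo names:

# Hedged sketch: delivering a quota error inside an already-open SSE stream.
import json


class QuotaExceededError(Exception):
    """Raised when the user's token quota runs out mid-generation."""


async def stream_completion(llm_stream):
    try:
        async for chunk in llm_stream:
            yield f"data: {json.dumps({'type': 'token', 'content': chunk})}\n\n"
    except QuotaExceededError:
        # Headers went out with the first chunk, so an HTTP 429 is no longer
        # possible; emit a terminal error event so the client can stop cleanly.
        yield f"data: {json.dumps({'type': 'error', 'code': 'quota_exceeded'})}\n\n"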
@@ -347,6 +371,10 @@ global_vision_llm_configs:
 # - system_instructions: Custom prompt or empty string to use defaults
 # - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty
 # - citations_enabled: true = include citation instructions, false = include anti-citation instructions
+# - tier_required: "free" | "pro" | "enterprise". Subscription tier needed to use this model.
+#   If omitted, tier is inferred from model_name via pattern matching (fragile).
+# - cost_per_1k_input_tokens / cost_per_1k_output_tokens: Optional cost metadata for display.
+#   Not used for billing (token quota is flat), but shown in the UI for transparency.
 # - All standard LiteLLM providers are supported
 # - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute)
 #   These help the router distribute load evenly and avoid rate limit errors
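The rpm/tpm keys line up with LiteLLM's Router, which accepts per-deployment rate limits and uses them to distribute requests across deployments. A plausible wiring of one catalog entry into a Router, following the litellm Router docs; this is illustrative, not necessarily how SurfSense constructs its router:

# Hedged sketch: feeding one catalog entry's rpm/tpm limits into litellm's Router.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4-turbo-preview",  # alias clients request
            "litellm_params": {
                "model": "openai/gpt-4-turbo-preview",
                "api_key": "sk-your-openai-api-key-here",
                "rpm": 500,     # requests per minute for this deployment
                "tpm": 100000,  # tokens per minute for this deployment
            },
        },
    ],
    routing_strategy="usage-based-routing",  # pick deployment by remaining rpm/tpm
)

response = router.completion(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Hello"}],
)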