def materialize_global_configs():
    """Build the virtual GLOBAL catalog from the currently loaded configs.

    Reads ``GLOBAL_LLM_CONFIGS``, ``GLOBAL_VISION_LLM_CONFIGS`` and
    ``GLOBAL_IMAGE_GEN_CONFIGS`` off the module-level ``config`` object
    (an absent attribute is treated as an empty list) and hands them to
    ``materialize_global_model_catalog``.

    Returns:
        Whatever ``materialize_global_model_catalog`` returns; the caller
        below unpacks it as ``(connections, models)``.
    """
    # Function-scope import kept from the original.
    # NOTE(review): presumably avoids an import cycle with app.config — confirm.
    from app.services.global_model_catalog import materialize_global_model_catalog

    attribute_by_keyword = {
        "chat_configs": "GLOBAL_LLM_CONFIGS",
        "vision_configs": "GLOBAL_VISION_LLM_CONFIGS",
        "image_configs": "GLOBAL_IMAGE_GEN_CONFIGS",
    }
    return materialize_global_model_catalog(
        **{
            keyword: getattr(config, attribute, [])
            for keyword, attribute in attribute_by_keyword.items()
        }
    )


def refresh_global_model_catalog():
    """Recompute the GLOBAL catalog and store it on ``config``.

    Overwrites ``config.GLOBAL_CONNECTIONS`` and ``config.GLOBAL_MODELS``
    with a freshly materialized pair.
    """
    config.GLOBAL_CONNECTIONS, config.GLOBAL_MODELS = materialize_global_configs()
+ from app.services.global_model_catalog import ( + materialize_global_model_catalog as _materialize_global_model_catalog, + ) + + GLOBAL_CONNECTIONS, GLOBAL_MODELS = _materialize_global_model_catalog( + chat_configs=GLOBAL_LLM_CONFIGS, + vision_configs=GLOBAL_VISION_LLM_CONFIGS, + image_configs=GLOBAL_IMAGE_GEN_CONFIGS, + ) + del _materialize_global_model_catalog + # OpenRouter Integration settings (optional) OPENROUTER_INTEGRATION_SETTINGS = load_openrouter_integration_settings() diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index 1c09a91ac..b0eee6458 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -7,8 +7,9 @@ # NOTE: The example API keys below are placeholders and won't work. # Replace them with your actual API keys to enable global configurations. # -# These configurations will be available to all users as a convenient option -# Users can choose to use these global configs or add their own +# These configurations are materialized as server-owned GLOBAL connections/models +# and become available on the Models page. Users can choose hosted/global models +# or add their own BYOK/local connections. # # AUTO MODE (Recommended): # - Auto mode (ID: 0) uses LiteLLM Router to automatically load balance across all global configs @@ -16,9 +17,12 @@ # - New users are automatically assigned Auto mode by default # - Configure router_settings below to customize the load balancing behavior # -# Structure matches NewLLMConfig: -# - Model configuration (provider, model_name, api_key, etc.) 
-# - Prompt configuration (system_instructions, citations_enabled) +# Static config shape: +# - Connection fields: provider, api_key, api_base, api_version +# - Model fields: model_name, billing_tier, rpm/tpm, litellm_params +# - Prompt defaults: system_instructions, citations_enabled +# IDs share one GLOBAL model namespace across chat, vision, and image generation. +# Suggested ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999. # # COST-BASED PREMIUM CREDITS: # Each premium config bills the user's USD-credit balance based on the @@ -327,7 +331,7 @@ openrouter_integration: quota_reserve_tokens: 4000 # id_offset: base negative ID for dynamically generated configs. # Model IDs are derived deterministically via BLAKE2b so they survive - # catalogue churn. Must not overlap with your static global_llm_configs IDs. + # catalogue churn. Must not overlap with any static GLOBAL model IDs. id_offset: -10000 # refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only) refresh_interval_hours: 24 @@ -351,8 +355,8 @@ openrouter_integration: # Image generation + vision LLM emission are OPT-IN. OpenRouter's catalogue # contains hundreds of image- and vision-capable models; turning these on - # injects them into the global Image-Generation / Vision-LLM model - # selectors alongside any static configs. Tier (free/premium) is derived + # injects them into the global image-generation / vision model lists + # alongside any static configs. Tier (free/premium) is derived # per model the same way it is for chat (`:free` suffix or zero pricing). 
# When a user picks a premium image/vision model the call debits the # shared $5 USD-cost-based premium credit pool — so leaving these off @@ -384,7 +388,7 @@ image_generation_router_settings: global_image_generation_configs: # Example: OpenAI DALL-E 3 - - id: -1 + - id: -2001 name: "Global DALL-E 3" description: "OpenAI's DALL-E 3 for high-quality image generation" provider: "OPENAI" @@ -395,7 +399,7 @@ global_image_generation_configs: litellm_params: {} # Example: OpenAI GPT Image 1 - - id: -2 + - id: -2002 name: "Global GPT Image 1" description: "OpenAI's GPT Image 1 model" provider: "OPENAI" @@ -406,7 +410,7 @@ global_image_generation_configs: litellm_params: {} # Example: Azure OpenAI DALL-E 3 - - id: -3 + - id: -2003 name: "Global Azure DALL-E 3" description: "Azure-hosted DALL-E 3 deployment" provider: "AZURE_OPENAI" @@ -419,7 +423,7 @@ global_image_generation_configs: base_model: "dall-e-3" # Example: OpenRouter Gemini Image Generation - # - id: -4 + # - id: -2004 # name: "Global Gemini Image Gen" # description: "Google Gemini image generation via OpenRouter" # provider: "OPENROUTER" @@ -448,7 +452,7 @@ vision_llm_router_settings: global_vision_llm_configs: # Example: OpenAI GPT-4o (recommended for vision) - - id: -1 + - id: -1001 name: "Global GPT-4o Vision" description: "OpenAI's GPT-4o with strong vision capabilities" provider: "OPENAI" @@ -462,7 +466,7 @@ global_vision_llm_configs: max_tokens: 1000 # Example: Google Gemini 2.0 Flash - - id: -2 + - id: -1002 name: "Global Gemini 2.0 Flash" description: "Google's fast vision model with large context" provider: "GOOGLE" @@ -476,7 +480,7 @@ global_vision_llm_configs: max_tokens: 1000 # Example: Anthropic Claude 3.5 Sonnet - - id: -3 + - id: -1003 name: "Global Claude 3.5 Sonnet Vision" description: "Anthropic's Claude 3.5 Sonnet with vision support" provider: "ANTHROPIC" @@ -490,7 +494,7 @@ global_vision_llm_configs: max_tokens: 1000 # Example: Azure OpenAI GPT-4o - # - id: -4 + # - id: -1004 # name: 
"Global Azure GPT-4o Vision" # description: "Azure-hosted GPT-4o for vision analysis" # provider: "AZURE_OPENAI" @@ -507,8 +511,9 @@ global_vision_llm_configs: # Notes: # - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing -# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB) -# - IDs should be unique and sequential (e.g., -1, -2, -3, etc.) +# - Use negative IDs to distinguish global models from BYOK/local DB models +# - IDs must be unique across chat, vision, and image generation configs +# - Suggested static ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999 # - The 'api_key' field will not be exposed to users via API # - system_instructions: Custom prompt or empty string to use defaults # - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty @@ -519,7 +524,7 @@ global_vision_llm_configs: # # # IMAGE GENERATION NOTES: -# - Image generation configs use the same ID scheme as LLM configs (negative for global) +# - Image generation configs use the shared GLOBAL ID namespace # - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure), # bedrock/* (AWS), vertex_ai/* (Google), recraft/* (Recraft), openrouter/* (OpenRouter) # - The router uses litellm.aimage_generation() for async image generation @@ -527,7 +532,7 @@ global_vision_llm_configs: # TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token. # # VISION LLM NOTES: -# - Vision configs use the same ID scheme (negative for global, positive for user DB) +# - Vision configs use the shared GLOBAL ID namespace # - Only use vision-capable models (GPT-4o, Gemini, Claude 3, etc.) 
# - Lower temperature (0.3) is recommended for accurate screenshot analysis # - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions diff --git a/surfsense_backend/app/schemas/new_llm_config.py b/surfsense_backend/app/schemas/new_llm_config.py index 716aa0457..2f04a9e66 100644 --- a/surfsense_backend/app/schemas/new_llm_config.py +++ b/surfsense_backend/app/schemas/new_llm_config.py @@ -229,7 +229,7 @@ class LLMPreferencesRead(BaseModel): description="ID of the vision LLM config to use for vision/screenshot analysis", ) agent_llm: dict[str, Any] | None = Field( - None, description="Full config for agent LLM" + None, description="Full config for chat model" ) image_generation_config: dict[str, Any] | None = Field( None, description="Full config for image generation" diff --git a/surfsense_backend/app/services/global_model_catalog.py b/surfsense_backend/app/services/global_model_catalog.py new file mode 100644 index 000000000..a43f58b9e --- /dev/null +++ b/surfsense_backend/app/services/global_model_catalog.py @@ -0,0 +1,142 @@ +"""Materialize server-owned GLOBAL YAML configs as virtual connections/models.""" + +from __future__ import annotations + +from typing import Any + +from app.services.model_resolver import native_connection_from_config + + +def _base_model(config: dict[str, Any]) -> str | None: + litellm_params = config.get("litellm_params") or {} + if isinstance(litellm_params, dict): + return litellm_params.get("base_model") + return None + + +def _connection_key(conn: dict[str, Any]) -> tuple[Any, ...]: + # Deliberately includes api_key because two operator-owned credentials for + # the same provider/base can have different quota/rate limits upstream. 
+ return ( + conn.get("protocol"), + conn.get("native_provider"), + conn.get("base_url"), + conn.get("api_key"), + _freeze(conn.get("extra") or {}), + ) + + +def _freeze(value: Any) -> Any: + if isinstance(value, dict): + return tuple(sorted((key, _freeze(val)) for key, val in value.items())) + if isinstance(value, list): + return tuple(_freeze(item) for item in value) + return value + + +def _capabilities_for(role: str, config: dict[str, Any]) -> dict[str, bool]: + return { + "chat": role == "chat", + "vision": role == "vision" or bool(config.get("supports_image_input")), + "image_gen": role == "image_gen", + "embedding": False, + "tools": bool(config.get("supports_tools", False)), + } + + +def _catalog_metadata(config: dict[str, Any]) -> dict[str, Any]: + return { + "billing_tier": config.get("billing_tier", "free"), + "quota_reserve_tokens": config.get("quota_reserve_tokens"), + "rpm": config.get("rpm"), + "tpm": config.get("tpm"), + "anonymous_enabled": config.get("anonymous_enabled", False), + "seo_enabled": config.get("seo_enabled", False), + "seo_slug": config.get("seo_slug"), + "input_cost_per_token": (config.get("litellm_params") or {}).get( + "input_cost_per_token" + ) + if isinstance(config.get("litellm_params"), dict) + else None, + "output_cost_per_token": (config.get("litellm_params") or {}).get( + "output_cost_per_token" + ) + if isinstance(config.get("litellm_params"), dict) + else None, + "is_planner": config.get("is_planner", False), + "base_model": _base_model(config), + "router_pool_eligible": config.get("router_pool_eligible", True), + } + + +def materialize_global_model_catalog( + *, + chat_configs: list[dict[str, Any]], + vision_configs: list[dict[str, Any]], + image_configs: list[dict[str, Any]], +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + connections: list[dict[str, Any]] = [] + models: list[dict[str, Any]] = [] + connection_id_by_key: dict[tuple[Any, ...], int] = {} + next_connection_id = -1 + + def add_config(config: 
dict[str, Any], role: str) -> None: + nonlocal next_connection_id + if not config.get("id") or not config.get("model_name"): + return + conn = native_connection_from_config(config) + conn["scope"] = "GLOBAL" + conn["enabled"] = True + conn["last_status"] = "OK" + key = _connection_key(conn) + connection_id = connection_id_by_key.get(key) + if connection_id is None: + connection_id = next_connection_id + next_connection_id -= 1 + connection_id_by_key[key] = connection_id + connections.append( + { + "id": connection_id, + **conn, + } + ) + + model_id = int(config["id"]) + models.append( + { + "id": model_id, + "connection_id": connection_id, + "model_id": config["model_name"], + "display_name": config.get("name") or config["model_name"], + "source": "MANUAL", + "capabilities": _capabilities_for(role, config), + "capabilities_declared": _capabilities_for(role, config), + "capabilities_verified": _capabilities_for(role, config), + "capabilities_override": {}, + "embedding_dimension": None, + "enabled": True, + "billing_tier": config.get("billing_tier", "free"), + "catalog": _catalog_metadata(config), + "role": role, + } + ) + + for cfg in chat_configs: + if cfg.get("is_auto_mode"): + continue + add_config(cfg, "chat") + for cfg in vision_configs: + if cfg.get("is_auto_mode"): + continue + add_config(cfg, "vision") + for cfg in image_configs: + if cfg.get("is_auto_mode"): + continue + add_config(cfg, "image_gen") + + # Each virtual connection is server-only. Callers that serialize these + # must strip api_key before returning data to clients. + return connections, models + + +__all__ = ["materialize_global_model_catalog"] diff --git a/surfsense_backend/app/services/model_resolver.py b/surfsense_backend/app/services/model_resolver.py new file mode 100644 index 000000000..ec485a5ae --- /dev/null +++ b/surfsense_backend/app/services/model_resolver.py @@ -0,0 +1,152 @@ +"""Single model-to-LiteLLM resolver. 
+ +All chat, vision, image-generation, validation, and Auto routing paths should +turn a Connection + Model into LiteLLM input through this module. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any + +from app.services.provider_api_base import resolve_api_base + +if TYPE_CHECKING: + from app.db import Connection + +PROTOCOL_OLLAMA = "OLLAMA" +PROTOCOL_OPENAI_COMPATIBLE = "OPENAI_COMPATIBLE" +PROTOCOL_NATIVE = "NATIVE" + +NATIVE_PROVIDER_PREFIX: dict[str, str] = { + "OPENAI": "openai", + "ANTHROPIC": "anthropic", + "GROQ": "groq", + "COHERE": "cohere", + "GOOGLE": "gemini", + "MISTRAL": "mistral", + "AZURE_OPENAI": "azure", + "AZURE": "azure", + "OPENROUTER": "openrouter", + "COMETAPI": "cometapi", + "XAI": "xai", + "BEDROCK": "bedrock", + "AWS_BEDROCK": "bedrock", + "VERTEX_AI": "vertex_ai", + "TOGETHER_AI": "together_ai", + "FIREWORKS_AI": "fireworks_ai", + "DEEPSEEK": "openai", + "ALIBABA_QWEN": "openai", + "MOONSHOT": "openai", + "ZHIPU": "openai", + "GITHUB_MODELS": "github", + "REPLICATE": "replicate", + "PERPLEXITY": "perplexity", + "ANYSCALE": "anyscale", + "DEEPINFRA": "deepinfra", + "CEREBRAS": "cerebras", + "SAMBANOVA": "sambanova", + "AI21": "ai21", + "CLOUDFLARE": "cloudflare", + "DATABRICKS": "databricks", + "HUGGINGFACE": "huggingface", + "MINIMAX": "openai", + "RECRAFT": "recraft", + "XINFERENCE": "xinference", + "NSCALE": "nscale", + "CUSTOM": "custom", +} + + +def ensure_v1(base_url: str | None) -> str | None: + if not base_url: + return None + stripped = base_url.rstrip("/") + if stripped.endswith("/v1"): + return stripped + return f"{stripped}/v1" + + +def _conn_value(conn: Connection | Mapping[str, Any], key: str) -> Any: + if isinstance(conn, Mapping): + return conn.get(key) + return getattr(conn, key) + + +def _protocol_value(protocol: Any) -> str: + return getattr(protocol, "value", str(protocol)) + + +def to_litellm( + conn: Connection | Mapping[str, Any], + model_id: 
str, +) -> tuple[str, dict[str, Any]]: + """Return ``(model_string, litellm_kwargs)`` for any model role.""" + protocol = _protocol_value(_conn_value(conn, "protocol")) + base_url = _conn_value(conn, "base_url") + api_key = _conn_value(conn, "api_key") + native_provider = _conn_value(conn, "native_provider") + extra = _conn_value(conn, "extra") or {} + + kwargs: dict[str, Any] = {} + if api_key: + kwargs["api_key"] = api_key + + if protocol == PROTOCOL_OLLAMA: + model_string = f"ollama_chat/{model_id}" + if base_url: + kwargs["api_base"] = base_url.rstrip("/") + elif protocol == PROTOCOL_OPENAI_COMPATIBLE: + model_string = f"openai/{model_id}" + api_base = ensure_v1(base_url) + if api_base: + kwargs["api_base"] = api_base + else: + provider_key = (native_provider or "").upper() + prefix = NATIVE_PROVIDER_PREFIX.get(provider_key, provider_key.lower()) + if prefix == "custom": + custom_provider = extra.get("custom_provider") or native_provider + model_string = f"{custom_provider}/{model_id}" if custom_provider else model_id + else: + model_string = f"{prefix}/{model_id}" + + api_base = resolve_api_base( + provider=provider_key, + provider_prefix=prefix, + config_api_base=base_url, + ) + if api_base: + kwargs["api_base"] = api_base + + if api_version := extra.get("api_version"): + kwargs["api_version"] = api_version + kwargs.update(extra.get("litellm_params", {})) + kwargs.update(extra.get("kwargs", {})) + return model_string, kwargs + + +def native_connection_from_config(config: Mapping[str, Any]) -> dict[str, Any]: + """Build an in-memory NATIVE connection mapping from a legacy/global config.""" + provider = str(config.get("provider") or config.get("custom_provider") or "CUSTOM") + extra: dict[str, Any] = { + "litellm_params": config.get("litellm_params") or {}, + } + if config.get("api_version"): + extra["api_version"] = config.get("api_version") + if config.get("custom_provider"): + extra["custom_provider"] = config.get("custom_provider") + return { + "protocol": 
PROTOCOL_NATIVE, + "native_provider": provider, + "base_url": config.get("api_base") or None, + "api_key": config.get("api_key") or None, + "extra": extra, + } + + +__all__ = [ + "NATIVE_PROVIDER_PREFIX", + "ensure_v1", + "native_connection_from_config", + "to_litellm", +] diff --git a/surfsense_backend/app/services/provider_capabilities.py b/surfsense_backend/app/services/provider_capabilities.py index f094c9954..9e1433214 100644 --- a/surfsense_backend/app/services/provider_capabilities.py +++ b/surfsense_backend/app/services/provider_capabilities.py @@ -46,6 +46,8 @@ from collections.abc import Iterable import litellm +from app.services.model_resolver import NATIVE_PROVIDER_PREFIX + logger = logging.getLogger(__name__) @@ -58,40 +60,7 @@ logger = logging.getLogger(__name__) # map there directly would re-introduce the # ``app.config -> ... -> deliverables/tools/generate_image -> # app.config`` cycle that prompted the move. -_PROVIDER_PREFIX_MAP: dict[str, str] = { - "OPENAI": "openai", - "ANTHROPIC": "anthropic", - "GROQ": "groq", - "COHERE": "cohere", - "GOOGLE": "gemini", - "OLLAMA": "ollama_chat", - "MISTRAL": "mistral", - "AZURE_OPENAI": "azure", - "OPENROUTER": "openrouter", - "XAI": "xai", - "BEDROCK": "bedrock", - "VERTEX_AI": "vertex_ai", - "TOGETHER_AI": "together_ai", - "FIREWORKS_AI": "fireworks_ai", - "DEEPSEEK": "openai", - "ALIBABA_QWEN": "openai", - "MOONSHOT": "openai", - "ZHIPU": "openai", - "GITHUB_MODELS": "github", - "REPLICATE": "replicate", - "PERPLEXITY": "perplexity", - "ANYSCALE": "anyscale", - "DEEPINFRA": "deepinfra", - "CEREBRAS": "cerebras", - "SAMBANOVA": "sambanova", - "AI21": "ai21", - "CLOUDFLARE": "cloudflare", - "DATABRICKS": "databricks", - "COMETAPI": "cometapi", - "HUGGINGFACE": "huggingface", - "MINIMAX": "openai", - "CUSTOM": "custom", -} +_PROVIDER_PREFIX_MAP = NATIVE_PROVIDER_PREFIX def _candidate_model_strings(