feat(models): add provider catalog and resolver

2026-06-12 20:45:20 +02:00 · 2026-06-10 21:47:42 +05:30 · 2026-06-10 21:47:42 +05:30 · 8b59ca59c1
commit 8b59ca59c1
parent adb857925b
6 changed files with 355 additions and 56 deletions
--- a/surfsense_backend/app/config/init.py
+++ b/surfsense_backend/app/config/init.py
@ -389,10 +389,28 @@ def initialize_openrouter_integration():
                    )
            except Exception as e:
                print(f"Warning: Failed to inject OpenRouter vision-LLM configs: {e}")
+
+        refresh_global_model_catalog()
    except Exception as e:
        print(f"Warning: Failed to initialize OpenRouter integration: {e}")


+def materialize_global_configs():
+    from app.services.global_model_catalog import materialize_global_model_catalog
+
+    return materialize_global_model_catalog(
+        chat_configs=getattr(config, "GLOBAL_LLM_CONFIGS", []),
+        vision_configs=getattr(config, "GLOBAL_VISION_LLM_CONFIGS", []),
+        image_configs=getattr(config, "GLOBAL_IMAGE_GEN_CONFIGS", []),
+    )
+
+
+def refresh_global_model_catalog():
+    connections, models = materialize_global_configs()
+    config.GLOBAL_CONNECTIONS = connections
+    config.GLOBAL_MODELS = models
+
+
 def initialize_pricing_registration():
    """
    Teach LiteLLM the per-token cost of every deployment in
@ -723,7 +741,7 @@ class Config:
        os.getenv("QUOTA_DEFAULT_IMAGE_RESERVE_MICROS", "50000")
    )

-    # Per-podcast reservation (in micro-USD). One agent LLM call generating
+    # Per-podcast reservation (in micro-USD). One chat model call generating
    # a transcript, typically 5k-20k completion tokens. $0.20 covers a long
    # premium-model run. Tune via env.
    QUOTA_DEFAULT_PODCAST_RESERVE_MICROS = int(
@ -849,6 +867,19 @@ class Config:
    # Router settings for Vision LLM Auto mode
    VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()

+    # Virtual GLOBAL connection/model catalog. This is server-only metadata
+    # derived from global_llm_config.yaml; GLOBAL keys are not stored in DB.
+    from app.services.global_model_catalog import (
+        materialize_global_model_catalog as _materialize_global_model_catalog,
+    )
+
+    GLOBAL_CONNECTIONS, GLOBAL_MODELS = _materialize_global_model_catalog(
+        chat_configs=GLOBAL_LLM_CONFIGS,
+        vision_configs=GLOBAL_VISION_LLM_CONFIGS,
+        image_configs=GLOBAL_IMAGE_GEN_CONFIGS,
+    )
+    del _materialize_global_model_catalog
+
    # OpenRouter Integration settings (optional)
    OPENROUTER_INTEGRATION_SETTINGS = load_openrouter_integration_settings()

--- a/surfsense_backend/app/config/global_llm_config.example.yaml
+++ b/surfsense_backend/app/config/global_llm_config.example.yaml
@ -7,8 +7,9 @@
 # NOTE: The example API keys below are placeholders and won't work.
 # Replace them with your actual API keys to enable global configurations.
 #
-# These configurations will be available to all users as a convenient option
-# Users can choose to use these global configs or add their own
+# These configurations are materialized as server-owned GLOBAL connections/models
+# and become available on the Models page. Users can choose hosted/global models
+# or add their own BYOK/local connections.
 #
 # AUTO MODE (Recommended):
 # - Auto mode (ID: 0) uses LiteLLM Router to automatically load balance across all global configs
@ -16,9 +17,12 @@
 # - New users are automatically assigned Auto mode by default
 # - Configure router_settings below to customize the load balancing behavior
 #
-# Structure matches NewLLMConfig:
-# - Model configuration (provider, model_name, api_key, etc.)
-# - Prompt configuration (system_instructions, citations_enabled)
+# Static config shape:
+# - Connection fields: provider, api_key, api_base, api_version
+# - Model fields: model_name, billing_tier, rpm/tpm, litellm_params
+# - Prompt defaults: system_instructions, citations_enabled
+# IDs share one GLOBAL model namespace across chat, vision, and image generation.
+# Suggested ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999.
 #
 # COST-BASED PREMIUM CREDITS:
 # Each premium config bills the user's USD-credit balance based on the
@ -327,7 +331,7 @@ openrouter_integration:
  quota_reserve_tokens: 4000
  # id_offset: base negative ID for dynamically generated configs.
  # Model IDs are derived deterministically via BLAKE2b so they survive
-  # catalogue churn. Must not overlap with your static global_llm_configs IDs.
+  # catalogue churn. Must not overlap with any static GLOBAL model IDs.
  id_offset: -10000
  # refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only)
  refresh_interval_hours: 24
@ -351,8 +355,8 @@ openrouter_integration:

  # Image generation + vision LLM emission are OPT-IN. OpenRouter's catalogue
  # contains hundreds of image- and vision-capable models; turning these on
-  # injects them into the global Image-Generation / Vision-LLM model
-  # selectors alongside any static configs. Tier (free/premium) is derived
+  # injects them into the global image-generation / vision model lists
+  # alongside any static configs. Tier (free/premium) is derived
  # per model the same way it is for chat (`:free` suffix or zero pricing).
  # When a user picks a premium image/vision model the call debits the
  # shared $5 USD-cost-based premium credit pool — so leaving these off
@ -384,7 +388,7 @@ image_generation_router_settings:

 global_image_generation_configs:
  # Example: OpenAI DALL-E 3
-  - id: -1
+  - id: -2001
    name: "Global DALL-E 3"
    description: "OpenAI's DALL-E 3 for high-quality image generation"
    provider: "OPENAI"
@ -395,7 +399,7 @@ global_image_generation_configs:
    litellm_params: {}

  # Example: OpenAI GPT Image 1
-  - id: -2
+  - id: -2002
    name: "Global GPT Image 1"
    description: "OpenAI's GPT Image 1 model"
    provider: "OPENAI"
@ -406,7 +410,7 @@ global_image_generation_configs:
    litellm_params: {}

  # Example: Azure OpenAI DALL-E 3
-  - id: -3
+  - id: -2003
    name: "Global Azure DALL-E 3"
    description: "Azure-hosted DALL-E 3 deployment"
    provider: "AZURE_OPENAI"
@ -419,7 +423,7 @@ global_image_generation_configs:
      base_model: "dall-e-3"

  # Example: OpenRouter Gemini Image Generation
-  # - id: -4
+  # - id: -2004
  #   name: "Global Gemini Image Gen"
  #   description: "Google Gemini image generation via OpenRouter"
  #   provider: "OPENROUTER"
@ -448,7 +452,7 @@ vision_llm_router_settings:

 global_vision_llm_configs:
  # Example: OpenAI GPT-4o (recommended for vision)
-  - id: -1
+  - id: -1001
    name: "Global GPT-4o Vision"
    description: "OpenAI's GPT-4o with strong vision capabilities"
    provider: "OPENAI"
@ -462,7 +466,7 @@ global_vision_llm_configs:
      max_tokens: 1000

  # Example: Google Gemini 2.0 Flash
-  - id: -2
+  - id: -1002
    name: "Global Gemini 2.0 Flash"
    description: "Google's fast vision model with large context"
    provider: "GOOGLE"
@ -476,7 +480,7 @@ global_vision_llm_configs:
      max_tokens: 1000

  # Example: Anthropic Claude 3.5 Sonnet
-  - id: -3
+  - id: -1003
    name: "Global Claude 3.5 Sonnet Vision"
    description: "Anthropic's Claude 3.5 Sonnet with vision support"
    provider: "ANTHROPIC"
@ -490,7 +494,7 @@ global_vision_llm_configs:
      max_tokens: 1000

  # Example: Azure OpenAI GPT-4o
-  # - id: -4
+  # - id: -1004
  #   name: "Global Azure GPT-4o Vision"
  #   description: "Azure-hosted GPT-4o for vision analysis"
  #   provider: "AZURE_OPENAI"
@ -507,8 +511,9 @@ global_vision_llm_configs:

 # Notes:
 # - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
-# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB)
-# - IDs should be unique and sequential (e.g., -1, -2, -3, etc.)
+# - Use negative IDs to distinguish global models from BYOK/local DB models
+# - IDs must be unique across chat, vision, and image generation configs
+# - Suggested static ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999
 # - The 'api_key' field will not be exposed to users via API
 # - system_instructions: Custom prompt or empty string to use defaults
 # - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty
@ -519,7 +524,7 @@ global_vision_llm_configs:
 #
 #
 # IMAGE GENERATION NOTES:
-# - Image generation configs use the same ID scheme as LLM configs (negative for global)
+# - Image generation configs use the shared GLOBAL ID namespace
 # - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure),
 #   bedrock/* (AWS), vertex_ai/* (Google), recraft/* (Recraft), openrouter/* (OpenRouter)
 # - The router uses litellm.aimage_generation() for async image generation
@ -527,7 +532,7 @@ global_vision_llm_configs:
 #   TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token.
 #
 # VISION LLM NOTES:
-# - Vision configs use the same ID scheme (negative for global, positive for user DB)
+# - Vision configs use the shared GLOBAL ID namespace
 # - Only use vision-capable models (GPT-4o, Gemini, Claude 3, etc.)
 # - Lower temperature (0.3) is recommended for accurate screenshot analysis
 # - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions
--- a/surfsense_backend/app/schemas/new_llm_config.py
+++ b/surfsense_backend/app/schemas/new_llm_config.py
@ -229,7 +229,7 @@ class LLMPreferencesRead(BaseModel):
        description="ID of the vision LLM config to use for vision/screenshot analysis",
    )
    agent_llm: dict[str, Any] | None = Field(
-        None, description="Full config for agent LLM"
+        None, description="Full config for chat model"
    )
    image_generation_config: dict[str, Any] | None = Field(
        None, description="Full config for image generation"
--- a/surfsense_backend/app/services/global_model_catalog.py
+++ b/surfsense_backend/app/services/global_model_catalog.py
@ -0,0 +1,142 @@
+"""Materialize server-owned GLOBAL YAML configs as virtual connections/models."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.services.model_resolver import native_connection_from_config
+
+
+def _base_model(config: dict[str, Any]) -> str | None:
+    litellm_params = config.get("litellm_params") or {}
+    if isinstance(litellm_params, dict):
+        return litellm_params.get("base_model")
+    return None
+
+
+def _connection_key(conn: dict[str, Any]) -> tuple[Any, ...]:
+    # Deliberately includes api_key because two operator-owned credentials for
+    # the same provider/base can have different quota/rate limits upstream.
+    return (
+        conn.get("protocol"),
+        conn.get("native_provider"),
+        conn.get("base_url"),
+        conn.get("api_key"),
+        _freeze(conn.get("extra") or {}),
+    )
+
+
+def _freeze(value: Any) -> Any:
+    if isinstance(value, dict):
+        return tuple(sorted((key, _freeze(val)) for key, val in value.items()))
+    if isinstance(value, list):
+        return tuple(_freeze(item) for item in value)
+    return value
+
+
+def _capabilities_for(role: str, config: dict[str, Any]) -> dict[str, bool]:
+    return {
+        "chat": role == "chat",
+        "vision": role == "vision" or bool(config.get("supports_image_input")),
+        "image_gen": role == "image_gen",
+        "embedding": False,
+        "tools": bool(config.get("supports_tools", False)),
+    }
+
+
+def _catalog_metadata(config: dict[str, Any]) -> dict[str, Any]:
+    return {
+        "billing_tier": config.get("billing_tier", "free"),
+        "quota_reserve_tokens": config.get("quota_reserve_tokens"),
+        "rpm": config.get("rpm"),
+        "tpm": config.get("tpm"),
+        "anonymous_enabled": config.get("anonymous_enabled", False),
+        "seo_enabled": config.get("seo_enabled", False),
+        "seo_slug": config.get("seo_slug"),
+        "input_cost_per_token": (config.get("litellm_params") or {}).get(
+            "input_cost_per_token"
+        )
+        if isinstance(config.get("litellm_params"), dict)
+        else None,
+        "output_cost_per_token": (config.get("litellm_params") or {}).get(
+            "output_cost_per_token"
+        )
+        if isinstance(config.get("litellm_params"), dict)
+        else None,
+        "is_planner": config.get("is_planner", False),
+        "base_model": _base_model(config),
+        "router_pool_eligible": config.get("router_pool_eligible", True),
+    }
+
+
+def materialize_global_model_catalog(
+    *,
+    chat_configs: list[dict[str, Any]],
+    vision_configs: list[dict[str, Any]],
+    image_configs: list[dict[str, Any]],
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    connections: list[dict[str, Any]] = []
+    models: list[dict[str, Any]] = []
+    connection_id_by_key: dict[tuple[Any, ...], int] = {}
+    next_connection_id = -1
+
+    def add_config(config: dict[str, Any], role: str) -> None:
+        nonlocal next_connection_id
+        if not config.get("id") or not config.get("model_name"):
+            return
+        conn = native_connection_from_config(config)
+        conn["scope"] = "GLOBAL"
+        conn["enabled"] = True
+        conn["last_status"] = "OK"
+        key = _connection_key(conn)
+        connection_id = connection_id_by_key.get(key)
+        if connection_id is None:
+            connection_id = next_connection_id
+            next_connection_id -= 1
+            connection_id_by_key[key] = connection_id
+            connections.append(
+                {
+                    "id": connection_id,
+                    **conn,
+                }
+            )
+
+        model_id = int(config["id"])
+        models.append(
+            {
+                "id": model_id,
+                "connection_id": connection_id,
+                "model_id": config["model_name"],
+                "display_name": config.get("name") or config["model_name"],
+                "source": "MANUAL",
+                "capabilities": _capabilities_for(role, config),
+                "capabilities_declared": _capabilities_for(role, config),
+                "capabilities_verified": _capabilities_for(role, config),
+                "capabilities_override": {},
+                "embedding_dimension": None,
+                "enabled": True,
+                "billing_tier": config.get("billing_tier", "free"),
+                "catalog": _catalog_metadata(config),
+                "role": role,
+            }
+        )
+
+    for cfg in chat_configs:
+        if cfg.get("is_auto_mode"):
+            continue
+        add_config(cfg, "chat")
+    for cfg in vision_configs:
+        if cfg.get("is_auto_mode"):
+            continue
+        add_config(cfg, "vision")
+    for cfg in image_configs:
+        if cfg.get("is_auto_mode"):
+            continue
+        add_config(cfg, "image_gen")
+
+    # Each virtual connection is server-only. Callers that serialize these
+    # must strip api_key before returning data to clients.
+    return connections, models
+
+
+__all__ = ["materialize_global_model_catalog"]
--- a/surfsense_backend/app/services/model_resolver.py
+++ b/surfsense_backend/app/services/model_resolver.py
@ -0,0 +1,152 @@
+"""Single model-to-LiteLLM resolver.
+
+All chat, vision, image-generation, validation, and Auto routing paths should
+turn a Connection + Model into LiteLLM input through this module.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Any
+
+from app.services.provider_api_base import resolve_api_base
+
+if TYPE_CHECKING:
+    from app.db import Connection
+
+PROTOCOL_OLLAMA = "OLLAMA"
+PROTOCOL_OPENAI_COMPATIBLE = "OPENAI_COMPATIBLE"
+PROTOCOL_NATIVE = "NATIVE"
+
+NATIVE_PROVIDER_PREFIX: dict[str, str] = {
+    "OPENAI": "openai",
+    "ANTHROPIC": "anthropic",
+    "GROQ": "groq",
+    "COHERE": "cohere",
+    "GOOGLE": "gemini",
+    "MISTRAL": "mistral",
+    "AZURE_OPENAI": "azure",
+    "AZURE": "azure",
+    "OPENROUTER": "openrouter",
+    "COMETAPI": "cometapi",
+    "XAI": "xai",
+    "BEDROCK": "bedrock",
+    "AWS_BEDROCK": "bedrock",
+    "VERTEX_AI": "vertex_ai",
+    "TOGETHER_AI": "together_ai",
+    "FIREWORKS_AI": "fireworks_ai",
+    "DEEPSEEK": "openai",
+    "ALIBABA_QWEN": "openai",
+    "MOONSHOT": "openai",
+    "ZHIPU": "openai",
+    "GITHUB_MODELS": "github",
+    "REPLICATE": "replicate",
+    "PERPLEXITY": "perplexity",
+    "ANYSCALE": "anyscale",
+    "DEEPINFRA": "deepinfra",
+    "CEREBRAS": "cerebras",
+    "SAMBANOVA": "sambanova",
+    "AI21": "ai21",
+    "CLOUDFLARE": "cloudflare",
+    "DATABRICKS": "databricks",
+    "HUGGINGFACE": "huggingface",
+    "MINIMAX": "openai",
+    "RECRAFT": "recraft",
+    "XINFERENCE": "xinference",
+    "NSCALE": "nscale",
+    "CUSTOM": "custom",
+}
+
+
+def ensure_v1(base_url: str | None) -> str | None:
+    if not base_url:
+        return None
+    stripped = base_url.rstrip("/")
+    if stripped.endswith("/v1"):
+        return stripped
+    return f"{stripped}/v1"
+
+
+def _conn_value(conn: Connection | Mapping[str, Any], key: str) -> Any:
+    if isinstance(conn, Mapping):
+        return conn.get(key)
+    return getattr(conn, key)
+
+
+def _protocol_value(protocol: Any) -> str:
+    return getattr(protocol, "value", str(protocol))
+
+
+def to_litellm(
+    conn: Connection | Mapping[str, Any],
+    model_id: str,
+) -> tuple[str, dict[str, Any]]:
+    """Return ``(model_string, litellm_kwargs)`` for any model role."""
+    protocol = _protocol_value(_conn_value(conn, "protocol"))
+    base_url = _conn_value(conn, "base_url")
+    api_key = _conn_value(conn, "api_key")
+    native_provider = _conn_value(conn, "native_provider")
+    extra = _conn_value(conn, "extra") or {}
+
+    kwargs: dict[str, Any] = {}
+    if api_key:
+        kwargs["api_key"] = api_key
+
+    if protocol == PROTOCOL_OLLAMA:
+        model_string = f"ollama_chat/{model_id}"
+        if base_url:
+            kwargs["api_base"] = base_url.rstrip("/")
+    elif protocol == PROTOCOL_OPENAI_COMPATIBLE:
+        model_string = f"openai/{model_id}"
+        api_base = ensure_v1(base_url)
+        if api_base:
+            kwargs["api_base"] = api_base
+    else:
+        provider_key = (native_provider or "").upper()
+        prefix = NATIVE_PROVIDER_PREFIX.get(provider_key, provider_key.lower())
+        if prefix == "custom":
+            custom_provider = extra.get("custom_provider") or native_provider
+            model_string = f"{custom_provider}/{model_id}" if custom_provider else model_id
+        else:
+            model_string = f"{prefix}/{model_id}"
+
+        api_base = resolve_api_base(
+            provider=provider_key,
+            provider_prefix=prefix,
+            config_api_base=base_url,
+        )
+        if api_base:
+            kwargs["api_base"] = api_base
+
+    if api_version := extra.get("api_version"):
+        kwargs["api_version"] = api_version
+    kwargs.update(extra.get("litellm_params", {}))
+    kwargs.update(extra.get("kwargs", {}))
+    return model_string, kwargs
+
+
+def native_connection_from_config(config: Mapping[str, Any]) -> dict[str, Any]:
+    """Build an in-memory NATIVE connection mapping from a legacy/global config."""
+    provider = str(config.get("provider") or config.get("custom_provider") or "CUSTOM")
+    extra: dict[str, Any] = {
+        "litellm_params": config.get("litellm_params") or {},
+    }
+    if config.get("api_version"):
+        extra["api_version"] = config.get("api_version")
+    if config.get("custom_provider"):
+        extra["custom_provider"] = config.get("custom_provider")
+    return {
+        "protocol": PROTOCOL_NATIVE,
+        "native_provider": provider,
+        "base_url": config.get("api_base") or None,
+        "api_key": config.get("api_key") or None,
+        "extra": extra,
+    }
+
+
+__all__ = [
+    "NATIVE_PROVIDER_PREFIX",
+    "ensure_v1",
+    "native_connection_from_config",
+    "to_litellm",
+]
--- a/surfsense_backend/app/services/provider_capabilities.py
+++ b/surfsense_backend/app/services/provider_capabilities.py
@ -46,6 +46,8 @@ from collections.abc import Iterable

 import litellm

+from app.services.model_resolver import NATIVE_PROVIDER_PREFIX
+
 logger = logging.getLogger(__name__)


@ -58,40 +60,7 @@ logger = logging.getLogger(__name__)
 # map there directly would re-introduce the
 # ``app.config -> ... -> deliverables/tools/generate_image ->
 # app.config`` cycle that prompted the move.
-_PROVIDER_PREFIX_MAP: dict[str, str] = {
-    "OPENAI": "openai",
-    "ANTHROPIC": "anthropic",
-    "GROQ": "groq",
-    "COHERE": "cohere",
-    "GOOGLE": "gemini",
-    "OLLAMA": "ollama_chat",
-    "MISTRAL": "mistral",
-    "AZURE_OPENAI": "azure",
-    "OPENROUTER": "openrouter",
-    "XAI": "xai",
-    "BEDROCK": "bedrock",
-    "VERTEX_AI": "vertex_ai",
-    "TOGETHER_AI": "together_ai",
-    "FIREWORKS_AI": "fireworks_ai",
-    "DEEPSEEK": "openai",
-    "ALIBABA_QWEN": "openai",
-    "MOONSHOT": "openai",
-    "ZHIPU": "openai",
-    "GITHUB_MODELS": "github",
-    "REPLICATE": "replicate",
-    "PERPLEXITY": "perplexity",
-    "ANYSCALE": "anyscale",
-    "DEEPINFRA": "deepinfra",
-    "CEREBRAS": "cerebras",
-    "SAMBANOVA": "sambanova",
-    "AI21": "ai21",
-    "CLOUDFLARE": "cloudflare",
-    "DATABRICKS": "databricks",
-    "COMETAPI": "cometapi",
-    "HUGGINGFACE": "huggingface",
-    "MINIMAX": "openai",
-    "CUSTOM": "custom",
-}
+_PROVIDER_PREFIX_MAP = NATIVE_PROVIDER_PREFIX


 def _candidate_model_strings(