feat(chat): route models by provider capabilities

2026-07-02 22:01:05 +02:00 · 2026-06-11 18:22:23 +05:30 · 2026-06-11 18:22:23 +05:30 · c28c4f5785
commit c28c4f5785
parent 8f20a32571
18 changed files with 429 additions and 319 deletions
--- a/surfsense_backend/app/agents/chat/runtime/llm_config.py
+++ b/surfsense_backend/app/agents/chat/runtime/llm_config.py
@ -2,9 +2,9 @@
 LLM configuration utilities for SurfSense agents.

 This module provides functions for loading LLM configurations from:
-1. Auto mode (ID 0) - Uses LiteLLM Router for load balancing
+1. Auto mode (ID 0) - Resolved by callers to a concrete model-connection model
 2. YAML files (global configs with negative IDs)
-3. Database NewLLMConfig table (user-created configs with positive IDs)
+3. Database model-connections table (user-created configs with positive IDs)

 It also provides utilities for creating ChatLiteLLM instances and
 managing prompt configurations.
@ -33,9 +33,7 @@ from app.agents.chat.runtime.prompt_caching import (
 from app.services.llm_router_service import (
    AUTO_MODE_ID,
    ChatLiteLLMRouter,
-    LLMRouterService,
    _sanitize_content,
-    get_auto_mode_llm,
    is_auto_mode,
 )

@ -92,14 +90,6 @@ class SanitizedChatLiteLLM(ChatLiteLLM):
            yield chunk


-# Re-exported under the historical name ``PROVIDER_MAP``. Source of truth lives
-# in provider_capabilities so the YAML loader can resolve prefixes during
-# app.config init without importing the agent/tools tree.
-from app.services.provider_capabilities import (  # noqa: E402
-    _PROVIDER_PREFIX_MAP as PROVIDER_MAP,
-)
-
-
 def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
    """Attach a ``profile`` dict to ChatLiteLLM with model context metadata."""
    try:
@ -122,7 +112,8 @@ class AgentConfig:
    Complete configuration for the SurfSense agent.

    This combines LLM settings with prompt configuration from NewLLMConfig.
-    Supports Auto mode (ID 0) which uses LiteLLM Router for load balancing.
+    Supports Auto mode metadata (ID 0). Runtime callers must resolve Auto to
+    a concrete global or BYOK model before constructing ChatLiteLLM.
    """

    # LLM Model Settings
@ -219,7 +210,7 @@ class AgentConfig:
            # BYOK rows have no curated flag; ask LiteLLM (default-allow on
            # unknown). The streaming safety net still blocks explicit text-only.
            supports_image_input=derive_supports_image_input(
-                provider=provider_value,
+                litellm_provider=provider_value.lower(),
                model_name=config.model_name,
                base_model=base_model,
                custom_provider=config.custom_provider,
@ -238,7 +229,7 @@ class AgentConfig:

        system_instructions = yaml_config.get("system_instructions", "")

-        provider = yaml_config.get("provider", "").upper()
+        provider = yaml_config.get("litellm_provider", "")
        model_name = yaml_config.get("model_name", "")
        custom_provider = yaml_config.get("custom_provider")
        litellm_params = yaml_config.get("litellm_params") or {}
@ -254,7 +245,7 @@ class AgentConfig:
            supports_image_input = bool(yaml_config.get("supports_image_input"))
        else:
            supports_image_input = derive_supports_image_input(
-                provider=provider,
+                litellm_provider=provider,
                model_name=model_name,
                base_model=base_model,
                custom_provider=custom_provider,
@ -383,9 +374,6 @@ async def load_agent_config(
 ) -> "AgentConfig | None":
    """Main config loader: id 0 -> Auto mode; negative -> YAML; positive -> DB."""
    if is_auto_mode(config_id):
-        if not LLMRouterService.is_initialized():
-            print("Error: Auto mode requested but LLM Router not initialized")
-            return None
        return AgentConfig.from_auto_mode()

    if config_id < 0:
@ -408,9 +396,8 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
    if llm_config.get("custom_provider"):
        model_string = f"{llm_config['custom_provider']}/{llm_config['model_name']}"
    else:
-        provider = llm_config.get("provider", "").upper()
-        provider_prefix = PROVIDER_MAP.get(provider, provider.lower())
-        model_string = f"{provider_prefix}/{llm_config['model_name']}"
+        litellm_provider = llm_config.get("litellm_provider", "openai")
+        model_string = f"{litellm_provider}/{llm_config['model_name']}"

    litellm_kwargs = {
        "model": model_string,
@ -433,29 +420,15 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
 def create_chat_litellm_from_agent_config(
    agent_config: AgentConfig,
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
-    """Create a ChatLiteLLM (or, for Auto mode, a load-balancing router) from config."""
+    """Create a ChatLiteLLM from an already resolved concrete model config."""
    if agent_config.is_auto_mode:
-        if not LLMRouterService.is_initialized():
-            print("Error: Auto mode requested but LLM Router not initialized")
-            return None
-        try:
-            router_llm = get_auto_mode_llm()
-            if router_llm is not None:
-                # Universal injection points only: auto-mode fans out across
-                # providers, so provider-specific kwargs have no known target.
-                apply_litellm_prompt_caching(router_llm, agent_config=agent_config)
-            return router_llm
-        except Exception as e:
-            print(f"Error creating ChatLiteLLMRouter: {e}")
-            return None
+        print("Error: Auto mode must be resolved to a concrete model before LLM creation")
+        return None

    if agent_config.custom_provider:
        model_string = f"{agent_config.custom_provider}/{agent_config.model_name}"
    else:
-        provider_prefix = PROVIDER_MAP.get(
-            agent_config.provider, agent_config.provider.lower()
-        )
-        model_string = f"{provider_prefix}/{agent_config.model_name}"
+        model_string = f"{agent_config.provider}/{agent_config.model_name}"

    litellm_kwargs = {
        "model": model_string,