feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code

2026-06-22 21:28:12 +02:00 · 2026-06-13 12:45:43 +05:30 · 2026-06-13 12:45:43 +05:30 · bd4a04f2e7
commit bd4a04f2e7
parent 50668775f8
93 changed files with 956 additions and 11442 deletions
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/generate_image.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/generate_image.py
@ -215,7 +215,7 @@ def create_generate_image_tool(
                    prompt=prompt,
                    model=getattr(response, "_hidden_params", {}).get("model"),
                    n=n,
-                    image_generation_config_id=config_id,
+                    image_gen_model_id=config_id,
                    response_data=response_dict,
                    search_space_id=search_space_id,
                    access_token=access_token,
--- a/surfsense_backend/app/agents/chat/runtime/llm_config.py
+++ b/surfsense_backend/app/agents/chat/runtime/llm_config.py
@ -24,8 +24,6 @@ from langchain_core.messages import AIMessage, BaseMessage
 from langchain_core.outputs import ChatGenerationChunk, ChatResult
 from langchain_litellm import ChatLiteLLM
 from litellm import get_model_info
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession

 from app.agents.chat.runtime.prompt_caching import (
    apply_litellm_prompt_caching,
@ -34,7 +32,6 @@ from app.services.llm_router_service import (
    AUTO_MODE_ID,
    ChatLiteLLMRouter,
    _sanitize_content,
-    is_auto_mode,
 )


@ -130,7 +127,7 @@ class AgentConfig:
    """
    Complete configuration for the SurfSense agent.

-    This combines LLM settings with prompt configuration from NewLLMConfig.
+    This combines resolved model settings with prompt configuration.
    Supports Auto mode metadata (ID 0). Runtime callers must resolve Auto to
    a concrete global or BYOK model before constructing ChatLiteLLM.
    """
@ -180,7 +177,7 @@ class AgentConfig:
            use_default_system_instructions=True,
            citations_enabled=True,
            config_id=AUTO_MODE_ID,
-            config_name="Auto (Fastest)",
+            config_name="Auto",
            is_auto_mode=True,
            billing_tier="free",
            is_premium=False,
@ -191,57 +188,12 @@ class AgentConfig:
            supports_image_input=True,
        )

-    @classmethod
-    def from_new_llm_config(cls, config) -> "AgentConfig":
-        """Build an AgentConfig from a NewLLMConfig database model."""
-        # Lazy import: keeps provider_capabilities (and litellm) out of init order.
-        from app.services.provider_capabilities import derive_supports_image_input
-
-        provider_value = (
-            config.provider.value
-            if hasattr(config.provider, "value")
-            else str(config.provider)
-        )
-        litellm_params = config.litellm_params or {}
-        base_model = (
-            litellm_params.get("base_model")
-            if isinstance(litellm_params, dict)
-            else None
-        )
-
-        return cls(
-            provider=provider_value,
-            model_name=config.model_name,
-            api_key=config.api_key,
-            api_base=config.api_base,
-            custom_provider=config.custom_provider,
-            litellm_params=config.litellm_params,
-            system_instructions=config.system_instructions,
-            use_default_system_instructions=config.use_default_system_instructions,
-            citations_enabled=config.citations_enabled,
-            config_id=config.id,
-            config_name=config.name,
-            is_auto_mode=False,
-            billing_tier="free",
-            is_premium=False,
-            anonymous_enabled=False,
-            quota_reserve_tokens=None,
-            # BYOK rows have no curated flag; ask LiteLLM (default-allow on
-            # unknown). The streaming safety net still blocks explicit text-only.
-            supports_image_input=derive_supports_image_input(
-                provider=provider_value.lower(),
-                model_name=config.model_name,
-                base_model=base_model,
-                custom_provider=config.custom_provider,
-            ),
-        )
-
    @classmethod
    def from_yaml_config(cls, yaml_config: dict) -> "AgentConfig":
        """Build an AgentConfig from a YAML configuration dictionary.

-        Supports the same prompt fields as NewLLMConfig (system_instructions,
-        use_default_system_instructions, citations_enabled).
+        Supports prompt fields such as system_instructions,
+        use_default_system_instructions, and citations_enabled.
        """
        # Lazy import: keeps provider_capabilities (and litellm) out of init order.
        from app.services.provider_capabilities import derive_supports_image_input
@ -334,82 +286,6 @@ def load_global_llm_config_by_id(llm_config_id: int) -> dict | None:
    return load_llm_config_from_yaml(llm_config_id)


-async def load_new_llm_config_from_db(
-    session: AsyncSession,
-    config_id: int,
-) -> "AgentConfig | None":
-    """Load a NewLLMConfig from the database by ID."""
-    from app.db import NewLLMConfig
-
-    try:
-        result = await session.execute(
-            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
-        )
-        config = result.scalars().first()
-
-        if not config:
-            print(f"Error: NewLLMConfig with id {config_id} not found")
-            return None
-
-        return AgentConfig.from_new_llm_config(config)
-    except Exception as e:
-        print(f"Error loading NewLLMConfig from database: {e}")
-        return None
-
-
-async def load_agent_llm_config_for_search_space(
-    session: AsyncSession,
-    search_space_id: int,
-) -> "AgentConfig | None":
-    """Load the chat model config for a search space via its agent_llm_id.
-
-    Positive id -> DB; negative -> YAML; None -> first global config (-1).
-    """
-    from app.db import SearchSpace
-
-    try:
-        result = await session.execute(
-            select(SearchSpace).filter(SearchSpace.id == search_space_id)
-        )
-        search_space = result.scalars().first()
-
-        if not search_space:
-            print(f"Error: SearchSpace with id {search_space_id} not found")
-            return None
-
-        config_id = (
-            search_space.agent_llm_id if search_space.agent_llm_id is not None else -1
-        )
-        return await load_agent_config(session, config_id, search_space_id)
-    except Exception as e:
-        print(f"Error loading chat model config for search space {search_space_id}: {e}")
-        return None
-
-
-async def load_agent_config(
-    session: AsyncSession,
-    config_id: int,
-    search_space_id: int | None = None,
-) -> "AgentConfig | None":
-    """Main config loader: id 0 -> Auto mode; negative -> YAML; positive -> DB."""
-    if is_auto_mode(config_id):
-        return AgentConfig.from_auto_mode()
-
-    if config_id < 0:
-        # In-memory covers static YAML + dynamic OpenRouter configs.
-        from app.config import config as app_config
-
-        for cfg in app_config.GLOBAL_LLM_CONFIGS:
-            if cfg.get("id") == config_id:
-                return AgentConfig.from_yaml_config(cfg)
-        yaml_config = load_llm_config_from_yaml(config_id)
-        if yaml_config:
-            return AgentConfig.from_yaml_config(yaml_config)
-        return None
-    else:
-        return await load_new_llm_config_from_db(session, config_id)
-
-
 def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
    """Create a ChatLiteLLM instance from a global LLM config dictionary."""
    if llm_config.get("custom_provider"):
--- a/surfsense_backend/app/automations/services/model_policy.py
+++ b/surfsense_backend/app/automations/services/model_policy.py
@ -2,11 +2,11 @@

 Automations run unattended, so every run must be **billable**: it may only use
 either a premium global model (``billing_tier == "premium"``) or a user-provided
-BYOK model (a positive config id pointing at a per-user/per-space DB row). Free
+BYOK model (a positive model id pointing at a per-user/per-space DB row). Free
 global models and Auto mode are blocked, because Auto can dispatch to a free
 deployment and free models aren't metered in premium credits.

-Config id conventions (shared across chat / image / vision):
+Model id conventions (shared across chat / image / vision):
 - ``id == 0``  → Auto mode (``AUTO_MODE_ID`` / ``IMAGE_GEN_AUTO_MODE_ID`` /
  ``VISION_AUTO_MODE_ID``). Blocked.
 - ``id < 0``   → global YAML/OpenRouter config. Allowed only if premium.
@ -82,7 +82,7 @@ def get_model_eligibility(

    The ID-based core shared by both the search-space path (creation/eligibility)
    and the captured-snapshot path (runtime backstop). Each violation is
-    ``{"kind", "config_id", "reason"}``.
+    ``{"kind", "model_id", "reason"}``.
    """
    checks: list[tuple[ModelKind, int | None]] = [
        ("chat", chat_model_id),
@ -91,10 +91,10 @@ def get_model_eligibility(
    ]

    violations: list[dict] = []
-    for kind, config_id in checks:
-        allowed, reason = _classify(kind, config_id)
+    for kind, model_id in checks:
+        allowed, reason = _classify(kind, model_id)
        if not allowed:
-            violations.append({"kind": kind, "model_id": config_id, "reason": reason})
+            violations.append({"kind": kind, "model_id": model_id, "reason": reason})

    return {"allowed": not violations, "violations": violations}

--- a/surfsense_backend/app/config/__init__.py
+++ b/surfsense_backend/app/config/__init__.py
@ -119,7 +119,7 @@ def load_global_llm_configs():
                else:
                    seen_slugs[slug] = cfg.get("id", 0)

-        # Stamp Auto (Fastest) ranking metadata. YAML configs are always
+        # Stamp Auto ranking metadata. YAML configs are always
        # Tier A — operator-curated, locked first when premium-eligible.
        # The OpenRouter refresh tick later re-stamps health for any cfg
        # whose provider == "openrouter" via _enrich_health.
@ -210,42 +210,6 @@ def load_global_image_gen_configs():
        return []


-def load_global_vision_llm_configs():
-    data = _global_config_data()
-    if not data:
-        return []
-
-    try:
-        configs = copy.deepcopy(data.get("global_vision_llm_configs", []) or [])
-        for cfg in configs:
-            if isinstance(cfg, dict):
-                cfg.setdefault("billing_tier", "free")
-        return configs
-    except Exception as e:
-        print(f"Warning: Failed to load global vision LLM configs: {e}")
-        return []
-
-
-def load_vision_llm_router_settings():
-    default_settings = {
-        "routing_strategy": "usage-based-routing",
-        "num_retries": 3,
-        "allowed_fails": 3,
-        "cooldown_time": 60,
-    }
-
-    data = _global_config_data()
-    if not data:
-        return default_settings
-
-    try:
-        settings = data.get("vision_llm_router_settings", {})
-        return {**default_settings, **settings}
-    except Exception as e:
-        print(f"Warning: Failed to load vision LLM router settings: {e}")
-        return default_settings
-
-
 def load_image_gen_router_settings():
    """
    Load router settings for image generation Auto mode from YAML file.
@ -482,12 +446,6 @@ def initialize_image_gen_router():
        print(f"Warning: Failed to initialize Image Generation Router: {e}")


-def initialize_vision_llm_router():
-    # Retired: vision Auto now uses shared capability-filtered model selection
-    # over GLOBAL/BYOK chat models with supports_image_input=true.
-    return
-
-
 class Config:
    # Check if ffmpeg is installed
    if not is_ffmpeg_installed():
@ -869,12 +827,6 @@ class Config:
    # Router settings for Image Generation Auto mode
    IMAGE_GEN_ROUTER_SETTINGS = load_image_gen_router_settings()

-    # Global Vision LLM Configurations (optional)
-    GLOBAL_VISION_LLM_CONFIGS = load_global_vision_llm_configs()
-
-    # Router settings for Vision LLM Auto mode
-    VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
-
    # Virtual GLOBAL connection/model catalog. This is server-only metadata
    # derived from global_llm_config.yaml; GLOBAL keys are not stored in DB.
    from app.services.global_model_catalog import (
--- a/surfsense_backend/app/config/global_llm_config.example.yaml
+++ b/surfsense_backend/app/config/global_llm_config.example.yaml
@ -433,87 +433,11 @@ global_image_generation_configs:
  #   rpm: 30
  #   litellm_params: {}

-# =============================================================================
-# Vision LLM Configuration
-# =============================================================================
-# These configurations power the vision autocomplete feature (screenshot analysis).
-# Only vision-capable models should be used here (e.g. GPT-4o, Gemini Pro, Claude 3).
-# Supported providers: OpenAI, Anthropic, Google, Azure OpenAI, Vertex AI, Bedrock,
-# xAI, OpenRouter, Ollama, Groq, Together AI, Fireworks AI, DeepSeek, Mistral, Custom
-#
-# Auto mode (ID 0) uses LiteLLM Router for load balancing across all vision configs.
-
-# Router Settings for Vision LLM Auto Mode
-vision_llm_router_settings:
-  routing_strategy: "usage-based-routing"
-  num_retries: 3
-  allowed_fails: 3
-  cooldown_time: 60
-
-global_vision_llm_configs:
-  # Example: OpenAI GPT-4o (recommended for vision)
-  - id: -1001
-    name: "Global GPT-4o Vision"
-    description: "OpenAI's GPT-4o with strong vision capabilities"
-    litellm_provider: "openai"
-    model_name: "gpt-4o"
-    api_key: "sk-your-openai-api-key-here"
-    api_base: "https://api.openai.com/v1"
-    rpm: 500
-    tpm: 100000
-    litellm_params:
-      temperature: 0.3
-      max_tokens: 1000
-
-  # Example: Google Gemini 2.0 Flash
-  - id: -1002
-    name: "Global Gemini 2.0 Flash"
-    description: "Google's fast vision model with large context"
-    litellm_provider: "gemini"
-    model_name: "gemini-2.0-flash"
-    api_key: "your-google-ai-api-key-here"
-    api_base: "https://generativelanguage.googleapis.com/v1beta"
-    rpm: 1000
-    tpm: 200000
-    litellm_params:
-      temperature: 0.3
-      max_tokens: 1000
-
-  # Example: Anthropic Claude 3.5 Sonnet
-  - id: -1003
-    name: "Global Claude 3.5 Sonnet Vision"
-    description: "Anthropic's Claude 3.5 Sonnet with vision support"
-    litellm_provider: "anthropic"
-    model_name: "claude-3-5-sonnet-20241022"
-    api_key: "sk-ant-your-anthropic-api-key-here"
-    api_base: "https://api.anthropic.com/v1"
-    rpm: 1000
-    tpm: 100000
-    litellm_params:
-      temperature: 0.3
-      max_tokens: 1000
-
-  # Example: Azure OpenAI GPT-4o
-  # - id: -1004
-  #   name: "Global Azure GPT-4o Vision"
-  #   description: "Azure-hosted GPT-4o for vision analysis"
-  #   litellm_provider: "azure"
-  #   model_name: "azure/gpt-4o-deployment"
-  #   api_key: "your-azure-api-key-here"
-  #   api_base: "https://your-resource.openai.azure.com"
-  #   api_version: "2024-02-15-preview"
-  #   rpm: 500
-  #   tpm: 100000
-  #   litellm_params:
-  #     temperature: 0.3
-  #     max_tokens: 1000
-  #     base_model: "gpt-4o"
-
 # Notes:
 # - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
 # - Use negative IDs to distinguish global models from BYOK/local DB models
-# - IDs must be unique across chat, vision, and image generation configs
-# - Suggested static ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999
+# - IDs must be unique across chat and image generation configs
+# - Suggested static ranges: chat -1..-999, image -2001..-2999
 # - The 'api_key' field will not be exposed to users via API
 # - system_instructions: Custom prompt or empty string to use defaults
 # - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -198,81 +198,6 @@ class DocumentStatus:
        return None


-class LiteLLMProvider(StrEnum):
-    """
-    Enum for LLM providers supported by LiteLLM.
-    """
-
-    OPENAI = "OPENAI"
-    ANTHROPIC = "ANTHROPIC"
-    GOOGLE = "GOOGLE"
-    AZURE_OPENAI = "AZURE_OPENAI"
-    BEDROCK = "BEDROCK"
-    VERTEX_AI = "VERTEX_AI"
-    GROQ = "GROQ"
-    COHERE = "COHERE"
-    MISTRAL = "MISTRAL"
-    DEEPSEEK = "DEEPSEEK"
-    XAI = "XAI"
-    OPENROUTER = "OPENROUTER"
-    TOGETHER_AI = "TOGETHER_AI"
-    FIREWORKS_AI = "FIREWORKS_AI"
-    REPLICATE = "REPLICATE"
-    PERPLEXITY = "PERPLEXITY"
-    OLLAMA = "OLLAMA"
-    ALIBABA_QWEN = "ALIBABA_QWEN"
-    MOONSHOT = "MOONSHOT"
-    ZHIPU = "ZHIPU"
-    ANYSCALE = "ANYSCALE"
-    DEEPINFRA = "DEEPINFRA"
-    CEREBRAS = "CEREBRAS"
-    SAMBANOVA = "SAMBANOVA"
-    AI21 = "AI21"
-    CLOUDFLARE = "CLOUDFLARE"
-    DATABRICKS = "DATABRICKS"
-    COMETAPI = "COMETAPI"
-    HUGGINGFACE = "HUGGINGFACE"
-    GITHUB_MODELS = "GITHUB_MODELS"
-    MINIMAX = "MINIMAX"
-    CUSTOM = "CUSTOM"
-
-
-class ImageGenProvider(StrEnum):
-    """
-    Enum for image generation providers supported by LiteLLM.
-    This is a subset of LLM providers — only those that support image generation.
-    See: https://docs.litellm.ai/docs/image_generation#supported-providers
-    """
-
-    OPENAI = "OPENAI"
-    AZURE_OPENAI = "AZURE_OPENAI"
-    GOOGLE = "GOOGLE"  # Google AI Studio
-    VERTEX_AI = "VERTEX_AI"
-    BEDROCK = "BEDROCK"  # AWS Bedrock
-    RECRAFT = "RECRAFT"
-    OPENROUTER = "OPENROUTER"
-    XINFERENCE = "XINFERENCE"
-    NSCALE = "NSCALE"
-
-
-class VisionProvider(StrEnum):
-    OPENAI = "OPENAI"
-    ANTHROPIC = "ANTHROPIC"
-    GOOGLE = "GOOGLE"
-    AZURE_OPENAI = "AZURE_OPENAI"
-    VERTEX_AI = "VERTEX_AI"
-    BEDROCK = "BEDROCK"
-    XAI = "XAI"
-    OPENROUTER = "OPENROUTER"
-    OLLAMA = "OLLAMA"
-    GROQ = "GROQ"
-    TOGETHER_AI = "TOGETHER_AI"
-    FIREWORKS_AI = "FIREWORKS_AI"
-    DEEPSEEK = "DEEPSEEK"
-    MISTRAL = "MISTRAL"
-    CUSTOM = "CUSTOM"
-
-
 class ConnectionScope(StrEnum):
    GLOBAL = "GLOBAL"
    SEARCH_SPACE = "SEARCH_SPACE"
@ -710,11 +635,11 @@ class NewChatThread(BaseModel, TimestampMixin):
        default=False,
        server_default="false",
    )
-    # Auto (Fastest) model pin for this thread: concrete resolved global LLM
+    # Auto model pin for this thread: concrete resolved global LLM
    # config id. NULL means no pin; Auto will resolve on the next turn.
    # Single-writer invariant: only app.services.auto_model_pin_service sets
    # or clears this column (plus bulk clears when a search space's
-    # agent_llm_id changes). Unindexed: all reads are by primary key.
+    # chat_model_id changes). Unindexed: all reads are by primary key.
    pinned_llm_config_id = Column(Integer, nullable=True)

    # Surface metadata for first-party SurfSense and external chat threads.
@ -1686,75 +1611,6 @@ class Model(BaseModel, TimestampMixin):
    )


-class ImageGenerationConfig(BaseModel, TimestampMixin):
-    """
-    Dedicated configuration table for image generation models.
-
-    Separate from NewLLMConfig because image generation models don't need
-    system_instructions, citations_enabled, or use_default_system_instructions.
-    They only need provider credentials and model parameters.
-    """
-
-    __tablename__ = "image_generation_configs"
-
-    name = Column(String(100), nullable=False, index=True)
-    description = Column(String(500), nullable=True)
-
-    # Provider & model (uses ImageGenProvider, NOT LiteLLMProvider)
-    provider = Column(SQLAlchemyEnum(ImageGenProvider), nullable=False)
-    custom_provider = Column(String(100), nullable=True)
-    model_name = Column(String(100), nullable=False)
-
-    # Credentials
-    api_key = Column(String, nullable=False)
-    api_base = Column(String(500), nullable=True)
-    api_version = Column(String(50), nullable=True)  # Azure-specific
-
-    # Additional litellm parameters
-    litellm_params = Column(JSON, nullable=True, default={})
-
-    # Relationships
-    search_space_id = Column(
-        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
-    )
-    search_space = relationship(
-        "SearchSpace", back_populates="image_generation_configs"
-    )
-
-    # User who created this config
-    user_id = Column(
-        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
-    )
-    user = relationship("User", back_populates="image_generation_configs")
-
-
-class VisionLLMConfig(BaseModel, TimestampMixin):
-    __tablename__ = "vision_llm_configs"
-
-    name = Column(String(100), nullable=False, index=True)
-    description = Column(String(500), nullable=True)
-
-    provider = Column(SQLAlchemyEnum(VisionProvider), nullable=False)
-    custom_provider = Column(String(100), nullable=True)
-    model_name = Column(String(100), nullable=False)
-
-    api_key = Column(String, nullable=False)
-    api_base = Column(String(500), nullable=True)
-    api_version = Column(String(50), nullable=True)
-
-    litellm_params = Column(JSON, nullable=True, default={})
-
-    search_space_id = Column(
-        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
-    )
-    search_space = relationship("SearchSpace", back_populates="vision_llm_configs")
-
-    user_id = Column(
-        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
-    )
-    user = relationship("User", back_populates="vision_llm_configs")
-
-
 class ImageGeneration(BaseModel, TimestampMixin):
    """
    Stores image generation requests and results using litellm.aimage_generation().
@ -1786,10 +1642,9 @@ class ImageGeneration(BaseModel, TimestampMixin):
    style = Column(String(50), nullable=True)  # Model-specific style parameter
    response_format = Column(String(50), nullable=True)  # "url" or "b64_json"

-    # Image generation config reference
-    # 0 = Auto mode (router), negative IDs = global configs from YAML,
-    # positive IDs = ImageGenerationConfig records in DB
-    image_generation_config_id = Column(Integer, nullable=True)
+    # Image generation model provenance.
+    # 0 = Auto mode, negative IDs = GLOBAL models, positive IDs = Model records.
+    image_gen_model_id = Column(Integer, nullable=True)

    # Response data (full litellm response as JSONB) — present on success
    response_data = Column(JSONB, nullable=True)
@ -1831,23 +1686,7 @@ class SearchSpace(BaseModel, TimestampMixin):

    shared_memory_md = Column(Text, nullable=True, server_default="")

-    # Search space-level LLM preferences (shared by all members)
-    # Note: ID values:
-    #   - 0: Auto mode (uses LiteLLM Router for load balancing) - default for new search spaces
-    #   - Negative IDs: Global configs from YAML
-    #   - Positive IDs: Custom configs from DB (NewLLMConfig table)
-    agent_llm_id = Column(
-        Integer, nullable=True, default=0
-    )  # For chat operations, defaults to Auto mode
-    image_generation_config_id = Column(
-        Integer, nullable=True, default=0
-    )  # For image generation, defaults to Auto mode
-    vision_llm_config_id = Column(
-        Integer, nullable=True, default=0
-    )  # For vision/screenshot analysis, defaults to Auto mode
-
-    # New connection/model role bindings. These supersede the legacy config
-    # columns above without removing them in this PR.
+    # Connection/model role bindings.
    # Note: ID values preserve the existing convention:
    #   - 0: Auto mode
    #   - Negative IDs: Global virtual models from global_llm_config.yaml
@ -1931,24 +1770,6 @@ class SearchSpace(BaseModel, TimestampMixin):
        order_by="SearchSourceConnector.id",
        cascade="all, delete-orphan",
    )
-    new_llm_configs = relationship(
-        "NewLLMConfig",
-        back_populates="search_space",
-        order_by="NewLLMConfig.id",
-        cascade="all, delete-orphan",
-    )
-    image_generation_configs = relationship(
-        "ImageGenerationConfig",
-        back_populates="search_space",
-        order_by="ImageGenerationConfig.id",
-        cascade="all, delete-orphan",
-    )
-    vision_llm_configs = relationship(
-        "VisionLLMConfig",
-        back_populates="search_space",
-        order_by="VisionLLMConfig.id",
-        cascade="all, delete-orphan",
-    )
    connections = relationship(
        "Connection",
        back_populates="search_space",
@ -2057,64 +1878,6 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
    documents = relationship("Document", back_populates="connector")


-class NewLLMConfig(BaseModel, TimestampMixin):
-    """
-    New LLM configuration table that combines model settings with prompt configuration.
-
-    This table provides:
-    - LLM model configuration (provider, model_name, api_key, etc.)
-    - Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
-    - Citation toggle (enable/disable citation instructions)
-
-    Note: Tools instructions are built by get_tools_instructions(thread_visibility) (personal vs shared memory).
-    """
-
-    __tablename__ = "new_llm_configs"
-
-    name = Column(String(100), nullable=False, index=True)
-    description = Column(String(500), nullable=True)
-
-    # === LLM Model Configuration (from original LLMConfig, excluding 'language') ===
-    # Provider from the enum
-    provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False)
-    # Custom provider name when provider is CUSTOM
-    custom_provider = Column(String(100), nullable=True)
-    # Just the model name without provider prefix
-    model_name = Column(String(100), nullable=False)
-    # API Key should be encrypted before storing
-    api_key = Column(String, nullable=False)
-    api_base = Column(String(500), nullable=True)
-    # For any other parameters that litellm supports
-    litellm_params = Column(JSON, nullable=True, default={})
-
-    # === Prompt Configuration ===
-    # Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
-    # Users can customize this from the UI
-    system_instructions = Column(
-        Text,
-        nullable=False,
-        default="",  # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS
-    )
-    # Whether to use the default system instructions when system_instructions is empty
-    use_default_system_instructions = Column(Boolean, nullable=False, default=True)
-
-    # Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected
-    # When disabled, an anti-citation prompt is injected instead
-    citations_enabled = Column(Boolean, nullable=False, default=True)
-
-    # === Relationships ===
-    search_space_id = Column(
-        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
-    )
-    search_space = relationship("SearchSpace", back_populates="new_llm_configs")
-
-    # User who created this config
-    user_id = Column(
-        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
-    )
-    user = relationship("User", back_populates="new_llm_configs")
-
-
 class Log(BaseModel, TimestampMixin):
    __tablename__ = "logs"

@ -2481,25 +2244,6 @@ if config.AUTH_TYPE == "GOOGLE":
            passive_deletes=True,
        )

-        # LLM configs created by this user
-        new_llm_configs = relationship(
-            "NewLLMConfig",
-            back_populates="user",
-            passive_deletes=True,
-        )
-
-        # Image generation configs created by this user
-        image_generation_configs = relationship(
-            "ImageGenerationConfig",
-            back_populates="user",
-            passive_deletes=True,
-        )
-
-        vision_llm_configs = relationship(
-            "VisionLLMConfig",
-            back_populates="user",
-            passive_deletes=True,
-        )
        connections = relationship(
            "Connection",
            back_populates="user",
@ -2632,25 +2376,6 @@ else:
            passive_deletes=True,
        )

-        # LLM configs created by this user
-        new_llm_configs = relationship(
-            "NewLLMConfig",
-            back_populates="user",
-            passive_deletes=True,
-        )
-
-        # Image generation configs created by this user
-        image_generation_configs = relationship(
-            "ImageGenerationConfig",
-            back_populates="user",
-            passive_deletes=True,
-        )
-
-        vision_llm_configs = relationship(
-            "VisionLLMConfig",
-            back_populates="user",
-            passive_deletes=True,
-        )
        connections = relationship(
            "Connection",
            back_populates="user",
--- a/surfsense_backend/app/prompts/default_system_instructions.py
+++ b/surfsense_backend/app/prompts/default_system_instructions.py
@ -82,7 +82,7 @@ def build_configurable_system_prompt(
    *,
    model_name: str | None = None,
 ) -> str:
-    """Build a configurable SurfSense system prompt (NewLLMConfig path).
+    """Build a configurable SurfSense system prompt.

    See :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
    for full parameter docs.
@ -104,7 +104,7 @@ def build_configurable_system_prompt(
 def get_default_system_instructions() -> str:
    """Return the default ``<system_instruction>`` block (no tools / citations).

-    Useful for populating the UI when seeding ``NewLLMConfig.system_instructions``.
+    Useful for populating the UI when editing custom system instructions.
    The output reflects the current fragment tree, not a baked-in constant.
    """
    resolved_today = datetime.now(UTC).date().isoformat()
--- a/surfsense_backend/app/prompts/system_prompt_composer/composer.py
+++ b/surfsense_backend/app/prompts/system_prompt_composer/composer.py
@ -348,8 +348,7 @@ def compose_system_prompt(
        mcp_connector_tools: ``{server_name: [tool_names...]}`` to inject
            an explicit MCP routing block.
        custom_system_instructions: Free-form instructions that override
-            the default ``<system_instruction>`` block (legacy support
-            for ``NewLLMConfig.system_instructions``).
+            the default ``<system_instruction>`` block.
        use_default_system_instructions: When ``custom_system_instructions``
            is empty/None, fall back to defaults (legacy semantics).
        citations_enabled: Include ``citations_on.md`` (true) or
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@ -47,7 +47,6 @@ from .model_connections_routes import router as model_connections_router
 from .memory_routes import router as memory_router
 from .model_list_routes import router as model_list_router
 from .new_chat_routes import router as new_chat_router
-from .new_llm_config_routes import router as new_llm_config_router
 from .notes_routes import router as notes_router
 from .notion_add_connector_route import router as notion_add_connector_router
 from .obsidian_plugin_routes import router as obsidian_plugin_router
@ -64,7 +63,6 @@ from .stripe_routes import router as stripe_router
 from .team_memory_routes import router as team_memory_router
 from .teams_add_connector_route import router as teams_add_connector_router
 from .video_presentations_routes import router as video_presentations_router
-from .vision_llm_routes import router as vision_llm_router
 from .youtube_routes import router as youtube_router

 router = APIRouter()
@ -99,7 +97,6 @@ router.include_router(
 )  # Video presentation status and streaming
 router.include_router(reports_router)  # Report CRUD and multi-format export
 router.include_router(image_generation_router)  # Image generation via litellm
-router.include_router(vision_llm_router)  # Vision LLM configs for screenshot analysis
 router.include_router(search_source_connectors_router)
 router.include_router(google_calendar_add_connector_router)
 router.include_router(google_gmail_add_connector_router)
@ -117,7 +114,6 @@ router.include_router(jira_add_connector_router)
 router.include_router(confluence_add_connector_router)
 router.include_router(clickup_add_connector_router)
 router.include_router(dropbox_add_connector_router)
-router.include_router(new_llm_config_router)  # LLM configs with prompt configuration
 router.include_router(model_connections_router)  # Connection-centric model catalog
 router.include_router(model_list_router)  # Dynamic model catalogue from OpenRouter
 router.include_router(logs_router)
--- a/surfsense_backend/app/routes/image_generation_routes.py
+++ b/surfsense_backend/app/routes/image_generation_routes.py
@ -1,7 +1,5 @@
 """
 Image Generation routes:
- CRUD for ImageGenerationConfig (user-created image model configs)
- Global image gen configs endpoint (from YAML)
 - Image generation execution (calls litellm.aimage_generation())
 - CRUD for ImageGeneration records (results)
 - Image serving endpoint (serves b64_json images from DB, protected by signed tokens)
@ -21,7 +19,6 @@ from sqlalchemy.orm import selectinload
 from app.config import config
 from app.db import (
    ImageGeneration,
-    ImageGenerationConfig,
    Model,
    Permission,
    SearchSpace,
@ -30,14 +27,14 @@ from app.db import (
    get_async_session,
 )
 from app.schemas import (
-    GlobalImageGenConfigRead,
-    ImageGenerationConfigCreate,
-    ImageGenerationConfigRead,
-    ImageGenerationConfigUpdate,
    ImageGenerationCreate,
    ImageGenerationListRead,
    ImageGenerationRead,
 )
+from app.services.auto_model_pin_service import (
+    auto_model_candidates,
+    choose_auto_model_candidate,
+)
 from app.services.billable_calls import (
    DEFAULT_IMAGE_RESERVE_MICROS,
    QuotaInsufficientError,
@ -47,12 +44,8 @@ from app.services.image_gen_router_service import (
    IMAGE_GEN_AUTO_MODE_ID,
    is_image_gen_auto_mode,
 )
-from app.services.auto_model_pin_service import (
-    auto_model_candidates,
-    choose_auto_model_candidate,
-)
-from app.services.model_resolver import to_litellm
 from app.services.model_capabilities import has_capability
+from app.services.model_resolver import to_litellm
 from app.users import current_active_user
 from app.utils.rbac import check_permission
 from app.utils.signed_image_urls import verify_image_token
@ -131,14 +124,14 @@ async def _execute_image_generation(
    Call litellm.aimage_generation() with the appropriate config.

    Resolution order:
-    1. Explicit image_generation_config_id on the request
-    2. Search space's image_generation_config_id preference
+    1. Explicit image_gen_model_id on the request
+    2. Search space's image_gen_model_id preference
    3. Falls back to Auto mode if available
    """
-    config_id = image_gen.image_generation_config_id
+    config_id = image_gen.image_gen_model_id
    if config_id is None:
        config_id = search_space.image_gen_model_id or IMAGE_GEN_AUTO_MODE_ID
-        image_gen.image_generation_config_id = config_id
+        image_gen.image_gen_model_id = config_id

    # Build kwargs
    gen_kwargs = {}
@ -163,7 +156,7 @@ async def _execute_image_generation(
        if not candidates:
            raise ValueError("No image-generation models are available for Auto mode")
        config_id = int(choose_auto_model_candidate(candidates, search_space.id)["id"])
-        image_gen.image_generation_config_id = config_id
+        image_gen.image_gen_model_id = config_id

    if config_id < 0:
        global_model = _get_global_model(config_id)
@ -228,266 +221,6 @@ async def _execute_image_generation(
            image_gen.model = hidden["model"]


-# =============================================================================
-# Global Image Generation Configs (from YAML)
-# =============================================================================
-
-
-@router.get(
-    "/global-image-generation-configs",
-    response_model=list[GlobalImageGenConfigRead],
-)
-async def get_global_image_gen_configs(
-    user: User = Depends(current_active_user),
-):
-    """Get all global image generation configs. API keys are hidden."""
-    try:
-        global_configs = config.GLOBAL_IMAGE_GEN_CONFIGS
-        safe_configs = []
-
-        if global_configs and len(global_configs) > 0:
-            safe_configs.append(
-                {
-                    "id": 0,
-                    "name": "Auto (Fastest)",
-                    "description": "Automatically routes across available image generation providers.",
-                    "provider": "AUTO",
-                    "custom_provider": None,
-                    "model_name": "auto",
-                    "api_base": None,
-                    "api_version": None,
-                    "litellm_params": {},
-                    "is_global": True,
-                    "is_auto_mode": True,
-                    # Auto mode currently treated as free until per-deployment
-                    # billing-tier surfacing lands (see _resolve_billing_for_image_gen).
-                    "billing_tier": "free",
-                    "is_premium": False,
-                }
-            )
-
-        for cfg in global_configs:
-            billing_tier = str(cfg.get("billing_tier", "free")).lower()
-            safe_configs.append(
-                {
-                    "id": cfg.get("id"),
-                    "name": cfg.get("name"),
-                    "description": cfg.get("description"),
-                    "provider": cfg.get("provider") or cfg.get("litellm_provider"),
-                    "custom_provider": cfg.get("custom_provider"),
-                    "model_name": cfg.get("model_name"),
-                    "api_base": cfg.get("api_base") or None,
-                    "api_version": cfg.get("api_version") or None,
-                    "litellm_params": cfg.get("litellm_params", {}),
-                    "is_global": True,
-                    "billing_tier": billing_tier,
-                    # Mirror chat (``new_llm_config_routes``) so the new-chat
-                    # selector's premium badge logic keys off the same
-                    # field across chat / image / vision tabs.
-                    "is_premium": billing_tier == "premium",
-                    "quota_reserve_micros": cfg.get("quota_reserve_micros"),
-                }
-            )
-
-        return safe_configs
-    except Exception as e:
-        logger.exception("Failed to fetch global image generation configs")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch configs: {e!s}"
-        ) from e
-
-
-# =============================================================================
-# ImageGenerationConfig CRUD
-# =============================================================================
-
-
-@router.post("/image-generation-configs", response_model=ImageGenerationConfigRead)
-async def create_image_gen_config(
-    config_data: ImageGenerationConfigCreate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """Create a new image generation config for a search space."""
-    try:
-        await check_permission(
-            session,
-            user,
-            config_data.search_space_id,
-            Permission.IMAGE_GENERATIONS_CREATE.value,
-            "You don't have permission to create image generation configs in this search space",
-        )
-
-        db_config = ImageGenerationConfig(**config_data.model_dump(), user_id=user.id)
-        session.add(db_config)
-        await session.commit()
-        await session.refresh(db_config)
-        return db_config
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to create ImageGenerationConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to create config: {e!s}"
-        ) from e
-
-
-@router.get("/image-generation-configs", response_model=list[ImageGenerationConfigRead])
-async def list_image_gen_configs(
-    search_space_id: int,
-    skip: int = 0,
-    limit: int = 100,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """List image generation configs for a search space."""
-    try:
-        await check_permission(
-            session,
-            user,
-            search_space_id,
-            Permission.IMAGE_GENERATIONS_READ.value,
-            "You don't have permission to view image generation configs in this search space",
-        )
-
-        result = await session.execute(
-            select(ImageGenerationConfig)
-            .filter(ImageGenerationConfig.search_space_id == search_space_id)
-            .order_by(ImageGenerationConfig.created_at.desc())
-            .offset(skip)
-            .limit(limit)
-        )
-        return result.scalars().all()
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to list ImageGenerationConfigs")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch configs: {e!s}"
-        ) from e
-
-
-@router.get(
-    "/image-generation-configs/{config_id}", response_model=ImageGenerationConfigRead
-)
-async def get_image_gen_config(
-    config_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """Get a specific image generation config by ID."""
-    try:
-        result = await session.execute(
-            select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if not db_config:
-            raise HTTPException(status_code=404, detail="Config not found")
-
-        await check_permission(
-            session,
-            user,
-            db_config.search_space_id,
-            Permission.IMAGE_GENERATIONS_READ.value,
-            "You don't have permission to view image generation configs in this search space",
-        )
-        return db_config
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to get ImageGenerationConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch config: {e!s}"
-        ) from e
-
-
-@router.put(
-    "/image-generation-configs/{config_id}", response_model=ImageGenerationConfigRead
-)
-async def update_image_gen_config(
-    config_id: int,
-    update_data: ImageGenerationConfigUpdate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """Update an existing image generation config."""
-    try:
-        result = await session.execute(
-            select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if not db_config:
-            raise HTTPException(status_code=404, detail="Config not found")
-
-        await check_permission(
-            session,
-            user,
-            db_config.search_space_id,
-            Permission.IMAGE_GENERATIONS_CREATE.value,
-            "You don't have permission to update image generation configs in this search space",
-        )
-
-        for key, value in update_data.model_dump(exclude_unset=True).items():
-            setattr(db_config, key, value)
-
-        await session.commit()
-        await session.refresh(db_config)
-        return db_config
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to update ImageGenerationConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to update config: {e!s}"
-        ) from e
-
-
-@router.delete("/image-generation-configs/{config_id}", response_model=dict)
-async def delete_image_gen_config(
-    config_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """Delete an image generation config."""
-    try:
-        result = await session.execute(
-            select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if not db_config:
-            raise HTTPException(status_code=404, detail="Config not found")
-
-        await check_permission(
-            session,
-            user,
-            db_config.search_space_id,
-            Permission.IMAGE_GENERATIONS_DELETE.value,
-            "You don't have permission to delete image generation configs in this search space",
-        )
-
-        await session.delete(db_config)
-        await session.commit()
-        return {
-            "message": "Image generation config deleted successfully",
-            "id": config_id,
-        }
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to delete ImageGenerationConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to delete config: {e!s}"
-        ) from e
-
-
 # =============================================================================
 # Image Generation Execution + Results CRUD
 # =============================================================================
@ -536,7 +269,7 @@ async def create_image_generation(
            raise HTTPException(status_code=404, detail="Search space not found")

        billing_tier, base_model, reserve_micros = await _resolve_billing_for_image_gen(
-            session, data.image_generation_config_id, search_space
+            session, data.image_gen_model_id, search_space
        )

        # billable_call runs OUTSIDE the inner try/except so QuotaInsufficientError
@ -562,7 +295,7 @@ async def create_image_generation(
                size=data.size,
                style=data.style,
                response_format=data.response_format,
-                image_generation_config_id=data.image_generation_config_id,
+                image_gen_model_id=data.image_gen_model_id,
                search_space_id=data.search_space_id,
                created_by_id=user.id,
            )
--- a/surfsense_backend/app/routes/model_connections_routes.py
+++ b/surfsense_backend/app/routes/model_connections_routes.py
@ -11,6 +11,7 @@ from app.db import (
    ConnectionScope,
    Model,
    ModelSource,
+    NewChatThread,
    Permission,
    SearchSpace,
    User,
@ -708,12 +709,26 @@ async def update_model_roles(
    search_space = await _get_search_space(session, search_space_id)
    updates = data.model_dump(exclude_unset=True)
    if "chat_model_id" in updates:
-        search_space.chat_model_id = await _validate_role_model_id(
+        previous_chat_model_id = search_space.chat_model_id
+        next_chat_model_id = await _validate_role_model_id(
            session,
            search_space_id=search_space_id,
            model_id=updates["chat_model_id"],
            capability="chat",
        )
+        search_space.chat_model_id = next_chat_model_id
+        if next_chat_model_id != previous_chat_model_id:
+            await session.execute(
+                update(NewChatThread)
+                .where(NewChatThread.search_space_id == search_space_id)
+                .values(pinned_llm_config_id=None)
+            )
+            logger.info(
+                "Cleared auto model pins for search_space_id=%s after chat_model_id change (%s -> %s)",
+                search_space_id,
+                previous_chat_model_id,
+                next_chat_model_id,
+            )
    if "vision_model_id" in updates:
        search_space.vision_model_id = await _validate_role_model_id(
            session,
--- a/surfsense_backend/app/routes/new_llm_config_routes.py
+++ b/surfsense_backend/app/routes/new_llm_config_routes.py
@ -1,480 +0,0 @@
-"""
-API routes for NewLLMConfig CRUD operations.
-
-NewLLMConfig combines model settings with prompt configuration:
- LLM provider, model, API key, etc.
- Configurable system instructions
- Citation toggle
-"""
-
-import logging
-
-from fastapi import APIRouter, Depends, HTTPException
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.future import select
-
-from app.config import config
-from app.db import (
-    NewLLMConfig,
-    Permission,
-    User,
-    get_async_session,
-)
-from app.prompts.default_system_instructions import get_default_system_instructions
-from app.schemas import (
-    DefaultSystemInstructionsResponse,
-    GlobalNewLLMConfigRead,
-    NewLLMConfigCreate,
-    NewLLMConfigRead,
-    NewLLMConfigUpdate,
-)
-from app.services.llm_service import validate_llm_config
-from app.services.provider_capabilities import derive_supports_image_input
-from app.users import current_active_user
-from app.utils.rbac import check_permission
-
-router = APIRouter()
-logger = logging.getLogger(__name__)
-
-
-def _serialize_byok_config(config: NewLLMConfig) -> NewLLMConfigRead:
-    """Augment a BYOK chat config row with the derived ``supports_image_input``.
-
-    There is no DB column for ``supports_image_input`` — the value is
-    resolved at the API boundary from LiteLLM's authoritative model map
-    (default-allow on unknown). Returning ``NewLLMConfigRead`` here keeps
-    the response shape consistent across list / detail / create / update
-    endpoints without having to remember to set the field at every call
-    site.
-    """
-    provider_value = (
-        config.provider.value
-        if hasattr(config.provider, "value")
-        else str(config.provider)
-    )
-    litellm_params = config.litellm_params or {}
-    base_model = (
-        litellm_params.get("base_model") if isinstance(litellm_params, dict) else None
-    )
-    supports_image_input = derive_supports_image_input(
-        provider=provider_value.lower(),
-        model_name=config.model_name,
-        base_model=base_model,
-        custom_provider=config.custom_provider,
-    )
-    # ``model_validate`` runs the Pydantic conversion using the ORM
-    # attribute access path enabled by ``ConfigDict(from_attributes=True)``,
-    # then we layer the derived field on. ``model_copy(update=...)`` keeps
-    # the surface immutable from the caller's perspective.
-    base_read = NewLLMConfigRead.model_validate(config)
-    return base_read.model_copy(update={"supports_image_input": supports_image_input})
-
-
-# =============================================================================
-# Global Configs Routes
-# =============================================================================
-
-
-@router.get("/global-new-llm-configs", response_model=list[GlobalNewLLMConfigRead])
-async def get_global_new_llm_configs(
-    user: User = Depends(current_active_user),
-):
-    """
-    Get all available global NewLLMConfig configurations.
-    These are pre-configured by the system administrator and available to all users.
-    API keys are not exposed through this endpoint.
-
-    Includes:
-    - Auto mode (ID 0): Uses LiteLLM Router for automatic load balancing
-    - Global configs (negative IDs): Individual pre-configured LLM providers
-    """
-    try:
-        global_configs = config.GLOBAL_LLM_CONFIGS
-        safe_configs = []
-
-        # Only include Auto mode if there are actual global configs to route to
-        # Auto mode requires at least one global config with valid API key
-        if global_configs and len(global_configs) > 0:
-            safe_configs.append(
-                {
-                    "id": 0,
-                    "name": "Auto (Fastest)",
-                    "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling. Recommended for most users.",
-                    "provider": "AUTO",
-                    "custom_provider": None,
-                    "model_name": "auto",
-                    "api_base": None,
-                    "litellm_params": {},
-                    "system_instructions": "",
-                    "use_default_system_instructions": True,
-                    "citations_enabled": True,
-                    "is_global": True,
-                    "is_auto_mode": True,
-                    "billing_tier": "free",
-                    "is_premium": False,
-                    "anonymous_enabled": False,
-                    "seo_enabled": False,
-                    "seo_slug": None,
-                    "seo_title": None,
-                    "seo_description": None,
-                    "quota_reserve_tokens": None,
-                    # Auto routes across the configured pool, which usually
-                    # includes at least one vision-capable deployment, so
-                    # treat Auto as image-capable. The router itself will
-                    # still pick a vision-capable deployment for messages
-                    # carrying image_url blocks (LiteLLM Router falls back
-                    # on ``404`` per its ``allowed_fails`` policy).
-                    "supports_image_input": True,
-                }
-            )
-
-        # Add individual global configs
-        for cfg in global_configs:
-            # Capability resolution: explicit value (YAML override or OR
-            # `_supports_image_input(model)` payload baked in by the
-            # OpenRouter integration service) wins. Fall back to the
-            # LiteLLM-driven helper which default-allows on unknown so
-            # we don't hide vision-capable models that happen to lack a
-            # YAML annotation. The streaming task safety net is the
-            # only place a False ever blocks.
-            if "supports_image_input" in cfg:
-                supports_image_input = bool(cfg.get("supports_image_input"))
-            else:
-                cfg_litellm_params = cfg.get("litellm_params") or {}
-                cfg_base_model = (
-                    cfg_litellm_params.get("base_model")
-                    if isinstance(cfg_litellm_params, dict)
-                    else None
-                )
-                supports_image_input = derive_supports_image_input(
-                    provider=cfg.get("provider") or cfg.get("litellm_provider"),
-                    model_name=cfg.get("model_name"),
-                    base_model=cfg_base_model,
-                    custom_provider=cfg.get("custom_provider"),
-                )
-
-            safe_config = {
-                "id": cfg.get("id"),
-                "name": cfg.get("name"),
-                "description": cfg.get("description"),
-                "provider": cfg.get("provider") or cfg.get("litellm_provider"),
-                "custom_provider": cfg.get("custom_provider"),
-                "model_name": cfg.get("model_name"),
-                "api_base": cfg.get("api_base") or None,
-                "litellm_params": cfg.get("litellm_params", {}),
-                # New prompt configuration fields
-                "system_instructions": cfg.get("system_instructions", ""),
-                "use_default_system_instructions": cfg.get(
-                    "use_default_system_instructions", True
-                ),
-                "citations_enabled": cfg.get("citations_enabled", True),
-                "is_global": True,
-                "billing_tier": cfg.get("billing_tier", "free"),
-                "is_premium": cfg.get("billing_tier", "free") == "premium",
-                "anonymous_enabled": cfg.get("anonymous_enabled", False),
-                "seo_enabled": cfg.get("seo_enabled", False),
-                "seo_slug": cfg.get("seo_slug"),
-                "seo_title": cfg.get("seo_title"),
-                "seo_description": cfg.get("seo_description"),
-                "quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
-                "supports_image_input": supports_image_input,
-            }
-            safe_configs.append(safe_config)
-
-        return safe_configs
-    except Exception as e:
-        logger.exception("Failed to fetch global NewLLMConfigs")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch global configurations: {e!s}"
-        ) from e
-
-
-# =============================================================================
-# CRUD Routes
-# =============================================================================
-
-
-@router.post("/new-llm-configs", response_model=NewLLMConfigRead)
-async def create_new_llm_config(
-    config_data: NewLLMConfigCreate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Create a new NewLLMConfig for a search space.
-    Requires LLM_CONFIGS_CREATE permission.
-    """
-    try:
-        # Verify user has permission
-        await check_permission(
-            session,
-            user,
-            config_data.search_space_id,
-            Permission.LLM_CONFIGS_CREATE.value,
-            "You don't have permission to create LLM configurations in this search space",
-        )
-
-        # Validate the LLM configuration by making a test API call
-        is_valid, error_message = await validate_llm_config(
-            provider=config_data.provider.value,
-            model_name=config_data.model_name,
-            api_key=config_data.api_key,
-            api_base=config_data.api_base,
-            custom_provider=config_data.custom_provider,
-            litellm_params=config_data.litellm_params,
-        )
-
-        if not is_valid:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Invalid LLM configuration: {error_message}",
-            )
-
-        # Create the config with user association
-        db_config = NewLLMConfig(**config_data.model_dump(), user_id=user.id)
-        session.add(db_config)
-        await session.commit()
-        await session.refresh(db_config)
-
-        return _serialize_byok_config(db_config)
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to create NewLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to create configuration: {e!s}"
-        ) from e
-
-
-@router.get("/new-llm-configs", response_model=list[NewLLMConfigRead])
-async def list_new_llm_configs(
-    search_space_id: int,
-    skip: int = 0,
-    limit: int = 100,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Get all NewLLMConfigs for a search space.
-    Requires LLM_CONFIGS_READ permission.
-    """
-    try:
-        # Verify user has permission
-        await check_permission(
-            session,
-            user,
-            search_space_id,
-            Permission.LLM_CONFIGS_READ.value,
-            "You don't have permission to view LLM configurations in this search space",
-        )
-
-        result = await session.execute(
-            select(NewLLMConfig)
-            .filter(NewLLMConfig.search_space_id == search_space_id)
-            .order_by(NewLLMConfig.created_at.desc())
-            .offset(skip)
-            .limit(limit)
-        )
-
-        return [_serialize_byok_config(cfg) for cfg in result.scalars().all()]
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to list NewLLMConfigs")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch configurations: {e!s}"
-        ) from e
-
-
-@router.get(
-    "/new-llm-configs/default-system-instructions",
-    response_model=DefaultSystemInstructionsResponse,
-)
-async def get_default_system_instructions_endpoint(
-    user: User = Depends(current_active_user),
-):
-    """
-    Get the default SURFSENSE_SYSTEM_INSTRUCTIONS template.
-    Useful for pre-populating the UI when creating a new configuration.
-    """
-    return DefaultSystemInstructionsResponse(
-        default_system_instructions=get_default_system_instructions()
-    )
-
-
-@router.get("/new-llm-configs/{config_id}", response_model=NewLLMConfigRead)
-async def get_new_llm_config(
-    config_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Get a specific NewLLMConfig by ID.
-    Requires LLM_CONFIGS_READ permission.
-    """
-    try:
-        result = await session.execute(
-            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
-        )
-        config = result.scalars().first()
-
-        if not config:
-            raise HTTPException(status_code=404, detail="Configuration not found")
-
-        # Verify user has permission
-        await check_permission(
-            session,
-            user,
-            config.search_space_id,
-            Permission.LLM_CONFIGS_READ.value,
-            "You don't have permission to view LLM configurations in this search space",
-        )
-
-        return _serialize_byok_config(config)
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to get NewLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch configuration: {e!s}"
-        ) from e
-
-
-@router.put("/new-llm-configs/{config_id}", response_model=NewLLMConfigRead)
-async def update_new_llm_config(
-    config_id: int,
-    update_data: NewLLMConfigUpdate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Update an existing NewLLMConfig.
-    Requires LLM_CONFIGS_UPDATE permission.
-    """
-    try:
-        result = await session.execute(
-            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
-        )
-        config = result.scalars().first()
-
-        if not config:
-            raise HTTPException(status_code=404, detail="Configuration not found")
-
-        # Verify user has permission
-        await check_permission(
-            session,
-            user,
-            config.search_space_id,
-            Permission.LLM_CONFIGS_UPDATE.value,
-            "You don't have permission to update LLM configurations in this search space",
-        )
-
-        update_dict = update_data.model_dump(exclude_unset=True)
-
-        # If updating LLM settings, validate them
-        if any(
-            key in update_dict
-            for key in [
-                "provider",
-                "model_name",
-                "api_key",
-                "api_base",
-                "custom_provider",
-                "litellm_params",
-            ]
-        ):
-            # Build the validation config from existing + updates
-            validation_config = {
-                "provider": update_dict.get("provider", config.provider).value
-                if hasattr(update_dict.get("provider", config.provider), "value")
-                else update_dict.get("provider", config.provider.value),
-                "model_name": update_dict.get("model_name", config.model_name),
-                "api_key": update_dict.get("api_key", config.api_key),
-                "api_base": update_dict.get("api_base", config.api_base),
-                "custom_provider": update_dict.get(
-                    "custom_provider", config.custom_provider
-                ),
-                "litellm_params": update_dict.get(
-                    "litellm_params", config.litellm_params
-                ),
-            }
-
-            is_valid, error_message = await validate_llm_config(
-                provider=validation_config["provider"],
-                model_name=validation_config["model_name"],
-                api_key=validation_config["api_key"],
-                api_base=validation_config["api_base"],
-                custom_provider=validation_config["custom_provider"],
-                litellm_params=validation_config["litellm_params"],
-            )
-
-            if not is_valid:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"Invalid LLM configuration: {error_message}",
-                )
-
-        # Apply updates
-        for key, value in update_dict.items():
-            setattr(config, key, value)
-
-        await session.commit()
-        await session.refresh(config)
-
-        return _serialize_byok_config(config)
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to update NewLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to update configuration: {e!s}"
-        ) from e
-
-
-@router.delete("/new-llm-configs/{config_id}", response_model=dict)
-async def delete_new_llm_config(
-    config_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Delete a NewLLMConfig.
-    Requires LLM_CONFIGS_DELETE permission.
-    """
-    try:
-        result = await session.execute(
-            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
-        )
-        config = result.scalars().first()
-
-        if not config:
-            raise HTTPException(status_code=404, detail="Configuration not found")
-
-        # Verify user has permission
-        await check_permission(
-            session,
-            user,
-            config.search_space_id,
-            Permission.LLM_CONFIGS_DELETE.value,
-            "You don't have permission to delete LLM configurations in this search space",
-        )
-
-        await session.delete(config)
-        await session.commit()
-
-        return {"message": "Configuration deleted successfully", "id": config_id}
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to delete NewLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to delete configuration: {e!s}"
-        ) from e
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@ -1,27 +1,20 @@
 import logging

 from fastapi import APIRouter, Depends, HTTPException
-from sqlalchemy import func, update
+from sqlalchemy import func
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select

-from app.config import config
 from app.db import (
-    ImageGenerationConfig,
-    NewChatThread,
-    NewLLMConfig,
    Permission,
    SearchSpace,
    SearchSpaceMembership,
    SearchSpaceRole,
    User,
-    VisionLLMConfig,
    get_async_session,
    get_default_roles_config,
 )
 from app.schemas import (
-    LLMPreferencesRead,
-    LLMPreferencesUpdate,
    SearchSpaceCreate,
    SearchSpaceRead,
    SearchSpaceUpdate,
@ -377,357 +370,6 @@ async def delete_search_space(
        ) from e


-# =============================================================================
-# LLM Preferences Routes
-# =============================================================================
-
-
-async def _get_llm_config_by_id(
-    session: AsyncSession, config_id: int | None
-) -> dict | None:
-    """
-    Get an LLM config by ID as a dictionary. Returns database config for positive IDs,
-    global config for negative IDs, Auto mode config for ID 0, or None if ID is None.
-    """
-    if config_id is None:
-        return None
-
-    # Auto mode (ID 0) - uses LiteLLM Router for load balancing
-    if config_id == 0:
-        return {
-            "id": 0,
-            "name": "Auto (Fastest)",
-            "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
-            "provider": "AUTO",
-            "custom_provider": None,
-            "model_name": "auto",
-            "api_base": None,
-            "litellm_params": {},
-            "system_instructions": "",
-            "use_default_system_instructions": True,
-            "citations_enabled": True,
-            "is_global": True,
-            "is_auto_mode": True,
-        }
-
-    if config_id < 0:
-        # Global config - find from YAML
-        global_configs = config.GLOBAL_LLM_CONFIGS
-        for cfg in global_configs:
-            if cfg.get("id") == config_id:
-                return {
-                    "id": cfg.get("id"),
-                    "name": cfg.get("name"),
-                    "description": cfg.get("description"),
-                    "provider": cfg.get("provider") or cfg.get("litellm_provider"),
-                    "custom_provider": cfg.get("custom_provider"),
-                    "model_name": cfg.get("model_name"),
-                    "api_base": cfg.get("api_base"),
-                    "litellm_params": cfg.get("litellm_params", {}),
-                    "system_instructions": cfg.get("system_instructions", ""),
-                    "use_default_system_instructions": cfg.get(
-                        "use_default_system_instructions", True
-                    ),
-                    "citations_enabled": cfg.get("citations_enabled", True),
-                    "is_global": True,
-                }
-        return None
-    else:
-        # Database config - convert to dict
-        result = await session.execute(
-            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if db_config:
-            return {
-                "id": db_config.id,
-                "name": db_config.name,
-                "description": db_config.description,
-                "provider": db_config.provider.value if db_config.provider else None,
-                "custom_provider": db_config.custom_provider,
-                "model_name": db_config.model_name,
-                "api_key": db_config.api_key,
-                "api_base": db_config.api_base,
-                "litellm_params": db_config.litellm_params or {},
-                "system_instructions": db_config.system_instructions or "",
-                "use_default_system_instructions": db_config.use_default_system_instructions,
-                "citations_enabled": db_config.citations_enabled,
-                "created_at": db_config.created_at.isoformat()
-                if db_config.created_at
-                else None,
-                "search_space_id": db_config.search_space_id,
-            }
-        return None
-
-
-async def _get_image_gen_config_by_id(
-    session: AsyncSession, config_id: int | None
-) -> dict | None:
-    """
-    Get an image generation config by ID as a dictionary.
-    Returns Auto mode for ID 0, global config for negative IDs,
-    DB ImageGenerationConfig for positive IDs, or None.
-    """
-    if config_id is None:
-        return None
-
-    if config_id == 0:
-        return {
-            "id": 0,
-            "name": "Auto (Fastest)",
-            "description": "Automatically routes requests across available image generation providers",
-            "provider": "AUTO",
-            "model_name": "auto",
-            "is_global": True,
-            "is_auto_mode": True,
-            "billing_tier": "free",
-        }
-
-    if config_id < 0:
-        for cfg in config.GLOBAL_IMAGE_GEN_CONFIGS:
-            if cfg.get("id") == config_id:
-                return {
-                    "id": cfg.get("id"),
-                    "name": cfg.get("name"),
-                    "description": cfg.get("description"),
-                    "provider": cfg.get("provider") or cfg.get("litellm_provider"),
-                    "custom_provider": cfg.get("custom_provider"),
-                    "model_name": cfg.get("model_name"),
-                    "api_base": cfg.get("api_base") or None,
-                    "api_version": cfg.get("api_version") or None,
-                    "litellm_params": cfg.get("litellm_params", {}),
-                    "is_global": True,
-                    "billing_tier": cfg.get("billing_tier", "free"),
-                }
-        return None
-
-    # Positive ID: query ImageGenerationConfig table
-    result = await session.execute(
-        select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
-    )
-    db_config = result.scalars().first()
-    if db_config:
-        return {
-            "id": db_config.id,
-            "name": db_config.name,
-            "description": db_config.description,
-            "provider": db_config.provider.value if db_config.provider else None,
-            "custom_provider": db_config.custom_provider,
-            "model_name": db_config.model_name,
-            "api_base": db_config.api_base,
-            "api_version": db_config.api_version,
-            "litellm_params": db_config.litellm_params or {},
-            "created_at": db_config.created_at.isoformat()
-            if db_config.created_at
-            else None,
-            "search_space_id": db_config.search_space_id,
-        }
-    return None
-
-
-async def _get_vision_llm_config_by_id(
-    session: AsyncSession, config_id: int | None
-) -> dict | None:
-    if config_id is None:
-        return None
-
-    if config_id == 0:
-        return {
-            "id": 0,
-            "name": "Auto (Fastest)",
-            "description": "Automatically routes requests across available vision LLM providers",
-            "provider": "AUTO",
-            "model_name": "auto",
-            "is_global": True,
-            "is_auto_mode": True,
-            "billing_tier": "free",
-        }
-
-    if config_id < 0:
-        for cfg in config.GLOBAL_VISION_LLM_CONFIGS:
-            if cfg.get("id") == config_id:
-                return {
-                    "id": cfg.get("id"),
-                    "name": cfg.get("name"),
-                    "description": cfg.get("description"),
-                    "provider": cfg.get("provider") or cfg.get("litellm_provider"),
-                    "custom_provider": cfg.get("custom_provider"),
-                    "model_name": cfg.get("model_name"),
-                    "api_base": cfg.get("api_base") or None,
-                    "api_version": cfg.get("api_version") or None,
-                    "litellm_params": cfg.get("litellm_params", {}),
-                    "is_global": True,
-                    "billing_tier": cfg.get("billing_tier", "free"),
-                }
-        return None
-
-    result = await session.execute(
-        select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
-    )
-    db_config = result.scalars().first()
-    if db_config:
-        return {
-            "id": db_config.id,
-            "name": db_config.name,
-            "description": db_config.description,
-            "provider": db_config.provider.value if db_config.provider else None,
-            "custom_provider": db_config.custom_provider,
-            "model_name": db_config.model_name,
-            "api_base": db_config.api_base,
-            "api_version": db_config.api_version,
-            "litellm_params": db_config.litellm_params or {},
-            "created_at": db_config.created_at.isoformat()
-            if db_config.created_at
-            else None,
-            "search_space_id": db_config.search_space_id,
-        }
-    return None
-
-
-@router.get(
-    "/search-spaces/{search_space_id}/llm-preferences",
-    response_model=LLMPreferencesRead,
-)
-async def get_llm_preferences(
-    search_space_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Get LLM preferences (role assignments) for a search space.
-    Requires LLM_CONFIGS_READ permission.
-    """
-    try:
-        # Check permission
-        await check_permission(
-            session,
-            user,
-            search_space_id,
-            Permission.LLM_CONFIGS_READ.value,
-            "You don't have permission to view LLM preferences",
-        )
-
-        result = await session.execute(
-            select(SearchSpace).filter(SearchSpace.id == search_space_id)
-        )
-        search_space = result.scalars().first()
-
-        if not search_space:
-            raise HTTPException(status_code=404, detail="Search space not found")
-
-        # Get full config objects for each role
-        agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
-        image_generation_config = await _get_image_gen_config_by_id(
-            session, search_space.image_generation_config_id
-        )
-        vision_llm_config = await _get_vision_llm_config_by_id(
-            session, search_space.vision_llm_config_id
-        )
-
-        return LLMPreferencesRead(
-            agent_llm_id=search_space.agent_llm_id,
-            image_generation_config_id=search_space.image_generation_config_id,
-            vision_llm_config_id=search_space.vision_llm_config_id,
-            agent_llm=agent_llm,
-            image_generation_config=image_generation_config,
-            vision_llm_config=vision_llm_config,
-        )
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to get LLM preferences")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to get LLM preferences: {e!s}"
-        ) from e
-
-
-@router.put(
-    "/search-spaces/{search_space_id}/llm-preferences",
-    response_model=LLMPreferencesRead,
-)
-async def update_llm_preferences(
-    search_space_id: int,
-    preferences: LLMPreferencesUpdate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    """
-    Update LLM preferences (role assignments) for a search space.
-    Requires LLM_CONFIGS_UPDATE permission.
-    """
-    try:
-        # Check permission
-        await check_permission(
-            session,
-            user,
-            search_space_id,
-            Permission.LLM_CONFIGS_UPDATE.value,
-            "You don't have permission to update LLM preferences",
-        )
-
-        result = await session.execute(
-            select(SearchSpace).filter(SearchSpace.id == search_space_id)
-        )
-        search_space = result.scalars().first()
-
-        if not search_space:
-            raise HTTPException(status_code=404, detail="Search space not found")
-
-        # Update preferences
-        update_data = preferences.model_dump(exclude_unset=True)
-        previous_agent_llm_id = search_space.agent_llm_id
-        for key, value in update_data.items():
-            setattr(search_space, key, value)
-
-        agent_llm_changed = (
-            "agent_llm_id" in update_data
-            and update_data["agent_llm_id"] != previous_agent_llm_id
-        )
-        if agent_llm_changed:
-            await session.execute(
-                update(NewChatThread)
-                .where(NewChatThread.search_space_id == search_space_id)
-                .values(pinned_llm_config_id=None)
-            )
-            logger.info(
-                "Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",
-                search_space_id,
-                previous_agent_llm_id,
-                update_data["agent_llm_id"],
-            )
-
-        await session.commit()
-        await session.refresh(search_space)
-
-        # Get full config objects for response
-        agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
-        image_generation_config = await _get_image_gen_config_by_id(
-            session, search_space.image_generation_config_id
-        )
-        vision_llm_config = await _get_vision_llm_config_by_id(
-            session, search_space.vision_llm_config_id
-        )
-
-        return LLMPreferencesRead(
-            agent_llm_id=search_space.agent_llm_id,
-            image_generation_config_id=search_space.image_generation_config_id,
-            vision_llm_config_id=search_space.vision_llm_config_id,
-            agent_llm=agent_llm,
-            image_generation_config=image_generation_config,
-            vision_llm_config=vision_llm_config,
-        )
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to update LLM preferences")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to update LLM preferences: {e!s}"
-        ) from e
-
-
@router.get("/searchspaces/{search_space_id}/snapshots")
 async def list_search_space_snapshots(
    search_space_id: int,
--- a/surfsense_backend/app/routes/vision_llm_routes.py
+++ b/surfsense_backend/app/routes/vision_llm_routes.py
@ -1,304 +0,0 @@
-import logging
-
-from fastapi import APIRouter, Depends, HTTPException
-from pydantic import BaseModel
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.config import config
-from app.db import (
-    Permission,
-    User,
-    VisionLLMConfig,
-    get_async_session,
-)
-from app.schemas import (
-    GlobalVisionLLMConfigRead,
-    VisionLLMConfigCreate,
-    VisionLLMConfigRead,
-    VisionLLMConfigUpdate,
-)
-from app.services.vision_model_list_service import get_vision_model_list
-from app.users import current_active_user
-from app.utils.rbac import check_permission
-
-router = APIRouter()
-logger = logging.getLogger(__name__)
-
-
-# =============================================================================
-# Vision Model Catalogue (from OpenRouter, filtered for image-input models)
-# =============================================================================
-
-
-class VisionModelListItem(BaseModel):
-    value: str
-    label: str
-    provider: str
-    context_window: str | None = None
-
-
-@router.get("/vision-models", response_model=list[VisionModelListItem])
-async def list_vision_models(
-    user: User = Depends(current_active_user),
-):
-    """Return vision-capable models sourced from OpenRouter (filtered by image input)."""
-    try:
-        return await get_vision_model_list()
-    except Exception as e:
-        logger.exception("Failed to fetch vision model list")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch vision model list: {e!s}"
-        ) from e
-
-
-# =============================================================================
-# Global Vision LLM Configs (from YAML)
-# =============================================================================
-
-
-@router.get(
-    "/global-vision-llm-configs",
-    response_model=list[GlobalVisionLLMConfigRead],
-)
-async def get_global_vision_llm_configs(
-    user: User = Depends(current_active_user),
-):
-    try:
-        global_configs = config.GLOBAL_VISION_LLM_CONFIGS
-        safe_configs = []
-
-        if global_configs and len(global_configs) > 0:
-            safe_configs.append(
-                {
-                    "id": 0,
-                    "name": "Auto (Fastest)",
-                    "description": "Automatically routes across available vision LLM providers.",
-                    "provider": "AUTO",
-                    "custom_provider": None,
-                    "model_name": "auto",
-                    "api_base": None,
-                    "api_version": None,
-                    "litellm_params": {},
-                    "is_global": True,
-                    "is_auto_mode": True,
-                    # Auto mode treated as free until per-deployment billing-tier
-                    # surfacing lands; see ``get_vision_llm`` for parity.
-                    "billing_tier": "free",
-                    "is_premium": False,
-                }
-            )
-
-        for cfg in global_configs:
-            billing_tier = str(cfg.get("billing_tier", "free")).lower()
-            safe_configs.append(
-                {
-                    "id": cfg.get("id"),
-                    "name": cfg.get("name"),
-                    "description": cfg.get("description"),
-                    "provider": cfg.get("provider") or cfg.get("litellm_provider"),
-                    "custom_provider": cfg.get("custom_provider"),
-                    "model_name": cfg.get("model_name"),
-                    "api_base": cfg.get("api_base") or None,
-                    "api_version": cfg.get("api_version") or None,
-                    "litellm_params": cfg.get("litellm_params", {}),
-                    "is_global": True,
-                    "billing_tier": billing_tier,
-                    # Mirror chat (``new_llm_config_routes``) so the new-chat
-                    # selector's premium badge logic keys off the same
-                    # field across chat / image / vision tabs.
-                    "is_premium": billing_tier == "premium",
-                    "quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
-                    "input_cost_per_token": cfg.get("input_cost_per_token"),
-                    "output_cost_per_token": cfg.get("output_cost_per_token"),
-                }
-            )
-
-        return safe_configs
-    except Exception as e:
-        logger.exception("Failed to fetch global vision LLM configs")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch configs: {e!s}"
-        ) from e
-
-
-# =============================================================================
-# VisionLLMConfig CRUD
-# =============================================================================
-
-
-@router.post("/vision-llm-configs", response_model=VisionLLMConfigRead)
-async def create_vision_llm_config(
-    config_data: VisionLLMConfigCreate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    try:
-        await check_permission(
-            session,
-            user,
-            config_data.search_space_id,
-            Permission.VISION_CONFIGS_CREATE.value,
-            "You don't have permission to create vision LLM configs in this search space",
-        )
-
-        db_config = VisionLLMConfig(**config_data.model_dump(), user_id=user.id)
-        session.add(db_config)
-        await session.commit()
-        await session.refresh(db_config)
-        return db_config
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to create VisionLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to create config: {e!s}"
-        ) from e
-
-
-@router.get("/vision-llm-configs", response_model=list[VisionLLMConfigRead])
-async def list_vision_llm_configs(
-    search_space_id: int,
-    skip: int = 0,
-    limit: int = 100,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    try:
-        await check_permission(
-            session,
-            user,
-            search_space_id,
-            Permission.VISION_CONFIGS_READ.value,
-            "You don't have permission to view vision LLM configs in this search space",
-        )
-
-        result = await session.execute(
-            select(VisionLLMConfig)
-            .filter(VisionLLMConfig.search_space_id == search_space_id)
-            .order_by(VisionLLMConfig.created_at.desc())
-            .offset(skip)
-            .limit(limit)
-        )
-        return result.scalars().all()
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to list VisionLLMConfigs")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch configs: {e!s}"
-        ) from e
-
-
-@router.get("/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead)
-async def get_vision_llm_config(
-    config_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    try:
-        result = await session.execute(
-            select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if not db_config:
-            raise HTTPException(status_code=404, detail="Config not found")
-
-        await check_permission(
-            session,
-            user,
-            db_config.search_space_id,
-            Permission.VISION_CONFIGS_READ.value,
-            "You don't have permission to view vision LLM configs in this search space",
-        )
-        return db_config
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Failed to get VisionLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to fetch config: {e!s}"
-        ) from e
-
-
-@router.put("/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead)
-async def update_vision_llm_config(
-    config_id: int,
-    update_data: VisionLLMConfigUpdate,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    try:
-        result = await session.execute(
-            select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if not db_config:
-            raise HTTPException(status_code=404, detail="Config not found")
-
-        await check_permission(
-            session,
-            user,
-            db_config.search_space_id,
-            Permission.VISION_CONFIGS_CREATE.value,
-            "You don't have permission to update vision LLM configs in this search space",
-        )
-
-        for key, value in update_data.model_dump(exclude_unset=True).items():
-            setattr(db_config, key, value)
-
-        await session.commit()
-        await session.refresh(db_config)
-        return db_config
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to update VisionLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to update config: {e!s}"
-        ) from e
-
-
-@router.delete("/vision-llm-configs/{config_id}", response_model=dict)
-async def delete_vision_llm_config(
-    config_id: int,
-    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
-):
-    try:
-        result = await session.execute(
-            select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
-        )
-        db_config = result.scalars().first()
-        if not db_config:
-            raise HTTPException(status_code=404, detail="Config not found")
-
-        await check_permission(
-            session,
-            user,
-            db_config.search_space_id,
-            Permission.VISION_CONFIGS_DELETE.value,
-            "You don't have permission to delete vision LLM configs in this search space",
-        )
-
-        await session.delete(db_config)
-        await session.commit()
-        return {
-            "message": "Vision LLM config deleted successfully",
-            "id": config_id,
-        }
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        await session.rollback()
-        logger.exception("Failed to delete VisionLLMConfig")
-        raise HTTPException(
-            status_code=500, detail=f"Failed to delete config: {e!s}"
-        ) from e
--- a/surfsense_backend/app/schemas/__init__.py
+++ b/surfsense_backend/app/schemas/__init__.py
@ -34,11 +34,6 @@ from .folders import (
 )
 from .google_drive import DriveItem, GoogleDriveIndexingOptions, GoogleDriveIndexRequest
 from .image_generation import (
-    GlobalImageGenConfigRead,
-    ImageGenerationConfigCreate,
-    ImageGenerationConfigPublic,
-    ImageGenerationConfigRead,
-    ImageGenerationConfigUpdate,
    ImageGenerationCreate,
    ImageGenerationListRead,
    ImageGenerationRead,
@ -74,16 +69,6 @@ from .new_chat import (
    ThreadListItem,
    ThreadListResponse,
 )
-from .new_llm_config import (
-    DefaultSystemInstructionsResponse,
-    GlobalNewLLMConfigRead,
-    LLMPreferencesRead,
-    LLMPreferencesUpdate,
-    NewLLMConfigCreate,
-    NewLLMConfigPublic,
-    NewLLMConfigRead,
-    NewLLMConfigUpdate,
-)
 from .rbac_schemas import (
    InviteAcceptRequest,
    InviteAcceptResponse,
@ -142,14 +127,6 @@ from .video_presentations import (
    VideoPresentationRead,
    VideoPresentationUpdate,
 )
-from .vision_llm import (
-    GlobalVisionLLMConfigRead,
-    VisionLLMConfigCreate,
-    VisionLLMConfigPublic,
-    VisionLLMConfigRead,
-    VisionLLMConfigUpdate,
-)
-
 __all__ = [ 
    # Folder schemas
    "BulkDocumentMove",
@ -169,7 +146,6 @@ __all__ = [
    "CreditPurchaseHistoryResponse",
    "CreditPurchaseRead",
    "CreditStripeStatusResponse",
-    "DefaultSystemInstructionsResponse",
    # Document schemas
    "DocumentBase",
    "DocumentMove",
@ -192,19 +168,10 @@ __all__ = [
    "FolderRead",
    "FolderReorder",
    "FolderUpdate",
-    "GlobalImageGenConfigRead",
-    "GlobalNewLLMConfigRead",
-    # Vision LLM Config schemas
-    "GlobalVisionLLMConfigRead",
    "GoogleDriveIndexRequest",
    "GoogleDriveIndexingOptions",
    # Base schemas
    "IDModel",
-    # Image Generation Config schemas
-    "ImageGenerationConfigCreate",
-    "ImageGenerationConfigPublic",
-    "ImageGenerationConfigRead",
-    "ImageGenerationConfigUpdate",
    # Image Generation schemas
    "ImageGenerationCreate",
    "ImageGenerationListRead",
@ -216,9 +183,6 @@ __all__ = [
    "InviteInfoResponse",
    "InviteRead",
    "InviteUpdate",
-    # LLM Preferences schemas
-    "LLMPreferencesRead",
-    "LLMPreferencesUpdate",
    # Log schemas
    "LogBase",
    "LogCreate",
@ -255,11 +219,6 @@ __all__ = [
    "NewChatThreadRead",
    "NewChatThreadUpdate",
    "NewChatThreadWithMessages",
-    # NewLLMConfig schemas
-    "NewLLMConfigCreate",
-    "NewLLMConfigPublic",
-    "NewLLMConfigRead",
-    "NewLLMConfigUpdate",
    "PagePurchaseHistoryResponse",
    "PagePurchaseRead",
    "PaginatedResponse",
@ -303,8 +262,4 @@ __all__ = [
    "VideoPresentationCreate",
    "VideoPresentationRead",
    "VideoPresentationUpdate",
-    "VisionLLMConfigCreate",
-    "VisionLLMConfigPublic",
-    "VisionLLMConfigRead",
-    "VisionLLMConfigUpdate",
 ]
--- a/surfsense_backend/app/schemas/image_generation.py
+++ b/surfsense_backend/app/schemas/image_generation.py
@ -1,109 +1,10 @@
-"""
-Pydantic schemas for Image Generation configs and generation requests.
+"""Pydantic schemas for image generation requests/results."""

-ImageGenerationConfig: CRUD schemas for user-created image gen model configs.
-ImageGeneration: Schemas for the actual image generation requests/results.
-GlobalImageGenConfigRead: Schema for admin-configured YAML configs.
-"""
-
-import uuid
 from datetime import datetime
 from typing import Any

 from pydantic import BaseModel, ConfigDict, Field

-from app.db import ImageGenProvider
-
-# =============================================================================
-# ImageGenerationConfig CRUD Schemas
-# =============================================================================
-
-
-class ImageGenerationConfigBase(BaseModel):
-    """Base schema with fields for ImageGenerationConfig."""
-
-    name: str = Field(
-        ..., max_length=100, description="User-friendly name for the config"
-    )
-    description: str | None = Field(
-        None, max_length=500, description="Optional description"
-    )
-    provider: ImageGenProvider = Field(
-        ...,
-        description="Image generation provider (OpenAI, Azure, Google AI Studio, Vertex AI, Bedrock, Recraft, OpenRouter, Xinference, Nscale)",
-    )
-    custom_provider: str | None = Field(
-        None, max_length=100, description="Custom provider name"
-    )
-    model_name: str = Field(
-        ..., max_length=100, description="Model name (e.g., dall-e-3, gpt-image-1)"
-    )
-    api_key: str = Field(..., description="API key for the provider")
-    api_base: str | None = Field(
-        None, max_length=500, description="Optional API base URL"
-    )
-    api_version: str | None = Field(
-        None,
-        max_length=50,
-        description="Azure-specific API version (e.g., '2024-02-15-preview')",
-    )
-    litellm_params: dict[str, Any] | None = Field(
-        default=None, description="Additional LiteLLM parameters"
-    )
-
-
-class ImageGenerationConfigCreate(ImageGenerationConfigBase):
-    """Schema for creating a new ImageGenerationConfig."""
-
-    search_space_id: int = Field(
-        ..., description="Search space ID to associate the config with"
-    )
-
-
-class ImageGenerationConfigUpdate(BaseModel):
-    """Schema for updating an existing ImageGenerationConfig. All fields optional."""
-
-    name: str | None = Field(None, max_length=100)
-    description: str | None = Field(None, max_length=500)
-    provider: ImageGenProvider | None = None
-    custom_provider: str | None = Field(None, max_length=100)
-    model_name: str | None = Field(None, max_length=100)
-    api_key: str | None = None
-    api_base: str | None = Field(None, max_length=500)
-    api_version: str | None = Field(None, max_length=50)
-    litellm_params: dict[str, Any] | None = None
-
-
-class ImageGenerationConfigRead(ImageGenerationConfigBase):
-    """Schema for reading an ImageGenerationConfig (includes id and timestamps)."""
-
-    id: int
-    created_at: datetime
-    search_space_id: int
-    user_id: uuid.UUID
-
-    model_config = ConfigDict(from_attributes=True)
-
-
-class ImageGenerationConfigPublic(BaseModel):
-    """Public schema that hides the API key (for list views)."""
-
-    id: int
-    name: str
-    description: str | None = None
-    provider: ImageGenProvider
-    custom_provider: str | None = None
-    model_name: str
-    api_base: str | None = None
-    api_version: str | None = None
-    litellm_params: dict[str, Any] | None = None
-    created_at: datetime
-    search_space_id: int
-    user_id: uuid.UUID
-
-    model_config = ConfigDict(from_attributes=True)
-
-
 # =============================================================================
 # ImageGeneration (request/result) Schemas
 # =============================================================================
@ -136,12 +37,12 @@ class ImageGenerationCreate(BaseModel):
    search_space_id: int = Field(
        ..., description="Search space ID to associate the generation with"
    )
-    image_generation_config_id: int | None = Field(
+    image_gen_model_id: int | None = Field(
        None,
        description=(
-            "Image generation config ID. "
-            "0 = Auto mode (router), negative = global YAML config, positive = DB config. "
-            "If not provided, uses the search space's image_generation_config_id preference."
+            "Image generation model ID. "
+            "0 = Auto mode, negative = GLOBAL model, positive = BYOK Model row. "
+            "If not provided, uses the search space's image_gen_model_id preference."
        ),
    )

@ -157,7 +58,7 @@ class ImageGenerationRead(BaseModel):
    size: str | None = None
    style: str | None = None
    response_format: str | None = None
-    image_generation_config_id: int | None = None
+    image_gen_model_id: int | None = None
    response_data: dict[str, Any] | None = None
    error_message: str | None = None
    search_space_id: int
@ -204,57 +105,3 @@ class ImageGenerationListRead(BaseModel):
            image_count=image_count,
        )

-
-# =============================================================================
-# Global Image Gen Config (from YAML)
-# =============================================================================
-
-
-class GlobalImageGenConfigRead(BaseModel):
-    """
-    Schema for reading global image generation configs from YAML.
-    Global configs have negative IDs. API key is hidden.
-    ID 0 is reserved for Auto mode (LiteLLM Router load balancing).
-
-    The ``billing_tier`` field allows the frontend to show a Premium/Free
-    badge and (more importantly) tells the backend whether to debit the
-    user's premium credit pool when this config is used. ``"free"`` is
-    the default for backward compatibility — admins must explicitly opt
-    a global config into ``"premium"``.
-    """
-
-    id: int = Field(
-        ...,
-        description="Config ID: 0 for Auto mode, negative for global configs",
-    )
-    name: str
-    description: str | None = None
-    provider: str
-    custom_provider: str | None = None
-    model_name: str
-    api_base: str | None = None
-    api_version: str | None = None
-    litellm_params: dict[str, Any] | None = None
-    is_global: bool = True
-    is_auto_mode: bool = False
-    billing_tier: str = Field(
-        default="free",
-        description="'free' or 'premium'. Premium debits the user's premium credit pool (USD-cost-based).",
-    )
-    is_premium: bool = Field(
-        default=False,
-        description=(
-            "Convenience boolean derived server-side from "
-            "``billing_tier == 'premium'``. The new-chat model selector "
-            "keys its Free/Premium badge off this field for parity with "
-            "chat (`GlobalLLMConfigRead.is_premium`)."
-        ),
-    )
-    quota_reserve_micros: int | None = Field(
-        default=None,
-        description=(
-            "Optional override for the reservation amount (in micro-USD) used when "
-            "this image generation is premium. Falls back to "
-            "QUOTA_DEFAULT_IMAGE_RESERVE_MICROS when omitted."
-        ),
-    )
--- a/surfsense_backend/app/schemas/new_llm_config.py
+++ b/surfsense_backend/app/schemas/new_llm_config.py
@ -1,256 +0,0 @@
-"""
-Pydantic schemas for the NewLLMConfig API.
-
-NewLLMConfig combines model settings with prompt configuration:
- LLM provider, model, API key, etc.
- Configurable system instructions
- Citation toggle
-"""
-
-import uuid
-from datetime import datetime
-from typing import Any
-
-from pydantic import BaseModel, ConfigDict, Field
-
-from app.db import LiteLLMProvider
-
-
-class NewLLMConfigBase(BaseModel):
-    """Base schema with common fields for NewLLMConfig."""
-
-    name: str = Field(
-        ..., max_length=100, description="User-friendly name for the configuration"
-    )
-    description: str | None = Field(
-        None, max_length=500, description="Optional description"
-    )
-
-    # Model Configuration
-    provider: LiteLLMProvider = Field(..., description="LiteLLM provider type")
-    custom_provider: str | None = Field(
-        None, max_length=100, description="Custom provider name when provider is CUSTOM"
-    )
-    model_name: str = Field(
-        ..., max_length=100, description="Model name without provider prefix"
-    )
-    api_key: str = Field(..., description="API key for the provider")
-    api_base: str | None = Field(
-        None, max_length=500, description="Optional API base URL"
-    )
-    litellm_params: dict[str, Any] | None = Field(
-        default=None, description="Additional LiteLLM parameters"
-    )
-
-    # Prompt Configuration
-    system_instructions: str = Field(
-        default="",
-        description="Custom system instructions. Empty string uses default SURFSENSE_SYSTEM_INSTRUCTIONS.",
-    )
-    use_default_system_instructions: bool = Field(
-        default=True,
-        description="Whether to use default instructions when system_instructions is empty",
-    )
-    citations_enabled: bool = Field(
-        default=True,
-        description="Whether to include citation instructions in the system prompt",
-    )
-
-
-class NewLLMConfigCreate(NewLLMConfigBase):
-    """Schema for creating a new NewLLMConfig."""
-
-    search_space_id: int = Field(
-        ..., description="Search space ID to associate the config with"
-    )
-
-
-class NewLLMConfigUpdate(BaseModel):
-    """Schema for updating an existing NewLLMConfig. All fields are optional."""
-
-    name: str | None = Field(None, max_length=100)
-    description: str | None = Field(None, max_length=500)
-
-    # Model Configuration
-    provider: LiteLLMProvider | None = None
-    custom_provider: str | None = Field(None, max_length=100)
-    model_name: str | None = Field(None, max_length=100)
-    api_key: str | None = None
-    api_base: str | None = Field(None, max_length=500)
-    litellm_params: dict[str, Any] | None = None
-
-    # Prompt Configuration
-    system_instructions: str | None = None
-    use_default_system_instructions: bool | None = None
-    citations_enabled: bool | None = None
-
-
-class NewLLMConfigRead(NewLLMConfigBase):
-    """Schema for reading a NewLLMConfig (includes id and timestamps)."""
-
-    id: int
-    created_at: datetime
-    search_space_id: int
-    user_id: uuid.UUID
-    # Capability flag derived at the API boundary (no DB column). Default
-    # True matches the conservative-allow stance — a BYOK row that the
-    # route forgot to augment is not pre-judged. The streaming-task
-    # safety net is the only place a False actually blocks a request.
-    supports_image_input: bool = Field(
-        default=True,
-        description=(
-            "Whether the BYOK chat config can accept image inputs. Derived "
-            "at the route boundary from LiteLLM's authoritative model map "
-            "(``litellm.supports_vision``) — there is no DB column. "
-            "Default True is the conservative-allow stance for unknown / "
-            "unmapped models."
-        ),
-    )
-
-    model_config = ConfigDict(from_attributes=True)
-
-
-class NewLLMConfigPublic(BaseModel):
-    """
-    Public schema for NewLLMConfig that hides the API key.
-    Used when returning configs in list views or to users who shouldn't see keys.
-    """
-
-    id: int
-    name: str
-    description: str | None = None
-
-    # Model Configuration (no api_key)
-    provider: LiteLLMProvider
-    custom_provider: str | None = None
-    model_name: str
-    api_base: str | None = None
-    litellm_params: dict[str, Any] | None = None
-
-    # Prompt Configuration
-    system_instructions: str
-    use_default_system_instructions: bool
-    citations_enabled: bool
-
-    created_at: datetime
-    search_space_id: int
-    user_id: uuid.UUID
-    # Capability flag derived at the API boundary (see NewLLMConfigRead).
-    supports_image_input: bool = Field(
-        default=True,
-        description=(
-            "Whether the BYOK chat config can accept image inputs. Derived "
-            "at the route boundary from LiteLLM's authoritative model map. "
-            "Default True is the conservative-allow stance."
-        ),
-    )
-
-    model_config = ConfigDict(from_attributes=True)
-
-
-class DefaultSystemInstructionsResponse(BaseModel):
-    """Response schema for getting default system instructions."""
-
-    default_system_instructions: str = Field(
-        ..., description="The default SURFSENSE_SYSTEM_INSTRUCTIONS template"
-    )
-
-
-class GlobalNewLLMConfigRead(BaseModel):
-    """
-    Schema for reading global LLM configs from YAML.
-    Global configs have negative IDs and no search_space_id.
-    API key is hidden for security.
-
-    ID 0 is reserved for Auto mode which uses LiteLLM Router for load balancing.
-    """
-
-    id: int = Field(
-        ...,
-        description="Config ID: 0 for Auto mode, negative for global configs",
-    )
-    name: str
-    description: str | None = None
-
-    # Model Configuration (no api_key)
-    provider: str  # String because YAML doesn't enforce enum, "AUTO" for Auto mode
-    custom_provider: str | None = None
-    model_name: str
-    api_base: str | None = None
-    litellm_params: dict[str, Any] | None = None
-
-    # Prompt Configuration
-    system_instructions: str = ""
-    use_default_system_instructions: bool = True
-    citations_enabled: bool = True
-
-    is_global: bool = True  # Always true for global configs
-    is_auto_mode: bool = False  # True only for Auto mode (ID 0)
-
-    billing_tier: str = "free"
-    is_premium: bool = False
-    anonymous_enabled: bool = False
-    seo_enabled: bool = False
-    seo_slug: str | None = None
-    seo_title: str | None = None
-    seo_description: str | None = None
-    quota_reserve_tokens: int | None = None
-    supports_image_input: bool = Field(
-        default=True,
-        description=(
-            "Whether the model accepts image inputs (multimodal vision). "
-            "Derived server-side: OpenRouter dynamic configs use "
-            "``architecture.input_modalities``; YAML / BYOK use LiteLLM's "
-            "authoritative model map (``litellm.supports_vision``). The "
-            "new-chat selector hints with a 'No image' badge when this is "
-            "False and there are pending image attachments. The streaming "
-            "task fails fast only when LiteLLM *explicitly* marks a model "
-            "as text-only — unknown / unmapped models default-allow."
-        ),
-    )
-
-
-# =============================================================================
-# LLM Preferences Schemas (for role assignments)
-# =============================================================================
-
-
-class LLMPreferencesRead(BaseModel):
-    """Schema for reading LLM preferences (role assignments) for a search space."""
-
-    agent_llm_id: int | None = Field(
-        None, description="ID of the LLM config to use for agent/chat tasks"
-    )
-    image_generation_config_id: int | None = Field(
-        None, description="ID of the image generation config to use"
-    )
-    vision_llm_config_id: int | None = Field(
-        None,
-        description="ID of the vision LLM config to use for vision/screenshot analysis",
-    )
-    agent_llm: dict[str, Any] | None = Field(
-        None, description="Full config for chat model"
-    )
-    image_generation_config: dict[str, Any] | None = Field(
-        None, description="Full config for image generation"
-    )
-    vision_llm_config: dict[str, Any] | None = Field(
-        None, description="Full config for vision LLM"
-    )
-
-    model_config = ConfigDict(from_attributes=True)
-
-
-class LLMPreferencesUpdate(BaseModel):
-    """Schema for updating LLM preferences."""
-
-    agent_llm_id: int | None = Field(
-        None, description="ID of the LLM config to use for agent/chat tasks"
-    )
-    image_generation_config_id: int | None = Field(
-        None, description="ID of the image generation config to use"
-    )
-    vision_llm_config_id: int | None = Field(
-        None,
-        description="ID of the vision LLM config to use for vision/screenshot analysis",
-    )
--- a/surfsense_backend/app/schemas/vision_llm.py
+++ b/surfsense_backend/app/schemas/vision_llm.py
@ -1,116 +0,0 @@
-import uuid
-from datetime import datetime
-from typing import Any
-
-from pydantic import BaseModel, ConfigDict, Field
-
-from app.db import VisionProvider
-
-
-class VisionLLMConfigBase(BaseModel):
-    name: str = Field(..., max_length=100)
-    description: str | None = Field(None, max_length=500)
-    provider: VisionProvider = Field(...)
-    custom_provider: str | None = Field(None, max_length=100)
-    model_name: str = Field(..., max_length=100)
-    api_key: str = Field(...)
-    api_base: str | None = Field(None, max_length=500)
-    api_version: str | None = Field(None, max_length=50)
-    litellm_params: dict[str, Any] | None = Field(default=None)
-
-
-class VisionLLMConfigCreate(VisionLLMConfigBase):
-    search_space_id: int = Field(...)
-
-
-class VisionLLMConfigUpdate(BaseModel):
-    name: str | None = Field(None, max_length=100)
-    description: str | None = Field(None, max_length=500)
-    provider: VisionProvider | None = None
-    custom_provider: str | None = Field(None, max_length=100)
-    model_name: str | None = Field(None, max_length=100)
-    api_key: str | None = None
-    api_base: str | None = Field(None, max_length=500)
-    api_version: str | None = Field(None, max_length=50)
-    litellm_params: dict[str, Any] | None = None
-
-
-class VisionLLMConfigRead(VisionLLMConfigBase):
-    id: int
-    created_at: datetime
-    search_space_id: int
-    user_id: uuid.UUID
-
-    model_config = ConfigDict(from_attributes=True)
-
-
-class VisionLLMConfigPublic(BaseModel):
-    id: int
-    name: str
-    description: str | None = None
-    provider: VisionProvider
-    custom_provider: str | None = None
-    model_name: str
-    api_base: str | None = None
-    api_version: str | None = None
-    litellm_params: dict[str, Any] | None = None
-    created_at: datetime
-    search_space_id: int
-    user_id: uuid.UUID
-
-    model_config = ConfigDict(from_attributes=True)
-
-
-class GlobalVisionLLMConfigRead(BaseModel):
-    """Schema for reading global vision LLM configs from YAML.
-
-    The ``billing_tier`` field allows the frontend to show a Premium/Free
-    badge and (more importantly) tells the backend whether to debit the
-    user's premium credit pool when this config is used. ``"free"`` is
-    the default for backward compatibility — admins must explicitly opt
-    a global config into ``"premium"``.
-    """
-
-    id: int = Field(...)
-    name: str
-    description: str | None = None
-    provider: str
-    custom_provider: str | None = None
-    model_name: str
-    api_base: str | None = None
-    api_version: str | None = None
-    litellm_params: dict[str, Any] | None = None
-    is_global: bool = True
-    is_auto_mode: bool = False
-    billing_tier: str = Field(
-        default="free",
-        description="'free' or 'premium'. Premium debits the user's premium credit pool (USD-cost-based).",
-    )
-    is_premium: bool = Field(
-        default=False,
-        description=(
-            "Convenience boolean derived server-side from "
-            "``billing_tier == 'premium'``. The new-chat model selector "
-            "keys its Free/Premium badge off this field for parity with "
-            "chat (`GlobalLLMConfigRead.is_premium`)."
-        ),
-    )
-    quota_reserve_tokens: int | None = Field(
-        default=None,
-        description=(
-            "Optional override for the per-call reservation in *tokens* — "
-            "converted to micro-USD via the model's input/output prices at "
-            "reservation time. Falls back to QUOTA_DEFAULT_RESERVE_TOKENS."
-        ),
-    )
-    input_cost_per_token: float | None = Field(
-        default=None,
-        description=(
-            "Optional input price in USD/token. Used by pricing_registration to "
-            "register custom Azure / OpenRouter aliases with LiteLLM at startup."
-        ),
-    )
-    output_cost_per_token: float | None = Field(
-        default=None,
-        description="Optional output price in USD/token. Pair with input_cost_per_token.",
-    )
--- a/surfsense_backend/app/services/auto_model_pin_service.py
+++ b/surfsense_backend/app/services/auto_model_pin_service.py
@ -1,13 +1,13 @@
-"""Resolve and persist Auto (Fastest) model pins per chat thread.
+"""Resolve and persist Auto model pins per chat thread.

-Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
-resolve that virtual mode to one concrete global LLM config exactly once and
+Auto is represented by ``chat_model_id == 0``. For chat threads we
+resolve that virtual mode to one concrete global model exactly once and
 persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
 subsequent turns are stable.

 Single-writer invariant: this module is the only writer of
 ``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
-``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
+``model_connections_routes`` when a search space's ``chat_model_id`` changes).
 Therefore a non-NULL value unambiguously means "this thread has an
 Auto-resolved pin"; no separate source/policy column is needed.
 """
@ -33,8 +33,10 @@ from app.services.token_quota_service import TokenQuotaService

 logger = logging.getLogger(__name__)

-AUTO_FASTEST_ID = 0
-AUTO_FASTEST_MODE = "auto_fastest"
+AUTO_MODE_ID = 0
+# Stable internal hash namespace for deterministic per-thread selection.
+# Do not change the value: doing so rebalances Auto's model choice for new pins.
+AUTO_PIN_HASH_NAMESPACE = "auto_fastest"
 _RUNTIME_COOLDOWN_SECONDS = 600
 _HEALTHY_TTL_SECONDS = 45

@ -383,7 +385,7 @@ def _select_pin(eligible: list[dict], thread_id: int) -> tuple[dict, int]:
    pool = tier_a if tier_a else eligible
    pool = sorted(pool, key=lambda c: -int(c.get("quality_score") or 0))
    top_k = pool[:_QUALITY_TOP_K]
-    digest = hashlib.sha256(f"{AUTO_FASTEST_MODE}:{thread_id}".encode()).digest()
+    digest = hashlib.sha256(f"{AUTO_PIN_HASH_NAMESPACE}:{thread_id}".encode()).digest()
    idx = int.from_bytes(digest[:8], "big") % len(top_k)
    return top_k[idx], len(top_k)

@ -425,7 +427,7 @@ async def resolve_or_get_pinned_llm_config_id(
    exclude_config_ids: set[int] | None = None,
    requires_image_input: bool = False,
 ) -> AutoPinResolution:
-    """Resolve Auto (Fastest) to one concrete config id and persist the pin.
+    """Resolve Auto to one concrete config id and persist the pin.

    For non-auto selections, this function clears any existing pin and returns
    the selected id as-is.
@ -457,7 +459,7 @@ async def resolve_or_get_pinned_llm_config_id(
        )

    # Explicit model selected: clear any stale pin.
-    if selected_llm_config_id != AUTO_FASTEST_ID:
+    if selected_llm_config_id != AUTO_MODE_ID:
        if thread.pinned_llm_config_id is not None:
            thread.pinned_llm_config_id = None
            await session.commit()
--- a/surfsense_backend/app/services/billable_calls.py
+++ b/surfsense_backend/app/services/billable_calls.py
@ -450,10 +450,10 @@ async def _resolve_agent_billing_for_search_space(
    Used by Celery tasks (podcast generation, video presentation) to bill the
    search-space owner's premium credit pool when the chat model is premium.

-    Resolution rules mirror chat at ``stream_new_chat.py:2294-2351``:
+    Resolution rules mirror the chat model role resolver:

-    - Search space not found / no ``agent_llm_id``: raise ``ValueError``.
-    - **Auto mode** (``id == AUTO_FASTEST_ID == 0``):
+    - Search space not found / no ``chat_model_id``: raise ``ValueError``.
+    - **Auto mode** (``id == AUTO_MODE_ID == 0``):
        * ``thread_id`` is set: delegate to
          ``resolve_or_get_pinned_llm_config_id`` (the same call chat uses) and
          recurse into the resolved id. Reuses chat's existing pin if present
@ -469,9 +469,8 @@ async def _resolve_agent_billing_for_search_space(
      (defaults to ``"free"`` via ``app/config/__init__.py:52`` setdefault),
      ``base_model = litellm_params.get("base_model") or model_name`` —
      NOT provider-prefixed, matching chat's cost-map lookup convention.
-    - **Positive id** (user BYOK ``NewLLMConfig``): always free (matches
-      ``AgentConfig.from_new_llm_config`` which hard-codes ``billing_tier="free"``);
-      ``base_model`` from ``litellm_params`` or ``model_name``.
+    - **Positive id** (user BYOK ``Model``): always free; ``base_model`` from
+      the model catalog override or the upstream ``model_id``.

    Note on imports: ``llm_service``, ``auto_model_pin_service``, and
    ``llm_router_service`` are imported lazily inside the function body to
@ -480,8 +479,9 @@ async def _resolve_agent_billing_for_search_space(
    ``billable_calls.py``'s module load path.
    """
    from sqlalchemy import select
+    from sqlalchemy.orm import selectinload

-    from app.db import NewLLMConfig, SearchSpace
+    from app.db import Model, SearchSpace

    result = await session.execute(
        select(SearchSpace).where(SearchSpace.id == search_space_id)
@ -490,20 +490,20 @@ async def _resolve_agent_billing_for_search_space(
    if search_space is None:
        raise ValueError(f"Search space {search_space_id} not found")

-    agent_llm_id = search_space.agent_llm_id
-    if agent_llm_id is None:
+    chat_model_id = search_space.chat_model_id
+    if chat_model_id is None:
        raise ValueError(
-            f"Search space {search_space_id} has no agent_llm_id configured"
+            f"Search space {search_space_id} has no chat_model_id configured"
        )

    owner_user_id: UUID = search_space.user_id

    from app.services.auto_model_pin_service import (
-        AUTO_FASTEST_ID,
+        AUTO_MODE_ID,
        resolve_or_get_pinned_llm_config_id,
    )

-    if agent_llm_id == AUTO_FASTEST_ID:
+    if chat_model_id == AUTO_MODE_ID:
        if thread_id is None:
            return owner_user_id, "free", "auto"
        try:
@ -512,7 +512,7 @@ async def _resolve_agent_billing_for_search_space(
                thread_id=thread_id,
                search_space_id=search_space_id,
                user_id=str(owner_user_id),
-                selected_llm_config_id=AUTO_FASTEST_ID,
+                selected_llm_config_id=AUTO_MODE_ID,
            )
        except ValueError:
            logger.warning(
@ -523,28 +523,35 @@ async def _resolve_agent_billing_for_search_space(
                exc_info=True,
            )
            return owner_user_id, "free", "auto"
-        agent_llm_id = resolution.resolved_llm_config_id
+        chat_model_id = resolution.resolved_llm_config_id

-    if agent_llm_id < 0:
+    if chat_model_id < 0:
        from app.services.llm_service import get_global_llm_config

-        cfg = get_global_llm_config(agent_llm_id) or {}
+        cfg = get_global_llm_config(chat_model_id) or {}
        billing_tier = str(cfg.get("billing_tier", "free")).lower()
        litellm_params = cfg.get("litellm_params") or {}
        base_model = litellm_params.get("base_model") or cfg.get("model_name") or ""
        return owner_user_id, billing_tier, base_model

-    nlc_result = await session.execute(
-        select(NewLLMConfig).where(
-            NewLLMConfig.id == agent_llm_id,
-            NewLLMConfig.search_space_id == search_space_id,
-        )
+    model_result = await session.execute(
+        select(Model)
+        .options(selectinload(Model.connection))
+        .where(Model.id == chat_model_id, Model.enabled.is_(True))
    )
-    nlc = nlc_result.scalars().first()
+    model = model_result.scalars().first()
    base_model = ""
-    if nlc is not None:
-        litellm_params = nlc.litellm_params or {}
-        base_model = litellm_params.get("base_model") or nlc.model_name or ""
+    if (
+        model is not None
+        and model.connection is not None
+        and model.connection.enabled
+        and (
+            model.connection.search_space_id in (None, search_space_id)
+            and model.connection.user_id in (None, owner_user_id)
+        )
+    ):
+        catalog = model.catalog or {}
+        base_model = catalog.get("base_model") or model.model_id or ""
    return owner_user_id, "free", base_model


--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -14,7 +14,11 @@ from app.services.auto_model_pin_service import (
    auto_model_candidates,
    choose_auto_model_candidate,
 )
-from app.services.llm_router_service import AUTO_MODE_ID, ChatLiteLLMRouter, is_auto_mode
+from app.services.llm_router_service import (
+    AUTO_MODE_ID,
+    ChatLiteLLMRouter,
+    is_auto_mode,
+)
 from app.services.model_capabilities import has_capability
 from app.services.model_resolver import native_connection_from_config, to_litellm
 from app.services.token_tracking_service import token_tracker
@ -96,26 +100,16 @@ class LLMRole:
 def get_global_llm_config(llm_config_id: int) -> dict | None:
    """
    Get a global LLM configuration by ID.
-    Global configs have negative IDs. ID 0 is reserved for Auto mode.
+    Global configs have negative IDs. Auto mode (ID 0) is resolved through the
+    model-candidate pipeline, not this legacy config lookup.

    Args:
-        llm_config_id: The ID of the global config (should be negative or 0 for Auto)
+        llm_config_id: The ID of the global config (must be negative)

    Returns:
        dict: Global config dictionary or None if not found
    """
-    # Auto mode (ID 0) is handled separately via the router
-    if llm_config_id == AUTO_MODE_ID:
-        return {
-            "id": AUTO_MODE_ID,
-            "name": "Auto (Fastest)",
-            "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
-            "provider": "AUTO",
-            "model_name": "auto",
-            "is_auto_mode": True,
-        }
-
-    if llm_config_id > 0:
+    if llm_config_id >= 0:
        return None

    for cfg in config.GLOBAL_LLM_CONFIGS:
--- a/surfsense_backend/app/services/model_list_service.py
+++ b/surfsense_backend/app/services/model_list_service.py
@ -24,7 +24,7 @@ CACHE_TTL_SECONDS = 86400  # 24 hours
 _cache: list[dict] | None = None
 _cache_timestamp: float = 0

-# Maps OpenRouter provider slug → our LiteLLMProvider enum value.
+# Maps each OpenRouter provider slug to its native LiteLLM provider prefix.
 # Only providers where the model-name part (after the slash) can be
 # used directly with the native provider's litellm prefix are listed.
 #
--- a/surfsense_backend/app/services/openrouter_integration_service.py
+++ b/surfsense_backend/app/services/openrouter_integration_service.py
@ -281,7 +281,7 @@ def _generate_configs(

    OpenRouter's own ``openrouter/free`` meta-router is filtered out upstream
    via ``_EXCLUDED_MODEL_IDS``; we don't expose a redundant auto-select layer
-    because our own Auto (Fastest) pin + 24 h refresh + repair logic already
+    because our own Auto pin + 24 h refresh + repair logic already
    cover the catalogue-churn case.
    """
    id_offset: int = settings.get("id_offset", -10000)
@ -346,7 +346,7 @@ def _generate_configs(
            # ``"No endpoints found that support image input"``.
            "supports_image_input": bool(normalized.get("supports_image_input")),
            _OPENROUTER_DYNAMIC_MARKER: True,
-            # Auto (Fastest) ranking metadata. ``quality_score`` is initialised
+            # Auto ranking metadata. ``quality_score`` is initialised
            # to the static score and gets re-blended with health on the next
            # ``_enrich_health`` pass (synchronous on refresh, deferred on cold
            # start so startup latency is unchanged).
@ -361,11 +361,7 @@ def _generate_configs(
    return configs


-# ID-offset bands used to keep dynamic OpenRouter configs in their own
-# namespace per surface. Image / vision get separate bands so a single
-# Postgres-INTEGER cfg ID is unambiguous about which selector it belongs to.
 _OPENROUTER_IMAGE_ID_OFFSET_DEFAULT = -20000
-_OPENROUTER_VISION_ID_OFFSET_DEFAULT = -30000


 def _generate_image_gen_configs(
@ -431,89 +427,6 @@ def _generate_image_gen_configs(
    return configs


-def _generate_vision_llm_configs(
-    raw_models: list[dict], settings: dict[str, Any]
-) -> list[dict]:
-    """Convert OpenRouter vision-capable LLMs into global vision-LLM config
-    dicts (matches the YAML shape consumed by ``vision_llm_routes``).
-
-    Filter:
-      - architecture.input_modalities contains "image"
-      - architecture.output_modalities contains "text"
-      - compatible provider (excluded slugs blocked)
-      - allowed model id (excluded list blocked)
-
-    Vision-LLM is invoked from the indexer (image extraction during
-    document upload) via ``langchain_litellm.ChatLiteLLM.ainvoke``, so
-    the chat-only ``_supports_tool_calling`` and ``_has_sufficient_context``
-    filters do not apply: a small-context vision model that doesn't
-    advertise tool-calling is still perfectly viable for "describe this
-    image" prompts.
-    """
-    id_offset: int = int(
-        settings.get("vision_id_offset") or _OPENROUTER_VISION_ID_OFFSET_DEFAULT
-    )
-    api_key: str = settings.get("api_key", "")
-    rpm: int = settings.get("rpm", 200)
-    tpm: int = settings.get("tpm", 1_000_000)
-    free_rpm: int = settings.get("free_rpm", 20)
-    free_tpm: int = settings.get("free_tpm", 100_000)
-    quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000)
-    litellm_params: dict = settings.get("litellm_params") or {}
-
-    vision_models = [
-        m
-        for m in raw_models
-        if supports_image_input(m)
-        and _shared_is_compatible_provider(m)
-        and _shared_is_allowed_model(m)
-        and "/" in m.get("id", "")
-    ]
-
-    configs: list[dict] = []
-    taken: set[int] = set()
-    for model in vision_models:
-        model_id: str = model["id"]
-        name: str = model.get("name", model_id)
-        tier = _openrouter_tier(model)
-        pricing = model.get("pricing") or {}
-
-        # Capture per-token prices so ``pricing_registration`` can
-        # register them with LiteLLM at startup (and so the cost
-        # estimator in ``estimate_call_reserve_micros`` can resolve
-        # them at reserve time).
-        try:
-            input_cost = float(pricing.get("prompt", 0) or 0)
-        except (TypeError, ValueError):
-            input_cost = 0.0
-        try:
-            output_cost = float(pricing.get("completion", 0) or 0)
-        except (TypeError, ValueError):
-            output_cost = 0.0
-
-        cfg: dict[str, Any] = {
-            "id": _stable_config_id(model_id, id_offset, taken),
-            "name": name,
-            "description": f"{name} via OpenRouter (vision)",
-            "provider": "openrouter",
-            "model_name": model_id,
-            "api_key": api_key,
-            "api_base": "https://openrouter.ai/api/v1",
-            "api_version": None,
-            "rpm": free_rpm if tier == "free" else rpm,
-            "tpm": free_tpm if tier == "free" else tpm,
-            "litellm_params": dict(litellm_params),
-            "billing_tier": tier,
-            "quota_reserve_tokens": quota_reserve_tokens,
-            "input_cost_per_token": input_cost or None,
-            "output_cost_per_token": output_cost or None,
-            _OPENROUTER_DYNAMIC_MARKER: True,
-        }
-        configs.append(cfg)
-
-    return configs
-
-
 class OpenRouterIntegrationService:
    """Singleton that manages the dynamic OpenRouter model catalogue."""

@ -724,7 +637,7 @@ class OpenRouterIntegrationService:
        return counts

    # ------------------------------------------------------------------
-    # Auto (Fastest) health enrichment
+    # Auto health enrichment
    # ------------------------------------------------------------------

    async def _enrich_health_safely(
--- a/surfsense_backend/app/services/pricing_registration.py
+++ b/surfsense_backend/app/services/pricing_registration.py
@ -154,10 +154,8 @@ def _register_chat_shape_configs(
                input_cost = _safe_float(entry.get("prompt"))
                output_cost = _safe_float(entry.get("completion"))
            else:
-                # Vision configs from ``_generate_vision_llm_configs``
-                # carry their pricing inline because the OpenRouter
-                # raw-pricing cache is keyed by chat-catalogue model_id;
-                # vision flows pick up the inline values here.
+                # Some dynamically materialized configs can carry pricing
+                # inline when the raw OpenRouter cache has no matching entry.
                input_cost = _safe_float(cfg.get("input_cost_per_token"))
                output_cost = _safe_float(cfg.get("output_cost_per_token"))
            if input_cost == 0.0 and output_cost == 0.0:
--- a/surfsense_backend/app/services/quality_score.py
+++ b/surfsense_backend/app/services/quality_score.py
@ -1,4 +1,4 @@
-"""Pure-function quality scoring for Auto (Fastest) model selection.
+"""Pure-function quality scoring for Auto model selection.

 This module is import-free of any service / request-path dependencies. All
 numbers are computed once during the OpenRouter refresh tick (or YAML load)
--- a/surfsense_backend/app/services/vision_llm_router_service.py
+++ b/surfsense_backend/app/services/vision_llm_router_service.py
@ -1,160 +0,0 @@
-import logging
-from typing import Any
-
-from litellm import Router
-
-from app.services.model_resolver import native_connection_from_config, to_litellm
-
-logger = logging.getLogger(__name__)
-
-VISION_AUTO_MODE_ID = 0
-
-class VisionLLMRouterService:
-    _instance = None
-    _router: Router | None = None
-    _model_list: list[dict] = []
-    _router_settings: dict = {}
-    _initialized: bool = False
-
-    def __new__(cls):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-        return cls._instance
-
-    @classmethod
-    def get_instance(cls) -> "VisionLLMRouterService":
-        if cls._instance is None:
-            cls._instance = cls()
-        return cls._instance
-
-    @classmethod
-    def initialize(
-        cls,
-        global_configs: list[dict],
-        router_settings: dict | None = None,
-    ) -> None:
-        instance = cls.get_instance()
-
-        if instance._initialized:
-            logger.debug("Vision LLM Router already initialized, skipping")
-            return
-
-        model_list = []
-        for config in global_configs:
-            deployment = cls._config_to_deployment(config)
-            if deployment:
-                model_list.append(deployment)
-
-        if not model_list:
-            logger.warning(
-                "No valid vision LLM configs found for router initialization"
-            )
-            return
-
-        instance._model_list = model_list
-        instance._router_settings = router_settings or {}
-
-        default_settings = {
-            "routing_strategy": "usage-based-routing",
-            "num_retries": 3,
-            "allowed_fails": 3,
-            "cooldown_time": 60,
-            "retry_after": 5,
-        }
-
-        final_settings = {**default_settings, **instance._router_settings}
-
-        try:
-            instance._router = Router(
-                model_list=model_list,
-                routing_strategy=final_settings.get(
-                    "routing_strategy", "usage-based-routing"
-                ),
-                num_retries=final_settings.get("num_retries", 3),
-                allowed_fails=final_settings.get("allowed_fails", 3),
-                cooldown_time=final_settings.get("cooldown_time", 60),
-                set_verbose=False,
-            )
-            instance._initialized = True
-            logger.info(
-                "Vision LLM Router initialized with %d deployments, strategy: %s",
-                len(model_list),
-                final_settings.get("routing_strategy"),
-            )
-        except Exception as e:
-            logger.error(f"Failed to initialize Vision LLM Router: {e}")
-            instance._router = None
-
-    @classmethod
-    def _config_to_deployment(cls, config: dict) -> dict | None:
-        try:
-            if not config.get("model_name") or not config.get("api_key"):
-                return None
-
-            model_string, resolved_kwargs = to_litellm(
-                native_connection_from_config(config),
-                config["model_name"],
-            )
-            litellm_params: dict[str, Any] = {"model": model_string, **resolved_kwargs}
-
-            deployment: dict[str, Any] = {
-                "model_name": "auto",
-                "litellm_params": litellm_params,
-            }
-
-            if config.get("rpm"):
-                deployment["rpm"] = config["rpm"]
-            if config.get("tpm"):
-                deployment["tpm"] = config["tpm"]
-
-            return deployment
-
-        except Exception as e:
-            logger.warning(f"Failed to convert vision config to deployment: {e}")
-            return None
-
-    @classmethod
-    def get_router(cls) -> Router | None:
-        instance = cls.get_instance()
-        return instance._router
-
-    @classmethod
-    def is_initialized(cls) -> bool:
-        instance = cls.get_instance()
-        return instance._initialized and instance._router is not None
-
-    @classmethod
-    def get_model_count(cls) -> int:
-        instance = cls.get_instance()
-        return len(instance._model_list)
-
-
-def is_vision_auto_mode(config_id: int | None) -> bool:
-    return config_id == VISION_AUTO_MODE_ID
-
-
-def build_vision_model_string(
-    litellm_provider: str, model_name: str, custom_provider: str | None
-) -> str:
-    if custom_provider:
-        return f"{custom_provider}/{model_name}"
-    return f"{litellm_provider}/{model_name}"
-
-
-def get_global_vision_llm_config(config_id: int) -> dict | None:
-    from app.config import config
-
-    if config_id == VISION_AUTO_MODE_ID:
-        return {
-            "id": VISION_AUTO_MODE_ID,
-            "name": "Auto (Fastest)",
-            "provider": "AUTO",
-            "model_name": "auto",
-            "is_auto_mode": True,
-        }
-    if config_id > 0:
-        return None
-    for cfg in config.GLOBAL_VISION_LLM_CONFIGS:
-        if cfg.get("id") == config_id:
-            return cfg
-    return None
--- a/surfsense_backend/app/services/vision_model_list_service.py
+++ b/surfsense_backend/app/services/vision_model_list_service.py
@ -1,134 +0,0 @@
-"""
-Service for fetching and caching the vision-capable model list.
-
-Reuses the same OpenRouter public API and local fallback as the LLM model
-list service, but filters for models that accept image input.
-"""
-
-import json
-import logging
-import time
-from pathlib import Path
-
-import httpx
-
-logger = logging.getLogger(__name__)
-
-OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
-FALLBACK_FILE = (
-    Path(__file__).parent.parent / "config" / "vision_model_list_fallback.json"
-)
-CACHE_TTL_SECONDS = 86400  # 24 hours
-
-_cache: list[dict] | None = None
-_cache_timestamp: float = 0
-
-OPENROUTER_SLUG_TO_VISION_PROVIDER: dict[str, str] = {
-    "openai": "OPENAI",
-    "anthropic": "ANTHROPIC",
-    "google": "GOOGLE",
-    "mistralai": "MISTRAL",
-    "x-ai": "XAI",
-}
-
-
-def _format_context_length(length: int | None) -> str | None:
-    if not length:
-        return None
-    if length >= 1_000_000:
-        return f"{length / 1_000_000:g}M"
-    if length >= 1_000:
-        return f"{length / 1_000:g}K"
-    return str(length)
-
-
-async def _fetch_from_openrouter() -> list[dict] | None:
-    try:
-        async with httpx.AsyncClient(timeout=15) as client:
-            response = await client.get(OPENROUTER_API_URL)
-            response.raise_for_status()
-            data = response.json()
-            return data.get("data", [])
-    except Exception as e:
-        logger.warning("Failed to fetch from OpenRouter API for vision models: %s", e)
-        return None
-
-
-def _load_fallback() -> list[dict]:
-    try:
-        with open(FALLBACK_FILE, encoding="utf-8") as f:
-            return json.load(f)
-    except Exception as e:
-        logger.error("Failed to load vision model fallback list: %s", e)
-        return []
-
-
-def _is_vision_model(model: dict) -> bool:
-    """Return True if the model accepts image input and outputs text."""
-    arch = model.get("architecture", {})
-    input_mods = arch.get("input_modalities", [])
-    output_mods = arch.get("output_modalities", [])
-    return "image" in input_mods and "text" in output_mods
-
-
-def _process_vision_models(raw_models: list[dict]) -> list[dict]:
-    processed: list[dict] = []
-
-    for model in raw_models:
-        model_id: str = model.get("id", "")
-        name: str = model.get("name", "")
-        context_length = model.get("context_length")
-
-        if "/" not in model_id:
-            continue
-
-        if not _is_vision_model(model):
-            continue
-
-        provider_slug, model_name = model_id.split("/", 1)
-        context_window = _format_context_length(context_length)
-
-        processed.append(
-            {
-                "value": model_id,
-                "label": name,
-                "provider": "OPENROUTER",
-                "context_window": context_window,
-            }
-        )
-
-        direct_provider = OPENROUTER_SLUG_TO_VISION_PROVIDER.get(provider_slug)
-        if direct_provider:
-            if direct_provider == "GOOGLE" and not model_name.startswith("gemini-"):
-                continue
-
-            processed.append(
-                {
-                    "value": model_name,
-                    "label": name,
-                    "provider": direct_provider,
-                    "context_window": context_window,
-                }
-            )
-
-    return processed
-
-
-async def get_vision_model_list() -> list[dict]:
-    global _cache, _cache_timestamp
-
-    if _cache is not None and (time.time() - _cache_timestamp) < CACHE_TTL_SECONDS:
-        return _cache
-
-    raw_models = await _fetch_from_openrouter()
-
-    if raw_models is None:
-        logger.info("Using fallback vision model list")
-        return _load_fallback()
-
-    processed = _process_vision_models(raw_models)
-
-    _cache = processed
-    _cache_timestamp = time.time()
-
-    return processed