SurfSense/surfsense_backend/app/agents/new_chat/llm_config.py

"""
LLM configuration utilities for SurfSense agents.

This module provides functions for loading LLM configurations from:
1. Auto mode (ID 0) - Uses LiteLLM Router for load balancing
2. YAML files (global configs with negative IDs)
3. Database NewLLMConfig table (user-created configs with positive IDs)

It also provides utilities for creating ChatLiteLLM instances and
managing prompt configurations.
"""

from dataclasses import dataclass
from pathlib import Path

import yaml
from langchain_litellm import ChatLiteLLM
from litellm import get_model_info
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.services.llm_router_service import (
    AUTO_MODE_ID,
    ChatLiteLLMRouter,
    LLMRouterService,
    get_auto_mode_llm,
    is_auto_mode,
)

# Provider mapping for LiteLLM model string construction
PROVIDER_MAP = {
    "OPENAI": "openai",
    "ANTHROPIC": "anthropic",
    "GROQ": "groq",
    "COHERE": "cohere",
    "GOOGLE": "gemini",
    "OLLAMA": "ollama_chat",
    "MISTRAL": "mistral",
    "AZURE_OPENAI": "azure",
    "OPENROUTER": "openrouter",
    "XAI": "xai",
    "BEDROCK": "bedrock",
    "VERTEX_AI": "vertex_ai",
    "TOGETHER_AI": "together_ai",
    "FIREWORKS_AI": "fireworks_ai",
    "DEEPSEEK": "openai",
    "ALIBABA_QWEN": "openai",
    "MOONSHOT": "openai",
    "ZHIPU": "openai",
    "GITHUB_MODELS": "github",
    "REPLICATE": "replicate",
    "PERPLEXITY": "perplexity",
    "ANYSCALE": "anyscale",
    "DEEPINFRA": "deepinfra",
    "CEREBRAS": "cerebras",
    "SAMBANOVA": "sambanova",
    "AI21": "ai21",
    "CLOUDFLARE": "cloudflare",
    "DATABRICKS": "databricks",
    "COMETAPI": "cometapi",
    "HUGGINGFACE": "huggingface",
    "MINIMAX": "openai",
    "CUSTOM": "custom",
}


def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
    """Attach a ``profile`` dict to ChatLiteLLM with model context metadata."""
    try:
        info = get_model_info(model_string)
        max_input_tokens = info.get("max_input_tokens")
        if isinstance(max_input_tokens, int) and max_input_tokens > 0:
            llm.profile = {
                "max_input_tokens": max_input_tokens,
                "max_input_tokens_upper": max_input_tokens,
                "token_count_model": model_string,
                "token_count_models": [model_string],
            }
    except Exception:
        return


@dataclass
class AgentConfig:
    """
    Complete configuration for the SurfSense agent.

    This combines LLM settings with prompt configuration from NewLLMConfig.
    Supports Auto mode (ID 0) which uses LiteLLM Router for load balancing.
    """

    # LLM Model Settings
    provider: str
    model_name: str
    api_key: str
    api_base: str | None = None
    custom_provider: str | None = None
    litellm_params: dict | None = None

    # Prompt Configuration
    system_instructions: str | None = None
    use_default_system_instructions: bool = True
    citations_enabled: bool = True

    # Metadata
    config_id: int | None = None
    config_name: str | None = None

    # Auto mode flag
    is_auto_mode: bool = False

    @classmethod
    def from_auto_mode(cls) -> "AgentConfig":
        """
        Create an AgentConfig for Auto mode (LiteLLM Router load balancing).

        Returns:
            AgentConfig instance configured for Auto mode
        """
        return cls(
            provider="AUTO",
            model_name="auto",
            api_key="",  # Not needed for router
            api_base=None,
            custom_provider=None,
            litellm_params=None,
            system_instructions=None,
            use_default_system_instructions=True,
            citations_enabled=True,
            config_id=AUTO_MODE_ID,
            config_name="Auto (Fastest)",
            is_auto_mode=True,
        )

    @classmethod
    def from_new_llm_config(cls, config) -> "AgentConfig":
        """
        Create an AgentConfig from a NewLLMConfig database model.

        Args:
            config: NewLLMConfig database model instance

        Returns:
            AgentConfig instance
        """
        return cls(
            provider=config.provider.value
            if hasattr(config.provider, "value")
            else str(config.provider),
            model_name=config.model_name,
            api_key=config.api_key,
            api_base=config.api_base,
            custom_provider=config.custom_provider,
            litellm_params=config.litellm_params,
            system_instructions=config.system_instructions,
            use_default_system_instructions=config.use_default_system_instructions,
            citations_enabled=config.citations_enabled,
            config_id=config.id,
            config_name=config.name,
            is_auto_mode=False,
        )

    @classmethod
    def from_yaml_config(cls, yaml_config: dict) -> "AgentConfig":
        """
        Create an AgentConfig from a YAML configuration dictionary.

        YAML configs now support the same prompt configuration fields as NewLLMConfig:
        - system_instructions: Custom system instructions (empty string uses defaults)
        - use_default_system_instructions: Whether to use default instructions
        - citations_enabled: Whether citations are enabled

        Args:
            yaml_config: Configuration dictionary from YAML file

        Returns:
            AgentConfig instance
        """
        # Get system instructions from YAML, default to empty string
        system_instructions = yaml_config.get("system_instructions", "")

        return cls(
            provider=yaml_config.get("provider", "").upper(),
            model_name=yaml_config.get("model_name", ""),
            api_key=yaml_config.get("api_key", ""),
            api_base=yaml_config.get("api_base"),
            custom_provider=yaml_config.get("custom_provider"),
            litellm_params=yaml_config.get("litellm_params"),
            # Prompt configuration from YAML (with defaults for backwards compatibility)
            system_instructions=system_instructions if system_instructions else None,
            use_default_system_instructions=yaml_config.get(
                "use_default_system_instructions", True
            ),
            citations_enabled=yaml_config.get("citations_enabled", True),
            config_id=yaml_config.get("id"),
            config_name=yaml_config.get("name"),
            is_auto_mode=False,
        )


def load_llm_config_from_yaml(llm_config_id: int = -1) -> dict | None:
    """
    Load a specific LLM config from global_llm_config.yaml.

    Args:
        llm_config_id: The id of the config to load (default: -1)

    Returns:
        LLM config dict or None if not found
    """
    # Get the config file path
    base_dir = Path(__file__).resolve().parent.parent.parent.parent
    config_file = base_dir / "app" / "config" / "global_llm_config.yaml"

    # Fallback to example file if main config doesn't exist
    if not config_file.exists():
        config_file = base_dir / "app" / "config" / "global_llm_config.example.yaml"
        if not config_file.exists():
            print("Error: No global_llm_config.yaml or example file found")
            return None

    try:
        with open(config_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
            configs = data.get("global_llm_configs", [])
            for cfg in configs:
                if isinstance(cfg, dict) and cfg.get("id") == llm_config_id:
                    return cfg

            print(f"Error: Global LLM config id {llm_config_id} not found")
            return None
    except Exception as e:
        print(f"Error loading config: {e}")
        return None


async def load_new_llm_config_from_db(
    session: AsyncSession,
    config_id: int,
) -> "AgentConfig | None":
    """
    Load a NewLLMConfig from the database by ID.

    Args:
        session: AsyncSession for database access
        config_id: The ID of the NewLLMConfig to load

    Returns:
        AgentConfig instance or None if not found
    """
    # Import here to avoid circular imports
    from app.db import NewLLMConfig

    try:
        result = await session.execute(
            select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
        )
        config = result.scalars().first()

        if not config:
            print(f"Error: NewLLMConfig with id {config_id} not found")
            return None

        return AgentConfig.from_new_llm_config(config)
    except Exception as e:
        print(f"Error loading NewLLMConfig from database: {e}")
        return None


async def load_agent_llm_config_for_search_space(
    session: AsyncSession,
    search_space_id: int,
) -> "AgentConfig | None":
    """
    Load the agent LLM configuration for a search space.

    This loads the LLM config based on the search space's agent_llm_id setting:
    - Positive ID: Load from NewLLMConfig database table
    - Negative ID: Load from YAML global configs
    - None: Falls back to first global config (id=-1)

    Args:
        session: AsyncSession for database access
        search_space_id: The search space ID

    Returns:
        AgentConfig instance or None if not found
    """
    # Import here to avoid circular imports
    from app.db import SearchSpace

    try:
        # Get the search space to check its agent_llm_id preference
        result = await session.execute(
            select(SearchSpace).filter(SearchSpace.id == search_space_id)
        )
        search_space = result.scalars().first()

        if not search_space:
            print(f"Error: SearchSpace with id {search_space_id} not found")
            return None

        # Use agent_llm_id from search space, fallback to -1 (first global config)
        config_id = (
            search_space.agent_llm_id if search_space.agent_llm_id is not None else -1
        )

        # Load the config using the unified loader
        return await load_agent_config(session, config_id, search_space_id)
    except Exception as e:
        print(f"Error loading agent LLM config for search space {search_space_id}: {e}")
        return None


async def load_agent_config(
    session: AsyncSession,
    config_id: int,
    search_space_id: int | None = None,
) -> "AgentConfig | None":
    """
    Load an agent configuration, supporting Auto mode, YAML, and database configs.

    This is the main entry point for loading configurations:
    - ID 0: Auto mode (uses LiteLLM Router for load balancing)
    - Negative IDs: Load from YAML file (global configs)
    - Positive IDs: Load from NewLLMConfig database table

    Args:
        session: AsyncSession for database access
        config_id: The config ID (0 for Auto, negative for YAML, positive for database)
        search_space_id: Optional search space ID for context

    Returns:
        AgentConfig instance or None if not found
    """
    # Auto mode (ID 0) - use LiteLLM Router
    if is_auto_mode(config_id):
        if not LLMRouterService.is_initialized():
            print("Error: Auto mode requested but LLM Router not initialized")
            return None
        return AgentConfig.from_auto_mode()

    if config_id < 0:
        # Load from YAML (global configs have negative IDs)
        yaml_config = load_llm_config_from_yaml(config_id)
        if yaml_config:
            return AgentConfig.from_yaml_config(yaml_config)
        return None
    else:
        # Load from database (NewLLMConfig)
        return await load_new_llm_config_from_db(session, config_id)


def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
    """
    Create a ChatLiteLLM instance from a global LLM config dictionary.

    Args:
        llm_config: LLM configuration dictionary from YAML

    Returns:
        ChatLiteLLM instance or None on error
    """
    # Build the model string
    if llm_config.get("custom_provider"):
        model_string = f"{llm_config['custom_provider']}/{llm_config['model_name']}"
    else:
        provider = llm_config.get("provider", "").upper()
        provider_prefix = PROVIDER_MAP.get(provider, provider.lower())
        model_string = f"{provider_prefix}/{llm_config['model_name']}"

    # Create ChatLiteLLM instance with streaming enabled
    litellm_kwargs = {
        "model": model_string,
        "api_key": llm_config.get("api_key"),
        "streaming": True,  # Enable streaming for real-time token streaming
    }

    # Add optional parameters
    if llm_config.get("api_base"):
        litellm_kwargs["api_base"] = llm_config["api_base"]

    # Add any additional litellm parameters
    if llm_config.get("litellm_params"):
        litellm_kwargs.update(llm_config["litellm_params"])

    llm = ChatLiteLLM(**litellm_kwargs)
    _attach_model_profile(llm, model_string)
    return llm


def create_chat_litellm_from_agent_config(
    agent_config: AgentConfig,
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """
    Create a ChatLiteLLM or ChatLiteLLMRouter instance from an AgentConfig.

    For Auto mode configs, returns a ChatLiteLLMRouter that uses LiteLLM Router
    for automatic load balancing across available providers.

    Args:
        agent_config: AgentConfig instance

    Returns:
        ChatLiteLLM or ChatLiteLLMRouter instance, or None on error
    """
    # Handle Auto mode - return ChatLiteLLMRouter
    if agent_config.is_auto_mode:
        if not LLMRouterService.is_initialized():
            print("Error: Auto mode requested but LLM Router not initialized")
            return None
        try:
            return get_auto_mode_llm()
        except Exception as e:
            print(f"Error creating ChatLiteLLMRouter: {e}")
            return None

    # Build the model string
    if agent_config.custom_provider:
        model_string = f"{agent_config.custom_provider}/{agent_config.model_name}"
    else:
        provider_prefix = PROVIDER_MAP.get(
            agent_config.provider, agent_config.provider.lower()
        )
        model_string = f"{provider_prefix}/{agent_config.model_name}"

    # Create ChatLiteLLM instance with streaming enabled
    litellm_kwargs = {
        "model": model_string,
        "api_key": agent_config.api_key,
        "streaming": True,  # Enable streaming for real-time token streaming
    }

    # Add optional parameters
    if agent_config.api_base:
        litellm_kwargs["api_base"] = agent_config.api_base

    # Add any additional litellm parameters
    if agent_config.litellm_params:
        litellm_kwargs.update(agent_config.litellm_params)

    llm = ChatLiteLLM(**litellm_kwargs)
    _attach_model_profile(llm, model_string)
    return llm