mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
hotpatch(cloud): add llm load balancing
This commit is contained in:
parent
5d5f9d3bfb
commit
6fb656fd8f
21 changed files with 1324 additions and 103 deletions
|
|
@ -2,8 +2,9 @@
|
|||
LLM configuration utilities for SurfSense agents.
|
||||
|
||||
This module provides functions for loading LLM configurations from:
|
||||
1. YAML files (global configs with negative IDs)
|
||||
2. Database NewLLMConfig table (user-created configs with positive IDs)
|
||||
1. Auto mode (ID 0) - Uses LiteLLM Router for load balancing
|
||||
2. YAML files (global configs with negative IDs)
|
||||
3. Database NewLLMConfig table (user-created configs with positive IDs)
|
||||
|
||||
It also provides utilities for creating ChatLiteLLM instances and
|
||||
managing prompt configurations.
|
||||
|
|
@ -17,6 +18,13 @@ from langchain_litellm import ChatLiteLLM
|
|||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.services.llm_router_service import (
|
||||
AUTO_MODE_ID,
|
||||
ChatLiteLLMRouter,
|
||||
LLMRouterService,
|
||||
is_auto_mode,
|
||||
)
|
||||
|
||||
# Provider mapping for LiteLLM model string construction
|
||||
PROVIDER_MAP = {
|
||||
"OPENAI": "openai",
|
||||
|
|
@ -58,6 +66,7 @@ class AgentConfig:
|
|||
Complete configuration for the SurfSense agent.
|
||||
|
||||
This combines LLM settings with prompt configuration from NewLLMConfig.
|
||||
Supports Auto mode (ID 0) which uses LiteLLM Router for load balancing.
|
||||
"""
|
||||
|
||||
# LLM Model Settings
|
||||
|
|
@ -77,6 +86,32 @@ class AgentConfig:
|
|||
config_id: int | None = None
|
||||
config_name: str | None = None
|
||||
|
||||
# Auto mode flag
|
||||
is_auto_mode: bool = False
|
||||
|
||||
@classmethod
|
||||
def from_auto_mode(cls) -> "AgentConfig":
|
||||
"""
|
||||
Create an AgentConfig for Auto mode (LiteLLM Router load balancing).
|
||||
|
||||
Returns:
|
||||
AgentConfig instance configured for Auto mode
|
||||
"""
|
||||
return cls(
|
||||
provider="AUTO",
|
||||
model_name="auto",
|
||||
api_key="", # Not needed for router
|
||||
api_base=None,
|
||||
custom_provider=None,
|
||||
litellm_params=None,
|
||||
system_instructions=None,
|
||||
use_default_system_instructions=True,
|
||||
citations_enabled=True,
|
||||
config_id=AUTO_MODE_ID,
|
||||
config_name="Auto (Load Balanced)",
|
||||
is_auto_mode=True,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_new_llm_config(cls, config) -> "AgentConfig":
|
||||
"""
|
||||
|
|
@ -102,6 +137,7 @@ class AgentConfig:
|
|||
citations_enabled=config.citations_enabled,
|
||||
config_id=config.id,
|
||||
config_name=config.name,
|
||||
is_auto_mode=False,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
|
|
@ -138,6 +174,7 @@ class AgentConfig:
|
|||
citations_enabled=yaml_config.get("citations_enabled", True),
|
||||
config_id=yaml_config.get("id"),
|
||||
config_name=yaml_config.get("name"),
|
||||
is_auto_mode=False,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -261,20 +298,28 @@ async def load_agent_config(
|
|||
search_space_id: int | None = None,
|
||||
) -> "AgentConfig | None":
|
||||
"""
|
||||
Load an agent configuration, supporting both YAML (negative IDs) and database (positive IDs) configs.
|
||||
Load an agent configuration, supporting Auto mode, YAML, and database configs.
|
||||
|
||||
This is the main entry point for loading configurations:
|
||||
- ID 0: Auto mode (uses LiteLLM Router for load balancing)
|
||||
- Negative IDs: Load from YAML file (global configs)
|
||||
- Positive IDs: Load from NewLLMConfig database table
|
||||
|
||||
Args:
|
||||
session: AsyncSession for database access
|
||||
config_id: The config ID (negative for YAML, positive for database)
|
||||
config_id: The config ID (0 for Auto, negative for YAML, positive for database)
|
||||
search_space_id: Optional search space ID for context
|
||||
|
||||
Returns:
|
||||
AgentConfig instance or None if not found
|
||||
"""
|
||||
# Auto mode (ID 0) - use LiteLLM Router
|
||||
if is_auto_mode(config_id):
|
||||
if not LLMRouterService.is_initialized():
|
||||
print("Error: Auto mode requested but LLM Router not initialized")
|
||||
return None
|
||||
return AgentConfig.from_auto_mode()
|
||||
|
||||
if config_id < 0:
|
||||
# Load from YAML (global configs have negative IDs)
|
||||
yaml_config = load_llm_config_from_yaml(config_id)
|
||||
|
|
@ -324,16 +369,30 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
|
|||
|
||||
def create_chat_litellm_from_agent_config(
|
||||
agent_config: AgentConfig,
|
||||
) -> ChatLiteLLM | None:
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""
|
||||
Create a ChatLiteLLM instance from an AgentConfig.
|
||||
Create a ChatLiteLLM or ChatLiteLLMRouter instance from an AgentConfig.
|
||||
|
||||
For Auto mode configs, returns a ChatLiteLLMRouter that uses LiteLLM Router
|
||||
for automatic load balancing across available providers.
|
||||
|
||||
Args:
|
||||
agent_config: AgentConfig instance
|
||||
|
||||
Returns:
|
||||
ChatLiteLLM instance or None on error
|
||||
ChatLiteLLM or ChatLiteLLMRouter instance, or None on error
|
||||
"""
|
||||
# Handle Auto mode - return ChatLiteLLMRouter
|
||||
if agent_config.is_auto_mode:
|
||||
if not LLMRouterService.is_initialized():
|
||||
print("Error: Auto mode requested but LLM Router not initialized")
|
||||
return None
|
||||
try:
|
||||
return ChatLiteLLMRouter()
|
||||
except Exception as e:
|
||||
print(f"Error creating ChatLiteLLMRouter: {e}")
|
||||
return None
|
||||
|
||||
# Build the model string
|
||||
if agent_config.custom_provider:
|
||||
model_string = f"{agent_config.custom_provider}/{agent_config.model_name}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue