mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-28 18:36:23 +02:00
hotpatch(cloud): add llm load balancing
This commit is contained in:
parent
5d5f9d3bfb
commit
6fb656fd8f
21 changed files with 1324 additions and 103 deletions
|
|
@ -8,6 +8,12 @@ from sqlalchemy.future import select
|
|||
|
||||
from app.config import config
|
||||
from app.db import NewLLMConfig, SearchSpace
|
||||
from app.services.llm_router_service import (
|
||||
AUTO_MODE_ID,
|
||||
ChatLiteLLMRouter,
|
||||
LLMRouterService,
|
||||
is_auto_mode,
|
||||
)
|
||||
|
||||
# Configure litellm to automatically drop unsupported parameters
|
||||
litellm.drop_params = True
|
||||
|
|
@ -23,15 +29,26 @@ class LLMRole:
|
|||
def get_global_llm_config(llm_config_id: int) -> dict | None:
|
||||
"""
|
||||
Get a global LLM configuration by ID.
|
||||
Global configs have negative IDs.
|
||||
Global configs have negative IDs. ID 0 is reserved for Auto mode.
|
||||
|
||||
Args:
|
||||
llm_config_id: The ID of the global config (should be negative)
|
||||
llm_config_id: The ID of the global config (should be negative or 0 for Auto)
|
||||
|
||||
Returns:
|
||||
dict: Global config dictionary or None if not found
|
||||
"""
|
||||
if llm_config_id >= 0:
|
||||
# Auto mode (ID 0) is handled separately via the router
|
||||
if llm_config_id == AUTO_MODE_ID:
|
||||
return {
|
||||
"id": AUTO_MODE_ID,
|
||||
"name": "Auto (Load Balanced)",
|
||||
"description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
|
||||
"provider": "AUTO",
|
||||
"model_name": "auto",
|
||||
"is_auto_mode": True,
|
||||
}
|
||||
|
||||
if llm_config_id > 0:
|
||||
return None
|
||||
|
||||
for cfg in config.GLOBAL_LLM_CONFIGS:
|
||||
|
|
@ -145,19 +162,22 @@ async def validate_llm_config(
|
|||
|
||||
async def get_search_space_llm_instance(
|
||||
session: AsyncSession, search_space_id: int, role: str
|
||||
) -> ChatLiteLLM | None:
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""
|
||||
Get a ChatLiteLLM instance for a specific search space and role.
|
||||
|
||||
LLM preferences are stored at the search space level and shared by all members.
|
||||
|
||||
If Auto mode (ID 0) is configured, returns a ChatLiteLLMRouter that uses
|
||||
LiteLLM Router for automatic load balancing across available providers.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
search_space_id: Search Space ID
|
||||
role: LLM role ('agent' or 'document_summary')
|
||||
|
||||
Returns:
|
||||
ChatLiteLLM instance or None if not found
|
||||
ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
|
||||
"""
|
||||
try:
|
||||
# Get the search space with its LLM preferences
|
||||
|
|
@ -180,10 +200,28 @@ async def get_search_space_llm_instance(
|
|||
logger.error(f"Invalid LLM role: {role}")
|
||||
return None
|
||||
|
||||
if not llm_config_id:
|
||||
if llm_config_id is None:
|
||||
logger.error(f"No {role} LLM configured for search space {search_space_id}")
|
||||
return None
|
||||
|
||||
# Check for Auto mode (ID 0) - use router for load balancing
|
||||
if is_auto_mode(llm_config_id):
|
||||
if not LLMRouterService.is_initialized():
|
||||
logger.error(
|
||||
"Auto mode requested but LLM Router not initialized. "
|
||||
"Ensure global_llm_config.yaml exists with valid configs."
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
logger.debug(
|
||||
f"Using Auto mode (LLM Router) for search space {search_space_id}, role {role}"
|
||||
)
|
||||
return ChatLiteLLMRouter()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create ChatLiteLLMRouter: {e}")
|
||||
return None
|
||||
|
||||
# Check if this is a global config (negative ID)
|
||||
if llm_config_id < 0:
|
||||
global_config = get_global_llm_config(llm_config_id)
|
||||
|
|
@ -328,14 +366,14 @@ async def get_search_space_llm_instance(
|
|||
|
||||
async def get_agent_llm(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""Get the search space's agent LLM instance for chat operations."""
|
||||
return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT)
|
||||
|
||||
|
||||
async def get_document_summary_llm(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""Get the search space's document summary LLM instance."""
|
||||
return await get_search_space_llm_instance(
|
||||
session, search_space_id, LLMRole.DOCUMENT_SUMMARY
|
||||
|
|
@ -345,7 +383,7 @@ async def get_document_summary_llm(
|
|||
# Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
|
||||
async def get_user_long_context_llm(
|
||||
session: AsyncSession, user_id: str, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""
|
||||
Deprecated: Use get_document_summary_llm instead.
|
||||
The user_id parameter is ignored as LLM preferences are now per-search-space.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue