hotpatch(cloud): add llm load balancing

DESKTOP-RTLN3BA\$punk 2026-01-29 15:28:31 -08:00
parent 5d5f9d3bfb
commit 6fb656fd8f
21 changed files with 1324 additions and 103 deletions


@@ -8,6 +8,12 @@ from sqlalchemy.future import select
 from app.config import config
 from app.db import NewLLMConfig, SearchSpace
+from app.services.llm_router_service import (
+    AUTO_MODE_ID,
+    ChatLiteLLMRouter,
+    LLMRouterService,
+    is_auto_mode,
+)
 
 # Configure litellm to automatically drop unsupported parameters
 litellm.drop_params = True
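
For orientation, a minimal sketch of the surface this file now imports from app.services.llm_router_service. The names come from the import above; the bodies are assumptions inferred from how the hunks below use them (the docstrings state that ID 0 is reserved for Auto mode):

# app/services/llm_router_service.py (assumed shape, not the actual implementation)
AUTO_MODE_ID = 0  # reserved config ID for Auto mode

def is_auto_mode(llm_config_id: int) -> bool:
    # Assumed: Auto mode is selected purely by the reserved ID
    return llm_config_id == AUTO_MODE_ID

class LLMRouterService:
    _router = None  # hypothetical shared LiteLLM Router, built from global_llm_config.yaml

    @classmethod
    def is_initialized(cls) -> bool:
        # Assumed: True once the router has been constructed at startup
        return cls._router is not None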
@@ -23,15 +29,26 @@ class LLMRole:
 def get_global_llm_config(llm_config_id: int) -> dict | None:
     """
     Get a global LLM configuration by ID.
-    Global configs have negative IDs.
+    Global configs have negative IDs. ID 0 is reserved for Auto mode.
 
     Args:
-        llm_config_id: The ID of the global config (should be negative)
+        llm_config_id: The ID of the global config (should be negative or 0 for Auto)
 
     Returns:
         dict: Global config dictionary or None if not found
     """
-    if llm_config_id >= 0:
+    # Auto mode (ID 0) is handled separately via the router
+    if llm_config_id == AUTO_MODE_ID:
+        return {
+            "id": AUTO_MODE_ID,
+            "name": "Auto (Load Balanced)",
+            "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
+            "provider": "AUTO",
+            "model_name": "auto",
+            "is_auto_mode": True,
+        }
+    if llm_config_id > 0:
         return None
 
     for cfg in config.GLOBAL_LLM_CONFIGS:
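
As a quick sanity check on the new branching (values taken straight from the hunk above; assumes get_global_llm_config is importable in a test context):

cfg = get_global_llm_config(0)  # AUTO_MODE_ID -> the Auto pseudo-config
assert cfg is not None and cfg["is_auto_mode"] is True
assert cfg["provider"] == "AUTO" and cfg["model_name"] == "auto"

assert get_global_llm_config(7) is None  # positive IDs are database-backed configs, not global
# Negative IDs still fall through to the config.GLOBAL_LLM_CONFIGS loop below.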
@@ -145,19 +162,22 @@ async def validate_llm_config(
 async def get_search_space_llm_instance(
     session: AsyncSession, search_space_id: int, role: str
-) -> ChatLiteLLM | None:
+) -> ChatLiteLLM | ChatLiteLLMRouter | None:
     """
     Get a ChatLiteLLM instance for a specific search space and role.
     LLM preferences are stored at the search space level and shared by all members.
 
+    If Auto mode (ID 0) is configured, returns a ChatLiteLLMRouter that uses
+    LiteLLM Router for automatic load balancing across available providers.
+
     Args:
         session: Database session
         search_space_id: Search Space ID
         role: LLM role ('agent' or 'document_summary')
 
     Returns:
-        ChatLiteLLM instance or None if not found
+        ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
     """
     try:
         # Get the search space with its LLM preferences
@@ -180,10 +200,28 @@ async def get_search_space_llm_instance(
             logger.error(f"Invalid LLM role: {role}")
             return None
 
-        if not llm_config_id:
+        if llm_config_id is None:
             logger.error(f"No {role} LLM configured for search space {search_space_id}")
             return None
 
+        # Check for Auto mode (ID 0) - use router for load balancing
+        if is_auto_mode(llm_config_id):
+            if not LLMRouterService.is_initialized():
+                logger.error(
+                    "Auto mode requested but LLM Router not initialized. "
+                    "Ensure global_llm_config.yaml exists with valid configs."
+                )
+                return None
+            try:
+                logger.debug(
+                    f"Using Auto mode (LLM Router) for search space {search_space_id}, role {role}"
+                )
+                return ChatLiteLLMRouter()
+            except Exception as e:
+                logger.error(f"Failed to create ChatLiteLLMRouter: {e}")
+                return None
+
         # Check if this is a global config (negative ID)
         if llm_config_id < 0:
             global_config = get_global_llm_config(llm_config_id)
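
ChatLiteLLMRouter() here is the service's own wrapper, and its internals aren't shown in this diff. As a hedged sketch of the load balancing it describes: LiteLLM's Router distributes calls across deployments that share a model_name alias. The entries below are illustrative assumptions, not the actual global_llm_config.yaml schema:

from litellm import Router

# Hypothetical model_list, e.g. parsed from global_llm_config.yaml at startup.
model_list = [
    {
        "model_name": "auto",  # shared alias: the router balances across all "auto" entries
        "litellm_params": {"model": "openai/gpt-4o-mini", "api_key": "sk-..."},
    },
    {
        "model_name": "auto",
        "litellm_params": {"model": "anthropic/claude-3-5-haiku-20241022", "api_key": "sk-ant-..."},
    },
]

router = Router(model_list=model_list)
response = router.completion(
    model="auto",  # resolved to one of the deployments above per request
    messages=[{"role": "user", "content": "ping"}],
)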
@@ -328,14 +366,14 @@ async def get_search_space_llm_instance
 async def get_agent_llm(
     session: AsyncSession, search_space_id: int
-) -> ChatLiteLLM | None:
+) -> ChatLiteLLM | ChatLiteLLMRouter | None:
     """Get the search space's agent LLM instance for chat operations."""
     return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT)
 
 
 async def get_document_summary_llm(
     session: AsyncSession, search_space_id: int
-) -> ChatLiteLLM | None:
+) -> ChatLiteLLM | ChatLiteLLMRouter | None:
     """Get the search space's document summary LLM instance."""
     return await get_search_space_llm_instance(
         session, search_space_id, LLMRole.DOCUMENT_SUMMARY
@@ -345,7 +383,7 @@ async def get_document_summary_llm(
 # Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
 async def get_user_long_context_llm(
     session: AsyncSession, user_id: str, search_space_id: int
-) -> ChatLiteLLM | None:
+) -> ChatLiteLLM | ChatLiteLLMRouter | None:
     """
     Deprecated: Use get_document_summary_llm instead.
     The user_id parameter is ignored as LLM preferences are now per-search-space.
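
Migration note for the deprecated alias, per its docstring (the alias body is truncated in this hunk, but the docstring says user_id is ignored):

# Before (deprecated; user_id is ignored):
llm = await get_user_long_context_llm(session, user_id, search_space_id)
# After:
llm = await get_document_summary_llm(session, search_space_id)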