From 1f1d513437ad42d9d93f7709f2ba795d57d70e2a Mon Sep 17 00:00:00 2001 From: Eric Lammertsma Date: Thu, 29 Jan 2026 15:16:08 -0500 Subject: [PATCH 01/10] fix(database): handle duplicate podcast_status type creation gracefully Modify the upgrade function to prevent errors when creating the podcast_status ENUM type by wrapping the creation in a DO block that catches duplicate_object exceptions. --- .../alembic/versions/82_add_podcast_status_and_thread.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py b/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py index fd4eed89f..46e39fa3b 100644 --- a/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py +++ b/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py @@ -20,7 +20,11 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.execute( """ - CREATE TYPE podcast_status AS ENUM ('pending', 'generating', 'ready', 'failed'); + DO $$ BEGIN + CREATE TYPE podcast_status AS ENUM ('pending', 'generating', 'ready', 'failed'); + EXCEPTION + WHEN duplicate_object THEN null; + END $$; """ ) From 6fb656fd8fe0a1a527a7e3786a92cc5cbd9d659b Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 29 Jan 2026 15:28:31 -0800 Subject: [PATCH 02/10] hotpatch(cloud): add llm load balancing --- ...migrate_global_llm_configs_to_auto_mode.py | 75 +++ .../app/agents/new_chat/chat_deepagent.py | 4 +- .../app/agents/new_chat/llm_config.py | 73 +- surfsense_backend/app/app.py | 4 +- surfsense_backend/app/celery_app.py | 14 + surfsense_backend/app/config/__init__.py | 60 ++ .../app/config/global_llm_config.example.yaml | 43 ++ surfsense_backend/app/db.py | 13 +- .../app/routes/new_llm_config_routes.py | 26 +- .../app/routes/search_spaces_routes.py | 20 +- .../app/schemas/new_llm_config.py | 10 +- .../app/services/llm_router_service.py | 632 ++++++++++++++++++ surfsense_backend/app/services/llm_service.py | 56 +- .../app/tasks/chat/stream_new_chat.py | 4 + .../[search_space_id]/client-layout.tsx | 8 +- .../[search_space_id]/onboard/page.tsx | 8 +- .../components/assistant-ui/thread.tsx | 3 +- .../new-chat/model-config-sidebar.tsx | 132 +++- .../components/new-chat/model-selector.tsx | 75 ++- .../components/settings/llm-role-manager.tsx | 161 +++-- .../contracts/types/new-llm-config.types.ts | 6 +- 21 files changed, 1324 insertions(+), 103 deletions(-) create mode 100644 surfsense_backend/alembic/versions/84_migrate_global_llm_configs_to_auto_mode.py create mode 100644 surfsense_backend/app/services/llm_router_service.py diff --git a/surfsense_backend/alembic/versions/84_migrate_global_llm_configs_to_auto_mode.py b/surfsense_backend/alembic/versions/84_migrate_global_llm_configs_to_auto_mode.py new file mode 100644 index 000000000..c852712d7 --- /dev/null +++ b/surfsense_backend/alembic/versions/84_migrate_global_llm_configs_to_auto_mode.py @@ -0,0 +1,75 @@ +"""Migrate global LLM configs to Auto mode + +Revision ID: 84 +Revises: 83 + +This migration updates existing search spaces that use global LLM configs +(negative IDs) to use the new Auto mode (ID 0) instead. + +Auto mode uses LiteLLM Router to automatically load balance requests across +all configured global LLM providers, which helps avoid rate limits. + +Changes: +1. Update agent_llm_id from negative values to 0 (Auto mode) +2. Update document_summary_llm_id from negative values to 0 (Auto mode) +3. Update NULL values to 0 (Auto mode) as the new default + +Note: This migration preserves any custom user-created LLM configs (positive IDs). +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "84" +down_revision: str | None = "83" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Migrate global LLM config IDs (negative) and NULL to Auto mode (0).""" + # Update agent_llm_id: convert negative values and NULL to 0 (Auto mode) + op.execute( + """ + UPDATE searchspaces + SET agent_llm_id = 0 + WHERE agent_llm_id < 0 OR agent_llm_id IS NULL + """ + ) + + # Update document_summary_llm_id: convert negative values and NULL to 0 (Auto mode) + op.execute( + """ + UPDATE searchspaces + SET document_summary_llm_id = 0 + WHERE document_summary_llm_id < 0 OR document_summary_llm_id IS NULL + """ + ) + + +def downgrade() -> None: + """Revert Auto mode back to the first global config (ID -1). + + Note: This is a best-effort revert. We cannot know which specific + global config each search space was using before, so we default + to -1 (typically the first/primary global config). + """ + # Revert agent_llm_id from Auto mode (0) back to first global config (-1) + op.execute( + """ + UPDATE searchspaces + SET agent_llm_id = -1 + WHERE agent_llm_id = 0 + """ + ) + + # Revert document_summary_llm_id from Auto mode (0) back to first global config (-1) + op.execute( + """ + UPDATE searchspaces + SET document_summary_llm_id = -1 + WHERE document_summary_llm_id = 0 + """ + ) diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index fda22aec3..9c383c308 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -10,8 +10,8 @@ from collections.abc import Sequence from typing import Any from deepagents import create_deep_agent +from langchain_core.language_models import BaseChatModel from langchain_core.tools import BaseTool -from langchain_litellm import ChatLiteLLM from langgraph.types import Checkpointer from sqlalchemy.ext.asyncio import AsyncSession @@ -114,7 +114,7 @@ def _map_connectors_to_searchable_types( async def create_surfsense_deep_agent( - llm: ChatLiteLLM, + llm: BaseChatModel, search_space_id: int, db_session: AsyncSession, connector_service: ConnectorService, diff --git a/surfsense_backend/app/agents/new_chat/llm_config.py b/surfsense_backend/app/agents/new_chat/llm_config.py index a55ed79d3..f9f92959c 100644 --- a/surfsense_backend/app/agents/new_chat/llm_config.py +++ b/surfsense_backend/app/agents/new_chat/llm_config.py @@ -2,8 +2,9 @@ LLM configuration utilities for SurfSense agents. This module provides functions for loading LLM configurations from: -1. YAML files (global configs with negative IDs) -2. Database NewLLMConfig table (user-created configs with positive IDs) +1. Auto mode (ID 0) - Uses LiteLLM Router for load balancing +2. YAML files (global configs with negative IDs) +3. Database NewLLMConfig table (user-created configs with positive IDs) It also provides utilities for creating ChatLiteLLM instances and managing prompt configurations. @@ -17,6 +18,13 @@ from langchain_litellm import ChatLiteLLM from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from app.services.llm_router_service import ( + AUTO_MODE_ID, + ChatLiteLLMRouter, + LLMRouterService, + is_auto_mode, +) + # Provider mapping for LiteLLM model string construction PROVIDER_MAP = { "OPENAI": "openai", @@ -58,6 +66,7 @@ class AgentConfig: Complete configuration for the SurfSense agent. This combines LLM settings with prompt configuration from NewLLMConfig. + Supports Auto mode (ID 0) which uses LiteLLM Router for load balancing. """ # LLM Model Settings @@ -77,6 +86,32 @@ class AgentConfig: config_id: int | None = None config_name: str | None = None + # Auto mode flag + is_auto_mode: bool = False + + @classmethod + def from_auto_mode(cls) -> "AgentConfig": + """ + Create an AgentConfig for Auto mode (LiteLLM Router load balancing). + + Returns: + AgentConfig instance configured for Auto mode + """ + return cls( + provider="AUTO", + model_name="auto", + api_key="", # Not needed for router + api_base=None, + custom_provider=None, + litellm_params=None, + system_instructions=None, + use_default_system_instructions=True, + citations_enabled=True, + config_id=AUTO_MODE_ID, + config_name="Auto (Load Balanced)", + is_auto_mode=True, + ) + @classmethod def from_new_llm_config(cls, config) -> "AgentConfig": """ @@ -102,6 +137,7 @@ class AgentConfig: citations_enabled=config.citations_enabled, config_id=config.id, config_name=config.name, + is_auto_mode=False, ) @classmethod @@ -138,6 +174,7 @@ class AgentConfig: citations_enabled=yaml_config.get("citations_enabled", True), config_id=yaml_config.get("id"), config_name=yaml_config.get("name"), + is_auto_mode=False, ) @@ -261,20 +298,28 @@ async def load_agent_config( search_space_id: int | None = None, ) -> "AgentConfig | None": """ - Load an agent configuration, supporting both YAML (negative IDs) and database (positive IDs) configs. + Load an agent configuration, supporting Auto mode, YAML, and database configs. This is the main entry point for loading configurations: + - ID 0: Auto mode (uses LiteLLM Router for load balancing) - Negative IDs: Load from YAML file (global configs) - Positive IDs: Load from NewLLMConfig database table Args: session: AsyncSession for database access - config_id: The config ID (negative for YAML, positive for database) + config_id: The config ID (0 for Auto, negative for YAML, positive for database) search_space_id: Optional search space ID for context Returns: AgentConfig instance or None if not found """ + # Auto mode (ID 0) - use LiteLLM Router + if is_auto_mode(config_id): + if not LLMRouterService.is_initialized(): + print("Error: Auto mode requested but LLM Router not initialized") + return None + return AgentConfig.from_auto_mode() + if config_id < 0: # Load from YAML (global configs have negative IDs) yaml_config = load_llm_config_from_yaml(config_id) @@ -324,16 +369,30 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None: def create_chat_litellm_from_agent_config( agent_config: AgentConfig, -) -> ChatLiteLLM | None: +) -> ChatLiteLLM | ChatLiteLLMRouter | None: """ - Create a ChatLiteLLM instance from an AgentConfig. + Create a ChatLiteLLM or ChatLiteLLMRouter instance from an AgentConfig. + + For Auto mode configs, returns a ChatLiteLLMRouter that uses LiteLLM Router + for automatic load balancing across available providers. Args: agent_config: AgentConfig instance Returns: - ChatLiteLLM instance or None on error + ChatLiteLLM or ChatLiteLLMRouter instance, or None on error """ + # Handle Auto mode - return ChatLiteLLMRouter + if agent_config.is_auto_mode: + if not LLMRouterService.is_initialized(): + print("Error: Auto mode requested but LLM Router not initialized") + return None + try: + return ChatLiteLLMRouter() + except Exception as e: + print(f"Error creating ChatLiteLLMRouter: {e}") + return None + # Build the model string if agent_config.custom_provider: model_string = f"{agent_config.custom_provider}/{agent_config.model_name}" diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 3ad9d89bc..01dd0da3d 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -9,7 +9,7 @@ from app.agents.new_chat.checkpointer import ( close_checkpointer, setup_checkpointer_tables, ) -from app.config import config +from app.config import config, initialize_llm_router from app.db import User, create_db_and_tables, get_async_session from app.routes import router as crud_router from app.schemas import UserCreate, UserRead, UserUpdate @@ -23,6 +23,8 @@ async def lifespan(app: FastAPI): await create_db_and_tables() # Setup LangGraph checkpointer tables for conversation persistence await setup_checkpointer_tables() + # Initialize LLM Router for Auto mode load balancing + initialize_llm_router() # Seed Surfsense documentation await seed_surfsense_docs() yield diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index f7bea8cc3..8858c2619 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -4,11 +4,25 @@ import os from celery import Celery from celery.schedules import crontab +from celery.signals import worker_process_init from dotenv import load_dotenv # Load environment variables load_dotenv() + +@worker_process_init.connect +def init_worker(**kwargs): + """Initialize the LLM Router when a Celery worker process starts. + + This ensures the Auto mode (LiteLLM Router) is available for background tasks + like document summarization. + """ + from app.config import initialize_llm_router + + initialize_llm_router() + + # Get Celery configuration from environment CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0") diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index b0c1a2c09..261df4974 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -48,6 +48,63 @@ def load_global_llm_configs(): return [] +def load_router_settings(): + """ + Load router settings for Auto mode from YAML file. + Falls back to default settings if not found. + + Returns: + dict: Router settings dictionary + """ + # Default router settings + default_settings = { + "routing_strategy": "usage-based-routing", + "num_retries": 3, + "allowed_fails": 3, + "cooldown_time": 60, + } + + # Try main config file first + global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" + + if not global_config_file.exists(): + return default_settings + + try: + with open(global_config_file, encoding="utf-8") as f: + data = yaml.safe_load(f) + settings = data.get("router_settings", {}) + # Merge with defaults + return {**default_settings, **settings} + except Exception as e: + print(f"Warning: Failed to load router settings: {e}") + return default_settings + + +def initialize_llm_router(): + """ + Initialize the LLM Router service for Auto mode. + This should be called during application startup. + """ + global_configs = load_global_llm_configs() + router_settings = load_router_settings() + + if not global_configs: + print("Info: No global LLM configs found, Auto mode will not be available") + return + + try: + from app.services.llm_router_service import LLMRouterService + + LLMRouterService.initialize(global_configs, router_settings) + print( + f"Info: LLM Router initialized with {len(global_configs)} models " + f"(strategy: {router_settings.get('routing_strategy', 'usage-based-routing')})" + ) + except Exception as e: + print(f"Warning: Failed to initialize LLM Router: {e}") + + class Config: # Check if ffmpeg is installed if not is_ffmpeg_installed(): @@ -156,6 +213,9 @@ class Config: # These can be used as default options for users GLOBAL_LLM_CONFIGS = load_global_llm_configs() + # Router settings for Auto mode (LiteLLM Router load balancing) + ROUTER_SETTINGS = load_router_settings() + # Chonkie Configuration | Edit this to your needs EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL") # Azure OpenAI credentials from environment variables diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index 14a18c99a..75ea238e3 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -10,10 +10,39 @@ # These configurations will be available to all users as a convenient option # Users can choose to use these global configs or add their own # +# AUTO MODE (Recommended): +# - Auto mode (ID: 0) uses LiteLLM Router to automatically load balance across all global configs +# - This helps avoid rate limits by distributing requests across multiple providers +# - New users are automatically assigned Auto mode by default +# - Configure router_settings below to customize the load balancing behavior +# # Structure matches NewLLMConfig: # - LLM model configuration (provider, model_name, api_key, etc.) # - Prompt configuration (system_instructions, citations_enabled) +# Router Settings for Auto Mode +# These settings control how the LiteLLM Router distributes requests across models +router_settings: + # Routing strategy options: + # - "usage-based-routing": Routes to deployment with lowest current usage (recommended for rate limits) + # - "simple-shuffle": Random distribution with optional RPM/TPM weighting + # - "least-busy": Routes to least busy deployment + # - "latency-based-routing": Routes based on response latency + routing_strategy: "usage-based-routing" + + # Number of retries before failing + num_retries: 3 + + # Number of failures allowed before cooling down a deployment + allowed_fails: 3 + + # Cooldown time in seconds after allowed_fails is exceeded + cooldown_time: 60 + + # Fallback models (optional) - when primary fails, try these + # Format: [{"primary_model": ["fallback1", "fallback2"]}] + # fallbacks: [] + global_llm_configs: # Example: OpenAI GPT-4 Turbo with citations enabled - id: -1 @@ -23,6 +52,9 @@ global_llm_configs: model_name: "gpt-4-turbo-preview" api_key: "sk-your-openai-api-key-here" api_base: "" + # Rate limits for load balancing (requests/tokens per minute) + rpm: 500 # Requests per minute + tpm: 100000 # Tokens per minute litellm_params: temperature: 0.7 max_tokens: 4000 @@ -39,6 +71,8 @@ global_llm_configs: model_name: "claude-3-opus-20240229" api_key: "sk-ant-your-anthropic-api-key-here" api_base: "" + rpm: 1000 + tpm: 100000 litellm_params: temperature: 0.7 max_tokens: 4000 @@ -54,6 +88,8 @@ global_llm_configs: model_name: "gpt-3.5-turbo" api_key: "sk-your-openai-api-key-here" api_base: "" + rpm: 3500 # GPT-3.5 has higher rate limits + tpm: 200000 litellm_params: temperature: 0.5 max_tokens: 2000 @@ -69,6 +105,8 @@ global_llm_configs: model_name: "deepseek-chat" api_key: "your-deepseek-api-key-here" api_base: "https://api.deepseek.com/v1" + rpm: 60 + tpm: 100000 litellm_params: temperature: 0.7 max_tokens: 4000 @@ -92,6 +130,8 @@ global_llm_configs: model_name: "llama3-70b-8192" api_key: "your-groq-api-key-here" api_base: "" + rpm: 30 # Groq has lower rate limits on free tier + tpm: 14400 litellm_params: temperature: 0.7 max_tokens: 8000 @@ -100,6 +140,7 @@ global_llm_configs: citations_enabled: true # Notes: +# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing # - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB) # - IDs should be unique and sequential (e.g., -1, -2, -3, etc.) # - The 'api_key' field will not be exposed to users via API @@ -107,3 +148,5 @@ global_llm_configs: # - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty # - citations_enabled: true = include citation instructions, false = include anti-citation instructions # - All standard LiteLLM providers are supported +# - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute) +# These help the router distribute load evenly and avoid rate limit errors diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 876bc1d3c..360e0e975 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -807,11 +807,16 @@ class SearchSpace(BaseModel, TimestampMixin): ) # User's custom instructions # Search space-level LLM preferences (shared by all members) - # Note: These can be negative IDs for global configs (from YAML) or positive IDs for custom configs (from DB) - agent_llm_id = Column(Integer, nullable=True) # For agent/chat operations + # Note: ID values: + # - 0: Auto mode (uses LiteLLM Router for load balancing) - default for new search spaces + # - Negative IDs: Global configs from YAML + # - Positive IDs: Custom configs from DB (NewLLMConfig table) + agent_llm_id = Column( + Integer, nullable=True, default=0 + ) # For agent/chat operations, defaults to Auto mode document_summary_llm_id = Column( - Integer, nullable=True - ) # For document summarization + Integer, nullable=True, default=0 + ) # For document summarization, defaults to Auto mode user_id = Column( UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False diff --git a/surfsense_backend/app/routes/new_llm_config_routes.py b/surfsense_backend/app/routes/new_llm_config_routes.py index d54b95bad..150dfa9f1 100644 --- a/surfsense_backend/app/routes/new_llm_config_routes.py +++ b/surfsense_backend/app/routes/new_llm_config_routes.py @@ -50,13 +50,33 @@ async def get_global_new_llm_configs( These are pre-configured by the system administrator and available to all users. API keys are not exposed through this endpoint. - Global configs have negative IDs to distinguish from user-created configs. + Includes: + - Auto mode (ID 0): Uses LiteLLM Router for automatic load balancing + - Global configs (negative IDs): Individual pre-configured LLM providers """ try: global_configs = config.GLOBAL_LLM_CONFIGS - # Transform to new structure, hiding API keys - safe_configs = [] + # Start with Auto mode as the first option (recommended default) + safe_configs = [ + { + "id": 0, + "name": "Auto (Load Balanced)", + "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling. Recommended for most users.", + "provider": "AUTO", + "custom_provider": None, + "model_name": "auto", + "api_base": None, + "litellm_params": {}, + "system_instructions": "", + "use_default_system_instructions": True, + "citations_enabled": True, + "is_global": True, + "is_auto_mode": True, + } + ] + + # Add individual global configs for cfg in global_configs: safe_config = { "id": cfg.get("id"), diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 147f515b3..297f9be5f 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -314,11 +314,29 @@ async def _get_llm_config_by_id( ) -> dict | None: """ Get an LLM config by ID as a dictionary. Returns database config for positive IDs, - global config for negative IDs, or None if ID is None. + global config for negative IDs, Auto mode config for ID 0, or None if ID is None. """ if config_id is None: return None + # Auto mode (ID 0) - uses LiteLLM Router for load balancing + if config_id == 0: + return { + "id": 0, + "name": "Auto (Load Balanced)", + "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling", + "provider": "AUTO", + "custom_provider": None, + "model_name": "auto", + "api_base": None, + "litellm_params": {}, + "system_instructions": "", + "use_default_system_instructions": True, + "citations_enabled": True, + "is_global": True, + "is_auto_mode": True, + } + if config_id < 0: # Global config - find from YAML global_configs = config.GLOBAL_LLM_CONFIGS diff --git a/surfsense_backend/app/schemas/new_llm_config.py b/surfsense_backend/app/schemas/new_llm_config.py index 67979f176..286c07843 100644 --- a/surfsense_backend/app/schemas/new_llm_config.py +++ b/surfsense_backend/app/schemas/new_llm_config.py @@ -135,14 +135,19 @@ class GlobalNewLLMConfigRead(BaseModel): Schema for reading global LLM configs from YAML. Global configs have negative IDs and no search_space_id. API key is hidden for security. + + ID 0 is reserved for Auto mode which uses LiteLLM Router for load balancing. """ - id: int = Field(..., description="Negative ID for global configs") + id: int = Field( + ..., + description="Config ID: 0 for Auto mode, negative for global configs", + ) name: str description: str | None = None # LLM Model Configuration (no api_key) - provider: str # String because YAML doesn't enforce enum + provider: str # String because YAML doesn't enforce enum, "AUTO" for Auto mode custom_provider: str | None = None model_name: str api_base: str | None = None @@ -154,6 +159,7 @@ class GlobalNewLLMConfigRead(BaseModel): citations_enabled: bool = True is_global: bool = True # Always true for global configs + is_auto_mode: bool = False # True only for Auto mode (ID 0) # ============================================================================= diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py new file mode 100644 index 000000000..95c0d116b --- /dev/null +++ b/surfsense_backend/app/services/llm_router_service.py @@ -0,0 +1,632 @@ +""" +LiteLLM Router Service for Load Balancing + +This module provides a singleton LiteLLM Router for automatic load balancing +across multiple LLM deployments. It handles: +- Rate limit management with automatic cooldowns +- Automatic failover and retries +- Usage-based routing to distribute load evenly + +The router is initialized from global LLM configs and provides both +synchronous ChatLiteLLM-like interface and async methods. +""" + +import logging +from typing import Any + +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from litellm import Router + +logger = logging.getLogger(__name__) + +# Special ID for Auto mode - uses router for load balancing +AUTO_MODE_ID = 0 + +# Provider mapping for LiteLLM model string construction +PROVIDER_MAP = { + "OPENAI": "openai", + "ANTHROPIC": "anthropic", + "GROQ": "groq", + "COHERE": "cohere", + "GOOGLE": "gemini", + "OLLAMA": "ollama", + "MISTRAL": "mistral", + "AZURE_OPENAI": "azure", + "OPENROUTER": "openrouter", + "COMETAPI": "cometapi", + "XAI": "xai", + "BEDROCK": "bedrock", + "AWS_BEDROCK": "bedrock", # Legacy support + "VERTEX_AI": "vertex_ai", + "TOGETHER_AI": "together_ai", + "FIREWORKS_AI": "fireworks_ai", + "REPLICATE": "replicate", + "PERPLEXITY": "perplexity", + "ANYSCALE": "anyscale", + "DEEPINFRA": "deepinfra", + "CEREBRAS": "cerebras", + "SAMBANOVA": "sambanova", + "AI21": "ai21", + "CLOUDFLARE": "cloudflare", + "DATABRICKS": "databricks", + "DEEPSEEK": "openai", + "ALIBABA_QWEN": "openai", + "MOONSHOT": "openai", + "ZHIPU": "openai", + "HUGGINGFACE": "huggingface", + "CUSTOM": "custom", +} + + +class LLMRouterService: + """ + Singleton service for managing LiteLLM Router. + + The router provides automatic load balancing, failover, and rate limit + handling across multiple LLM deployments. + """ + + _instance = None + _router: Router | None = None + _model_list: list[dict] = [] + _router_settings: dict = {} + _initialized: bool = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + @classmethod + def get_instance(cls) -> "LLMRouterService": + """Get the singleton instance of the router service.""" + if cls._instance is None: + cls._instance = cls() + return cls._instance + + @classmethod + def initialize( + cls, + global_configs: list[dict], + router_settings: dict | None = None, + ) -> None: + """ + Initialize the router with global LLM configurations. + + Args: + global_configs: List of global LLM config dictionaries from YAML + router_settings: Optional router settings (routing_strategy, num_retries, etc.) + """ + instance = cls.get_instance() + + if instance._initialized: + logger.debug("LLM Router already initialized, skipping") + return + + # Build model list from global configs + model_list = [] + for config in global_configs: + deployment = cls._config_to_deployment(config) + if deployment: + model_list.append(deployment) + + if not model_list: + logger.warning("No valid LLM configs found for router initialization") + return + + instance._model_list = model_list + instance._router_settings = router_settings or {} + + # Default router settings optimized for rate limit handling + default_settings = { + "routing_strategy": "usage-based-routing", # Best for rate limit management + "num_retries": 3, + "allowed_fails": 3, + "cooldown_time": 60, # Cooldown for 60 seconds after failures + "retry_after": 5, # Wait 5 seconds between retries + } + + # Merge with provided settings + final_settings = {**default_settings, **instance._router_settings} + + try: + instance._router = Router( + model_list=model_list, + routing_strategy=final_settings.get( + "routing_strategy", "usage-based-routing" + ), + num_retries=final_settings.get("num_retries", 3), + allowed_fails=final_settings.get("allowed_fails", 3), + cooldown_time=final_settings.get("cooldown_time", 60), + set_verbose=False, # Disable verbose logging in production + ) + instance._initialized = True + logger.info( + f"LLM Router initialized with {len(model_list)} deployments, " + f"strategy: {final_settings.get('routing_strategy')}" + ) + except Exception as e: + logger.error(f"Failed to initialize LLM Router: {e}") + instance._router = None + + @classmethod + def _config_to_deployment(cls, config: dict) -> dict | None: + """ + Convert a global LLM config to a router deployment entry. + + Args: + config: Global LLM config dictionary + + Returns: + Router deployment dictionary or None if invalid + """ + try: + # Skip if essential fields are missing + if not config.get("model_name") or not config.get("api_key"): + return None + + # Build model string + if config.get("custom_provider"): + model_string = f"{config['custom_provider']}/{config['model_name']}" + else: + provider = config.get("provider", "").upper() + provider_prefix = PROVIDER_MAP.get(provider, provider.lower()) + model_string = f"{provider_prefix}/{config['model_name']}" + + # Build litellm params + litellm_params = { + "model": model_string, + "api_key": config.get("api_key"), + } + + # Add optional api_base + if config.get("api_base"): + litellm_params["api_base"] = config["api_base"] + + # Add any additional litellm parameters + if config.get("litellm_params"): + litellm_params.update(config["litellm_params"]) + + # Extract rate limits if provided + deployment = { + "model_name": "auto", # All configs use same alias for unified routing + "litellm_params": litellm_params, + } + + # Add rate limits from config if available + if config.get("rpm"): + deployment["rpm"] = config["rpm"] + if config.get("tpm"): + deployment["tpm"] = config["tpm"] + + return deployment + + except Exception as e: + logger.warning(f"Failed to convert config to deployment: {e}") + return None + + @classmethod + def get_router(cls) -> Router | None: + """Get the initialized router instance.""" + instance = cls.get_instance() + return instance._router + + @classmethod + def is_initialized(cls) -> bool: + """Check if the router has been initialized.""" + instance = cls.get_instance() + return instance._initialized and instance._router is not None + + @classmethod + def get_model_count(cls) -> int: + """Get the number of models in the router.""" + instance = cls.get_instance() + return len(instance._model_list) + + +class ChatLiteLLMRouter(BaseChatModel): + """ + A LangChain-compatible chat model that uses LiteLLM Router for load balancing. + + This wraps the LiteLLM Router to provide the same interface as ChatLiteLLM, + making it a drop-in replacement for auto-mode routing. + """ + + # Use model_config for Pydantic v2 compatibility + model_config = {"arbitrary_types_allowed": True} + + # Public attributes that Pydantic will manage + model: str = "auto" + streaming: bool = True + + # Bound tools and tool choice for tool calling + _bound_tools: list[dict] | None = None + _tool_choice: str | dict | None = None + _router: Router | None = None + + def __init__( + self, + router: Router | None = None, + bound_tools: list[dict] | None = None, + tool_choice: str | dict | None = None, + **kwargs, + ): + """ + Initialize the ChatLiteLLMRouter. + + Args: + router: LiteLLM Router instance. If None, uses the global singleton. + bound_tools: Pre-bound tools for tool calling + tool_choice: Tool choice configuration + """ + try: + super().__init__(**kwargs) + # Store router and tools as private attributes + resolved_router = router or LLMRouterService.get_router() + object.__setattr__(self, "_router", resolved_router) + object.__setattr__(self, "_bound_tools", bound_tools) + object.__setattr__(self, "_tool_choice", tool_choice) + if not self._router: + raise ValueError( + "LLM Router not initialized. Call LLMRouterService.initialize() first." + ) + logger.info( + f"ChatLiteLLMRouter initialized with {LLMRouterService.get_model_count()} models" + ) + except Exception as e: + logger.error(f"Failed to initialize ChatLiteLLMRouter: {e}") + raise + + @property + def _llm_type(self) -> str: + return "litellm-router" + + @property + def _identifying_params(self) -> dict[str, Any]: + return { + "model": self.model, + "model_count": LLMRouterService.get_model_count(), + } + + def bind_tools( + self, + tools: list[Any], + *, + tool_choice: str | dict | None = None, + **kwargs: Any, + ) -> "ChatLiteLLMRouter": + """ + Bind tools to the model for function/tool calling. + + Args: + tools: List of tools to bind (can be LangChain tools, Pydantic models, or dicts) + tool_choice: Optional tool choice strategy ("auto", "required", "none", or specific tool) + **kwargs: Additional arguments + + Returns: + New ChatLiteLLMRouter instance with tools bound + """ + from langchain_core.utils.function_calling import convert_to_openai_tool + + # Convert tools to OpenAI format + formatted_tools = [] + for tool in tools: + if isinstance(tool, dict): + # Already in dict format + formatted_tools.append(tool) + else: + # Convert using LangChain utility + try: + formatted_tools.append(convert_to_openai_tool(tool)) + except Exception as e: + logger.warning(f"Failed to convert tool {tool}: {e}") + continue + + # Create a new instance with tools bound + return ChatLiteLLMRouter( + router=self._router, + bound_tools=formatted_tools if formatted_tools else None, + tool_choice=tool_choice, + model=self.model, + streaming=self.streaming, + **kwargs, + ) + + def _generate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + """ + Generate a response using the router (synchronous). + """ + if not self._router: + raise ValueError("Router not initialized") + + # Convert LangChain messages to OpenAI format + formatted_messages = self._convert_messages(messages) + + # Add tools if bound + call_kwargs = {**kwargs} + if self._bound_tools: + call_kwargs["tools"] = self._bound_tools + if self._tool_choice is not None: + call_kwargs["tool_choice"] = self._tool_choice + + # Call router completion + response = self._router.completion( + model=self.model, + messages=formatted_messages, + stop=stop, + **call_kwargs, + ) + + # Convert response to ChatResult with potential tool calls + message = self._convert_response_to_message(response.choices[0].message) + generation = ChatGeneration(message=message) + + return ChatResult(generations=[generation]) + + async def _agenerate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + """ + Generate a response using the router (asynchronous). + """ + if not self._router: + raise ValueError("Router not initialized") + + # Convert LangChain messages to OpenAI format + formatted_messages = self._convert_messages(messages) + + # Add tools if bound + call_kwargs = {**kwargs} + if self._bound_tools: + call_kwargs["tools"] = self._bound_tools + if self._tool_choice is not None: + call_kwargs["tool_choice"] = self._tool_choice + + # Call router async completion + response = await self._router.acompletion( + model=self.model, + messages=formatted_messages, + stop=stop, + **call_kwargs, + ) + + # Convert response to ChatResult with potential tool calls + message = self._convert_response_to_message(response.choices[0].message) + generation = ChatGeneration(message=message) + + return ChatResult(generations=[generation]) + + def _stream( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ): + """ + Stream a response using the router (synchronous). + """ + if not self._router: + raise ValueError("Router not initialized") + + formatted_messages = self._convert_messages(messages) + + # Add tools if bound + call_kwargs = {**kwargs} + if self._bound_tools: + call_kwargs["tools"] = self._bound_tools + if self._tool_choice is not None: + call_kwargs["tool_choice"] = self._tool_choice + + # Call router completion with streaming + response = self._router.completion( + model=self.model, + messages=formatted_messages, + stop=stop, + stream=True, + **call_kwargs, + ) + + # Yield chunks + for chunk in response: + if hasattr(chunk, "choices") and chunk.choices: + delta = chunk.choices[0].delta + chunk_msg = self._convert_delta_to_chunk(delta) + if chunk_msg: + yield ChatGenerationChunk(message=chunk_msg) + + async def _astream( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ): + """ + Stream a response using the router (asynchronous). + """ + if not self._router: + raise ValueError("Router not initialized") + + formatted_messages = self._convert_messages(messages) + + # Add tools if bound + call_kwargs = {**kwargs} + if self._bound_tools: + call_kwargs["tools"] = self._bound_tools + if self._tool_choice is not None: + call_kwargs["tool_choice"] = self._tool_choice + + # Call router async completion with streaming + response = await self._router.acompletion( + model=self.model, + messages=formatted_messages, + stop=stop, + stream=True, + **call_kwargs, + ) + + # Yield chunks asynchronously + async for chunk in response: + if hasattr(chunk, "choices") and chunk.choices: + delta = chunk.choices[0].delta + chunk_msg = self._convert_delta_to_chunk(delta) + if chunk_msg: + yield ChatGenerationChunk(message=chunk_msg) + + def _convert_messages(self, messages: list[BaseMessage]) -> list[dict]: + """Convert LangChain messages to OpenAI format.""" + from langchain_core.messages import ( + AIMessage as AIMsg, + HumanMessage, + SystemMessage, + ToolMessage, + ) + + result = [] + for msg in messages: + if isinstance(msg, SystemMessage): + result.append({"role": "system", "content": msg.content}) + elif isinstance(msg, HumanMessage): + result.append({"role": "user", "content": msg.content}) + elif isinstance(msg, AIMsg): + ai_msg: dict[str, Any] = {"role": "assistant"} + if msg.content: + ai_msg["content"] = msg.content + # Handle tool calls + if hasattr(msg, "tool_calls") and msg.tool_calls: + ai_msg["tool_calls"] = [ + { + "id": tc.get("id", ""), + "type": "function", + "function": { + "name": tc.get("name", ""), + "arguments": tc.get("args", "{}") + if isinstance(tc.get("args"), str) + else __import__("json").dumps(tc.get("args", {})), + }, + } + for tc in msg.tool_calls + ] + result.append(ai_msg) + elif isinstance(msg, ToolMessage): + result.append( + { + "role": "tool", + "tool_call_id": msg.tool_call_id, + "content": msg.content + if isinstance(msg.content, str) + else __import__("json").dumps(msg.content), + } + ) + else: + # Fallback for other message types + role = getattr(msg, "type", "user") + if role == "human": + role = "user" + elif role == "ai": + role = "assistant" + result.append({"role": role, "content": msg.content}) + + return result + + def _convert_response_to_message(self, response_message: Any) -> AIMessage: + """Convert a LiteLLM response message to a LangChain AIMessage.""" + import json + + content = getattr(response_message, "content", None) or "" + + # Check for tool calls + tool_calls = [] + if hasattr(response_message, "tool_calls") and response_message.tool_calls: + for tc in response_message.tool_calls: + tool_call = { + "id": tc.id if hasattr(tc, "id") else "", + "name": tc.function.name if hasattr(tc, "function") else "", + "args": {}, + } + # Parse arguments + if hasattr(tc, "function") and hasattr(tc.function, "arguments"): + try: + tool_call["args"] = json.loads(tc.function.arguments) + except json.JSONDecodeError: + tool_call["args"] = tc.function.arguments + tool_calls.append(tool_call) + + if tool_calls: + return AIMessage(content=content, tool_calls=tool_calls) + return AIMessage(content=content) + + def _convert_delta_to_chunk(self, delta: Any) -> AIMessageChunk | None: + """Convert a streaming delta to an AIMessageChunk.""" + + content = getattr(delta, "content", None) or "" + + # Check for tool calls in delta + tool_call_chunks = [] + if hasattr(delta, "tool_calls") and delta.tool_calls: + for tc in delta.tool_calls: + chunk = { + "index": tc.index if hasattr(tc, "index") else 0, + "id": tc.id if hasattr(tc, "id") else None, + "name": tc.function.name + if hasattr(tc, "function") and hasattr(tc.function, "name") + else None, + "args": tc.function.arguments + if hasattr(tc, "function") and hasattr(tc.function, "arguments") + else "", + } + tool_call_chunks.append(chunk) + + if content or tool_call_chunks: + if tool_call_chunks: + return AIMessageChunk( + content=content, tool_call_chunks=tool_call_chunks + ) + return AIMessageChunk(content=content) + + return None + + +def get_auto_mode_llm() -> ChatLiteLLMRouter | None: + """ + Get a ChatLiteLLMRouter instance for auto mode. + + Returns: + ChatLiteLLMRouter instance or None if router not initialized + """ + if not LLMRouterService.is_initialized(): + logger.warning("LLM Router not initialized for auto mode") + return None + + try: + return ChatLiteLLMRouter() + except Exception as e: + logger.error(f"Failed to create ChatLiteLLMRouter: {e}") + return None + + +def is_auto_mode(llm_config_id: int | None) -> bool: + """ + Check if the given LLM config ID represents Auto mode. + + Args: + llm_config_id: The LLM config ID to check + + Returns: + True if this is Auto mode, False otherwise + """ + return llm_config_id == AUTO_MODE_ID diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index 33f073d61..f0198d91f 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -8,6 +8,12 @@ from sqlalchemy.future import select from app.config import config from app.db import NewLLMConfig, SearchSpace +from app.services.llm_router_service import ( + AUTO_MODE_ID, + ChatLiteLLMRouter, + LLMRouterService, + is_auto_mode, +) # Configure litellm to automatically drop unsupported parameters litellm.drop_params = True @@ -23,15 +29,26 @@ class LLMRole: def get_global_llm_config(llm_config_id: int) -> dict | None: """ Get a global LLM configuration by ID. - Global configs have negative IDs. + Global configs have negative IDs. ID 0 is reserved for Auto mode. Args: - llm_config_id: The ID of the global config (should be negative) + llm_config_id: The ID of the global config (should be negative or 0 for Auto) Returns: dict: Global config dictionary or None if not found """ - if llm_config_id >= 0: + # Auto mode (ID 0) is handled separately via the router + if llm_config_id == AUTO_MODE_ID: + return { + "id": AUTO_MODE_ID, + "name": "Auto (Load Balanced)", + "description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling", + "provider": "AUTO", + "model_name": "auto", + "is_auto_mode": True, + } + + if llm_config_id > 0: return None for cfg in config.GLOBAL_LLM_CONFIGS: @@ -145,19 +162,22 @@ async def validate_llm_config( async def get_search_space_llm_instance( session: AsyncSession, search_space_id: int, role: str -) -> ChatLiteLLM | None: +) -> ChatLiteLLM | ChatLiteLLMRouter | None: """ Get a ChatLiteLLM instance for a specific search space and role. LLM preferences are stored at the search space level and shared by all members. + If Auto mode (ID 0) is configured, returns a ChatLiteLLMRouter that uses + LiteLLM Router for automatic load balancing across available providers. + Args: session: Database session search_space_id: Search Space ID role: LLM role ('agent' or 'document_summary') Returns: - ChatLiteLLM instance or None if not found + ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found """ try: # Get the search space with its LLM preferences @@ -180,10 +200,28 @@ async def get_search_space_llm_instance( logger.error(f"Invalid LLM role: {role}") return None - if not llm_config_id: + if llm_config_id is None: logger.error(f"No {role} LLM configured for search space {search_space_id}") return None + # Check for Auto mode (ID 0) - use router for load balancing + if is_auto_mode(llm_config_id): + if not LLMRouterService.is_initialized(): + logger.error( + "Auto mode requested but LLM Router not initialized. " + "Ensure global_llm_config.yaml exists with valid configs." + ) + return None + + try: + logger.debug( + f"Using Auto mode (LLM Router) for search space {search_space_id}, role {role}" + ) + return ChatLiteLLMRouter() + except Exception as e: + logger.error(f"Failed to create ChatLiteLLMRouter: {e}") + return None + # Check if this is a global config (negative ID) if llm_config_id < 0: global_config = get_global_llm_config(llm_config_id) @@ -328,14 +366,14 @@ async def get_search_space_llm_instance( async def get_agent_llm( session: AsyncSession, search_space_id: int -) -> ChatLiteLLM | None: +) -> ChatLiteLLM | ChatLiteLLMRouter | None: """Get the search space's agent LLM instance for chat operations.""" return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT) async def get_document_summary_llm( session: AsyncSession, search_space_id: int -) -> ChatLiteLLM | None: +) -> ChatLiteLLM | ChatLiteLLMRouter | None: """Get the search space's document summary LLM instance.""" return await get_search_space_llm_instance( session, search_space_id, LLMRole.DOCUMENT_SUMMARY @@ -345,7 +383,7 @@ async def get_document_summary_llm( # Backward-compatible alias (LLM preferences are now per-search-space, not per-user) async def get_user_long_context_llm( session: AsyncSession, user_id: str, search_space_id: int -) -> ChatLiteLLM | None: +) -> ChatLiteLLM | ChatLiteLLMRouter | None: """ Deprecated: Use get_document_summary_llm instead. The user_id parameter is ignored as LLM preferences are now per-search-space. diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 12d7cbd4e..688777203 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1215,8 +1215,12 @@ async def stream_new_chat( except Exception as e: # Handle any errors + import traceback + error_message = f"Error during chat: {e!s}" print(f"[stream_new_chat] {error_message}") + print(f"[stream_new_chat] Exception type: {type(e).__name__}") + print(f"[stream_new_chat] Traceback:\n{traceback.format_exc()}") # Close any open text block if current_text_id is not None: diff --git a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx index 8418d4719..b9ddb9b74 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx @@ -46,7 +46,13 @@ export function DashboardClientLayout({ const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); const isOnboardingComplete = useCallback(() => { - return !!(preferences.agent_llm_id && preferences.document_summary_llm_id); + // Check that both LLM IDs are set (including 0 for Auto mode) + return ( + preferences.agent_llm_id !== null && + preferences.agent_llm_id !== undefined && + preferences.document_summary_llm_id !== null && + preferences.document_summary_llm_id !== undefined + ); }, [preferences]); const { data: access = null, isLoading: accessLoading } = useAtomValue(myAccessAtom); diff --git a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx index 1b7fa297f..8709a4491 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx @@ -53,8 +53,12 @@ export default function OnboardPage() { } }, []); - // Check if onboarding is already complete - const isOnboardingComplete = preferences.agent_llm_id && preferences.document_summary_llm_id; + // Check if onboarding is already complete (including 0 for Auto mode) + const isOnboardingComplete = + preferences.agent_llm_id !== null && + preferences.agent_llm_id !== undefined && + preferences.document_summary_llm_id !== null && + preferences.document_summary_llm_id !== undefined; // If onboarding is already complete, redirect immediately useEffect(() => { diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index db5015023..e063d780e 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -485,7 +485,8 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false if (agentLlmId === null || agentLlmId === undefined) return false; // Check if the configured model actually exists - if (agentLlmId < 0) { + // Auto mode (ID 0) and global configs (negative IDs) are in globalConfigs + if (agentLlmId <= 0) { return globalConfigs?.some((c) => c.id === agentLlmId) ?? false; } return userConfigs?.some((c) => c.id === agentLlmId) ?? false; diff --git a/surfsense_web/components/new-chat/model-config-sidebar.tsx b/surfsense_web/components/new-chat/model-config-sidebar.tsx index 2e22612ad..3e731c164 100644 --- a/surfsense_web/components/new-chat/model-config-sidebar.tsx +++ b/surfsense_web/components/new-chat/model-config-sidebar.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtomValue } from "jotai"; -import { AlertCircle, Bot, ChevronRight, Globe, User, X } from "lucide-react"; +import { AlertCircle, Bot, ChevronRight, Globe, Shuffle, User, X, Zap } from "lucide-react"; import { AnimatePresence, motion } from "motion/react"; import { useCallback, useEffect, useState } from "react"; import { createPortal } from "react-dom"; @@ -62,9 +62,13 @@ export function ModelConfigSidebar({ return () => window.removeEventListener("keydown", handleEscape); }, [open, onOpenChange]); + // Check if this is Auto mode + const isAutoMode = config && "is_auto_mode" in config && config.is_auto_mode; + // Get title based on mode const getTitle = () => { if (mode === "create") return "Add New Configuration"; + if (isAutoMode) return "Auto Mode (Load Balanced)"; if (isGlobal) return "View Global Configuration"; return "Edit Configuration"; }; @@ -187,15 +191,37 @@ export function ModelConfigSidebar({ )} > {/* Header */} -
+
-
- +
+ {isAutoMode ? ( + + ) : ( + + )}

{getTitle()}

- {isGlobal ? ( + {isAutoMode ? ( + + + Recommended + + ) : isGlobal ? ( Global @@ -206,7 +232,7 @@ export function ModelConfigSidebar({ Custom ) : null} - {config && ( + {config && !isAutoMode && ( {config.model_name} )}
@@ -226,8 +252,19 @@ export function ModelConfigSidebar({ {/* Content - use overflow-y-auto instead of ScrollArea for better compatibility */}
+ {/* Auto mode info banner */} + {isAutoMode && ( + + + + Auto mode automatically distributes requests across all available LLM + providers to optimize performance and avoid rate limits. + + + )} + {/* Global config notice */} - {isGlobal && mode !== "create" && ( + {isGlobal && !isAutoMode && mode !== "create" && ( @@ -247,6 +284,87 @@ export function ModelConfigSidebar({ mode="create" submitLabel="Create & Use" /> + ) : isAutoMode && config ? ( + // Special view for Auto mode +
+ {/* Auto Mode Features */} +
+
+
+ How It Works +
+

{config.description}

+
+ +
+ +
+
+ Key Benefits +
+
+
+ +
+

+ Automatic Load Balancing +

+

+ Distributes requests across all configured LLM providers +

+
+
+
+ +
+

+ Rate Limit Protection +

+

+ Automatically handles rate limits with cooldowns and retries +

+
+
+
+ +
+

+ Automatic Failover +

+

+ Falls back to other providers if one becomes unavailable +

+
+
+
+
+
+ + {/* Action Buttons */} +
+ + +
+
) : isGlobal && config ? ( // Read-only view for global configs
diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx index 515fe9f32..ec1143e04 100644 --- a/surfsense_web/components/new-chat/model-selector.tsx +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -10,6 +10,7 @@ import { Globe, Plus, Settings2, + Shuffle, Sparkles, User, Zap, @@ -43,8 +44,14 @@ import type { import { cn } from "@/lib/utils"; // Provider icons mapping -const getProviderIcon = (provider: string) => { +const getProviderIcon = (provider: string, isAutoMode?: boolean) => { const iconClass = "size-4"; + + // Special icon for Auto mode + if (isAutoMode || provider?.toUpperCase() === "AUTO") { + return ; + } + switch (provider?.toUpperCase()) { case "OPENAI": return ; @@ -90,14 +97,19 @@ export function ModelSelector({ onEdit, onAddNew, className }: ModelSelectorProp const agentLlmId = preferences.agent_llm_id; if (agentLlmId === null || agentLlmId === undefined) return null; - // Check if it's a global config (negative ID) - if (agentLlmId < 0) { + // Check if it's Auto mode (ID 0) or global config (negative ID) + if (agentLlmId <= 0) { return globalConfigs?.find((c) => c.id === agentLlmId) ?? null; } // Otherwise, check user configs return userConfigs?.find((c) => c.id === agentLlmId) ?? null; }, [preferences, globalConfigs, userConfigs]); + // Check if current config is Auto mode + const isCurrentAutoMode = useMemo(() => { + return currentConfig && "is_auto_mode" in currentConfig && currentConfig.is_auto_mode; + }, [currentConfig]); + // Filter configs based on search const filteredGlobalConfigs = useMemo(() => { if (!globalConfigs) return []; @@ -184,14 +196,23 @@ export function ModelSelector({ onEdit, onAddNew, className }: ModelSelectorProp ) : currentConfig ? ( <> - {getProviderIcon(currentConfig.provider)} + {getProviderIcon(currentConfig.provider, isCurrentAutoMode ?? false)} {currentConfig.name} - - {currentConfig.model_name.split("/").pop()?.slice(0, 10) || - currentConfig.model_name.slice(0, 10)} - + {isCurrentAutoMode ? ( + + Balanced + + ) : ( + + {currentConfig.model_name.split("/").pop()?.slice(0, 10) || + currentConfig.model_name.slice(0, 10)} + + )} ) : ( <> @@ -246,6 +267,7 @@ export function ModelSelector({ onEdit, onAddNew, className }: ModelSelectorProp
{filteredGlobalConfigs.map((config) => { const isSelected = currentConfig?.id === config.id; + const isAutoMode = "is_auto_mode" in config && config.is_auto_mode; return (
-
{getProviderIcon(config.provider)}
+
+ {getProviderIcon(config.provider, isAutoMode)} +
{config.name} + {isAutoMode && ( + + Recommended + + )} {isSelected && }
- {config.model_name} + {isAutoMode ? "Auto load balancing" : config.model_name} - {config.citations_enabled && ( + {!isAutoMode && config.citations_enabled && (
- + {!isAutoMode && ( + + )}
); diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index 649507d77..dac68a358 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -1,7 +1,16 @@ "use client"; import { useAtomValue } from "jotai"; -import { AlertCircle, Bot, CheckCircle, FileText, RefreshCw, RotateCcw, Save } from "lucide-react"; +import { + AlertCircle, + Bot, + CheckCircle, + FileText, + RefreshCw, + RotateCcw, + Save, + Shuffle, +} from "lucide-react"; import { motion } from "motion/react"; import { useEffect, useState } from "react"; import { toast } from "sonner"; @@ -24,6 +33,7 @@ import { SelectValue, } from "@/components/ui/select"; import { Spinner } from "@/components/ui/spinner"; +import { cn } from "@/lib/utils"; const ROLE_DESCRIPTIONS = { agent: { @@ -71,8 +81,8 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { const { mutateAsync: updatePreferences } = useAtomValue(updateLLMPreferencesMutationAtom); const [assignments, setAssignments] = useState({ - agent_llm_id: preferences.agent_llm_id || "", - document_summary_llm_id: preferences.document_summary_llm_id || "", + agent_llm_id: preferences.agent_llm_id ?? "", + document_summary_llm_id: preferences.document_summary_llm_id ?? "", }); const [hasChanges, setHasChanges] = useState(false); @@ -80,8 +90,8 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { useEffect(() => { const newAssignments = { - agent_llm_id: preferences.agent_llm_id || "", - document_summary_llm_id: preferences.document_summary_llm_id || "", + agent_llm_id: preferences.agent_llm_id ?? "", + document_summary_llm_id: preferences.document_summary_llm_id ?? "", }; setAssignments(newAssignments); setHasChanges(false); @@ -97,8 +107,8 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { // Check if there are changes compared to current preferences const currentPrefs = { - agent_llm_id: preferences.agent_llm_id || "", - document_summary_llm_id: preferences.document_summary_llm_id || "", + agent_llm_id: preferences.agent_llm_id ?? "", + document_summary_llm_id: preferences.document_summary_llm_id ?? "", }; const hasChangesNow = Object.keys(newAssignments).some( @@ -141,13 +151,19 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { const handleReset = () => { setAssignments({ - agent_llm_id: preferences.agent_llm_id || "", - document_summary_llm_id: preferences.document_summary_llm_id || "", + agent_llm_id: preferences.agent_llm_id ?? "", + document_summary_llm_id: preferences.document_summary_llm_id ?? "", }); setHasChanges(false); }; - const isAssignmentComplete = assignments.agent_llm_id && assignments.document_summary_llm_id; + const isAssignmentComplete = + assignments.agent_llm_id !== "" && + assignments.agent_llm_id !== null && + assignments.agent_llm_id !== undefined && + assignments.document_summary_llm_id !== "" && + assignments.document_summary_llm_id !== null && + assignments.document_summary_llm_id !== undefined; // Combine global and custom configs (new system) const allConfigs = [ @@ -300,22 +316,47 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
Global Configurations
- {globalConfigs.map((config) => ( - -
- - {config.provider} - - {config.name} - - ({config.model_name}) - - - 🌐 Global - -
-
- ))} + {globalConfigs.map((config) => { + const isAutoMode = + "is_auto_mode" in config && config.is_auto_mode; + return ( + +
+ {isAutoMode ? ( + + + AUTO + + ) : ( + + {config.provider} + + )} + {config.name} + {!isAutoMode && ( + + ({config.model_name}) + + )} + {isAutoMode ? ( + + Recommended + + ) : ( + + 🌐 Global + + )} +
+
+ ); + })} )} @@ -349,27 +390,65 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
{assignedConfig && ( -
+
- + {"is_auto_mode" in assignedConfig && assignedConfig.is_auto_mode ? ( + + ) : ( + + )} Assigned: - - {assignedConfig.provider} - - {assignedConfig.name} - {"is_global" in assignedConfig && assignedConfig.is_global && ( - - 🌐 Global + {"is_auto_mode" in assignedConfig && assignedConfig.is_auto_mode ? ( + + AUTO + + ) : ( + + {assignedConfig.provider} )} + {assignedConfig.name} + {"is_auto_mode" in assignedConfig && assignedConfig.is_auto_mode ? ( + + Recommended + + ) : ( + "is_global" in assignedConfig && + assignedConfig.is_global && ( + + 🌐 Global + + ) + )}
-
- Model: {assignedConfig.model_name} -
- {assignedConfig.api_base && ( -
- Base: {assignedConfig.api_base} + {"is_auto_mode" in assignedConfig && assignedConfig.is_auto_mode ? ( +
+ Automatically load balances across all available LLM providers
+ ) : ( + <> +
+ Model: {assignedConfig.model_name} +
+ {assignedConfig.api_base && ( +
+ Base: {assignedConfig.api_base} +
+ )} + )}
)} diff --git a/surfsense_web/contracts/types/new-llm-config.types.ts b/surfsense_web/contracts/types/new-llm-config.types.ts index c94178183..f397d4f08 100644 --- a/surfsense_web/contracts/types/new-llm-config.types.ts +++ b/surfsense_web/contracts/types/new-llm-config.types.ts @@ -136,14 +136,15 @@ export const getDefaultSystemInstructionsResponse = z.object({ /** * Global NewLLMConfig - from YAML, has negative IDs + * ID 0 is reserved for "Auto" mode which uses LiteLLM Router for load balancing */ export const globalNewLLMConfig = z.object({ - id: z.number(), // Negative IDs for global configs + id: z.number(), // 0 for Auto mode, negative IDs for global configs name: z.string(), description: z.string().nullable().optional(), // LLM Model Configuration (no api_key) - provider: z.string(), // String because YAML doesn't enforce enum + provider: z.string(), // String because YAML doesn't enforce enum, "AUTO" for Auto mode custom_provider: z.string().nullable().optional(), model_name: z.string(), api_base: z.string().nullable().optional(), @@ -155,6 +156,7 @@ export const globalNewLLMConfig = z.object({ citations_enabled: z.boolean().default(true), is_global: z.literal(true), + is_auto_mode: z.boolean().optional().default(false), // True only for Auto mode (ID 0) }); export const getGlobalNewLLMConfigsResponse = z.array(globalNewLLMConfig); From 6aa3d88f68e2599352f4c6e434410a74d72970d5 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:07:19 +0530 Subject: [PATCH 03/10] refactor(ElectricProvider): streamline initialization logic and enhance loading behavior for user-specific database --- .../components/providers/ElectricProvider.tsx | 48 +++++-------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/surfsense_web/components/providers/ElectricProvider.tsx b/surfsense_web/components/providers/ElectricProvider.tsx index 4aa83b304..aded9533a 100644 --- a/surfsense_web/components/providers/ElectricProvider.tsx +++ b/surfsense_web/components/providers/ElectricProvider.tsx @@ -1,6 +1,7 @@ "use client"; import { useAtomValue } from "jotai"; +import { usePathname } from "next/navigation"; import { useEffect, useRef, useState } from "react"; import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; @@ -18,15 +19,8 @@ interface ElectricProviderProps { } /** - * ElectricProvider initializes the Electric SQL client with user-specific PGlite database - * and provides it to children via context. - * - * KEY BEHAVIORS: - * 1. Single initialization point - only this provider creates the Electric client - * 2. Creates user-specific database (isolated per user) - * 3. Cleans up other users' databases on login - * 4. Re-initializes when user changes - * 5. Provides client via context - hooks should use useElectricClient() + * Initializes user-specific PGlite database with Electric SQL sync. + * Handles user isolation, cleanup, and re-initialization on user change. */ export function ElectricProvider({ children }: ElectricProviderProps) { const [electricClient, setElectricClient] = useState(null); @@ -38,15 +32,13 @@ export function ElectricProvider({ children }: ElectricProviderProps) { } = useAtomValue(currentUserAtom); const previousUserIdRef = useRef(null); const initializingRef = useRef(false); + const pathname = usePathname(); useEffect(() => { - // Skip on server side if (typeof window === "undefined") return; - // If no user is logged in, don't initialize Electric - // The app can still function without real-time sync for non-authenticated pages + // No user logged in - cleanup if previous user existed if (!isUserLoaded || !user?.id) { - // If we had a previous user and now logged out, cleanup if (previousUserIdRef.current && isElectricInitialized()) { console.log("[ElectricProvider] User logged out, cleaning up..."); cleanupElectric().then(() => { @@ -59,25 +51,17 @@ export function ElectricProvider({ children }: ElectricProviderProps) { const userId = String(user.id); - // If already initialized for THIS user, skip - if (electricClient && previousUserIdRef.current === userId) { + // Skip if already initialized for this user or currently initializing + if ((electricClient && previousUserIdRef.current === userId) || initializingRef.current) { return; } - // Prevent concurrent initialization attempts - if (initializingRef.current) { - return; - } - - // User changed or first initialization initializingRef.current = true; let mounted = true; async function init() { try { console.log(`[ElectricProvider] Initializing for user: ${userId}`); - - // If different user was previously initialized, cleanup will happen inside initElectric const client = await initElectric(userId); if (mounted) { @@ -90,7 +74,6 @@ export function ElectricProvider({ children }: ElectricProviderProps) { console.error("[ElectricProvider] Failed to initialize:", err); if (mounted) { setError(err instanceof Error ? err : new Error("Failed to initialize Electric SQL")); - // Set client to null so hooks know initialization failed setElectricClient(null); } } finally { @@ -101,38 +84,33 @@ export function ElectricProvider({ children }: ElectricProviderProps) { } init(); - return () => { mounted = false; }; }, [user?.id, isUserLoaded, electricClient]); - // Check if user is authenticated first (has bearer token) - // This prevents showing loading screen for unauthenticated users on homepage const hasToken = typeof window !== "undefined" && !!getBearerToken(); - // Determine if we should show loading - const shouldShowLoading = hasToken && isUserLoaded && !!user?.id && !electricClient && !error; + // Only block UI on dashboard routes; public pages render immediately + const requiresElectricLoading = pathname?.startsWith("/dashboard"); + const shouldShowLoading = + hasToken && isUserLoaded && !!user?.id && !electricClient && !error && requiresElectricLoading; - // Use global loading hook with ownership tracking - prevents flash during transitions useGlobalLoadingEffect(shouldShowLoading); - // For non-authenticated pages (like landing page), render immediately with null context - // Also render immediately if user query failed (e.g., token expired) + // Render immediately for unauthenticated users or failed user queries if (!hasToken || !isUserLoaded || !user?.id || isUserError) { return {children}; } - // Return children with null context while initializing - the global provider handles the loading UI + // Render with null context while initializing if (!electricClient && !error) { return {children}; } - // If there's an error, still render but warn if (error) { console.warn("[ElectricProvider] Initialization failed, sync may not work:", error.message); } - // Provide the Electric client to children return {children}; } From 8ca1b2b90adab9b8cde12a762003f1fc6a9e756a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:20:11 +0530 Subject: [PATCH 04/10] refactor(ElectricClient): update debug mode for electricSync to activate only in development environment --- surfsense_web/lib/electric/client.ts | 45 ++++++++++++++-------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts index 148da58ec..d25e268be 100644 --- a/surfsense_web/lib/electric/client.ts +++ b/surfsense_web/lib/electric/client.ts @@ -165,8 +165,8 @@ export async function initElectric(userId: string): Promise { dataDir: dbName, relaxedDurability: true, extensions: { - // Enable debug mode in electricSync to see detailed sync logs - electric: electricSync({ debug: true }), + // Enable debug mode in electricSync only in development + electric: electricSync({ debug: process.env.NODE_ENV === "development" }), live, // Enable live queries for real-time updates }, }); @@ -341,26 +341,27 @@ export async function initElectric(userId: string): Promise { console.log("[Electric] Where clause:", where, "Validated:", validatedWhere); try { - // Debug: Test Electric SQL connection directly first - // Use validatedWhere to ensure proper URL encoding - const testUrl = `${electricUrl}/v1/shape?table=${table}&offset=-1${validatedWhere ? `&where=${encodeURIComponent(validatedWhere)}` : ""}`; - console.log("[Electric] Testing Electric SQL directly:", testUrl); - try { - const testResponse = await fetch(testUrl); - const testHeaders = { - handle: testResponse.headers.get("electric-handle"), - offset: testResponse.headers.get("electric-offset"), - upToDate: testResponse.headers.get("electric-up-to-date"), - }; - console.log("[Electric] Direct Electric SQL response headers:", testHeaders); - const testData = await testResponse.json(); - console.log( - "[Electric] Direct Electric SQL data count:", - Array.isArray(testData) ? testData.length : "not array", - testData - ); - } catch (testErr) { - console.error("[Electric] Direct Electric SQL test failed:", testErr); + // Debug: Test Electric SQL connection directly first (DEV ONLY - skipped in production) + if (process.env.NODE_ENV === "development") { + const testUrl = `${electricUrl}/v1/shape?table=${table}&offset=-1${validatedWhere ? `&where=${encodeURIComponent(validatedWhere)}` : ""}`; + console.log("[Electric] Testing Electric SQL directly:", testUrl); + try { + const testResponse = await fetch(testUrl); + const testHeaders = { + handle: testResponse.headers.get("electric-handle"), + offset: testResponse.headers.get("electric-offset"), + upToDate: testResponse.headers.get("electric-up-to-date"), + }; + console.log("[Electric] Direct Electric SQL response headers:", testHeaders); + const testData = await testResponse.json(); + console.log( + "[Electric] Direct Electric SQL data count:", + Array.isArray(testData) ? testData.length : "not array", + testData + ); + } catch (testErr) { + console.error("[Electric] Direct Electric SQL test failed:", testErr); + } } // Use PGlite's electric sync plugin to sync the shape From 4526b656a48de4456802467d95bdec4746fd9361 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 30 Jan 2026 19:55:48 +0530 Subject: [PATCH 05/10] fix: update default date range for Google Calendar events and improve query parameter handling --- .../google_calendar_indexer.py | 6 ++-- .../app/utils/document_converters.py | 8 ++++- .../lib/apis/connectors-api.service.ts | 30 +++++++++++-------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index 2365ff984..31796b4cd 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -191,10 +191,10 @@ async def index_google_calendar_events( ) else: calculated_start_date = datetime.now() - timedelta( - days=30 - ) # Use 30 days as default for calendar events + days=365 + ) # Use 365 days as default for calendar events (matches frontend) logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (30 days ago) as start date" + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" ) # Use calculated dates if not provided diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index 279b1dbf6..d7e1db71d 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -11,7 +11,13 @@ def get_model_context_window(model_name: str) -> int: """Get the total context window size for a model (input + output tokens).""" try: model_info = get_model_info(model_name) - context_window = model_info.get("max_input_tokens", 4096) # Default fallback + context_window = model_info.get("max_input_tokens") + # Handle case where key exists but value is None + if context_window is None: + print( + f"Warning: max_input_tokens is None for {model_name}, using default 4096 tokens." + ) + return 4096 # Conservative fallback return context_window except Exception as e: print( diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts index 567db38de..10e08dc71 100644 --- a/surfsense_web/lib/apis/connectors-api.service.ts +++ b/surfsense_web/lib/apis/connectors-api.service.ts @@ -49,12 +49,14 @@ class ConnectorsApiService { throw new ValidationError(`Invalid request: ${errorMessage}`); } - // Transform query params to be string values + // Transform query params to be string values, filtering out undefined/null const transformedQueryParams = parsedRequest.data.queryParams ? Object.fromEntries( - Object.entries(parsedRequest.data.queryParams).map(([k, v]) => { - return [k, String(v)]; - }) + Object.entries(parsedRequest.data.queryParams) + .filter(([_, v]) => v !== undefined && v !== null) + .map(([k, v]) => { + return [k, String(v)]; + }) ) : undefined; @@ -102,11 +104,13 @@ class ConnectorsApiService { const { data, queryParams } = parsedRequest.data; - // Transform query params to be string values + // Transform query params to be string values, filtering out undefined/null const transformedQueryParams = Object.fromEntries( - Object.entries(queryParams).map(([k, v]) => { - return [k, String(v)]; - }) + Object.entries(queryParams) + .filter(([_, v]) => v !== undefined && v !== null) + .map(([k, v]) => { + return [k, String(v)]; + }) ); const queryString = new URLSearchParams(transformedQueryParams).toString(); @@ -174,11 +178,13 @@ class ConnectorsApiService { const { connector_id, queryParams, body } = parsedRequest.data; - // Transform query params to be string values + // Transform query params to be string values, filtering out undefined/null const transformedQueryParams = Object.fromEntries( - Object.entries(queryParams).map(([k, v]) => { - return [k, String(v)]; - }) + Object.entries(queryParams) + .filter(([_, v]) => v !== undefined && v !== null) + .map(([k, v]) => { + return [k, String(v)]; + }) ); const queryString = new URLSearchParams(transformedQueryParams).toString(); From 9771a88380d3386e8076b3d11f9743c47680c946 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:51:03 +0530 Subject: [PATCH 06/10] fix: refine date handling in Google Calendar connector to ensure accurate same-day queries --- .../app/connectors/google_calendar_connector.py | 11 +++++++---- .../google_calendar_indexer.py | 17 ----------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/surfsense_backend/app/connectors/google_calendar_connector.py b/surfsense_backend/app/connectors/google_calendar_connector.py index d8160cf25..7e24f3642 100644 --- a/surfsense_backend/app/connectors/google_calendar_connector.py +++ b/surfsense_backend/app/connectors/google_calendar_connector.py @@ -246,15 +246,18 @@ class GoogleCalendarConnector: dt_start = isoparse(start_date) dt_end = isoparse(end_date) + # Set start to beginning of day (00:00:00) and end to end of day (23:59:59) + # This ensures same-date queries work (e.g., start=2026-01-23, end=2026-01-23) + # and matches the Composio connector behavior if dt_start.tzinfo is None: - dt_start = dt_start.replace(tzinfo=pytz.UTC) + dt_start = dt_start.replace(hour=0, minute=0, second=0, tzinfo=pytz.UTC) else: - dt_start = dt_start.astimezone(pytz.UTC) + dt_start = dt_start.astimezone(pytz.UTC).replace(hour=0, minute=0, second=0) if dt_end.tzinfo is None: - dt_end = dt_end.replace(tzinfo=pytz.UTC) + dt_end = dt_end.replace(hour=23, minute=59, second=59, tzinfo=pytz.UTC) else: - dt_end = dt_end.astimezone(pytz.UTC) + dt_end = dt_end.astimezone(pytz.UTC).replace(hour=23, minute=59, second=59) if dt_start >= dt_end: return [], ( diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index 31796b4cd..81d33b5e2 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -209,23 +209,6 @@ async def index_google_calendar_events( start_date_str = start_date end_date_str = end_date - # If start_date and end_date are the same, adjust end_date to be one day later - # to ensure valid date range (start_date must be strictly before end_date) - if start_date_str == end_date_str: - # Parse the date and add one day to ensure valid range - dt = isoparse(end_date_str) - if dt.tzinfo is None: - dt = dt.replace(tzinfo=pytz.UTC) - else: - dt = dt.astimezone(pytz.UTC) - # Add one day to end_date to make it strictly after start_date - dt_end = dt + timedelta(days=1) - end_date_str = dt_end.strftime("%Y-%m-%d") - logger.info( - f"Adjusted end_date from {end_date} to {end_date_str} " - f"to ensure valid date range (start_date must be strictly before end_date)" - ) - await task_logger.log_task_progress( log_entry, f"Fetching Google Calendar events from {start_date_str} to {end_date_str}", From 71b0e02676918c6062249eff17815d1b8cc69551 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:51:12 +0530 Subject: [PATCH 07/10] fix: remove unused animation classes from dialog component for cleaner code --- surfsense_web/components/ui/dialog.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_web/components/ui/dialog.tsx b/surfsense_web/components/ui/dialog.tsx index f3fa856d3..e5d9fb93f 100644 --- a/surfsense_web/components/ui/dialog.tsx +++ b/surfsense_web/components/ui/dialog.tsx @@ -38,7 +38,7 @@ const DialogContent = React.forwardRef< Date: Fri, 30 Jan 2026 21:16:40 +0300 Subject: [PATCH 08/10] fix: use ollama_chat provider for proper tool --- surfsense_backend/app/agents/new_chat/llm_config.py | 2 +- surfsense_backend/app/services/llm_service.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/llm_config.py b/surfsense_backend/app/agents/new_chat/llm_config.py index f9f92959c..12b389a2d 100644 --- a/surfsense_backend/app/agents/new_chat/llm_config.py +++ b/surfsense_backend/app/agents/new_chat/llm_config.py @@ -32,7 +32,7 @@ PROVIDER_MAP = { "GROQ": "groq", "COHERE": "cohere", "GOOGLE": "gemini", - "OLLAMA": "ollama", + "OLLAMA": "ollama_chat", "MISTRAL": "mistral", "AZURE_OPENAI": "azure", "OPENROUTER": "openrouter", diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index f0198d91f..e21259990 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -94,7 +94,7 @@ async def validate_llm_config( "GROQ": "groq", "COHERE": "cohere", "GOOGLE": "gemini", - "OLLAMA": "ollama", + "OLLAMA": "ollama_chat", "MISTRAL": "mistral", "AZURE_OPENAI": "azure", "OPENROUTER": "openrouter", @@ -241,7 +241,7 @@ async def get_search_space_llm_instance( "GROQ": "groq", "COHERE": "cohere", "GOOGLE": "gemini", - "OLLAMA": "ollama", + "OLLAMA": "ollama_chat", "MISTRAL": "mistral", "AZURE_OPENAI": "azure", "OPENROUTER": "openrouter", @@ -311,7 +311,7 @@ async def get_search_space_llm_instance( "GROQ": "groq", "COHERE": "cohere", "GOOGLE": "gemini", - "OLLAMA": "ollama", + "OLLAMA": "ollama_chat", "MISTRAL": "mistral", "AZURE_OPENAI": "azure", "OPENROUTER": "openrouter", From 0ba70401e952191b8fb2dd5d7d9f043cdb11e108 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:57:18 +0530 Subject: [PATCH 09/10] feat: enhance date range selector with last indexed timestamp for improved user context --- .../components/date-range-selector.tsx | 17 ++++++++++++++++- .../views/connector-edit-view.tsx | 1 + .../views/indexing-configuration-view.tsx | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx b/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx index 48dc2a6c2..7490aa959 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx @@ -15,6 +15,7 @@ interface DateRangeSelectorProps { onStartDateChange: (date: Date | undefined) => void; onEndDateChange: (date: Date | undefined) => void; allowFutureDates?: boolean; // Allow future dates for calendar connectors + lastIndexedAt?: string | null; // Last sync timestamp to show in default placeholder } export const DateRangeSelector: FC = ({ @@ -23,7 +24,21 @@ export const DateRangeSelector: FC = ({ onStartDateChange, onEndDateChange, allowFutureDates = false, + lastIndexedAt, }) => { + // Get the placeholder text for start date based on whether connector was previously indexed + const getStartDatePlaceholder = () => { + if (lastIndexedAt) { + const date = new Date(lastIndexedAt); + const currentYear = new Date().getFullYear(); + const indexedYear = date.getFullYear(); + // Show year only if different from current year + const formatStr = indexedYear === currentYear ? "MMM d, HH:mm" : "MMM d, yyyy HH:mm"; + const formattedDate = format(date, formatStr); + return `Since (${formattedDate})`; + } + return "Default (1 year ago)"; + }; const handleLast30Days = () => { const today = new Date(); onStartDateChange(subDays(today, 30)); @@ -73,7 +88,7 @@ export const DateRangeSelector: FC = ({ )} > - {startDate ? format(startDate, "PPP") : "Default (1 year ago)"} + {startDate ? format(startDate, "PPP") : getStartDatePlaceholder()} diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx index 430aa927c..cfdebee60 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx @@ -227,6 +227,7 @@ export const ConnectorEditView: FC = ({ connector.connector_type === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" || connector.connector_type === "LUMA_CONNECTOR" } + lastIndexedAt={connector.last_indexed_at} /> )} diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx index 72069441a..b885f35da 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx @@ -165,6 +165,7 @@ export const IndexingConfigurationView: FC = ({ config.connectorType === "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" || config.connectorType === "LUMA_CONNECTOR" } + lastIndexedAt={connector?.last_indexed_at} /> )} From bcc77c2864fa7d73f41bb7c81d3d277a565a4ec0 Mon Sep 17 00:00:00 2001 From: akc Date: Sat, 31 Jan 2026 01:50:05 +0300 Subject: [PATCH 10/10] fix: also update PROVIDER_MAP in llm_router_service.py for Auto mode --- surfsense_backend/app/services/llm_router_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py index 95c0d116b..3a19f0b8e 100644 --- a/surfsense_backend/app/services/llm_router_service.py +++ b/surfsense_backend/app/services/llm_router_service.py @@ -32,7 +32,7 @@ PROVIDER_MAP = { "GROQ": "groq", "COHERE": "cohere", "GOOGLE": "gemini", - "OLLAMA": "ollama", + "OLLAMA": "ollama_chat", "MISTRAL": "mistral", "AZURE_OPENAI": "azure", "OPENROUTER": "openrouter",