feat: Enhance LLM configuration and routing with model profile attachment

- Added `_attach_model_profile` function to attach model context metadata to `ChatLiteLLM`.
- Updated `create_chat_litellm_from_config` and `create_chat_litellm_from_agent_config` to utilize the new profile attachment.
- Improved context profile caching in `llm_router_service.py` to include both the baseline and upper-bound maximum input tokens, along with token model names for better context management.
- Introduced new methods for token counting and context trimming based on model profiles.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-03-10 18:18:59 -07:00
parent 5571e8aa53
commit eec4db4a3b
2 changed files with 310 additions and 7 deletions

View file

@ -15,6 +15,7 @@ from pathlib import Path
import yaml
from langchain_litellm import ChatLiteLLM
from litellm import get_model_info
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@ -62,6 +63,22 @@ PROVIDER_MAP = {
}
def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
    """Attach a ``profile`` dict to ChatLiteLLM with model context metadata.

    Best-effort: queries litellm's ``get_model_info`` for the model's context
    window and, when a positive integer ``max_input_tokens`` is reported,
    records it (together with the token-counting model names) on
    ``llm.profile``. Any failure — unknown model, unexpected info shape —
    is swallowed so that LLM construction never breaks on a missing profile.
    """
    try:
        context_window = get_model_info(model_string).get("max_input_tokens")
        if isinstance(context_window, int) and context_window > 0:
            llm.profile = {
                "max_input_tokens": context_window,
                "max_input_tokens_upper": context_window,
                "token_count_model": model_string,
                "token_count_models": [model_string],
            }
    except Exception:
        # Profile attachment is optional metadata; never propagate errors.
        return
@dataclass
class AgentConfig:
"""
@ -366,7 +383,9 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
if llm_config.get("litellm_params"):
litellm_kwargs.update(llm_config["litellm_params"])
return ChatLiteLLM(**litellm_kwargs)
llm = ChatLiteLLM(**litellm_kwargs)
_attach_model_profile(llm, model_string)
return llm
def create_chat_litellm_from_agent_config(
@ -419,4 +438,6 @@ def create_chat_litellm_from_agent_config(
if agent_config.litellm_params:
litellm_kwargs.update(agent_config.litellm_params)
return ChatLiteLLM(**litellm_kwargs)
llm = ChatLiteLLM(**litellm_kwargs)
_attach_model_profile(llm, model_string)
return llm