feat: Enhance LLM configuration and routing with model profile attachment

- Added `_attach_model_profile` function to attach model context metadata to `ChatLiteLLM`.
- Updated `create_chat_litellm_from_config` and `create_chat_litellm_from_agent_config` to utilize the new profile attachment.
- Improved context profile caching in `llm_router_service.py` to include both the baseline and upper-bound maximum input tokens, along with token model names for better context management.
- Introduced new methods for token counting and context trimming based on model profiles.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-03-10 18:18:59 -07:00
parent 5571e8aa53
commit eec4db4a3b
2 changed files with 310 additions and 7 deletions

View file

@ -15,6 +15,7 @@ from pathlib import Path
import yaml
from langchain_litellm import ChatLiteLLM
from litellm import get_model_info
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@ -62,6 +63,22 @@ PROVIDER_MAP = {
}
def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
    """Attach a ``profile`` dict to ChatLiteLLM with model context metadata.

    Best-effort: queries litellm's ``get_model_info`` for the model's context
    window and, when a positive integer ``max_input_tokens`` is reported,
    records it (together with the token-counting model names) on
    ``llm.profile``. Any failure — unknown model, unexpected info shape —
    is swallowed so that LLM construction never breaks on a missing profile.
    """
    try:
        context_window = get_model_info(model_string).get("max_input_tokens")
        if isinstance(context_window, int) and context_window > 0:
            llm.profile = {
                "max_input_tokens": context_window,
                "max_input_tokens_upper": context_window,
                "token_count_model": model_string,
                "token_count_models": [model_string],
            }
    except Exception:
        # Profile attachment is optional metadata; never propagate errors.
        return
@dataclass
class AgentConfig:
"""
@ -366,7 +383,9 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
if llm_config.get("litellm_params"):
litellm_kwargs.update(llm_config["litellm_params"])
return ChatLiteLLM(**litellm_kwargs)
llm = ChatLiteLLM(**litellm_kwargs)
_attach_model_profile(llm, model_string)
return llm
def create_chat_litellm_from_agent_config(
@ -419,4 +438,6 @@ def create_chat_litellm_from_agent_config(
if agent_config.litellm_params:
litellm_kwargs.update(agent_config.litellm_params)
return ChatLiteLLM(**litellm_kwargs)
llm = ChatLiteLLM(**litellm_kwargs)
_attach_model_profile(llm, model_string)
return llm