mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
feat: Enhance LLM configuration and routing with model profile attachment
- Added `_attach_model_profile` function to attach model context metadata to `ChatLiteLLM`. - Updated `create_chat_litellm_from_config` and `create_chat_litellm_from_agent_config` to utilize the new profile attachment. - Improved context profile caching in `llm_router_service.py` to include both minimum and maximum input tokens, along with token model names for better context management. - Introduced new methods for token counting and context trimming based on model profiles.
This commit is contained in:
parent
5571e8aa53
commit
eec4db4a3b
2 changed files with 310 additions and 7 deletions
|
|
@ -15,6 +15,7 @@ from pathlib import Path
|
|||
|
||||
import yaml
|
||||
from langchain_litellm import ChatLiteLLM
|
||||
from litellm import get_model_info
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
|
@ -62,6 +63,22 @@ PROVIDER_MAP = {
|
|||
}
|
||||
|
||||
|
||||
def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
    """Attach a ``profile`` dict to ChatLiteLLM with model context metadata.

    Best-effort: looks up *model_string* in litellm's model registry and,
    when a positive integer ``max_input_tokens`` is reported, publishes it
    on ``llm.profile`` together with the model name(s) to use for token
    counting. Models missing from the registry (or any lookup failure)
    simply leave the profile unset so LLM construction never fails on
    optional metadata.

    Args:
        llm: The ChatLiteLLM instance to annotate (mutated in place).
        model_string: Full litellm model identifier, e.g. ``openai/gpt-4o``.
    """
    # Only the registry lookup can legitimately fail (unknown model, bad
    # provider prefix); keep the try body minimal so programming errors in
    # the profile construction below are not silently swallowed.
    try:
        info = get_model_info(model_string)
    except Exception:
        # Deliberate best-effort: missing metadata is not an error.
        return

    max_input_tokens = info.get("max_input_tokens")
    # Guard against None, 0, or non-int values before publishing.
    if isinstance(max_input_tokens, int) and max_input_tokens > 0:
        llm.profile = {
            "max_input_tokens": max_input_tokens,
            # Same value for both bounds; downstream consumers may treat
            # these as a min/max range — TODO confirm against router usage.
            "max_input_tokens_upper": max_input_tokens,
            "token_count_model": model_string,
            "token_count_models": [model_string],
        }
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentConfig:
|
||||
"""
|
||||
|
|
@ -366,7 +383,9 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
|
|||
if llm_config.get("litellm_params"):
|
||||
litellm_kwargs.update(llm_config["litellm_params"])
|
||||
|
||||
return ChatLiteLLM(**litellm_kwargs)
|
||||
llm = ChatLiteLLM(**litellm_kwargs)
|
||||
_attach_model_profile(llm, model_string)
|
||||
return llm
|
||||
|
||||
|
||||
def create_chat_litellm_from_agent_config(
|
||||
|
|
@ -419,4 +438,6 @@ def create_chat_litellm_from_agent_config(
|
|||
if agent_config.litellm_params:
|
||||
litellm_kwargs.update(agent_config.litellm_params)
|
||||
|
||||
return ChatLiteLLM(**litellm_kwargs)
|
||||
llm = ChatLiteLLM(**litellm_kwargs)
|
||||
_attach_model_profile(llm, model_string)
|
||||
return llm
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue