mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 13:22:41 +02:00
feat: prompt caching
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
- Updated `litellm` dependency version from `1.83.4` to `1.83.7`. - Adjusted `aiohttp` version from `3.13.5` to `3.13.4` in the lock file. - Implemented `apply_litellm_prompt_caching` in `chat_deepagent.py` to improve prompt caching. - Added model name resolution logic in `chat_deepagent.py` to ensure correct provider-variant dispatch. - Enhanced `llm_config.py` to configure prompt caching for various LLM providers. - Updated tests to verify correct model name forwarding and prompt caching behavior.
This commit is contained in:
parent
360b5f8e3a
commit
e57c3a7d0c
12 changed files with 877 additions and 156 deletions
|
|
@ -27,6 +27,7 @@ from litellm import get_model_info
|
|||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
|
||||
from app.services.llm_router_service import (
|
||||
AUTO_MODE_ID,
|
||||
ChatLiteLLMRouter,
|
||||
|
|
@ -494,6 +495,11 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
|
|||
|
||||
llm = SanitizedChatLiteLLM(**litellm_kwargs)
|
||||
_attach_model_profile(llm, model_string)
|
||||
# Configure LiteLLM-native prompt caching (cache_control_injection_points
|
||||
# for Anthropic/Bedrock/Vertex/Gemini/Azure-AI/OpenRouter/Databricks/etc.).
|
||||
# ``agent_config=None`` here — the YAML path doesn't have provider intent
|
||||
# in a structured form, so we set only the universal injection points.
|
||||
apply_litellm_prompt_caching(llm)
|
||||
return llm
|
||||
|
||||
|
||||
|
|
@ -518,7 +524,16 @@ def create_chat_litellm_from_agent_config(
|
|||
print("Error: Auto mode requested but LLM Router not initialized")
|
||||
return None
|
||||
try:
|
||||
return get_auto_mode_llm()
|
||||
router_llm = get_auto_mode_llm()
|
||||
if router_llm is not None:
|
||||
# Universal cache_control_injection_points only — auto-mode
|
||||
# fans out across providers, so OpenAI-only kwargs (e.g.
|
||||
# ``prompt_cache_key``) are left off here. ``drop_params``
|
||||
# would strip them at the provider boundary anyway, but
|
||||
# there's no point setting them when we don't know the
|
||||
# destination.
|
||||
apply_litellm_prompt_caching(router_llm, agent_config=agent_config)
|
||||
return router_llm
|
||||
except Exception as e:
|
||||
print(f"Error creating ChatLiteLLMRouter: {e}")
|
||||
return None
|
||||
|
|
@ -549,4 +564,9 @@ def create_chat_litellm_from_agent_config(
|
|||
|
||||
llm = SanitizedChatLiteLLM(**litellm_kwargs)
|
||||
_attach_model_profile(llm, model_string)
|
||||
# Build-time prompt caching: sets ``cache_control_injection_points`` for
|
||||
# all providers and (for OpenAI/DeepSeek/xAI) ``prompt_cache_retention``.
|
||||
# Per-thread ``prompt_cache_key`` is layered on later in
|
||||
# ``create_surfsense_deep_agent`` once ``thread_id`` is known.
|
||||
apply_litellm_prompt_caching(llm, agent_config=agent_config)
|
||||
return llm
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue