feat: enhance caching mechanisms to prevent memory leaks

- Improved in-memory rate limiting by evicting timestamps outside the current window and cleaning up empty keys.
- Updated LLM router service to cache context profiles and avoid redundant computations.
- Introduced cache eviction logic for MCP tools and sandbox instances to manage memory usage effectively.
- Added garbage collection triggers in chat streaming functions to reclaim resources promptly.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-27 17:56:00 -08:00
parent 08829c110c
commit f4b2ab0899
7 changed files with 127 additions and 60 deletions

View file

@@ -22,6 +22,7 @@ from app.services.llm_router_service import (
AUTO_MODE_ID,
ChatLiteLLMRouter,
LLMRouterService,
+get_auto_mode_llm,
is_auto_mode,
)
@@ -389,7 +390,7 @@ def create_chat_litellm_from_agent_config(
print("Error: Auto mode requested but LLM Router not initialized")
return None
try:
-return ChatLiteLLMRouter()
+return get_auto_mode_llm()
except Exception as e:
print(f"Error creating ChatLiteLLMRouter: {e}")
return None