feat: enhance caching mechanisms to prevent memory leaks

- Improved in-memory rate limiting by evicting timestamps outside the current window and cleaning up empty keys.
- Updated LLM router service to cache context profiles and avoid redundant computations.
- Introduced cache eviction logic for MCP tools and sandbox instances to manage memory usage effectively.
- Added garbage collection triggers in chat streaming functions to reclaim resources promptly.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-27 17:56:00 -08:00
parent 08829c110c
commit f4b2ab0899
7 changed files with 127 additions and 60 deletions

View file

@ -10,6 +10,7 @@ Supports loading LLM configurations from:
"""
import asyncio
import gc
import json
import logging
import re
@ -1476,6 +1477,16 @@ async def stream_new_chat(
_try_persist_and_delete_sandbox(chat_id, stream_result.sandbox_files)
# Trigger a GC pass so LangGraph agent graphs, tool closures, and
# LLM wrappers with potential circular refs are reclaimed promptly.
collected = gc.collect()
if collected:
_perf_log.info(
"[stream_new_chat] gc.collect() reclaimed %d objects (chat_id=%s)",
collected,
chat_id,
)
async def stream_resume_chat(
chat_id: int,
@ -1662,3 +1673,10 @@ async def stream_resume_chat(
)
_try_persist_and_delete_sandbox(chat_id, stream_result.sandbox_files)
collected = gc.collect()
if collected:
_perf_log.info(
"[stream_resume] gc.collect() reclaimed %d objects (chat_id=%s)",
collected,
chat_id,
)