feat: enhance caching mechanisms to prevent memory leaks

- Improved in-memory rate limiting by evicting timestamps outside the current window and cleaning up empty keys.
- Updated LLM router service to cache context profiles and avoid redundant computations.
- Introduced cache eviction logic for MCP tools and sandbox instances to manage memory usage effectively.
- Added garbage collection triggers in chat streaming functions to reclaim resources promptly.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-27 17:56:00 -08:00
parent 08829c110c
commit f4b2ab0899
7 changed files with 127 additions and 60 deletions

View file

@ -27,9 +27,24 @@ from app.db import SearchSourceConnector, SearchSourceConnectorType
logger = logging.getLogger(__name__)
# Age, in seconds, at which a cached tool list counts as expired and is evicted.
_MCP_CACHE_TTL_SECONDS = 300 # 5 minutes
# Upper bound on cached entries: once exceeded, load_mcp_tools deletes the entry
# with the oldest timestamp.
_MCP_CACHE_MAX_SIZE = 50
# Maps search_space_id -> (time.monotonic() timestamp taken at load, loaded tools).
_mcp_tools_cache: dict[int, tuple[float, list[StructuredTool]]] = {}
def _evict_expired_mcp_cache() -> None:
    """Drop every MCP tools cache entry whose age has reached the TTL.

    An entry is stale when the monotonic time elapsed since its stored
    timestamp is at least ``_MCP_CACHE_TTL_SECONDS``. Stale entries are
    deleted from ``_mcp_tools_cache`` in place, and a single debug message
    reports how many were removed. Nothing is logged when no entry is stale.
    """
    current = time.monotonic()

    # Collect first, delete afterwards: a dict must not change size while
    # it is being iterated.
    stale_keys = []
    for space_id, entry in _mcp_tools_cache.items():
        cached_at, _tools = entry
        if current - cached_at >= _MCP_CACHE_TTL_SECONDS:
            stale_keys.append(space_id)

    if not stale_keys:
        return

    for space_id in stale_keys:
        del _mcp_tools_cache[space_id]
    logger.debug("Evicted %d expired MCP cache entries", len(stale_keys))
def _create_dynamic_input_model_from_schema(
tool_name: str,
input_schema: dict[str, Any],
@ -392,6 +407,8 @@ async def load_mcp_tools(
List of LangChain StructuredTool instances
"""
_evict_expired_mcp_cache()
now = time.monotonic()
cached = _mcp_tools_cache.get(search_space_id)
if cached is not None:
@ -445,6 +462,11 @@ async def load_mcp_tools(
)
_mcp_tools_cache[search_space_id] = (now, tools)
if len(_mcp_tools_cache) > _MCP_CACHE_MAX_SIZE:
oldest_key = min(_mcp_tools_cache, key=lambda k: _mcp_tools_cache[k][0])
del _mcp_tools_cache[oldest_key]
logger.info(f"Loaded {len(tools)} MCP tools for search space {search_space_id}")
return tools