feat: enhance caching mechanisms to prevent memory leaks

- Improved in-memory rate limiting by evicting timestamps outside the current window and cleaning up empty keys.
- Updated LLM router service to cache context profiles and avoid redundant computations.
- Introduced cache eviction logic for MCP tools and sandbox instances to manage memory usage effectively.
- Added garbage collection triggers in chat streaming functions to reclaim resources promptly.
2026-04-25 00:36:31 +02:00 · 2026-02-27 17:56:00 -08:00 · 2026-02-27 17:56:00 -08:00 · f4b2ab0899
commit f4b2ab0899
parent 08829c110c
7 changed files with 127 additions and 60 deletions
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -10,6 +10,7 @@ Supports loading LLM configurations from:
 """

 import asyncio
+import gc
 import json
 import logging
 import re
@ -1476,6 +1477,16 @@ async def stream_new_chat(

        _try_persist_and_delete_sandbox(chat_id, stream_result.sandbox_files)

+        # Trigger a GC pass so LangGraph agent graphs, tool closures, and
+        # LLM wrappers with potential circular refs are reclaimed promptly.
+        collected = gc.collect()
+        if collected:
+            _perf_log.info(
+                "[stream_new_chat] gc.collect() reclaimed %d objects (chat_id=%s)",
+                collected,
+                chat_id,
+            )
+

 async def stream_resume_chat(
    chat_id: int,
@ -1662,3 +1673,10 @@ async def stream_resume_chat(
                )

        _try_persist_and_delete_sandbox(chat_id, stream_result.sandbox_files)
+        collected = gc.collect()
+        if collected:
+            _perf_log.info(
+                "[stream_resume] gc.collect() reclaimed %d objects (chat_id=%s)",
+                collected,
+                chat_id,
+            )