mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-31 19:45:15 +02:00
feat: enhance caching mechanisms to prevent memory leaks
- Improved in-memory rate limiting by evicting timestamps outside the current window and cleaning up empty keys.
- Updated the LLM router service to cache context profiles and avoid redundant computations.
- Introduced cache eviction logic for MCP tools and sandbox instances to manage memory usage effectively.
- Added garbage collection triggers in chat streaming functions to reclaim resources promptly.
This commit is contained in:
parent
08829c110c
commit
f4b2ab0899
7 changed files with 127 additions and 60 deletions
|
|
@@ -103,22 +103,24 @@ def _check_rate_limit_memory(
|
|||
now = time.monotonic()
|
||||
|
||||
with _memory_lock:
|
||||
# Evict timestamps outside the current window
|
||||
_memory_rate_limits[key] = [
|
||||
t for t in _memory_rate_limits[key] if now - t < window_seconds
|
||||
]
|
||||
timestamps = [t for t in _memory_rate_limits[key] if now - t < window_seconds]
|
||||
|
||||
if len(_memory_rate_limits[key]) >= max_requests:
|
||||
if not timestamps:
|
||||
_memory_rate_limits.pop(key, None)
|
||||
else:
|
||||
_memory_rate_limits[key] = timestamps
|
||||
|
||||
if len(timestamps) >= max_requests:
|
||||
rate_limit_logger.warning(
|
||||
f"Rate limit exceeded (in-memory fallback) on {scope} for IP {client_ip} "
|
||||
f"({len(_memory_rate_limits[key])}/{max_requests} in {window_seconds}s)"
|
||||
f"({len(timestamps)}/{max_requests} in {window_seconds}s)"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail="RATE_LIMIT_EXCEEDED",
|
||||
)
|
||||
|
||||
_memory_rate_limits[key].append(now)
|
||||
_memory_rate_limits[key] = [*timestamps, now]
|
||||
|
||||
|
||||
def _check_rate_limit(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue