feat: implement agent caches and fix invalid prompt cache configs
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions

- Added a new function `_warm_agent_jit_caches` to pre-warm agent caches at startup, reducing cold invocation costs.
- Updated the `SurfSenseContextSchema` to include per-invocation fields for better state management during agent execution.
- Introduced caching mechanisms in various tools to ensure fresh database sessions are used, improving performance and reliability.
- Enhanced middleware to support new context features and improve error handling during connector and document type discovery.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-05-03 06:03:40 -07:00
parent 90a653c8c7
commit a34f1fb25c
60 changed files with 8477 additions and 5381 deletions

View file

@ -1,6 +1,8 @@
import asyncio
import os
import time
from datetime import datetime
from threading import Lock
from typing import Any
import httpx
@ -2769,12 +2771,22 @@ class ConnectorService:
"""
Get all available (enabled) connector types for a search space.
Phase 1.4: results are cached per ``search_space_id`` for
:data:`_DISCOVERY_TTL_SECONDS`. Cache key is independent of session
identity — the cached value is plain data, safe to share across
requests. Invalidate on connector add/update/delete via
:func:`invalidate_connector_discovery_cache`.
Args:
search_space_id: The search space ID
Returns:
List of SearchSourceConnectorType enums for enabled connectors
"""
cached = _get_cached_connectors(search_space_id)
if cached is not None:
return list(cached)
query = (
select(SearchSourceConnector.connector_type)
.filter(
@ -2784,8 +2796,9 @@ class ConnectorService:
)
result = await self.session.execute(query)
connector_types = result.scalars().all()
return list(connector_types)
connector_types = list(result.scalars().all())
_set_cached_connectors(search_space_id, connector_types)
return connector_types
async def get_available_document_types(
self,
@ -2794,12 +2807,22 @@ class ConnectorService:
"""
Get all document types that have at least one document in the search space.
Phase 1.4: cached per ``search_space_id`` for
:data:`_DISCOVERY_TTL_SECONDS`. Invalidate via
:func:`invalidate_connector_discovery_cache` when a connector
finishes indexing new documents (or document types are otherwise
added/removed).
Args:
search_space_id: The search space ID
Returns:
List of document type strings that have documents indexed
"""
cached = _get_cached_doc_types(search_space_id)
if cached is not None:
return list(cached)
from sqlalchemy import distinct
from app.db import Document
@ -2809,5 +2832,164 @@ class ConnectorService:
)
result = await self.session.execute(query)
doc_types = result.scalars().all()
return [str(dt) for dt in doc_types]
doc_types = [str(dt) for dt in result.scalars().all()]
_set_cached_doc_types(search_space_id, doc_types)
return doc_types
# ---------------------------------------------------------------------------
# Connector / document-type discovery TTL cache (Phase 1.4)
# ---------------------------------------------------------------------------
#
# Both ``get_available_connectors`` and ``get_available_document_types`` are
# called on EVERY chat turn from ``create_surfsense_deep_agent``. Each query
# hits Postgres and contributes to per-turn agent build latency. Their
# results change infrequently — only when the user adds/edits/removes a
# connector, or when an indexer commits a new document type. A short TTL
# cache (default 30s, env-tunable) collapses N concurrent calls into one
# DB roundtrip with bounded staleness.
#
# Invalidation: connector mutation routes (create / update / delete) call
# ``invalidate_connector_discovery_cache(search_space_id)`` to clear the
# entry for the affected space. Multi-replica deployments still pay one
# DB roundtrip per replica per TTL window, which is fine — staleness is
# bounded and the alternative (cross-replica fanout) is not worth the
# coupling here.
# A TTL <= 0 disables caching entirely: lookups always miss and stores are
# no-ops (see the guards in the _get_cached_* / _set_cached_* helpers below).
_DISCOVERY_TTL_SECONDS: float = float(
    os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30")
)
# Per-search-space caches. Keyed by ``search_space_id``; value is
# ``(expires_at_monotonic, payload)``. Plain dicts protected by a lock —
# read-mostly workload, sub-microsecond contention.
_connectors_cache: dict[int, tuple[float, list[SearchSourceConnectorType]]] = {}
_doc_types_cache: dict[int, tuple[float, list[str]]] = {}
# One lock guards both dicts; every read and write above goes through it.
_cache_lock = Lock()
def _get_cached_connectors(
    search_space_id: int,
) -> list[SearchSourceConnectorType] | None:
    """Return the cached connector list for *search_space_id*, or ``None``.

    ``None`` signals a cache miss: caching is disabled (TTL <= 0), there is
    no entry for this space, or the entry has passed its deadline (expired
    entries are evicted eagerly while we hold the lock).
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return None
    with _cache_lock:
        record = _connectors_cache.get(search_space_id)
        if record is not None:
            deadline, connector_types = record
            if time.monotonic() < deadline:
                return connector_types
            # Stale — drop it so the next successful query repopulates.
            _connectors_cache.pop(search_space_id, None)
    return None
def _set_cached_connectors(
    search_space_id: int, payload: list[SearchSourceConnectorType]
) -> None:
    """Store *payload* for *search_space_id* with a fresh TTL deadline.

    No-op when caching is disabled (TTL <= 0). A defensive copy of the list
    is stored so caller-side mutation cannot corrupt the cached value.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return
    deadline = time.monotonic() + _DISCOVERY_TTL_SECONDS
    snapshot = list(payload)
    with _cache_lock:
        _connectors_cache[search_space_id] = (deadline, snapshot)
def _get_cached_doc_types(search_space_id: int) -> list[str] | None:
    """Return the cached document-type list for *search_space_id*, or ``None``.

    Mirrors :func:`_get_cached_connectors`: ``None`` means disabled, absent,
    or expired (expired entries are evicted under the lock).
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return None
    with _cache_lock:
        record = _doc_types_cache.get(search_space_id)
        if record is not None:
            deadline, doc_types = record
            if time.monotonic() < deadline:
                return doc_types
            # Stale — evict so the next DB roundtrip refreshes the entry.
            _doc_types_cache.pop(search_space_id, None)
    return None
def _set_cached_doc_types(search_space_id: int, payload: list[str]) -> None:
    """Store *payload* for *search_space_id* with a fresh TTL deadline.

    No-op when caching is disabled (TTL <= 0); stores a defensive copy.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return
    deadline = time.monotonic() + _DISCOVERY_TTL_SECONDS
    snapshot = list(payload)
    with _cache_lock:
        _doc_types_cache[search_space_id] = (deadline, snapshot)
def invalidate_connector_discovery_cache(search_space_id: int | None = None) -> None:
    """Drop cached discovery results for ``search_space_id`` (or all spaces).

    Connector CRUD routes / indexer pipelines call this when they mutate
    the rows backing :func:`ConnectorService.get_available_connectors` /
    :func:`get_available_document_types`. Passing ``None`` clears every
    space — useful in tests and on bulk imports.
    """
    with _cache_lock:
        for cache in (_connectors_cache, _doc_types_cache):
            if search_space_id is None:
                cache.clear()
            else:
                cache.pop(search_space_id, None)
def _invalidate_connectors_only(search_space_id: int | None = None) -> None:
    """Clear the connector discovery cache only (``None`` == all spaces)."""
    with _cache_lock:
        if search_space_id is not None:
            _connectors_cache.pop(search_space_id, None)
        else:
            _connectors_cache.clear()
def _invalidate_doc_types_only(search_space_id: int | None = None) -> None:
    """Clear the document-type discovery cache only (``None`` == all spaces)."""
    with _cache_lock:
        if search_space_id is not None:
            _doc_types_cache.pop(search_space_id, None)
        else:
            _doc_types_cache.clear()
def _register_invalidation_listeners() -> None:
    """Wire SQLAlchemy ORM events so the cache stays consistent automatically.

    Hooking ``after_insert`` / ``after_update`` / ``after_delete`` means every
    successful ORM-level INSERT/UPDATE/DELETE invalidates the affected search
    space's cached discovery payload, so the 30+ connector routes need no
    explicit ``invalidate_*`` calls. Bulk statements that bypass the ORM
    (e.g. ``session.execute(insert(...))`` without a mapped object) still
    require explicit invalidation; document indexers already commit through
    the ORM, so document-type discovery is covered.
    """
    from sqlalchemy import event

    # Local import to dodge a circular dependency: this module is itself
    # pulled in (indirectly) from app.db's ecosystem via several CRUD modules.
    from app.db import Document, SearchSourceConnector

    def _on_connector_mutation(_mapper, _connection, target) -> None:
        space_id = getattr(target, "search_space_id", None)
        if space_id is not None:
            _invalidate_connectors_only(int(space_id))

    def _on_document_mutation(_mapper, _connection, target) -> None:
        space_id = getattr(target, "search_space_id", None)
        if space_id is not None:
            _invalidate_doc_types_only(int(space_id))

    for hook in ("after_insert", "after_update", "after_delete"):
        event.listen(SearchSourceConnector, hook, _on_connector_mutation)
        event.listen(Document, hook, _on_document_mutation)
# Best-effort registration at import time: a listener-wiring failure must
# never prevent this module from importing, so any error is logged (with
# traceback) and swallowed rather than re-raised.
try:
    _register_invalidation_listeners()
except Exception:  # pragma: no cover - defensive; never block module import
    import logging as _logging
    _logging.getLogger(__name__).exception(
        "Failed to register connector discovery cache invalidation listeners; "
        "stale cache risk: explicit invalidate_connector_discovery_cache calls "
        "may be required."
    )