mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-06 14:22:47 +02:00
feat: implement agent caches and fix invalid prompt cache configs
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
- Added a new function `_warm_agent_jit_caches` to pre-warm agent caches at startup, reducing cold invocation costs. - Updated the `SurfSenseContextSchema` to include per-invocation fields for better state management during agent execution. - Introduced caching mechanisms in various tools to ensure fresh database sessions are used, improving performance and reliability. - Enhanced middleware to support new context features and improve error handling during connector and document type discovery.
This commit is contained in:
parent
90a653c8c7
commit
a34f1fb25c
60 changed files with 8477 additions and 5381 deletions
|
|
@ -1,6 +1,8 @@
|
|||
import asyncio
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
from threading import Lock
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
|
@ -2769,12 +2771,22 @@ class ConnectorService:
|
|||
"""
|
||||
Get all available (enabled) connector types for a search space.
|
||||
|
||||
Phase 1.4: results are cached per ``search_space_id`` for
|
||||
:data:`_DISCOVERY_TTL_SECONDS`. Cache key is independent of session
|
||||
identity — the cached value is plain data, safe to share across
|
||||
requests. Invalidate on connector add/update/delete via
|
||||
:func:`invalidate_connector_discovery_cache`.
|
||||
|
||||
Args:
|
||||
search_space_id: The search space ID
|
||||
|
||||
Returns:
|
||||
List of SearchSourceConnectorType enums for enabled connectors
|
||||
"""
|
||||
cached = _get_cached_connectors(search_space_id)
|
||||
if cached is not None:
|
||||
return list(cached)
|
||||
|
||||
query = (
|
||||
select(SearchSourceConnector.connector_type)
|
||||
.filter(
|
||||
|
|
@ -2784,8 +2796,9 @@ class ConnectorService:
|
|||
)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
connector_types = result.scalars().all()
|
||||
return list(connector_types)
|
||||
connector_types = list(result.scalars().all())
|
||||
_set_cached_connectors(search_space_id, connector_types)
|
||||
return connector_types
|
||||
|
||||
async def get_available_document_types(
|
||||
self,
|
||||
|
|
@ -2794,12 +2807,22 @@ class ConnectorService:
|
|||
"""
|
||||
Get all document types that have at least one document in the search space.
|
||||
|
||||
Phase 1.4: cached per ``search_space_id`` for
|
||||
:data:`_DISCOVERY_TTL_SECONDS`. Invalidate via
|
||||
:func:`invalidate_connector_discovery_cache` when a connector
|
||||
finishes indexing new documents (or document types are otherwise
|
||||
added/removed).
|
||||
|
||||
Args:
|
||||
search_space_id: The search space ID
|
||||
|
||||
Returns:
|
||||
List of document type strings that have documents indexed
|
||||
"""
|
||||
cached = _get_cached_doc_types(search_space_id)
|
||||
if cached is not None:
|
||||
return list(cached)
|
||||
|
||||
from sqlalchemy import distinct
|
||||
|
||||
from app.db import Document
|
||||
|
|
@ -2809,5 +2832,164 @@ class ConnectorService:
|
|||
)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
doc_types = result.scalars().all()
|
||||
return [str(dt) for dt in doc_types]
|
||||
doc_types = [str(dt) for dt in result.scalars().all()]
|
||||
_set_cached_doc_types(search_space_id, doc_types)
|
||||
return doc_types
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Connector / document-type discovery TTL cache (Phase 1.4)
# ---------------------------------------------------------------------------
#
# Both ``get_available_connectors`` and ``get_available_document_types`` are
# called on EVERY chat turn from ``create_surfsense_deep_agent``. Each query
# hits Postgres and contributes to per-turn agent build latency. Their
# results change infrequently — only when the user adds/edits/removes a
# connector, or when an indexer commits a new document type. A short TTL
# cache (default 30s, env-tunable) collapses N concurrent calls into one
# DB roundtrip with bounded staleness.
#
# Invalidation: connector mutation routes (create / update / delete) call
# ``invalidate_connector_discovery_cache(search_space_id)`` to clear the
# entry for the affected space. Multi-replica deployments still pay one
# DB roundtrip per replica per TTL window, which is fine — staleness is
# bounded and the alternative (cross-replica fanout) is not worth the
# coupling here.


def _read_discovery_ttl() -> float:
    """Parse ``SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS`` (default ``30``).

    A malformed value must not abort module import (this module is pulled
    in by many routes), so fall back to the 30-second default with a
    warning instead of letting ``float()`` raise at import time.

    Returns:
        TTL in seconds; values <= 0 disable caching entirely.
    """
    raw = os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30")
    try:
        return float(raw)
    except ValueError:
        import logging

        logging.getLogger(__name__).warning(
            "Invalid SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS=%r; using 30s",
            raw,
        )
        return 30.0


_DISCOVERY_TTL_SECONDS: float = _read_discovery_ttl()

# Per-search-space caches. Keyed by ``search_space_id``; value is
# ``(expires_at_monotonic, payload)``. Plain dicts protected by a lock —
# read-mostly workload, sub-microsecond contention. The enum name is a
# forward reference so annotation evaluation never depends on import order.
_connectors_cache: dict[int, tuple[float, list["SearchSourceConnectorType"]]] = {}
_doc_types_cache: dict[int, tuple[float, list[str]]] = {}
_cache_lock = Lock()
|
||||
|
||||
|
||||
def _get_cached_connectors(
    search_space_id: int,
) -> list[SearchSourceConnectorType] | None:
    """Return the cached connector list for *search_space_id*, or ``None``.

    ``None`` means: caching is disabled (TTL <= 0), there is no entry,
    or the entry has expired. Expired entries are evicted on read so the
    next caller repopulates the cache.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return None
    with _cache_lock:
        cached = _connectors_cache.get(search_space_id)
        if cached is None:
            return None
        deadline, connector_types = cached
        if time.monotonic() < deadline:
            return connector_types
        # Expired — evict eagerly rather than waiting for overwrite.
        _connectors_cache.pop(search_space_id, None)
        return None
|
||||
|
||||
|
||||
def _set_cached_connectors(
    search_space_id: int, payload: list[SearchSourceConnectorType]
) -> None:
    """Cache a defensive copy of *payload* under a fresh TTL deadline.

    No-op when caching is disabled (TTL <= 0).
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return
    # Copy so later mutation of the caller's list cannot leak into the cache.
    entry = (time.monotonic() + _DISCOVERY_TTL_SECONDS, list(payload))
    with _cache_lock:
        _connectors_cache[search_space_id] = entry
|
||||
|
||||
|
||||
def _get_cached_doc_types(search_space_id: int) -> list[str] | None:
|
||||
if _DISCOVERY_TTL_SECONDS <= 0:
|
||||
return None
|
||||
with _cache_lock:
|
||||
entry = _doc_types_cache.get(search_space_id)
|
||||
if entry is None:
|
||||
return None
|
||||
expires_at, payload = entry
|
||||
if time.monotonic() >= expires_at:
|
||||
_doc_types_cache.pop(search_space_id, None)
|
||||
return None
|
||||
return payload
|
||||
|
||||
|
||||
def _set_cached_doc_types(search_space_id: int, payload: list[str]) -> None:
    """Cache a defensive copy of *payload* under a fresh TTL deadline.

    No-op when caching is disabled (TTL <= 0).
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return
    # Copy so later mutation of the caller's list cannot leak into the cache.
    entry = (time.monotonic() + _DISCOVERY_TTL_SECONDS, list(payload))
    with _cache_lock:
        _doc_types_cache[search_space_id] = entry
|
||||
|
||||
|
||||
def invalidate_connector_discovery_cache(search_space_id: int | None = None) -> None:
|
||||
"""Drop cached discovery results for ``search_space_id`` (or all spaces).
|
||||
|
||||
Connector CRUD routes / indexer pipelines call this when they mutate
|
||||
the rows backing :func:`ConnectorService.get_available_connectors` /
|
||||
:func:`get_available_document_types`. ``None`` clears every space —
|
||||
useful in tests and on bulk imports.
|
||||
"""
|
||||
with _cache_lock:
|
||||
if search_space_id is None:
|
||||
_connectors_cache.clear()
|
||||
_doc_types_cache.clear()
|
||||
else:
|
||||
_connectors_cache.pop(search_space_id, None)
|
||||
_doc_types_cache.pop(search_space_id, None)
|
||||
|
||||
|
||||
def _invalidate_connectors_only(search_space_id: int | None = None) -> None:
|
||||
with _cache_lock:
|
||||
if search_space_id is None:
|
||||
_connectors_cache.clear()
|
||||
else:
|
||||
_connectors_cache.pop(search_space_id, None)
|
||||
|
||||
|
||||
def _invalidate_doc_types_only(search_space_id: int | None = None) -> None:
|
||||
with _cache_lock:
|
||||
if search_space_id is None:
|
||||
_doc_types_cache.clear()
|
||||
else:
|
||||
_doc_types_cache.pop(search_space_id, None)
|
||||
|
||||
|
||||
def _register_invalidation_listeners() -> None:
    """Wire SQLAlchemy ORM events so the cache stays consistent automatically.

    Hooking ``after_insert`` / ``after_update`` / ``after_delete`` means
    every successful ORM INSERT/UPDATE/DELETE invalidates the affected
    search space's cached discovery payload — no need to sprinkle
    ``invalidate_*`` calls across 30+ connector routes. Bulk operations
    that bypass the ORM (e.g. ``session.execute(insert(...))`` without a
    mapped object) still need explicit invalidation; document indexers
    already commit through the ORM so document-type discovery is covered.
    """
    from sqlalchemy import event

    # Imported lazily (not at module top) to dodge a circular import:
    # app.services.connector_service is itself reached from app.db's
    # ecosystem indirectly via several CRUD modules.
    from app.db import Document, SearchSourceConnector

    def _on_connector_mutation(_mapper, _connection, target) -> None:
        # Only a connector change invalidates connector discovery.
        space_id = getattr(target, "search_space_id", None)
        if space_id is not None:
            _invalidate_connectors_only(int(space_id))

    def _on_document_mutation(_mapper, _connection, target) -> None:
        # Only a document change invalidates document-type discovery.
        space_id = getattr(target, "search_space_id", None)
        if space_id is not None:
            _invalidate_doc_types_only(int(space_id))

    for event_name in ("after_insert", "after_update", "after_delete"):
        event.listen(SearchSourceConnector, event_name, _on_connector_mutation)
        event.listen(Document, event_name, _on_document_mutation)
|
||||
|
||||
|
||||
# Best-effort wiring at import time: listener registration failure must
# never break module import — the cache then merely risks staleness until
# explicit invalidate_connector_discovery_cache calls are added.
try:
    _register_invalidation_listeners()
except Exception:  # pragma: no cover - defensive; never block module import
    # Aliased import keeps ``logging`` out of the module's public namespace.
    import logging as _logging

    _logging.getLogger(__name__).exception(
        "Failed to register connector discovery cache invalidation listeners; "
        "stale cache risk: explicit invalidate_connector_discovery_cache calls "
        "may be required."
    )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue