mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-08 15:22:39 +02:00
feat: implement agent caches and fix invalid prompt cache configs
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
- Added a new function `_warm_agent_jit_caches` to pre-warm agent caches at startup, reducing cold invocation costs.
- Updated the `SurfSenseContextSchema` to include per-invocation fields for better state management during agent execution.
- Introduced caching mechanisms in various tools to ensure fresh database sessions are used, improving performance and reliability.
- Enhanced middleware to support new context features and improve error handling during connector and document type discovery.
This commit is contained in:
parent
90a653c8c7
commit
a34f1fb25c
60 changed files with 8477 additions and 5381 deletions
|
|
@ -40,6 +40,13 @@ from langchain_core.tools import BaseTool
|
|||
from langgraph.types import Checkpointer
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.new_chat.agent_cache import (
|
||||
flags_signature,
|
||||
get_cache,
|
||||
stable_hash,
|
||||
system_prompt_hash,
|
||||
tools_signature,
|
||||
)
|
||||
from app.agents.new_chat.context import SurfSenseContextSchema
|
||||
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
||||
from app.agents.new_chat.filesystem_backends import build_backend_resolver
|
||||
|
|
@ -53,6 +60,7 @@ from app.agents.new_chat.middleware import (
|
|||
DedupHITLToolCallsMiddleware,
|
||||
DoomLoopMiddleware,
|
||||
FileIntentMiddleware,
|
||||
FlattenSystemMessageMiddleware,
|
||||
KnowledgeBasePersistenceMiddleware,
|
||||
KnowledgePriorityMiddleware,
|
||||
KnowledgeTreeMiddleware,
|
||||
|
|
@ -330,23 +338,39 @@ async def create_surfsense_deep_agent(
|
|||
else None,
|
||||
)
|
||||
|
||||
# Discover available connectors and document types for this search space
|
||||
# Discover available connectors and document types for this search space.
|
||||
#
|
||||
# NOTE: These two calls cannot be parallelized via ``asyncio.gather``.
|
||||
# ``ConnectorService`` shares a single ``AsyncSession`` (``self.session``);
|
||||
# SQLAlchemy explicitly forbids concurrent operations on the same session
|
||||
# ("This session is provisioning a new connection; concurrent operations
|
||||
# are not permitted on the same session"). The Phase 1.4 in-process TTL
|
||||
# cache in ``connector_service`` already collapses the warm path to a
|
||||
# near-zero pair of dict lookups, so sequential awaits cost nothing in
|
||||
# the common case while remaining correct on cold cache misses.
|
||||
available_connectors: list[str] | None = None
|
||||
available_document_types: list[str] | None = None
|
||||
|
||||
_t0 = time.perf_counter()
|
||||
try:
|
||||
connector_types = await connector_service.get_available_connectors(
|
||||
search_space_id
|
||||
)
|
||||
if connector_types:
|
||||
available_connectors = _map_connectors_to_searchable_types(connector_types)
|
||||
try:
|
||||
connector_types_result = await connector_service.get_available_connectors(
|
||||
search_space_id
|
||||
)
|
||||
if connector_types_result:
|
||||
available_connectors = _map_connectors_to_searchable_types(
|
||||
connector_types_result
|
||||
)
|
||||
except Exception as e:
|
||||
logging.warning("Failed to discover available connectors: %s", e)
|
||||
|
||||
available_document_types = await connector_service.get_available_document_types(
|
||||
search_space_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
try:
|
||||
available_document_types = (
|
||||
await connector_service.get_available_document_types(search_space_id)
|
||||
)
|
||||
except Exception as e:
|
||||
logging.warning("Failed to discover available document types: %s", e)
|
||||
except Exception as e: # pragma: no cover - defensive outer guard
|
||||
logging.warning(f"Failed to discover available connectors/document types: {e}")
|
||||
_perf_log.info(
|
||||
"[create_agent] Connector/doc-type discovery in %.3fs",
|
||||
|
|
@ -469,29 +493,77 @@ async def create_surfsense_deep_agent(
|
|||
# entire middleware build + main-graph compile into a single
|
||||
# ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
|
||||
# event loop stays responsive.
|
||||
#
|
||||
# PHASE 1: cache the resulting compiled graph. ``agent_cache`` is keyed
|
||||
# on every per-request value that any middleware in the stack closes
|
||||
# over in ``__init__`` — drop one and you risk leaking state across
|
||||
# threads. Hits collapse this whole block to a microsecond lookup;
|
||||
# misses pay the original CPU cost AND populate the cache.
|
||||
config_id = agent_config.config_id if agent_config is not None else None
|
||||
|
||||
async def _build_agent() -> Any:
|
||||
return await asyncio.to_thread(
|
||||
_build_compiled_agent_blocking,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
final_system_prompt=final_system_prompt,
|
||||
backend_resolver=backend_resolver,
|
||||
filesystem_mode=filesystem_selection.mode,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
thread_id=thread_id,
|
||||
visibility=visibility,
|
||||
anon_session_id=anon_session_id,
|
||||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
# ``mentioned_document_ids`` is consumed by
|
||||
# ``KnowledgePriorityMiddleware`` per turn via
|
||||
# ``runtime.context`` (Phase 1.5). We still pass the
|
||||
# caller-provided list here for the legacy fallback path
|
||||
# (cache disabled / context not propagated) — the middleware
|
||||
# drains its own copy after the first read so a cached graph
|
||||
# never replays stale mentions.
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
max_input_tokens=_max_input_tokens,
|
||||
flags=_flags,
|
||||
checkpointer=checkpointer,
|
||||
)
|
||||
|
||||
_t0 = time.perf_counter()
|
||||
agent = await asyncio.to_thread(
|
||||
_build_compiled_agent_blocking,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
final_system_prompt=final_system_prompt,
|
||||
backend_resolver=backend_resolver,
|
||||
filesystem_mode=filesystem_selection.mode,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
thread_id=thread_id,
|
||||
visibility=visibility,
|
||||
anon_session_id=anon_session_id,
|
||||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
max_input_tokens=_max_input_tokens,
|
||||
flags=_flags,
|
||||
checkpointer=checkpointer,
|
||||
)
|
||||
if _flags.enable_agent_cache and not _flags.disable_new_agent_stack:
|
||||
# Cache key components — order matters only for human readability;
|
||||
# the resulting hash is what's stored. Every component must
|
||||
# rotate on a real shape change AND stay stable across identical
|
||||
# invocations.
|
||||
cache_key = stable_hash(
|
||||
"v1", # schema version of the key — bump if components change
|
||||
config_id,
|
||||
thread_id,
|
||||
user_id,
|
||||
search_space_id,
|
||||
visibility,
|
||||
filesystem_selection.mode,
|
||||
anon_session_id,
|
||||
tools_signature(
|
||||
tools,
|
||||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
),
|
||||
flags_signature(_flags),
|
||||
system_prompt_hash(final_system_prompt),
|
||||
_max_input_tokens,
|
||||
# ``mentioned_document_ids`` deliberately omitted — middleware
|
||||
# reads it from ``runtime.context`` (Phase 1.5).
|
||||
)
|
||||
agent = await get_cache().get_or_build(cache_key, builder=_build_agent)
|
||||
else:
|
||||
agent = await _build_agent()
|
||||
_perf_log.info(
|
||||
"[create_agent] Middleware stack + graph compiled in %.3fs",
|
||||
"[create_agent] Middleware stack + graph compiled in %.3fs (cache=%s)",
|
||||
time.perf_counter() - _t0,
|
||||
"on"
|
||||
if _flags.enable_agent_cache and not _flags.disable_new_agent_stack
|
||||
else "off",
|
||||
)
|
||||
|
||||
_perf_log.info(
|
||||
|
|
@ -1038,6 +1110,14 @@ def _build_compiled_agent_blocking(
|
|||
noop_mw,
|
||||
retry_mw,
|
||||
fallback_mw,
|
||||
# Coalesce a multi-text-block system message into one block
|
||||
# immediately before the model call. Sits innermost on the
|
||||
# system-message-mutation chain so it observes every appender
|
||||
# (todo / filesystem / skills / subagents …) and prevents
|
||||
# OpenRouter→Anthropic from redistributing ``cache_control``
|
||||
# across N blocks and tripping Anthropic's 4-breakpoint cap.
|
||||
# See ``middleware/flatten_system.py`` for full rationale.
|
||||
FlattenSystemMessageMiddleware(),
|
||||
# Tool-call repair must run after model emits but before
|
||||
# permission / dedup / doom-loop interpret the calls.
|
||||
repair_mw,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue