feat: implement agent caches and fix invalid prompt cache configs

- Added a new function `_warm_agent_jit_caches` to pre-warm agent caches at startup, reducing cold invocation costs. - Updated the `SurfSenseContextSchema` to include per-invocation fields for better state management during agent execution. - Introduced caching mechanisms in various tools to ensure fresh database sessions are used, improving performance and reliability. - Enhanced middleware to support new context features and improve error handling during connector and document type discovery.
2026-05-04 13:22:41 +02:00 · 2026-05-03 06:03:40 -07:00 · 2026-05-03 06:03:40 -07:00 · a34f1fb25c
commit a34f1fb25c
parent 90a653c8c7
60 changed files with 8477 additions and 5381 deletions
--- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
+++ b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
@ -475,3 +475,190 @@ class TestKBSearchPlanSchema:
            )
        )
        assert plan.is_recency_query is False
+
+
+# ── mentioned_document_ids cross-turn drain ────────────────────────────
+
+
+class TestKnowledgePriorityMentionDrain:
+    """Regression tests for the cross-turn ``mentioned_document_ids`` drain.
+
+    The compiled-agent cache reuses a single :class:`KnowledgePriorityMiddleware`
+    instance across turns of the same thread. ``mentioned_document_ids``
+    can therefore enter the middleware via two paths:
+
+    1. The constructor closure (``__init__(mentioned_document_ids=...)``) —
+       seeded by the cache-miss build on turn 1.
+    2. ``runtime.context.mentioned_document_ids`` — supplied freshly per
+       turn by the streaming task.
+
+    Without the drain fix, an empty ``runtime.context.mentioned_document_ids``
+    on turn 2 would fall through to the closure (because ``[]`` is falsy in
+    Python) and replay turn 1's mentions. This class pins down the
+    correct behaviour: the runtime path is authoritative even when empty,
+    and the closure is drained the first time the runtime path fires so
+    no later turn can ever resurrect stale state.
+    """
+
+    @staticmethod
+    def _make_runtime(mention_ids: list[int]):
+        """Minimal runtime stub exposing only ``runtime.context.mentioned_document_ids``."""
+        from types import SimpleNamespace
+
+        return SimpleNamespace(
+            context=SimpleNamespace(mentioned_document_ids=mention_ids),
+        )
+
+    @staticmethod
+    def _planner_llm() -> "FakeLLM":
+        # Planner returns a stable, non-recency plan so we always land in
+        # the hybrid-search branch (where ``fetch_mentioned_documents`` is
+        # invoked alongside the main search).
+        return FakeLLM(
+            json.dumps(
+                {
+                    "optimized_query": "follow up question",
+                    "start_date": None,
+                    "end_date": None,
+                    "is_recency_query": False,
+                }
+            )
+        )
+
+    async def test_runtime_context_overrides_closure_and_drains_it(self, monkeypatch):
+        """Turn 1 with mentions in BOTH closure and runtime context: the
+        runtime path wins AND the closure is drained so a future turn
+        cannot replay it.
+        """
+        fetched_ids: list[list[int]] = []
+
+        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
+            fetched_ids.append(list(document_ids))
+            return []
+
+        async def fake_search_knowledge_base(**_kwargs):
+            return []
+
+        monkeypatch.setattr(
+            "app.agents.new_chat.middleware.knowledge_search.fetch_mentioned_documents",
+            fake_fetch_mentioned_documents,
+        )
+        monkeypatch.setattr(
+            "app.agents.new_chat.middleware.knowledge_search.search_knowledge_base",
+            fake_search_knowledge_base,
+        )
+
+        middleware = KnowledgeBaseSearchMiddleware(
+            llm=self._planner_llm(),
+            search_space_id=42,
+            mentioned_document_ids=[1, 2, 3],
+        )
+
+        await middleware.abefore_agent(
+            {"messages": [HumanMessage(content="what is in those docs?")]},
+            runtime=self._make_runtime([1, 2, 3]),
+        )
+
+        assert fetched_ids == [[1, 2, 3]], (
+            "runtime.context mentions must be the source of truth on turn 1"
+        )
+        assert middleware.mentioned_document_ids == [], (
+            "closure must be drained the first time the runtime path fires "
+            "so no later turn can replay stale mentions"
+        )
+
+    async def test_empty_runtime_context_does_not_replay_closure_mentions(
+        self, monkeypatch
+    ):
+        """Regression: turn 2 with NO mentions must not surface turn 1's
+        mentions from the constructor closure.
+
+        Before the fix, ``if ctx_mentions:`` treated an empty list as
+        absent and fell through to ``elif self.mentioned_document_ids:``,
+        replaying turn 1's mentions. This test pins down the corrected
+        behaviour.
+        """
+        fetched_ids: list[list[int]] = []
+
+        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
+            fetched_ids.append(list(document_ids))
+            return []
+
+        async def fake_search_knowledge_base(**_kwargs):
+            return []
+
+        monkeypatch.setattr(
+            "app.agents.new_chat.middleware.knowledge_search.fetch_mentioned_documents",
+            fake_fetch_mentioned_documents,
+        )
+        monkeypatch.setattr(
+            "app.agents.new_chat.middleware.knowledge_search.search_knowledge_base",
+            fake_search_knowledge_base,
+        )
+
+        # Simulate a cached middleware instance whose closure was seeded
+        # by a previous turn's cache-miss build (mentions=[1,2,3]).
+        middleware = KnowledgeBaseSearchMiddleware(
+            llm=self._planner_llm(),
+            search_space_id=42,
+            mentioned_document_ids=[1, 2, 3],
+        )
+
+        # Turn 2: streaming task supplies an EMPTY mention list (no
+        # mentions on this follow-up turn).
+        await middleware.abefore_agent(
+            {"messages": [HumanMessage(content="what about the next steps?")]},
+            runtime=self._make_runtime([]),
+        )
+
+        assert fetched_ids == [], (
+            "fetch_mentioned_documents must NOT be called when the runtime "
+            "context says there are no mentions for this turn"
+        )
+
+    async def test_legacy_path_fires_only_when_runtime_context_absent(
+        self, monkeypatch
+    ):
+        """Backward-compat: if a caller doesn't supply runtime.context (old
+        non-streaming code path), the closure-injected mentions are still
+        honoured exactly once and then drained.
+        """
+        fetched_ids: list[list[int]] = []
+
+        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
+            fetched_ids.append(list(document_ids))
+            return []
+
+        async def fake_search_knowledge_base(**_kwargs):
+            return []
+
+        monkeypatch.setattr(
+            "app.agents.new_chat.middleware.knowledge_search.fetch_mentioned_documents",
+            fake_fetch_mentioned_documents,
+        )
+        monkeypatch.setattr(
+            "app.agents.new_chat.middleware.knowledge_search.search_knowledge_base",
+            fake_search_knowledge_base,
+        )
+
+        middleware = KnowledgeBaseSearchMiddleware(
+            llm=self._planner_llm(),
+            search_space_id=42,
+            mentioned_document_ids=[7, 8],
+        )
+
+        # First call: no runtime → legacy path uses the closure.
+        await middleware.abefore_agent(
+            {"messages": [HumanMessage(content="initial question")]},
+            runtime=None,
+        )
+        # Second call: still no runtime — closure already drained, so no replay.
+        await middleware.abefore_agent(
+            {"messages": [HumanMessage(content="follow up")]},
+            runtime=None,
+        )
+
+        assert fetched_ids == [[7, 8]], (
+            "legacy path must honour the closure exactly once and then drain it"
+        )
+        assert middleware.mentioned_document_ids == []