From 28b13ed25bef6d8173b50ae489ad54d815223150 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 4 Jun 2026 12:19:22 +0200
Subject: [PATCH] refactor(agents): move context schema + state reducers to app/agents/shared (slice 2)

Continue promoting the shared agent toolkit out of `new_chat` into the
cross-agent `app/agents/shared` kernel.

- state_reducers.py: clean move (no single-agent importer); all 7
  importers flipped to app.agents.shared.state_reducers.
- context.py: moved to app.agents.shared.context; flipped the
  multi-agent, app, automations, chat-flows and monolith importers. A
  thin re-export shim remains at new_chat/context.py because the
  not-yet-retired single-agent (chat_deepagent) and the new_chat package
  __init__ still import it; the shim goes away with the single-agent
  deletion.
- Updated the stream parity test's annotation normalizer to strip the
  new app.agents.shared.context. prefix
  (SurfSenseContextSchema.__module__ changed with the move), keeping
  monolith<->flows signature parity intact.

Behavior-preserving: definitions unchanged; only import paths move.
1219 tests green.
--- .../main_agent/graph/compile_graph_sync.py | 2 +- .../filesystem/tools/move_file/helpers.py | 2 +- .../shared/filesystem/tools/rm/helpers.py | 2 +- .../shared/filesystem/tools/rmdir/helpers.py | 2 +- .../app/agents/new_chat/context.py | 79 ++++--------------- .../app/agents/new_chat/filesystem_state.py | 4 +- .../agents/new_chat/middleware/filesystem.py | 2 +- .../new_chat/middleware/kb_persistence.py | 2 +- .../app/agents/shared/context.py | 71 +++++++++++++++++ .../{new_chat => shared}/state_reducers.py | 0 surfsense_backend/app/app.py | 2 +- .../actions/builtin/agent_task/invoke.py | 2 +- .../app/tasks/chat/stream_new_chat.py | 2 +- .../flows/new_chat/runtime_context.py | 2 +- .../flows/resume_chat/runtime_context.py | 2 +- .../agents/new_chat/test_state_reducers.py | 2 +- .../test_parallel_refactor_parity.py | 4 +- 17 files changed, 101 insertions(+), 81 deletions(-) create mode 100644 surfsense_backend/app/agents/shared/context.py rename surfsense_backend/app/agents/{new_chat => shared}/state_reducers.py (100%) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py index b86da932a..4de4e9cfe 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py @@ -14,7 +14,7 @@ from langgraph.types import Checkpointer from app.agents.multi_agent_chat.middleware.stack import ( build_main_agent_deepagent_middleware, ) -from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.shared.context import SurfSenseContextSchema from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.new_chat.filesystem_selection import FilesystemMode from app.db import ChatVisibility diff --git 
a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/move_file/helpers.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/move_file/helpers.py index 7613f62f1..22c0c2524 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/move_file/helpers.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/move_file/helpers.py @@ -11,7 +11,7 @@ from langgraph.types import Command from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT -from app.agents.new_chat.state_reducers import _CLEAR +from app.agents.shared.state_reducers import _CLEAR if TYPE_CHECKING: from ...middleware import SurfSenseFilesystemMiddleware diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rm/helpers.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rm/helpers.py index 8a02544d8..4019bc9e9 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rm/helpers.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rm/helpers.py @@ -15,7 +15,7 @@ from langgraph.types import Command from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT -from app.agents.new_chat.state_reducers import _CLEAR +from app.agents.shared.state_reducers import _CLEAR if TYPE_CHECKING: from ...middleware import SurfSenseFilesystemMiddleware diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rmdir/helpers.py 
b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rmdir/helpers.py index de5afe722..9daa99aec 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rmdir/helpers.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem/tools/rmdir/helpers.py @@ -16,7 +16,7 @@ from langgraph.types import Command from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT -from app.agents.new_chat.state_reducers import _CLEAR +from app.agents.shared.state_reducers import _CLEAR from ...middleware.path_resolution import current_cwd from ...shared.paths import is_ancestor_of diff --git a/surfsense_backend/app/agents/new_chat/context.py b/surfsense_backend/app/agents/new_chat/context.py index 1b3ea3d20..f77b1709d 100644 --- a/surfsense_backend/app/agents/new_chat/context.py +++ b/surfsense_backend/app/agents/new_chat/context.py @@ -1,71 +1,20 @@ -""" -Context schema definitions for SurfSense agents. +"""Backward-compatible shim. -This module defines the per-invocation context object passed to the SurfSense -deep agent via ``agent.astream_events(..., context=ctx)`` (LangGraph >= 0.6). - -The agent's compiled graph is the same across invocations (and cached by -``agent_cache``), so anything that varies per turn — the user mentions a -specific document, the front-end issues a unique ``request_id``, etc. — -MUST live on this context object instead of being captured into a -middleware ``__init__`` closure. Middlewares read fields back via -``runtime.context.``; tools read them via ``runtime.context``. - -This object is read inside both ``KnowledgePriorityMiddleware`` (for -``mentioned_document_ids``) and any future middleware that needs -per-request state without invalidating the compiled-agent cache. 
+The agent context schema moved to :mod:`app.agents.shared.context` as part of +promoting the shared agent toolkit out of ``new_chat`` into the cross-agent +kernel. Import from there directly; this re-export keeps the remaining +importers (the not-yet-retired single-agent stack and the ``new_chat`` package +__init__) working during the migration and will be removed with them. """ from __future__ import annotations -from dataclasses import dataclass, field -from typing import TypedDict +from app.agents.shared.context import ( + FileOperationContractState, + SurfSenseContextSchema, +) - -class FileOperationContractState(TypedDict): - intent: str - confidence: float - suggested_path: str - timestamp: str - turn_id: str - - -@dataclass -class SurfSenseContextSchema: - """ - Per-invocation context for the SurfSense deep agent. - - Defaults are chosen so the dataclass can be safely default-constructed - (LangGraph's ``Runtime.context`` itself defaults to ``None`` if no - context is supplied — see ``langgraph.runtime.Runtime``). All fields - are optional; consumers must None-check before reading. - - Phase 1.5 fields: - search_space_id: Search space the request is scoped to. - mentioned_document_ids: KB documents the user @-mentioned this turn. - Read by ``KnowledgePriorityMiddleware`` to seed its priority - list. Stays out of the compiled-agent cache key — that's the - whole point of putting it here. - mentioned_folder_ids: KB folders the user @-mentioned this turn - (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]`` - entries in ```` so the agent prioritises - walking those folders with ``ls`` / ``find_documents``. - file_operation_contract: One-shot file operation contract emitted - by ``FileIntentMiddleware`` for the upcoming turn. - turn_id / request_id: Correlation IDs surfaced by the streaming - task; populated for telemetry. 
- - Phase 2 will extend with: thread_id, user_id, visibility, - filesystem_mode, anon_session_id, available_connectors, - available_document_types, created_by_id (everything currently captured - by middleware ``__init__`` closures). - """ - - search_space_id: int | None = None - mentioned_document_ids: list[int] = field(default_factory=list) - mentioned_folder_ids: list[int] = field(default_factory=list) - mentioned_connector_ids: list[int] = field(default_factory=list) - mentioned_connectors: list[dict[str, object]] = field(default_factory=list) - file_operation_contract: FileOperationContractState | None = None - turn_id: str | None = None - request_id: str | None = None +__all__ = [ + "FileOperationContractState", + "SurfSenseContextSchema", +] diff --git a/surfsense_backend/app/agents/new_chat/filesystem_state.py b/surfsense_backend/app/agents/new_chat/filesystem_state.py index de2c94b41..c2c23483f 100644 --- a/surfsense_backend/app/agents/new_chat/filesystem_state.py +++ b/surfsense_backend/app/agents/new_chat/filesystem_state.py @@ -20,7 +20,7 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics: * ``workspace_tree_text`` — pre-rendered ```` body for the turn. Tools mutate these fields ONLY via ``Command(update=...)`` returns; the -reducers in :mod:`app.agents.new_chat.state_reducers` handle merging. +reducers in :mod:`app.agents.shared.state_reducers` handle merging. 
""" from __future__ import annotations @@ -30,7 +30,7 @@ from typing import Annotated, Any, NotRequired from deepagents.middleware.filesystem import FilesystemState from typing_extensions import TypedDict -from app.agents.new_chat.state_reducers import ( +from app.agents.shared.state_reducers import ( _add_unique_reducer, _dict_merge_with_tombstones_reducer, _int_counter_merge_reducer, diff --git a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py index c46eb98a5..7987ec466 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py +++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py @@ -62,7 +62,7 @@ from app.agents.new_chat.sandbox import ( get_or_create_sandbox, is_sandbox_enabled, ) -from app.agents.new_chat.state_reducers import _CLEAR +from app.agents.shared.state_reducers import _CLEAR logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py index c88dced85..88d89b287 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py +++ b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py @@ -54,7 +54,7 @@ from app.agents.new_chat.path_resolver import ( safe_folder_segment, virtual_path_to_doc, ) -from app.agents.new_chat.state_reducers import _CLEAR +from app.agents.shared.state_reducers import _CLEAR from app.agents.shared.receipt import Receipt, make_receipt from app.db import ( AgentActionLog, diff --git a/surfsense_backend/app/agents/shared/context.py b/surfsense_backend/app/agents/shared/context.py new file mode 100644 index 000000000..1b3ea3d20 --- /dev/null +++ b/surfsense_backend/app/agents/shared/context.py @@ -0,0 +1,71 @@ +""" +Context schema definitions for SurfSense agents. 
+ +This module defines the per-invocation context object passed to the SurfSense +deep agent via ``agent.astream_events(..., context=ctx)`` (LangGraph >= 0.6). + +The agent's compiled graph is the same across invocations (and cached by +``agent_cache``), so anything that varies per turn — the user mentions a +specific document, the front-end issues a unique ``request_id``, etc. — +MUST live on this context object instead of being captured into a +middleware ``__init__`` closure. Middlewares read fields back via +``runtime.context.``; tools read them via ``runtime.context``. + +This object is read inside both ``KnowledgePriorityMiddleware`` (for +``mentioned_document_ids``) and any future middleware that needs +per-request state without invalidating the compiled-agent cache. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TypedDict + + +class FileOperationContractState(TypedDict): + intent: str + confidence: float + suggested_path: str + timestamp: str + turn_id: str + + +@dataclass +class SurfSenseContextSchema: + """ + Per-invocation context for the SurfSense deep agent. + + Defaults are chosen so the dataclass can be safely default-constructed + (LangGraph's ``Runtime.context`` itself defaults to ``None`` if no + context is supplied — see ``langgraph.runtime.Runtime``). All fields + are optional; consumers must None-check before reading. + + Phase 1.5 fields: + search_space_id: Search space the request is scoped to. + mentioned_document_ids: KB documents the user @-mentioned this turn. + Read by ``KnowledgePriorityMiddleware`` to seed its priority + list. Stays out of the compiled-agent cache key — that's the + whole point of putting it here. + mentioned_folder_ids: KB folders the user @-mentioned this turn + (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]`` + entries in ```` so the agent prioritises + walking those folders with ``ls`` / ``find_documents``. 
+ file_operation_contract: One-shot file operation contract emitted + by ``FileIntentMiddleware`` for the upcoming turn. + turn_id / request_id: Correlation IDs surfaced by the streaming + task; populated for telemetry. + + Phase 2 will extend with: thread_id, user_id, visibility, + filesystem_mode, anon_session_id, available_connectors, + available_document_types, created_by_id (everything currently captured + by middleware ``__init__`` closures). + """ + + search_space_id: int | None = None + mentioned_document_ids: list[int] = field(default_factory=list) + mentioned_folder_ids: list[int] = field(default_factory=list) + mentioned_connector_ids: list[int] = field(default_factory=list) + mentioned_connectors: list[dict[str, object]] = field(default_factory=list) + file_operation_contract: FileOperationContractState | None = None + turn_id: str | None = None + request_id: str | None = None diff --git a/surfsense_backend/app/agents/new_chat/state_reducers.py b/surfsense_backend/app/agents/shared/state_reducers.py similarity index 100% rename from surfsense_backend/app/agents/new_chat/state_reducers.py rename to surfsense_backend/app/agents/shared/state_reducers.py diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 11a55e948..ffb159148 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -487,7 +487,7 @@ async def _warm_agent_jit_caches() -> None: ) from langchain_core.tools import tool - from app.agents.new_chat.context import SurfSenseContextSchema + from app.agents.shared.context import SurfSenseContextSchema # Minimal LLM stub. 
``FakeListChatModel`` satisfies # ``BaseChatModel`` without any network or auth — perfect for diff --git a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py index 99e295f30..d8e2f2073 100644 --- a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py +++ b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py @@ -11,7 +11,7 @@ from langgraph.types import Command from sqlalchemy.ext.asyncio import AsyncSession from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent -from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.shared.context import SurfSenseContextSchema from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text from app.db import ChatVisibility, async_session_maker from app.schemas.new_chat import MentionedDocumentInfo diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 5c0d6921b..25d85258e 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -29,7 +29,7 @@ from sqlalchemy.future import select from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.checkpointer import get_checkpointer -from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.shared.context import SurfSenseContextSchema from app.agents.shared.errors import BusyError from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection from app.agents.new_chat.llm_config import ( diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py index 2bbb0b769..e93eff241 100644 --- 
a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py @@ -8,7 +8,7 @@ mention lists / request ids / turn ids without rebuilding the graph. from __future__ import annotations -from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.shared.context import SurfSenseContextSchema def build_new_chat_runtime_context( diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py index 59d5d8ca7..765e7b84d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py +++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py @@ -7,7 +7,7 @@ can rely on ``runtime.context`` always being populated. from __future__ import annotations -from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.shared.context import SurfSenseContextSchema def build_resume_chat_runtime_context( diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py index 185753990..e584f9a34 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py @@ -4,7 +4,7 @@ from __future__ import annotations import pytest -from app.agents.new_chat.state_reducers import ( +from app.agents.shared.state_reducers import ( _CLEAR, _add_unique_reducer, _dict_merge_with_tombstones_reducer, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py index 655f34fa6..77846444a 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py +++ 
b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py @@ -30,7 +30,7 @@ from unittest.mock import AsyncMock, patch import pytest -from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.shared.context import SurfSenseContextSchema from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.stream_new_chat import ( stream_new_chat as old_stream_new_chat, @@ -98,7 +98,7 @@ def _normalize_annotation(ann: Any) -> str: .replace("collections.abc.", "") .replace("app.db.", "") .replace("app.agents.new_chat.filesystem_selection.", "") - .replace("app.agents.new_chat.context.", "") + .replace("app.agents.shared.context.", "") ) # Unwrap ```` → ``int`` (legacy-side type objects). if cleaned.startswith(""):