refactor(agents): move context schema + state reducers to app/agents/shared (slice 2)

Continue promoting the shared agent toolkit out of `new_chat` into the
cross-agent `app/agents/shared` kernel.

- state_reducers.py: clean move (no single-agent importer); all 7 importers
  flipped to app.agents.shared.state_reducers.
- context.py: moved to app.agents.shared.context; flipped the multi-agent,
  app, automations, chat-flows and monolith importers. A thin re-export shim
  remains at new_chat/context.py because the not-yet-retired single-agent
  (chat_deepagent) and the new_chat package __init__ still import it; the shim
  goes away with the single-agent deletion.
- Updated the stream parity test's annotation normalizer to strip the new
  app.agents.shared.context. prefix (SurfSenseContextSchema.__module__ changed
  with the move), keeping monolith<->flows signature parity intact.

Behavior-preserving: definitions unchanged; only import paths move. 1219 tests green.
This commit is contained in:
CREDO23 2026-06-04 12:19:22 +02:00
parent 0354f73f29
commit 28b13ed25b
17 changed files with 101 additions and 81 deletions

View file

@ -14,7 +14,7 @@ from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware.stack import (
build_main_agent_deepagent_middleware,
)
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility

View file

@ -11,7 +11,7 @@ from langgraph.types import Command
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.state_reducers import _CLEAR
if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware

View file

@ -15,7 +15,7 @@ from langgraph.types import Command
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.state_reducers import _CLEAR
if TYPE_CHECKING:
from ...middleware import SurfSenseFilesystemMiddleware

View file

@ -16,7 +16,7 @@ from langgraph.types import Command
from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.state_reducers import _CLEAR
from ...middleware.path_resolution import current_cwd
from ...shared.paths import is_ancestor_of

View file

@ -1,71 +1,20 @@
"""
Context schema definitions for SurfSense agents.
"""Backward-compatible shim.
This module defines the per-invocation context object passed to the SurfSense
deep agent via ``agent.astream_events(..., context=ctx)`` (LangGraph >= 0.6).
The agent's compiled graph is the same across invocations (and cached by
``agent_cache``), so anything that varies per turn — the user mentions a
specific document, the front-end issues a unique ``request_id``, etc. —
MUST live on this context object instead of being captured into a
middleware ``__init__`` closure. Middlewares read fields back via
``runtime.context.<field>``; tools read them via ``runtime.context``.
This object is read inside both ``KnowledgePriorityMiddleware`` (for
``mentioned_document_ids``) and any future middleware that needs
per-request state without invalidating the compiled-agent cache.
The agent context schema moved to :mod:`app.agents.shared.context` as part of
promoting the shared agent toolkit out of ``new_chat`` into the cross-agent
kernel. Import from there directly; this re-export keeps the remaining
importers (the not-yet-retired single-agent stack and the ``new_chat`` package
__init__) working during the migration and will be removed with them.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TypedDict
from app.agents.shared.context import (
FileOperationContractState,
SurfSenseContextSchema,
)
class FileOperationContractState(TypedDict):
    """Typed shape of a file-operation contract.

    This is the value type of
    ``SurfSenseContextSchema.file_operation_contract``, which that class
    documents as a one-shot contract emitted by ``FileIntentMiddleware`` for
    the upcoming turn.
    """

    # NOTE(review): the individual keys are not documented in this module;
    # confirm their exact semantics (e.g. confidence range, timestamp format)
    # against the producer before relying on them.
    intent: str
    confidence: float
    suggested_path: str
    timestamp: str
    turn_id: str
@dataclass
class SurfSenseContextSchema:
    """
    Per-invocation context for the SurfSense deep agent.

    Defaults are chosen so the dataclass can be safely default-constructed
    (LangGraph's ``Runtime.context`` itself defaults to ``None`` if no
    context is supplied — see ``langgraph.runtime.Runtime``). All fields
    are optional: scalar fields default to ``None`` and list fields to a
    fresh empty list, so consumers must None-check scalars before reading.

    Phase 1.5 fields:
        search_space_id: Search space the request is scoped to.
        mentioned_document_ids: KB documents the user @-mentioned this turn.
            Read by ``KnowledgePriorityMiddleware`` to seed its priority
            list. Stays out of the compiled-agent cache key — that's the
            whole point of putting it here.
        mentioned_folder_ids: KB folders the user @-mentioned this turn
            (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
            entries in ``<priority_documents>`` so the agent prioritises
            walking those folders with ``ls`` / ``find_documents``.
        file_operation_contract: One-shot file operation contract emitted
            by ``FileIntentMiddleware`` for the upcoming turn.
        turn_id / request_id: Correlation IDs surfaced by the streaming
            task; populated for telemetry.

    Phase 2 will extend with: thread_id, user_id, visibility,
    filesystem_mode, anon_session_id, available_connectors,
    available_document_types, created_by_id (everything currently captured
    by middleware ``__init__`` closures).
    """

    search_space_id: int | None = None
    # ``default_factory=list`` gives every instance its own list rather than
    # a single list shared across instances.
    mentioned_document_ids: list[int] = field(default_factory=list)
    mentioned_folder_ids: list[int] = field(default_factory=list)
    # NOTE(review): the two connector fields are not described in the
    # docstring above; presumably they mirror the document/folder mentions
    # for connectors — confirm against the code that populates them.
    mentioned_connector_ids: list[int] = field(default_factory=list)
    mentioned_connectors: list[dict[str, object]] = field(default_factory=list)
    file_operation_contract: FileOperationContractState | None = None
    turn_id: str | None = None
    request_id: str | None = None
__all__ = [
"FileOperationContractState",
"SurfSenseContextSchema",
]

View file

@ -20,7 +20,7 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
* ``workspace_tree_text`` pre-rendered ``<workspace_tree>`` body for the turn.
Tools mutate these fields ONLY via ``Command(update=...)`` returns; the
reducers in :mod:`app.agents.new_chat.state_reducers` handle merging.
reducers in :mod:`app.agents.shared.state_reducers` handle merging.
"""
from __future__ import annotations
@ -30,7 +30,7 @@ from typing import Annotated, Any, NotRequired
from deepagents.middleware.filesystem import FilesystemState
from typing_extensions import TypedDict
from app.agents.new_chat.state_reducers import (
from app.agents.shared.state_reducers import (
_add_unique_reducer,
_dict_merge_with_tombstones_reducer,
_int_counter_merge_reducer,

View file

@ -62,7 +62,7 @@ from app.agents.new_chat.sandbox import (
get_or_create_sandbox,
is_sandbox_enabled,
)
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.state_reducers import _CLEAR
logger = logging.getLogger(__name__)

View file

@ -54,7 +54,7 @@ from app.agents.new_chat.path_resolver import (
safe_folder_segment,
virtual_path_to_doc,
)
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.state_reducers import _CLEAR
from app.agents.shared.receipt import Receipt, make_receipt
from app.db import (
AgentActionLog,

View file

@ -0,0 +1,71 @@
"""
Context schema definitions for SurfSense agents.
This module defines the per-invocation context object passed to the SurfSense
deep agent via ``agent.astream_events(..., context=ctx)`` (LangGraph >= 0.6).
The agent's compiled graph is the same across invocations (and cached by
``agent_cache``), so anything that varies per turn — the user mentions a
specific document, the front-end issues a unique ``request_id``, etc. —
MUST live on this context object instead of being captured into a
middleware ``__init__`` closure. Middlewares read fields back via
``runtime.context.<field>``; tools read them via ``runtime.context``.
This object is read inside both ``KnowledgePriorityMiddleware`` (for
``mentioned_document_ids``) and any future middleware that needs
per-request state without invalidating the compiled-agent cache.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TypedDict
class FileOperationContractState(TypedDict):
    """Typed shape of a file-operation contract.

    This is the value type of
    ``SurfSenseContextSchema.file_operation_contract``, which that class
    documents as a one-shot contract emitted by ``FileIntentMiddleware`` for
    the upcoming turn.
    """

    # NOTE(review): the individual keys are not documented in this module;
    # confirm their exact semantics (e.g. confidence range, timestamp format)
    # against the producer before relying on them.
    intent: str
    confidence: float
    suggested_path: str
    timestamp: str
    turn_id: str
@dataclass
class SurfSenseContextSchema:
    """
    Per-invocation context for the SurfSense deep agent.

    Defaults are chosen so the dataclass can be safely default-constructed
    (LangGraph's ``Runtime.context`` itself defaults to ``None`` if no
    context is supplied — see ``langgraph.runtime.Runtime``). All fields
    are optional: scalar fields default to ``None`` and list fields to a
    fresh empty list, so consumers must None-check scalars before reading.

    Phase 1.5 fields:
        search_space_id: Search space the request is scoped to.
        mentioned_document_ids: KB documents the user @-mentioned this turn.
            Read by ``KnowledgePriorityMiddleware`` to seed its priority
            list. Stays out of the compiled-agent cache key — that's the
            whole point of putting it here.
        mentioned_folder_ids: KB folders the user @-mentioned this turn
            (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
            entries in ``<priority_documents>`` so the agent prioritises
            walking those folders with ``ls`` / ``find_documents``.
        file_operation_contract: One-shot file operation contract emitted
            by ``FileIntentMiddleware`` for the upcoming turn.
        turn_id / request_id: Correlation IDs surfaced by the streaming
            task; populated for telemetry.

    Phase 2 will extend with: thread_id, user_id, visibility,
    filesystem_mode, anon_session_id, available_connectors,
    available_document_types, created_by_id (everything currently captured
    by middleware ``__init__`` closures).
    """

    search_space_id: int | None = None
    # ``default_factory=list`` gives every instance its own list rather than
    # a single list shared across instances.
    mentioned_document_ids: list[int] = field(default_factory=list)
    mentioned_folder_ids: list[int] = field(default_factory=list)
    # NOTE(review): the two connector fields are not described in the
    # docstring above; presumably they mirror the document/folder mentions
    # for connectors — confirm against the code that populates them.
    mentioned_connector_ids: list[int] = field(default_factory=list)
    mentioned_connectors: list[dict[str, object]] = field(default_factory=list)
    file_operation_contract: FileOperationContractState | None = None
    turn_id: str | None = None
    request_id: str | None = None

View file

@ -487,7 +487,7 @@ async def _warm_agent_jit_caches() -> None:
)
from langchain_core.tools import tool
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
# Minimal LLM stub. ``FakeListChatModel`` satisfies
# ``BaseChatModel`` without any network or auth — perfect for

View file

@ -11,7 +11,7 @@ from langgraph.types import Command
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
from app.db import ChatVisibility, async_session_maker
from app.schemas.new_chat import MentionedDocumentInfo

View file

@ -29,7 +29,7 @@ from sqlalchemy.future import select
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
from app.agents.new_chat.checkpointer import get_checkpointer
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
from app.agents.shared.errors import BusyError
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
from app.agents.new_chat.llm_config import (

View file

@ -8,7 +8,7 @@ mention lists / request ids / turn ids without rebuilding the graph.
from __future__ import annotations
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
def build_new_chat_runtime_context(

View file

@ -7,7 +7,7 @@ can rely on ``runtime.context`` always being populated.
from __future__ import annotations
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
def build_resume_chat_runtime_context(

View file

@ -4,7 +4,7 @@ from __future__ import annotations
import pytest
from app.agents.new_chat.state_reducers import (
from app.agents.shared.state_reducers import (
_CLEAR,
_add_unique_reducer,
_dict_merge_with_tombstones_reducer,

View file

@ -30,7 +30,7 @@ from unittest.mock import AsyncMock, patch
import pytest
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.shared.context import SurfSenseContextSchema
from app.services.new_streaming_service import VercelStreamingService
from app.tasks.chat.stream_new_chat import (
stream_new_chat as old_stream_new_chat,
@ -98,7 +98,7 @@ def _normalize_annotation(ann: Any) -> str:
.replace("collections.abc.", "")
.replace("app.db.", "")
.replace("app.agents.new_chat.filesystem_selection.", "")
.replace("app.agents.new_chat.context.", "")
.replace("app.agents.shared.context.", "")
)
# Unwrap ``<class 'int'>`` → ``int`` (legacy-side type objects).
if cleaned.startswith("<class '") and cleaned.endswith("'>"):