"""
Context schema definitions for SurfSense agents.

This module defines the per-invocation context object passed to the SurfSense
deep agent via ``agent.astream_events(..., context=ctx)`` (LangGraph >= 0.6).

The agent's compiled graph is the same across invocations (and cached by
``agent_cache``), so anything that varies per turn — the user mentions a
specific document, the front-end issues a unique ``request_id``, etc. —
MUST live on this context object instead of being captured into a
middleware ``__init__`` closure. Middlewares read fields back via
``runtime.context.<field>``; tools read them via ``runtime.context``.

This object is read by the ``search_knowledge_base`` tool (for
``mentioned_document_ids``) and any middleware that needs per-request
state without invalidating the compiled-agent cache.
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
from typing import TypedDict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FileOperationContractState(TypedDict):
    """
    Typed shape of a one-shot file operation contract.

    Used as the type of ``SurfSenseContextSchema.file_operation_contract``.
    All five keys are required (the TypedDict is declared with the default
    ``total=True``). At runtime an instance is a plain ``dict``; the
    annotations are only enforced by static type checkers.

    NOTE(review): the per-key meanings below are inferred from the key
    names only — nothing in this module produces or consumes these values,
    so confirm against the code that populates the contract.
    """

    # Presumably a label for the kind of file operation — TODO confirm.
    intent: str
    # Presumably a score for how certain the intent is; range not shown here.
    confidence: float
    # Presumably the proposed target path for the operation — TODO confirm.
    suggested_path: str
    # Stored as a string; the exact format (e.g. ISO-8601) is not shown here.
    timestamp: str
    # Presumably correlates the contract with the turn it belongs to.
    turn_id: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SurfSenseContextSchema:
    """
    Per-invocation context for the SurfSense deep agent.

    Defaults are chosen so the dataclass can be safely default-constructed
    (LangGraph's ``Runtime.context`` itself defaults to ``None`` if no
    context is supplied — see ``langgraph.runtime.Runtime``). All fields
    are optional: scalar fields default to ``None`` and must be None-checked
    before reading, while list fields default to a fresh empty list per
    instance.

    Phase 1.5 fields:
        search_space_id: Search space the request is scoped to.
        mentioned_document_ids: KB documents the user @-mentioned this turn.
            Read by the ``search_knowledge_base`` tool to pin these docs
            into the retrieval scope. Stays out of the compiled-agent cache
            key — that's the whole point of putting it here.
        mentioned_folder_ids: KB folders the user @-mentioned this turn
            (cloud filesystem mode). Pinned into the ``search_knowledge_base``
            retrieval scope so matches from those folders are prioritised.
        mentioned_connector_ids: Presumably IDs of connectors the user
            @-mentioned this turn, by analogy with the fields above —
            NOTE(review): confirm against the code that populates it.
        mentioned_connectors: One ``dict`` per mentioned connector;
            NOTE(review): the key schema is not defined in this module —
            confirm against the producer before relying on specific keys.
        file_operation_contract: One-shot file operation contract for the
            upcoming turn (reserved; not currently populated).
        turn_id / request_id: Correlation IDs surfaced by the streaming
            task; populated for telemetry.

    Phase 2 will extend with: thread_id, user_id, visibility,
    filesystem_mode, anon_session_id, available_connectors,
    available_document_types, created_by_id (everything currently captured
    by middleware ``__init__`` closures).
    """

    search_space_id: int | None = None
    # ``default_factory`` gives every instance its own list, so mutating one
    # context's mentions can never leak into another invocation.
    mentioned_document_ids: list[int] = field(default_factory=list)
    mentioned_folder_ids: list[int] = field(default_factory=list)
    mentioned_connector_ids: list[int] = field(default_factory=list)
    mentioned_connectors: list[dict[str, object]] = field(default_factory=list)
    file_operation_contract: FileOperationContractState | None = None
    turn_id: str | None = None
    request_id: str | None = None
|