diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index e133a2bc5..c1bfcc538 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -250,12 +250,12 @@ LANGSMITH_PROJECT=surfsense # ============================================================================= -# OPTIONAL: New-chat agent feature flags (OpenCode-port) +# OPTIONAL: New-chat agent feature flags # ============================================================================= # Master kill-switch — when true, every flag below is forced OFF. # SURFSENSE_DISABLE_NEW_AGENT_STACK=false -# Tier 1 — Agent quality +# Agent quality # SURFSENSE_ENABLE_CONTEXT_EDITING=false # SURFSENSE_ENABLE_COMPACTION_V2=false # SURFSENSE_ENABLE_RETRY_AFTER=false @@ -265,24 +265,24 @@ LANGSMITH_PROJECT=surfsense # SURFSENSE_ENABLE_TOOL_CALL_REPAIR=false # SURFSENSE_ENABLE_DOOM_LOOP=false # leave OFF until UI handles permission='doom_loop' -# Tier 2 — Safety +# Safety # SURFSENSE_ENABLE_PERMISSION=false # SURFSENSE_ENABLE_BUSY_MUTEX=false # SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false # adds a per-turn LLM call -# Tier 3b — Observability (also requires OTEL_EXPORTER_OTLP_ENDPOINT) +# Observability — OTel (also requires OTEL_EXPORTER_OTLP_ENDPOINT) # SURFSENSE_ENABLE_OTEL=false -# Tier 4 — Skills + subagents +# Skills + subagents # SURFSENSE_ENABLE_SKILLS=false # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false # SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false -# Tier 5 — Snapshot / revert +# Snapshot / revert # SURFSENSE_ENABLE_ACTION_LOG=false # SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships -# Tier 6 — Plugins +# Plugins # SURFSENSE_ENABLE_PLUGIN_LOADER=false # Comma-separated allowlist of plugin entry-point names # SURFSENSE_ALLOWED_PLUGINS=year_substituter diff --git a/surfsense_backend/alembic/versions/130_add_agent_action_log.py b/surfsense_backend/alembic/versions/130_add_agent_action_log.py index 2f06b8ddd..f86a8a3b5 100644 --- 
a/surfsense_backend/alembic/versions/130_add_agent_action_log.py +++ b/surfsense_backend/alembic/versions/130_add_agent_action_log.py @@ -4,8 +4,10 @@ Revision ID: 130 Revises: 129 Create Date: 2026-04-28 -Tier 5.2 in the OpenCode-port plan. Adds the append-only ``agent_action_log`` -table that :class:`ActionLogMiddleware` writes to after every tool call. +Adds the append-only ``agent_action_log`` table that +:class:`ActionLogMiddleware` writes to after every tool call. Each row +optionally carries a ``reverse_descriptor`` payload used by +``POST /api/threads/{thread_id}/revert/{action_id}`` to undo the action. """ from __future__ import annotations diff --git a/surfsense_backend/alembic/versions/131_add_document_revisions.py b/surfsense_backend/alembic/versions/131_add_document_revisions.py index 46c6991b6..95ce0e032 100644 --- a/surfsense_backend/alembic/versions/131_add_document_revisions.py +++ b/surfsense_backend/alembic/versions/131_add_document_revisions.py @@ -4,7 +4,7 @@ Revision ID: 131 Revises: 130 Create Date: 2026-04-28 -Tier 5.1 in the OpenCode-port plan. Adds two snapshot tables: +Adds two snapshot tables that back the per-action revert flow: * ``document_revisions``: pre-mutation snapshot of NOTE/FILE/EXTENSION docs. * ``folder_revisions``: pre-mutation snapshot of folder mkdir/move/delete. diff --git a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py index 0e81eacb5..ff5b52e18 100644 --- a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py +++ b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py @@ -4,11 +4,10 @@ Revision ID: 132 Revises: 131 Create Date: 2026-04-28 -Tier 2.1 in the OpenCode-port plan. Adds the persistent ``agent_permission_rules`` -table consumed by :class:`PermissionMiddleware` at agent build time. 
Rules -can be scoped at search-space (``user_id`` / ``thread_id`` NULL), -user-wide (``user_id`` set, ``thread_id`` NULL), or per-thread -(``thread_id`` set). +Adds the persistent ``agent_permission_rules`` table consumed by +:class:`PermissionMiddleware` at agent build time. Rules can be scoped +at search-space (``user_id`` / ``thread_id`` NULL), user-wide +(``user_id`` set, ``thread_id`` NULL), or per-thread (``thread_id`` set). """ from __future__ import annotations diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 3ca44dd4f..bfb94ba2d 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -353,11 +353,12 @@ async def create_surfsense_deep_agent( additional_tools=list(additional_tools) if additional_tools else None, ) - # Tier 1.6: register `invalid` tool. It is dispatched only when - # ToolCallNameRepairMiddleware rewrites a malformed call. We - # intentionally append it AFTER ``build_tools_async`` so it never - # appears in the system-prompt tool list (which is built from the - # registry, not the bound tool list). + # Register the ``invalid`` tool only when tool-call repair is on. It + # is dispatched only when :class:`ToolCallNameRepairMiddleware` + # rewrites a malformed call. We intentionally append it AFTER + # ``build_tools_async`` so it never appears in the system-prompt + # tool list (which is built from the registry, not the bound tool + # list). _flags: AgentFeatureFlags = get_flags() if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in { t.name for t in tools @@ -455,10 +456,10 @@ async def create_surfsense_deep_agent( return agent -# Tier 1.1: tools whose output is too costly / lossy to discard. Keep -# this conservative — anything listed here is *never* pruned by -# ContextEditingMiddleware. 
The list is filtered against actually-bound -# tool names so disabled connectors don't show up here. +# Tools whose output is too costly / lossy to discard. Keep this +# conservative — anything listed here is *never* pruned by +# :class:`ContextEditingMiddleware`. The list is filtered against +# actually-bound tool names so disabled connectors don't show up here. _PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset( { "generate_report", @@ -485,11 +486,12 @@ def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]: return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled) -# Tier 2.1 / cleanup: opencode `Permission.disabled` parity. Replaces the -# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE``-based gating with a -# declarative pass over :data:`BUILTIN_TOOLS`. Each tool that declares a -# ``required_connector`` not present in ``available_connectors`` gets a -# deny rule so any execution attempt short-circuits with permission_denied. +# Connector gating: any tool whose ``ToolDefinition.required_connector`` +# isn't actually wired up gets a synthesized permission deny rule so +# execution attempts short-circuit with ``permission_denied`` instead of +# bubbling up provider-specific 401/404 errors. Mirrors OpenCode's +# ``Permission.disabled`` (declarative, per-tool gating) — replaces the +# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE`` substring-heuristic. def _synthesize_connector_deny_rules( *, available_connectors: list[str] | None, @@ -503,11 +505,6 @@ def _synthesize_connector_deny_rules( 1. It is currently bound (``enabled_tool_names``). 2. It declares a ``required_connector``. 3. That connector is *not* in ``available_connectors``. - - This expresses the OpenCode ``Permission.disabled`` semantics - declaratively, replacing the substring-heuristic binary gating - that used to consult the hardcoded ``_CONNECTOR_TYPE_TO_SEARCHABLE`` - map. 
""" available = set(available_connectors or []) deny: list[Rule] = [] @@ -581,7 +578,7 @@ def _build_compiled_agent_blocking( "middleware": gp_middleware, } - # Tier 4.3: specialized user-facing subagents (explore, report_writer, + # Specialized user-facing subagents (explore, report_writer, # connector_negotiator). Registered through SubAgentMiddleware alongside # the general-purpose spec so the parent's `task` tool can address them # by name. Off by default until the flag flips so existing deployments @@ -629,14 +626,13 @@ def _build_compiled_agent_blocking( # ``wrap_model_call`` ordering: the FIRST middleware in the list is the # OUTERMOST wrapper. To ensure prune executes before summarization, # place ``SpillingContextEditingMiddleware`` before - # ``SurfSenseCompactionMiddleware`` (Tier 1.1 + 1.3). - # Compaction is the canonical token-budget defense after the - # cleanup tier removed ``SafeSummarizationMiddleware``. The Bedrock - # buffer-empty defense is folded into ``SurfSenseCompactionMiddleware``. + # ``SurfSenseCompactionMiddleware``. Compaction is the canonical + # token-budget defense; the Bedrock buffer-empty defense is folded + # into ``SurfSenseCompactionMiddleware``. summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend) _ = flags.enable_compaction_v2 # historical flag; retained for telemetry parity - # Tier 1.1: ContextEditing prune. Trigger at 55% of model_max_input, + # ContextEditing prune. Trigger at 55% of ``max_input_tokens``, # earlier than summarization (~85%). When disabled, no edit runs. context_edit_mw = None if ( @@ -664,7 +660,10 @@ def _build_compiled_agent_blocking( backend_resolver=backend_resolver, ) - # Tier 1.4 / 1.8 / 1.9 / 1.10: built-in retry/fallback/limits. + # Resilience knobs: header-aware retry, model fallback, and + # per-thread / per-run call-count limits. 
The fallback / limit + # middlewares are vanilla LangChain primitives; ``RetryAfter`` is + # SurfSense's header-aware variant (see its module docstring). retry_mw = ( RetryAfterMiddleware(max_retries=3) if flags.enable_retry_after and not flags.disable_new_agent_stack @@ -700,14 +699,16 @@ def _build_compiled_agent_blocking( else None ) - # Tier 1.5: provider-compat _noop injection. + # Provider-compat ``_noop`` injection (mirrors OpenCode's + # ``llm.ts`` workaround for providers that 400 when a call has no + # tools but the history contains prior ``tool_calls``). noop_mw = ( NoopInjectionMiddleware() if flags.enable_compaction_v2 and not flags.disable_new_agent_stack else None ) - # Tier 1.7: tool-call name repair (lowercase + invalid fallback). + # Tool-call name repair (lowercase + ``invalid`` fallback). # # ``registered_tool_names`` MUST cover every tool the model can legitimately # call. That includes the bound ``tools`` list AND every tool provided by @@ -737,18 +738,22 @@ def _build_compiled_agent_blocking( } repair_mw = ToolCallNameRepairMiddleware( registered_tool_names=registered_names, - fuzzy_match_threshold=None, # opencode parity: no fuzzy step + # Disable fuzzy matching to avoid silent rewrites; the + # lowercase + ``invalid`` fallback alone covers >95% of + # observed model errors. + fuzzy_match_threshold=None, ) - # Tier 1.11: doom-loop detector. Off by default until UI handles. + # Doom-loop detector. Off by default until the frontend handles + # ``permission == "doom_loop"`` interrupts. doom_loop_mw = ( DoomLoopMiddleware(threshold=3) if flags.enable_doom_loop and not flags.disable_new_agent_stack else None ) - # Tier 2.1: PermissionMiddleware. Layers, earliest -> latest (last - # match wins per opencode): + # PermissionMiddleware. Layers, earliest -> latest (last match wins, + # same evaluation order as OpenCode's ``permission/index.ts``): # # 1. ``surfsense_defaults`` — single ``allow */*`` rule. 
SurfSense # already runs per-tool HITL (see ``tools/hitl.py``) for mutating @@ -778,11 +783,11 @@ def _build_compiled_agent_blocking( ], ) - # Tier 5.2: ActionLogMiddleware. Off by default until the - # ``agent_action_log`` table is migrated. When enabled, persists one - # row per tool call with optional reverse_descriptor for - # /api/threads/{thread_id}/revert/{action_id}. Sits inside permission - # so denied calls aren't logged as completions. + # ActionLogMiddleware. Off by default until the ``agent_action_log`` + # table is migrated. When enabled, persists one row per tool call + # with optional reverse_descriptor for + # ``POST /api/threads/{thread_id}/revert/{action_id}``. Sits inside + # ``permission`` so denied calls aren't logged as completions. action_log_mw: ActionLogMiddleware | None = None if ( flags.enable_action_log @@ -804,23 +809,24 @@ def _build_compiled_agent_blocking( ) action_log_mw = None - # Tier 2.2: per-thread busy mutex. + # Per-thread busy mutex (refuse a second concurrent turn on the same + # thread; see :class:`BusyMutexMiddleware` docstring). busy_mutex_mw: BusyMutexMiddleware | None = ( BusyMutexMiddleware() if flags.enable_busy_mutex and not flags.disable_new_agent_stack else None ) - # Tier 3b: OpenTelemetry spans (model.call + tool.call). Lives just - # inside BusyMutex so it spans every retry/fallback attempt of the - # current turn but never wraps a queued/blocked turn. + # OpenTelemetry spans (model.call + tool.call). Lives just inside + # BusyMutex so it spans every retry/fallback attempt of the current + # turn but never wraps a queued/blocked turn. otel_mw: OtelSpanMiddleware | None = ( OtelSpanMiddleware() if flags.enable_otel and not flags.disable_new_agent_stack else None ) - # Tier 6: plugin entry-point loader. Off by default; opt-in via the + # Plugin entry-point loader. Off by default; opt-in via the # ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. 
The allowlist is read from # the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future # PR can wire it through ``global_llm_config.yaml``. @@ -845,10 +851,10 @@ def _build_compiled_agent_blocking( ) plugin_middlewares = [] - # Tier 4.1: SkillsMiddleware. Loads built-in + space-authored skills - # via a CompositeBackend. Sources are layered: built-in first, space - # last, so a search-space-authored skill of the same name overrides - # the bundled one. + # SkillsMiddleware (deepagents) loads built-in + space-authored + # skills via a CompositeBackend. Sources are layered: built-in first, + # space last, so a search-space-authored skill of the same name + # overrides the bundled one. skills_mw: SkillsMiddleware | None = None if flags.enable_skills and not flags.disable_new_agent_stack: try: @@ -865,7 +871,8 @@ def _build_compiled_agent_blocking( logging.warning("SkillsMiddleware init failed; skipping: %s", exc) skills_mw = None - # Tier 2.5: LLM-driven tool selection for >30 tools. + # LangChain's LLM-driven tool selection — only enabled for stacks + # large enough to need narrowing (>30 tools). selector_mw: LLMToolSelectorMiddleware | None = None if ( flags.enable_llm_tool_selector @@ -934,12 +941,12 @@ def _build_compiled_agent_blocking( ) if filesystem_mode == FilesystemMode.CLOUD else None, - # Tier 4.1: skill loader. Placed before SubAgentMiddleware so - # subagents inherit the same skill metadata (subagent specs reference - # the same source paths via `default_skills_sources()`). + # Skill loader. Placed before SubAgentMiddleware so subagents + # inherit the same skill metadata (subagent specs reference the + # same source paths via ``default_skills_sources()``). skills_mw, SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs), - # Tier 2.5: tool selection (only when >30 tools and flag on). + # Tool selection (only when >30 tools and flag on). selector_mw, # Defensive caps, then prune, then summarize. 
model_call_limit_mw, @@ -954,19 +961,19 @@ def _build_compiled_agent_blocking( # Tool-call repair must run after model emits but before # permission / dedup / doom-loop interpret the calls. repair_mw, - # Tier 2.1: deny/ask BEFORE the calls are forwarded to tool nodes. + # Permission deny/ask BEFORE the calls are forwarded to tool nodes. permission_mw, doom_loop_mw, - # Tier 5.2: action log sits inside permission so denied calls - # don't appear as completions, and outside dedup so each unique - # tool invocation gets its own row. + # Action log sits inside permission so denied calls don't appear + # as completions, and outside dedup so each unique tool invocation + # gets its own row. action_log_mw, PatchToolCallsMiddleware(), DedupHITLToolCallsMiddleware(agent_tools=list(tools)), - # Tier 6: plugin slot — sits just before AnthropicCache so plugin-side - # transforms see the final tool result and run before any caching - # heuristics. Multiple plugins in declared order; loader filtered by - # the admin allowlist already. + # Plugin slot — sits just before AnthropicCache so plugin-side + # transforms see the final tool result and run before any + # caching heuristics. Multiple plugins in declared order; loader + # filtered by the admin allowlist already. *plugin_middlewares, AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), ] diff --git a/surfsense_backend/app/agents/new_chat/errors.py b/surfsense_backend/app/agents/new_chat/errors.py index b7bac4536..a17333acc 100644 --- a/surfsense_backend/app/agents/new_chat/errors.py +++ b/surfsense_backend/app/agents/new_chat/errors.py @@ -2,10 +2,10 @@ Typed error taxonomy for the SurfSense agent stack. Used by: -- :class:`RetryAfterMiddleware` (Tier 1.4) — its ``retry_on`` callable - consults the error code to decide whether a retry is appropriate. -- :class:`PermissionMiddleware` (Tier 2.1) — emits - ``code="permission_denied"`` errors when a deny rule trips. 
+- :class:`RetryAfterMiddleware` — its ``retry_on`` callable consults + the error code to decide whether a retry is appropriate. +- :class:`PermissionMiddleware` — emits ``code="permission_denied"`` + errors when a deny rule trips. - All tools — return :class:`StreamingError` payloads in ``ToolMessage.additional_kwargs["error"]`` so the model and the retry/permission layers share a contract. diff --git a/surfsense_backend/app/agents/new_chat/feature_flags.py b/surfsense_backend/app/agents/new_chat/feature_flags.py index 89c4fb14f..55525abc5 100644 --- a/surfsense_backend/app/agents/new_chat/feature_flags.py +++ b/surfsense_backend/app/agents/new_chat/feature_flags.py @@ -1,9 +1,10 @@ """ Feature flags for the SurfSense new_chat agent stack. -These flags control rollout of OpenCode-pattern middleware ported into -SurfSense. They follow a "default-OFF for risky things, default-ON for -safe upgrades, master kill-switch for everything new" model. +These flags gate the newer agent middleware (some ported from OpenCode, +some sourced from ``langchain.agents.middleware`` / ``deepagents``, some +SurfSense-native). They follow a "default-OFF for risky things, +default-ON for safe upgrades, master kill-switch for everything new" model. All new middleware checks its flag at agent build time. If the master kill-switch ``SURFSENSE_DISABLE_NEW_AGENT_STACK`` is set, every new @@ -57,7 +58,7 @@ class AgentFeatureFlags: # regardless of its env value. Used for rapid rollback. 
disable_new_agent_stack: bool = False - # Tier 1 — Agent quality + # Agent quality — context budget, retry/limits, name-repair, doom-loop enable_context_editing: bool = False enable_compaction_v2: bool = False enable_retry_after: bool = False @@ -69,26 +70,26 @@ class AgentFeatureFlags: False # Default OFF until UI handles permission='doom_loop' ) - # Tier 2 — Safety + # Safety — permissions, concurrency, tool-set narrowing enable_permission: bool = False # Default OFF for first deploy enable_busy_mutex: bool = False enable_llm_tool_selector: bool = False # Default OFF — adds per-turn LLM cost - # Tier 4 — Skills + subagents + # Skills + subagents enable_skills: bool = False enable_specialized_subagents: bool = False enable_kb_planner_runnable: bool = False - # Tier 5 — Snapshot / revert + # Snapshot / revert enable_action_log: bool = False enable_revert_route: bool = ( False # Backend ships before UI; route returns 503 until this flips ) - # Tier 6 — Plugins + # Plugins enable_plugin_loader: bool = False - # Tier 3b — OTel (orthogonal: also requires OTEL_EXPORTER_OTLP_ENDPOINT) + # Observability — OTel (orthogonal; also requires OTEL_EXPORTER_OTLP_ENDPOINT) enable_otel: bool = False @classmethod @@ -108,7 +109,7 @@ class AgentFeatureFlags: return cls( disable_new_agent_stack=False, - # Tier 1 + # Agent quality enable_context_editing=_env_bool("SURFSENSE_ENABLE_CONTEXT_EDITING", False), enable_compaction_v2=_env_bool("SURFSENSE_ENABLE_COMPACTION_V2", False), enable_retry_after=_env_bool("SURFSENSE_ENABLE_RETRY_AFTER", False), @@ -121,13 +122,13 @@ class AgentFeatureFlags: "SURFSENSE_ENABLE_TOOL_CALL_REPAIR", False ), enable_doom_loop=_env_bool("SURFSENSE_ENABLE_DOOM_LOOP", False), - # Tier 2 + # Safety enable_permission=_env_bool("SURFSENSE_ENABLE_PERMISSION", False), enable_busy_mutex=_env_bool("SURFSENSE_ENABLE_BUSY_MUTEX", False), enable_llm_tool_selector=_env_bool( "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR", False ), - # Tier 4 + # Skills + subagents 
enable_skills=_env_bool("SURFSENSE_ENABLE_SKILLS", False), enable_specialized_subagents=_env_bool( "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", False @@ -135,12 +136,12 @@ class AgentFeatureFlags: enable_kb_planner_runnable=_env_bool( "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", False ), - # Tier 5 + # Snapshot / revert enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", False), enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", False), - # Tier 6 + # Plugins enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False), - # Tier 3b + # Observability enable_otel=_env_bool("SURFSENSE_ENABLE_OTEL", False), ) diff --git a/surfsense_backend/app/agents/new_chat/middleware/busy_mutex.py b/surfsense_backend/app/agents/new_chat/middleware/busy_mutex.py index 1d95638d0..c57d85004 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/busy_mutex.py +++ b/surfsense_backend/app/agents/new_chat/middleware/busy_mutex.py @@ -1,11 +1,16 @@ """ BusyMutexMiddleware — per-thread asyncio lock + cancel token. -Tier 2.2 in the OpenCode-port plan. Mirrors opencode's -``Stream.scoped(AbortController)`` pattern (single-process, in-memory -lock + cooperative cancellation). For multi-worker deployments a -distributed lock backend (Redis or PostgreSQL advisory locks) is a -phase-2 follow-up. +LangChain has no built-in concept of "this thread is already running a +turn — refuse the second concurrent request". Without it, a user +double-clicking "send" or refreshing the page mid-stream can spawn two +turns racing on the same checkpoint, producing duplicated tool calls +and mangled state. + +Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a +single-process, in-memory lock + cooperative cancellation token keyed by +``thread_id``. For multi-worker deployments a distributed lock backend +(Redis or PostgreSQL advisory locks) is a phase-2 follow-up. 
What this provides: - A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``; diff --git a/surfsense_backend/app/agents/new_chat/middleware/compaction.py b/surfsense_backend/app/agents/new_chat/middleware/compaction.py index b0a1a7ec5..16361e16b 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/compaction.py +++ b/surfsense_backend/app/agents/new_chat/middleware/compaction.py @@ -5,21 +5,22 @@ Subclasses :class:`deepagents.middleware.summarization.SummarizationMiddleware` to add SurfSense-specific behavior: 1. **Structured summary template** (OpenCode-style ``## Goal / Constraints / - Progress / Key Decisions / Next Steps / Critical Context / Relevant Files``). + Progress / Key Decisions / Next Steps / Critical Context / Relevant Files``) + — see :data:`SURFSENSE_SUMMARY_PROMPT` below. The base + ``SummarizationMiddleware`` only ships a freeform "summarize this" + prompt; the structured template is ported from OpenCode's + ``compaction.ts``. 2. **Protect SurfSense-specific SystemMessages** so injected hints (````, ````, ````, ````, ````, ````, ````) are *not* summarized away and are kept verbatim in the post-summary - message list. + message list. Mirrors OpenCode's ``PRUNE_PROTECTED_TOOLS`` philosophy + (some message types are part of the agent's contract and must survive + compaction unchanged). 3. **Sanitize ``content=None``** when feeding messages into ``get_buffer_string`` (Azure OpenAI / LiteLLM defense — when a provider streams an AIMessage containing only tool_calls and no text, ``content`` can be ``None`` and - ``get_buffer_string`` crashes iterating over ``None``). This used to live in - ``safe_summarization.py``; folded in here. - -This replaces ``app.agents.new_chat.middleware.safe_summarization``. - -Tier 1.3 in the OpenCode-port plan. + ``get_buffer_string`` crashes iterating over ``None``). SurfSense-specific. 
""" from __future__ import annotations @@ -42,7 +43,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -# OpenCode-faithful structured summary template. Mirrors +# Structured summary template ported from OpenCode's # ``opencode/packages/opencode/src/session/compaction.ts:40-75``. Kept as a # module-level constant so unit tests can assert on its sections. SURFSENSE_SUMMARY_PROMPT = """ diff --git a/surfsense_backend/app/agents/new_chat/middleware/context_editing.py b/surfsense_backend/app/agents/new_chat/middleware/context_editing.py index 360e3e28f..39bc57c8b 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/context_editing.py +++ b/surfsense_backend/app/agents/new_chat/middleware/context_editing.py @@ -1,15 +1,15 @@ """ SpillToBackendEdit + SpillingContextEditingMiddleware. -Mirrors OpenCode's spill-to-disk behavior in -``opencode/packages/opencode/src/tool/truncate.ts``. Before -``ClearToolUsesEdit`` rewrites old ``ToolMessage.content`` to a placeholder, -we capture the full original content and write it to the runtime backend -under ``/tool_outputs/{thread_id}/{message_id}.txt``. The placeholder is -upgraded to ``"[cleared — full output at /tool_outputs/.../{id}.txt; ask the -explore subagent to read it]"`` so the agent can recover it on demand. - -Tier 1.2 in the OpenCode-port plan. +LangChain's :class:`ClearToolUsesEdit` discards old ``ToolMessage.content`` +when the context-editing budget triggers, replacing the body with a fixed +placeholder. That's lossy: anything the agent might want to revisit is +gone. The spill-to-disk pattern (originally from OpenCode's +``opencode/packages/opencode/src/tool/truncate.ts``) keeps the prune +behavior but writes the full original payload to the runtime backend +under ``/tool_outputs/{thread_id}/{message_id}.txt`` first. The +placeholder is then upgraded to point at the spill path so the agent +(or a subagent) can read it back on demand. 
Why this is a middleware subclass instead of a plain ``ContextEdit``: ``ContextEdit.apply`` is sync, but writing to the backend is async. We diff --git a/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py b/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py index 3aff524fe..c55347284 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py +++ b/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py @@ -9,11 +9,10 @@ the duplicate call is stripped from the AIMessage that gets checkpointed. That means it is also safe across LangGraph ``interrupt()`` boundaries: the removed call will never appear on graph resume. -Dedup-key resolution order (Tier 2.3 / cleanup in the OpenCode-port plan): +Dedup-key resolution order: 1. :class:`ToolDefinition.dedup_key` — callable provided by the registry - entry. This is the canonical mechanism after the cleanup-tier removal - of the legacy ``PRIMARY_ARG`` map. + entry. This is the canonical mechanism. 2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg name; used by MCP / Composio tools whose schemas the registry doesn't see. @@ -72,9 +71,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware): # type: ignore[type-arg] The dedup-resolver map is built from two sources, in priority order: 1. ``tool.metadata["dedup_key"]`` — callable provided by the registry's - ``ToolDefinition.dedup_key`` (Tier 2.3). Receives the args dict - and returns a string signature. This is the canonical mechanism - after the cleanup-tier removal of the legacy ``PRIMARY_ARG`` map. + ``ToolDefinition.dedup_key``. Receives the args dict and returns + a string signature. This is the canonical mechanism. 2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg name; primarily used by MCP / Composio tools. 
""" diff --git a/surfsense_backend/app/agents/new_chat/middleware/doom_loop.py b/surfsense_backend/app/agents/new_chat/middleware/doom_loop.py index 1dde87752..850ecd1d2 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/doom_loop.py +++ b/surfsense_backend/app/agents/new_chat/middleware/doom_loop.py @@ -1,17 +1,19 @@ """ DoomLoopMiddleware — pattern-based detector for repeated identical tool calls. -Mirrors ``opencode/packages/opencode/src/session/processor.ts`` doom-loop -behavior. When the same tool with the same arguments is called N times -in a row, the agent has likely entered an infinite loop. We surface this -to the user as an interrupt with ``permission="doom_loop"`` so the UI -can render an "Are you stuck? Continue / cancel?" affordance. +LangChain has :class:`ToolCallLimitMiddleware` which caps the *total* number +of tool calls per turn — but it can't tell apart "10 distinct, useful +calls" from "the same call 10 times in a row". This middleware fills that +gap with a sliding-window check on tool-call signatures, ported from +OpenCode's ``packages/opencode/src/session/processor.ts``. -Tier 1.11 in the OpenCode-port plan. +When the same tool with the same arguments is called N times in a row, +the agent has likely entered an infinite loop. We surface this to the +user as an interrupt with ``permission="doom_loop"`` so the UI can +render an "Are you stuck? Continue / cancel?" affordance. This ships **OFF by default** until the frontend explicitly handles -``context.permission == "doom_loop"`` interrupts (the plan flips -``SURFSENSE_ENABLE_DOOM_LOOP=true`` once the UI is ready). +``context.permission == "doom_loop"`` interrupts. Wire format: uses SurfSense's existing ``interrupt()`` payload shape (see ``app/agents/new_chat/tools/hitl.py``): @@ -69,7 +71,7 @@ class DoomLoopMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respon Args: threshold: How many consecutive identical signatures count as a - doom loop. 
Default 3 (opencode parity). + doom loop. Default 3 (matches OpenCode's processor.ts). """ def __init__(self, *, threshold: int = 3) -> None: @@ -182,7 +184,7 @@ class DoomLoopMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respon signatures[-1] if signatures else "", ) - # Tier 3b: interrupt.raised span with permission=doom_loop attribute + # Open an interrupt.raised span with permission=doom_loop attribute # so dashboards can break out doom-loop interrupts from regular # permission asks via the ``interrupt.permission`` attribute. with ot.interrupt_span( diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py index 08ca8e18b..0820e8c3e 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py +++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py @@ -592,10 +592,11 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] self.available_document_types = available_document_types self.top_k = top_k self.mentioned_document_ids = mentioned_document_ids or [] - # Tier 4.2: build the kb-planner private Runnable ONCE here so we - # don't pay the create_agent compile cost (50-200ms) on every turn. - # Disabled by default behind ``enable_kb_planner_runnable``; when off - # the planner falls back to the legacy ``self.llm.ainvoke`` path. + # Build the kb-planner private Runnable ONCE here so we don't pay + # the ``create_agent`` compile cost (50-200ms) on every turn. + # Disabled by default behind ``enable_kb_planner_runnable``; when + # off the planner falls back to the legacy ``self.llm.ainvoke`` + # path. self._planner: Runnable | None = None self._planner_compile_failed = False @@ -608,9 +609,9 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] lazily on first call, then memoized via ``self._planner``. 
The compiled agent is constructed without tools — the planner's - contract is "answer with structured JSON" — but with ``RetryAfter`` - + the OpenCode-port retry/limit middleware so it shares the parent - agent's resilience guarantees. + contract is "answer with structured JSON" — but it inherits the + :class:`RetryAfterMiddleware` so transient rate-limit errors + from the planner LLM call don't fail the whole turn. """ if self._planner is not None or self._planner_compile_failed: return self._planner @@ -658,9 +659,9 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] loop = asyncio.get_running_loop() t0 = loop.time() - # Tier 4.2: prefer the compiled-once planner Runnable when enabled; - # otherwise fall back to ``self.llm.ainvoke``. The ``surfsense:internal`` - # tag is preserved on both paths so ``_stream_agent_events`` still + # Prefer the compiled-once planner Runnable when enabled; otherwise + # fall back to ``self.llm.ainvoke``. The ``surfsense:internal`` tag + # is preserved on both paths so ``_stream_agent_events`` still # suppresses the planner's intermediate events from the UI. planner = self._build_kb_planner_runnable() try: diff --git a/surfsense_backend/app/agents/new_chat/middleware/noop_injection.py b/surfsense_backend/app/agents/new_chat/middleware/noop_injection.py index 8628479c7..503c73ccc 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/noop_injection.py +++ b/surfsense_backend/app/agents/new_chat/middleware/noop_injection.py @@ -1,18 +1,23 @@ """ ``_noop`` provider-compatibility tool + injection middleware. -OpenCode injects a ``_noop`` tool for LiteLLM/Bedrock/Copilot when the -model call has empty tools but message history includes prior -``tool_calls`` — some providers 400 in that shape (see -``opencode/packages/opencode/src/session/llm.ts:209-228``). SurfSense uses -LiteLLM, and the compaction summarize call (no tools, history full of -tool calls) hits this. Tier 1.5 in the OpenCode-port plan. 
+Some providers (LiteLLM, Bedrock, Copilot) 400 when a model call has +empty ``tools`` but the message history includes prior ``tool_calls`` — +they treat that shape as malformed even though it's perfectly valid +LangChain. SurfSense hits this on the compaction summarize call (no +tools, history full of tool calls). + +Ported from OpenCode's ``packages/opencode/src/session/llm.ts:209-228``, +which discovered and codified the workaround: inject a no-op tool *only* +on those provider shapes so the request validates; the tool itself is +never meant to be called. Operation: a :class:`NoopInjectionMiddleware` ``wrap_model_call`` checks if the request has zero tools but the last AI message in history includes -``tool_calls``. If yes, it injects the ``_noop`` tool only — never globally, -mirroring opencode's gating exactly. The :func:`noop_tool` returns empty -content when called (which it should never be in practice). +``tool_calls``. If yes, it injects the ``_noop`` tool only — never +globally — mirroring OpenCode's gating exactly. The :func:`noop_tool` +returns empty content when called (which it should never be in +practice). """ from __future__ import annotations @@ -45,8 +50,9 @@ def noop_tool() -> str: # Provider markers that benefit from ``_noop`` injection. These match -# opencode's gating list. We also accept any string containing one of -# these substrings (so e.g. ``litellm`` matches ``ChatLiteLLM``). +# OpenCode's gating list (``llm.ts:209-228``). We also accept any string +# containing one of these substrings so e.g. ``litellm`` matches +# ``ChatLiteLLM``. _NOOP_NEEDED_PROVIDERS: tuple[str, ...] 
= ( "litellm", "bedrock", diff --git a/surfsense_backend/app/agents/new_chat/middleware/otel_span.py b/surfsense_backend/app/agents/new_chat/middleware/otel_span.py index f51d2f7bb..cfe1edae4 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/otel_span.py +++ b/surfsense_backend/app/agents/new_chat/middleware/otel_span.py @@ -3,14 +3,14 @@ OpenTelemetry span middleware for the SurfSense ``new_chat`` agent. Wraps both ``model.call`` (LLM invocations) and ``tool.call`` (tool executions) with OTel spans, attaching low-cardinality span names and -high-cardinality identifiers as attributes (per the Tier 3b plan). +high-cardinality identifiers as attributes. This middleware is intentionally a thin adapter over :mod:`app.observability.otel`; when OTel is not configured all spans collapse to no-ops and the wrapper adds <1µs overhead per call. When OTel **is** configured (``OTEL_EXPORTER_OTLP_ENDPOINT`` set), every -model and tool call gets a span with the standard attributes the -plan's dashboards expect. +model and tool call gets a span with the standard attributes our +dashboards expect. """ from __future__ import annotations diff --git a/surfsense_backend/app/agents/new_chat/middleware/permission.py b/surfsense_backend/app/agents/new_chat/middleware/permission.py index 6e1f42baf..37719e96a 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/permission.py +++ b/surfsense_backend/app/agents/new_chat/middleware/permission.py @@ -1,10 +1,15 @@ """ PermissionMiddleware — pattern-based allow/deny/ask with HITL fallback. -Mirrors ``opencode/packages/opencode/src/permission/index.ts`` but uses -SurfSense's existing ``interrupt({type, action, context})`` payload shape -(see ``app/agents/new_chat/tools/hitl.py``) so the frontend keeps -working unchanged. Tier 2.1 in the OpenCode-port plan. +LangChain's :class:`HumanInTheLoopMiddleware` only supports a static +"this tool always asks" decision per tool. 
There's no layered +allow/deny/ask ruleset, no glob patterns, no per-search-space or +per-thread overrides, and no auto-deny synthesis. + +This middleware ports OpenCode's ``packages/opencode/src/permission/index.ts`` +ruleset model on top of SurfSense's existing ``interrupt({type, action, +context})`` payload shape (see ``app/agents/new_chat/tools/hitl.py``) so +the frontend keeps working unchanged. Operation: 1. ``aafter_model`` inspects the latest ``AIMessage.tool_calls``. @@ -24,9 +29,9 @@ Operation: The middleware also performs a *pre-model* tool-filter step (the ``before_model`` hook) so globally denied tools are stripped from the -exposed tool list before the model gets to see them. This is -opencode's ``Permission.disabled`` equivalent and dramatically reduces -the chance the model emits a deny-only call. +exposed tool list before the model gets to see them. This mirrors +OpenCode's ``Permission.disabled`` and dramatically reduces the chance +the model emits a deny-only call. """ from __future__ import annotations @@ -117,7 +122,7 @@ class PermissionMiddleware(AgentMiddleware): # type: ignore[type-arg] self._emit_interrupt = always_emit_interrupt_payload # ------------------------------------------------------------------ - # Tool-filter step (opencode `Permission.disabled` equivalent) + # Tool-filter step (mirrors OpenCode's ``Permission.disabled``) # ------------------------------------------------------------------ def _globally_denied(self, tool_name: str) -> bool: @@ -197,8 +202,8 @@ class PermissionMiddleware(AgentMiddleware): # type: ignore[type-arg] "always": patterns, }, } - # Tier 3b: permission.asked + interrupt.raised spans (no-op when - # OTel is disabled). Both fire here so dashboards can correlate + # Open ``permission.asked`` + ``interrupt.raised`` OTel spans + # (no-op when OTel is disabled) so dashboards can correlate # "we asked X" with "interrupt was actually delivered". 
with ( ot.permission_asked_span( diff --git a/surfsense_backend/app/agents/new_chat/middleware/retry_after.py b/surfsense_backend/app/agents/new_chat/middleware/retry_after.py index 394bb0371..0c3d3d017 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/retry_after.py +++ b/surfsense_backend/app/agents/new_chat/middleware/retry_after.py @@ -1,10 +1,16 @@ """ RetryAfterMiddleware — Header-aware retry with custom backoff and SSE eventing. -Why standalone instead of subclassing ``ModelRetryMiddleware``: the upstream -class calls module-level ``calculate_delay`` inline (no overridable -``_calculate_delay`` hook), so a subclass cannot inject Retry-After header -delays without rewriting the loop. Tier 1.4 in the OpenCode-port plan. +LangChain's :class:`ModelRetryMiddleware` retries on exceptions but ignores +the ``Retry-After`` HTTP header — it just runs its own exponential backoff. +That wastes time when a provider has explicitly told us how long to wait. +This middleware honors the header (mirroring OpenCode's +``packages/opencode/src/session/llm.ts`` retry pathway) and emits an SSE +event so the UI can show "rate-limited, retrying in Ns". + +We can't subclass ``ModelRetryMiddleware`` cleanly because its loop calls a +module-level ``calculate_delay`` inline (no overridable +``_calculate_delay`` hook), so this is a standalone implementation. Behaviour: - Extracts ``Retry-After`` / ``retry-after-ms`` from diff --git a/surfsense_backend/app/agents/new_chat/middleware/tool_call_repair.py b/surfsense_backend/app/agents/new_chat/middleware/tool_call_repair.py index 54df0cc60..9f81a168b 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/tool_call_repair.py +++ b/surfsense_backend/app/agents/new_chat/middleware/tool_call_repair.py @@ -1,10 +1,6 @@ """ ToolCallNameRepairMiddleware — two-stage tool-name repair. -Mirrors ``opencode/packages/opencode/src/session/llm.ts:339-358`` plus -``opencode/packages/opencode/src/tool/invalid.ts``. 
Tier 1.7 in the -OpenCode-port plan. - Operation: 1. **Stage 1 — lowercase repair:** if a tool call's ``name`` is not in the registry but ``name.lower()`` is, rewrite in place. Catches @@ -14,9 +10,13 @@ Operation: so the registered :func:`invalid_tool` returns the error to the model for self-correction. -Distinct from :class:`deepagents.middleware.PatchToolCallsMiddleware`, -which patches *dangling* tool calls (no matching ToolMessage) — that -class does not handle the wrong-name case at all. +Ported from OpenCode's ``packages/opencode/src/session/llm.ts:339-358`` ++ ``packages/opencode/src/tool/invalid.ts``. LangChain has no equivalent: +:class:`deepagents.middleware.PatchToolCallsMiddleware` patches +*dangling* tool calls (no matching ToolMessage) but does nothing about +wrong names, and the model framework's default behavior on an unknown +name is to crash the turn rather than route to a self-correction +fallback. """ from __future__ import annotations @@ -61,7 +61,8 @@ class ToolCallNameRepairMiddleware( ``invalid`` should be in this set so the fallback dispatches. fuzzy_match_threshold: Optional ``difflib`` ratio (0-1) for the fuzzy-match step that runs *between* lowercase and invalid. - Set to ``None`` to disable fuzzy matching (opencode parity). + Set to ``None`` to disable fuzzy matching (default in + OpenCode; we mirror that to avoid silent rewrites). 
""" def __init__( @@ -106,7 +107,7 @@ class ToolCallNameRepairMiddleware( call["response_metadata"] = metadata return call - # Optional fuzzy step (off by default for opencode parity) + # Optional fuzzy step (off by default — see class docstring) if self._fuzzy_threshold is not None: close = difflib.get_close_matches( name, registered, n=1, cutoff=self._fuzzy_threshold diff --git a/surfsense_backend/app/agents/new_chat/permissions.py b/surfsense_backend/app/agents/new_chat/permissions.py index 50a0cfbdc..523deb11f 100644 --- a/surfsense_backend/app/agents/new_chat/permissions.py +++ b/surfsense_backend/app/agents/new_chat/permissions.py @@ -1,21 +1,20 @@ """ Wildcard pattern matching + rule evaluation for the SurfSense permission system. -Mirrors ``opencode/packages/opencode/src/permission/evaluate.ts`` and -``opencode/packages/opencode/src/util/wildcard.ts`` precisely: +Ported from OpenCode's ``packages/opencode/src/permission/evaluate.ts`` and +``packages/opencode/src/util/wildcard.ts``. LangChain has no rule-based +permission evaluator, so we keep OpenCode's semantics intact: - ``Wildcard.match`` matches both the ``permission`` and the ``pattern`` fields of a rule against the requested ``(permission, pattern)`` pair. ``*`` matches any segment, ``**`` matches across separators. - The evaluator runs ``findLast`` over the **flattened** list of rules from all rulesets — last matching rule wins. -- The default fallback is ``ask`` (NOT deny), matching opencode. +- The default fallback is ``ask`` (NOT deny), matching OpenCode. - Multi-pattern requests AND together: if ANY pattern resolves to ``deny``, the whole request is denied; if ANY needs ``ask``, an interrupt is raised; only when all patterns ``allow`` does the request proceed. - -Tier 2.1 in the OpenCode-port plan. 
""" from __future__ import annotations diff --git a/surfsense_backend/app/agents/new_chat/plugin_loader.py b/surfsense_backend/app/agents/new_chat/plugin_loader.py index 426e28041..c52620d40 100644 --- a/surfsense_backend/app/agents/new_chat/plugin_loader.py +++ b/surfsense_backend/app/agents/new_chat/plugin_loader.py @@ -1,9 +1,10 @@ """Entry-point based plugin loader for SurfSense agent middleware. -The realization in the Tier 6 plan: LangChain's :class:`AgentMiddleware` ABC -already covers the practical surface most plugins need (``before_agent`` / -``before_model`` / ``wrap_tool_call`` / their async counterparts), so a -SurfSense-specific plugin protocol is unnecessary. +LangChain's :class:`AgentMiddleware` ABC already covers the practical +surface most plugins need (``before_agent`` / ``before_model`` / +``wrap_tool_call`` / their async counterparts), so a SurfSense-specific +plugin protocol would be redundant. We just need a way to discover and +admit third-party middleware safely. A plugin is therefore just an installable Python package that registers a factory callable under the ``surfsense.plugins`` entry-point group: diff --git a/surfsense_backend/app/agents/new_chat/plugins/year_substituter.py b/surfsense_backend/app/agents/new_chat/plugins/year_substituter.py index 3e2e631d2..2b7781b90 100644 --- a/surfsense_backend/app/agents/new_chat/plugins/year_substituter.py +++ b/surfsense_backend/app/agents/new_chat/plugins/year_substituter.py @@ -1,10 +1,10 @@ """Reference plugin: substitute ``{{year}}`` in tool descriptions. -Mirrors the OpenCode ``chat.system.transform`` example. Demonstrates the -:meth:`AgentMiddleware.awrap_tool_call` hook -- the plugin sees every tool -invocation and can rewrite the request *or* the result. This particular -plugin is read-only and only transforms the *description* the user might -see in error messages (no request mutation). 
+Demonstrates the :meth:`AgentMiddleware.awrap_tool_call` hook -- the +plugin sees every tool invocation and can rewrite the request *or* the +result. This particular plugin is read-only and only transforms the +*description* the user might see in error messages (no request +mutation). The plugin is built as a factory function so the entry-point loader can inject :class:`PluginContext` (containing the agent's LLM, search-space diff --git a/surfsense_backend/app/agents/new_chat/prompts/composer.py b/surfsense_backend/app/agents/new_chat/prompts/composer.py index 77b86aeef..42f8303e6 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/composer.py +++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py @@ -14,7 +14,13 @@ under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic examples/ # one ``.md`` per tool with call examples routing/ # connector-specific routing notes (linear, slack, …) -Tier 3a in the OpenCode-port plan. +The model-family dispatch step (see :func:`detect_provider_variant`) +mirrors OpenCode's ``packages/opencode/src/session/system.ts`` — different +model families respond best to differently-styled prompts (Claude likes +XML/narrative, GPT-5 wants channel-aware pragmatic, Codex needs +terse/file:line, Gemini wants formal numbered steps, etc.). LangChain's +``dynamic_prompt`` helper supports per-call prompt swaps but ships no +out-of-the-box family classifier, so we keep our own. Backwards compatibility ======================= @@ -42,10 +48,11 @@ from app.db import ChatVisibility # When adding a new variant, also drop a matching ``providers/.md`` # file in this package and (if appropriate) extend the regex matchers below. # -# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted -# to SurfSense's "supplemental hints" architecture (each fragment is a -# focused style nudge, NOT a full system prompt — the main prompt is -# already assembled from base/ + tools/ + routing/). 
+# Stylistic clusters: each variant is a focused style nudge, NOT a full +# system prompt — the main prompt is already assembled from base/ + +# tools/ + routing/. The clustering itself (which models map to which +# style) follows OpenCode's ``system.ts`` family table; see the module +# docstring for credits. ProviderVariant = str # Known values: # "anthropic" — Claude family (XML-friendly, narrative todos) @@ -82,8 +89,8 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant: Order is significant: more-specific patterns are tried first so ``gpt-5-codex`` routes to ``"openai_codex"`` rather than - ``"openai_reasoning"`` (mirrors OpenCode's - ``packages/opencode/src/session/system.ts`` dispatch). + ``"openai_reasoning"`` — same dispatch order as OpenCode's + ``packages/opencode/src/session/system.ts``. """ if not model_name: return "default" diff --git a/surfsense_backend/app/agents/new_chat/subagents/__init__.py b/surfsense_backend/app/agents/new_chat/subagents/__init__.py index b9f21a0d2..7d678ec79 100644 --- a/surfsense_backend/app/agents/new_chat/subagents/__init__.py +++ b/surfsense_backend/app/agents/new_chat/subagents/__init__.py @@ -1,14 +1,17 @@ """Specialized user-facing subagents for the SurfSense agent. -Each subagent is a :class:`deepagents.SubAgent` typed-dict spec passed to -:class:`deepagents.SubAgentMiddleware`, which materializes them as ephemeral -runnables invoked via the ``task`` tool. +The :class:`deepagents.SubAgentMiddleware` already provides the +materialization machinery (each :class:`deepagents.SubAgent` typed-dict +spec is compiled into an ephemeral runnable invoked via the ``task`` +tool); what's specific to SurfSense is the *seeding* of those subagents +with declarative deny rules. 
Per-subagent permission rules are injected as a :class:`PermissionMiddleware` entry inside the subagent's ``middleware`` -field, mirroring opencode ``tool/task.ts`` which seeds child sessions with -deny rules for tools the parent does not want them touching (e.g. -``task``/``todowrite`` recursion, write tools for read-only research roles). +field. The auto-deny pattern (e.g. forbid ``task``/``todowrite`` +recursion, block write tools for read-only research roles) is borrowed +from OpenCode's ``packages/opencode/src/tool/task.ts``, which has +analogous logic for restricting child sessions. """ from .config import ( diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 3919527d9..56f838d7e 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -1,13 +1,14 @@ """ Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`. -Tier 3a of the OpenCode-port plan replaced the monolithic prompt strings -in this module with a fragment tree under ``prompts/`` and a composer -function. This module preserves the public function surface -(``build_surfsense_system_prompt`` / ``build_configurable_system_prompt`` / -``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so that -existing call sites — `chat_deepagent.py`, anonymous chat routes, and the -configurable-prompt admin path — keep working without churn. +The composer split the previous monolithic prompt string into a fragment +tree under ``prompts/`` plus a model-family dispatch step (see the +composer module docstring for credits). 
This module preserves the public +function surface (``build_surfsense_system_prompt`` / +``build_configurable_system_prompt`` / +``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so +that existing call sites — `chat_deepagent.py`, anonymous chat routes, +and the configurable-prompt admin path — keep working without churn. For new call sites prefer importing ``compose_system_prompt`` directly from :mod:`app.agents.new_chat.prompts.composer`. diff --git a/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py b/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py index df10fcbe3..ea4bc0bc1 100644 --- a/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py +++ b/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py @@ -6,8 +6,9 @@ tool, :class:`ToolCallNameRepairMiddleware` rewrites the call to ``invalid`` with the original name and a parser/validation error string. This tool's execution then returns that error to the model so it can self-correct. -Mirrors ``opencode/packages/opencode/src/tool/invalid.ts``. Tier 1.6 in -the OpenCode-port plan. +Ported from OpenCode's ``packages/opencode/src/tool/invalid.ts`` — +LangChain has no equivalent fallback path; the default behavior on an +unknown tool name is a hard ``ToolNotFoundError`` which kills the turn. Critically, the :class:`ToolDefinition` for this tool is **excluded** from the system-prompt tool list and from ``LLMToolSelectorMiddleware`` selection diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index fce1bf872..e8bab36fd 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -132,12 +132,10 @@ class ToolDefinition: that must be in ``available_connectors`` for the tool to be enabled. 
dedup_key: Optional callable that maps a tool's ``args`` dict to a string signature used by :class:`DedupHITLToolCallsMiddleware` - to drop duplicate calls. Replaces the legacy hardcoded - ``_NATIVE_HITL_TOOL_DEDUP_KEYS`` map (Tier 2.3 in the - OpenCode-port plan). + to drop duplicate calls within a single LLM response. reverse: Optional callable that, given the tool's ``(args, result)``, returns a ``ReverseDescriptor`` describing the inverse tool - invocation. Consumed by the snapshot/revert pipeline (Tier 5). + invocation. Consumed by the snapshot/revert pipeline. """ diff --git a/surfsense_backend/app/observability/otel.py b/surfsense_backend/app/observability/otel.py index 4f2257ab7..6791ab499 100644 --- a/surfsense_backend/app/observability/otel.py +++ b/surfsense_backend/app/observability/otel.py @@ -1,12 +1,10 @@ """ OpenTelemetry instrumentation helpers for the SurfSense agent stack. -Tier 3b in the OpenCode-port plan. - Goals ===== -- Provide one tiny, ergonomic API for the spans listed in the plan +- Provide one tiny, ergonomic API for the spans we care about (``tool.call``, ``model.call``, ``kb.search``, ``kb.persist``, ``compaction.run``, ``interrupt.raised``, ``permission.asked``). - Keep span **names** low-cardinality (``tool.call`` rather than diff --git a/surfsense_backend/app/routes/agent_revert_route.py b/surfsense_backend/app/routes/agent_revert_route.py index cbe4e7417..12484ff53 100644 --- a/surfsense_backend/app/routes/agent_revert_route.py +++ b/surfsense_backend/app/routes/agent_revert_route.py @@ -1,9 +1,9 @@ """POST ``/api/threads/{thread_id}/revert/{action_id}``: undo an agent action. -Per the Tier 5 plan, the route ships **before** the UI lights up the per-message -"Undo from here" affordance. To prevent accidental usage during the gap we -return ``503 Service Unavailable`` until the -``SURFSENSE_ENABLE_REVERT_ROUTE`` flag flips. 
Once enabled, the route runs: +The route ships **before** the UI lights up the per-message "Undo from +here" affordance. To prevent accidental usage during the gap we return +``503 Service Unavailable`` until the ``SURFSENSE_ENABLE_REVERT_ROUTE`` +flag flips. Once enabled, the route runs: 1. Authentication via :func:`current_active_user`. 2. Action lookup; 404 if the action does not belong to the thread. diff --git a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py index aa0c215b9..397b1c787 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py +++ b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py @@ -1,4 +1,4 @@ -"""Tests for the prompt fragment composer (Tier 3a).""" +"""Tests for the prompt fragment composer.""" from __future__ import annotations diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_otel_span.py b/surfsense_backend/tests/unit/agents/new_chat/test_otel_span.py index e5b171612..55434c04d 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_otel_span.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_otel_span.py @@ -1,4 +1,4 @@ -"""Tests for the OtelSpanMiddleware adapter (Tier 3b).""" +"""Tests for the OtelSpanMiddleware adapter.""" from __future__ import annotations diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_permissions.py b/surfsense_backend/tests/unit/agents/new_chat/test_permissions.py index 4924f2aee..8ec16617a 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_permissions.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_permissions.py @@ -1,4 +1,4 @@ -"""Tests for the wildcard matcher and rule evaluator (opencode evaluate.ts parity).""" +"""Tests for the wildcard matcher and rule evaluator (parity with OpenCode evaluate.ts).""" from __future__ import annotations diff --git 
a/surfsense_backend/tests/unit/agents/new_chat/test_plugin_loader.py b/surfsense_backend/tests/unit/agents/new_chat/test_plugin_loader.py index c2118c697..5dbf765a7 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_plugin_loader.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_plugin_loader.py @@ -1,4 +1,4 @@ -"""Unit tests for the SurfSense plugin entry-point loader (Tier 6).""" +"""Unit tests for the SurfSense plugin entry-point loader.""" from __future__ import annotations diff --git a/surfsense_backend/tests/unit/observability/test_otel.py b/surfsense_backend/tests/unit/observability/test_otel.py index 583142098..fc5813973 100644 --- a/surfsense_backend/tests/unit/observability/test_otel.py +++ b/surfsense_backend/tests/unit/observability/test_otel.py @@ -1,4 +1,4 @@ -"""Tests for the SurfSense OpenTelemetry shim (Tier 3b).""" +"""Tests for the SurfSense OpenTelemetry shim.""" from __future__ import annotations diff --git a/surfsense_backend/tests/unit/services/test_revert_service.py b/surfsense_backend/tests/unit/services/test_revert_service.py index e2cbe383a..a81e52041 100644 --- a/surfsense_backend/tests/unit/services/test_revert_service.py +++ b/surfsense_backend/tests/unit/services/test_revert_service.py @@ -1,4 +1,4 @@ -"""Unit tests for the agent revert service (Tier 5.3).""" +"""Unit tests for the agent revert service.""" from __future__ import annotations