chore: cleaned comments slop

2026-06-16 21:05:20 +02:00 · 2026-04-28 23:52:37 -07:00 · 2026-04-28 23:52:37 -07:00 · f9b5367754
commit f9b5367754
parent f23be16b35
34 changed files with 274 additions and 232 deletions
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -250,12 +250,12 @@ LANGSMITH_PROJECT=surfsense


 # =============================================================================
-# OPTIONAL: New-chat agent feature flags (OpenCode-port)
+# OPTIONAL: New-chat agent feature flags
 # =============================================================================
 # Master kill-switch — when true, every flag below is forced OFF.
 # SURFSENSE_DISABLE_NEW_AGENT_STACK=false

-# Tier 1 — Agent quality
+# Agent quality
 # SURFSENSE_ENABLE_CONTEXT_EDITING=false
 # SURFSENSE_ENABLE_COMPACTION_V2=false
 # SURFSENSE_ENABLE_RETRY_AFTER=false
@ -265,24 +265,24 @@ LANGSMITH_PROJECT=surfsense
 # SURFSENSE_ENABLE_TOOL_CALL_REPAIR=false
 # SURFSENSE_ENABLE_DOOM_LOOP=false   # leave OFF until UI handles permission='doom_loop'

-# Tier 2 — Safety
+# Safety
 # SURFSENSE_ENABLE_PERMISSION=false
 # SURFSENSE_ENABLE_BUSY_MUTEX=false
 # SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false   # adds a per-turn LLM call

-# Tier 3b — Observability (also requires OTEL_EXPORTER_OTLP_ENDPOINT)
+# Observability — OTel (also requires OTEL_EXPORTER_OTLP_ENDPOINT)
 # SURFSENSE_ENABLE_OTEL=false

-# Tier 4 — Skills + subagents
+# Skills + subagents
 # SURFSENSE_ENABLE_SKILLS=false
 # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
 # SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false

-# Tier 5 — Snapshot / revert
+# Snapshot / revert
 # SURFSENSE_ENABLE_ACTION_LOG=false
 # SURFSENSE_ENABLE_REVERT_ROUTE=false        # Backend-only; flip when UI ships

-# Tier 6 — Plugins
+# Plugins
 # SURFSENSE_ENABLE_PLUGIN_LOADER=false
 # Comma-separated allowlist of plugin entry-point names
 # SURFSENSE_ALLOWED_PLUGINS=year_substituter
--- a/surfsense_backend/alembic/versions/130_add_agent_action_log.py
+++ b/surfsense_backend/alembic/versions/130_add_agent_action_log.py
@ -4,8 +4,10 @@ Revision ID: 130
 Revises: 129
 Create Date: 2026-04-28

-Tier 5.2 in the OpenCode-port plan. Adds the append-only ``agent_action_log``
-table that :class:`ActionLogMiddleware` writes to after every tool call.
+Adds the append-only ``agent_action_log`` table that
+:class:`ActionLogMiddleware` writes to after every tool call. Each row
+optionally carries a ``reverse_descriptor`` payload used by
+``POST /api/threads/{thread_id}/revert/{action_id}`` to undo the action.
 """

 from __future__ import annotations
--- a/surfsense_backend/alembic/versions/131_add_document_revisions.py
+++ b/surfsense_backend/alembic/versions/131_add_document_revisions.py
@ -4,7 +4,7 @@ Revision ID: 131
 Revises: 130
 Create Date: 2026-04-28

-Tier 5.1 in the OpenCode-port plan. Adds two snapshot tables:
+Adds two snapshot tables that back the per-action revert flow:

 * ``document_revisions``: pre-mutation snapshot of NOTE/FILE/EXTENSION docs.
 * ``folder_revisions``: pre-mutation snapshot of folder mkdir/move/delete.
--- a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py
+++ b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py
@ -4,11 +4,10 @@ Revision ID: 132
 Revises: 131
 Create Date: 2026-04-28

-Tier 2.1 in the OpenCode-port plan. Adds the persistent ``agent_permission_rules``
-table consumed by :class:`PermissionMiddleware` at agent build time. Rules
-can be scoped at search-space (``user_id`` / ``thread_id`` NULL),
-user-wide (``user_id`` set, ``thread_id`` NULL), or per-thread
-(``thread_id`` set).
+Adds the persistent ``agent_permission_rules`` table consumed by
+:class:`PermissionMiddleware` at agent build time. Rules can be scoped
+at search-space (``user_id`` / ``thread_id`` NULL), user-wide
+(``user_id`` set, ``thread_id`` NULL), or per-thread (``thread_id`` set).
 """

 from __future__ import annotations
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@ -353,11 +353,12 @@ async def create_surfsense_deep_agent(
        additional_tools=list(additional_tools) if additional_tools else None,
    )

-    # Tier 1.6: register `invalid` tool. It is dispatched only when
-    # ToolCallNameRepairMiddleware rewrites a malformed call. We
-    # intentionally append it AFTER ``build_tools_async`` so it never
-    # appears in the system-prompt tool list (which is built from the
-    # registry, not the bound tool list).
+    # Register the ``invalid`` tool only when tool-call repair is on. It
+    # is dispatched only when :class:`ToolCallNameRepairMiddleware`
+    # rewrites a malformed call. We intentionally append it AFTER
+    # ``build_tools_async`` so it never appears in the system-prompt
+    # tool list (which is built from the registry, not the bound tool
+    # list).
    _flags: AgentFeatureFlags = get_flags()
    if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
        t.name for t in tools
@ -455,10 +456,10 @@ async def create_surfsense_deep_agent(
    return agent


-# Tier 1.1: tools whose output is too costly / lossy to discard. Keep
-# this conservative — anything listed here is *never* pruned by
-# ContextEditingMiddleware. The list is filtered against actually-bound
-# tool names so disabled connectors don't show up here.
+# Tools whose output is too costly / lossy to discard. Keep this
+# conservative — anything listed here is *never* pruned by
+# :class:`ContextEditingMiddleware`. The list is filtered against
+# actually-bound tool names so disabled connectors don't show up here.
 _PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "generate_report",
@ -485,11 +486,12 @@ def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
    return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled)


-# Tier 2.1 / cleanup: opencode `Permission.disabled` parity. Replaces the
-# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE``-based gating with a
-# declarative pass over :data:`BUILTIN_TOOLS`. Each tool that declares a
-# ``required_connector`` not present in ``available_connectors`` gets a
-# deny rule so any execution attempt short-circuits with permission_denied.
+# Connector gating: any tool whose ``ToolDefinition.required_connector``
+# isn't actually wired up gets a synthesized permission deny rule so
+# execution attempts short-circuit with ``permission_denied`` instead of
+# bubbling up provider-specific 401/404 errors. Mirrors OpenCode's
+# ``Permission.disabled`` (declarative, per-tool gating) — replaces the
+# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE`` substring-heuristic.
 def _synthesize_connector_deny_rules(
    *,
    available_connectors: list[str] | None,
@ -503,11 +505,6 @@ def _synthesize_connector_deny_rules(
    1. It is currently bound (``enabled_tool_names``).
    2. It declares a ``required_connector``.
    3. That connector is *not* in ``available_connectors``.
-
-    This expresses the OpenCode ``Permission.disabled`` semantics
-    declaratively, replacing the substring-heuristic binary gating
-    that used to consult the hardcoded ``_CONNECTOR_TYPE_TO_SEARCHABLE``
-    map.
    """
    available = set(available_connectors or [])
    deny: list[Rule] = []
@ -581,7 +578,7 @@ def _build_compiled_agent_blocking(
        "middleware": gp_middleware,
    }

-    # Tier 4.3: specialized user-facing subagents (explore, report_writer,
+    # Specialized user-facing subagents (explore, report_writer,
    # connector_negotiator). Registered through SubAgentMiddleware alongside
    # the general-purpose spec so the parent's `task` tool can address them
    # by name. Off by default until the flag flips so existing deployments
@ -629,14 +626,13 @@ def _build_compiled_agent_blocking(
    # ``wrap_model_call`` ordering: the FIRST middleware in the list is the
    # OUTERMOST wrapper. To ensure prune executes before summarization,
    # place ``SpillingContextEditingMiddleware`` before
-    # ``SurfSenseCompactionMiddleware`` (Tier 1.1 + 1.3).
-    # Compaction is the canonical token-budget defense after the
-    # cleanup tier removed ``SafeSummarizationMiddleware``. The Bedrock
-    # buffer-empty defense is folded into ``SurfSenseCompactionMiddleware``.
+    # ``SurfSenseCompactionMiddleware``. Compaction is the canonical
+    # token-budget defense; the Bedrock buffer-empty defense is folded
+    # into ``SurfSenseCompactionMiddleware``.
    summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend)
    _ = flags.enable_compaction_v2  # historical flag; retained for telemetry parity

-    # Tier 1.1: ContextEditing prune. Trigger at 55% of model_max_input,
+    # ContextEditing prune. Trigger at 55% of ``max_input_tokens``,
    # earlier than summarization (~85%). When disabled, no edit runs.
    context_edit_mw = None
    if (
@ -664,7 +660,10 @@ def _build_compiled_agent_blocking(
            backend_resolver=backend_resolver,
        )

-    # Tier 1.4 / 1.8 / 1.9 / 1.10: built-in retry/fallback/limits.
+    # Resilience knobs: header-aware retry, model fallback, and
+    # per-thread / per-run call-count limits. The fallback / limit
+    # middlewares are vanilla LangChain primitives; ``RetryAfter`` is
+    # SurfSense's header-aware variant (see its module docstring).
    retry_mw = (
        RetryAfterMiddleware(max_retries=3)
        if flags.enable_retry_after and not flags.disable_new_agent_stack
@ -700,14 +699,16 @@ def _build_compiled_agent_blocking(
        else None
    )

-    # Tier 1.5: provider-compat _noop injection.
+    # Provider-compat ``_noop`` injection (mirrors OpenCode's
+    # ``llm.ts`` workaround for providers that 400 when a call has no
+    # tools but the history already contains ``tool_calls``).
    noop_mw = (
        NoopInjectionMiddleware()
        if flags.enable_compaction_v2 and not flags.disable_new_agent_stack
        else None
    )

-    # Tier 1.7: tool-call name repair (lowercase + invalid fallback).
+    # Tool-call name repair (lowercase + ``invalid`` fallback).
    #
    # ``registered_tool_names`` MUST cover every tool the model can legitimately
    # call. That includes the bound ``tools`` list AND every tool provided by
@ -737,18 +738,22 @@ def _build_compiled_agent_blocking(
        }
        repair_mw = ToolCallNameRepairMiddleware(
            registered_tool_names=registered_names,
-            fuzzy_match_threshold=None,  # opencode parity: no fuzzy step
+            # Disable fuzzy matching to avoid silent rewrites; the
+            # lowercase + ``invalid`` fallback alone covers >95% of
+            # observed model errors.
+            fuzzy_match_threshold=None,
        )

-    # Tier 1.11: doom-loop detector. Off by default until UI handles.
+    # Doom-loop detector. Off by default until the frontend handles
+    # ``permission == "doom_loop"`` interrupts.
    doom_loop_mw = (
        DoomLoopMiddleware(threshold=3)
        if flags.enable_doom_loop and not flags.disable_new_agent_stack
        else None
    )

-    # Tier 2.1: PermissionMiddleware. Layers, earliest -> latest (last
-    # match wins per opencode):
+    # PermissionMiddleware. Layers, earliest -> latest (last match wins,
+    # same evaluation order as OpenCode's ``permission/index.ts``):
    #
    # 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense
    #    already runs per-tool HITL (see ``tools/hitl.py``) for mutating
@ -778,11 +783,11 @@ def _build_compiled_agent_blocking(
            ],
        )

-    # Tier 5.2: ActionLogMiddleware. Off by default until the
-    # ``agent_action_log`` table is migrated. When enabled, persists one
-    # row per tool call with optional reverse_descriptor for
-    # /api/threads/{thread_id}/revert/{action_id}. Sits inside permission
-    # so denied calls aren't logged as completions.
+    # ActionLogMiddleware. Off by default until the ``agent_action_log``
+    # table is migrated. When enabled, persists one row per tool call
+    # with optional reverse_descriptor for
+    # ``POST /api/threads/{thread_id}/revert/{action_id}``. Sits inside
+    # ``permission`` so denied calls aren't logged as completions.
    action_log_mw: ActionLogMiddleware | None = None
    if (
        flags.enable_action_log
@ -804,23 +809,24 @@ def _build_compiled_agent_blocking(
            )
            action_log_mw = None

-    # Tier 2.2: per-thread busy mutex.
+    # Per-thread busy mutex (refuse a second concurrent turn on the same
+    # thread; see :class:`BusyMutexMiddleware` docstring).
    busy_mutex_mw: BusyMutexMiddleware | None = (
        BusyMutexMiddleware()
        if flags.enable_busy_mutex and not flags.disable_new_agent_stack
        else None
    )

-    # Tier 3b: OpenTelemetry spans (model.call + tool.call). Lives just
-    # inside BusyMutex so it spans every retry/fallback attempt of the
-    # current turn but never wraps a queued/blocked turn.
+    # OpenTelemetry spans (model.call + tool.call). Lives just inside
+    # BusyMutex so it spans every retry/fallback attempt of the current
+    # turn but never wraps a queued/blocked turn.
    otel_mw: OtelSpanMiddleware | None = (
        OtelSpanMiddleware()
        if flags.enable_otel and not flags.disable_new_agent_stack
        else None
    )

-    # Tier 6: plugin entry-point loader. Off by default; opt-in via the
+    # Plugin entry-point loader. Off by default; opt-in via the
    # ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from
    # the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future
    # PR can wire it through ``global_llm_config.yaml``.
@ -845,10 +851,10 @@ def _build_compiled_agent_blocking(
            )
            plugin_middlewares = []

-    # Tier 4.1: SkillsMiddleware. Loads built-in + space-authored skills
-    # via a CompositeBackend. Sources are layered: built-in first, space
-    # last, so a search-space-authored skill of the same name overrides
-    # the bundled one.
+    # SkillsMiddleware (deepagents) loads built-in + space-authored
+    # skills via a CompositeBackend. Sources are layered: built-in first,
+    # space last, so a search-space-authored skill of the same name
+    # overrides the bundled one.
    skills_mw: SkillsMiddleware | None = None
    if flags.enable_skills and not flags.disable_new_agent_stack:
        try:
@ -865,7 +871,8 @@ def _build_compiled_agent_blocking(
            logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
            skills_mw = None

-    # Tier 2.5: LLM-driven tool selection for >30 tools.
+    # LangChain's LLM-driven tool selection — only enabled for stacks
+    # large enough to need narrowing (>30 tools).
    selector_mw: LLMToolSelectorMiddleware | None = None
    if (
        flags.enable_llm_tool_selector
@ -934,12 +941,12 @@ def _build_compiled_agent_blocking(
        )
        if filesystem_mode == FilesystemMode.CLOUD
        else None,
-        # Tier 4.1: skill loader. Placed before SubAgentMiddleware so
-        # subagents inherit the same skill metadata (subagent specs reference
-        # the same source paths via `default_skills_sources()`).
+        # Skill loader. Placed before SubAgentMiddleware so subagents
+        # inherit the same skill metadata (subagent specs reference the
+        # same source paths via ``default_skills_sources()``).
        skills_mw,
        SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs),
-        # Tier 2.5: tool selection (only when >30 tools and flag on).
+        # Tool selection (only when >30 tools and flag on).
        selector_mw,
        # Defensive caps, then prune, then summarize.
        model_call_limit_mw,
@ -954,19 +961,19 @@ def _build_compiled_agent_blocking(
        # Tool-call repair must run after model emits but before
        # permission / dedup / doom-loop interpret the calls.
        repair_mw,
-        # Tier 2.1: deny/ask BEFORE the calls are forwarded to tool nodes.
+        # Permission deny/ask BEFORE the calls are forwarded to tool nodes.
        permission_mw,
        doom_loop_mw,
-        # Tier 5.2: action log sits inside permission so denied calls
-        # don't appear as completions, and outside dedup so each unique
-        # tool invocation gets its own row.
+        # Action log sits inside permission so denied calls don't appear
+        # as completions, and outside dedup so each unique tool invocation
+        # gets its own row.
        action_log_mw,
        PatchToolCallsMiddleware(),
        DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
-        # Tier 6: plugin slot — sits just before AnthropicCache so plugin-side
-        # transforms see the final tool result and run before any caching
-        # heuristics. Multiple plugins in declared order; loader filtered by
-        # the admin allowlist already.
+        # Plugin slot — sits just before AnthropicCache so plugin-side
+        # transforms see the final tool result and run before any
+        # caching heuristics. Plugins run in declared order; the loader
+        # has already filtered them by the admin allowlist.
        *plugin_middlewares,
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
    ]
--- a/surfsense_backend/app/agents/new_chat/errors.py
+++ b/surfsense_backend/app/agents/new_chat/errors.py
@ -2,10 +2,10 @@
 Typed error taxonomy for the SurfSense agent stack.

 Used by:
- :class:`RetryAfterMiddleware` (Tier 1.4) — its ``retry_on`` callable
-  consults the error code to decide whether a retry is appropriate.
- :class:`PermissionMiddleware` (Tier 2.1) — emits
-  ``code="permission_denied"`` errors when a deny rule trips.
+- :class:`RetryAfterMiddleware` — its ``retry_on`` callable consults
+  the error code to decide whether a retry is appropriate.
+- :class:`PermissionMiddleware` — emits ``code="permission_denied"``
+  errors when a deny rule trips.
 - All tools — return :class:`StreamingError` payloads in
  ``ToolMessage.additional_kwargs["error"]`` so the model and the
  retry/permission layers share a contract.
--- a/surfsense_backend/app/agents/new_chat/feature_flags.py
+++ b/surfsense_backend/app/agents/new_chat/feature_flags.py
@ -1,9 +1,10 @@
 """
 Feature flags for the SurfSense new_chat agent stack.

-These flags control rollout of OpenCode-pattern middleware ported into
-SurfSense. They follow a "default-OFF for risky things, default-ON for
-safe upgrades, master kill-switch for everything new" model.
+These flags gate the newer agent middleware (some ported from OpenCode,
+some sourced from ``langchain.agents.middleware`` / ``deepagents``, some
+SurfSense-native). They follow a "default-OFF for risky things,
+default-ON for safe upgrades, master kill-switch for everything new" model.

 All new middleware checks its flag at agent build time. If the master
 kill-switch ``SURFSENSE_DISABLE_NEW_AGENT_STACK`` is set, every new
@ -57,7 +58,7 @@ class AgentFeatureFlags:
    # regardless of its env value. Used for rapid rollback.
    disable_new_agent_stack: bool = False

-    # Tier 1 — Agent quality
+    # Agent quality — context budget, retry/limits, name-repair, doom-loop
    enable_context_editing: bool = False
    enable_compaction_v2: bool = False
    enable_retry_after: bool = False
@ -69,26 +70,26 @@ class AgentFeatureFlags:
        False  # Default OFF until UI handles permission='doom_loop'
    )

-    # Tier 2 — Safety
+    # Safety — permissions, concurrency, tool-set narrowing
    enable_permission: bool = False  # Default OFF for first deploy
    enable_busy_mutex: bool = False
    enable_llm_tool_selector: bool = False  # Default OFF — adds per-turn LLM cost

-    # Tier 4 — Skills + subagents
+    # Skills + subagents
    enable_skills: bool = False
    enable_specialized_subagents: bool = False
    enable_kb_planner_runnable: bool = False

-    # Tier 5 — Snapshot / revert
+    # Snapshot / revert
    enable_action_log: bool = False
    enable_revert_route: bool = (
        False  # Backend ships before UI; route returns 503 until this flips
    )

-    # Tier 6 — Plugins
+    # Plugins
    enable_plugin_loader: bool = False

-    # Tier 3b — OTel (orthogonal: also requires OTEL_EXPORTER_OTLP_ENDPOINT)
+    # Observability — OTel (orthogonal; also requires OTEL_EXPORTER_OTLP_ENDPOINT)
    enable_otel: bool = False

    @classmethod
@ -108,7 +109,7 @@ class AgentFeatureFlags:

        return cls(
            disable_new_agent_stack=False,
-            # Tier 1
+            # Agent quality
            enable_context_editing=_env_bool("SURFSENSE_ENABLE_CONTEXT_EDITING", False),
            enable_compaction_v2=_env_bool("SURFSENSE_ENABLE_COMPACTION_V2", False),
            enable_retry_after=_env_bool("SURFSENSE_ENABLE_RETRY_AFTER", False),
@ -121,13 +122,13 @@ class AgentFeatureFlags:
                "SURFSENSE_ENABLE_TOOL_CALL_REPAIR", False
            ),
            enable_doom_loop=_env_bool("SURFSENSE_ENABLE_DOOM_LOOP", False),
-            # Tier 2
+            # Safety
            enable_permission=_env_bool("SURFSENSE_ENABLE_PERMISSION", False),
            enable_busy_mutex=_env_bool("SURFSENSE_ENABLE_BUSY_MUTEX", False),
            enable_llm_tool_selector=_env_bool(
                "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR", False
            ),
-            # Tier 4
+            # Skills + subagents
            enable_skills=_env_bool("SURFSENSE_ENABLE_SKILLS", False),
            enable_specialized_subagents=_env_bool(
                "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", False
@ -135,12 +136,12 @@ class AgentFeatureFlags:
            enable_kb_planner_runnable=_env_bool(
                "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", False
            ),
-            # Tier 5
+            # Snapshot / revert
            enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", False),
            enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", False),
-            # Tier 6
+            # Plugins
            enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False),
-            # Tier 3b
+            # Observability
            enable_otel=_env_bool("SURFSENSE_ENABLE_OTEL", False),
        )

--- a/surfsense_backend/app/agents/new_chat/middleware/busy_mutex.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/busy_mutex.py
@ -1,11 +1,16 @@
 """
 BusyMutexMiddleware — per-thread asyncio lock + cancel token.

-Tier 2.2 in the OpenCode-port plan. Mirrors opencode's
-``Stream.scoped(AbortController)`` pattern (single-process, in-memory
-lock + cooperative cancellation). For multi-worker deployments a
-distributed lock backend (Redis or PostgreSQL advisory locks) is a
-phase-2 follow-up.
+LangChain has no built-in concept of "this thread is already running a
+turn — refuse the second concurrent request". Without it, a user
+double-clicking "send" or refreshing the page mid-stream can spawn two
+turns racing on the same checkpoint, producing duplicated tool calls
+and mangled state.
+
+Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a
+single-process, in-memory lock + cooperative cancellation token keyed by
+``thread_id``. For multi-worker deployments a distributed lock backend
+(Redis or PostgreSQL advisory locks) is a phase-2 follow-up.

 What this provides:
 - A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``;
--- a/surfsense_backend/app/agents/new_chat/middleware/compaction.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/compaction.py
@ -5,21 +5,22 @@ Subclasses :class:`deepagents.middleware.summarization.SummarizationMiddleware`
 to add SurfSense-specific behavior:

 1. **Structured summary template** (OpenCode-style ``## Goal / Constraints /
-   Progress / Key Decisions / Next Steps / Critical Context / Relevant Files``).
+   Progress / Key Decisions / Next Steps / Critical Context / Relevant Files``)
+   — see :data:`SURFSENSE_SUMMARY_PROMPT` below. The base
+   ``SummarizationMiddleware`` only ships a freeform "summarize this"
+   prompt; the structured template is ported from OpenCode's
+   ``compaction.ts``.
 2. **Protect SurfSense-specific SystemMessages** so injected hints
   (``<priority_documents>``, ``<workspace_tree>``, ``<file_operation_contract>``,
   ``<user_memory>``, ``<team_memory>``, ``<user_name>``, ``<memory_warning>``)
   are *not* summarized away and are kept verbatim in the post-summary
-   message list.
+   message list. Mirrors OpenCode's ``PRUNE_PROTECTED_TOOLS`` philosophy
+   (some message types are part of the agent's contract and must survive
+   compaction unchanged).
 3. **Sanitize ``content=None``** when feeding messages into ``get_buffer_string``
   (Azure OpenAI / LiteLLM defense — when a provider streams an AIMessage
   containing only tool_calls and no text, ``content`` can be ``None`` and
-   ``get_buffer_string`` crashes iterating over ``None``). This used to live in
-   ``safe_summarization.py``; folded in here.
-
-This replaces ``app.agents.new_chat.middleware.safe_summarization``.
-
-Tier 1.3 in the OpenCode-port plan.
+   ``get_buffer_string`` crashes iterating over ``None``). SurfSense-specific.
 """

 from __future__ import annotations
@ -42,7 +43,7 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

-# OpenCode-faithful structured summary template. Mirrors
+# Structured summary template ported from OpenCode's
 # ``opencode/packages/opencode/src/session/compaction.ts:40-75``. Kept as a
 # module-level constant so unit tests can assert on its sections.
 SURFSENSE_SUMMARY_PROMPT = """<role>
--- a/surfsense_backend/app/agents/new_chat/middleware/context_editing.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/context_editing.py
@ -1,15 +1,15 @@
 """
 SpillToBackendEdit + SpillingContextEditingMiddleware.

-Mirrors OpenCode's spill-to-disk behavior in
-``opencode/packages/opencode/src/tool/truncate.ts``. Before
-``ClearToolUsesEdit`` rewrites old ``ToolMessage.content`` to a placeholder,
-we capture the full original content and write it to the runtime backend
-under ``/tool_outputs/{thread_id}/{message_id}.txt``. The placeholder is
-upgraded to ``"[cleared — full output at /tool_outputs/.../{id}.txt; ask the
-explore subagent to read it]"`` so the agent can recover it on demand.
-
-Tier 1.2 in the OpenCode-port plan.
+LangChain's :class:`ClearToolUsesEdit` discards old ``ToolMessage.content``
+when the context-editing budget triggers, replacing the body with a fixed
+placeholder. That's lossy: anything the agent might want to revisit is
+gone. The spill-to-disk pattern (originally from OpenCode's
+``opencode/packages/opencode/src/tool/truncate.ts``) keeps the prune
+behavior but writes the full original payload to the runtime backend
+under ``/tool_outputs/{thread_id}/{message_id}.txt`` first. The
+placeholder is then upgraded to point at the spill path so the agent
+(or a subagent) can read it back on demand.

 Why this is a middleware subclass instead of a plain ``ContextEdit``:
 ``ContextEdit.apply`` is sync, but writing to the backend is async. We
--- a/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/dedup_tool_calls.py
@ -9,11 +9,10 @@ the duplicate call is stripped from the AIMessage that gets checkpointed.
 That means it is also safe across LangGraph ``interrupt()`` boundaries:
 the removed call will never appear on graph resume.

-Dedup-key resolution order (Tier 2.3 / cleanup in the OpenCode-port plan):
+Dedup-key resolution order:

 1. :class:`ToolDefinition.dedup_key` — callable provided by the registry
-   entry. This is the canonical mechanism after the cleanup-tier removal
-   of the legacy ``PRIMARY_ARG`` map.
+   entry. This is the canonical mechanism.
 2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg name;
   used by MCP / Composio tools whose schemas the registry doesn't see.

@ -72,9 +71,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]
    The dedup-resolver map is built from two sources, in priority order:

    1. ``tool.metadata["dedup_key"]`` — callable provided by the registry's
-       ``ToolDefinition.dedup_key`` (Tier 2.3). Receives the args dict
-       and returns a string signature. This is the canonical mechanism
-       after the cleanup-tier removal of the legacy ``PRIMARY_ARG`` map.
+       ``ToolDefinition.dedup_key``. Receives the args dict and returns
+       a string signature. This is the canonical mechanism.
    2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg
       name; primarily used by MCP / Composio tools.
    """
--- a/surfsense_backend/app/agents/new_chat/middleware/doom_loop.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/doom_loop.py
@ -1,17 +1,19 @@
 """
 DoomLoopMiddleware — pattern-based detector for repeated identical tool calls.

-Mirrors ``opencode/packages/opencode/src/session/processor.ts`` doom-loop
-behavior. When the same tool with the same arguments is called N times
-in a row, the agent has likely entered an infinite loop. We surface this
-to the user as an interrupt with ``permission="doom_loop"`` so the UI
-can render an "Are you stuck? Continue / cancel?" affordance.
+LangChain has :class:`ToolCallLimitMiddleware` which caps the *total* number
+of tool calls per turn — but it can't tell apart "10 distinct, useful
+calls" from "the same call 10 times in a row". This middleware fills that
+gap with a sliding-window check on tool-call signatures, ported from
+OpenCode's ``packages/opencode/src/session/processor.ts``.

-Tier 1.11 in the OpenCode-port plan.
+When the same tool with the same arguments is called N times in a row,
+the agent has likely entered an infinite loop. We surface this to the
+user as an interrupt with ``permission="doom_loop"`` so the UI can
+render an "Are you stuck? Continue / cancel?" affordance.

 This ships **OFF by default** until the frontend explicitly handles
-``context.permission == "doom_loop"`` interrupts (the plan flips
-``SURFSENSE_ENABLE_DOOM_LOOP=true`` once the UI is ready).
+``context.permission == "doom_loop"`` interrupts.

 Wire format: uses SurfSense's existing ``interrupt()`` payload shape
 (see ``app/agents/new_chat/tools/hitl.py``):
@ -69,7 +71,7 @@ class DoomLoopMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respon

    Args:
        threshold: How many consecutive identical signatures count as a
-            doom loop. Default 3 (opencode parity).
+            doom loop. Default 3 (matches OpenCode's processor.ts).
    """

    def __init__(self, *, threshold: int = 3) -> None:
@ -182,7 +184,7 @@ class DoomLoopMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respon
            signatures[-1] if signatures else "<empty>",
        )

-        # Tier 3b: interrupt.raised span with permission=doom_loop attribute
+        # Open an interrupt.raised span with permission=doom_loop attribute
        # so dashboards can break out doom-loop interrupts from regular
        # permission asks via the ``interrupt.permission`` attribute.
        with ot.interrupt_span(
--- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
@ -592,10 +592,11 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        self.available_document_types = available_document_types
        self.top_k = top_k
        self.mentioned_document_ids = mentioned_document_ids or []
-        # Tier 4.2: build the kb-planner private Runnable ONCE here so we
-        # don't pay the create_agent compile cost (50-200ms) on every turn.
-        # Disabled by default behind ``enable_kb_planner_runnable``; when off
-        # the planner falls back to the legacy ``self.llm.ainvoke`` path.
+        # Build the kb-planner private Runnable ONCE here so we don't pay
+        # the ``create_agent`` compile cost (50-200ms) on every turn.
+        # Disabled by default behind ``enable_kb_planner_runnable``; when
+        # off the planner falls back to the legacy ``self.llm.ainvoke``
+        # path.
        self._planner: Runnable | None = None
        self._planner_compile_failed = False

@ -608,9 +609,9 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        lazily on first call, then memoized via ``self._planner``.

        The compiled agent is constructed without tools — the planner's
-        contract is "answer with structured JSON" — but with ``RetryAfter``
-        + the OpenCode-port retry/limit middleware so it shares the parent
-        agent's resilience guarantees.
+        contract is "answer with structured JSON" — but it inherits the
+        :class:`RetryAfterMiddleware` so transient rate-limit errors
+        from the planner LLM call don't fail the whole turn.
        """
        if self._planner is not None or self._planner_compile_failed:
            return self._planner
@ -658,9 +659,9 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        loop = asyncio.get_running_loop()
        t0 = loop.time()

-        # Tier 4.2: prefer the compiled-once planner Runnable when enabled;
-        # otherwise fall back to ``self.llm.ainvoke``. The ``surfsense:internal``
-        # tag is preserved on both paths so ``_stream_agent_events`` still
+        # Prefer the compiled-once planner Runnable when enabled; otherwise
+        # fall back to ``self.llm.ainvoke``. The ``surfsense:internal`` tag
+        # is preserved on both paths so ``_stream_agent_events`` still
        # suppresses the planner's intermediate events from the UI.
        planner = self._build_kb_planner_runnable()
        try:
--- a/surfsense_backend/app/agents/new_chat/middleware/noop_injection.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/noop_injection.py
@ -1,18 +1,23 @@
 """
 ``_noop`` provider-compatibility tool + injection middleware.

-OpenCode injects a ``_noop`` tool for LiteLLM/Bedrock/Copilot when the
-model call has empty tools but message history includes prior
-``tool_calls`` — some providers 400 in that shape (see
-``opencode/packages/opencode/src/session/llm.ts:209-228``). SurfSense uses
-LiteLLM, and the compaction summarize call (no tools, history full of
-tool calls) hits this. Tier 1.5 in the OpenCode-port plan.
+Some providers (LiteLLM, Bedrock, Copilot) 400 when a model call has
+empty ``tools`` but the message history includes prior ``tool_calls`` —
+they treat that shape as malformed even though it's perfectly valid
+LangChain. SurfSense hits this on the compaction summarize call (no
+tools, history full of tool calls).
+
+Ported from OpenCode's ``packages/opencode/src/session/llm.ts:209-228``,
+which discovered and codified the workaround: inject a no-op tool *only*
+on those provider shapes so the request validates; the tool itself is
+never expected to be called.

 Operation: a :class:`NoopInjectionMiddleware` ``wrap_model_call`` checks
 if the request has zero tools but the last AI message in history includes
-``tool_calls``. If yes, it injects the ``_noop`` tool only — never globally,
-mirroring opencode's gating exactly. The :func:`noop_tool` returns empty
-content when called (which it should never be in practice).
+``tool_calls``. If yes, it injects the ``_noop`` tool only — never
+globally — mirroring OpenCode's gating exactly. The :func:`noop_tool`
+returns empty content when called (which it should never be in
+practice).
 """

 from __future__ import annotations
@ -45,8 +50,9 @@ def noop_tool() -> str:


 # Provider markers that benefit from ``_noop`` injection. These match
-# opencode's gating list. We also accept any string containing one of
-# these substrings (so e.g. ``litellm`` matches ``ChatLiteLLM``).
+# OpenCode's gating list (``llm.ts:209-228``). We also accept any string
+# containing one of these substrings so e.g. ``litellm`` matches
+# ``ChatLiteLLM``.
 _NOOP_NEEDED_PROVIDERS: tuple[str, ...] = (
    "litellm",
    "bedrock",
--- a/surfsense_backend/app/agents/new_chat/middleware/otel_span.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/otel_span.py
@ -3,14 +3,14 @@ OpenTelemetry span middleware for the SurfSense ``new_chat`` agent.

 Wraps both ``model.call`` (LLM invocations) and ``tool.call`` (tool
 executions) with OTel spans, attaching low-cardinality span names and
-high-cardinality identifiers as attributes (per the Tier 3b plan).
+high-cardinality identifiers as attributes.

 This middleware is intentionally a thin adapter over
 :mod:`app.observability.otel`; when OTel is not configured all spans
 collapse to no-ops and the wrapper adds <1µs overhead per call. When
 OTel **is** configured (``OTEL_EXPORTER_OTLP_ENDPOINT`` set), every
-model and tool call gets a span with the standard attributes the
-plan's dashboards expect.
+model and tool call gets a span with the standard attributes our
+dashboards expect.
 """

 from __future__ import annotations
--- a/surfsense_backend/app/agents/new_chat/middleware/permission.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/permission.py
@ -1,10 +1,15 @@
 """
 PermissionMiddleware — pattern-based allow/deny/ask with HITL fallback.

-Mirrors ``opencode/packages/opencode/src/permission/index.ts`` but uses
-SurfSense's existing ``interrupt({type, action, context})`` payload shape
-(see ``app/agents/new_chat/tools/hitl.py``) so the frontend keeps
-working unchanged. Tier 2.1 in the OpenCode-port plan.
+LangChain's :class:`HumanInTheLoopMiddleware` only supports a static
+"this tool always asks" decision per tool. There's no rule-based
+allow/deny/ask layered ruleset, no glob patterns, no per-search-space or
+per-thread overrides, and no auto-deny synthesis.
+
+This middleware ports OpenCode's ``packages/opencode/src/permission/index.ts``
+ruleset model on top of SurfSense's existing ``interrupt({type, action,
+context})`` payload shape (see ``app/agents/new_chat/tools/hitl.py``) so
+the frontend keeps working unchanged.

 Operation:
 1. ``aafter_model`` inspects the latest ``AIMessage.tool_calls``.
@ -24,9 +29,9 @@ Operation:

 The middleware also performs a *pre-model* tool-filter step (the
 ``before_model`` hook) so globally denied tools are stripped from the
-exposed tool list before the model gets to see them. This is
-opencode's ``Permission.disabled`` equivalent and dramatically reduces
-the chance the model emits a deny-only call.
+exposed tool list before the model gets to see them. This mirrors
+OpenCode's ``Permission.disabled`` and dramatically reduces the chance
+the model emits a deny-only call.
 """

 from __future__ import annotations
@ -117,7 +122,7 @@ class PermissionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        self._emit_interrupt = always_emit_interrupt_payload

    # ------------------------------------------------------------------
-    # Tool-filter step (opencode `Permission.disabled` equivalent)
+    # Tool-filter step (mirrors OpenCode's ``Permission.disabled``)
    # ------------------------------------------------------------------

    def _globally_denied(self, tool_name: str) -> bool:
@ -197,8 +202,8 @@ class PermissionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
                "always": patterns,
            },
        }
-        # Tier 3b: permission.asked + interrupt.raised spans (no-op when
-        # OTel is disabled). Both fire here so dashboards can correlate
+        # Open ``permission.asked`` + ``interrupt.raised`` OTel spans
+        # (no-op when OTel is disabled) so dashboards can correlate
        # "we asked X" with "interrupt was actually delivered".
        with (
            ot.permission_asked_span(
--- a/surfsense_backend/app/agents/new_chat/middleware/retry_after.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/retry_after.py
@ -1,10 +1,16 @@
 """
 RetryAfterMiddleware — Header-aware retry with custom backoff and SSE eventing.

-Why standalone instead of subclassing ``ModelRetryMiddleware``: the upstream
-class calls module-level ``calculate_delay`` inline (no overridable
-``_calculate_delay`` hook), so a subclass cannot inject Retry-After header
-delays without rewriting the loop. Tier 1.4 in the OpenCode-port plan.
+LangChain's :class:`ModelRetryMiddleware` retries on exceptions but ignores
+the ``Retry-After`` HTTP header — it just runs its own exponential backoff.
+That wastes time when a provider has explicitly told us how long to wait.
+This middleware honors the header (mirroring OpenCode's
+``packages/opencode/src/session/llm.ts`` retry pathway) and emits an SSE
+event so the UI can show "rate-limited, retrying in Ns".
+
+We can't subclass ``ModelRetryMiddleware`` cleanly because its loop calls a
+module-level ``calculate_delay`` inline (no overridable
+``_calculate_delay`` hook), so this is a standalone implementation.

 Behaviour:
 - Extracts ``Retry-After`` / ``retry-after-ms`` from
--- a/surfsense_backend/app/agents/new_chat/middleware/tool_call_repair.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/tool_call_repair.py
@ -1,10 +1,6 @@
 """
 ToolCallNameRepairMiddleware — two-stage tool-name repair.

-Mirrors ``opencode/packages/opencode/src/session/llm.ts:339-358`` plus
-``opencode/packages/opencode/src/tool/invalid.ts``. Tier 1.7 in the
-OpenCode-port plan.
-
 Operation:
 1. **Stage 1 — lowercase repair:** if a tool call's ``name`` is not in
   the registry but ``name.lower()`` is, rewrite in place. Catches
@ -14,9 +10,13 @@ Operation:
   so the registered :func:`invalid_tool` returns the error to the model
   for self-correction.

-Distinct from :class:`deepagents.middleware.PatchToolCallsMiddleware`,
-which patches *dangling* tool calls (no matching ToolMessage) — that
-class does not handle the wrong-name case at all.
+Ported from OpenCode's ``packages/opencode/src/session/llm.ts:339-358``
++ ``packages/opencode/src/tool/invalid.ts``. LangChain has no equivalent:
+:class:`deepagents.middleware.PatchToolCallsMiddleware` patches
+*dangling* tool calls (no matching ToolMessage) but does nothing about
+wrong names, and the model framework's default behavior on an unknown
+name is to crash the turn rather than route to a self-correction
+fallback.
 """

 from __future__ import annotations
@ -61,7 +61,8 @@ class ToolCallNameRepairMiddleware(
            ``invalid`` should be in this set so the fallback dispatches.
        fuzzy_match_threshold: Optional ``difflib`` ratio (0-1) for the
            fuzzy-match step that runs *between* lowercase and invalid.
-            Set to ``None`` to disable fuzzy matching (opencode parity).
+            Set to ``None`` to disable fuzzy matching (default in
+            OpenCode; we mirror that to avoid silent rewrites).
    """

    def __init__(
@ -106,7 +107,7 @@ class ToolCallNameRepairMiddleware(
            call["response_metadata"] = metadata
            return call

-        # Optional fuzzy step (off by default for opencode parity)
+        # Optional fuzzy step (off by default — see class docstring)
        if self._fuzzy_threshold is not None:
            close = difflib.get_close_matches(
                name, registered, n=1, cutoff=self._fuzzy_threshold
--- a/surfsense_backend/app/agents/new_chat/permissions.py
+++ b/surfsense_backend/app/agents/new_chat/permissions.py
@ -1,21 +1,20 @@
 """
 Wildcard pattern matching + rule evaluation for the SurfSense permission system.

-Mirrors ``opencode/packages/opencode/src/permission/evaluate.ts`` and
-``opencode/packages/opencode/src/util/wildcard.ts`` precisely:
+Ported from OpenCode's ``packages/opencode/src/permission/evaluate.ts`` and
+``packages/opencode/src/util/wildcard.ts``. LangChain has no rule-based
+permission evaluator, so we keep OpenCode's semantics intact:

 - ``Wildcard.match`` matches both the ``permission`` and the ``pattern``
  fields of a rule against the requested ``(permission, pattern)`` pair.
  ``*`` matches any segment, ``**`` matches across separators.
 - The evaluator runs ``findLast`` over the **flattened** list of rules
  from all rulesets — last matching rule wins.
-- The default fallback is ``ask`` (NOT deny), matching opencode.
+- The default fallback is ``ask`` (NOT deny), matching OpenCode.
 - Multi-pattern requests AND together: if ANY pattern resolves to
  ``deny``, the whole request is denied; if ANY needs ``ask``, an
  interrupt is raised; only when all patterns ``allow`` does the
  request proceed.
-
-Tier 2.1 in the OpenCode-port plan.
 """

 from __future__ import annotations
--- a/surfsense_backend/app/agents/new_chat/plugin_loader.py
+++ b/surfsense_backend/app/agents/new_chat/plugin_loader.py
@ -1,9 +1,10 @@
 """Entry-point based plugin loader for SurfSense agent middleware.

-The realization in the Tier 6 plan: LangChain's :class:`AgentMiddleware` ABC
-already covers the practical surface most plugins need (``before_agent`` /
-``before_model`` / ``wrap_tool_call`` / their async counterparts), so a
-SurfSense-specific plugin protocol is unnecessary.
+LangChain's :class:`AgentMiddleware` ABC already covers the practical
+surface most plugins need (``before_agent`` / ``before_model`` /
+``wrap_tool_call`` / their async counterparts), so a SurfSense-specific
+plugin protocol would be redundant. We just need a way to discover and
+admit third-party middleware safely.

 A plugin is therefore just an installable Python package that registers a
 factory callable under the ``surfsense.plugins`` entry-point group:
--- a/surfsense_backend/app/agents/new_chat/plugins/year_substituter.py
+++ b/surfsense_backend/app/agents/new_chat/plugins/year_substituter.py
@ -1,10 +1,10 @@
 """Reference plugin: substitute ``{{year}}`` in tool descriptions.

-Mirrors the OpenCode ``chat.system.transform`` example. Demonstrates the
-:meth:`AgentMiddleware.awrap_tool_call` hook -- the plugin sees every tool
-invocation and can rewrite the request *or* the result. This particular
-plugin is read-only and only transforms the *description* the user might
-see in error messages (no request mutation).
+Demonstrates the :meth:`AgentMiddleware.awrap_tool_call` hook -- the
+plugin sees every tool invocation and can rewrite the request *or* the
+result. This particular plugin is read-only and only transforms the
+*description* the user might see in error messages (no request
+mutation).

 The plugin is built as a factory function so the entry-point loader can
 inject :class:`PluginContext` (containing the agent's LLM, search-space
--- a/surfsense_backend/app/agents/new_chat/prompts/composer.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py
@ -14,7 +14,13 @@ under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic
      examples/              # one ``<name>.md`` per tool with call examples
      routing/               # connector-specific routing notes (linear, slack, …)

-Tier 3a in the OpenCode-port plan.
+The model-family dispatch step (see :func:`detect_provider_variant`)
+mirrors OpenCode's ``packages/opencode/src/session/system.ts`` — different
+model families respond best to differently-styled prompts (Claude likes
+XML/narrative prose, GPT-5 a channel-aware pragmatic style, Codex terse
+``file:line`` references, Gemini formal numbered steps, etc.). LangChain's
+``dynamic_prompt`` helper supports per-call prompt swaps but ships no
+out-of-the-box family classifier, so we keep our own.

 Backwards compatibility
 =======================
@ -42,10 +48,11 @@ from app.db import ChatVisibility
 # When adding a new variant, also drop a matching ``providers/<variant>.md``
 # file in this package and (if appropriate) extend the regex matchers below.
 #
-# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
-# to SurfSense's "supplemental hints" architecture (each fragment is a
-# focused style nudge, NOT a full system prompt — the main prompt is
-# already assembled from base/ + tools/ + routing/).
+# Stylistic clusters: each variant is a focused style nudge, NOT a full
+# system prompt — the main prompt is already assembled from base/ +
+# tools/ + routing/. The clustering itself (which models map to which
+# style) follows OpenCode's ``system.ts`` family table; see the module
+# docstring for credits.
 ProviderVariant = str
 # Known values:
 #   "anthropic"        — Claude family (XML-friendly, narrative todos)
@ -82,8 +89,8 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:

    Order is significant: more-specific patterns are tried first so
    ``gpt-5-codex`` routes to ``"openai_codex"`` rather than
-    ``"openai_reasoning"`` (mirrors OpenCode's
-    ``packages/opencode/src/session/system.ts`` dispatch).
+    ``"openai_reasoning"`` — same dispatch order as OpenCode's
+    ``packages/opencode/src/session/system.ts``.
    """
    if not model_name:
        return "default"
--- a/surfsense_backend/app/agents/new_chat/subagents/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/subagents/__init__.py
@ -1,14 +1,17 @@
 """Specialized user-facing subagents for the SurfSense agent.

-Each subagent is a :class:`deepagents.SubAgent` typed-dict spec passed to
-:class:`deepagents.SubAgentMiddleware`, which materializes them as ephemeral
-runnables invoked via the ``task`` tool.
+The :class:`deepagents.SubAgentMiddleware` already provides the
+materialization machinery (each :class:`deepagents.SubAgent` typed-dict
+spec is compiled into an ephemeral runnable invoked via the ``task``
+tool); what's specific to SurfSense is the *seeding* of those subagents
+with declarative deny rules.

 Per-subagent permission rules are injected as a
 :class:`PermissionMiddleware` entry inside the subagent's ``middleware``
-field, mirroring opencode ``tool/task.ts`` which seeds child sessions with
-deny rules for tools the parent does not want them touching (e.g.
-``task``/``todowrite`` recursion, write tools for read-only research roles).
+field. The auto-deny pattern (e.g. forbid ``task``/``todowrite``
+recursion, block write tools for read-only research roles) is borrowed
+from OpenCode's ``packages/opencode/src/tool/task.ts``, which has
+analogous logic for restricting child sessions.
 """

 from .config import (
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@ -1,13 +1,14 @@
 """
 Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`.

-Tier 3a of the OpenCode-port plan replaced the monolithic prompt strings
-in this module with a fragment tree under ``prompts/`` and a composer
-function. This module preserves the public function surface
-(``build_surfsense_system_prompt`` / ``build_configurable_system_prompt`` /
-``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so that
-existing call sites — `chat_deepagent.py`, anonymous chat routes, and the
-configurable-prompt admin path — keep working without churn.
+The composer split the previous monolithic prompt string into a fragment
+tree under ``prompts/`` plus a model-family dispatch step (see the
+composer module docstring for credits). This module preserves the public
+function surface (``build_surfsense_system_prompt`` /
+``build_configurable_system_prompt`` /
+``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
+that existing call sites — `chat_deepagent.py`, anonymous chat routes,
+and the configurable-prompt admin path — keep working without churn.

 For new call sites prefer importing ``compose_system_prompt`` directly
 from :mod:`app.agents.new_chat.prompts.composer`.
--- a/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py
+++ b/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py
@ -6,8 +6,9 @@ tool, :class:`ToolCallNameRepairMiddleware` rewrites the call to ``invalid``
 with the original name and a parser/validation error string. This tool's
 execution then returns that error to the model so it can self-correct.

-Mirrors ``opencode/packages/opencode/src/tool/invalid.ts``. Tier 1.6 in
-the OpenCode-port plan.
+Ported from OpenCode's ``packages/opencode/src/tool/invalid.ts`` —
+LangChain has no equivalent fallback path; the default behavior on an
+unknown tool name is a hard ``ToolNotFoundError`` which kills the turn.

 Critically, the :class:`ToolDefinition` for this tool is **excluded** from
 the system-prompt tool list and from ``LLMToolSelectorMiddleware`` selection
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@ -132,12 +132,10 @@ class ToolDefinition:
            that must be in ``available_connectors`` for the tool to be enabled.
        dedup_key: Optional callable that maps a tool's ``args`` dict to a
            string signature used by :class:`DedupHITLToolCallsMiddleware`
-            to drop duplicate calls. Replaces the legacy hardcoded
-            ``_NATIVE_HITL_TOOL_DEDUP_KEYS`` map (Tier 2.3 in the
-            OpenCode-port plan).
+            to drop duplicate calls within a single LLM response.
        reverse: Optional callable that, given the tool's ``(args, result)``,
            returns a ``ReverseDescriptor`` describing the inverse tool
-            invocation. Consumed by the snapshot/revert pipeline (Tier 5).
+            invocation. Consumed by the snapshot/revert pipeline.

    """

--- a/surfsense_backend/app/observability/otel.py
+++ b/surfsense_backend/app/observability/otel.py
@ -1,12 +1,10 @@
 """
 OpenTelemetry instrumentation helpers for the SurfSense agent stack.

-Tier 3b in the OpenCode-port plan.
-
 Goals
 =====

-- Provide one tiny, ergonomic API for the spans listed in the plan
+- Provide one tiny, ergonomic API for the spans we care about
  (``tool.call``, ``model.call``, ``kb.search``, ``kb.persist``,
  ``compaction.run``, ``interrupt.raised``, ``permission.asked``).
 - Keep span **names** low-cardinality (``tool.call`` rather than
--- a/surfsense_backend/app/routes/agent_revert_route.py
+++ b/surfsense_backend/app/routes/agent_revert_route.py
@ -1,9 +1,9 @@
 """POST ``/api/threads/{thread_id}/revert/{action_id}``: undo an agent action.

-Per the Tier 5 plan, the route ships **before** the UI lights up the per-message
-"Undo from here" affordance. To prevent accidental usage during the gap we
-return ``503 Service Unavailable`` until the
-``SURFSENSE_ENABLE_REVERT_ROUTE`` flag flips. Once enabled, the route runs:
+The route ships **before** the UI lights up the per-message "Undo from
+here" affordance. To prevent accidental usage during the gap we return
+``503 Service Unavailable`` until the ``SURFSENSE_ENABLE_REVERT_ROUTE``
+flag flips. Once enabled, the route runs:

 1. Authentication via :func:`current_active_user`.
 2. Action lookup; 404 if the action does not belong to the thread.
--- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
@ -1,4 +1,4 @@
-"""Tests for the prompt fragment composer (Tier 3a)."""
+"""Tests for the prompt fragment composer."""

 from __future__ import annotations

--- a/surfsense_backend/tests/unit/agents/new_chat/test_otel_span.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_otel_span.py
@ -1,4 +1,4 @@
-"""Tests for the OtelSpanMiddleware adapter (Tier 3b)."""
+"""Tests for the OtelSpanMiddleware adapter."""

 from __future__ import annotations

--- a/surfsense_backend/tests/unit/agents/new_chat/test_permissions.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_permissions.py
@ -1,4 +1,4 @@
-"""Tests for the wildcard matcher and rule evaluator (opencode evaluate.ts parity)."""
+"""Tests for the wildcard matcher and rule evaluator (parity with OpenCode evaluate.ts)."""

 from __future__ import annotations

--- a/surfsense_backend/tests/unit/agents/new_chat/test_plugin_loader.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_plugin_loader.py
@ -1,4 +1,4 @@
-"""Unit tests for the SurfSense plugin entry-point loader (Tier 6)."""
+"""Unit tests for the SurfSense plugin entry-point loader."""

 from __future__ import annotations

--- a/surfsense_backend/tests/unit/observability/test_otel.py
+++ b/surfsense_backend/tests/unit/observability/test_otel.py
@ -1,4 +1,4 @@
-"""Tests for the SurfSense OpenTelemetry shim (Tier 3b)."""
+"""Tests for the SurfSense OpenTelemetry shim."""

 from __future__ import annotations

--- a/surfsense_backend/tests/unit/services/test_revert_service.py
+++ b/surfsense_backend/tests/unit/services/test_revert_service.py
@ -1,4 +1,4 @@
-"""Unit tests for the agent revert service (Tier 5.3)."""
+"""Unit tests for the agent revert service."""

 from __future__ import annotations