diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py
index b76b54c27..1d6a8763e 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py
@@ -210,10 +210,8 @@ def build_main_agent_deepagent_middleware(
         )
         gp_middleware.insert(_patch_idx, subagent_deny_permission_mw)
 
-    # Defined here (instead of further down with the other ``wrap_model_call``
-    # middlewares) so subagents share the same instances as the parent —
-    # otherwise a connector subagent would die on the first provider hiccup
-    # while the parent stays resilient.
+    # Defined early so the same instances reach both gp_middleware and
+    # subagent_extra_middleware below.
     retry_mw = (
         RetryAfterMiddleware(max_retries=3)
         if flags.enable_retry_after and not flags.disable_new_agent_stack
@@ -230,9 +228,7 @@ def build_main_agent_deepagent_middleware(
             logging.warning("ScopedModelFallbackMiddleware init failed; skipping.")
             fallback_mw = None
 
-    # Cost / loop ceiling shared with subagents. ``state_schema`` of these
-    # middlewares is per-agent; counts are not summed across parent + sub —
-    # the cap acts as a safety net per agent, not a global budget.
+    # Per-agent caps; counts are not summed across parent + subagents.
     model_call_limit_mw = (
         ModelCallLimitMiddleware(
             thread_limit=120,
@@ -250,9 +246,8 @@ def build_main_agent_deepagent_middleware(
         else None
     )
 
-    # Mirror the parent's ordering: retry / fallback / limits wrap caching,
-    # which wraps the model. ``gp_middleware`` is held by reference inside
-    # ``general_purpose_spec`` so this insertion propagates into the spec.
+    # gp_middleware is held by reference inside general_purpose_spec, so
+    # mutating it here propagates into the spec.
     _gp_resilience: list[Any] = [
         m
         for m in (retry_mw, fallback_mw, model_call_limit_mw, tool_call_limit_mw)
diff --git a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py
index bde8edeba..99eb2d74a 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py
@@ -1,17 +1,4 @@
-"""Fallback only on provider/network errors; let programming bugs raise.
-
-Upstream :class:`langchain.agents.middleware.ModelFallbackMiddleware` catches
-every ``Exception``. With a non-provider bug (``KeyError``, ``TypeError``,
-``AttributeError`` from middleware/state), every fallback model in the chain
-hits the same bug — burning latency and tokens before the real cause finally
-surfaces. Scoping the catch to provider-style exception types lets bugs fail
-fast with clean tracebacks.
-
-Class-name matching (instead of ``isinstance`` against imported provider
-types) keeps the dependency surface flat: openai, anthropic, google,
-mistral, etc. all ship their own ``RateLimitError`` and we don't want to
-import them all.
-"""
+"""Fallback only on provider/network errors; let programming bugs raise."""
 
 from __future__ import annotations
 
@@ -26,17 +13,16 @@ if TYPE_CHECKING:
     from langchain_core.messages import AIMessage
 
 
+# Matched by class name across the MRO so we don't have to import every
+# provider SDK (openai/anthropic/google/...). Extend as new providers ship.
 _FALLBACK_ELIGIBLE_NAMES: frozenset[str] = frozenset(
     {
-        # Rate / quota
         "RateLimitError",
-        # Server-side
         "APIStatusError",
         "InternalServerError",
         "ServiceUnavailableError",
         "BadGatewayError",
         "GatewayTimeoutError",
-        # Network
         "APIConnectionError",
         "APITimeoutError",
         "ConnectError",
@@ -45,18 +31,16 @@ _FALLBACK_ELIGIBLE_NAMES: frozenset[str] = frozenset(
         "RemoteProtocolError",
         "TimeoutError",
         "TimeoutException",
-        # Can be extended to other exceptions in the future
     }
 )
 
 
 def _is_fallback_eligible(exc: BaseException) -> bool:
-    """Eligible if the exception or any base in its MRO matches by class name."""
     return any(cls.__name__ in _FALLBACK_ELIGIBLE_NAMES for cls in type(exc).__mro__)
 
 
 class ScopedModelFallbackMiddleware(ModelFallbackMiddleware):
-    """``ModelFallbackMiddleware`` that re-raises non-provider exceptions."""
+    """Re-raise non-provider exceptions instead of walking the fallback chain."""
 
     def wrap_model_call(  # type: ignore[override]
         self,
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py
index 188224074..ab6a644b5 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py
@@ -1,18 +1,4 @@
-"""End-to-end resume-bridge tests against a real LangGraph subagent.
-
-Builds a minimal Pregel subagent that calls ``interrupt(...)`` and drives the
-``task`` tool directly with a hand-crafted ``ToolRuntime``. Exercises the only
-runtime contract we own: parent stashes a decision in
-``config["configurable"]["surfsense_resume_value"]`` -> bridge forwards it as
-``Command(resume={interrupt_id: value})`` -> subagent completes -> return value
-reflects the decision.
-
-We pause the subagent **outside** the parent task tool (calling
-``subagent.ainvoke`` directly) to skip the ``_lg_interrupt`` re-raise path,
-which requires a parent runnable context. The bridge logic under test is the
-*resume* dispatch, not the propagation; propagation is exercised separately in
-its own module's tests.
-"""
+"""End-to-end resume-bridge tests against a real LangGraph subagent."""
 
 from __future__ import annotations
 
@@ -37,8 +23,6 @@ class _SubagentState(TypedDict, total=False):
 
 
 def _build_single_interrupt_subagent():
-    """Subagent that interrupts once, then echoes the resume decision into state."""
-
     def approve_node(state):
         from langchain_core.messages import AIMessage
 
@@ -54,8 +38,6 @@ def _build_single_interrupt_subagent():
                 "review_configs": [{}],
             }
         )
-        # Capture the resume payload verbatim so the test can assert the
-        # bridge forwarded it intact (no reshape, no scalar broadcast).
         return {
             "messages": [AIMessage(content="done")],
             "decision_text": repr(decision),
@@ -81,7 +63,7 @@ def _make_runtime(config: dict) -> ToolRuntime:
 
 @pytest.mark.asyncio
 async def test_resume_bridge_dispatches_decision_into_pending_subagent():
-    """Side-channel decision -> targeted Command(resume) -> subagent completes."""
+    """Side-channel decision must reach the subagent's pending interrupt verbatim."""
     subagent = _build_single_interrupt_subagent()
     task_tool = build_task_tool_with_parent_config(
         [
@@ -93,7 +75,6 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent():
         ]
     )
 
-    # 1. Pause the subagent directly so we can test only the resume path.
     parent_config: dict = {
         "configurable": {"thread_id": "shared-thread"},
         "recursion_limit": 100,
@@ -104,15 +85,11 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent():
         "fixture broken: subagent should be paused on its interrupt"
     )
 
-    # 2. Stash the user's decision on the side-channel — this is what
-    #    ``stream_resume_chat`` does in production.
     parent_config["configurable"]["surfsense_resume_value"] = {
         "decisions": ["APPROVED"]
     }
     runtime = _make_runtime(parent_config)
 
-    # 3. Drive the bridge. Subagent has no remaining interrupt after resume,
-    #    so propagation will not call ``_lg_interrupt`` (no parent ctx needed).
     result = await task_tool.coroutine(
         description="please approve",
         subagent_type="approver",
@@ -121,27 +98,16 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent():
 
     assert isinstance(result, Command)
     update = result.update
-    # Bridge forwards the side-channel payload **verbatim** to the
-    # subagent's ``interrupt()``. A scalar broadcast or accidental
-    # unwrap would change this shape and we want to catch that.
     assert update["decision_text"] == repr({"decisions": ["APPROVED"]})
-
-    # 4. Side-channel was consumed; a stale replay would re-prompt the user.
     assert "surfsense_resume_value" not in parent_config["configurable"]
 
-    # 5. Subagent moved past the interrupt (no pending tasks remain).
     final = await subagent.aget_state(parent_config)
     assert not final.tasks or all(not t.interrupts for t in final.tasks)
 
 
 @pytest.mark.asyncio
 async def test_pending_interrupt_without_resume_value_raises_runtime_error():
-    """Bridge must fail loud if a paused subagent has no decision queued.
-
-    The fail-open alternative (silently re-invoking) would re-fire the
-    same interrupt to the user. The error surfaces a real broken bridge
-    instead of confusing duplicate approval cards.
-    """
+    """Bridge must fail loud rather than silently replay the user's interrupt."""
     subagent = _build_single_interrupt_subagent()
     task_tool = build_task_tool_with_parent_config(
         [
@@ -161,7 +127,6 @@ async def test_pending_interrupt_without_resume_value_raises_runtime_error():
     snap = await subagent.aget_state(parent_config)
     assert snap.tasks and snap.tasks[0].interrupts, "fixture broken"
 
-    # No surfsense_resume_value injected — bridge must refuse to proceed.
     runtime = _make_runtime(parent_config)
 
     with pytest.raises(RuntimeError, match="resume bridge is broken"):
@@ -173,8 +138,6 @@ async def test_pending_interrupt_without_resume_value_raises_runtime_error():
 
 
 def _build_bundle_subagent():
-    """Subagent that raises a 3-action HITL bundle on its only node."""
-
     def bundle_node(state):
         from langchain_core.messages import AIMessage
 
@@ -202,12 +165,7 @@ def _build_bundle_subagent():
 
 @pytest.mark.asyncio
 async def test_bundle_three_mixed_decisions_arrive_in_order():
-    """Approve / edit / reject for a 3-action bundle land at ordinals 0/1/2.
-
-    Catches reshape regressions: truncation, decision collapse, order
-    scrambling, and the legacy single-decision broadcast that would
-    fan-out one verdict to every action.
-    """
+    """Approve / edit / reject for a 3-action bundle must land at ordinals 0/1/2."""
     subagent = _build_bundle_subagent()
     task_tool = build_task_tool_with_parent_config(
         [
@@ -242,11 +200,8 @@ async def test_bundle_three_mixed_decisions_arrive_in_order():
     )
 
     assert isinstance(result, Command)
-    decision_text = result.update["decision_text"]
-    received = ast.literal_eval(decision_text)
-    assert received == decisions_payload, "bundle decisions must arrive verbatim"
-    # Cross-checks for the regressions this test exists to catch.
-    assert len(received["decisions"]) == 3
+    received = ast.literal_eval(result.update["decision_text"])
+    assert received == decisions_payload
     assert received["decisions"][0]["type"] == "approve"
     assert received["decisions"][1]["type"] == "edit"
     assert received["decisions"][1]["args"] == {"args": {"name": "edited-b"}}
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py
index 2060051a2..e73fb2823 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py
@@ -1,10 +1,4 @@
-"""Pure-function tests for the HITL resume side-channel helpers.
-
-Tests the invariant that backs the bridge: a queued resume value must be
-read exactly once per turn. A second read returns ``None`` so the
-parent ``task`` tool falls through to its fail-loud guard rather than
-replaying the same resume payload (which would re-fire the interrupt).
-"""
+"""Resume side-channel must be read exactly once per turn."""
 
 from __future__ import annotations
 
@@ -17,7 +11,6 @@ from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subage
 
 
 def _runtime_with_config(config: dict) -> ToolRuntime:
-    """Real ToolRuntime; only ``.config`` is exercised by the helpers."""
     return ToolRuntime(
         state=None,
         context=None,
@@ -37,9 +30,6 @@ class TestConsumeSurfsenseResume:
         assert consume_surfsense_resume(runtime) == {"decisions": ["approve"]}
 
     def test_second_call_returns_none(self):
-        # Regression guard: a second read must not replay the queued
-        # resume. If it did, the subagent would re-invoke with the
-        # same Command and the user-facing interrupt would fire twice.
         configurable: dict = {"surfsense_resume_value": {"decisions": ["approve"]}}
         runtime = _runtime_with_config({"configurable": configurable})
 
@@ -68,9 +58,6 @@ class TestHasSurfsenseResume:
         assert has_surfsense_resume(runtime) is True
 
     def test_does_not_consume_payload(self):
-        # The fail-loud guard in ``task_tool`` calls ``has_surfsense_resume``
-        # *before* deciding to consume; the check itself must leave the
-        # payload queued for the matching ``consume_surfsense_resume`` call.
         configurable = {"surfsense_resume_value": "approve"}
         runtime = _runtime_with_config({"configurable": configurable})
 
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py
index 859833f1c..5cd62ed36 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py
@@ -1,11 +1,4 @@
-"""Resilience contract for subagents built via ``pack_subagent``.
-
-Subagents (jira, linear, notion, ...) run on the same LLM as the parent. When
-the provider rate-limits or returns an empty stream, a single hiccup must not
-abort the user's HITL flow — the connector subagent has to keep moving. This
-relies on ``ModelFallbackMiddleware`` being usable as a subagent
-``extra_middleware`` so the production builder can wire it in.
-"""
+"""Subagent resilience contract: ``extra_middleware`` reaches the agent chain."""
 
 from __future__ import annotations
 
@@ -32,11 +25,10 @@ from app.agents.multi_agent_chat.subagents.shared.subagent_builder import (
 
 
 class RateLimitError(Exception):
-    """Provider-style 429; matches the scoped-fallback eligibility allowlist by name."""
+    """Name matches the scoped-fallback eligibility allowlist."""
 
 
 class _AlwaysFailingChatModel(BaseChatModel):
-    """Mimics a provider hard-failing on every call (rate limit / empty stream)."""
 
     @property
     def _llm_type(self) -> str:
@@ -76,7 +68,7 @@ class _AlwaysFailingChatModel(BaseChatModel):
 
 @pytest.mark.asyncio
 async def test_subagent_recovers_when_primary_llm_fails():
-    """Primary blows up → fallback in extra_middleware finishes the turn."""
+    """Fallback in ``extra_middleware`` must finish the turn when primary raises."""
     primary = _AlwaysFailingChatModel()
     fallback = FakeMessagesListChatModel(
         responses=[AIMessage(content="recovered via fallback")]
diff --git a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py
index af464d1dc..69f6fe6b7 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py
@@ -1,12 +1,4 @@
-"""Exception-scope contract for ``ScopedModelFallbackMiddleware``.
-
-Upstream ``ModelFallbackMiddleware`` catches every ``Exception`` and walks
-the fallback chain. That means a programming bug (``KeyError`` from a
-botched tool config, ``TypeError`` from middleware, ...) burns 1+N model
-round-trips and ~Nx tokens before its real cause surfaces. The scoped
-variant only falls back on provider/network exception types so bugs fail
-fast, with clean tracebacks.
-"""
+"""``ScopedModelFallbackMiddleware`` triggers fallback only on provider errors."""
 
 from __future__ import annotations
 
@@ -24,8 +16,6 @@ from langchain_core.outputs import ChatGeneration, ChatResult
 
 
 class _RaisingChatModel(BaseChatModel):
-    """LLM that raises a configurable exception on every invocation."""
-
     exc_to_raise: Any
 
     @property
@@ -61,8 +51,6 @@ class _RaisingChatModel(BaseChatModel):
 
 
 class _RecordingChatModel(BaseChatModel):
-    """Returns a fixed message and counts how often it was called."""
-
     response_text: str = "fallback-ok"
     call_count: int = 0
 
@@ -94,14 +82,11 @@ class _RecordingChatModel(BaseChatModel):
         return self._generate(messages, stop, None, **kwargs)
 
 
-# Locally defined provider-style error: importing openai/anthropic/etc.
-# would couple the test to provider SDKs the contract intentionally avoids.
 class RateLimitError(Exception):
-    """Mimics ``openai.RateLimitError`` for name-based eligibility."""
+    """Name matches the scoped-fallback eligibility allowlist."""
 
 
 def _build_agent(primary: BaseChatModel, fallback: BaseChatModel):
-    """Compile a no-tools agent with the scoped fallback wired in."""
     from langchain.agents import create_agent
 
     from app.agents.new_chat.middleware.scoped_model_fallback import (
@@ -118,7 +103,7 @@ def _build_agent(primary: BaseChatModel, fallback: BaseChatModel):
 
 @pytest.mark.asyncio
 async def test_provider_errors_trigger_fallback():
-    """Class names matching the provider allowlist drive the fallback chain."""
+    """Eligible exception names must drive the fallback chain."""
     primary = _RaisingChatModel(exc_to_raise=RateLimitError("429 from provider"))
     fallback = _RecordingChatModel(response_text="recovered")
 
@@ -133,7 +118,7 @@ async def test_provider_errors_trigger_fallback():
 
 @pytest.mark.asyncio
 async def test_programming_errors_propagate_without_invoking_fallback():
-    """``KeyError`` from agent-side bugs must surface immediately, no fallback retry."""
+    """Non-eligible exceptions must propagate; fallback must not be invoked."""
     primary = _RaisingChatModel(exc_to_raise=KeyError("missing_state_field"))
     fallback = _RecordingChatModel(response_text="should-never-arrive")
 
@@ -142,7 +127,4 @@ async def test_programming_errors_propagate_without_invoking_fallback():
     with pytest.raises(KeyError, match="missing_state_field"):
         await agent.ainvoke({"messages": [("user", "hi")]})
 
-    assert fallback.call_count == 0, (
-        "fallback was invoked for a programming error; "
-        "scoping rule is broken"
-    )
+    assert fallback.call_count == 0