feat: enhance task management and timeout configurations in multi-agent chat

- Added new environment variables for controlling task execution limits, including `SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`, `SURFSENSE_TASK_BATCH_CONCURRENCY`, and `SURFSENSE_TASK_BATCH_MAX_SIZE`.
- Updated documentation to reflect new batch processing capabilities for `task` calls, allowing for concurrent execution of multiple subagent tasks.
- Improved error handling and receipt generation for deliverables, ensuring consistent feedback on task status.
- Refactored middleware to incorporate search space ID for better task management.
2026-05-31 19:45:15 +02:00 · 2026-05-27 14:58:10 -07:00 · 2026-05-27 14:58:10 -07:00 · 9d6e9b7e2d
commit 9d6e9b7e2d
parent 820f541f08
66 changed files with 2561 additions and 380 deletions
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/system_prompt.md
@ -42,14 +42,16 @@ Return **only** one JSON object (no markdown/prose):
  "evidence": {
    "artifact_type": "report" | "podcast" | "video_presentation" | "resume" | "image" | null,
    "artifact_id": string | null,
-    "artifact_location": string | null
+    "artifact_location": string | null,
+    "receipts": Receipt[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
+Route-specific rules:
+- `evidence.receipts` quotes the Receipt(s) returned by `generate_report` / `generate_podcast` / `generate_video_presentation` / `generate_resume` / `generate_image` this turn, verbatim. The Receipt's `type` enum is one of `report` | `podcast` | `video_presentation` | `resume` | `image`.
+<include snippet="output_contract_base"/>
 </output_contract>
+
+<include snippet="verifiable_handle"/>
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/generate_image.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/generate_image.py
@ -4,11 +4,15 @@ import hashlib
 import logging
 from typing import Any

+from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
+from langgraph.types import Command
 from litellm import aimage_generation
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.config import config
 from app.db import (
    ImageGeneration,
@ -66,8 +70,9 @@ def create_generate_image_tool(
    @tool
    async def generate_image(
        prompt: str,
+        runtime: ToolRuntime,
        n: int = 1,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """
        Generate an image from a text description using AI image models.

@ -82,6 +87,21 @@ def create_generate_image_tool(
        Returns:
            A dictionary containing the generated image(s) for display in the chat.
        """
+
+        def _failed(payload: dict[str, Any], *, error: str) -> Command:
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="image",
+                    operation="generate",
+                    status="failed",
+                    preview=prompt[:200] if prompt else None,
+                    error=error,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )
+
        try:
            # Use a per-call session so concurrent tool calls don't share an
            # AsyncSession (which is not concurrency-safe). The streaming
@ -93,7 +113,10 @@ def create_generate_image_tool(
                )
                search_space = result.scalars().first()
                if not search_space:
-                    return {"error": "Search space not found"}
+                    return _failed(
+                        {"error": "Search space not found"},
+                        error="Search space not found",
+                    )

                config_id = (
                    search_space.image_generation_config_id or IMAGE_GEN_AUTO_MODE_ID
@ -112,19 +135,19 @@ def create_generate_image_tool(
                # Call litellm based on config type
                if is_image_gen_auto_mode(config_id):
                    if not ImageGenRouterService.is_initialized():
-                        return {
-                            "error": "No image generation models configured. "
+                        err = (
+                            "No image generation models configured. "
                            "Please add an image model in Settings > Image Models."
-                        }
+                        )
+                        return _failed({"error": err}, error=err)
                    response = await ImageGenRouterService.aimage_generation(
                        prompt=prompt, model="auto", **gen_kwargs
                    )
                elif config_id < 0:
                    cfg = _get_global_image_gen_config(config_id)
                    if not cfg:
-                        return {
-                            "error": f"Image generation config {config_id} not found"
-                        }
+                        err = f"Image generation config {config_id} not found"
+                        return _failed({"error": err}, error=err)

                    model_string = _build_model_string(
                        cfg.get("provider", ""),
@ -151,9 +174,8 @@ def create_generate_image_tool(
                    )
                    db_cfg = cfg_result.scalars().first()
                    if not db_cfg:
-                        return {
-                            "error": f"Image generation config {config_id} not found"
-                        }
+                        err = f"Image generation config {config_id} not found"
+                        return _failed({"error": err}, error=err)

                    model_string = _build_model_string(
                        db_cfg.provider.value,
@ -200,7 +222,10 @@ def create_generate_image_tool(
            # Extract image URLs from response
            images = response_dict.get("data", [])
            if not images:
-                return {"error": "No images were generated"}
+                return _failed(
+                    {"error": "No images were generated"},
+                    error="No images were generated",
+                )

            first_image = images[0]
            revised_prompt = first_image.get("revised_prompt", prompt)
@ -219,11 +244,14 @@ def create_generate_image_tool(
                    f"{db_image_gen_id}/image?token={access_token}"
                )
            else:
-                return {"error": "No displayable image data in the response"}
+                return _failed(
+                    {"error": "No displayable image data in the response"},
+                    error="No displayable image data in the response",
+                )

            image_id = f"image-{hashlib.md5(image_url.encode()).hexdigest()[:12]}"

-            return {
+            payload = {
                "id": image_id,
                "assetId": image_url,
                "src": image_url,
@ -236,12 +264,26 @@ def create_generate_image_tool(
                "prompt": prompt,
                "image_count": len(images),
            }
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="image",
+                    operation="generate",
+                    status="success",
+                    external_id=str(db_image_gen_id),
+                    verifiable_url=image_url,
+                    preview=(revised_prompt or prompt)[:200],
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )

        except Exception as e:
            logger.exception("Image generation failed in tool")
-            return {
-                "error": f"Image generation failed: {e!s}",
-                "prompt": prompt,
-            }
+            err = f"Image generation failed: {e!s}"
+            return _failed(
+                {"error": err, "prompt": prompt},
+                error=err,
+            )

    return generate_image
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py
@ -1,12 +1,28 @@
-"""Factory for a podcast-generation tool that queues background work and returns an ID for polling."""
+"""Factory for a podcast-generation tool.

+Dispatches the heavy generation to Celery and then polls the podcast row
+until it reaches a terminal status (READY/FAILED). The tool always
+returns a real terminal ``Receipt`` — never a pending one. The wait is
+bounded by the existing per-invocation safety net
+(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode,
+HTTP / process lifetime in single-agent mode).
+"""
+
+import logging
 from typing import Any

+from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
+from langgraph.types import Command
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.agents.shared.deliverable_wait import wait_for_deliverable
+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.db import Podcast, PodcastStatus, shielded_async_session

+logger = logging.getLogger(__name__)
+

 def create_generate_podcast_tool(
    search_space_id: int,
@ -19,9 +35,10 @@ def create_generate_podcast_tool(
    @tool
    async def generate_podcast(
        source_content: str,
+        runtime: ToolRuntime,
        podcast_title: str = "SurfSense Podcast",
        user_prompt: str | None = None,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """
        Generate a podcast from the provided content.

@ -70,23 +87,101 @@ def create_generate_podcast_tool(
                user_prompt=user_prompt,
            )

-            print(f"[generate_podcast] Created podcast {podcast_id}, task: {task.id}")
+            logger.info(
+                "[generate_podcast] Created podcast %s, task: %s",
+                podcast_id,
+                task.id,
+            )

-            return {
-                "status": PodcastStatus.PENDING.value,
+            # Wait until the Celery worker flips the row to a terminal
+            # state. The wait is bounded only by the subagent invoke
+            # timeout (multi-agent) or HTTP lifetime (single-agent) —
+            # see app.agents.shared.deliverable_wait for details.
+            terminal_status, columns, elapsed = await wait_for_deliverable(
+                model=Podcast,
+                row_id=podcast_id,
+                columns=[Podcast.status, Podcast.file_location],
+                terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED},
+            )
+
+            if terminal_status == PodcastStatus.READY:
+                file_location = columns[1] if columns else None
+                logger.info(
+                    "[generate_podcast] Podcast %s READY in %.2fs (file=%s)",
+                    podcast_id,
+                    elapsed,
+                    file_location,
+                )
+                payload: dict[str, Any] = {
+                    "status": PodcastStatus.READY.value,
+                    "podcast_id": podcast_id,
+                    "title": podcast_title,
+                    "file_location": file_location,
+                    "message": (
+                        "Podcast generated and saved to your podcast panel."
+                    ),
+                }
+                return with_receipt(
+                    payload=payload,
+                    receipt=make_receipt(
+                        route="deliverables",
+                        type="podcast",
+                        operation="generate",
+                        status="success",
+                        external_id=str(podcast_id),
+                        preview=podcast_title,
+                    ),
+                    tool_call_id=runtime.tool_call_id,
+                )
+
+            # Only other terminal state is FAILED.
+            logger.warning(
+                "[generate_podcast] Podcast %s FAILED in %.2fs",
+                podcast_id,
+                elapsed,
+            )
+            err = "Background worker reported FAILED status for this podcast."
+            payload = {
+                "status": PodcastStatus.FAILED.value,
                "podcast_id": podcast_id,
                "title": podcast_title,
-                "message": "Podcast generation started. This may take a few minutes.",
+                "error": err,
            }
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="podcast",
+                    operation="generate",
+                    status="failed",
+                    external_id=str(podcast_id),
+                    preview=podcast_title,
+                    error=err,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )

        except Exception as e:
            error_message = str(e)
-            print(f"[generate_podcast] Error: {error_message}")
-            return {
+            logger.exception("[generate_podcast] Error: %s", error_message)
+            payload = {
                "status": PodcastStatus.FAILED.value,
                "error": error_message,
                "title": podcast_title,
                "podcast_id": None,
            }
+            receipt = make_receipt(
+                route="deliverables",
+                type="podcast",
+                operation="generate",
+                status="failed",
+                preview=podcast_title,
+                error=error_message,
+            )
+            return with_receipt(
+                payload=payload,
+                receipt=receipt,
+                tool_call_id=runtime.tool_call_id,
+            )

    return generate_podcast
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
@ -6,10 +6,14 @@ import logging
 import re
 from typing import Any

+from langchain.tools import ToolRuntime
 from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import tool
+from langgraph.types import Command

+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.db import Report, shielded_async_session
 from app.services.connector_service import ConnectorService
 from app.services.llm_service import get_document_summary_llm
@ -573,13 +577,14 @@ def create_generate_report_tool(
    @tool
    async def generate_report(
        topic: str,
+        runtime: ToolRuntime,
        source_content: str = "",
        source_strategy: str = "provided",
        search_queries: list[str] | None = None,
        report_style: str = "detailed",
        user_instructions: str | None = None,
        parent_report_id: int | None = None,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """
        Generate a structured Markdown report artifact from provided content.

@ -692,6 +697,23 @@ def create_generate_report_tool(
        parent_report_content: str | None = None
        report_group_id: int | None = None

+        def _failed(payload: dict[str, Any], *, error: str) -> Command:
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="report",
+                    operation="generate",
+                    status="failed",
+                    external_id=str(payload.get("report_id"))
+                    if payload.get("report_id") is not None
+                    else None,
+                    preview=topic,
+                    error=error,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )
+
        async def _save_failed_report(error_msg: str) -> int | None:
            """Persist a failed report row using a short-lived session."""
            try:
@ -753,12 +775,15 @@ def create_generate_report_tool(
                    "No LLM configured. Please configure a language model in Settings."
                )
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": topic,
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": topic,
+                    },
+                    error=error_msg,
+                )

            # Build the user instructions string
            user_instructions_section = ""
@ -971,12 +996,15 @@ def create_generate_report_tool(
            if not report_content or not isinstance(report_content, str):
                error_msg = "LLM returned empty or invalid content"
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": topic,
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": topic,
+                    },
+                    error=error_msg,
+                )

            # LLMs often wrap output in ```markdown ... ``` fences — strip them
            report_content = _strip_wrapping_code_fences(report_content)
@ -984,12 +1012,15 @@ def create_generate_report_tool(
            if not report_content:
                error_msg = "LLM returned empty or invalid content"
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": topic,
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": topic,
+                    },
+                    error=error_msg,
+                )

            # Strip any existing footer(s) carried over from parent version(s)
            while report_content.rstrip().endswith(_REPORT_FOOTER):
@ -1036,7 +1067,7 @@ def create_generate_report_tool(
                f"{metadata.get('section_count', 0)} sections"
            )

-            return {
+            payload: dict[str, Any] = {
                "status": "ready",
                "report_id": saved_report_id,
                "title": topic,
@ -1045,17 +1076,32 @@ def create_generate_report_tool(
                "report_markdown": report_content,
                "message": f"Report generated successfully: {topic}",
            }
+            receipt = make_receipt(
+                route="deliverables",
+                type="report",
+                operation="generate",
+                status="success",
+                external_id=str(saved_report_id),
+                preview=topic,
+            )
+            return with_receipt(
+                payload=payload,
+                receipt=receipt,
+                tool_call_id=runtime.tool_call_id,
+            )

        except Exception as e:
            error_message = str(e)
            logger.exception(f"[generate_report] Error: {error_message}")
            report_id = await _save_failed_report(error_message)
-
-            return {
-                "status": "failed",
-                "error": error_message,
-                "report_id": report_id,
-                "title": topic,
-            }
+            return _failed(
+                {
+                    "status": "failed",
+                    "error": error_message,
+                    "report_id": report_id,
+                    "title": topic,
+                },
+                error=error_message,
+            )

    return generate_report
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/resume.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/resume.py
@ -8,10 +8,14 @@ from typing import Any

 import pypdf
 import typst
+from langchain.tools import ToolRuntime
 from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import tool
+from langgraph.types import Command

+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.db import Report, shielded_async_session
 from app.services.llm_service import get_document_summary_llm

@ -429,10 +433,11 @@ def create_generate_resume_tool(
    @tool
    async def generate_resume(
        user_info: str,
+        runtime: ToolRuntime,
        user_instructions: str | None = None,
        parent_report_id: int | None = None,
        max_pages: int = 1,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """
        Generate a professional resume as a Typst document.

@ -476,6 +481,41 @@ def create_generate_resume_tool(
        template = _get_template()
        llm_reference = _build_llm_reference(template)

+        def _success(payload: dict[str, Any], *, report_id: int, title: str) -> Command:
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="resume",
+                    operation="generate",
+                    status="success",
+                    external_id=str(report_id),
+                    preview=title,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )
+
+        def _failed(
+            payload: dict[str, Any],
+            *,
+            report_id: int | None,
+            error: str,
+            title: str = "Resume",
+        ) -> Command:
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="resume",
+                    operation="generate",
+                    status="failed",
+                    external_id=str(report_id) if report_id is not None else None,
+                    preview=title,
+                    error=error,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )
+
        async def _save_failed_report(error_msg: str) -> int | None:
            try:
                async with shielded_async_session() as session:
@ -514,13 +554,17 @@ def create_generate_resume_tool(
            except ValueError as e:
                error_msg = str(e)
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": "Resume",
-                    "content_type": "typst",
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": "Resume",
+                        "content_type": "typst",
+                    },
+                    report_id=report_id,
+                    error=error_msg,
+                )

            # ── Phase 1: READ ─────────────────────────────────────────────
            async with shielded_async_session() as read_session:
@ -541,13 +585,17 @@ def create_generate_resume_tool(
                    "No LLM configured. Please configure a language model in Settings."
                )
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": "Resume",
-                    "content_type": "typst",
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": "Resume",
+                        "content_type": "typst",
+                    },
+                    report_id=report_id,
+                    error=error_msg,
+                )

            # ── Phase 2: LLM GENERATION ───────────────────────────────────

@ -588,13 +636,17 @@ def create_generate_resume_tool(
            if not body or not isinstance(body, str):
                error_msg = "LLM returned empty or invalid content"
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": "Resume",
-                    "content_type": "typst",
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": "Resume",
+                        "content_type": "typst",
+                    },
+                    report_id=report_id,
+                    error=error_msg,
+                )

            body = _strip_typst_fences(body)
            body = _strip_imports(body)
@ -661,13 +713,17 @@ def create_generate_resume_tool(
                        f"{compile_error or 'Unknown compile error'}"
                    )
                    report_id = await _save_failed_report(error_msg)
-                    return {
-                        "status": "failed",
-                        "error": error_msg,
-                        "report_id": report_id,
-                        "title": "Resume",
-                        "content_type": "typst",
-                    }
+                    return _failed(
+                        {
+                            "status": "failed",
+                            "error": error_msg,
+                            "report_id": report_id,
+                            "title": "Resume",
+                            "content_type": "typst",
+                        },
+                        report_id=report_id,
+                        error=error_msg,
+                    )

                actual_pages = _count_pdf_pages(pdf_bytes)
                if actual_pages <= validated_max_pages:
@ -700,13 +756,17 @@ def create_generate_resume_tool(
                ):
                    error_msg = "LLM returned empty content while compressing resume"
                    report_id = await _save_failed_report(error_msg)
-                    return {
-                        "status": "failed",
-                        "error": error_msg,
-                        "report_id": report_id,
-                        "title": "Resume",
-                        "content_type": "typst",
-                    }
+                    return _failed(
+                        {
+                            "status": "failed",
+                            "error": error_msg,
+                            "report_id": report_id,
+                            "title": "Resume",
+                            "content_type": "typst",
+                        },
+                        report_id=report_id,
+                        error=error_msg,
+                    )

                body = _strip_typst_fences(compress_response.content)
                body = _strip_imports(body)
@ -718,13 +778,17 @@ def create_generate_resume_tool(
                    f"Hard limit: <= {MAX_RESUME_PAGES} page(s), actual: {actual_pages}."
                )
                report_id = await _save_failed_report(error_msg)
-                return {
-                    "status": "failed",
-                    "error": error_msg,
-                    "report_id": report_id,
-                    "title": "Resume",
-                    "content_type": "typst",
-                }
+                return _failed(
+                    {
+                        "status": "failed",
+                        "error": error_msg,
+                        "report_id": report_id,
+                        "title": "Resume",
+                        "content_type": "typst",
+                    },
+                    report_id=report_id,
+                    error=error_msg,
+                )

            # ── Phase 4: SAVE ─────────────────────────────────────────────
            dispatch_custom_event(
@ -768,32 +832,40 @@ def create_generate_resume_tool(

            logger.info(f"[generate_resume] Created resume {saved_id}: {resume_title}")

-            return {
-                "status": "ready",
-                "report_id": saved_id,
-                "title": resume_title,
-                "content_type": "typst",
-                "is_revision": bool(parent_content),
-                "message": (
-                    f"Resume generated successfully: {resume_title}"
-                    if target_page_met
-                    else (
-                        f"Resume generated, but could not fit the target of <= {validated_max_pages} "
-                        f"page(s). Final length: {actual_pages} page(s)."
-                    )
-                ),
-            }
+            return _success(
+                {
+                    "status": "ready",
+                    "report_id": saved_id,
+                    "title": resume_title,
+                    "content_type": "typst",
+                    "is_revision": bool(parent_content),
+                    "message": (
+                        f"Resume generated successfully: {resume_title}"
+                        if target_page_met
+                        else (
+                            f"Resume generated, but could not fit the target of <= {validated_max_pages} "
+                            f"page(s). Final length: {actual_pages} page(s)."
+                        )
+                    ),
+                },
+                report_id=saved_id,
+                title=resume_title,
+            )

        except Exception as e:
            error_message = str(e)
            logger.exception(f"[generate_resume] Error: {error_message}")
            report_id = await _save_failed_report(error_message)
-            return {
-                "status": "failed",
-                "error": error_message,
-                "report_id": report_id,
-                "title": "Resume",
-                "content_type": "typst",
-            }
+            return _failed(
+                {
+                    "status": "failed",
+                    "error": error_message,
+                    "report_id": report_id,
+                    "title": "Resume",
+                    "content_type": "typst",
+                },
+                report_id=report_id,
+                error=error_message,
+            )

    return generate_resume
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/video_presentation.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/video_presentation.py
@ -1,12 +1,29 @@
-"""Factory for a video-presentation tool that queues background work and returns an ID for polling."""
+"""Factory for a video-presentation tool.

+Dispatches the heavy generation to Celery and then polls the
+video-presentation row until it reaches a terminal status (READY/FAILED).
+The tool always returns a real terminal ``Receipt`` — never a pending
+one. The wait is bounded by the existing per-invocation safety net
+(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode,
+HTTP / process lifetime in single-agent mode). Video rendering can be
+heavy; raise the multi-agent timeout if generations routinely exceed it.
+"""
+
+import logging
 from typing import Any

+from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
+from langgraph.types import Command
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.agents.shared.deliverable_wait import wait_for_deliverable
+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session

+logger = logging.getLogger(__name__)
+

 def create_generate_video_presentation_tool(
    search_space_id: int,
@ -19,9 +36,10 @@ def create_generate_video_presentation_tool(
    @tool
    async def generate_video_presentation(
        source_content: str,
+        runtime: ToolRuntime,
        video_title: str = "SurfSense Presentation",
        user_prompt: str | None = None,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """Generate a video presentation from the provided content.

        Use this tool when the user asks to create a video, presentation, slides, or slide deck.
@ -56,25 +74,103 @@ def create_generate_video_presentation_tool(
                user_prompt=user_prompt,
            )

-            print(
-                f"[generate_video_presentation] Created video presentation {video_pres_id}, task: {task.id}"
+            logger.info(
+                "[generate_video_presentation] Created video presentation %s, task: %s",
+                video_pres_id,
+                task.id,
            )

-            return {
-                "status": VideoPresentationStatus.PENDING.value,
+            # Wait until the Celery worker flips the row to a terminal
+            # state. The wait is bounded only by the subagent invoke
+            # timeout (multi-agent) or HTTP lifetime (single-agent) —
+            # see app.agents.shared.deliverable_wait for details.
+            terminal_status, _columns, elapsed = await wait_for_deliverable(
+                model=VideoPresentation,
+                row_id=video_pres_id,
+                columns=[VideoPresentation.status],
+                terminal_statuses={
+                    VideoPresentationStatus.READY,
+                    VideoPresentationStatus.FAILED,
+                },
+            )
+
+            if terminal_status == VideoPresentationStatus.READY:
+                logger.info(
+                    "[generate_video_presentation] %s READY in %.2fs",
+                    video_pres_id,
+                    elapsed,
+                )
+                payload: dict[str, Any] = {
+                    "status": VideoPresentationStatus.READY.value,
+                    "video_presentation_id": video_pres_id,
+                    "title": video_title,
+                    "message": "Video presentation generated and saved.",
+                }
+                return with_receipt(
+                    payload=payload,
+                    receipt=make_receipt(
+                        route="deliverables",
+                        type="video_presentation",
+                        operation="generate",
+                        status="success",
+                        external_id=str(video_pres_id),
+                        preview=video_title,
+                    ),
+                    tool_call_id=runtime.tool_call_id,
+                )
+
+            # Not READY: the only other terminal status requested above is FAILED.
+            logger.warning(
+                "[generate_video_presentation] %s FAILED in %.2fs",
+                video_pres_id,
+                elapsed,
+            )
+            err = (
+                "Background worker reported FAILED status for this "
+                "video presentation."
+            )
+            payload = {
+                "status": VideoPresentationStatus.FAILED.value,
                "video_presentation_id": video_pres_id,
                "title": video_title,
-                "message": "Video presentation generation started. This may take a few minutes.",
+                "error": err,
            }
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="video_presentation",
+                    operation="generate",
+                    status="failed",
+                    external_id=str(video_pres_id),
+                    preview=video_title,
+                    error=err,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )

        except Exception as e:
            error_message = str(e)
-            print(f"[generate_video_presentation] Error: {error_message}")
-            return {
+            logger.exception(
+                "[generate_video_presentation] Error: %s", error_message
+            )
+            payload = {
                "status": VideoPresentationStatus.FAILED.value,
                "error": error_message,
                "title": video_title,
                "video_presentation_id": None,
            }
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="deliverables",
+                    type="video_presentation",
+                    operation="generate",
+                    status="failed",
+                    preview=video_title,
+                    error=error_message,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )

    return generate_video_presentation
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@ -150,11 +150,12 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
+<include snippet="output_contract_base"/>
+
+Route-specific rules:

- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
 - `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.

+<include snippet="verifiable_handle"/>
+
 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@ -117,11 +117,12 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
+<include snippet="output_contract_base"/>
+
+Route-specific rules:

- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
 - `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.

+<include snippet="verifiable_handle"/>
+
 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md
@ -6,7 +6,7 @@ Persist durable preferences/facts/instructions with `update_memory` while avoidi
 </goal>

 <visibility_scope>
-{{MEMORY_VISIBILITY_POLICY}}
+Memory is search-space-scoped; do not assume cross-workspace visibility.
 </visibility_scope>

 <available_tools>
@ -53,10 +53,8 @@ Return **only** one JSON object (no markdown/prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - `evidence.memory_category` is a semantic classification for supervisor logs
  only. It is not the persisted storage format and must not force inline
  `[fact|preference|instruction]` markers into saved memory.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md
@ -46,10 +46,8 @@ Return **only** one JSON object (no markdown/prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
 - `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
 </output_contract>
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md
@ -92,12 +92,12 @@ Return **only** one JSON object (no markdown, no prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: base, table, field, choice, record, etc.).
 - For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; up to 10 entries, then `"...and N more"`).
 </output_contract>

+<include snippet="verifiable_handle"/>
+
 Discover before you mutate; never guess identifiers, choice IDs, or required fields.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md
@ -111,11 +111,12 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+Route-specific rules:
 - For `search_calendar_events` results, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; up to 10 entries, then `"...and N more"`).
 - For ambiguous matches across `update_calendar_event` / `delete_calendar_event`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`, where `label` should include the event title and start time for human readability).

+<include snippet="verifiable_handle"/>
+
 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md
@ -93,12 +93,12 @@ Return **only** one JSON object (no markdown, no prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: task, list, member, status, custom-field choice, etc.).
 - For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (task id, title, status, assignees; up to 10 entries, then `"...and N more"`).
 </output_contract>

+<include snippet="verifiable_handle"/>
+
 Discover before you mutate; never guess identifiers, list statuses, or assignees.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/confluence/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/confluence/system_prompt.md
@ -100,9 +100,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md
@ -108,9 +108,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Resolve before you call; verify before you send; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/dropbox/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/dropbox/system_prompt.md
@ -98,9 +98,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md
@ -110,11 +110,12 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+Route-specific rules:
 - For `search_gmail` results, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; up to 10 entries, then `"...and N more"`).
 - For ambiguous matches across `update_gmail_draft` / `trash_gmail_email` / `read_gmail_email`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`).

+<include snippet="verifiable_handle"/>
+
 Infer before you call; verify before you send; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/tools/send_email.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/tools/send_email.py
@ -5,12 +5,16 @@ from datetime import datetime
 from email.mime.text import MIMEText
 from typing import Any

+from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
+from langgraph.types import Command
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
    request_approval,
 )
+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.services.gmail import GmailToolMetadataService

 logger = logging.getLogger(__name__)
@ -26,9 +30,10 @@ def create_send_gmail_email_tool(
        to: str,
        subject: str,
        body: str,
+        runtime: ToolRuntime,
        cc: str | None = None,
        bcc: str | None = None,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """Send an email via Gmail.

        Use when the user explicitly asks to send an email. This sends the
@ -60,11 +65,34 @@ def create_send_gmail_email_tool(
        """
        logger.info(f"send_gmail_email called: to='{to}', subject='{subject}'")

+        def _emit(
+            payload: dict[str, Any],
+            *,
+            success: bool,
+            external_id: str | None = None,
+            error: str | None = None,
+        ) -> Command:
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="gmail",
+                    type="message",
+                    operation="send",
+                    status="success" if success else "failed",
+                    external_id=external_id,
+                    preview=f"to={to}: {subject}"[:200],
+                    error=error,
+                ),
+                tool_call_id=runtime.tool_call_id,
+            )
+
        if db_session is None or search_space_id is None or user_id is None:
-            return {
-                "status": "error",
-                "message": "Gmail tool not properly configured. Please contact support.",
-            }
+            msg = "Gmail tool not properly configured. Please contact support."
+            return _emit(
+                {"status": "error", "message": msg},
+                success=False,
+                error=msg,
+            )

        try:
            metadata_service = GmailToolMetadataService(db_session)
@ -74,16 +102,24 @@ def create_send_gmail_email_tool(

            if "error" in context:
                logger.error(f"Failed to fetch creation context: {context['error']}")
-                return {"status": "error", "message": context["error"]}
+                return _emit(
+                    {"status": "error", "message": context["error"]},
+                    success=False,
+                    error=context["error"],
+                )

            accounts = context.get("accounts", [])
            if accounts and all(a.get("auth_expired") for a in accounts):
                logger.warning("All Gmail accounts have expired authentication")
-                return {
-                    "status": "auth_error",
-                    "message": "All connected Gmail accounts need re-authentication. Please re-authenticate in your connector settings.",
-                    "connector_type": "gmail",
-                }
+                return _emit(
+                    {
+                        "status": "auth_error",
+                        "message": "All connected Gmail accounts need re-authentication. Please re-authenticate in your connector settings.",
+                        "connector_type": "gmail",
+                    },
+                    success=False,
+                    error="auth_expired",
+                )

            logger.info(
                f"Requesting approval for sending Gmail email: to='{to}', subject='{subject}'"
@ -103,10 +139,14 @@ def create_send_gmail_email_tool(
            )

            if result.rejected:
-                return {
-                    "status": "rejected",
-                    "message": "User declined. The email was not sent. Do not ask again or suggest alternatives.",
-                }
+                return _emit(
+                    {
+                        "status": "rejected",
+                        "message": "User declined. The email was not sent. Do not ask again or suggest alternatives.",
+                    },
+                    success=False,
+                    error="user_rejected",
+                )

            final_to = result.params.get("to", to)
            final_subject = result.params.get("subject", subject)
@ -135,10 +175,14 @@ def create_send_gmail_email_tool(
                )
                connector = result.scalars().first()
                if not connector:
-                    return {
-                        "status": "error",
-                        "message": "Selected Gmail connector is invalid or has been disconnected.",
-                    }
+                    msg = (
+                        "Selected Gmail connector is invalid or has been disconnected."
+                    )
+                    return _emit(
+                        {"status": "error", "message": msg},
+                        success=False,
+                        error=msg,
+                    )
                actual_connector_id = connector.id
            else:
                result = await db_session.execute(
@ -150,10 +194,12 @@ def create_send_gmail_email_tool(
                )
                connector = result.scalars().first()
                if not connector:
-                    return {
-                        "status": "error",
-                        "message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
-                    }
+                    msg = "No Gmail connector found. Please connect Gmail in your workspace settings."
+                    return _emit(
+                        {"status": "error", "message": msg},
+                        success=False,
+                        error=msg,
+                    )
                actual_connector_id = connector.id

            logger.info(
@ -166,10 +212,12 @@ def create_send_gmail_email_tool(
            ):
                cca_id = connector.config.get("composio_connected_account_id")
                if not cca_id:
-                    return {
-                        "status": "error",
-                        "message": "Composio connected account ID not found for this Gmail connector.",
-                    }
+                    msg = "Composio connected account ID not found for this Gmail connector."
+                    return _emit(
+                        {"status": "error", "message": msg},
+                        success=False,
+                        error=msg,
+                    )

                from app.services.composio_service import ComposioService

@ -187,7 +235,11 @@ def create_send_gmail_email_tool(
                    bcc=final_bcc,
                )
                if error:
-                    return {"status": "error", "message": error}
+                    return _emit(
+                        {"status": "error", "message": error},
+                        success=False,
+                        error=error,
+                    )
                sent = {"id": sent_message_id, "threadId": sent_thread_id}
            else:
                from google.oauth2.credentials import Credentials
@ -275,11 +327,15 @@ def create_send_gmail_email_tool(
                                actual_connector_id,
                                exc_info=True,
                            )
-                        return {
-                            "status": "insufficient_permissions",
-                            "connector_id": actual_connector_id,
-                            "message": "This Gmail account needs additional permissions. Please re-authenticate in connector settings.",
-                        }
+                        return _emit(
+                            {
+                                "status": "insufficient_permissions",
+                                "connector_id": actual_connector_id,
+                                "message": "This Gmail account needs additional permissions. Please re-authenticate in connector settings.",
+                            },
+                            success=False,
+                            error="insufficient_permissions",
+                        )
                    raise

            logger.info(
@ -310,12 +366,16 @@ def create_send_gmail_email_tool(
                logger.warning(f"KB sync after send failed: {kb_err}")
                kb_message_suffix = " This email will be added to your knowledge base in the next scheduled sync."

-            return {
-                "status": "success",
-                "message_id": sent.get("id"),
-                "thread_id": sent.get("threadId"),
-                "message": f"Successfully sent email to '{final_to}' with subject '{final_subject}'.{kb_message_suffix}",
-            }
+            return _emit(
+                {
+                    "status": "success",
+                    "message_id": sent.get("id"),
+                    "thread_id": sent.get("threadId"),
+                    "message": f"Successfully sent email to '{final_to}' with subject '{final_subject}'.{kb_message_suffix}",
+                },
+                success=True,
+                external_id=sent.get("id"),
+            )

        except Exception as e:
            from langgraph.errors import GraphInterrupt
@ -324,9 +384,11 @@ def create_send_gmail_email_tool(
                raise

            logger.error(f"Error sending Gmail email: {e}", exc_info=True)
-            return {
-                "status": "error",
-                "message": "Something went wrong while sending the email. Please try again.",
-            }
+            msg = "Something went wrong while sending the email. Please try again."
+            return _emit(
+                {"status": "error", "message": msg},
+                success=False,
+                error=str(e),
+            )

    return send_gmail_email
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/google_drive/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/google_drive/system_prompt.md
@ -100,9 +100,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md
@ -111,12 +111,12 @@ Return **only** one JSON object (no markdown, no prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: site, project, issue, user, transition, etc.).
 - For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (issue key, summary, status, assignee; up to 10 entries, then `"...and N more"`).
 </output_contract>

+<include snippet="verifiable_handle"/>
+
 Discover before you mutate; never guess identifiers, transitions, or required fields.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md
@ -101,12 +101,12 @@ Return **only** one JSON object (no markdown, no prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: issue, user, project, state, etc.).
 - For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (identifier, title, state, assignee; up to 10 entries, then `"...and N more"`).
 </output_contract>

+<include snippet="verifiable_handle"/>
+
 Discover before you mutate; never guess identifiers.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md
@ -101,9 +101,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Infer before you call; verify before you create; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/notion/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/notion/system_prompt.md
@ -99,9 +99,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/notion/tools/delete_page.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/notion/tools/delete_page.py
@ -1,12 +1,16 @@
 import logging
 from typing import Any

+from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
+from langgraph.types import Command
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
    request_approval,
 )
+from app.agents.shared.receipt import make_receipt
+from app.agents.shared.receipt_command import with_receipt
 from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
 from app.services.notion.tool_metadata_service import NotionToolMetadataService

@ -35,8 +39,9 @@ def create_delete_notion_page_tool(
    @tool
    async def delete_notion_page(
        page_title: str,
+        runtime: ToolRuntime,
        delete_from_kb: bool = False,
-    ) -> dict[str, Any]:
+    ) -> Command:
        """Delete (archive) a Notion page.

        Use this tool when the user asks you to delete, remove, or archive
@ -65,14 +70,39 @@ def create_delete_notion_page_tool(
            f"delete_notion_page called: page_title='{page_title}', delete_from_kb={delete_from_kb}"
        )

+        def _emit(  # pairs a tool payload with a Notion delete-page receipt via with_receipt
+            payload: dict[str, Any],
+            *,
+            status: str,
+            external_id: str | None = None,
+            error: str | None = None,
+        ) -> Command:
+            return with_receipt(
+                payload=payload,
+                receipt=make_receipt(
+                    route="notion",
+                    type="page",
+                    operation="delete",
+                    status="success" if status == "success" else "failed",  # receipt is binary: any non-"success" status is recorded as "failed"
+                    external_id=external_id,
+                    preview=page_title,  # page_title is captured from the enclosing tool call
+                    error=error,
+                ),
+                tool_call_id=runtime.tool_call_id,  # runtime is the enclosing tool's ToolRuntime argument
+            )
+
        if db_session is None or search_space_id is None or user_id is None:
            logger.error(
                "Notion tool not properly configured - missing required parameters"
            )
-            return {
-                "status": "error",
-                "message": "Notion tool not properly configured. Please contact support.",
-            }
+            return _emit(
+                {
+                    "status": "error",
+                    "message": "Notion tool not properly configured. Please contact support.",
+                },
+                status="error",
+                error="Notion tool not properly configured. Please contact support.",
+            )

        try:
            # Get page context (page_id, account, title) from indexed data
@ -86,16 +116,18 @@ def create_delete_notion_page_tool(
                # Check if it's a "not found" error (softer handling for LLM)
                if "not found" in error_msg.lower():
                    logger.warning(f"Page not found: {error_msg}")
-                    return {
-                        "status": "not_found",
-                        "message": error_msg,
-                    }
+                    return _emit(
+                        {"status": "not_found", "message": error_msg},
+                        status="error",
+                        error=error_msg,
+                    )
                else:
                    logger.error(f"Failed to fetch delete context: {error_msg}")
-                    return {
-                        "status": "error",
-                        "message": error_msg,
-                    }
+                    return _emit(
+                        {"status": "error", "message": error_msg},
+                        status="error",
+                        error=error_msg,
+                    )

            account = context.get("account", {})
            if account.get("auth_expired"):
@ -103,10 +135,14 @@ def create_delete_notion_page_tool(
                    "Notion account %s has expired authentication",
                    account.get("id"),
                )
-                return {
-                    "status": "auth_error",
-                    "message": "The Notion account for this page needs re-authentication. Please re-authenticate in your connector settings.",
-                }
+                return _emit(
+                    {
+                        "status": "auth_error",
+                        "message": "The Notion account for this page needs re-authentication. Please re-authenticate in your connector settings.",
+                    },
+                    status="error",
+                    error="auth_expired",
+                )

            page_id = context.get("page_id")
            connector_id_from_context = account.get("id")
@ -129,10 +165,14 @@ def create_delete_notion_page_tool(

            if result.rejected:
                logger.info("Notion page deletion rejected by user")
-                return {
-                    "status": "rejected",
-                    "message": "User declined. Do not retry or suggest alternatives.",
-                }
+                return _emit(
+                    {
+                        "status": "rejected",
+                        "message": "User declined. Do not retry or suggest alternatives.",
+                    },
+                    status="error",
+                    error="user_rejected",
+                )

            final_page_id = result.params.get("page_id", page_id)
            final_connector_id = result.params.get(
@ -165,18 +205,26 @@ def create_delete_notion_page_tool(
                    logger.error(
                        f"Invalid connector_id={final_connector_id} for search_space_id={search_space_id}"
                    )
-                    return {
-                        "status": "error",
-                        "message": "Selected Notion account is invalid or has been disconnected. Please select a valid account.",
-                    }
+                    return _emit(
+                        {
+                            "status": "error",
+                            "message": "Selected Notion account is invalid or has been disconnected. Please select a valid account.",
+                        },
+                        status="error",
+                        error="invalid_connector",
+                    )
                actual_connector_id = connector.id
                logger.info(f"Validated Notion connector: id={actual_connector_id}")
            else:
                logger.error("No connector found for this page")
-                return {
-                    "status": "error",
-                    "message": "No connector found for this page.",
-                }
+                return _emit(
+                    {
+                        "status": "error",
+                        "message": "No connector found for this page.",
+                    },
+                    status="error",
+                    error="no_connector",
+                )

            # Create connector instance
            notion_connector = NotionHistoryConnector(
@ -232,7 +280,13 @@ def create_delete_notion_page_tool(
                        f"{result.get('message', '')} (also removed from knowledge base)"
                    )

-            return result
+            status = result.get("status", "error")
+            return _emit(
+                result,
+                status=status,
+                external_id=str(final_page_id) if final_page_id else None,
+                error=None if status == "success" else result.get("message"),
+            )

        except Exception as e:
            from langgraph.errors import GraphInterrupt
@ -245,20 +299,28 @@ def create_delete_notion_page_tool(
            if isinstance(e, NotionAPIError) and (
                "401" in error_str or "unauthorized" in error_str
            ):
-                return {
-                    "status": "auth_error",
-                    "message": str(e),
-                    "connector_id": connector_id_from_context
-                    if "connector_id_from_context" in dir()
-                    else None,
-                    "connector_type": "notion",
-                }
+                return _emit(
+                    {
+                        "status": "auth_error",
+                        "message": str(e),
+                        "connector_id": connector_id_from_context
+                        if "connector_id_from_context" in dir()
+                        else None,
+                        "connector_type": "notion",
+                    },
+                    status="error",
+                    error=str(e),
+                )
            if isinstance(e, ValueError | NotionAPIError):
                message = str(e)
            else:
                message = (
                    "Something went wrong while deleting the page. Please try again."
                )
-            return {"status": "error", "message": message}
+            return _emit(
+                {"status": "error", "message": message},
+                status="error",
+                error=message,
+            )

    return delete_notion_page
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/onedrive/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/onedrive/system_prompt.md
@ -97,9 +97,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
-- `status=success` → `next_step=null`, `missing_fields=null`.
-- `status=partial|blocked|error` → `next_step` must be non-null.
-- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Infer before you call; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md
@ -87,12 +87,12 @@ Return **only** one JSON object (no markdown, no prose):
  "missing_fields": string[] | null,
  "assumptions": string[] | null
 }
-Rules:
-- `status=success` → `next_step=null`, `missing_fields=null`.
-- `status=partial|blocked|error` → `next_step` must be non-null.
-- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+Route-specific rules:
 - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: channel, user, message, thread).
 - For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (channel/user, key identifier, timestamp, short snippet; up to 10 entries, then `"...and N more"`).
 </output_contract>

+<include snippet="verifiable_handle"/>
+
 Discover before you post; never guess channel, user, or thread targets.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md
@ -115,9 +115,8 @@ Return **only** one JSON object (no markdown or prose outside it):
 }
 ```

-Rules:
-- `status=success` → `next_step=null`, `missing_fields=null`.
-- `status=partial|blocked|error` → `next_step` must be non-null.
-- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
+<include snippet="output_contract_base"/>
+
+<include snippet="verifiable_handle"/>

 Resolve before you call; verify before you send; map every tool outcome faithfully.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/md_file_reader.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/md_file_reader.py
@ -2,8 +2,11 @@

 from __future__ import annotations

+from functools import lru_cache
 from importlib import resources

+_SHARED_SNIPPETS_PACKAGE = "app.agents.multi_agent_chat.subagents.shared.snippets"
+

 def read_md_file(package: str, stem: str) -> str:
    """Load ``{stem}.md`` from ``package`` via importlib resources, or return empty."""
@ -12,3 +15,13 @@ def read_md_file(package: str, stem: str) -> str:
        return ""
    text = ref.read_text(encoding="utf-8")
    return text.rstrip("\n")
+
+
+@lru_cache(maxsize=64)
+def read_shared_snippet(name: str) -> str:
+    """Load ``{name}.md`` from the shared snippets package via ``read_md_file``.
+
+    Cached per ``name`` (empty results too): snippets are static at runtime and
+    resolved many times (once per subagent build, plus per-subagent-per-route).
+    """
+    return read_md_file(_SHARED_SNIPPETS_PACKAGE, name)
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/snippets/__init__.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/snippets/__init__.py
@ -0,0 +1,6 @@
+"""Shared markdown snippets composed into every subagent system prompt.
+
+Resolved at build time by :func:`pack_subagent` in ``subagent_builder.py``
+via the ``<include snippet="NAME"/>`` directive. See ``output_contract_base.md``
+and ``verifiable_handle.md`` for the included content.
+"""
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/snippets/output_contract_base.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/snippets/output_contract_base.md
@ -0,0 +1,6 @@
+Rules (universal):
+- `status=success` -> `next_step=null`, `missing_fields=null`.
+- `status=partial|blocked|error` -> `next_step` must be non-null.
+- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
+- `assumptions`: any inferences you made about the user's intent; `null` when no inferences were needed.
+- The `evidence` object's fields are documented in your route-specific `<output_contract>` above; never invent fields the tool did not return.
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/snippets/verifiable_handle.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/snippets/verifiable_handle.md
@ -0,0 +1,10 @@
+<verifiable_handle>
+Mutating tools you call return a structured `Receipt` object alongside their normal payload (see `evidence.receipts` in your `<output_contract>`). The supervisor uses the Receipt's `verifiable_url` and `external_id` to independently confirm the operation succeeded - do not paraphrase, shorten, or guess these values.
+
+Rules:
+- Quote each Receipt's `verifiable_url` and `external_id` **verbatim** in `evidence.receipts`. Copy character-for-character; never retype from memory.
+- If a Receipt has `status="failed"`, set your own `status="error"` and put the Receipt's `error` field in `next_step`.
+- If a Receipt has `status="pending"` (async backends — podcasts, video presentations, anything queued through Celery), report `status=success`, surface the pending Receipt as-is, and tell the supervisor in `action_summary` that the artefact is **being generated in the background** (e.g. "Podcast 38 queued; orchestrator should report it as kicked off, not yet ready"). A pending Receipt almost always lacks `verifiable_url` because the artefact does not exist yet — that is expected, not a defect. Do **not** wait, poll, or retry; control returns to the supervisor immediately and the asset becomes visible to the user out of band via its own UI surface.
+- Never claim a mutation succeeded without a matching Receipt with `status="success"` or `"pending"` in your tool results this turn.
+- For tools that do not return a Receipt (read-only operations, search, lookup), the receipt rules do not apply; only the route-specific `evidence` fields matter.
+</verifiable_handle>
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/spec.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/spec.py
@ -2,12 +2,30 @@

 from __future__ import annotations

+from collections.abc import Callable, Mapping
 from dataclasses import dataclass
+from typing import Any

 from deepagents import SubAgent

 from app.agents.new_chat.permissions import Ruleset

+# A context-hint provider receives the parent-agent ``runtime.state`` mapping
+# and the ``description`` the orchestrator wrote, and returns a short string
+# the runtime prepends to the subagent's first ``HumanMessage``. Used for
+# things like "current search-space id is X" or "the user is in workspace Y" —
+# never for full corpora, since the prepended text consumes the subagent's
+# prompt budget on every invocation. Return ``None`` (or an empty string) to
+# skip the hint for this call.
+ContextHintProvider = Callable[[Mapping[str, Any], str], str | None]
+
+# Custom key stashed on the deepagents ``SubAgent`` dict so the provider
+# survives the trip from ``pack_subagent`` → registry → middleware →
+# task_tool. ``deepagents.create_agent`` only extracts the keys it
+# recognises, so an extra key here is dropped silently at compile time.
+# The prefix avoids any collision with future deepagents fields.
+SURF_CONTEXT_HINT_PROVIDER_KEY = "surf_context_hint_provider"
+

@dataclass(frozen=True, slots=True)
 class SurfSenseSubagentSpec:
@ -20,10 +38,22 @@ class SurfSenseSubagentSpec:
            layers them into the subagent's :class:`PermissionMiddleware`,
            so each subagent owns its own ruleset without aliasing the
            shared rule engine.
+        context_hint_provider: Optional callback invoked once per ``task(...)``
+            invocation, immediately before the subagent runs. Its return
+            value is prepended to the subagent's first ``HumanMessage`` so
+            the subagent can see things it would otherwise have to discover
+            (active search space, KB root, current user timezone, etc.).
+            Exposed as a typed field here; ``pack_subagent`` also stashes it on
+            ``spec`` under ``SURF_CONTEXT_HINT_PROVIDER_KEY``, which upstream ignores.
    """

    spec: SubAgent
    ruleset: Ruleset
+    context_hint_provider: ContextHintProvider | None = None


-__all__ = ["SurfSenseSubagentSpec"]
+__all__ = [
+    "SURF_CONTEXT_HINT_PROVIDER_KEY",
+    "ContextHintProvider",
+    "SurfSenseSubagentSpec",
+]
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/subagent_builder.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/subagent_builder.py
@ -2,6 +2,8 @@

 from __future__ import annotations

+import logging
+import re
 from typing import Any, cast

 from deepagents import SubAgent
@ -12,9 +14,48 @@ from langchain_core.tools import BaseTool
 from app.agents.multi_agent_chat.middleware.shared.permissions import (
    build_permission_mw,
 )
-from app.agents.multi_agent_chat.subagents.shared.spec import SurfSenseSubagentSpec
+from app.agents.multi_agent_chat.subagents.shared.md_file_reader import (
+    read_shared_snippet,
+)
+from app.agents.multi_agent_chat.subagents.shared.spec import (
+    SURF_CONTEXT_HINT_PROVIDER_KEY,
+    ContextHintProvider,
+    SurfSenseSubagentSpec,
+)
 from app.agents.new_chat.permissions import Ruleset

+logger = logging.getLogger(__name__)
+
+# ``<include snippet="NAME"/>`` directive. Matches an XML-style self-closing
+# tag whose ``snippet`` attribute names a file in ``shared/snippets/``.
+# Whitespace before the attribute and before ``/>`` is tolerated; the snippet
+# name itself must be a bare identifier (letters / digits / underscores) so
+# we never pull a path-traversal value into ``read_shared_snippet``.
+_INCLUDE_DIRECTIVE_RE = re.compile(
+    r"<include\s+snippet=\"(?P<name>[A-Za-z0-9_]+)\"\s*/>"
+)
+
+
+def _resolve_includes(prompt: str, *, subagent_name: str) -> str:
+    """Replace ``<include snippet="X"/>`` directives with the snippet body.
+
+    Raises ``ValueError`` (naming ``subagent_name``) when a snippet body is
+    empty or whitespace-only, so a typo or missing file fails loudly at
+    startup instead of shipping a broken prompt; other text is left untouched.
+    """
+
+    def _replace(match: re.Match[str]) -> str:
+        name = match.group("name")
+        body = read_shared_snippet(name)
+        if not body.strip():
+            raise ValueError(
+                f"Subagent {subagent_name!r}: unknown or empty shared "
+                f"snippet {name!r} referenced via <include>."
+            )
+        return body
+
+    return _INCLUDE_DIRECTIVE_RE.sub(_replace, prompt)
+

 def _user_allowlist_for(
    dependencies: dict[str, Any], subagent_name: str
@ -43,6 +84,7 @@ def pack_subagent(
    dependencies: dict[str, Any],
    model: BaseChatModel | None = None,
    middleware_stack: dict[str, Any] | None = None,
+    context_hint_provider: ContextHintProvider | None = None,
 ) -> SurfSenseSubagentSpec:
    """Pack the route-local pieces into one sub-agent spec + its Ruleset.

@ -68,6 +110,8 @@ def pack_subagent(
        msg = f"Subagent {name!r}: system_prompt is empty"
        raise ValueError(msg)

+    system_prompt = _resolve_includes(system_prompt, subagent_name=name)
+
    flags = dependencies["flags"]
    user_allowlist = _user_allowlist_for(dependencies, name)
    subagent_rulesets: list[Ruleset] = [ruleset]
@ -99,4 +143,12 @@ def pack_subagent(
    }
    if model is not None:
        spec_dict["model"] = model
-    return SurfSenseSubagentSpec(spec=cast(SubAgent, spec_dict), ruleset=ruleset)
+    if context_hint_provider is not None:
+        # Stash the callback on the dict so it survives the trip through
+        # registry / middleware unpacking (both treat the spec as opaque).
+        spec_dict[SURF_CONTEXT_HINT_PROVIDER_KEY] = context_hint_provider
+    return SurfSenseSubagentSpec(
+        spec=cast(SubAgent, spec_dict),
+        ruleset=ruleset,
+        context_hint_provider=context_hint_provider,
+    )