Merge pull request #1443 from CREDO23/feature-automations

[Feat] Automation V1 — Scheduled Agent Tasks, Created via Chat (HITL) or JSON
2026-07-18 23:11:12 +02:00 · 2026-05-28 12:41:41 -07:00 · 2026-05-28 12:41:41 -07:00 · 4dda02c06c
commit 4dda02c06c
parent b645c3f54d 958bf9f95a
219 changed files with 13821 additions and 55 deletions
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/__init__.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/__init__.py
@ -0,0 +1 @@
+"""``create_automation`` — description + few-shot examples."""
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
@ -0,0 +1,34 @@
+- `create_automation` — Draft and author a new automation. You describe the
+  user's intent; a focused drafter inside the tool turns it into the full
+  automation JSON; the user sees a preview on an approval card and chooses
+  approve or reject. All three phases happen in a single tool call.
+  - Call when the user wants SurfSense to do something on its own: anything
+    recurring or scheduled ("every morning…", "each Monday…", "weekly
+    recap…").
+  - Args:
+    - `intent` (string): restate the user's request **concretely**, in one
+      paragraph. Cover three things:
+      - **What** should run (the action: summarize, recap, post, draft, …).
+      - **When** it should run (schedule + timezone if the user mentioned one;
+        otherwise leave the timezone for the drafter to default to UTC).
+      - **Static values** the automation needs (folder ids, channel names,
+        project keys, parent page ids, …) — list them with their values.
+        If the user did NOT supply one the automation needs, say so
+        explicitly ("the Notion parent page id was not specified") so the
+        drafter leaves a placeholder.
+  - Do NOT prompt the user to confirm before calling — the approval card
+    IS the confirmation. The card shows a structured preview plus the raw
+    JSON; it offers approve/reject only. If the user wants changes after
+    seeing the draft, they reply in chat and you call this tool again with
+    a refined `intent` — that's the edit path.
+  - Returns:
+    - `{status: "saved", automation_id, name}` — confirm briefly to the
+      user ("Saved as automation #N — runs <when>."). Don't dump JSON back.
+    - `{status: "rejected", message}` — the user declined on the card.
+      Acknowledge once ("Understood, I didn't create it.") and stop. Do
+      NOT retry or pitch variants without a fresh user request.
+    - `{status: "invalid", issues, raw?}` — drafting/validation failed
+      before the card was shown. Read the issues, refine your `intent`
+      with the missing details, call again.
+    - `{status: "error", message}` — surface the message verbatim and
+      offer to retry.
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/example.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/example.md
@ -0,0 +1,13 @@
+<example>
+user: "Every weekday at 9am, summarize new documents in folder 12 and post the summary to Slack channel #daily-digest."
+→ create_automation(intent="Every weekday at 09:00 UTC, summarize documents added to folder_id=12 since the last run, then post the summary to Slack channel '#daily-digest'. Static inputs: folder_id=12, slack_channel='#daily-digest'.")
+tool returns: {"status": "saved", "automation_id": 42, "name": "Daily folder 12 digest"}
+(Reply briefly: "Saved as automation #42 — runs weekdays at 9am UTC.")
+</example>
+
+<example>
+user: "Once a week on Mondays at 7am Paris time, draft a Notion page recapping last week's Jira tickets in project CORE."
+→ create_automation(intent="Every Monday at 07:00 Europe/Paris, read last week's Jira issues in project CORE, then draft a Notion page recapping them. Static inputs: jira_project_key='CORE'. The user did NOT specify which Notion page the recap should sit under — leave notion_parent_page_id as a placeholder.")
+tool returns: {"status": "saved", "automation_id": 51, "name": "Weekly CORE Jira recap"}
+(Reply: "Saved as automation #51. I left the Notion parent page id as a placeholder — set it on the automation before next Monday.")
+</example>
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/__init__.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/__init__.py
@ -0,0 +1,7 @@
+"""``create_automation`` — author + persist an automation via a HITL card."""
+
+from __future__ import annotations
+
+from .create import create_create_automation_tool
+
+__all__ = ["create_create_automation_tool"]
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
@ -0,0 +1,208 @@
+"""``create_automation`` — NL intent → drafted JSON → HITL approval card → persisted.
+
+Single tool that:
+
+1. Drafts a structured automation from the user's intent via a focused sub-LLM
+   (system prompt in :mod:`.prompt`).
+2. Surfaces the validated draft in a HITL approval card
+   (``action_type="automation_create"``).
+3. On approval, validates the (possibly edited) payload again and persists
+   it via :class:`AutomationService`.
+
+The main agent only restates the user's request as a single ``intent`` string.
+The drafting sub-LLM owns the JSON shape; the HITL card is the user's review.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from typing import Any
+from uuid import UUID
+
+from fastapi import HTTPException
+from langchain.tools import ToolRuntime
+from langchain_core.messages import HumanMessage
+from langchain_core.tools import tool
+from pydantic import ValidationError
+
+from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
+    request_approval,
+)
+from app.automations.schemas.api import AutomationCreate
+from app.automations.services.automation import AutomationService
+from app.db import User, async_session_maker
+from app.utils.content_utils import extract_text_content
+
+from .prompt import build_draft_prompt
+
+logger = logging.getLogger(__name__)
+
+_JSON_FENCE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL)
+
+
+def create_create_automation_tool(
+    *,
+    search_space_id: int,
+    user_id: str | UUID,
+    llm: Any,
+):
+    """Factory for the ``create_automation`` tool.
+
+    ``search_space_id`` is injected from the chat session (the model never
+    has to guess it). ``llm`` is the drafting sub-model — we reuse the main
+    agent's LLM and tag the call so it's identifiable in traces. A fresh
+    ``AsyncSession`` is opened per call to avoid stale sessions on
+    compiled-agent cache hits (same pattern as the Notion / memory tools).
+
+    Args:
+        search_space_id: Written into every draft before validation and
+            passed to the approval card as context; it is stripped from the
+            card's params and re-applied to the payload that comes back.
+        user_id: Owner of the automation. A ``str`` is parsed into a
+            ``UUID`` once, here; the value is used to load the ``User`` row
+            at persist time.
+        llm: Drafting model; only its async ``ainvoke`` method is used.
+
+    Returns:
+        The ``@tool``-decorated ``create_automation`` coroutine, closed over
+        the arguments above.
+    """
+    # Normalise once so ``session.get(User, uid)`` always receives a UUID.
+    uid = UUID(user_id) if isinstance(user_id, str) else user_id
+
+    # NOTE(review): the docstring of the decorated function is presumably what
+    # ``@tool`` exposes to the model as the tool description — treat it as
+    # runtime text, not as ordinary documentation (TODO confirm).
+    @tool
+    async def create_automation(intent: str, runtime: ToolRuntime) -> dict[str, Any]:
+        """Draft + save an automation from a natural-language intent.
+
+        Use this when the user wants SurfSense to do something on its own
+        on a schedule (e.g. "every morning summarize folder 12 to Slack").
+        Restate the user's request as ONE concrete ``intent`` string: what
+        should run, when, and which static values (folder ids, channel
+        names, …) it needs.
+
+        The tool drafts the full automation JSON internally, shows the user
+        a structured preview on an approval card, and persists on approval.
+        The card supports approve/reject only — if the user wants edits
+        after seeing the draft, they say so in chat and you call this tool
+        again with a refined intent. Do NOT prompt the user to confirm
+        before calling — the card IS the confirmation.
+
+        Args:
+            intent: Concrete restatement of the user's request. Include
+                the schedule (with timezone if mentioned), the action to
+                take, and any static values. Example: "Every weekday at
+                09:00 UTC, summarize new docs added to folder_id=12 since
+                the last run, then post the summary to Slack channel
+                '#daily-digest'."
+
+        Returns:
+            ``{"status": "saved", "automation_id": int, "name": str}`` on
+            approval + save.
+            ``{"status": "rejected", "message": "..."}`` when the user
+            declines on the card.
+            ``{"status": "invalid", "issues": [...], "raw": ...}`` when
+            the drafter produced output that did not validate (call again
+            with a more precise intent).
+            ``{"status": "error", "message": "..."}`` on drafter or
+            persistence failure.
+
+            IMPORTANT: when status is ``"rejected"`` the user explicitly
+            declined. Acknowledge once and stop — do NOT retry or pitch
+            variants without a fresh user request.
+        """
+        # --- 1. Draft via sub-LLM ---
+        # The whole rendered prompt is sent as a single human message; the
+        # tags label this internal call.
+        prompt = build_draft_prompt(search_space_id=search_space_id, intent=intent)
+        try:
+            response = await llm.ainvoke(
+                [HumanMessage(content=prompt)],
+                config={"tags": ["surfsense:internal", "automation-draft"]},
+            )
+        except Exception as exc:
+            # Any drafting failure is reported back to the caller as a
+            # structured "error" result instead of propagating.
+            logger.exception("create_automation drafting LLM call failed")
+            return {"status": "error", "message": f"drafting failed: {exc}"}
+
+        raw_text = extract_text_content(response.content).strip()
+        # ``_extract_json`` yields a dict or None; None means no JSON object
+        # could be decoded, and the raw text is returned for diagnosis.
+        draft = _extract_json(raw_text)
+        if draft is None:
+            return {
+                "status": "invalid",
+                "issues": ["model output was not parseable JSON"],
+                "raw": raw_text,
+            }
+
+        # search_space_id is injected here so the sub-LLM never has to guess.
+        # Whatever value the drafter emitted for this key is overwritten.
+        draft["search_space_id"] = search_space_id
+        try:
+            validated_draft = AutomationCreate.model_validate(draft)
+        except ValidationError as exc:
+            return {
+                "status": "invalid",
+                "issues": _format_validation_issues(exc),
+                "raw": draft,
+            }
+
+        # --- 2. HITL approval card ---
+        try:
+            card_params = validated_draft.model_dump(mode="json", by_alias=True)
+            # search_space_id is session-scoped, not user-editable.
+            card_params.pop("search_space_id", None)
+
+            # ``tool_call_id`` is forwarded so the approval payload carries
+            # the id of this tool call.
+            result = request_approval(
+                action_type="automation_create",
+                tool_name="create_automation",
+                params=card_params,
+                context={"search_space_id": search_space_id},
+                tool_call_id=runtime.tool_call_id,
+            )
+
+            if result.rejected:
+                return {
+                    "status": "rejected",
+                    "message": "User declined. Do not retry or suggest alternatives.",
+                }
+
+            # --- 3. Persist (re-validate in case the user edited) ---
+            # NOTE(review): the tool docstring describes the card as
+            # approve/reject only, yet the returned params are re-validated —
+            # presumably defensive, since ``result.params`` originates
+            # outside this function.
+            final_payload = {**result.params, "search_space_id": search_space_id}
+            try:
+                final_validated = AutomationCreate.model_validate(final_payload)
+            except ValidationError as exc:
+                # Unlike the pre-card failure paths, no "raw" key is returned.
+                return {
+                    "status": "invalid",
+                    "issues": _format_validation_issues(exc),
+                }
+
+            # Fresh session per call (see the factory docstring).
+            async with async_session_maker() as session:
+                user = await session.get(User, uid)
+                if user is None:
+                    return {
+                        "status": "error",
+                        "message": "user not found in this session",
+                    }
+                service = AutomationService(session=session, user=user)
+                created = await service.create(final_validated)
+                return {
+                    "status": "saved",
+                    "automation_id": created.id,
+                    "name": created.name,
+                }
+
+        except HTTPException as exc:
+            # HTTP-style errors raised inside the block above are turned into
+            # a structured error carrying the exception's ``detail``.
+            return {"status": "error", "message": exc.detail}
+        except Exception as exc:
+            from langgraph.errors import GraphInterrupt
+
+            # GraphInterrupt must propagate instead of being reported as an
+            # error — presumably it is how the approval interrupt pauses the
+            # graph (TODO confirm).
+            if isinstance(exc, GraphInterrupt):
+                raise
+            # The "persistence failed" label is applied to every other
+            # exception from this try block, including the approval step.
+            logger.exception("create_automation failed")
+            return {"status": "error", "message": f"persistence failed: {exc}"}
+
+    return create_automation
+
+
+def _extract_json(text: str) -> dict[str, Any] | None:
+    """Pull a JSON object out of the model response.
+
+    Candidates are tried in order; the first one that decodes decides the
+    result:
+
+    1. the body of the first ``` fence, if there is one;
+    2. the whole text;
+    3. the span from the first ``{`` to the last ``}``, which recovers an
+       object the model surrounded with prose.
+
+    Args:
+        text: Raw text of the drafting model's reply.
+
+    Returns:
+        The decoded object, or ``None`` when ``text`` is empty, no candidate
+        decodes, or the first candidate that decodes is not a JSON object.
+    """
+    if not text:
+        return None
+
+    candidates: list[str] = []
+    fence_match = _JSON_FENCE.search(text)
+    if fence_match:
+        candidates.append(fence_match.group(1))
+    candidates.append(text)
+    first_brace, last_brace = text.find("{"), text.rfind("}")
+    if 0 <= first_brace < last_brace:
+        candidates.append(text[first_brace : last_brace + 1])
+
+    for candidate in candidates:
+        try:
+            parsed = json.loads(candidate)
+        except json.JSONDecodeError:
+            # Only a decode failure moves on to the next, looser candidate.
+            continue
+        # Valid JSON that is not an object (list, string, number) is a miss.
+        return parsed if isinstance(parsed, dict) else None
+    return None
+
+
+def _format_validation_issues(exc: ValidationError) -> list[str]:
+    """Flatten a validation error into ``"<dotted.location>: <message>"`` lines."""
+    issues: list[str] = []
+    for error in exc.errors():
+        location = ".".join(map(str, error["loc"]))
+        issues.append(f"{location}: {error['msg']}")
+    return issues
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py
@ -0,0 +1,179 @@
+"""System prompt for the drafting sub-LLM inside ``create_automation``.
+
+Converts a natural-language ``intent`` into a structured ``AutomationCreate``
+JSON object. That object becomes the payload the HITL approval card surfaces.
+
+Scope split:
+    Real automation JSONs live here — this is the graph that *generates*
+    the JSON. The main agent's prompt fragments (``description.md`` /
+    ``example.md``) only carry intent-string examples; the main agent
+    never sees the schema.
+
+Layout:
+    The prompt is concatenated from four format-safe pieces. ``_HEADER`` /
+    ``_FOOTER`` carry the only ``str.format`` placeholders; ``_SCHEMA`` and
+    ``_FEW_SHOTS`` are plain strings so their JSON literals (and the
+    ``{{ inputs.X }}`` Jinja references in queries) can stay readable
+    without doubled-brace escaping.
+
+Catalog handling:
+    v1 hard-codes the action/trigger catalog (one action, one trigger).
+    When new types ship, swap the inline lines for a render-time pull
+    from ``app.automations.actions`` / ``app.automations.triggers`` via
+    lazy imports inside :func:`build_draft_prompt` so this module never
+    participates in the ``multi_agent_chat`` import cycle.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+
+# Rendered with ``str.format`` in ``build_draft_prompt``: ``{now}`` and
+# ``{search_space_id}`` are its placeholders, so any other brace added here
+# would have to be doubled.
+_HEADER = """\
+You are the SurfSense automation drafter. Convert the user intent below
+into a SINGLE JSON object matching the AutomationCreate schema. Output
+ONLY that JSON object — no prose, no markdown fence, no commentary.
+
+Current UTC time (for cron context): {now}
+Target search_space_id: {search_space_id}
+"""
+
+
+# Concatenated verbatim by ``build_draft_prompt`` (never passed through
+# ``str.format``), so the JSON braces and ``{{ inputs.X }}`` Jinja references
+# below are literal text.
+_SCHEMA = """
+Required JSON shape:
+{
+  "name": "<1-200 char identifier>",
+  "description": "<one-liner or null>",
+  "definition": {
+    "schema_version": "1.0",
+    "name": "<same as outer name>",
+    "goal": "<one sentence>",
+    "plan": [
+      {
+        "step_id": "<slug>",
+        "action": "agent_task",
+        "params": {
+          "query": "<Jinja string referencing {{ inputs.X }}>",
+          "auto_approve_all": true
+        }
+      }
+    ],
+    "metadata": {"tags": ["..."]}
+  },
+  "triggers": [
+    {
+      "type": "schedule",
+      "params": {"cron": "<5-field cron>", "timezone": "<IANA tz, default UTC>"},
+      "static_inputs": {"<key>": <value>, ...},
+      "enabled": true
+    }
+  ]
+}
+
+v1 catalog (only these are valid):
+- Actions: agent_task — params: query (string, Jinja), auto_approve_all (bool).
+- Triggers: schedule — params: cron (5-field), timezone (IANA, e.g. "UTC",
+  "Europe/Paris"). Has static_inputs (object).
+
+Conventions:
+- Whatever the plan references via {{ inputs.X }} MUST appear either in a
+  trigger's static_inputs OR in definition.inputs.schema_.properties so the
+  executor can resolve it at fire time.
+- static_inputs carries values that stay the same across every fire
+  (folder ids, channel names, project keys, parent page ids). Put them on
+  the trigger that supplies them, not in the plan.
+- If the user did NOT supply a value the plan needs, put "REPLACE_ME" in
+  static_inputs. Do NOT invent ids, channels, or paths.
+- Cron is 5-field (minute hour day-of-month month day-of-week). Use the
+  timezone the user mentioned; default "UTC" when unspecified.
+- Templating variables available at fire time: inputs.* (merged
+  static_inputs + runtime), inputs.fired_at, inputs.last_fired_at.
+"""
+
+
+# Concatenated verbatim by ``build_draft_prompt`` (never passed through
+# ``str.format``), so the example JSON and its ``{{ inputs.X }}`` references
+# are literal text. Each example pairs an intent string with the exact JSON
+# the drafter is expected to emit for it.
+_FEW_SHOTS = """
+Few-shot examples (intent → JSON output):
+
+### Example 1 — schedule with all static values supplied
+intent: "Every weekday at 09:00 UTC, summarize documents added to folder_id=12 since the last run, then post the summary to Slack channel '#daily-digest'. Static inputs: folder_id=12, slack_channel='#daily-digest'."
+output:
+{
+  "name": "Daily folder 12 digest",
+  "description": "Weekday 09:00 UTC summary of folder 12 documents posted to #daily-digest",
+  "definition": {
+    "schema_version": "1.0",
+    "name": "Daily folder 12 digest",
+    "goal": "Summarize new docs in folder 12 since the last run and post to #daily-digest",
+    "plan": [
+      {
+        "step_id": "summarize_and_post",
+        "action": "agent_task",
+        "params": {
+          "query": "Summarize documents added to folder {{ inputs.folder_id }} since {{ inputs.last_fired_at or 'yesterday' }}, then send the summary to Slack channel {{ inputs.slack_channel }}.",
+          "auto_approve_all": true
+        }
+      }
+    ],
+    "metadata": {"tags": ["daily", "digest", "slack"]}
+  },
+  "triggers": [
+    {
+      "type": "schedule",
+      "params": {"cron": "0 9 * * 1-5", "timezone": "UTC"},
+      "static_inputs": {"folder_id": 12, "slack_channel": "#daily-digest"},
+      "enabled": true
+    }
+  ]
+}
+
+### Example 2 — schedule with a missing value (REPLACE_ME placeholder)
+intent: "Every Monday at 07:00 Europe/Paris, read last week's Jira issues in project CORE, then draft a Notion page recapping them. Static inputs: jira_project_key='CORE'. The user did NOT specify the Notion parent page id — leave it as a placeholder."
+output:
+{
+  "name": "Weekly CORE Jira recap",
+  "description": "Monday 07:00 Europe/Paris recap of last week's CORE Jira issues, drafted to Notion",
+  "definition": {
+    "schema_version": "1.0",
+    "name": "Weekly CORE Jira recap",
+    "goal": "Recap last week's CORE Jira issues into a Notion page",
+    "plan": [
+      {
+        "step_id": "recap",
+        "action": "agent_task",
+        "params": {
+          "query": "List Jira issues in project {{ inputs.jira_project_key }} updated in the 7 days before {{ inputs.fired_at }}. Draft a Notion page under parent id {{ inputs.notion_parent_page_id }} titled 'CORE recap — week of {{ inputs.fired_at }}'.",
+          "auto_approve_all": true
+        }
+      }
+    ],
+    "metadata": {"tags": ["weekly", "recap", "jira", "notion"]}
+  },
+  "triggers": [
+    {
+      "type": "schedule",
+      "params": {"cron": "0 7 * * 1", "timezone": "Europe/Paris"},
+      "static_inputs": {"jira_project_key": "CORE", "notion_parent_page_id": "REPLACE_ME"},
+      "enabled": true
+    }
+  ]
+}
+"""
+
+
+# Rendered with ``str.format`` in ``build_draft_prompt``; ``{intent}`` receives
+# the stripped intent string and is the last thing in the prompt.
+_FOOTER = """
+User intent:
+{intent}
+"""
+
+
+def build_draft_prompt(*, search_space_id: int, intent: str) -> str:
+    """Assemble the drafter prompt: header, schema, few-shots, then the intent.
+
+    Only the header and footer are run through ``str.format``; the schema
+    and few-shot sections are appended untouched so their braces stay
+    literal.
+
+    Args:
+        search_space_id: Shown to the drafter in the header.
+        intent: The main agent's restatement of the request; surrounding
+            whitespace is stripped before it is embedded.
+
+    Returns:
+        The full prompt text.
+    """
+    timestamp = datetime.now(UTC).isoformat(timespec="seconds")
+    header = _HEADER.format(now=timestamp, search_space_id=search_space_id)
+    footer = _FOOTER.format(intent=intent.strip())
+    return "".join((header, _SCHEMA, _FEW_SHOTS, footer))
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py
@ -10,6 +10,7 @@ MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
    "web_search",
    "scrape_webpage",
    "update_memory",
+    "create_automation",
 )

 MAIN_AGENT_SURFSENSE_TOOL_NAMES: frozenset[str] = frozenset(
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/hitl/approvals/self_gated/request.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/hitl/approvals/self_gated/request.py
@ -49,6 +49,7 @@ def request_approval(
    params: dict[str, Any],
    context: dict[str, Any] | None = None,
    trusted_tools: list[str] | None = None,
+    tool_call_id: str | None = None,
 ) -> HITLResult:
    """Pause the graph for user approval and return the user's decision.

@ -64,6 +65,10 @@ def request_approval(
            forwarded verbatim to the FE for richer card chrome.
        trusted_tools: Per-session allowlist; when ``tool_name`` is in it the
            interrupt is skipped and the tool runs immediately.
+        tool_call_id: Caller's LangChain tool-call id. Required for tools
+            running directly on the main agent; subagent-mounted tools omit
+            it (the ``task`` chokepoint stamps it on re-raise — see
+            :mod:`...checkpointed_subagent_middleware.propagation`).

    Returns:
        :class:`HITLResult` with ``rejected=True`` if the user declined or
@ -90,6 +95,8 @@ def request_approval(
        interrupt_type=action_type,
        context=context,
    )
+    if tool_call_id:
+        payload["tool_call_id"] = tool_call_id
    approval = interrupt(payload)

    parsed = parse_lc_envelope(approval)
				`@ -0,0 +1 @@`
				"""``create_automation`` — description + few-shot examples."""