From 4910263c93045cec1b7bbc7f4f5c368bf1313bf6 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:48:04 +0200
Subject: [PATCH 01/87] refactor(chat): add streaming/shared/ package for
StreamResult and utils
Foundation layer for the parallel refactor of stream_new_chat.py.
Extracts the StreamResult dataclass (tracks per-turn streaming state)
and a small set of shared utilities (resume_step_prefix, safe_float).
Add-only; no existing code imports from this package yet. Existing
stream_new_chat.py keeps its inline equivalents until cutover.
---
.../tasks/chat/streaming/shared/__init__.py | 15 ++++++++
.../chat/streaming/shared/stream_result.py | 37 +++++++++++++++++++
.../app/tasks/chat/streaming/shared/utils.py | 27 ++++++++++++++
3 files changed, 79 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/shared/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/shared/utils.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/shared/__init__.py
new file mode 100644
index 000000000..6c9f1f6b5
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/shared/__init__.py
@@ -0,0 +1,15 @@
+"""Shared building blocks used across every streaming flow."""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+from app.tasks.chat.streaming.shared.utils import (
+ resume_step_prefix,
+ safe_float,
+)
+
+__all__ = [
+ "StreamResult",
+ "resume_step_prefix",
+ "safe_float",
+]
diff --git a/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py b/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py
new file mode 100644
index 000000000..a940e8a9f
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py
@@ -0,0 +1,51 @@
+"""Per-turn streaming state shared between the orchestrator and event loop."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class StreamResult:
+    """Mutable per-turn record filled in while one chat turn streams.
+
+    Every field has a default, so ``StreamResult()`` is a valid empty turn.
+    Field order defines the positional ``__init__`` signature, so new fields
+    belong at the end.
+    """
+
+    # -- streamed output --
+    accumulated_text: str = ""
+    is_interrupted: bool = False
+    sandbox_files: list[str] = field(default_factory=list)
+
+    # -- request / client context --
+    request_id: str | None = None
+    turn_id: str = ""
+    filesystem_mode: str = "cloud"
+    client_platform: str = "web"
+
+    # -- file-operation contract bookkeeping --
+    intent_detected: str = "chat_only"
+    intent_confidence: float = 0.0
+    write_attempted: bool = False
+    write_succeeded: bool = False
+    verification_succeeded: bool = False
+    commit_gate_passed: bool = True
+    commit_gate_reason: str = ""
+
+    # Assistant ``new_chat_messages.id`` reserved for this turn. It is captured
+    # by ``persist_assistant_shell`` right after the user row is persisted and
+    # stays ``None`` on the legacy/anonymous code paths that don't opt in to
+    # server-side ``ContentPart[]`` projection.
+    assistant_message_id: int | None = None
+
+    # In-memory mirror of the FE's assistant-ui ``ContentPartsState``. The
+    # lifecycle methods called from the streaming event loop populate it at
+    # each ``streaming_service.format_*`` yield site, and the streaming
+    # ``finally`` snapshots it to produce the rich JSONB persisted by
+    # ``finalize_assistant_turn``. ``repr=False`` keeps the log-on-error path
+    # (``StreamResult`` is logged in some error branches) from dumping a
+    # potentially-large parts list.
+    content_builder: Any | None = field(default=None, repr=False)
diff --git a/surfsense_backend/app/tasks/chat/streaming/shared/utils.py b/surfsense_backend/app/tasks/chat/streaming/shared/utils.py
new file mode 100644
index 000000000..fe6901543
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/shared/utils.py
@@ -0,0 +1,34 @@
+"""Small utilities used by streaming orchestrators and phases."""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def resume_step_prefix(turn_id: str) -> str:
+    """Per-turn ``step_prefix`` for resume invocations.
+
+    Each ``stream_agent_events`` call constructs a fresh
+    ``AgentEventRelayState`` with ``thinking_step_counter=0``, so two consecutive
+    resume turns would otherwise both emit ``thinking-resume-1``, ``-2`` etc.
+    The frontend rehydrates ``currentThinkingSteps`` from the immediate prior
+    assistant message at the start of every resume — if the new stream's IDs
+    collide with the seeded ones, React renders sibling Timeline rows with the
+    same key. Salting with ``turn_id`` guarantees disjoint IDs across resumes
+    within one thread.
+    """
+    return f"thinking-resume-{turn_id}"
+
+
+def safe_float(value: Any, default: float = 0.0) -> float:
+    """Coerce ``value`` to ``float``, returning ``default`` when that fails.
+
+    Handles ``TypeError`` (``None`` and other non-numeric objects),
+    ``ValueError`` (unparseable strings) and ``OverflowError`` (integers too
+    large for a float, e.g. ``10**400``). Anything ``float()`` accepts —
+    including the strings ``"nan"`` and ``"inf"`` — is returned as parsed.
+    """
+    try:
+        return float(value)
+    except (TypeError, ValueError, OverflowError):
+        return default
From c13beae1ceea2e344757baea13247d111cf3bd3b Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:48:08 +0200
Subject: [PATCH 02/87] refactor(chat): add streaming/context/ for
mentioned-docs and deep-agents todos
Extracts two pure context helpers used during input-state assembly:
* mentioned_docs.format_mentioned_surfsense_docs_as_context: renders the
user's @-mentioned SurfSense docs into the LLM context block.
* deepagents_todos.extract_todos_from_deepagents: pulls the todo list out
of a deepagents Command (or dict) payload in the shape the UI expects.
Add-only; existing call sites in stream_new_chat.py remain untouched
until cutover.
---
.../tasks/chat/streaming/context/__init__.py | 15 +++++
.../streaming/context/deepagents_todos.py | 27 +++++++++
.../chat/streaming/context/mentioned_docs.py | 58 +++++++++++++++++++
3 files changed, 100 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/context/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py
new file mode 100644
index 000000000..f858a6c06
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/context/__init__.py
@@ -0,0 +1,15 @@
+"""Pre-agent context shaping: mentioned-doc rendering and todos extraction."""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.context.deepagents_todos import (
+ extract_todos_from_deepagents,
+)
+from app.tasks.chat.streaming.context.mentioned_docs import (
+ format_mentioned_surfsense_docs_as_context,
+)
+
+__all__ = [
+ "extract_todos_from_deepagents",
+ "format_mentioned_surfsense_docs_as_context",
+]
diff --git a/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py b/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py
new file mode 100644
index 000000000..0bbf4f0a5
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/context/deepagents_todos.py
@@ -0,0 +1,38 @@
+"""Extract todos from a deepagents ``TodoListMiddleware`` ``Command`` output."""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def extract_todos_from_deepagents(command_output: Any) -> dict:
+    """Normalize todos out of a deepagents ``Command`` or dict payload.
+
+    deepagents returns a ``Command`` whose ``update['todos']`` is a list of
+    ``{'content': str, 'status': str}`` dicts. The UI expects the same shape,
+    so no transformation is required — only extraction.
+
+    Accepted inputs:
+
+    * a dict with a top-level ``"todos"`` key;
+    * a dict whose ``"update"`` value is a dict holding ``"todos"``;
+    * any other object whose ``update`` attribute is a dict holding ``"todos"``.
+
+    Returns:
+        ``{"todos": <value>}`` where the value is whatever is stored under
+        ``"todos"``, or an empty list when no todos can be located.
+    """
+    if isinstance(command_output, dict):
+        # Dicts are handled before the attribute probe: every dict exposes an
+        # ``update`` *method*, so testing ``hasattr(obj, "update")`` first would
+        # route dict payloads down the ``Command`` path and fail on ``.get``.
+        if "todos" in command_output:
+            container: Any = command_output
+        else:
+            container = command_output.get("update")
+    else:
+        container = getattr(command_output, "update", None)
+
+    # ``update`` may be missing, ``None`` or a non-dict; treat all as "no todos".
+    todos_data = container.get("todos", []) if isinstance(container, dict) else []
+    return {"todos": todos_data}
diff --git a/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py b/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py
new file mode 100644
index 000000000..e02e98d34
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/context/mentioned_docs.py
@@ -0,0 +1,70 @@
+"""Render user-mentioned SurfSense docs as XML context for the agent."""
+
+from __future__ import annotations
+
+import json
+
+from app.db import SurfsenseDocsDocument
+from app.utils.surfsense_docs import surfsense_docs_public_url
+
+
+def format_mentioned_surfsense_docs_as_context(
+    documents: list[SurfsenseDocsDocument],
+) -> str:
+    """Build the XML context block for @-mentioned SurfSense documentation.
+
+    Returns ``""`` when ``documents`` is empty. Otherwise emits one
+    ``<document>`` element per doc (metadata, then its chunks) inside a
+    ``<mentioned_surfsense_docs>`` envelope, joined with newlines.
+
+    NOTE(review): element names and the ``doc.title`` / ``doc.content`` /
+    ``chunk.id`` / ``chunk.content`` attributes were reconstructed — the markup
+    had been stripped from these literals, leaving f-strings with no
+    placeholders and ``metadata_json`` / ``public_url`` unused. Confirm against
+    the inline equivalent in ``stream_new_chat.py`` before cutover.
+    """
+    if not documents:
+        return ""
+
+    context_parts = ["<mentioned_surfsense_docs>"]
+    context_parts.append(
+        "The user has explicitly mentioned the following SurfSense documentation pages. "
+        "These are official documentation about how to use SurfSense and should be used to answer questions about the application. "
+        "Use [citation:CHUNK_ID] format for citations (e.g., [citation:doc-123])."
+    )
+
+    for doc in documents:
+        public_url = surfsense_docs_public_url(doc.source)
+        metadata_json = json.dumps(
+            {"source": doc.source, "public_url": public_url}, ensure_ascii=False
+        )
+
+        context_parts.append("<document>")
+        context_parts.append("<document_metadata>")
+        context_parts.append(f"  <document_id>doc-{doc.id}</document_id>")
+        context_parts.append("  <document_type>SURFSENSE_DOCS</document_type>")
+        context_parts.append(f"  <title><![CDATA[{doc.title}]]></title>")
+        context_parts.append(f"  <url><![CDATA[{public_url}]]></url>")
+        context_parts.append(
+            f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
+        )
+        context_parts.append("</document_metadata>")
+        context_parts.append("")
+        context_parts.append("<document_content>")
+
+        if hasattr(doc, "chunks") and doc.chunks:
+            for chunk in doc.chunks:
+                context_parts.append(
+                    f"  <chunk id='doc-{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
+                )
+        else:
+            context_parts.append(
+                f"  <chunk id='doc-0'><![CDATA[{doc.content}]]></chunk>"
+            )
+
+        context_parts.append("</document_content>")
+        context_parts.append("</document>")
+        context_parts.append("")
+
+    context_parts.append("</mentioned_surfsense_docs>")
+    return "\n".join(context_parts)
From 88a58f6aff8f7bcd4b1b5191784ceaf5ab0de57e Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:48:14 +0200
Subject: [PATCH 03/87] refactor(chat): add streaming/contract/ for file-write
contract enforcement
Extracts the desktop_local_folder file-operation contract helpers:
* contract_enforcement_active: gates the contract on filesystem mode.
* evaluate_file_contract_outcome: scores tool outputs as success/no-op.
* log_file_contract: structured logging of contract verdicts.
This is the unit responsible for catching agents that claim to have
written/edited a file without actually invoking the filesystem tool.
Add-only; stream_new_chat.py keeps its inline duplicates until cutover.
---
.../tasks/chat/streaming/contract/__init__.py | 15 ++++++
.../chat/streaming/contract/file_contract.py | 53 +++++++++++++++++++
2 files changed, 68 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/contract/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/contract/file_contract.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/contract/__init__.py b/surfsense_backend/app/tasks/chat/streaming/contract/__init__.py
new file mode 100644
index 000000000..4562b362c
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/contract/__init__.py
@@ -0,0 +1,15 @@
+"""File-operation contract evaluation and logging."""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.contract.file_contract import (
+ contract_enforcement_active,
+ evaluate_file_contract_outcome,
+ log_file_contract,
+)
+
+__all__ = [
+ "contract_enforcement_active",
+ "evaluate_file_contract_outcome",
+ "log_file_contract",
+]
diff --git a/surfsense_backend/app/tasks/chat/streaming/contract/file_contract.py b/surfsense_backend/app/tasks/chat/streaming/contract/file_contract.py
new file mode 100644
index 000000000..f21f5da02
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/contract/file_contract.py
@@ -0,0 +1,75 @@
+"""File-operation contract: when to enforce, how to score, how to log."""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+from app.utils.perf import get_perf_logger
+
+_perf_log = get_perf_logger()
+
+
+def contract_enforcement_active(result: StreamResult) -> bool:
+    """Return ``True`` only for turns running in ``desktop_local_folder`` mode.
+
+    Kept deterministic: there are no env-driven progression modes.
+    """
+    return result.filesystem_mode == "desktop_local_folder"
+
+
+def evaluate_file_contract_outcome(result: StreamResult) -> tuple[bool, str]:
+    """Score a turn against the file-write contract.
+
+    Returns ``(passed, reason)``. Turns whose intent is not ``"file_write"``
+    always pass with an empty reason. For ``file_write`` turns the earliest
+    stage that did not happen names the failure: ``"no_write_attempt"``,
+    ``"write_failed"`` or ``"verification_failed"``.
+    """
+    if result.intent_detected != "file_write":
+        return True, ""
+
+    # Ordered by pipeline stage; the earliest unmet stage wins.
+    stages = (
+        (result.write_attempted, "no_write_attempt"),
+        (result.write_succeeded, "write_failed"),
+        (result.verification_succeeded, "verification_failed"),
+    )
+    for satisfied, reason in stages:
+        if not satisfied:
+            return False, reason
+    return True, ""
+
+
+def log_file_contract(stage: str, result: StreamResult, **extra: Any) -> None:
+    """Log one ``[file_operation_contract]`` JSON line describing ``result``.
+
+    ``extra`` keyword arguments are merged last, so they can add to or override
+    the standard fields. ``chat_id`` is the part of ``turn_id`` before the
+    first ``":"``, or ``"unknown"`` when ``turn_id`` contains no colon.
+    """
+    turn_id = result.turn_id
+    chat_id = turn_id.split(":", 1)[0] if ":" in turn_id else "unknown"
+    payload: dict[str, Any] = {
+        "stage": stage,
+        "request_id": result.request_id or "unknown",
+        "turn_id": turn_id or "unknown",
+        "chat_id": chat_id,
+        "filesystem_mode": result.filesystem_mode,
+        "client_platform": result.client_platform,
+        "intent_detected": result.intent_detected,
+        "intent_confidence": result.intent_confidence,
+        "write_attempted": result.write_attempted,
+        "write_succeeded": result.write_succeeded,
+        "verification_succeeded": result.verification_succeeded,
+        "commit_gate_passed": result.commit_gate_passed,
+        "commit_gate_reason": result.commit_gate_reason or None,
+    }
+    payload.update(extra)
+    # ``default=str`` keeps a non-JSON-serializable ``extra`` value (exception,
+    # enum, datetime, ...) from raising inside a logging helper.
+    _perf_log.info(
+        "[file_operation_contract] %s",
+        json.dumps(payload, ensure_ascii=False, default=str),
+    )
From 94bc827252ad20f0335474981eb1009780f25695 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:48:20 +0200
Subject: [PATCH 04/87] refactor(chat): add streaming/agent/ package with
build_main_agent_for_thread
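Extracts the agent-construction wrapper that the chat streamers call to
materialize the LangGraph agent for a given thread. Centralizes how we
pass the agent factory its LLM, DB session, connector service,
checkpointer, and per-thread options (visibility, filesystem selection,
disabled tools, mentioned documents).
Add-only; pre-existing inline equivalent in stream_new_chat.py stays
until cutover. Note: agent/__init__.py already re-exports
stream_agent_events from agent/event_loop.py, which is only added in the
next patch, so the package cannot be imported until that patch is applied.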
---
.../tasks/chat/streaming/agent/__init__.py | 8 +++
.../app/tasks/chat/streaming/agent/builder.py | 49 +++++++++++++++++++
2 files changed, 57 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/agent/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/agent/builder.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/agent/__init__.py b/surfsense_backend/app/tasks/chat/streaming/agent/__init__.py
new file mode 100644
index 000000000..260dcb3f2
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/agent/__init__.py
@@ -0,0 +1,8 @@
+"""Agent construction and per-turn event-loop drivers."""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.agent.builder import build_main_agent_for_thread
+from app.tasks.chat.streaming.agent.event_loop import stream_agent_events
+
+__all__ = ["build_main_agent_for_thread", "stream_agent_events"]
diff --git a/surfsense_backend/app/tasks/chat/streaming/agent/builder.py b/surfsense_backend/app/tasks/chat/streaming/agent/builder.py
new file mode 100644
index 000000000..0db42edbf
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/agent/builder.py
@@ -0,0 +1,56 @@
+"""Single per-thread agent (re)build path.
+
+A graph swap mid-turn would corrupt checkpointer state for the same
+``thread_id``, so both the initial build and any mid-stream 429 recovery rebuild
+must funnel through this single function.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.agents.new_chat.filesystem_selection import FilesystemSelection
+from app.agents.new_chat.llm_config import AgentConfig
+from app.db import ChatVisibility
+from app.services.connector_service import ConnectorService
+
+
+async def build_main_agent_for_thread(
+    agent_factory: Any,
+    *,
+    llm: Any,
+    search_space_id: int,
+    db_session: Any,
+    connector_service: ConnectorService,
+    checkpointer: Any,
+    user_id: str | None,
+    thread_id: int | None,
+    agent_config: AgentConfig | None,
+    firecrawl_api_key: str | None,
+    thread_visibility: ChatVisibility | None,
+    filesystem_selection: FilesystemSelection | None,
+    disabled_tools: list[str] | None = None,
+    mentioned_document_ids: list[int] | None = None,
+) -> Any:
+    """Await ``agent_factory`` with the per-thread construction arguments.
+
+    Every keyword argument is forwarded to the factory unchanged and under the
+    same name; the factory's awaited result is returned as-is. Only
+    ``disabled_tools`` and ``mentioned_document_ids`` are optional.
+    """
+    factory_kwargs: dict[str, Any] = {
+        "llm": llm,
+        "search_space_id": search_space_id,
+        "db_session": db_session,
+        "connector_service": connector_service,
+        "checkpointer": checkpointer,
+        "user_id": user_id,
+        "thread_id": thread_id,
+        "agent_config": agent_config,
+        "firecrawl_api_key": firecrawl_api_key,
+        "thread_visibility": thread_visibility,
+        "filesystem_selection": filesystem_selection,
+        "disabled_tools": disabled_tools,
+        "mentioned_document_ids": mentioned_document_ids,
+    }
+    return await agent_factory(**factory_kwargs)
From 26c569467dc08515f71d697bca379f6e7e3cfb8d Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:48:26 +0200
Subject: [PATCH 05/87] refactor(chat): add
streaming/agent/event_loop.stream_agent_events
Extracts the inner agent-streaming driver previously inlined as
_stream_agent_events in stream_new_chat.py.
stream_agent_events drives graph_stream.event_stream.stream_output and,
after the agent finishes, performs the post-stream safety-net work:
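* safety-net commit of any staged filesystem state left behind when the
after-agent persistence hook never ran (e.g. client disconnect)
* evaluate file-operation contract outcomes and emit the appropriate
contract verdict for desktop_local_folder turns
* detect pending interrupts and emit one interrupt-request frame each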
This unit is what flows/shared/stream_loop.py wraps in the rate-limit
recovery while-loop. Add-only; no existing wiring uses it yet.
---
.../tasks/chat/streaming/agent/event_loop.py | 175 ++++++++++++++++++
1 file changed, 175 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py b/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py
new file mode 100644
index 000000000..b77bd3890
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py
@@ -0,0 +1,204 @@
+"""Per-turn agent event-loop driver.
+
+Drives ``stream_output`` (graph_stream relay) for one agent turn, then runs the
+post-stream agent-state inspection: safety-net commit of any staged filesystem
+state (in case ``aafter_agent`` was skipped), file-operation contract scoring,
+intent classification, and interrupt detection.
+"""
+
+from __future__ import annotations
+
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.new_chat.middleware.kb_persistence import (
+    commit_staged_filesystem_state,
+)
+from app.services.new_streaming_service import VercelStreamingService
+from app.tasks.chat.streaming.contract.file_contract import (
+    contract_enforcement_active,
+    evaluate_file_contract_outcome,
+    log_file_contract,
+)
+from app.tasks.chat.streaming.graph_stream.event_stream import stream_output
+from app.tasks.chat.streaming.helpers.interrupt_inspector import (
+    all_interrupt_values,
+)
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+from app.tasks.chat.streaming.shared.utils import safe_float
+from app.utils.perf import get_perf_logger
+
+_perf_log = get_perf_logger()
+
+# State keys that hold filesystem work staged but not yet committed.
+_STAGED_STATE_KEYS = (
+    "dirty_paths",
+    "staged_dirs",
+    "pending_moves",
+    "pending_deletes",
+    "pending_dir_deletes",
+)
+
+# Intent labels accepted from the ``file_operation_contract`` state entry.
+_KNOWN_INTENTS = ("chat_only", "file_write", "file_read")
+
+
+def _has_staged_filesystem_work(state_values: Any) -> bool:
+    """Return ``True`` when any staged-filesystem key holds a truthy value."""
+    return any(state_values.get(key) for key in _STAGED_STATE_KEYS)
+
+
+def _apply_contract_intent(
+    result: StreamResult, contract_state: Any, current_turn_id: Any
+) -> None:
+    """Copy intent and confidence from the contract state onto ``result``.
+
+    A recognised intent is taken only when the contract's ``turn_id`` matches
+    ``current_turn_id``; a recognised intent from another turn resets
+    ``intent_detected`` to ``"chat_only"``. Unrecognised intents leave
+    ``intent_detected`` untouched. Confidence is ``0.0`` for non-matching turns.
+    """
+    is_current_turn = contract_state.get("turn_id") == current_turn_id
+    intent_value = contract_state.get("intent")
+    if isinstance(intent_value, str) and intent_value in _KNOWN_INTENTS:
+        result.intent_detected = intent_value if is_current_turn else "chat_only"
+    result.intent_confidence = (
+        safe_float(contract_state.get("confidence"), default=0.0)
+        if is_current_turn
+        else 0.0
+    )
+
+
+async def stream_agent_events(
+    agent: Any,
+    config: dict[str, Any],
+    input_data: Any,
+    streaming_service: VercelStreamingService,
+    result: StreamResult,
+    step_prefix: str = "thinking",
+    initial_step_id: str | None = None,
+    initial_step_title: str = "",
+    initial_step_items: list[str] | None = None,
+    *,
+    fallback_commit_search_space_id: int | None = None,
+    fallback_commit_created_by_id: str | None = None,
+    fallback_commit_filesystem_mode: FilesystemMode = FilesystemMode.CLOUD,
+    fallback_commit_thread_id: int | None = None,
+    runtime_context: Any = None,
+    content_builder: Any | None = None,
+) -> AsyncGenerator[str, None]:
+    """Relay one agent turn as SSE strings, then inspect the final agent state.
+
+    Frames from ``stream_output`` are yielded unchanged. Once the relay is
+    exhausted the function, in order:
+
+    1. runs the safety-net filesystem commit (cloud mode only, and only when a
+       ``fallback_commit_search_space_id`` is given and staged work exists);
+    2. copies intent and confidence from the ``file_operation_contract`` state
+       onto ``result``;
+    3. scores ``file_write`` turns and, when the gate fails while contract
+       enforcement is active, emits a notice that replaces ``accumulated_text``;
+    4. yields one interrupt-request frame per pending interrupt value.
+
+    ``runtime_context`` and ``content_builder`` are forwarded to
+    ``stream_output``; ``content_builder`` is additionally notified about the
+    gate-notice text part. A failure inside the safety-net commit is logged and
+    swallowed so it cannot break the stream.
+    """
+    async for frame in stream_output(
+        agent=agent,
+        config=config,
+        input_data=input_data,
+        streaming_service=streaming_service,
+        result=result,
+        step_prefix=step_prefix,
+        initial_step_id=initial_step_id,
+        initial_step_title=initial_step_title,
+        initial_step_items=initial_step_items,
+        content_builder=content_builder,
+        runtime_context=runtime_context,
+    ):
+        yield frame
+
+    final_text = result.accumulated_text
+
+    state = await agent.aget_state(config)
+    state_values = getattr(state, "values", {}) or {}
+
+    # Safety net: if astream_events was cancelled before
+    # KnowledgeBasePersistenceMiddleware.aafter_agent ran, any staged work
+    # (dirty_paths / staged_dirs / pending_moves / pending_deletes /
+    # pending_dir_deletes) is still in the checkpointed state. Run the SAME
+    # shared commit helper so the turn's writes don't get lost on client
+    # disconnect, then push the delta back into the graph using ``as_node=...``
+    # so reducers fire as if the after_agent hook produced it.
+    cloud_commit_possible = (
+        fallback_commit_filesystem_mode == FilesystemMode.CLOUD
+        and fallback_commit_search_space_id is not None
+    )
+    if cloud_commit_possible and _has_staged_filesystem_work(state_values):
+        try:
+            delta = await commit_staged_filesystem_state(
+                state_values,
+                search_space_id=fallback_commit_search_space_id,
+                created_by_id=fallback_commit_created_by_id,
+                filesystem_mode=fallback_commit_filesystem_mode,
+                thread_id=fallback_commit_thread_id,
+                dispatch_events=False,
+            )
+            if delta:
+                await agent.aupdate_state(
+                    config,
+                    delta,
+                    as_node="KnowledgeBasePersistenceMiddleware.after_agent",
+                )
+        except Exception as exc:
+            _perf_log.warning(
+                "[stream_agent_events] safety-net commit failed: %s", exc
+            )
+
+    # Contracts written by previous turns/checkpoints are stale; the helper
+    # only trusts an entry whose ``turn_id`` matches this turn's.
+    contract_state = state_values.get("file_operation_contract") or {}
+    current_turn_id = config.get("configurable", {}).get("turn_id", "")
+    _apply_contract_intent(result, contract_state, current_turn_id)
+
+    if result.intent_detected != "file_write":
+        result.commit_gate_passed = True
+        result.commit_gate_reason = ""
+    else:
+        gate_passed, gate_reason = evaluate_file_contract_outcome(result)
+        result.commit_gate_passed = gate_passed
+        result.commit_gate_reason = gate_reason
+        if not gate_passed and contract_enforcement_active(result):
+            gate_notice = (
+                "I could not complete the requested file write because no successful "
+                "write_file/edit_file operation was confirmed."
+            )
+            gate_text_id = streaming_service.generate_text_id()
+            yield streaming_service.format_text_start(gate_text_id)
+            if content_builder is not None:
+                content_builder.on_text_start(gate_text_id)
+            yield streaming_service.format_text_delta(gate_text_id, gate_notice)
+            if content_builder is not None:
+                content_builder.on_text_delta(gate_text_id, gate_notice)
+            yield streaming_service.format_text_end(gate_text_id)
+            if content_builder is not None:
+                content_builder.on_text_end(gate_text_id)
+            yield streaming_service.format_terminal_info(gate_notice, "error")
+            final_text = gate_notice
+
+    result.accumulated_text = final_text
+    log_file_contract("turn_outcome", result)
+
+    pending_values = all_interrupt_values(state)
+    if pending_values:
+        result.is_interrupted = True
+        # One frame per paused subagent so each parallel HITL renders its own
+        # approval card on the wire. Order matches ``state.interrupts``, which
+        # the resume slicer in
+        # ``checkpointed_subagent_middleware.resume_routing`` consumes in the
+        # same order — keeping emit and resume in lock-step.
+        for interrupt_value in pending_values:
+            yield streaming_service.format_interrupt_request(interrupt_value)
From e9a98ecafb6d5f1af3dc8fc2f934e2882da11ec3 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:09 +0200
Subject: [PATCH 06/87] refactor(chat): add streaming/flows/shared/ base
helpers
Six small, single-purpose modules shared by the upcoming new_chat and
resume_chat orchestrators:
* llm_bundle: dispatches negative config_id to the YAML loader and
non-negative config_id to the DB loader, returning (llm, AgentConfig).
* pre_stream_setup: builds the connector service, resolves the
Firecrawl API key, and returns the chat checkpointer.
* first_frames: iter_initial_frames + iter_final_frames emit the canonical
message-start / step-start / idle / finish / done SSE envelope.
* finalize_emit: iter_token_usage_frame emits the per-turn usage frame
from a TokenAccumulator summary.
* finally_cleanup: close_session_and_clear_ai_responding and run_gc_pass
centralize the finally-block bookkeeping.
* span: open_chat_request_span / set_agent_mode / close_chat_request_span /
record_outcome_attrs wrap the OpenTelemetry chat_request span.
Add-only; these are not yet wired into stream_new_chat.py.
---
.../chat/streaming/flows/shared/__init__.py | 3 +
.../streaming/flows/shared/finalize_emit.py | 54 +++++++++++++
.../streaming/flows/shared/finally_cleanup.py | 69 ++++++++++++++++
.../streaming/flows/shared/first_frames.py | 40 ++++++++++
.../chat/streaming/flows/shared/llm_bundle.py | 57 +++++++++++++
.../flows/shared/pre_stream_setup.py | 40 ++++++++++
.../tasks/chat/streaming/flows/shared/span.py | 80 +++++++++++++++++++
7 files changed, 343 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/finalize_emit.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/first_frames.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/llm_bundle.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/pre_stream_setup.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/__init__.py
new file mode 100644
index 000000000..b65acc43c
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/__init__.py
@@ -0,0 +1,3 @@
+"""Building blocks shared by ``new_chat`` and ``resume_chat`` orchestrators."""
+
+from __future__ import annotations
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/finalize_emit.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finalize_emit.py
new file mode 100644
index 000000000..e5de3f6a4
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finalize_emit.py
@@ -0,0 +1,57 @@
+"""Emit the per-turn token-usage SSE frame from the accumulator.
+
+``per_message_summary()`` returns ``None`` when the turn made no chargeable
+LLM calls (e.g. interrupt-on-input). In that case we skip the frame; the
+frontend has no usage to render.
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Iterator
+from typing import TYPE_CHECKING
+
+from app.services.new_streaming_service import VercelStreamingService
+from app.utils.perf import get_perf_logger
+
+if TYPE_CHECKING:
+    from app.services.token_tracking_service import TokenAccumulator
+
+_perf_log = get_perf_logger()
+logger = logging.getLogger(__name__)
+
+
+def iter_token_usage_frame(
+    streaming_service: VercelStreamingService,
+    *,
+    accumulator: TokenAccumulator,
+    log_label: str,
+) -> Iterator[str]:
+    """Yield zero or one ``data: token-usage`` SSE frame.
+
+    The frame is produced only when ``accumulator.per_message_summary()`` is
+    truthy. Side effect: always logs a one-line
+    ``[token_usage] {log_label}: ...`` summary so cost analysis can grep
+    call/total/cost across all flows, whether or not a frame is emitted.
+    """
+    usage_summary = accumulator.per_message_summary()
+    _perf_log.info(
+        "[token_usage] %s: calls=%d total=%d cost_micros=%d summary=%s",
+        log_label,
+        len(accumulator.calls),
+        accumulator.grand_total,
+        accumulator.total_cost_micros,
+        usage_summary,
+    )
+    if not usage_summary:
+        return
+
+    frame_payload = {
+        "usage": usage_summary,
+        "prompt_tokens": accumulator.total_prompt_tokens,
+        "completion_tokens": accumulator.total_completion_tokens,
+        "total_tokens": accumulator.grand_total,
+        "cost_micros": accumulator.total_cost_micros,
+        "call_details": accumulator.serialized_calls(),
+    }
+    yield streaming_service.format_data("token-usage", frame_payload)
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py
new file mode 100644
index 000000000..8d425402f
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py
@@ -0,0 +1,81 @@
+"""Shared finally-block helpers: session close, GC pass, native-heap trim.
+
+These are called from inside an ``anyio.CancelScope(shield=True)`` block in
+each flow's ``finally`` (Starlette's BaseHTTPMiddleware cancels the scope on
+client disconnect; without the shield the very first ``await`` would raise
+``CancelledError`` and the rest of cleanup — including ``session.close()`` —
+would never run).
+"""
+
+from __future__ import annotations
+
+import contextlib
+import gc
+import logging
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import shielded_async_session
+from app.services.chat_session_state_service import clear_ai_responding
+from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
+
+_perf_log = get_perf_logger()
+logger = logging.getLogger(__name__)
+
+
+async def close_session_and_clear_ai_responding(
+    session: AsyncSession, chat_id: int
+) -> None:
+    """Rollback + clear AI-responding flag + expunge_all + close.
+
+    If the rollback or the flag clear fails on ``session``, the clear is
+    retried on a fresh shielded session so a UI is never stuck on
+    "AI is responding…" after a crash. Both failures are logged with their
+    traceback; this function itself never raises an ``Exception``.
+    """
+    try:
+        await session.rollback()
+        await clear_ai_responding(session, chat_id)
+    except Exception:
+        # Keep the primary failure visible: without this log the reason the
+        # original session was unusable would be lost entirely.
+        logger.warning(
+            "Clearing AI responding state on the request session failed for thread %s; retrying on a fresh session",
+            chat_id,
+            exc_info=True,
+        )
+        try:
+            async with shielded_async_session() as fresh_session:
+                await clear_ai_responding(fresh_session, chat_id)
+        except Exception:
+            logger.warning(
+                "Failed to clear AI responding state for thread %s",
+                chat_id,
+                exc_info=True,
+            )
+
+    # Best-effort teardown: neither step may prevent the other from running.
+    with contextlib.suppress(Exception):
+        session.expunge_all()
+
+    with contextlib.suppress(Exception):
+        await session.close()
+
+
+def run_gc_pass(*, log_prefix: str, chat_id: int) -> None:
+    """One full gen0/1/2 pass + native-heap trim + END system snapshot.
+
+    Breaking circular refs held by the agent graph, tools, and LLM wrappers
+    needs to happen in the caller (set the locals to ``None``) — this just
+    runs the collector and logs how many objects came back.
+    """
+    collected = sum(gc.collect(generation) for generation in (0, 1, 2))
+    if collected:
+        _perf_log.info(
+            "[%s] gc.collect() reclaimed %d objects (chat_id=%s)",
+            log_prefix,
+            collected,
+            chat_id,
+        )
+    trim_native_heap()
+    log_system_snapshot(f"{log_prefix}_END")
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/first_frames.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/first_frames.py
new file mode 100644
index 000000000..5e568b1e8
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/first_frames.py
@@ -0,0 +1,40 @@
+"""Initial SSE frames every flow emits right after pre-stream setup.
+
+Order matters: ``message_start`` opens the assistant message, ``start_step``
+opens the first thinking step, ``turn-info`` lets the frontend stamp the
+correlation id onto the in-flight message, and ``turn-status: busy`` flips the
+UI into the streaming state.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+
+from app.services.new_streaming_service import VercelStreamingService
+
+
+def iter_initial_frames(
+ streaming_service: VercelStreamingService,
+ *,
+ turn_id: str,
+) -> Iterator[str]:
+ """Yield the four canonical opening frames in order.
+
+ ``turn-info`` carries ``chat_turn_id`` so even pure-text turns (which
+ never produce a tool / action-log event) still teach the frontend the
+ turn correlation id used for ``appendMessage`` durable storage.
+ """
+ yield streaming_service.format_message_start()
+ yield streaming_service.format_start_step()
+ yield streaming_service.format_data("turn-info", {"chat_turn_id": turn_id})
+ yield streaming_service.format_data("turn-status", {"status": "busy"})
+
+
+def iter_final_frames(
+ streaming_service: VercelStreamingService,
+) -> Iterator[str]:
+ """Yield ``turn-status: idle`` plus the finish/done trailer in order."""
+ yield streaming_service.format_data("turn-status", {"status": "idle"})
+ yield streaming_service.format_finish_step()
+ yield streaming_service.format_finish()
+ yield streaming_service.format_done()
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/llm_bundle.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/llm_bundle.py
new file mode 100644
index 000000000..2f334114c
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/llm_bundle.py
@@ -0,0 +1,57 @@
+"""Load an LLM + AgentConfig bundle for a given config id.
+
+Handles both code paths uniformly:
+- ``config_id >= 0`` → database-backed ``NewLLMConfig`` row (per-user/per-space).
+- ``config_id < 0`` → YAML-defined global LLM config (built-in defaults).
+
+Returns ``(llm, agent_config, error_message)``; on success ``error_message`` is
+``None``. The caller emits the friendly SSE error frame.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.new_chat.llm_config import (
+ AgentConfig,
+ create_chat_litellm_from_agent_config,
+ create_chat_litellm_from_config,
+ load_agent_config,
+ load_global_llm_config_by_id,
+)
+
+
+async def load_llm_bundle(
+ session: AsyncSession,
+ *,
+ config_id: int,
+ search_space_id: int,
+) -> tuple[Any, AgentConfig | None, str | None]:
+ if config_id >= 0:
+ loaded_agent_config = await load_agent_config(
+ session=session,
+ config_id=config_id,
+ search_space_id=search_space_id,
+ )
+ if not loaded_agent_config:
+ return (
+ None,
+ None,
+ f"Failed to load NewLLMConfig with id {config_id}",
+ )
+ return (
+ create_chat_litellm_from_agent_config(loaded_agent_config),
+ loaded_agent_config,
+ None,
+ )
+
+ loaded_llm_config = load_global_llm_config_by_id(config_id)
+ if not loaded_llm_config:
+ return None, None, f"Failed to load LLM config with id {config_id}"
+ return (
+ create_chat_litellm_from_config(loaded_llm_config),
+ AgentConfig.from_yaml_config(loaded_llm_config),
+ None,
+ )
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/pre_stream_setup.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/pre_stream_setup.py
new file mode 100644
index 000000000..ec92306dd
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/pre_stream_setup.py
@@ -0,0 +1,40 @@
+"""Pre-stream setup: connector service, firecrawl key, checkpointer."""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.new_chat.checkpointer import get_checkpointer
+from app.db import SearchSourceConnectorType
+from app.services.connector_service import ConnectorService
+
+
+async def setup_connector_and_firecrawl(
+ session: AsyncSession,
+ *,
+ search_space_id: int,
+) -> tuple[ConnectorService, str | None]:
+ """Build the per-turn connector service and pull the firecrawl API key.
+
+ Returns ``(connector_service, firecrawl_api_key)``. ``firecrawl_api_key`` is
+ ``None`` when no web-crawler connector is configured (the agent simply
+ skips firecrawl-backed tools in that case).
+ """
+ connector_service = ConnectorService(session, search_space_id=search_space_id)
+ firecrawl_api_key: str | None = None
+ webcrawler_connector = await connector_service.get_connector_by_type(
+ SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, search_space_id
+ )
+ if webcrawler_connector and webcrawler_connector.config:
+ firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")
+ return connector_service, firecrawl_api_key
+
+
+async def get_chat_checkpointer():
+ """Resolve the PostgreSQL checkpointer for persistent conversation memory.
+
+ Thin wrapper around ``app.agents.new_chat.checkpointer.get_checkpointer`` so
+ flow orchestrators can rely on a streaming-local symbol and we have a hook
+ point if the checkpointer source ever needs to vary per flow.
+ """
+ return await get_checkpointer()
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py
new file mode 100644
index 000000000..1e5169af1
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py
@@ -0,0 +1,80 @@
+"""OpenTelemetry chat-request span wrapper for streaming flows."""
+
+from __future__ import annotations
+
+import contextlib
+import sys
+from typing import Any, Literal
+
+from app.observability import metrics as ot_metrics
+from app.observability import otel as ot
+
+
+def open_chat_request_span(
+ *,
+ chat_id: int,
+ search_space_id: int,
+ flow: Literal["new", "regenerate", "resume"],
+ request_id: str | None,
+ turn_id: str,
+ filesystem_mode: str,
+ client_platform: str,
+ agent_mode: str,
+) -> tuple[Any, Any]:
+ """Open the per-request span; returns ``(span_cm, span)`` for finally-close."""
+ span_cm = ot.chat_request_span(
+ chat_id=chat_id,
+ search_space_id=search_space_id,
+ flow=flow,
+ request_id=request_id,
+ turn_id=turn_id,
+ filesystem_mode=filesystem_mode,
+ client_platform=client_platform,
+ agent_mode=agent_mode,
+ )
+ span = span_cm.__enter__()
+ return span_cm, span
+
+
+def set_agent_mode(span: Any, agent_mode: str) -> None:
+ """Tag the span with the resolved agent mode (single / multi)."""
+ with contextlib.suppress(Exception):
+ span.set_attribute("agent.mode", agent_mode)
+
+
+def close_chat_request_span(
+ *,
+ span_cm: Any,
+ span: Any,
+ chat_outcome: str,
+ chat_agent_mode: str,
+ flow: Literal["new", "regenerate", "resume"],
+ chat_error_category: str | None,
+ duration_seconds: float,
+) -> None:
+ """Record metrics + close the span. Swallows errors (finally-block context)."""
+ with contextlib.suppress(Exception):
+ span.set_attribute("chat.outcome", chat_outcome)
+ ot_metrics.record_chat_request_duration(
+ duration_seconds * 1000,
+ flow=flow,
+ outcome=chat_outcome,
+ agent_mode=chat_agent_mode,
+ )
+ ot_metrics.record_chat_request_outcome(
+ flow=flow,
+ outcome=chat_outcome,
+ agent_mode=chat_agent_mode,
+ error_category=chat_error_category,
+ )
+ span_cm.__exit__(*sys.exc_info())
+
+
+def record_outcome_attrs(
+ span: Any, *, chat_outcome: str, chat_error_category: str | None
+) -> None:
+ """Stamp outcome + error.category on the span (used in the except branch)."""
+ with contextlib.suppress(Exception):
+ span.set_attribute("chat.outcome", chat_outcome)
+ if chat_error_category is not None:
+ span.set_attribute("error.category", chat_error_category)
From 40300d300a01705e3134c9953ef6008cd322c9e8 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:14 +0200
Subject: [PATCH 07/87] refactor(chat): add
streaming/flows/shared/premium_quota.py
Centralizes the premium-credits lifecycle for chat turns:
* needs_premium_quota: gate check (premium user + non-fallback config).
* PremiumReservation: dataclass capturing reservation state + token totals.
* reserve_premium / finalize_premium / release_premium: idempotent
reservation, commit, and rollback used by the orchestrators.
Add-only; legacy stream_new_chat.py keeps its inline quota handling
until cutover.
---
.../streaming/flows/shared/premium_quota.py | 132 ++++++++++++++++++
1 file changed, 132 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py
new file mode 100644
index 000000000..0ec40d275
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/premium_quota.py
@@ -0,0 +1,132 @@
+"""Premium credit (USD micro-units) reserve / finalize / release lifecycle.
+
+Both ``stream_new_chat`` and ``stream_resume_chat`` reserve premium credits up
+front (so a single LLM call can't run away with the budget), then finalize the
+actual provider cost reported by LiteLLM when the turn completes successfully,
+or release the reservation on the cancellation / interrupted-without-finalize
+paths.
+
+State is held by the orchestrator as a simple ``PremiumReservation`` dataclass
+so reservation, fallback-on-denied, finalize, and release can all be reasoned
+about from one place.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid as _uuid
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from uuid import UUID
+
+from app.agents.new_chat.llm_config import AgentConfig
+from app.db import shielded_async_session
+
+if TYPE_CHECKING:
+ from app.services.token_tracking_service import TokenAccumulator
+
+
+@dataclass
+class PremiumReservation:
+ """Active premium-credit reservation for one turn.
+
+ ``request_id`` is the per-reservation idempotency key; ``reserve_premium``
+ and ``finalize_premium`` pass it on, ``release_premium`` does not.
+ ``reserved_micros`` is the up-front estimate; ``finalize`` debits the
+ actual cost, ``release`` returns it untouched.
+ """
+
+ request_id: str  # 16 hex chars taken from a fresh uuid4 in ``reserve_premium``
+ reserved_micros: int  # amount computed by ``estimate_call_reserve_micros``
+ allowed: bool  # copied from the quota service's reserve result
+
+
+def needs_premium_quota(
+ agent_config: AgentConfig | None, user_id: str | None
+) -> bool:
+ return bool(agent_config is not None and user_id and agent_config.is_premium)
+
+
+async def reserve_premium(
+ *,
+ agent_config: AgentConfig,
+ user_id: str,
+) -> PremiumReservation:
+ """Reserve estimated micros up front; returns the reservation handle."""
+ from app.services.token_quota_service import (
+ TokenQuotaService,
+ estimate_call_reserve_micros,
+ )
+
+ request_id = _uuid.uuid4().hex[:16]
+ litellm_params = agent_config.litellm_params or {}
+ base_model = (
+ litellm_params.get("base_model") if isinstance(litellm_params, dict) else None
+ ) or agent_config.model_name or ""
+ reserve_amount_micros = estimate_call_reserve_micros(
+ base_model=base_model,
+ quota_reserve_tokens=agent_config.quota_reserve_tokens,
+ )
+ async with shielded_async_session() as quota_session:
+ quota_result = await TokenQuotaService.premium_reserve(
+ db_session=quota_session,
+ user_id=UUID(user_id),
+ request_id=request_id,
+ reserve_micros=reserve_amount_micros,
+ )
+ return PremiumReservation(
+ request_id=request_id,
+ reserved_micros=reserve_amount_micros,
+ allowed=quota_result.allowed,
+ )
+
+
+async def finalize_premium(
+ *,
+ reservation: PremiumReservation,
+ user_id: str,
+ accumulator: TokenAccumulator,
+) -> None:
+ """Finalize debit using the actual provider cost reported by LiteLLM.
+
+ Best-effort: failures here must not bubble up to the SSE stream — the user
+ has already received their tokens; we log and move on.
+ """
+ try:
+ from app.services.token_quota_service import TokenQuotaService
+
+ async with shielded_async_session() as quota_session:
+ await TokenQuotaService.premium_finalize(
+ db_session=quota_session,
+ user_id=UUID(user_id),
+ request_id=reservation.request_id,
+ actual_micros=accumulator.total_cost_micros,
+ reserved_micros=reservation.reserved_micros,
+ )
+ except Exception:
+ logging.getLogger(__name__).warning(
+ "Failed to finalize premium quota for user %s",
+ user_id,
+ exc_info=True,
+ )
+
+
+async def release_premium(
+ *,
+ reservation: PremiumReservation,
+ user_id: str,
+) -> None:
+ """Release the reservation on cancellation paths; never raises."""
+ try:
+ from app.services.token_quota_service import TokenQuotaService
+
+ async with shielded_async_session() as quota_session:
+ await TokenQuotaService.premium_release(
+ db_session=quota_session,
+ user_id=UUID(user_id),
+ reserved_micros=reservation.reserved_micros,
+ )
+ except Exception:
+ logging.getLogger(__name__).warning(
+ "Failed to release premium quota for user %s", user_id
+ )
From 2c3edb7c845d1be1a8b4c4d4c772b9ec25945c82 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:18 +0200
Subject: [PATCH 08/87] refactor(chat): add
streaming/flows/shared/terminal_error.py
Extracts handle_terminal_exception: the shared except-branch behavior for
the chat orchestrators. Classifies the raised exception, logs the
structured chat_stream error event, and emits the terminal-error SSE
frame + done sentinel via the streaming service.
Add-only; nothing imports it yet.
---
.../streaming/flows/shared/terminal_error.py | 120 ++++++++++++++++++
1 file changed, 120 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py
new file mode 100644
index 000000000..c9db2caf2
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/terminal_error.py
@@ -0,0 +1,120 @@
+"""Handle the ``except Exception`` branch of a streaming flow.
+
+Classifies the exception, records OpenTelemetry attributes, and emits a leading
+``turn-status`` frame, one terminal error frame, then the idle/finish/done trailer.
+
+Used by both ``stream_new_chat`` and ``stream_resume_chat``; flow-specific bits
+(label, span, BusyError tracking) are passed by the caller.
+"""
+
+from __future__ import annotations
+
+import logging
+import traceback
+from collections.abc import Iterator
+from typing import Any, Literal
+
+from app.agents.new_chat.errors import BusyError
+from app.observability import metrics as ot_metrics
+from app.observability import otel as ot
+from app.services.new_streaming_service import VercelStreamingService
+from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
+from app.tasks.chat.streaming.errors.emitter import emit_stream_terminal_error
+from app.tasks.chat.streaming.flows.shared.first_frames import iter_final_frames
+from app.tasks.chat.streaming.flows.shared.span import record_outcome_attrs
+
+logger = logging.getLogger(__name__)
+
+
+def handle_terminal_exception(
+ exc: Exception,
+ *,
+ flow: Literal["new", "regenerate", "resume"],
+ flow_label: str,
+ log_prefix: str,
+ streaming_service: VercelStreamingService,
+ request_id: str | None,
+ chat_id: int,
+ search_space_id: int,
+ user_id: str | None,
+ chat_span: Any,
+) -> tuple[Iterator[str], dict[str, Any]]:
+ """Classify, log, and produce the SSE frames for a terminal exception.
+
+ Call while ``exc`` is being handled: the traceback print below relies on it.
+ Returns ``(frame_iterator, summary)``; frames are built lazily. ``summary``:
+ - ``busy_error_raised``: bool — caller must skip the lock-release path
+ when True (caller never acquired the busy mutex).
+ - ``chat_outcome``: str — span outcome attribute.
+ - ``chat_error_category``: str — categorized error label for metrics.
+ """
+ busy_error_raised = isinstance(exc, BusyError)
+
+ (
+ error_kind,
+ error_code,
+ severity,
+ is_expected,
+ user_message,
+ error_extra,
+ ) = classify_stream_exception(exc, flow_label=flow_label)
+ chat_outcome = error_code or error_kind or "error"  # first truthy value wins
+ chat_error_category = ot_metrics.categorize_exception(exc)
+ record_outcome_attrs(
+ chat_span,
+ chat_outcome=chat_outcome,
+ chat_error_category=chat_error_category,
+ )
+ with __suppress():  # best-effort: errors raised by record_error are swallowed
+ ot.record_error(chat_span, exc)
+ error_message = f"Error during {flow_label}: {exc!s}"
+ # Match the original behavior: emit the full traceback via ``print`` (stdout
+ # by default) so it shows up regardless of the logger config.
+ print(f"[{log_prefix}] {error_message}")
+ print(f"[{log_prefix}] Exception type: {type(exc).__name__}")
+ print(f"[{log_prefix}] Traceback:\n{traceback.format_exc()}")  # formats the exception currently being handled
+
+ def _iter_frames() -> Iterator[str]:
+ if error_code == "TURN_CANCELLING":
+ status_payload: dict[str, Any] = {"status": "cancelling"}
+ if error_extra:
+ status_payload.update(error_extra)
+ yield streaming_service.format_data("turn-status", status_payload)
+ else:  # NOTE(review): "busy" is sent just before the error frame and the idle trailer — confirm intended
+ yield streaming_service.format_data("turn-status", {"status": "busy"})
+
+ yield emit_stream_terminal_error(
+ streaming_service=streaming_service,
+ flow=flow,
+ request_id=request_id,
+ thread_id=chat_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ message=user_message,
+ error_kind=error_kind,
+ error_code=error_code,
+ severity=severity,
+ is_expected=is_expected,
+ extra=error_extra,
+ )
+ yield from iter_final_frames(streaming_service)  # idle status + finish-step/finish/done
+
+ return (
+ _iter_frames(),  # generator object: nothing above is emitted until it is iterated
+ {
+ "busy_error_raised": busy_error_raised,
+ "chat_outcome": chat_outcome,
+ "chat_error_category": chat_error_category,
+ },
+ )
+
+
+def __suppress():
+ """Local single-use ``contextlib.suppress(Exception)`` factory.
+
+ Inlined here so callers don't import ``contextlib`` just for the
+ ``record_error`` call site.
+ """
+ import contextlib
+
+ return contextlib.suppress(Exception)
From b54b803dc9a844d74700f3dc27eb00282d63b081 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:27 +0200
Subject: [PATCH 09/87] refactor(chat): add streaming/flows/shared/ rate-limit
recovery + stream loop
Two cooperating modules that wrap stream_agent_events with in-stream
recovery from provider 429s:
* rate_limit_recovery: can_recover_provider_rate_limit truth-table
guard, reroute_to_next_auto_pin (selects the next eligible auto-pin
config and reloads the LLM bundle), log_rate_limit_recovered.
* stream_loop: run_stream_loop drives stream_agent_events in a
while-True loop, delegating recovery to a flow-supplied RecoverFn
callback so new_chat and resume_chat can share the same loop while
keeping their own nonlocal state.
Add-only; not yet wired into any orchestrator.
---
.../flows/shared/rate_limit_recovery.py | 129 ++++++++++++++++++
.../streaming/flows/shared/stream_loop.py | 85 ++++++++++++
2 files changed, 214 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/rate_limit_recovery.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/stream_loop.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/rate_limit_recovery.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/rate_limit_recovery.py
new file mode 100644
index 000000000..6b3857594
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/rate_limit_recovery.py
@@ -0,0 +1,129 @@
+"""Shared steps for the in-stream provider rate-limit recovery loop.
+
+Both flows wrap ``run_stream_loop`` with a flow-specific ``recover`` closure;
+the *guard*, the *auto-pin reroute*, and the *post-recovery telemetry* are the
+same on both sides and live here so behaviour can't drift.
+
+The orchestrator owns the parts that genuinely diverge:
+
+ * cancelling the title task (new_chat only),
+ * passing ``mentioned_document_ids`` to ``build_main_agent_for_thread``,
+ * the log prefix (``stream_new_chat`` vs ``stream_resume``).
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.new_chat.middleware.busy_mutex import end_turn
+from app.observability import otel as ot
+from app.services.auto_model_pin_service import (
+ mark_runtime_cooldown,
+ resolve_or_get_pinned_llm_config_id,
+)
+from app.tasks.chat.streaming.errors.classifier import (
+ is_provider_rate_limited,
+ log_chat_stream_error,
+)
+
+
+def can_recover_provider_rate_limit(
+ exc: BaseException,
+ *,
+ first_event_seen: bool,
+ runtime_rate_limit_recovered: bool,
+ requested_llm_config_id: int,
+ current_llm_config_id: int,
+) -> bool:
+ """Guard: only the first auto-pin → provider-rate-limited failure recovers.
+
+ All conditions must hold:
+
+ * ``runtime_rate_limit_recovered is False`` — at most one recovery per turn.
+ * ``requested_llm_config_id == 0`` — caller opted into auto-pin (id=0).
+ * ``current_llm_config_id < 0`` — currently on a YAML config (the only
+ kind the auto-pin pool draws from).
+ * ``first_event_seen is False`` — we haven't sent any SSE to the user yet,
+ so a silent rebuild + retry is invisible.
+ * The exception is provider-side rate-limited (HTTP 429 or known shape).
+ """
+ return (
+ not runtime_rate_limit_recovered
+ and requested_llm_config_id == 0
+ and current_llm_config_id < 0
+ and not first_event_seen
+ and is_provider_rate_limited(exc)
+ )
+
+
+async def reroute_to_next_auto_pin(
+ session: AsyncSession,
+ *,
+ chat_id: int,
+ search_space_id: int,
+ user_id: str | None,
+ current_llm_config_id: int,
+ requires_image_input: bool,
+) -> int:
+ """Release lock, cool down the failing config, pick a new auto-pin id.
+
+ Returns the new ``llm_config_id``. ``end_turn`` is called because the failed
+ attempt may still hold the per-thread busy mutex (middleware teardown can
+ lag behind raised provider errors) — the same-request retry would otherwise
+ bounce on ``BusyError``.
+ """
+ end_turn(str(chat_id))
+ mark_runtime_cooldown(current_llm_config_id, reason="provider_rate_limited")
+ pinned = await resolve_or_get_pinned_llm_config_id(
+ session,
+ thread_id=chat_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ selected_llm_config_id=0,
+ exclude_config_ids={current_llm_config_id},
+ requires_image_input=requires_image_input,
+ )
+ return pinned.resolved_llm_config_id
+
+
+def log_rate_limit_recovered(
+ *,
+ flow: Literal["new", "regenerate", "resume"],
+ request_id: str | None,
+ chat_id: int,
+ search_space_id: int,
+ user_id: str | None,
+ previous_config_id: int,
+ new_config_id: int,
+) -> None:
+ """Emit the OTEL event + structured ``[chat_stream_error]`` log line."""
+ ot.add_event(
+ "chat.rate_limit.recovered",
+ {
+ "recovery.reason": "provider_rate_limited",
+ "recovery.previous_config_id": previous_config_id,
+ "recovery.fallback_config_id": new_config_id,
+ },
+ )
+ log_chat_stream_error(
+ flow=flow,
+ error_kind="rate_limited",
+ error_code="RATE_LIMITED",
+ severity="info",
+ is_expected=True,
+ request_id=request_id,
+ thread_id=chat_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ message=(
+ "Auto-pinned model hit runtime rate limit; switched to "
+ "another eligible model and retried."
+ ),
+ extra={
+ "auto_runtime_recover": True,
+ "previous_config_id": previous_config_id,
+ "fallback_config_id": new_config_id,
+ },
+ )
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/stream_loop.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/stream_loop.py
new file mode 100644
index 000000000..6cf0df855
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/stream_loop.py
@@ -0,0 +1,85 @@
+"""Drive ``stream_agent_events`` with in-stream rate-limit recovery.
+
+Both ``stream_new_chat`` and ``stream_resume_chat`` wrap the agent event loop
+in a ``while True`` that catches the *first* provider rate-limit error (see
+``can_recover_provider_rate_limit``) before any SSE event reaches the user,
+rebuilds the agent on an alternative auto-pin, and retries the turn.
+
+The recovery callback is flow-specific (different ``mentioned_document_ids``
+contract, different logging label, etc.) — this module owns the loop shape,
+the caller owns the rebuild.
+"""
+
+from __future__ import annotations
+
+from collections.abc import AsyncGenerator, Awaitable, Callable
+from typing import Any
+
+from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.services.new_streaming_service import VercelStreamingService
+from app.tasks.chat.streaming.agent.event_loop import stream_agent_events
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+
+# Returns the rebuilt agent on a successful recovery, or ``None`` to re-raise
+# the original exception (and let the orchestrator's terminal-error path
+# handle it).
+RecoverFn = Callable[[BaseException, bool], Awaitable[Any | None]]
+
+
+async def run_stream_loop(
+ *,
+ agent: Any,
+ streaming_service: VercelStreamingService,
+ config: dict[str, Any],
+ input_data: Any,
+ stream_result: StreamResult,
+ step_prefix: str = "thinking",
+ initial_step_id: str | None = None,
+ initial_step_title: str = "",
+ initial_step_items: list[str] | None = None,
+ fallback_commit_search_space_id: int | None,
+ fallback_commit_created_by_id: str | None,
+ fallback_commit_filesystem_mode: FilesystemMode,
+ fallback_commit_thread_id: int | None,
+ runtime_context: Any,
+ content_builder: Any | None,
+ recover: RecoverFn,
+ on_first_event: Callable[[], None] | None = None,
+) -> AsyncGenerator[str, None]:
+ """Yield SSE frames; on an ``Exception`` retry with the agent ``recover`` returns.
+
+ ``recover(exc, first_event_seen)`` returning ``None`` re-raises; any retry
+ limit is its job. ``on_first_event`` runs once, before the first frame is yielded.
+ """
+ first_event_logged = False  # set once a frame arrives; not reset between retries
+ while True:
+ try:
+ async for sse in stream_agent_events(
+ agent=agent,  # the only argument that changes between attempts
+ config=config,
+ input_data=input_data,
+ streaming_service=streaming_service,
+ result=stream_result,  # same object on every attempt
+ step_prefix=step_prefix,
+ initial_step_id=initial_step_id,
+ initial_step_title=initial_step_title,
+ initial_step_items=initial_step_items,
+ fallback_commit_search_space_id=fallback_commit_search_space_id,
+ fallback_commit_created_by_id=fallback_commit_created_by_id,
+ fallback_commit_filesystem_mode=fallback_commit_filesystem_mode,
+ fallback_commit_thread_id=fallback_commit_thread_id,
+ runtime_context=runtime_context,
+ content_builder=content_builder,
+ ):
+ if not first_event_logged:
+ if on_first_event is not None:
+ on_first_event()
+ first_event_logged = True
+ yield sse
+ return  # stream finished without raising: end the generator
+ except Exception as exc:
+ new_agent = await recover(exc, first_event_logged)  # callback decides whether a retry is allowed
+ if new_agent is None:
+ raise  # bare raise: propagates the original exception unchanged
+ agent = new_agent
+ continue  # restart the stream from scratch with the rebuilt agent
From 21bddc73a75c121cc33ee5880d54e618d4409c9d Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:31 +0200
Subject: [PATCH 10/87] refactor(chat): add
streaming/flows/shared/assistant_finalize.py
Extracts finalize_assistant_message: the post-stream server-side write
of the final assistant message (with content parts + token usage)
guarded by asyncio.shield + shielded_async_session so a client
disconnect cannot abort the persist.
Add-only; legacy stream_new_chat.py keeps its inline finalize block
until cutover.
---
.../flows/shared/assistant_finalize.py | 109 ++++++++++++++++++
1 file changed, 109 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py
new file mode 100644
index 000000000..d16f81ac7
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py
@@ -0,0 +1,109 @@
+"""Server-side assistant-message + token_usage finalization.
+
+Runs inside the streaming flow's ``finally`` block, after the main session has
+been closed (uses its own shielded session, so we don't fight the same DB
+connection).
+
+Idempotent against the legacy frontend ``appendMessage`` recovery branch:
+
+ * the assistant row was already INSERTed by ``persist_assistant_shell``
+ earlier in the turn, so this just UPDATEs it with the rich
+ ``ContentPart[]`` projection from the builder.
+ * ``token_usage`` uses ``INSERT ... ON CONFLICT DO NOTHING`` against the
+ partial unique index from migration 142, so a racing append_message
+ recovery branch can never double-write.
+
+``mark_interrupted`` closes any open text/reasoning blocks and flips running
+tool-calls (no result) to ``state=aborted`` so the persisted JSONB reflects a
+coherent end-state even on client disconnect.
+
+Never raises (best-effort, logs only).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+from app.utils.perf import get_perf_logger
+
+if TYPE_CHECKING:
+ from app.services.token_tracking_service import TokenAccumulator
+
+_perf_log = get_perf_logger()
+
+
+async def finalize_assistant_message(
+ *,
+ stream_result: StreamResult | None,
+ chat_id: int,
+ search_space_id: int,
+ user_id: str | None,
+ accumulator: TokenAccumulator,
+ log_prefix: str,
+) -> None:
+ """Snapshot the content builder and persist the final assistant payload.
+
+ No-op unless ``stream_result`` is set and has both ``turn_id`` and
+ ``assistant_message_id``. NOTE(review): nothing here catches exceptions, so
+ "never raises" holds only if the builder and ``finalize_assistant_turn`` don't.
+ """
+ if not (
+ stream_result
+ and stream_result.turn_id
+ and stream_result.assistant_message_id
+ ):
+ return
+
+ from app.tasks.chat.persistence import finalize_assistant_turn  # imported at call time, not module import
+
+ builder_stats: dict[str, int] | None = None
+ if stream_result.content_builder is not None:
+ stream_result.content_builder.mark_interrupted()
+ # Snapshot stats BEFORE ``snapshot()`` deepcopies so the perf log
+ # records the actual finalised payload (post-mark_interrupted), not
+ # the live-mutating builder state.
+ builder_stats = stream_result.content_builder.stats()
+ content_payload = stream_result.content_builder.snapshot()
+ else:
+ # Defensive fallback — we always set the builder alongside
+ # ``assistant_message_id`` in the orchestrator, so this branch only
+ # fires if a future refactor ever decouples them. Persist whatever
+ # accumulated text we captured so the row at least renders.
+ content_payload = [
+ {
+ "type": "text",
+ "text": stream_result.accumulated_text or "",
+ }
+ ]
+
+ if builder_stats is not None:  # perf line is skipped on the text-only fallback path
+ _perf_log.info(
+ "[%s] finalize_payload chat_id=%s "
+ "message_id=%s parts=%d bytes=%d text=%d "
+ "reasoning=%d tool_calls=%d "
+ "tool_calls_completed=%d tool_calls_aborted=%d "
+ "thinking_step_parts=%d step_separators=%d",
+ log_prefix,
+ chat_id,
+ stream_result.assistant_message_id,
+ builder_stats["parts"],
+ builder_stats["bytes"],
+ builder_stats["text"],
+ builder_stats["reasoning"],
+ builder_stats["tool_calls"],
+ builder_stats["tool_calls_completed"],
+ builder_stats["tool_calls_aborted"],
+ builder_stats["thinking_step_parts"],
+ builder_stats["step_separators"],
+ )
+
+ await finalize_assistant_turn(
+ message_id=stream_result.assistant_message_id,
+ chat_id=chat_id,
+ search_space_id=search_space_id,
+ user_id=user_id,
+ turn_id=stream_result.turn_id,
+ content=content_payload,
+ accumulator=accumulator,
+ )
From 927009745e8fde89c0741e5d4fa503984058c416 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:45 +0200
Subject: [PATCH 11/87] refactor(chat): add streaming/flows/new_chat/
per-concern leaf modules
Seven focused modules that the upcoming new_chat orchestrator
composes:
* auto_pin: resolve_initial_auto_pin selects the initial config (with
vision-capable filtering and error classification).
* llm_capability: check_image_input_capability blocks routing an
image-bearing turn to a known text-only model.
* runtime_context: build_new_chat_runtime_context assembles the
SurfSenseContextSchema for a new-chat turn.
* persistence_spawn: spawn_set_ai_responding_bg, spawn_persist_user_task,
spawn_persist_assistant_shell_task, and await_persist_task background
the four pre-stream DB writes so they overlap with agent build.
* initial_thinking_step: build_initial_thinking_step +
iter_initial_thinking_step_frame produce the very first thinking-1 SSE
step ("Understanding your request" / "Analyzing referenced content").
* title_gen: spawn_title_task + maybe_emit_title_update +
await_pending_title_update background the thread-title generator and
interleave its update into the stream when ready.
* input_state: build_new_chat_input_state assembles the LangGraph
input_state (history bootstrap, mentions resolution, context blocks,
human-message construction). The heavy one.
Add-only; no orchestrator yet (next commit).
---
.../chat/streaming/flows/new_chat/auto_pin.py | 95 +++++++
.../flows/new_chat/initial_thinking_step.py | 95 +++++++
.../streaming/flows/new_chat/input_state.py | 264 ++++++++++++++++++
.../flows/new_chat/llm_capability.py | 62 ++++
.../flows/new_chat/persistence_spawn.py | 129 +++++++++
.../flows/new_chat/runtime_context.py | 38 +++
.../streaming/flows/new_chat/title_gen.py | 237 ++++++++++++++++
7 files changed, 920 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/persistence_spawn.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py
new file mode 100644
index 000000000..cb20eb011
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/auto_pin.py
@@ -0,0 +1,95 @@
+"""Resolve the auto-pin for the *initial* turn config.
+
+Auto-pin (``selected_llm_config_id=0``) picks the best eligible LLM config for
+this thread / search space / user, optionally filtered to vision-capable
+configs when the turn carries images.
+
+Errors classified here:
+
+ * ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT`` — the auto-pin pool has no
+ vision-capable cfg for an image-bearing turn. The same gate fires later
+ in ``llm_capability`` for explicit selections; mapping both to the same
+ code keeps the FE error UI consistent.
+ * ``SERVER_ERROR`` — any other ``ValueError`` from the resolver.
+
+This module owns *initial* pin resolution; the rate-limit recovery loop has
+its own narrower auto-pin call (with ``exclude_config_ids``) in
+``flows/shared/rate_limit_recovery``.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.observability import otel as ot
+from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id
+
+
+@dataclass
+class AutoPinResult:
+    """Outcome of ``resolve_initial_auto_pin``.
+
+    On success ``llm_config_id`` holds the resolved config id and ``error`` is
+    ``None``. On failure ``llm_config_id`` is ``None`` and ``error`` is a
+    ``(message, error_code, error_kind)`` tuple so the orchestrator can emit
+    one terminal-error SSE frame.
+    """
+
+    # Resolved LLM config id; ``None`` when resolution failed.
+    llm_config_id: int | None
+    # ``(message, error_code, error_kind)``; ``None`` on success.
+    error: tuple[str, str, Literal["user_error", "server_error"]] | None
+
+
+async def resolve_initial_auto_pin(
+    session: AsyncSession,
+    *,
+    chat_id: int,
+    search_space_id: int,
+    user_id: str | None,
+    selected_llm_config_id: int,
+    requires_image_input: bool,
+    requested_llm_config_id: int,
+) -> AutoPinResult:
+    """Resolve the LLM config for this turn, turning resolver errors into data.
+
+    Calls ``resolve_or_get_pinned_llm_config_id`` with ``chat_id`` as the
+    thread id and records a ``model.pin.resolved`` event on success.
+    ``requested_llm_config_id`` is only reported in that event; the resolver
+    itself receives ``selected_llm_config_id``.
+
+    Returns:
+        ``AutoPinResult`` with ``llm_config_id`` set on success, or with
+        ``error=(message, error_code, error_kind)`` when a ``ValueError`` is
+        raised inside the ``try`` block. Other exception types propagate.
+    """
+    try:
+        resolution = await resolve_or_get_pinned_llm_config_id(
+            session,
+            thread_id=chat_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            selected_llm_config_id=selected_llm_config_id,
+            requires_image_input=requires_image_input,
+        )
+        resolved_id = resolution.resolved_llm_config_id
+        ot.add_event(
+            "model.pin.resolved",
+            {
+                "pin.requested_id": requested_llm_config_id,
+                "pin.resolved_id": resolved_id,
+                "pin.requires_image_input": requires_image_input,
+            },
+        )
+        return AutoPinResult(llm_config_id=resolved_id, error=None)
+    except ValueError as pin_error:
+        message = str(pin_error)
+        # Detection is by substring: an image-bearing turn whose resolver
+        # error mentions "vision-capable" gets the same error code that
+        # ``llm_capability.check_image_input_capability`` returns, so both
+        # gates look alike to the FE. Any other ValueError is a server error.
+        if requires_image_input and "vision-capable" in message:
+            error_code = "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"
+            error_kind: Literal["user_error", "server_error"] = "user_error"
+            ot.add_event("quota.denied", {"quota.code": error_code})
+        else:
+            error_code = "SERVER_ERROR"
+            error_kind = "server_error"
+        return AutoPinResult(
+            llm_config_id=None, error=(message, error_code, error_kind)
+        )
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py
new file mode 100644
index 000000000..c860e517e
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/initial_thinking_step.py
@@ -0,0 +1,95 @@
+"""Build and emit the first ``thinking-1`` step for a new-chat turn.
+
+The step title and "Processing X" items are derived from what the user sent
+(text snippet, image count, mentioned doc titles) so the FE can render a
+meaningful placeholder while the agent stream warms up.
+
+``thinking-1`` is the canonical id for this step — every subsequent
+``thinking-N`` produced by ``stream_agent_events`` folds into the same
+singleton ``data-thinking-steps`` part on the FE.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from dataclasses import dataclass
+from typing import Any
+
+from app.db import SurfsenseDocsDocument
+from app.services.new_streaming_service import VercelStreamingService
+
+
+@dataclass
+class InitialThinkingStep:
+    """Fields shared by the first thinking-step SSE frame and the builder hook.
+
+    ``title`` is the one-line step header; ``items`` is the bullet list under
+    it. ``step_id`` is hard-coded to ``thinking-1`` by the builder so the FE
+    Timeline can de-duplicate against the prior assistant message on resume.
+    """
+
+    # Always ``"thinking-1"`` when produced by ``build_initial_thinking_step``.
+    step_id: str
+    # One-line header shown for the step.
+    title: str
+    # Bullet lines rendered under the header.
+    items: list[str]
+
+
+def build_initial_thinking_step(
+    *,
+    user_query: str,
+    user_image_data_urls: list[str] | None,
+    mentioned_surfsense_docs: list[SurfsenseDocsDocument],
+) -> InitialThinkingStep:
+    """Derive the ``thinking-1`` title and its single summary item.
+
+    The item reads ``"<verb>: <subject>"`` plus an optional docs suffix. The
+    subject is the query clipped to 80 chars, else an image count, else
+    ``"(message)"``. With mentioned docs the verb/title switch to the
+    "Analyzing" wording and the suffix is one clipped title (30 chars max) or
+    an ``"N docs"`` count.
+    """
+    has_docs = bool(mentioned_surfsense_docs)
+    if has_docs:
+        title, action_verb = "Analyzing referenced content", "Analyzing"
+    else:
+        title, action_verb = "Understanding your request", "Processing"
+
+    if user_query.strip():
+        # Clip the raw (unstripped) query; the ellipsis marks truncation.
+        subject = user_query[:80] + ("..." if len(user_query) > 80 else "")
+    elif user_image_data_urls:
+        subject = f"[{len(user_image_data_urls)} image(s)]"
+    else:
+        subject = "(message)"
+    processing_parts = [subject]
+
+    if has_docs:
+        doc_names = [
+            doc.title if len(doc.title) <= 30 else doc.title[:27] + "..."
+            for doc in mentioned_surfsense_docs
+        ]
+        # A lone doc is shown by name; several collapse to a count.
+        label = doc_names[0] if len(doc_names) == 1 else f"{len(doc_names)} docs"
+        processing_parts.append(f"[{label}]")
+
+    items = [f"{action_verb}: {' '.join(processing_parts)}"]
+    return InitialThinkingStep(step_id="thinking-1", title=title, items=items)
+
+
+def iter_initial_thinking_step_frame(
+    step: InitialThinkingStep,
+    *,
+    streaming_service: VercelStreamingService,
+    content_builder: Any | None,
+) -> Iterator[str]:
+    """Record the initial step on the builder (if any), then yield its SSE frame.
+
+    This is a generator, so nothing runs until it is iterated. The builder
+    hook fires before the single frame is yielded, mirroring server-side the
+    fold the FE applies to the singleton ``data-thinking-steps`` part.
+    """
+    status = "in_progress"
+    if content_builder is not None:
+        content_builder.on_thinking_step(step.step_id, step.title, status, step.items)
+    frame = streaming_service.format_thinking_step(
+        step_id=step.step_id,
+        title=step.title,
+        status=status,
+        items=step.items,
+    )
+    yield frame
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py
new file mode 100644
index 000000000..fb171c244
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/input_state.py
@@ -0,0 +1,264 @@
+r"""Assemble the LangGraph ``input_state`` for the new-chat turn.
+
+Pipeline:
+
+ 1. **History bootstrap** — only for cloned chats with no LangGraph checkpoint
+ yet; flips the per-thread ``needs_history_bootstrap`` flag back to False
+ once the rows are loaded.
+ 2. **Mentioned SurfSense docs** — eager-load chunks so the formatter has the
+ full content without a second roundtrip.
+ 3. **Recent reports** — top 3 by id desc with non-null content, so the LLM
+ can resolve ``report_id`` for versioning without spelunking history.
+ 4. **@-mention resolve** (cloud mode) — substitute ``@title`` tokens in the
+ query with canonical ``\`/documents/...\``` paths the LLM expects.
+ 5. **Context block render** — XML-wrap surfsense docs + reports, prepend to
+ the rewritten query, optionally prefix with display name for SEARCH_SPACE
+ visibility.
+ 6. **HumanMessage** — multimodal content if images are attached.
+
+Returns the assembled ``input_state`` dict plus side-channel data the
+orchestrator needs downstream (``accepted_folder_ids`` for runtime context;
+``mentioned_surfsense_docs`` for the initial thinking step).
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+from langchain_core.messages import HumanMessage
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+from sqlalchemy.orm import selectinload
+
+from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
+from app.db import (
+ ChatVisibility,
+ NewChatThread,
+ Report,
+ SurfsenseDocsDocument,
+)
+from app.tasks.chat.streaming.context.mentioned_docs import (
+ format_mentioned_surfsense_docs_as_context,
+)
+from app.utils.content_utils import bootstrap_history_from_db
+from app.utils.user_message_multimodal import build_human_message_content
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class NewChatInputState:
+    """Everything ``build_new_chat_input_state`` produces.
+
+    ``input_state`` is fed straight to the agent. ``accepted_folder_ids``
+    feeds the runtime context (the resolver may have dropped some chips; the
+    list is empty when mention resolution is skipped).
+    ``mentioned_surfsense_docs`` is consumed by the initial thinking-step
+    builder for the FE placeholder before the agent stream starts.
+    """
+
+    # Dict with ``messages``, ``search_space_id``, ``request_id``, ``turn_id``.
+    input_state: dict[str, Any]
+    # Folder ids reported by the mention resolver (empty when it did not run).
+    accepted_folder_ids: list[int]
+    # Mentioned SurfSense docs, loaded with their chunks.
+    mentioned_surfsense_docs: list[SurfsenseDocsDocument]
+
+
+async def build_new_chat_input_state(
+    session: AsyncSession,
+    *,
+    chat_id: int,
+    search_space_id: int,
+    user_query: str,
+    user_image_data_urls: list[str] | None,
+    mentioned_document_ids: list[int] | None,
+    mentioned_surfsense_doc_ids: list[int] | None,
+    mentioned_folder_ids: list[int] | None,
+    mentioned_documents: list[dict[str, Any]] | None,
+    needs_history_bootstrap: bool,
+    thread_visibility: ChatVisibility,
+    current_user_display_name: str | None,
+    filesystem_mode: str,
+    request_id: str | None,
+    turn_id: str,
+) -> NewChatInputState:
+    """Assemble the agent ``input_state`` and side-channel data for one turn.
+
+    Runs, in order, on ``session``: optional history bootstrap (which also
+    clears and commits the thread's ``needs_history_bootstrap`` flag), the
+    mentioned-docs load, the recent-reports query, mention resolution, context
+    rendering and finally the ``HumanMessage`` build.
+
+    Returns:
+        ``NewChatInputState`` holding the ``input_state`` dict, the folder ids
+        accepted by mention resolution, and the loaded SurfSense docs.
+    """
+    langchain_messages: list[Any] = []
+
+    # One-time bootstrap: load prior rows, then clear the per-thread flag so
+    # later turns skip this branch.
+    if needs_history_bootstrap:
+        langchain_messages = await bootstrap_history_from_db(
+            session, chat_id, thread_visibility=thread_visibility
+        )
+        thread_result = await session.execute(
+            select(NewChatThread).filter(NewChatThread.id == chat_id)
+        )
+        thread = thread_result.scalars().first()
+        if thread:
+            thread.needs_history_bootstrap = False
+            await session.commit()
+
+    # Chunks are eager-loaded with the docs (``selectinload``) so no lazy load
+    # is needed when they are formatted later.
+    mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
+    if mentioned_surfsense_doc_ids:
+        result = await session.execute(
+            select(SurfsenseDocsDocument)
+            .options(selectinload(SurfsenseDocsDocument.chunks))
+            .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids))
+        )
+        mentioned_surfsense_docs = list(result.scalars().all())
+
+    # Top 3 reports keyed by id desc (newest first) with content present,
+    # surfaced inline so the LLM resolves ``report_id`` for versioning without
+    # digging through conversation history.
+    recent_reports_result = await session.execute(
+        select(Report)
+        .filter(
+            Report.thread_id == chat_id,
+            Report.content.isnot(None),
+        )
+        .order_by(Report.id.desc())
+        .limit(3)
+    )
+    recent_reports = list(recent_reports_result.scalars().all())
+
+    # Only the agent-facing copy of the query is rewritten; ``user_query``
+    # itself is left as the user typed it.
+    agent_user_query, accepted_folder_ids = await _resolve_mentions_for_query(
+        session,
+        search_space_id=search_space_id,
+        user_query=user_query,
+        filesystem_mode=filesystem_mode,
+        mentioned_document_ids=mentioned_document_ids,
+        mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
+        mentioned_folder_ids=mentioned_folder_ids,
+        mentioned_documents=mentioned_documents,
+    )
+
+    final_query = _render_query_with_context(
+        agent_user_query=agent_user_query,
+        mentioned_surfsense_docs=mentioned_surfsense_docs,
+        recent_reports=recent_reports,
+    )
+
+    # SEARCH_SPACE-visibility threads prefix the speaker's display name.
+    if thread_visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name:
+        final_query = f"**[{current_user_display_name}]:** {final_query}"
+
+    human_content = build_human_message_content(
+        final_query, list(user_image_data_urls or ())
+    )
+    langchain_messages.append(HumanMessage(content=human_content))
+
+    input_state = {
+        "messages": langchain_messages,
+        "search_space_id": search_space_id,
+        # A missing request id is reported as the literal "unknown".
+        "request_id": request_id or "unknown",
+        "turn_id": turn_id,
+    }
+
+    return NewChatInputState(
+        input_state=input_state,
+        accepted_folder_ids=accepted_folder_ids,
+        mentioned_surfsense_docs=mentioned_surfsense_docs,
+    )
+
+
+async def _resolve_mentions_for_query(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    user_query: str,
+    filesystem_mode: str,
+    mentioned_document_ids: list[int] | None,
+    mentioned_surfsense_doc_ids: list[int] | None,
+    mentioned_folder_ids: list[int] | None,
+    mentioned_documents: list[dict[str, Any]] | None,
+) -> tuple[str, list[int]]:
+    """Rewrite ``@title`` mentions in the query to canonical document paths.
+
+    Resolution only happens in cloud filesystem mode and only when at least
+    one mention input is non-empty; otherwise the query is returned unchanged
+    with an empty folder list. Local-folder mode keeps the legacy ``@title``
+    text (mention support there is a follow-up task — the mount-rooted path
+    scheme and the picker UI both need separate work).
+
+    Only the returned query is rewritten. The caller's ``user_query`` keeps
+    its ``@title`` tokens so that persistence, chip rendering on reload and
+    the human-readable surfaces (SSE "Processing X" status, auto thread title,
+    memory seed) still see what the user typed.
+
+    Returns:
+        ``(agent_user_query, accepted_folder_ids)`` — the rewritten query and
+        the folder ids reported by ``resolve_mentions``.
+    """
+    mentions_present = bool(
+        mentioned_document_ids
+        or mentioned_surfsense_doc_ids
+        or mentioned_folder_ids
+        or mentioned_documents
+    )
+    if filesystem_mode != FilesystemMode.CLOUD.value or not mentions_present:
+        return user_query, []
+
+    from app.schemas.new_chat import MentionedDocumentInfo
+
+    chips: list[MentionedDocumentInfo] | None = None
+    if mentioned_documents:
+        chips = []
+        for raw in mentioned_documents:
+            chip = raw if isinstance(raw, MentionedDocumentInfo) else None
+            if chip is None:
+                try:
+                    chip = MentionedDocumentInfo.model_validate(raw)
+                except Exception:
+                    # Best-effort: one malformed chip must not sink the turn.
+                    logger.debug(
+                        "stream_new_chat: dropping malformed mention chip %r", raw
+                    )
+                    continue
+            chips.append(chip)
+
+    resolution = await resolve_mentions(
+        session,
+        search_space_id=search_space_id,
+        mentioned_documents=chips,
+        mentioned_document_ids=mentioned_document_ids,
+        mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
+        mentioned_folder_ids=mentioned_folder_ids,
+    )
+    rewritten_query = substitute_in_text(user_query, resolution.token_to_path)
+    return rewritten_query, resolution.mentioned_folder_ids
+
+
+def _render_query_with_context(
+    *,
+    agent_user_query: str,
+    mentioned_surfsense_docs: list[SurfsenseDocsDocument],
+    recent_reports: list[Report],
+) -> str:
+    """Prefix the query with the mentioned-docs and recent-reports context.
+
+    With neither docs nor reports the query is returned untouched. Otherwise
+    the context blocks are joined by blank lines and placed before the query.
+
+    NOTE(review): these blocks were described as "XML blocks", but the
+    literals below carry no tags — confirm no wrapper tags were lost.
+    """
+    blocks: list[str] = []
+
+    if mentioned_surfsense_docs:
+        blocks.append(
+            format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
+        )
+
+    if recent_reports:
+        # One line per report; a missing style is shown as "detailed".
+        listing = "\n".join(
+            f' - report_id={report.id}, title="{report.title}", '
+            f'style="{report.report_style or "detailed"}"'
+            for report in recent_reports
+        )
+        blocks.append(
+            "\n"
+            "Previously generated reports in this conversation:\n"
+            f"{listing}\n\n"
+            "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of "
+            "these reports, set parent_report_id to the relevant report_id above.\n"
+            "If the user wants a completely NEW report on a different topic, "
+            "leave parent_report_id unset.\n"
+            " "
+        )
+
+    if not blocks:
+        return agent_user_query
+    return "\n\n".join(blocks) + f"\n\n{agent_user_query} "
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py
new file mode 100644
index 000000000..ff5a56eec
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/llm_capability.py
@@ -0,0 +1,62 @@
+"""Vision-capability gate for image-bearing turns.
+
+Capability safety net for explicit (non-auto-pin) selections: a turn carrying
+user-uploaded images cannot be routed to a chat config that LiteLLM's
+authoritative model map *explicitly* marks as text-only (``supports_vision``
+set to False). The check is intentionally narrow — it only fires when LiteLLM
+is *certain* the model can't accept image input; unknown / unmapped /
+vision-capable models pass through.
+
+Without this guard a known-text-only model would 404 at the provider with
+``"No endpoints found that support image input"``, surfacing as an opaque
+``SERVER_ERROR`` SSE chunk; failing here lets us return a friendly message that
+tells the user what to change.
+"""
+
+from __future__ import annotations
+
+from app.agents.new_chat.llm_config import AgentConfig
+from app.observability import otel as ot
+
+
+def check_image_input_capability(
+    *,
+    user_image_data_urls: list[str] | None,
+    agent_config: AgentConfig | None,
+) -> tuple[str, str] | None:
+    """Reject image-bearing turns aimed at a model known to be text-only.
+
+    The gate passes (returns ``None``) when the turn has no images, when no
+    ``agent_config`` is available, or when ``is_known_text_only_chat_model``
+    does not flag the configured model. Otherwise a ``quota.denied`` event is
+    recorded and ``(user_message, error_code)`` is returned; the caller emits
+    one terminal-error SSE frame from it.
+    """
+    if not user_image_data_urls or agent_config is None:
+        return None
+
+    from app.services.provider_capabilities import is_known_text_only_chat_model
+
+    litellm_params = agent_config.litellm_params or {}
+    # ``base_model`` is only read when the params are a plain dict.
+    base_model = None
+    if isinstance(litellm_params, dict):
+        base_model = litellm_params.get("base_model")
+
+    text_only = is_known_text_only_chat_model(
+        provider=agent_config.provider,
+        model_name=agent_config.model_name,
+        base_model=base_model,
+        custom_provider=agent_config.custom_provider,
+    )
+    if not text_only:
+        return None
+
+    error_code = "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"
+    model_label = agent_config.config_name or agent_config.model_name or "model"
+    ot.add_event("quota.denied", {"quota.code": error_code})
+    user_message = (
+        f"The selected model ({model_label}) does not support "
+        "image input. Switch to a vision-capable model "
+        "(e.g. GPT-4o, Claude, Gemini) or remove the image "
+        "attachment and try again."
+    )
+    return user_message, error_code
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/persistence_spawn.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/persistence_spawn.py
new file mode 100644
index 000000000..9ea5d2ad6
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/persistence_spawn.py
@@ -0,0 +1,129 @@
+"""Concurrent persistence tasks spawned right after the initial validation gate.
+
+These run *during* the rest of the pre-stream setup so we don't serialize
+their latency against agent construction. Awaiting them at the SSE message-id
+yield sites preserves the ghost-thread protection (the user-row INSERT must
+succeed before any LLM streaming begins).
+
+The ``set_ai_responding`` flag flip runs fully fire-and-forget on its own
+shielded session — failures only delay the "AI is responding…" UI flag, not
+the response itself.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from typing import Any
+from uuid import UUID
+
+from app.db import shielded_async_session
+from app.services.chat_session_state_service import set_ai_responding
+from app.tasks.chat.persistence import (
+ persist_assistant_shell,
+ persist_user_turn,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def spawn_set_ai_responding_bg(
+    *,
+    chat_id: int,
+    user_id: str | None,
+    background_tasks: set[asyncio.Task[Any]],
+) -> None:
+    """Schedule the per-thread AI-responding flag flip without waiting for it.
+
+    Does nothing when ``user_id`` is empty. Otherwise the write runs in its
+    own task on a dedicated shielded session; any failure is logged as a
+    warning and swallowed, because a stale UI flag is preferable to delaying
+    the SSE stream behind a flag write. The task stays in ``background_tasks``
+    until it completes.
+    """
+    if not user_id:
+        return
+
+    async def _flip_flag() -> None:
+        try:
+            async with shielded_async_session() as flag_session:
+                await set_ai_responding(flag_session, chat_id, UUID(user_id))
+        except Exception:
+            logger.warning(
+                "set_ai_responding failed (chat_id=%s)",
+                chat_id,
+                exc_info=True,
+            )
+
+    flag_task = asyncio.create_task(_flip_flag())
+    background_tasks.add(flag_task)
+    flag_task.add_done_callback(background_tasks.discard)
+
+
+def spawn_persist_user_task(
+    *,
+    chat_id: int,
+    user_id: str | None,
+    turn_id: str,
+    user_query: str,
+    user_image_data_urls: list[str] | None,
+    mentioned_documents: list[dict[str, Any]] | None,
+    background_tasks: set[asyncio.Task[Any]],
+) -> asyncio.Task[int | None]:
+    """Start ``persist_user_turn`` as a task and hand the task back.
+
+    The task is kept in ``background_tasks`` until it finishes (a
+    done-callback discards it). The caller awaits the returned task at the
+    user-message-id yield site.
+    """
+    insert_coro = persist_user_turn(
+        chat_id=chat_id,
+        user_id=user_id,
+        turn_id=turn_id,
+        user_query=user_query,
+        user_image_data_urls=user_image_data_urls,
+        mentioned_documents=mentioned_documents,
+    )
+    user_task = asyncio.create_task(insert_coro)
+    background_tasks.add(user_task)
+    user_task.add_done_callback(background_tasks.discard)
+    return user_task
+
+
+def spawn_persist_assistant_shell_task(
+    *,
+    chat_id: int,
+    user_id: str | None,
+    turn_id: str,
+    background_tasks: set[asyncio.Task[Any]],
+) -> asyncio.Task[int | None]:
+    """Start ``persist_assistant_shell`` as a task and hand the task back.
+
+    The task is kept in ``background_tasks`` until it finishes (a
+    done-callback discards it). The caller awaits the returned task at the
+    assistant-message-id yield site.
+    """
+    shell_coro = persist_assistant_shell(
+        chat_id=chat_id,
+        user_id=user_id,
+        turn_id=turn_id,
+    )
+    shell_task = asyncio.create_task(shell_coro)
+    background_tasks.add(shell_task)
+    shell_task.add_done_callback(background_tasks.discard)
+    return shell_task
+
+
+async def await_persist_task(
+    task: asyncio.Task[int | None] | None,
+    *,
+    chat_id: int,
+    turn_id: str,
+    log_label: str,
+) -> int | None:
+    """Wait for a spawned persistence task and return its result.
+
+    The task is awaited through ``asyncio.shield`` so that cancelling this
+    coroutine (e.g. on client disconnect) does not cancel the DB write itself.
+
+    Returns:
+        The task's result, or ``None`` when ``task`` is ``None`` or the task
+        raised; failures are logged under ``log_label``. The caller
+        abort-paths the turn with a friendly error SSE in that case.
+
+    Raises:
+        asyncio.CancelledError: re-raised untouched so cancellation propagates.
+    """
+    if task is None:
+        return None
+
+    try:
+        message_id = await asyncio.shield(task)
+    except asyncio.CancelledError:
+        raise
+    except Exception:
+        logger.exception(
+            "%s failed (chat_id=%s, turn_id=%s)", log_label, chat_id, turn_id
+        )
+        message_id = None
+    return message_id
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
new file mode 100644
index 000000000..1f11be1fe
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
@@ -0,0 +1,38 @@
+"""Build the per-invocation ``SurfSenseContextSchema`` for a new-chat turn.
+
+Carries the per-turn read inputs that middlewares read via
+``runtime.context.*`` instead of from their ``__init__`` closures, so the same
+compiled-agent instance can serve multiple turns with different
+mention lists / request ids / turn ids without rebuilding the graph.
+"""
+
+from __future__ import annotations
+
+from app.agents.new_chat.context import SurfSenseContextSchema
+
+
+def build_new_chat_runtime_context(
+    *,
+    search_space_id: int,
+    mentioned_document_ids: list[int] | None,
+    accepted_folder_ids: list[int],
+    mentioned_folder_ids: list[int] | None,
+    request_id: str | None,
+    turn_id: str,
+) -> SurfSenseContextSchema:
+    """Build the per-turn ``SurfSenseContextSchema``.
+
+    ``mentioned_document_ids`` is consumed by ``KnowledgePriorityMiddleware``.
+    For folders, the post-resolve ``accepted_folder_ids`` wins over the raw
+    ``mentioned_folder_ids`` from the request: the resolver drops chips that
+    pointed at deleted folders or folders the caller can't see, so
+    middlewares only get authorized ids. Both id lists are copied.
+
+    NOTE(review): the raw request ids are used whenever
+    ``accepted_folder_ids`` is empty, which also covers the case where the
+    resolver ran and dropped every chip — confirm that fallback is intended.
+    """
+    folder_ids = accepted_folder_ids or mentioned_folder_ids or []
+    document_ids = mentioned_document_ids or []
+    return SurfSenseContextSchema(
+        search_space_id=search_space_id,
+        mentioned_document_ids=list(document_ids),
+        mentioned_folder_ids=list(folder_ids),
+        request_id=request_id,
+        turn_id=turn_id,
+    )
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py
new file mode 100644
index 000000000..11312110f
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/title_gen.py
@@ -0,0 +1,237 @@
+"""Background thread-title generation (first-response only).
+
+The first assistant response in a thread gets a short auto-generated title
+inserted into ``new_chat_threads.title``. We:
+
+ 1. Spawn the generation as an ``asyncio.Task`` so it runs in parallel with
+ the agent stream (no extra TTFT).
+ 2. Probe inside the task (on its own shielded session) whether this is
+ actually the first response — newer turns short-circuit to ``None``.
+ 3. Inject the resulting ``thread-title-update`` SSE frame on the first agent
+ event after the task completes (mid-stream interlock), or right before
+ the finish frames (post-stream join) if the task hadn't finished yet.
+
+Usage tokens come directly off the response (LiteLLM's async callback fires
+via fire-and-forget ``create_task``, so the ``TokenTrackingCallback`` would
+run too late). We also blank the per-task accumulator so the late callback
+doesn't double-count.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from typing import TYPE_CHECKING, Any
+
+from sqlalchemy.future import select
+
+from app.db import NewChatMessage, NewChatThread, shielded_async_session
+from app.prompts import TITLE_GENERATION_PROMPT
+from app.services.new_streaming_service import VercelStreamingService
+
+if TYPE_CHECKING:
+ from app.agents.new_chat.llm_config import AgentConfig
+ from app.services.token_tracking_service import TokenAccumulator
+
+
+logger = logging.getLogger(__name__)
+
+
+def spawn_title_task(
+    *,
+    chat_id: int,
+    user_query: str,
+    user_image_data_urls: list[str] | None,
+    assistant_message_id: int | None,
+    llm: Any,
+    agent_config: AgentConfig | None,
+) -> asyncio.Task[tuple[str | None, dict | None]] | None:
+    """Start ``_generate_title`` as a task, or return ``None`` if not allowed.
+
+    Title generation requires a real ``assistant_message_id``: a stream that
+    aborts before persistence must never leave a thread with a title and no
+    anchoring rows.
+    """
+    if assistant_message_id is None:
+        return None
+
+    title_coro = _generate_title(
+        chat_id=chat_id,
+        user_query=user_query,
+        user_image_data_urls=user_image_data_urls,
+        assistant_message_id=assistant_message_id,
+        llm=llm,
+        agent_config=agent_config,
+    )
+    return asyncio.create_task(title_coro)
+
+
+async def _generate_title(
+    *,
+    chat_id: int,
+    user_query: str,
+    user_image_data_urls: list[str] | None,
+    assistant_message_id: int,
+    llm: Any,
+    agent_config: AgentConfig | None,
+) -> tuple[str | None, dict | None]:
+    """Generate a thread title for the first assistant response of a thread.
+
+    Steps: probe (on a dedicated shielded session) whether any *other*
+    assistant row exists for the thread, then ask the LLM for a title seeded
+    with the first 500 chars of the query (or an image-count placeholder).
+
+    No ``Exception`` escapes: every failure is logged and reported as
+    ``(None, None)``.
+
+    Returns:
+        ``(title, usage)``. ``title`` is ``None`` when this is not the first
+        response, when the probe or the LLM call failed, or when the model
+        returned no usable title (missing, empty, quotes only, or longer than
+        100 chars). ``usage`` is a dict with ``model``, ``prompt_tokens``,
+        ``completion_tokens`` and ``total_tokens`` when the response carried
+        usage data, else ``None``.
+    """
+    try:
+        from litellm import acompletion
+
+        from app.services.llm_router_service import LLMRouterService
+        from app.services.provider_api_base import resolve_api_base
+        from app.services.token_tracking_service import _turn_accumulator
+
+        # Excludes this turn's own assistant row (pre-written by
+        # ``persist_assistant_shell``) — without the ``!=`` filter the gate
+        # would false-negative on every turn after the first.
+        try:
+            async with shielded_async_session() as probe_session:
+                probe_result = await probe_session.execute(
+                    select(NewChatMessage.id)
+                    .filter(
+                        NewChatMessage.thread_id == chat_id,
+                        NewChatMessage.role == "assistant",
+                        NewChatMessage.id != assistant_message_id,
+                    )
+                    .limit(1)
+                )
+                is_first_response = probe_result.scalars().first() is None
+        except Exception:
+            logger.warning(
+                "[TitleGen] first-response probe failed (chat_id=%s)",
+                chat_id,
+                exc_info=True,
+            )
+            return None, None
+
+        if not is_first_response:
+            return None, None
+
+        # Blank the per-task accumulator so the late LiteLLM callback does not
+        # double-count the usage we read off the response below.
+        _turn_accumulator.set(None)
+
+        title_seed = user_query.strip() or (
+            f"[{len(user_image_data_urls or [])} image(s)]"
+            if user_image_data_urls
+            else ""
+        )
+        prompt = TITLE_GENERATION_PROMPT.replace(
+            "{user_query}", title_seed[:500] or "(message)"
+        )
+        messages = [{"role": "user", "content": prompt}]
+
+        raw_model = getattr(llm, "model", "") or ""
+        if raw_model == "auto":
+            router = LLMRouterService.get_router()
+            response = await router.acompletion(model="auto", messages=messages)
+        else:
+            # Apply the same ``api_base`` cascade chat / vision / image-gen
+            # call sites use so we never inherit ``litellm.api_base``
+            # (commonly set by ``AZURE_OPENAI_ENDPOINT``) when the chat
+            # config itself ships an empty ``api_base``. Without this the
+            # title-gen on an OpenRouter chat config would 404 against the
+            # inherited Azure endpoint — see ``provider_api_base`` for the
+            # same bug repro on the image-gen / vision paths.
+            provider_prefix = (
+                raw_model.split("/", 1)[0] if "/" in raw_model else None
+            )
+            provider_value = (
+                agent_config.provider if agent_config is not None else None
+            )
+            title_api_base = resolve_api_base(
+                provider=provider_value,
+                provider_prefix=provider_prefix,
+                config_api_base=getattr(llm, "api_base", None),
+            )
+            response = await acompletion(
+                model=raw_model,
+                messages=messages,
+                api_key=getattr(llm, "api_key", None),
+                api_base=title_api_base,
+            )
+
+        usage_info = None
+        usage = getattr(response, "usage", None)
+        if usage:
+            # Report the model without its provider prefix.
+            model_name = (
+                raw_model.split("/", 1)[-1]
+                if "/" in raw_model
+                else (raw_model or response.model or "unknown")
+            )
+            usage_info = {
+                "model": model_name,
+                "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
+                "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
+                "total_tokens": getattr(usage, "total_tokens", 0) or 0,
+            }
+
+        # ``content`` may be ``None`` and ``choices`` may be empty; treat both
+        # as "no title" instead of raising, so the usage collected above is
+        # still returned to the caller.
+        choices = getattr(response, "choices", None) or []
+        content = choices[0].message.content if choices else None
+        raw_title = (content or "").strip()
+        if raw_title and len(raw_title) <= 100:
+            # A reply made only of quote characters cleans down to nothing.
+            return (raw_title.strip("\"'") or None), usage_info
+        return None, usage_info
+    except Exception:
+        logger.exception("[TitleGen] _generate_title failed")
+        return None, None
+
+
+async def _apply_title_result(
+    result: tuple[str | None, dict | None],
+    *,
+    chat_id: int,
+    accumulator: TokenAccumulator,
+    streaming_service: VercelStreamingService,
+) -> list[str]:
+    """Record title usage, persist the title, and build its SSE frame.
+
+    Shared tail of ``maybe_emit_title_update`` and
+    ``await_pending_title_update``. Usage (when present) is added to
+    ``accumulator`` even if no title was produced. The title is written on a
+    dedicated shielded session; the frame is built even when the thread row is
+    not found.
+
+    Returns:
+        A list with the single ``thread-title-update`` frame, or an empty list
+        when there is no title to emit.
+    """
+    generated_title, title_usage = result
+    if title_usage:
+        accumulator.add(**title_usage)
+    if not generated_title:
+        return []
+    async with shielded_async_session() as title_session:
+        title_thread_result = await title_session.execute(
+            select(NewChatThread).filter(NewChatThread.id == chat_id)
+        )
+        title_thread = title_thread_result.scalars().first()
+        if title_thread:
+            title_thread.title = generated_title
+            await title_session.commit()
+    return [streaming_service.format_thread_title_update(chat_id, generated_title)]
+
+
+async def maybe_emit_title_update(
+    *,
+    title_task: asyncio.Task[tuple[str | None, dict | None]] | None,
+    title_emitted: bool,
+    chat_id: int,
+    accumulator: TokenAccumulator,
+    streaming_service: VercelStreamingService,
+):
+    """Inject one ``thread-title-update`` SSE if the task completed.
+
+    Async generator. Yields the SSE frame (when applicable) and never blocks
+    on the task: it is a no-op when there is no task, the title was already
+    emitted, or the task is still running. Returns nothing; the orchestrator
+    flips ``title_emitted`` itself after iterating so we don't fight Python's
+    nonlocal-in-generator semantics.
+    """
+    if title_task is None or title_emitted or not title_task.done():
+        return
+    # ``done()`` is also true for a cancelled task, whose ``result()`` raises
+    # ``CancelledError``; skip it rather than cancel the surrounding stream.
+    if title_task.cancelled():
+        return
+    frames = await _apply_title_result(
+        title_task.result(),
+        chat_id=chat_id,
+        accumulator=accumulator,
+        streaming_service=streaming_service,
+    )
+    for frame in frames:
+        yield frame
+
+
+async def await_pending_title_update(
+    *,
+    title_task: asyncio.Task[tuple[str | None, dict | None]] | None,
+    title_emitted: bool,
+    chat_id: int,
+    accumulator: TokenAccumulator,
+    streaming_service: VercelStreamingService,
+):
+    """If the task hadn't completed during the stream, await it now and emit.
+
+    Async generator used right before the finish frames in the success path.
+    Mirror of ``maybe_emit_title_update`` but unconditionally awaits the task
+    (so a cancelled task still raises ``CancelledError`` here).
+    """
+    if title_task is None or title_emitted:
+        return
+    frames = await _apply_title_result(
+        await title_task,
+        chat_id=chat_id,
+        accumulator=accumulator,
+        streaming_service=streaming_service,
+    )
+    for frame in frames:
+        yield frame
From b2a08885887c995034bdb759a43105f326737e47 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:49:55 +0200
Subject: [PATCH 12/87] refactor(chat): add
streaming/flows/new_chat/orchestrator.stream_new_chat
Slim composition root for the new-chat streaming flow. Sequences:
1. validate inputs and load the LLM bundle (negative id => YAML)
2. open the OTEL chat_request span; set agent_mode tag
3. spawn the four pre-stream DB writes (set-ai-responding, persist
user turn, persist assistant shell, first-assistant probe)
4. reserve premium quota (with free-fallback retry on denial)
5. build connector + checkpointer + agent + input_state
6. emit first frames (message-start, step-start, initial thinking step)
7. spawn the background title generator
8. run the shared stream_loop with a flow-local _recover closure that
reroutes to the next auto-pin config on provider 429s
9. finalize: emit terminal title/token frames, shielded assistant
finalize, release-or-finalize premium quota, close session, GC,
record OTEL outcome
Public entry-point flows/new_chat/__init__ re-exports stream_new_chat.
Existing wiring (routes, tests) still imports the legacy function from
app.tasks.chat.stream_new_chat. Cutover is a later commit.
---
.../chat/streaming/flows/new_chat/__init__.py | 12 +
.../streaming/flows/new_chat/orchestrator.py | 868 ++++++++++++++++++
2 files changed, 880 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/__init__.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/__init__.py
new file mode 100644
index 000000000..566d5e0d9
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/__init__.py
@@ -0,0 +1,12 @@
+"""New-chat streaming flow.
+
+The public entry point ``stream_new_chat`` is the slim coroutine in
+``orchestrator.py`` that composes the per-concern modules in this folder and
+the building blocks under ``flows/shared/``.
+"""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.flows.new_chat.orchestrator import stream_new_chat
+
+__all__ = ["stream_new_chat"]
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py
new file mode 100644
index 000000000..bca72b5ea
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/orchestrator.py
@@ -0,0 +1,868 @@
+"""``stream_new_chat`` — public entry point for a fresh chat turn.
+
+Slim composition layer over the per-concern modules in this folder and the
+building blocks under ``flows/shared/``. Phases 1-8 map to the ``Block N``
+comments below (9-10: except/finally) so the on-the-wire ordering stays explicit:
+
+ 1. Validation / config — auto-pin, LLM bundle, capability, premium reserve.
+ 2. Concurrent persistence + pre-stream setup — spawn DB writes, build the
+ connector, fetch the checkpointer, build the agent.
+ 3. Input assembly — history bootstrap, mentions, surfsense docs, reports.
+ 4. First SSE frames — message_start, start_step, turn-info, turn-status.
+ 5. Persistence join + message-id frames (ghost-thread protection).
+ 6. Initial thinking step + title task + runtime context.
+ 7. Stream loop with in-stream rate-limit recovery + mid-stream title emit.
+ 8. Finalize — premium debit, token-usage SSE, finish frames.
+ 9. Exception branch — classify, emit terminal error, finish frames.
+ 10. Finally — premium release, session close, assistant finalize, GC, span.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import logging
+import time
+from collections.abc import AsyncGenerator
+from functools import partial
+from typing import Any, Literal
+
+import anyio
+
+from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
+from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
+from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
+from app.agents.new_chat.middleware.busy_mutex import end_turn
+from app.config import config as _app_config
+from app.db import ChatVisibility, async_session_maker
+from app.observability import otel as ot
+from app.services.new_streaming_service import VercelStreamingService
+from app.tasks.chat.content_builder import AssistantContentBuilder
+from app.tasks.chat.streaming.agent.builder import build_main_agent_for_thread
+from app.tasks.chat.streaming.contract.file_contract import log_file_contract
+from app.tasks.chat.streaming.errors.emitter import emit_stream_terminal_error
+from app.tasks.chat.streaming.flows.new_chat.auto_pin import resolve_initial_auto_pin
+from app.tasks.chat.streaming.flows.new_chat.initial_thinking_step import (
+ build_initial_thinking_step,
+ iter_initial_thinking_step_frame,
+)
+from app.tasks.chat.streaming.flows.new_chat.input_state import (
+ build_new_chat_input_state,
+)
+from app.tasks.chat.streaming.flows.new_chat.llm_capability import (
+ check_image_input_capability,
+)
+from app.tasks.chat.streaming.flows.new_chat.persistence_spawn import (
+ await_persist_task,
+ spawn_persist_assistant_shell_task,
+ spawn_persist_user_task,
+ spawn_set_ai_responding_bg,
+)
+from app.tasks.chat.streaming.flows.new_chat.runtime_context import (
+ build_new_chat_runtime_context,
+)
+from app.tasks.chat.streaming.flows.new_chat.title_gen import (
+ await_pending_title_update,
+ maybe_emit_title_update,
+ spawn_title_task,
+)
+from app.tasks.chat.streaming.flows.shared.assistant_finalize import (
+ finalize_assistant_message,
+)
+from app.tasks.chat.streaming.flows.shared.finalize_emit import iter_token_usage_frame
+from app.tasks.chat.streaming.flows.shared.finally_cleanup import (
+ close_session_and_clear_ai_responding,
+ run_gc_pass,
+)
+from app.tasks.chat.streaming.flows.shared.first_frames import (
+ iter_final_frames,
+ iter_initial_frames,
+)
+from app.tasks.chat.streaming.flows.shared.llm_bundle import load_llm_bundle
+from app.tasks.chat.streaming.flows.shared.pre_stream_setup import (
+ get_chat_checkpointer,
+ setup_connector_and_firecrawl,
+)
+from app.tasks.chat.streaming.flows.shared.premium_quota import (
+ PremiumReservation,
+ finalize_premium,
+ needs_premium_quota,
+ release_premium,
+ reserve_premium,
+)
+from app.tasks.chat.streaming.flows.shared.rate_limit_recovery import (
+ can_recover_provider_rate_limit,
+ log_rate_limit_recovered,
+ reroute_to_next_auto_pin,
+)
+from app.tasks.chat.streaming.flows.shared.span import (
+ close_chat_request_span,
+ open_chat_request_span,
+ set_agent_mode,
+)
+from app.tasks.chat.streaming.flows.shared.stream_loop import run_stream_loop
+from app.tasks.chat.streaming.flows.shared.terminal_error import (
+ handle_terminal_exception,
+)
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+from app.utils.perf import get_perf_logger, log_system_snapshot
+
+logger = logging.getLogger(__name__)
+_perf_log = get_perf_logger()
+
+# Strong references to spawned background tasks (set_ai_responding,
+# persist_user, persist_asst) so the GC doesn't drop them before they finish.
+# Module-level, so one set is shared by every turn served by this process.
+_background_tasks: set[asyncio.Task] = set()
+
+
+async def stream_new_chat(
+    user_query: str,
+    search_space_id: int,
+    chat_id: int,
+    user_id: str | None = None,
+    llm_config_id: int = -1,
+    mentioned_document_ids: list[int] | None = None,
+    mentioned_surfsense_doc_ids: list[int] | None = None,
+    mentioned_folder_ids: list[int] | None = None,
+    mentioned_documents: list[dict[str, Any]] | None = None,
+    checkpoint_id: str | None = None,
+    needs_history_bootstrap: bool = False,
+    thread_visibility: ChatVisibility | None = None,
+    current_user_display_name: str | None = None,
+    disabled_tools: list[str] | None = None,
+    filesystem_selection: FilesystemSelection | None = None,
+    request_id: str | None = None,
+    user_image_data_urls: list[str] | None = None,
+    flow: Literal["new", "regenerate"] = "new",
+) -> AsyncGenerator[str, None]:
+    """Stream a new chat turn using the SurfSense deep agent.
+
+    Uses the Vercel AI SDK Data Stream Protocol (SSE). ``chat_id`` is the
+    LangGraph thread id (durable conversation memory via the checkpointer).
+    Manages its own database session so cleanup runs even when Starlette
+    cancels the task on client disconnect.
+    """
+    streaming_service = VercelStreamingService()
+    stream_result = StreamResult()
+    _t_total = time.perf_counter()  # request-start reference for perf logs + span duration
+    fs_mode = filesystem_selection.mode.value if filesystem_selection else "cloud"
+    fs_platform = (
+        filesystem_selection.client_platform.value if filesystem_selection else "web"
+    )
+    stream_result.request_id = request_id
+    stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"  # "<chat_id>:<epoch ms>"
+    stream_result.filesystem_mode = fs_mode
+    stream_result.client_platform = fs_platform
+
+    chat_agent_mode = "unknown"  # replaced by "multi"/"single" once the agent flavour is chosen
+    chat_outcome = "success"
+    chat_error_category: str | None = None
+    chat_span_cm, chat_span = open_chat_request_span(
+        chat_id=chat_id,
+        search_space_id=search_space_id,
+        flow=flow,
+        request_id=request_id,
+        turn_id=stream_result.turn_id,
+        filesystem_mode=fs_mode,
+        client_platform=fs_platform,
+        agent_mode=chat_agent_mode,
+    )
+    log_file_contract("turn_start", stream_result)
+    _perf_log.info(
+        "[stream_new_chat] filesystem_mode=%s client_platform=%s",
+        fs_mode,
+        fs_platform,
+    )
+    log_system_snapshot("stream_new_chat_START")
+
+    from app.services.token_tracking_service import start_turn
+
+    accumulator = start_turn()
+
+    premium_reservation: PremiumReservation | None = None
+    busy_error_raised = False  # set from the terminal-exception summary; gates end_turn below
+
+    emit_stream_error = partial(
+        emit_stream_terminal_error,
+        streaming_service=streaming_service,
+        flow=flow,
+        request_id=request_id,
+        thread_id=chat_id,
+        search_space_id=search_space_id,
+        user_id=user_id,
+    )
+
+    session = async_session_maker()
+    # Declared at function scope so SSE-yield join points and the finally
+    # clause see them on every exit path.
+    persist_user_task: asyncio.Task[int | None] | None = None
+    persist_asst_task: asyncio.Task[int | None] | None = None
+    try:
+        spawn_set_ai_responding_bg(
+            chat_id=chat_id, user_id=user_id, background_tasks=_background_tasks
+        )
+
+        # --- Block 1: LLM config + capability ---
+
+        requested_llm_config_id = llm_config_id  # caller's choice; llm_config_id may be repinned
+        requires_image_input = bool(user_image_data_urls)
+
+        _t0 = time.perf_counter()
+        pin_result = await resolve_initial_auto_pin(
+            session,
+            chat_id=chat_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            selected_llm_config_id=llm_config_id,
+            requires_image_input=requires_image_input,
+            requested_llm_config_id=requested_llm_config_id,
+        )
+        if pin_result.error is not None:
+            message, error_code, error_kind = pin_result.error
+            yield emit_stream_error(
+                message=message, error_kind=error_kind, error_code=error_code
+            )
+            yield streaming_service.format_done()
+            return
+        llm_config_id = pin_result.llm_config_id  # type: ignore[assignment]
+
+        llm, agent_config, llm_load_error = await load_llm_bundle(
+            session, config_id=llm_config_id, search_space_id=search_space_id
+        )
+        if llm_load_error:
+            yield emit_stream_error(
+                message=llm_load_error,
+                error_kind="server_error",
+                error_code="SERVER_ERROR",
+            )
+            yield streaming_service.format_done()
+            return
+        _perf_log.info(
+            "[stream_new_chat] LLM config loaded in %.3fs (config_id=%s)",
+            time.perf_counter() - _t0,
+            llm_config_id,
+        )
+
+        capability_error = check_image_input_capability(
+            user_image_data_urls=user_image_data_urls, agent_config=agent_config
+        )
+        if capability_error is not None:
+            message, error_code = capability_error
+            yield emit_stream_error(
+                message=message,
+                error_kind="user_error",
+                error_code=error_code,
+            )
+            yield streaming_service.format_done()
+            return
+
+        if needs_premium_quota(agent_config, user_id):
+            premium_reservation = await reserve_premium(
+                agent_config=agent_config, user_id=user_id  # type: ignore[arg-type]
+            )
+            if not premium_reservation.allowed:
+                ot.add_event("quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"})
+                if requested_llm_config_id == 0:  # presumably 0 = "auto" selection — confirm
+                    pin_fallback = await resolve_initial_auto_pin(
+                        session,
+                        chat_id=chat_id,
+                        search_space_id=search_space_id,
+                        user_id=user_id,
+                        selected_llm_config_id=0,
+                        requires_image_input=requires_image_input,
+                        requested_llm_config_id=requested_llm_config_id,
+                    )
+                    if pin_fallback.error is not None:
+                        message, error_code, error_kind = pin_fallback.error
+                        yield emit_stream_error(
+                            message=message,
+                            error_kind=error_kind,
+                            error_code=error_code,
+                        )
+                        yield streaming_service.format_done()
+                        return
+                    llm_config_id = pin_fallback.llm_config_id  # type: ignore[assignment]
+                    ot.add_event(
+                        "model.repin",
+                        {
+                            "repin.reason": "premium_quota_exhausted",
+                            "repin.to_config_id": llm_config_id,
+                        },
+                    )
+                    llm, agent_config, llm_load_error = await load_llm_bundle(
+                        session,
+                        config_id=llm_config_id,
+                        search_space_id=search_space_id,
+                    )
+                    if llm_load_error:
+                        yield emit_stream_error(
+                            message=llm_load_error,
+                            error_kind="server_error",
+                            error_code="SERVER_ERROR",
+                        )
+                        yield streaming_service.format_done()
+                        return
+                    premium_reservation = None  # finalize/release below are gated on "is not None"
+                    # Re-route to free fallback logged via the structured
+                    # stream-error logger so cost/analytics see the auto-switch.
+                    from app.tasks.chat.streaming.errors.classifier import (
+                        log_chat_stream_error,
+                    )
+
+                    log_chat_stream_error(
+                        flow=flow,
+                        error_kind="premium_quota_exhausted",
+                        error_code="PREMIUM_QUOTA_EXHAUSTED",
+                        severity="info",
+                        is_expected=True,
+                        request_id=request_id,
+                        thread_id=chat_id,
+                        search_space_id=search_space_id,
+                        user_id=user_id,
+                        message=(
+                            "Premium quota exhausted on pinned model; "
+                            "auto-fallback switched to a free model"
+                        ),
+                        extra={
+                            "fallback_config_id": llm_config_id,
+                            "auto_fallback": True,
+                        },
+                    )
+                else:
+                    yield emit_stream_error(
+                        message=(
+                            "Buy more tokens to continue with this model, or "
+                            "switch to a free model"
+                        ),
+                        error_kind="premium_quota_exhausted",
+                        error_code="PREMIUM_QUOTA_EXHAUSTED",
+                        severity="info",
+                        is_expected=True,
+                        extra={
+                            "resolved_config_id": llm_config_id,
+                            "auto_fallback": False,
+                        },
+                    )
+                    yield streaming_service.format_done()
+                    return
+
+        if not llm:
+            yield emit_stream_error(
+                message="Failed to create LLM instance",
+                error_kind="server_error",
+                error_code="SERVER_ERROR",
+            )
+            yield streaming_service.format_done()
+            return
+
+        # --- Block 2: Spawn concurrent persistence; build pre-stream setup ---
+
+        persist_user_task = spawn_persist_user_task(
+            chat_id=chat_id,
+            user_id=user_id,
+            turn_id=stream_result.turn_id,
+            user_query=user_query,
+            user_image_data_urls=user_image_data_urls,
+            mentioned_documents=mentioned_documents,
+            background_tasks=_background_tasks,
+        )
+        persist_asst_task = spawn_persist_assistant_shell_task(
+            chat_id=chat_id,
+            user_id=user_id,
+            turn_id=stream_result.turn_id,
+            background_tasks=_background_tasks,
+        )
+
+        _t0 = time.perf_counter()
+        connector_service, firecrawl_api_key = await setup_connector_and_firecrawl(
+            session, search_space_id=search_space_id
+        )
+        _perf_log.info(
+            "[stream_new_chat] Connector service + firecrawl key in %.3fs",
+            time.perf_counter() - _t0,
+        )
+
+        _t0 = time.perf_counter()
+        checkpointer = await get_chat_checkpointer()
+        _perf_log.info(
+            "[stream_new_chat] Checkpointer ready in %.3fs", time.perf_counter() - _t0
+        )
+
+        visibility = thread_visibility or ChatVisibility.PRIVATE
+        use_multi_agent = bool(_app_config.MULTI_AGENT_CHAT_ENABLED)
+        chat_agent_mode = "multi" if use_multi_agent else "single"
+        set_agent_mode(chat_span, chat_agent_mode)
+
+        _t0 = time.perf_counter()
+        agent_factory = (
+            create_multi_agent_chat_deep_agent
+            if use_multi_agent
+            else create_surfsense_deep_agent
+        )
+        # Build the agent inline. Provider 429s surface through the in-stream
+        # recovery loop below, which repins the thread to an eligible
+        # alternative config and rebuilds the agent before the user sees any
+        # output.
+        agent = await build_main_agent_for_thread(
+            agent_factory,
+            llm=llm,
+            search_space_id=search_space_id,
+            db_session=session,
+            connector_service=connector_service,
+            checkpointer=checkpointer,
+            user_id=user_id,
+            thread_id=chat_id,
+            agent_config=agent_config,
+            firecrawl_api_key=firecrawl_api_key,
+            thread_visibility=visibility,
+            filesystem_selection=filesystem_selection,
+            disabled_tools=disabled_tools,
+            mentioned_document_ids=mentioned_document_ids,
+        )
+        _perf_log.info(
+            "[stream_new_chat] Agent created in %.3fs", time.perf_counter() - _t0
+        )
+
+        # --- Block 3: Input assembly ---
+
+        _t0 = time.perf_counter()
+        assembled = await build_new_chat_input_state(
+            session,
+            chat_id=chat_id,
+            search_space_id=search_space_id,
+            user_query=user_query,
+            user_image_data_urls=user_image_data_urls,
+            mentioned_document_ids=mentioned_document_ids,
+            mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
+            mentioned_folder_ids=mentioned_folder_ids,
+            mentioned_documents=mentioned_documents,
+            needs_history_bootstrap=needs_history_bootstrap,
+            thread_visibility=visibility,
+            current_user_display_name=current_user_display_name,
+            filesystem_mode=fs_mode,
+            request_id=request_id,
+            turn_id=stream_result.turn_id,
+        )
+        input_state = assembled.input_state
+        accepted_folder_ids = assembled.accepted_folder_ids
+        mentioned_surfsense_docs = assembled.mentioned_surfsense_docs
+        _perf_log.info(
+            "[stream_new_chat] History bootstrap + doc/report queries in %.3fs",
+            time.perf_counter() - _t0,
+        )
+
+        # All pre-streaming DB reads done. Commit to release the transaction
+        # and its ACCESS SHARE locks so we don't block DDL (e.g. migrations)
+        # for the entire LLM streaming duration. Tools that need DB access
+        # during streaming start their own short-lived transactions (or use
+        # isolated sessions).
+        await session.commit()
+        # Detach heavy ORM objects (documents with chunks, reports, etc.)
+        # from the session identity map now that we've extracted what we
+        # need. Without this they accumulate in memory for the entire
+        # streaming duration (which can be several minutes).
+        session.expunge_all()
+
+        _perf_log.info(
+            "[stream_new_chat] Total pre-stream setup in %.3fs (chat_id=%s)",
+            time.perf_counter() - _t_total,
+            chat_id,
+        )
+
+        configurable: dict[str, Any] = {
+            "thread_id": str(chat_id),
+            "request_id": request_id or "unknown",
+            "turn_id": stream_result.turn_id,
+        }
+        if checkpoint_id:
+            configurable["checkpoint_id"] = checkpoint_id
+
+        config = {
+            "configurable": configurable,
+            # Effectively uncapped, matching the agent-level ``with_config``
+            # default in ``chat_deepagent.create_agent`` and the unbounded
+            # ``while(true)`` in OpenCode's ``session/processor.ts``. Real
+            # circuit-breakers live in middleware (``DoomLoopMiddleware``,
+            # plus ``enable_tool_call_limit`` / ``enable_model_call_limit``).
+            # The original 25 (and our previous 80 bump) hit users on
+            # legitimate multi-tool plans.
+            "recursion_limit": 10_000,
+        }
+
+        # --- Block 4: First SSE frames ---
+
+        for sse in iter_initial_frames(streaming_service, turn_id=stream_result.turn_id):
+            yield sse
+
+        # --- Block 5: Persistence join + message-id frames ---
+
+        user_message_id = await await_persist_task(
+            persist_user_task,
+            chat_id=chat_id,
+            turn_id=stream_result.turn_id,
+            log_label="persist_user_task",
+        )
+        if user_message_id is None:
+            yield emit_stream_error(
+                message="We couldn't save your message. Please try again in a moment.",
+                error_kind="server_error",
+                error_code="MESSAGE_PERSIST_FAILED",
+            )
+            for sse in iter_final_frames(streaming_service):
+                yield sse
+            return
+
+        # Emit canonical user message id BEFORE any LLM streaming so the FE
+        # can rename its optimistic ``msg-user-XXX`` placeholder to
+        # ``msg-{user_message_id}`` and unlock features gated on a real DB id
+        # (comments, edit-from-this-message). See B4 in the
+        # ``sse-based_message_id_handshake`` plan.
+        yield streaming_service.format_data(
+            "user-message-id",
+            {"message_id": user_message_id, "turn_id": stream_result.turn_id},
+        )
+
+        assistant_message_id = await await_persist_task(
+            persist_asst_task,
+            chat_id=chat_id,
+            turn_id=stream_result.turn_id,
+            log_label="persist_asst_task",
+        )
+        if assistant_message_id is None:
+            # Genuine DB failure — abort the turn rather than stream into a
+            # void. The user row is already persisted so the legacy
+            # ghost-thread gate isn't reopened.
+            yield emit_stream_error(
+                message=(
+                    "We couldn't initialize the assistant message. Please try again."
+                ),
+                error_kind="server_error",
+                error_code="MESSAGE_PERSIST_FAILED",
+            )
+            for sse in iter_final_frames(streaming_service):
+                yield sse
+            return
+
+        yield streaming_service.format_data(
+            "assistant-message-id",
+            {"message_id": assistant_message_id, "turn_id": stream_result.turn_id},
+        )
+
+        stream_result.assistant_message_id = assistant_message_id
+        stream_result.content_builder = AssistantContentBuilder()
+
+        # --- Block 6: Initial thinking step + title task + runtime context ---
+
+        initial_step = build_initial_thinking_step(
+            user_query=user_query,
+            user_image_data_urls=user_image_data_urls,
+            mentioned_surfsense_docs=mentioned_surfsense_docs,
+        )
+        for sse in iter_initial_thinking_step_frame(
+            initial_step,
+            streaming_service=streaming_service,
+            content_builder=stream_result.content_builder,
+        ):
+            yield sse
+
+        initial_step_id = initial_step.step_id
+        initial_step_title = initial_step.title
+        initial_step_items = initial_step.items
+        # Drop the heavy ORM objects + the container that holds them so they
+        # aren't retained for the entire streaming duration. ``input_state``
+        # already carries the langchain_messages list independently.
+        del assembled, mentioned_surfsense_docs
+
+        title_task = spawn_title_task(
+            chat_id=chat_id,
+            user_query=user_query,
+            user_image_data_urls=user_image_data_urls,
+            assistant_message_id=assistant_message_id,
+            llm=llm,
+            agent_config=agent_config,
+        )
+        title_emitted = False  # flips once a title frame was yielded or the task finished empty
+
+        runtime_context = build_new_chat_runtime_context(
+            search_space_id=search_space_id,
+            mentioned_document_ids=mentioned_document_ids,
+            accepted_folder_ids=accepted_folder_ids,
+            mentioned_folder_ids=mentioned_folder_ids,
+            request_id=request_id,
+            turn_id=stream_result.turn_id,
+        )
+
+        # --- Block 7: Stream loop ---
+
+        _t_stream_start = time.perf_counter()
+        runtime_rate_limit_recovered = False
+
+        def _on_first_event() -> None:
+            _perf_log.info(
+                "[stream_new_chat] First agent event in %.3fs (time since stream start), "
+                "%.3fs (total since request start) (chat_id=%s)",
+                time.perf_counter() - _t_stream_start,
+                time.perf_counter() - _t_total,
+                chat_id,
+            )
+
+        async def _recover(exc: BaseException, first_event_seen: bool):
+            nonlocal llm_config_id, llm, agent_config, runtime_rate_limit_recovered
+            nonlocal title_task
+            if not can_recover_provider_rate_limit(
+                exc,
+                first_event_seen=first_event_seen,
+                runtime_rate_limit_recovered=runtime_rate_limit_recovered,
+                requested_llm_config_id=requested_llm_config_id,
+                current_llm_config_id=llm_config_id,
+            ):
+                return None
+            runtime_rate_limit_recovered = True  # fed back into the guard above on later calls
+            previous_config_id = llm_config_id
+            llm_config_id = await reroute_to_next_auto_pin(
+                session,
+                chat_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                current_llm_config_id=llm_config_id,
+                requires_image_input=requires_image_input,
+            )
+            new_llm, new_agent_config, llm_load_err = await load_llm_bundle(
+                session, config_id=llm_config_id, search_space_id=search_space_id
+            )
+            if llm_load_err:
+                # Re-raise the original so the terminal-error path classifies
+                # it correctly (don't swallow as "config load error").
+                return None
+            llm = new_llm
+            agent_config = new_agent_config
+
+            # Title gen used the initial llm object. After a runtime repin we
+            # keep the stream focused on response recovery and skip title gen
+            # for this turn.
+            if title_task is not None and not title_task.done():
+                title_task.cancel()
+                title_task = None
+
+            _t_rebuild = time.perf_counter()
+            new_agent = await build_main_agent_for_thread(
+                agent_factory,
+                llm=llm,
+                search_space_id=search_space_id,
+                db_session=session,
+                connector_service=connector_service,
+                checkpointer=checkpointer,
+                user_id=user_id,
+                thread_id=chat_id,
+                agent_config=agent_config,
+                firecrawl_api_key=firecrawl_api_key,
+                thread_visibility=visibility,
+                filesystem_selection=filesystem_selection,
+                disabled_tools=disabled_tools,
+                mentioned_document_ids=mentioned_document_ids,
+            )
+            _perf_log.info(
+                "[stream_new_chat] Runtime rate-limit recovery repinned "
+                "config_id=%s -> %s and rebuilt agent in %.3fs",
+                previous_config_id,
+                llm_config_id,
+                time.perf_counter() - _t_rebuild,
+            )
+            log_rate_limit_recovered(
+                flow=flow,
+                request_id=request_id,
+                chat_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                previous_config_id=previous_config_id,
+                new_config_id=llm_config_id,
+            )
+            return new_agent
+
+        async for sse in run_stream_loop(
+            agent=agent,
+            streaming_service=streaming_service,
+            config=config,
+            input_data=input_state,
+            stream_result=stream_result,
+            step_prefix="thinking",
+            initial_step_id=initial_step_id,
+            initial_step_title=initial_step_title,
+            initial_step_items=initial_step_items,
+            fallback_commit_search_space_id=search_space_id,
+            fallback_commit_created_by_id=user_id,
+            fallback_commit_filesystem_mode=(
+                filesystem_selection.mode if filesystem_selection else FilesystemMode.CLOUD
+            ),
+            fallback_commit_thread_id=chat_id,
+            runtime_context=runtime_context,
+            content_builder=stream_result.content_builder,
+            recover=_recover,
+            on_first_event=_on_first_event,
+        ):
+            yield sse
+            # Inject the title update mid-stream as soon as the background
+            # task finishes; gated so we emit at most once.
+            async for title_sse in maybe_emit_title_update(
+                title_task=title_task,
+                title_emitted=title_emitted,
+                chat_id=chat_id,
+                accumulator=accumulator,
+                streaming_service=streaming_service,
+            ):
+                yield title_sse
+                title_emitted = True
+            # Account for the case where the task completed but produced no
+            # title — flip the flag anyway so we don't keep checking it.
+            if (
+                title_task is not None
+                and title_task.done()
+                and not title_emitted
+            ):
+                title_emitted = True
+
+        _perf_log.info(
+            "[stream_new_chat] Agent stream completed in %.3fs (chat_id=%s)",
+            time.perf_counter() - _t_stream_start,
+            chat_id,
+        )
+        log_system_snapshot("stream_new_chat_END")
+
+        # --- Block 8: Finalize ---
+
+        if stream_result.is_interrupted:
+            ot.add_event("chat.interrupted", {"chat.flow": flow})
+            if title_task is not None and not title_task.done():
+                title_task.cancel()
+            for sse in iter_token_usage_frame(
+                streaming_service,
+                accumulator=accumulator,
+                log_label="interrupted new_chat",
+            ):
+                yield sse
+            yield streaming_service.format_finish_step()
+            yield streaming_service.format_finish()
+            yield streaming_service.format_done()
+            return  # skips finalize_premium: a live reservation is released in ``finally``
+
+        async for title_sse in await_pending_title_update(
+            title_task=title_task,
+            title_emitted=title_emitted,
+            chat_id=chat_id,
+            accumulator=accumulator,
+            streaming_service=streaming_service,
+        ):
+            yield title_sse
+
+        # Finalize premium credit debit with the actual provider cost reported
+        # by LiteLLM, summed across every call in the turn. Mirrors the
+        # pre-cost behaviour of "premium turn → all calls count" so free
+        # sub-agent calls during a premium turn still contribute to the bill
+        # (they're $0 in practice anyway).
+        if premium_reservation is not None and user_id:
+            await finalize_premium(
+                reservation=premium_reservation,
+                user_id=user_id,
+                accumulator=accumulator,
+            )
+            premium_reservation = None  # debited; stops ``finally`` from also releasing it
+
+        for sse in iter_token_usage_frame(
+            streaming_service, accumulator=accumulator, log_label="normal new_chat"
+        ):
+            yield sse
+
+        for sse in iter_final_frames(streaming_service):
+            yield sse
+
+    except Exception as exc:
+        frames, summary = handle_terminal_exception(
+            exc,
+            flow=flow,
+            flow_label="chat",
+            log_prefix="stream_new_chat",
+            streaming_service=streaming_service,
+            request_id=request_id,
+            chat_id=chat_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            chat_span=chat_span,
+        )
+        if summary["busy_error_raised"]:
+            busy_error_raised = True
+        chat_outcome = summary["chat_outcome"]
+        chat_error_category = summary["chat_error_category"]
+        for sse in frames:
+            yield sse
+
+    finally:
+        # Shield the ENTIRE async cleanup from anyio cancel-scope cancellation.
+        # Starlette's BaseHTTPMiddleware uses anyio task groups; on client
+        # disconnect, it cancels the scope with level-triggered cancellation
+        # — every unshielded ``await`` would raise CancelledError immediately.
+        # Without this the very first ``await`` (session.rollback) would
+        # raise, ``except Exception`` wouldn't catch it (CancelledError is a
+        # BaseException), and the rest of cleanup — including session.close()
+        # — would never run.
+        with anyio.CancelScope(shield=True):
+            # Authoritative fallback cleanup for lock/cancel state. Middleware
+            # teardown can be skipped on some client-abort paths.
+            end_turn(str(chat_id))  # NOTE(review): also runs on BusyError — confirm intended
+
+            if premium_reservation is not None and user_id:
+                await release_premium(
+                    reservation=premium_reservation, user_id=user_id
+                )
+
+            await close_session_and_clear_ai_responding(session, chat_id)
+
+            await finalize_assistant_message(
+                stream_result=stream_result,
+                chat_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                accumulator=accumulator,
+                log_prefix="stream_new_chat",
+            )
+
+            # Persist any sandbox-produced files to local storage so they remain
+            # downloadable after the Daytona sandbox auto-deletes.
+            if stream_result and stream_result.sandbox_files:
+                with contextlib.suppress(Exception):
+                    from app.agents.new_chat.sandbox import (
+                        is_sandbox_enabled,
+                        persist_and_delete_sandbox,
+                    )
+
+                    if is_sandbox_enabled():
+                        with anyio.CancelScope(shield=True):  # nested in the outer shield
+                            await persist_and_delete_sandbox(
+                                chat_id, stream_result.sandbox_files
+                            )
+
+            # ``aafter_agent`` doesn't fire on ``interrupt()`` or early bailout.
+            # Skip on ``BusyError`` (caller never acquired the lock).
+            if not busy_error_raised:
+                with contextlib.suppress(Exception):
+                    end_turn(str(chat_id))  # second call; the first one above is unconditional
+                _perf_log.info(
+                    "[stream_new_chat] end_turn cleanup (chat_id=%s)", chat_id
+                )
+
+            # Break circular refs held by the agent graph, tools, and LLM
+            # wrappers so the GC can reclaim them in a single pass.
+            agent = llm = connector_service = None  # noqa: F841
+            input_state = stream_result = None  # noqa: F841
+            session = None  # noqa: F841
+
+            run_gc_pass(log_prefix="stream_new_chat", chat_id=chat_id)
+            close_chat_request_span(
+                span_cm=chat_span_cm,
+                span=chat_span,
+                chat_outcome=chat_outcome,
+                chat_agent_mode=chat_agent_mode,
+                flow=flow,
+                chat_error_category=chat_error_category,
+                duration_seconds=time.perf_counter() - _t_total,
+            )
From 885d4acda921ff8b8c0cb10171cf63bcffbc5845 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:50:03 +0200
Subject: [PATCH 13/87] refactor(chat): add streaming/flows/resume_chat/
per-concern leaf modules
Three focused modules used by the upcoming resume-chat orchestrator:
* runtime_context: build_resume_chat_runtime_context assembles the
SurfSenseContextSchema for a resume turn (handles empty mention
lists, since resume requests do not carry fresh @-mentions).
* assistant_shell: persist_resume_assistant_shell writes a fresh
assistant row for the resumed turn so the post-stream finalize
has a target.
* resume_routing: build_resume_routing collects the pending
interrupts across paused subagents and slices the flat list of
ResumeDecision[] into the correct (thread, subagent) buckets so
LangGraph routes each decision back to the right paused tool call.
Add-only; no orchestrator yet (next commit).
---
.../flows/resume_chat/assistant_shell.py | 31 +++++++++
.../flows/resume_chat/resume_routing.py | 65 +++++++++++++++++++
.../flows/resume_chat/runtime_context.py | 23 +++++++
3 files changed, 119 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/assistant_shell.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/assistant_shell.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/assistant_shell.py
new file mode 100644
index 000000000..2f34387f8
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/assistant_shell.py
@@ -0,0 +1,31 @@
+"""Pre-write a fresh assistant row for this resume turn.
+
+The original (interrupted) ``stream_new_chat`` invocation already persisted
+its own assistant row anchored to a different ``turn_id``; resume allocates a
+new ``turn_id`` (per-request, see ``orchestrator``) so we need a separate row
+keyed on the same ``(thread_id, turn_id, ASSISTANT)`` invariant.
+
+Idempotent against migration 141's partial unique index — recovers the
+existing id on retry.
+
+Resume does NOT emit ``data-user-message-id``: the user row is from the
+original interrupted turn (different ``turn_id``) and is never re-persisted
+here. See B5 in the ``sse-based_message_id_handshake`` plan.
+"""
+
+from __future__ import annotations
+
+from app.tasks.chat.persistence import persist_assistant_shell
+
+
+async def persist_resume_assistant_shell(
+ *,
+ chat_id: int,
+ user_id: str | None,
+ turn_id: str,
+) -> int | None:
+ return await persist_assistant_shell(
+ chat_id=chat_id,
+ user_id=user_id,
+ turn_id=turn_id,
+ )
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py
new file mode 100644
index 000000000..300fbc9bd
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py
@@ -0,0 +1,65 @@
+"""Route a flat ``decisions`` list back to the right paused subagent.
+
+Each pending interrupt is stamped with its originating ``tool_call_id`` (see
+``checkpointed_subagent_middleware.propagation``) so the resume slicer can
+re-target each ``HumanReview`` decision at the right ``tool_call_id``.
+
+LangGraph rejects scalar ``Command(resume=...)`` when multiple interrupts are
+pending (parallel HITL); the mapped form works for the single-pause case too,
+so we always use it.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+from app.utils.perf import get_perf_logger
+
+_perf_log = get_perf_logger()
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ResumeRoutingPayload:
+    """Pair of resume values that ``build_resume_routing`` computes for one turn."""
+
+    routed_resume_value: dict[str, Any]  # output of ``slice_decisions_by_tool_call``
+    lg_resume_map: dict[str, Any]  # output of ``build_lg_resume_map``
+
+
+async def build_resume_routing(
+    agent: Any,
+    *,
+    chat_id: int,
+    decisions: list[dict],
+) -> ResumeRoutingPayload:
+    """Turn the flat ``decisions`` list into a ``ResumeRoutingPayload``.
+
+    Steps, in order: fetch the thread's state from ``agent``, collect the pending
+    tool calls from it, slice ``decisions`` across them, then build the resume
+    map from the state and the slices. One perf-log line records the counts.
+    """
+    from app.agents.multi_agent_chat.middleware.main_agent.checkpointed_subagent_middleware.resume_routing import (
+        build_lg_resume_map,
+        collect_pending_tool_calls,
+        slice_decisions_by_tool_call,
+    )
+
+    # State is looked up under ``thread_id = str(chat_id)``.
+    thread_config = {"configurable": {"thread_id": str(chat_id)}}
+    state_snapshot = await agent.aget_state(thread_config)
+    pending_calls = collect_pending_tool_calls(state_snapshot)
+    decision_count, pending_count = len(decisions), len(pending_calls)
+    _perf_log.info(
+        "[hitl_route] resume_entry chat_id=%s decisions=%d pending_subagents=%d",
+        chat_id,
+        decision_count,
+        pending_count,
+    )
+    slices = slice_decisions_by_tool_call(decisions, pending_calls)
+    return ResumeRoutingPayload(
+        routed_resume_value=slices,
+        lg_resume_map=build_lg_resume_map(state_snapshot, slices),
+    )
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py
new file mode 100644
index 000000000..59d5d8ca7
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py
@@ -0,0 +1,23 @@
+"""Build the per-invocation ``SurfSenseContextSchema`` for a resume turn.
+
+Resume doesn't carry new ``mentioned_document_ids`` (those are seeded by the
+original turn). We still build the context so future middleware extensions
+can rely on ``runtime.context`` always being populated.
+"""
+
+from __future__ import annotations
+
+from app.agents.new_chat.context import SurfSenseContextSchema
+
+
+def build_resume_chat_runtime_context(
+    *,
+    search_space_id: int,
+    request_id: str | None,
+    turn_id: str,
+) -> SurfSenseContextSchema:
+    """Assemble the resume-turn ``SurfSenseContextSchema`` from its three ids."""
+    context_fields = dict(
+        search_space_id=search_space_id, request_id=request_id, turn_id=turn_id
+    )
+    return SurfSenseContextSchema(**context_fields)
From cf0085575ca0275f134dbc04f30896b33fa5a50a Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:50:09 +0200
Subject: [PATCH 14/87] refactor(chat): add
streaming/flows/resume_chat/orchestrator + flows public API
Slim composition root for the resume-chat streaming flow. Mirrors the
new_chat orchestrator but specialized for resumed turns:
* no fresh user turn, no title generation, no image-capability gate
* persists a fresh assistant shell for the resumed turn
* applies build_resume_routing to dispatch user decisions to the
correct paused subagent before invoking the agent
* shares the same stream_loop + flow-local _recover closure for in-
stream provider rate-limit recovery
Also lands flows/__init__.py, which becomes the public chat-flow API:
from app.tasks.chat.streaming.flows import stream_new_chat, stream_resume_chat
Existing wiring (routes, contract test) still imports from the legacy
app.tasks.chat.stream_new_chat module. Cutover is the next phase.
---
.../tasks/chat/streaming/flows/__init__.py | 17 +
.../streaming/flows/resume_chat/__init__.py | 12 +
.../flows/resume_chat/orchestrator.py | 629 ++++++++++++++++++
3 files changed, 658 insertions(+)
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/__init__.py
create mode 100644 surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/__init__.py b/surfsense_backend/app/tasks/chat/streaming/flows/__init__.py
new file mode 100644
index 000000000..522db2fad
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/__init__.py
@@ -0,0 +1,17 @@
+"""Top-level streaming flows: ``new_chat`` and ``resume_chat`` orchestrators.
+
+Re-exports the public entry points so callers can write::
+
+ from app.tasks.chat.streaming.flows import stream_new_chat, stream_resume_chat
+
+The orchestrators themselves live under ``new_chat/orchestrator.py`` and
+``resume_chat/orchestrator.py`` (slim composition of the per-concern modules in
+each flow folder and the building blocks in ``shared/``).
+"""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.flows.new_chat import stream_new_chat
+from app.tasks.chat.streaming.flows.resume_chat import stream_resume_chat
+
+__all__ = ["stream_new_chat", "stream_resume_chat"]
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/__init__.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/__init__.py
new file mode 100644
index 000000000..ed0683e19
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/__init__.py
@@ -0,0 +1,12 @@
+"""Resume-chat streaming flow.
+
+Public entry point ``stream_resume_chat`` is the slim async generator in
+``orchestrator.py`` that composes the per-concern modules in this folder and
+the building blocks under ``flows/shared/``.
+"""
+
+from __future__ import annotations
+
+from app.tasks.chat.streaming.flows.resume_chat.orchestrator import stream_resume_chat
+
+__all__ = ["stream_resume_chat"]
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py
new file mode 100644
index 000000000..b67ac987e
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/orchestrator.py
@@ -0,0 +1,629 @@
+"""``stream_resume_chat`` — public entry point for a HITL resume turn.
+
+Slim composition layer over the per-concern modules in this folder and the
+building blocks under ``flows/shared/``. Mirrors ``stream_new_chat`` but:
+
+ * No user-message persistence (the original turn already wrote it).
+ * No mentions / surfsense-doc / report context assembly (seeded by original).
+ * No title generation (only fires on first-response).
+ * Synchronous ``persist_assistant_shell`` call (we have no other in-flight
+ pre-stream work to overlap it with).
+ * ``input_data`` is a ``Command(resume=lg_resume_map)`` instead of a
+ LangChain message list.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import gc
+import logging
+import sys
+import time
+import uuid as _uuid
+from collections.abc import AsyncGenerator
+from functools import partial
+from typing import Any
+from uuid import UUID
+
+import anyio
+
+from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
+from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
+from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
+from app.agents.new_chat.middleware.busy_mutex import end_turn
+from app.config import config as _app_config
+from app.db import ChatVisibility, async_session_maker, shielded_async_session
+from app.observability import otel as ot
+from app.services.chat_session_state_service import set_ai_responding
+from app.services.new_streaming_service import VercelStreamingService
+from app.tasks.chat.content_builder import AssistantContentBuilder
+from app.tasks.chat.streaming.agent.builder import build_main_agent_for_thread
+from app.tasks.chat.streaming.contract.file_contract import log_file_contract
+from app.tasks.chat.streaming.errors.emitter import emit_stream_terminal_error
+from app.tasks.chat.streaming.flows.resume_chat.assistant_shell import (
+ persist_resume_assistant_shell,
+)
+from app.tasks.chat.streaming.flows.resume_chat.resume_routing import (
+ build_resume_routing,
+)
+from app.tasks.chat.streaming.flows.resume_chat.runtime_context import (
+ build_resume_chat_runtime_context,
+)
+from app.tasks.chat.streaming.flows.shared.assistant_finalize import (
+ finalize_assistant_message,
+)
+from app.tasks.chat.streaming.flows.shared.finalize_emit import iter_token_usage_frame
+from app.tasks.chat.streaming.flows.shared.finally_cleanup import (
+ close_session_and_clear_ai_responding,
+ run_gc_pass,
+)
+from app.tasks.chat.streaming.flows.shared.first_frames import (
+ iter_final_frames,
+ iter_initial_frames,
+)
+from app.tasks.chat.streaming.flows.shared.llm_bundle import load_llm_bundle
+from app.tasks.chat.streaming.flows.shared.pre_stream_setup import (
+ get_chat_checkpointer,
+ setup_connector_and_firecrawl,
+)
+from app.tasks.chat.streaming.flows.shared.premium_quota import (
+ PremiumReservation,
+ finalize_premium,
+ needs_premium_quota,
+ release_premium,
+ reserve_premium,
+)
+from app.tasks.chat.streaming.flows.shared.rate_limit_recovery import (
+ can_recover_provider_rate_limit,
+ log_rate_limit_recovered,
+ reroute_to_next_auto_pin,
+)
+from app.tasks.chat.streaming.flows.shared.span import (
+ close_chat_request_span,
+ open_chat_request_span,
+ set_agent_mode,
+)
+from app.tasks.chat.streaming.flows.shared.stream_loop import run_stream_loop
+from app.tasks.chat.streaming.flows.shared.terminal_error import (
+ handle_terminal_exception,
+)
+from app.tasks.chat.streaming.shared.stream_result import StreamResult
+from app.tasks.chat.streaming.shared.utils import resume_step_prefix
+from app.utils.perf import get_perf_logger, log_system_snapshot
+
+logger = logging.getLogger(__name__)
+_perf_log = get_perf_logger()
+
+
+async def stream_resume_chat(
+    chat_id: int,
+    search_space_id: int,
+    decisions: list[dict],
+    user_id: str | None = None,
+    llm_config_id: int = -1,
+    thread_visibility: ChatVisibility | None = None,
+    filesystem_selection: FilesystemSelection | None = None,
+    request_id: str | None = None,
+    disabled_tools: list[str] | None = None,
+) -> AsyncGenerator[str, None]:
+    """Resume a paused HITL turn with the user's decisions, yielding SSE frames.
+
+    ``decisions`` are routed to per-``tool_call_id`` slices via
+    ``build_resume_routing``; every early-exit path still runs the ``finally``
+    cleanup, which releases the turn lock once unless a ``BusyError`` was raised.
+    """
+    streaming_service = VercelStreamingService()
+    stream_result = StreamResult()
+    _t_total = time.perf_counter()
+    fs_mode = filesystem_selection.mode.value if filesystem_selection else "cloud"
+    fs_platform = (
+        filesystem_selection.client_platform.value if filesystem_selection else "web"
+    )
+    stream_result.request_id = request_id
+    stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
+    stream_result.filesystem_mode = fs_mode
+    stream_result.client_platform = fs_platform
+
+    chat_agent_mode = "unknown"
+    chat_outcome = "success"
+    chat_error_category: str | None = None
+    chat_span_cm, chat_span = open_chat_request_span(
+        chat_id=chat_id,
+        search_space_id=search_space_id,
+        flow="resume",
+        request_id=request_id,
+        turn_id=stream_result.turn_id,
+        filesystem_mode=fs_mode,
+        client_platform=fs_platform,
+        agent_mode=chat_agent_mode,
+    )
+    log_file_contract("turn_start", stream_result)
+    _perf_log.info(
+        "[stream_resume] filesystem_mode=%s client_platform=%s",
+        fs_mode,
+        fs_platform,
+    )
+
+    from app.services.token_tracking_service import start_turn
+
+    accumulator = start_turn()
+
+    premium_reservation: PremiumReservation | None = None
+    busy_error_raised = False
+
+    emit_stream_error = partial(
+        emit_stream_terminal_error,
+        streaming_service=streaming_service,
+        flow="resume",
+        request_id=request_id,
+        thread_id=chat_id,
+        search_space_id=search_space_id,
+        user_id=user_id,
+    )
+
+    session = async_session_maker()
+    try:
+        if user_id:
+            await set_ai_responding(session, chat_id, UUID(user_id))
+
+        requested_llm_config_id = llm_config_id
+
+        # --- LLM config ---
+        # Resolve the pinned config id, load the LLM bundle, then settle premium quota.
+        _t0 = time.perf_counter()
+        try:
+            from app.services.auto_model_pin_service import (
+                resolve_or_get_pinned_llm_config_id,
+            )
+
+            pinned = await resolve_or_get_pinned_llm_config_id(
+                session,
+                thread_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                selected_llm_config_id=llm_config_id,
+            )
+            llm_config_id = pinned.resolved_llm_config_id
+            ot.add_event(
+                "model.pin.resolved",
+                {
+                    "pin.requested_id": requested_llm_config_id,
+                    "pin.resolved_id": llm_config_id,
+                    "pin.requires_image_input": False,
+                },
+            )
+        except ValueError as pin_error:
+            yield emit_stream_error(
+                message=str(pin_error),
+                error_kind="server_error",
+                error_code="SERVER_ERROR",
+            )
+            yield streaming_service.format_done()
+            return
+
+        llm, agent_config, llm_load_error = await load_llm_bundle(
+            session, config_id=llm_config_id, search_space_id=search_space_id
+        )
+        if llm_load_error:
+            yield emit_stream_error(
+                message=llm_load_error,
+                error_kind="server_error",
+                error_code="SERVER_ERROR",
+            )
+            yield streaming_service.format_done()
+            return
+        _perf_log.info(
+            "[stream_resume] LLM config loaded in %.3fs", time.perf_counter() - _t0
+        )
+
+        if needs_premium_quota(agent_config, user_id):
+            premium_reservation = await reserve_premium(
+                agent_config=agent_config, user_id=user_id  # type: ignore[arg-type]
+            )
+            if not premium_reservation.allowed:
+                ot.add_event(
+                    "quota.denied", {"quota.code": "PREMIUM_QUOTA_EXHAUSTED"}
+                )
+                if requested_llm_config_id == 0:
+                    try:
+                        pinned_fb = await resolve_or_get_pinned_llm_config_id(
+                            session,
+                            thread_id=chat_id,
+                            search_space_id=search_space_id,
+                            user_id=user_id,
+                            selected_llm_config_id=0,
+                            force_repin_free=True,
+                        )
+                        llm_config_id = pinned_fb.resolved_llm_config_id
+                        ot.add_event(
+                            "model.repin",
+                            {
+                                "repin.reason": "premium_quota_exhausted",
+                                "repin.to_config_id": llm_config_id,
+                            },
+                        )
+                    except ValueError as pin_error:
+                        yield emit_stream_error(
+                            message=str(pin_error),
+                            error_kind="server_error",
+                            error_code="SERVER_ERROR",
+                        )
+                        yield streaming_service.format_done()
+                        return
+                    llm, agent_config, llm_load_error = await load_llm_bundle(
+                        session,
+                        config_id=llm_config_id,
+                        search_space_id=search_space_id,
+                    )
+                    if llm_load_error:
+                        yield emit_stream_error(
+                            message=llm_load_error,
+                            error_kind="server_error",
+                            error_code="SERVER_ERROR",
+                        )
+                        yield streaming_service.format_done()
+                        return
+                    premium_reservation = None
+                    from app.tasks.chat.streaming.errors.classifier import (
+                        log_chat_stream_error,
+                    )
+
+                    log_chat_stream_error(
+                        flow="resume",
+                        error_kind="premium_quota_exhausted",
+                        error_code="PREMIUM_QUOTA_EXHAUSTED",
+                        severity="info",
+                        is_expected=True,
+                        request_id=request_id,
+                        thread_id=chat_id,
+                        search_space_id=search_space_id,
+                        user_id=user_id,
+                        message=(
+                            "Premium quota exhausted on pinned model; "
+                            "auto-fallback switched to a free model"
+                        ),
+                        extra={
+                            "fallback_config_id": llm_config_id,
+                            "auto_fallback": True,
+                        },
+                    )
+                else:
+                    yield emit_stream_error(
+                        message=(
+                            "Buy more tokens to continue with this model, or "
+                            "switch to a free model"
+                        ),
+                        error_kind="premium_quota_exhausted",
+                        error_code="PREMIUM_QUOTA_EXHAUSTED",
+                        severity="info",
+                        is_expected=True,
+                        extra={
+                            "resolved_config_id": llm_config_id,
+                            "auto_fallback": False,
+                        },
+                    )
+                    yield streaming_service.format_done()
+                    return
+
+        if not llm:
+            yield emit_stream_error(
+                message="Failed to create LLM instance",
+                error_kind="server_error",
+                error_code="SERVER_ERROR",
+            )
+            yield streaming_service.format_done()
+            return
+
+        # --- Pre-stream setup ---
+        # Connector service, checkpointer and agent are built before the commit below.
+        _t0 = time.perf_counter()
+        connector_service, firecrawl_api_key = await setup_connector_and_firecrawl(
+            session, search_space_id=search_space_id
+        )
+        _perf_log.info(
+            "[stream_resume] Connector service + firecrawl key in %.3fs",
+            time.perf_counter() - _t0,
+        )
+
+        _t0 = time.perf_counter()
+        checkpointer = await get_chat_checkpointer()
+        _perf_log.info(
+            "[stream_resume] Checkpointer ready in %.3fs", time.perf_counter() - _t0
+        )
+
+        visibility = thread_visibility or ChatVisibility.PRIVATE
+        use_multi_agent = bool(_app_config.MULTI_AGENT_CHAT_ENABLED)
+        chat_agent_mode = "multi" if use_multi_agent else "single"
+        set_agent_mode(chat_span, chat_agent_mode)
+
+        _t0 = time.perf_counter()
+        agent_factory = (
+            create_multi_agent_chat_deep_agent
+            if use_multi_agent
+            else create_surfsense_deep_agent
+        )
+        agent = await build_main_agent_for_thread(
+            agent_factory,
+            llm=llm,
+            search_space_id=search_space_id,
+            db_session=session,
+            connector_service=connector_service,
+            checkpointer=checkpointer,
+            user_id=user_id,
+            thread_id=chat_id,
+            agent_config=agent_config,
+            firecrawl_api_key=firecrawl_api_key,
+            thread_visibility=visibility,
+            filesystem_selection=filesystem_selection,
+            disabled_tools=disabled_tools,
+        )
+        _perf_log.info(
+            "[stream_resume] Agent created in %.3fs", time.perf_counter() - _t0
+        )
+
+        # Release the transaction before streaming (same rationale as stream_new_chat).
+        await session.commit()
+        session.expunge_all()
+
+        _perf_log.info(
+            "[stream_resume] Total pre-stream setup in %.3fs (chat_id=%s)",
+            time.perf_counter() - _t_total,
+            chat_id,
+        )
+
+        # --- Resume routing ---
+
+        from langgraph.types import Command
+
+        routing = await build_resume_routing(
+            agent, chat_id=chat_id, decisions=decisions
+        )
+
+        config = {
+            "configurable": {
+                "thread_id": str(chat_id),
+                "request_id": request_id or "unknown",
+                "turn_id": stream_result.turn_id,
+                # Per-``tool_call_id`` resume slices read by
+                # ``SurfSenseCheckpointedSubAgentMiddleware``. Parallel
+                # siblings each pop their own entry, so they never race.
+                "surfsense_resume_value": routing.routed_resume_value,
+            },
+            # Same rationale as ``stream_new_chat``: effectively uncapped to
+            # mirror the agent default and OpenCode's session loop. Doom-loop
+            # / call-limit middleware enforce the real ceiling.
+            "recursion_limit": 10_000,
+        }
+
+        # --- First SSE frames ---
+
+        for sse in iter_initial_frames(streaming_service, turn_id=stream_result.turn_id):
+            yield sse
+
+        # --- Assistant-shell persistence + id frame ---
+
+        assistant_message_id = await persist_resume_assistant_shell(
+            chat_id=chat_id,
+            user_id=user_id,
+            turn_id=stream_result.turn_id,
+        )
+        if assistant_message_id is None:
+            yield emit_stream_error(
+                message=(
+                    "We couldn't initialize the assistant message. Please try again."
+                ),
+                error_kind="server_error",
+                error_code="MESSAGE_PERSIST_FAILED",
+            )
+            for sse in iter_final_frames(streaming_service):
+                yield sse
+            return
+
+        yield streaming_service.format_data(
+            "assistant-message-id",
+            {"message_id": assistant_message_id, "turn_id": stream_result.turn_id},
+        )
+
+        stream_result.assistant_message_id = assistant_message_id
+        stream_result.content_builder = AssistantContentBuilder()
+
+        runtime_context = build_resume_chat_runtime_context(
+            search_space_id=search_space_id,
+            request_id=request_id,
+            turn_id=stream_result.turn_id,
+        )
+
+        # --- Stream loop ---
+
+        _t_stream_start = time.perf_counter()
+        runtime_rate_limit_recovered = False
+
+        def _on_first_event() -> None:
+            _perf_log.info(
+                "[stream_resume] First agent event in %.3fs (stream), %.3fs (total) (chat_id=%s)",
+                time.perf_counter() - _t_stream_start,
+                time.perf_counter() - _t_total,
+                chat_id,
+            )
+
+        async def _recover(exc: BaseException, first_event_seen: bool):
+            nonlocal llm_config_id, llm, agent_config, runtime_rate_limit_recovered
+            if not can_recover_provider_rate_limit(
+                exc,
+                first_event_seen=first_event_seen,
+                runtime_rate_limit_recovered=runtime_rate_limit_recovered,
+                requested_llm_config_id=requested_llm_config_id,
+                current_llm_config_id=llm_config_id,
+            ):
+                return None
+            runtime_rate_limit_recovered = True
+            previous_config_id = llm_config_id
+            llm_config_id = await reroute_to_next_auto_pin(
+                session,
+                chat_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                current_llm_config_id=llm_config_id,
+                requires_image_input=False,
+            )
+            new_llm, new_agent_config, llm_load_err = await load_llm_bundle(
+                session, config_id=llm_config_id, search_space_id=search_space_id
+            )
+            if llm_load_err:
+                return None
+            llm = new_llm
+            agent_config = new_agent_config
+
+            _t_rebuild = time.perf_counter()
+            new_agent = await build_main_agent_for_thread(
+                agent_factory,
+                llm=llm,
+                search_space_id=search_space_id,
+                db_session=session,
+                connector_service=connector_service,
+                checkpointer=checkpointer,
+                user_id=user_id,
+                thread_id=chat_id,
+                agent_config=agent_config,
+                firecrawl_api_key=firecrawl_api_key,
+                thread_visibility=visibility,
+                filesystem_selection=filesystem_selection,
+                disabled_tools=disabled_tools,
+            )
+            _perf_log.info(
+                "[stream_resume] Runtime rate-limit recovery repinned "
+                "config_id=%s -> %s and rebuilt agent in %.3fs",
+                previous_config_id,
+                llm_config_id,
+                time.perf_counter() - _t_rebuild,
+            )
+            log_rate_limit_recovered(
+                flow="resume",
+                request_id=request_id,
+                chat_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                previous_config_id=previous_config_id,
+                new_config_id=llm_config_id,
+            )
+            return new_agent
+
+        async for sse in run_stream_loop(
+            agent=agent,
+            streaming_service=streaming_service,
+            config=config,
+            input_data=Command(resume=routing.lg_resume_map),
+            stream_result=stream_result,
+            step_prefix=resume_step_prefix(stream_result.turn_id),
+            fallback_commit_search_space_id=search_space_id,
+            fallback_commit_created_by_id=user_id,
+            fallback_commit_filesystem_mode=(
+                filesystem_selection.mode if filesystem_selection else FilesystemMode.CLOUD
+            ),
+            fallback_commit_thread_id=chat_id,
+            runtime_context=runtime_context,
+            content_builder=stream_result.content_builder,
+            recover=_recover,
+            on_first_event=_on_first_event,
+        ):
+            yield sse
+
+        _perf_log.info(
+            "[stream_resume] Agent stream completed in %.3fs (chat_id=%s)",
+            time.perf_counter() - _t_stream_start,
+            chat_id,
+        )
+
+        # --- Finalize ---
+        # An interrupted turn returns before ``finalize_premium``; ``finally`` releases it.
+        if stream_result.is_interrupted:
+            ot.add_event("chat.interrupted", {"chat.flow": "resume"})
+            for sse in iter_token_usage_frame(
+                streaming_service,
+                accumulator=accumulator,
+                log_label="interrupted resume_chat",
+            ):
+                yield sse
+            yield streaming_service.format_finish_step()
+            yield streaming_service.format_finish()
+            yield streaming_service.format_done()
+            return
+
+        if premium_reservation is not None and user_id:
+            await finalize_premium(
+                reservation=premium_reservation,
+                user_id=user_id,
+                accumulator=accumulator,
+            )
+            premium_reservation = None
+
+        for sse in iter_token_usage_frame(
+            streaming_service, accumulator=accumulator, log_label="normal resume_chat"
+        ):
+            yield sse
+
+        for sse in iter_final_frames(streaming_service):
+            yield sse
+
+    except Exception as exc:
+        frames, summary = handle_terminal_exception(
+            exc,
+            flow="resume",
+            flow_label="resume",
+            log_prefix="stream_resume_chat",
+            streaming_service=streaming_service,
+            request_id=request_id,
+            chat_id=chat_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            chat_span=chat_span,
+        )
+        if summary["busy_error_raised"]:
+            busy_error_raised = True
+        chat_outcome = summary["chat_outcome"]
+        chat_error_category = summary["chat_error_category"]
+        for sse in frames:
+            yield sse
+
+    finally:
+        with anyio.CancelScope(shield=True):
+            # Turn-lock release happens once, below, behind the ``BusyError`` guard.
+
+            if premium_reservation is not None and user_id:
+                await release_premium(
+                    reservation=premium_reservation, user_id=user_id
+                )
+
+            await close_session_and_clear_ai_responding(session, chat_id)
+
+            await finalize_assistant_message(
+                stream_result=stream_result,
+                chat_id=chat_id,
+                search_space_id=search_space_id,
+                user_id=user_id,
+                accumulator=accumulator,
+                log_prefix="stream_resume",
+            )
+
+            # Release the lock from the original interrupted turn or any
+            # re-interrupt/bailout. Skip on ``BusyError`` (lock not held here).
+            if not busy_error_raised:
+                with contextlib.suppress(Exception):
+                    end_turn(str(chat_id))
+                    _perf_log.info(
+                        "[stream_resume] end_turn cleanup (chat_id=%s)", chat_id
+                    )
+
+        agent = llm = connector_service = None  # noqa: F841
+        stream_result = None  # noqa: F841
+        session = None  # noqa: F841
+
+        run_gc_pass(log_prefix="stream_resume", chat_id=chat_id)
+        close_chat_request_span(
+            span_cm=chat_span_cm,
+            span=chat_span,
+            chat_outcome=chat_outcome,
+            chat_agent_mode=chat_agent_mode,
+            flow="resume",
+            chat_error_category=chat_error_category,
+            duration_seconds=time.perf_counter() - _t_total,
+        )
From cfdad85058083dd822655611ae1390e4873d587a Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 25 May 2026 21:50:18 +0200
Subject: [PATCH 15/87] test(chat): add parity tests for streaming/flows/
parallel refactor
Adds 34 tests under tests/unit/tasks/chat/streaming/ that cover the
new flows tree against the legacy stream_new_chat.py module to gate
the upcoming cutover. Coverage:
* Public entry points: stream_new_chat and stream_resume_chat are
async generator functions whose parameter signatures (name, kind,
annotation, default) match the legacy versions one-for-one. Uses a
normalized-annotation comparison so PEP-563 vs eager-annotation
representation differences are tolerated.
* Extracted helpers: image-capability gate, runtime-context builders
for new-chat and resume-chat, LLM-bundle dispatcher, premium-quota
needs check + reservation dataclass, rate-limit recovery truth
table, persistence-spawn registration/self-unregistration, await
helpers.
* SSE frame iterators: iter_initial_frames + iter_final_frames emit
the canonical sequence; iter_token_usage_frame skips on None.
* Initial thinking step: 4 parametrized branches (text, image-only,
empty, mentioned-docs), long-query truncation, many-docs collapse.
These tests are scaffolding for the cutover and will be removed once
the legacy module is deleted.
---
.../test_parallel_refactor_parity.py | 582 ++++++++++++++++++
1 file changed, 582 insertions(+)
create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py
diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py
new file mode 100644
index 000000000..eb24b4df8
--- /dev/null
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_parallel_refactor_parity.py
@@ -0,0 +1,582 @@
+"""Parity gate for the parallel refactor of ``stream_new_chat.py``.
+
+The new tree under ``app.tasks.chat.streaming.flows`` is built side-by-side with
+the legacy monolithic ``app.tasks.chat.stream_new_chat`` so we can cut over
+atomically. This file pins externally-observable behaviour at module
+boundaries so a divergence between the two trees fails loudly *before* the
+cutover.
+
+What we verify:
+
+ 1. **Signature parity** — ``stream_new_chat`` / ``stream_resume_chat`` from
+ the new tree have the same call signature as the originals.
+ 2. **Helper extraction parity** — the SRP modules in ``flows/`` produce the
+ same outputs as the inline code in the legacy file for representative
+ inputs (initial thinking step, image-capability gate, runtime context,
+ SSE frame sequences, token-usage frame shape, persistence guards).
+ 3. **Wrapper delegation** — wrappers like ``load_llm_bundle`` /
+ ``can_recover_provider_rate_limit`` exist and are addressable.
+
+Delete this file along with ``stream_new_chat.py`` once the cutover is done
+(see the parent refactor plan).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import inspect
+from dataclasses import dataclass
+from typing import Any
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from app.agents.new_chat.context import SurfSenseContextSchema
+from app.services.new_streaming_service import VercelStreamingService
+
+from app.tasks.chat.stream_new_chat import (
+ stream_new_chat as old_stream_new_chat,
+ stream_resume_chat as old_stream_resume_chat,
+)
+from app.tasks.chat.streaming.flows import (
+ stream_new_chat as new_stream_new_chat,
+ stream_resume_chat as new_stream_resume_chat,
+)
+from app.tasks.chat.streaming.flows.new_chat.initial_thinking_step import (
+ build_initial_thinking_step,
+)
+from app.tasks.chat.streaming.flows.new_chat.llm_capability import (
+ check_image_input_capability,
+)
+from app.tasks.chat.streaming.flows.new_chat.persistence_spawn import (
+ await_persist_task,
+ spawn_persist_assistant_shell_task,
+ spawn_persist_user_task,
+ spawn_set_ai_responding_bg,
+)
+from app.tasks.chat.streaming.flows.new_chat.runtime_context import (
+ build_new_chat_runtime_context,
+)
+from app.tasks.chat.streaming.flows.resume_chat.runtime_context import (
+ build_resume_chat_runtime_context,
+)
+from app.tasks.chat.streaming.flows.shared.finalize_emit import iter_token_usage_frame
+from app.tasks.chat.streaming.flows.shared.first_frames import (
+ iter_final_frames,
+ iter_initial_frames,
+)
+from app.tasks.chat.streaming.flows.shared.llm_bundle import load_llm_bundle
+from app.tasks.chat.streaming.flows.shared.premium_quota import (
+ PremiumReservation,
+ needs_premium_quota,
+)
+from app.tasks.chat.streaming.flows.shared.rate_limit_recovery import (
+ can_recover_provider_rate_limit,
+)
+
+pytestmark = pytest.mark.unit
+
+
+# --------------------------------------------------------------------- signature
+
+
+def _normalize_annotation(ann: Any) -> str:
+    """Compare-friendly form for an annotation.
+
+    The legacy ``stream_new_chat.py`` does NOT use ``from __future__ import
+    annotations``, so its annotations are evaluated at import time and come
+    back as type objects / typing generics. The new tree DOES use it, so its
+    annotations are PEP-563 strings.
+
+    Both reprs describe the same types — strip the module prefixes / typing
+    namespace + the ``<class '...'>`` wrapper so we compare the canonical
+    declared form.
+    """
+    if ann is inspect.Signature.empty:
+        return ""
+    raw = ann if isinstance(ann, str) else repr(ann)
+    cleaned = (
+        raw.replace("typing.", "")
+        .replace("collections.abc.", "")
+        .replace("app.db.", "")
+        .replace("app.agents.new_chat.filesystem_selection.", "")
+        .replace("app.agents.new_chat.context.", "")
+    )
+    # Unwrap ``<class 'int'>`` → ``int`` (legacy-side type objects).
+    if cleaned.startswith("<class '") and cleaned.endswith("'>"):
+        cleaned = cleaned[len("<class '") : -len("'>")]
+    return cleaned
+
+
+def _normalize_sig(sig: inspect.Signature) -> list[tuple[Any, ...]]:
+    """Rows of (name, kind, default, normalized annotation), one per parameter."""
+    params = sig.parameters.values()
+    rows = [(p.name, p.kind, p.default, p.annotation) for p in params]
+    return [(n, k, d, _normalize_annotation(a)) for n, k, d, a in rows]
+
+
+def test_stream_new_chat_signature_matches_legacy() -> None:
+ old = inspect.signature(old_stream_new_chat)
+ new = inspect.signature(new_stream_new_chat)
+ assert _normalize_sig(new) == _normalize_sig(old)
+ assert _normalize_annotation(new.return_annotation) == _normalize_annotation(
+ old.return_annotation
+ )
+
+
+def test_stream_resume_chat_signature_matches_legacy() -> None:
+ old = inspect.signature(old_stream_resume_chat)
+ new = inspect.signature(new_stream_resume_chat)
+ assert _normalize_sig(new) == _normalize_sig(old)
+ assert _normalize_annotation(new.return_annotation) == _normalize_annotation(
+ old.return_annotation
+ )
+
+
+def test_orchestrators_are_async_generator_functions() -> None:
+ assert inspect.isasyncgenfunction(new_stream_new_chat)
+ assert inspect.isasyncgenfunction(new_stream_resume_chat)
+
+
+# ------------------------------------------------------------ initial thinking
+
+
+@dataclass
+class _FakeSurfsenseDoc:
+ """Stand-in for ``SurfsenseDocsDocument`` with just the field we read."""
+
+ title: str
+
+
+@pytest.mark.parametrize(
+ "user_query, image_urls, docs, expected_title, expected_action",
+ [
+ ("hello world", None, [], "Understanding your request", "Processing"),
+ ("", ["data:image/png;base64,AAA"], [], "Understanding your request", "Processing"),
+ ("", None, [], "Understanding your request", "Processing"),
+ (
+ "doc question",
+ None,
+ [_FakeSurfsenseDoc(title="My Doc")],
+ "Analyzing referenced content",
+ "Analyzing",
+ ),
+ ],
+)
+def test_initial_thinking_step_branches(
+ user_query: str,
+ image_urls: list[str] | None,
+ docs: list[Any],
+ expected_title: str,
+ expected_action: str,
+) -> None:
+ step = build_initial_thinking_step(
+ user_query=user_query,
+ user_image_data_urls=image_urls,
+ mentioned_surfsense_docs=docs, # type: ignore[arg-type]
+ )
+ assert step.step_id == "thinking-1"
+ assert step.title == expected_title
+ assert len(step.items) == 1
+ assert step.items[0].startswith(f"{expected_action}: ")
+
+
+def test_initial_thinking_step_truncates_long_query() -> None:
+    prefix = "Processing: "
+    step = build_initial_thinking_step(
+        user_query="x" * 200,
+        user_image_data_urls=None,
+        mentioned_surfsense_docs=[],
+    )
+    item = step.items[0]
+    # 80-char truncation + ellipsis, sandwiched after "Processing: ".
+    assert "..." in item
+    payload = item[len(prefix) :]
+    assert payload.startswith("x" * 80) and payload.endswith("...")
+
+
+def test_initial_thinking_step_collapses_many_doc_names() -> None:
+ docs = [_FakeSurfsenseDoc(title=f"Doc {i}") for i in range(5)]
+ step = build_initial_thinking_step(
+ user_query="q",
+ user_image_data_urls=None,
+ mentioned_surfsense_docs=docs, # type: ignore[arg-type]
+ )
+ assert "[5 docs]" in step.items[0]
+
+
+# ------------------------------------------------------------ capability gate
+
+
+def test_image_capability_passes_without_images() -> None:
+ assert check_image_input_capability(
+ user_image_data_urls=None, agent_config=None
+ ) is None
+
+
+def test_image_capability_passes_when_capability_unknown() -> None:
+ """Unknown / unmapped models are not blocked — only models LiteLLM has
+ *explicitly* marked text-only trip the gate."""
+
+ class _AgentConfig:
+ provider = "openrouter"
+ model_name = "unknown-mystery-model"
+ custom_provider = None
+ config_name = "Unknown"
+ litellm_params: dict[str, Any] = {}
+
+ with patch(
+ "app.services.provider_capabilities.is_known_text_only_chat_model",
+ return_value=False,
+ ):
+ assert (
+ check_image_input_capability(
+ user_image_data_urls=["data:image/png;base64,AAA"],
+ agent_config=_AgentConfig(), # type: ignore[arg-type]
+ )
+ is None
+ )
+
+
+def test_image_capability_blocks_known_text_only_models() -> None:
+ class _AgentConfig:
+ provider = "openai"
+ model_name = "gpt-3.5-turbo"
+ custom_provider = None
+ config_name = "GPT-3.5"
+ litellm_params: dict[str, Any] = {"base_model": "gpt-3.5-turbo"}
+
+ with patch(
+ "app.services.provider_capabilities.is_known_text_only_chat_model",
+ return_value=True,
+ ):
+ result = check_image_input_capability(
+ user_image_data_urls=["data:image/png;base64,AAA"],
+ agent_config=_AgentConfig(), # type: ignore[arg-type]
+ )
+ assert result is not None
+ message, error_code = result
+ assert error_code == "MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT"
+ assert "GPT-3.5" in message
+
+
+# ---------------------------------------------------------------- runtime ctx
+
+
+def test_new_chat_runtime_context_prefers_accepted_folder_ids() -> None:
+ ctx = build_new_chat_runtime_context(
+ search_space_id=7,
+ mentioned_document_ids=[1, 2],
+ accepted_folder_ids=[10],
+ mentioned_folder_ids=[20, 30],
+ request_id="req",
+ turn_id="t1",
+ )
+ assert isinstance(ctx, SurfSenseContextSchema)
+ assert ctx.search_space_id == 7
+ assert list(ctx.mentioned_document_ids) == [1, 2]
+ assert list(ctx.mentioned_folder_ids) == [10]
+ assert ctx.request_id == "req"
+ assert ctx.turn_id == "t1"
+
+
+def test_new_chat_runtime_context_falls_back_to_mentioned_folder_ids() -> None:
+ ctx = build_new_chat_runtime_context(
+ search_space_id=7,
+ mentioned_document_ids=None,
+ accepted_folder_ids=[],
+ mentioned_folder_ids=[20, 30],
+ request_id=None,
+ turn_id="t2",
+ )
+ assert list(ctx.mentioned_folder_ids) == [20, 30]
+
+
+def test_resume_chat_runtime_context_empty_mention_lists() -> None:
+ ctx = build_resume_chat_runtime_context(
+ search_space_id=42, request_id="req-r", turn_id="t-r"
+ )
+ assert ctx.search_space_id == 42
+ assert ctx.request_id == "req-r"
+ assert ctx.turn_id == "t-r"
+
+
+# ---------------------------------------------------------------- SSE frames
+
+
+def test_iter_initial_frames_emits_canonical_sequence() -> None:
+ svc = VercelStreamingService()
+ frames = list(iter_initial_frames(svc, turn_id="42:1700000000000"))
+ # Exactly 4 frames: message_start, start_step, turn-info (turn_id), turn-status (busy).
+ assert len(frames) == 4
+ assert "42:1700000000000" in frames[2]
+ assert '"status":"busy"' in frames[3] or '"status": "busy"' in frames[3]
+
+
+def test_iter_final_frames_emits_idle_then_finish_done() -> None:
+ svc = VercelStreamingService()
+ frames = list(iter_final_frames(svc))
+ assert len(frames) == 4
+ assert '"status":"idle"' in frames[0] or '"status": "idle"' in frames[0]
+
+
+# ----------------------------------------------------------- token usage frame
+
+
+class _FakeAccumulator:
+ """Minimal stand-in covering only the fields ``iter_token_usage_frame`` reads."""
+
+ def __init__(self, summary: Any = None) -> None:
+ self._summary = summary
+ self.calls = [1, 2, 3]
+ self.grand_total = 100
+ self.total_cost_micros = 50_000
+ self.total_prompt_tokens = 60
+ self.total_completion_tokens = 40
+
+ def per_message_summary(self) -> Any:
+ return self._summary
+
+ def serialized_calls(self) -> list[Any]:
+ return list(self.calls)
+
+
+def test_token_usage_frame_skipped_when_no_summary() -> None:
+ svc = VercelStreamingService()
+ frames = list(
+ iter_token_usage_frame(
+ svc,
+ accumulator=_FakeAccumulator(summary=None), # type: ignore[arg-type]
+ log_label="parity-empty",
+ )
+ )
+ assert frames == []
+
+
+def test_token_usage_frame_emitted_when_summary_present() -> None:
+    streaming = VercelStreamingService()
+    frames = list(
+        iter_token_usage_frame(
+            streaming,
+            accumulator=_FakeAccumulator(summary=[{"m": "x", "t": 100}]),  # type: ignore[arg-type]
+            log_label="parity-populated",
+        )
+    )
+    assert len(frames) == 1
+    # Field shape on the wire is fixed by the FE; assert each surfaces.
+    compact = frames[0].replace(" ", "")
+    for field in (
+        '"prompt_tokens":60',
+        '"completion_tokens":40',
+        '"total_tokens":100',
+        '"cost_micros":50000',
+    ):
+        assert field in compact
+
+
+# ------------------------------------------------------------------ llm_bundle
+
+
+def test_load_llm_bundle_routes_negative_id_to_yaml_loader() -> None:
+ async def _run() -> tuple[Any, Any, str | None]:
+ with (
+ patch(
+ "app.tasks.chat.streaming.flows.shared.llm_bundle.load_global_llm_config_by_id",
+ return_value=None,
+ ),
+ ):
+ return await load_llm_bundle(
+ session=AsyncMock(), # type: ignore[arg-type]
+ config_id=-1,
+ search_space_id=7,
+ )
+
+ llm, agent_config, error = asyncio.run(_run())
+ assert llm is None
+ assert agent_config is None
+ assert error is not None and "id -1" in error
+
+
+def test_load_llm_bundle_routes_nonnegative_id_to_db_loader() -> None:
+ async def _run() -> tuple[Any, Any, str | None]:
+ with (
+ patch(
+ "app.tasks.chat.streaming.flows.shared.llm_bundle.load_agent_config",
+ new=AsyncMock(return_value=None),
+ ),
+ ):
+ return await load_llm_bundle(
+ session=AsyncMock(), # type: ignore[arg-type]
+ config_id=12,
+ search_space_id=7,
+ )
+
+ llm, agent_config, error = asyncio.run(_run())
+ assert llm is None
+ assert agent_config is None
+ assert error is not None and "id 12" in error
+
+
+# ----------------------------------------------------------------- premium quota
+
+
+def test_needs_premium_quota_requires_user_and_premium_flag() -> None:
+ class _AgentConfig:
+ is_premium = True
+
+ class _NonPremium:
+ is_premium = False
+
+ assert needs_premium_quota(_AgentConfig(), "user-1") is True # type: ignore[arg-type]
+ assert needs_premium_quota(_AgentConfig(), None) is False # type: ignore[arg-type]
+ assert needs_premium_quota(_NonPremium(), "user-1") is False # type: ignore[arg-type]
+ assert needs_premium_quota(None, "user-1") is False
+
+
+def test_premium_reservation_dataclass_shape() -> None:
+ # Sanity: the dataclass exists and carries the fields the orchestrator uses.
+ r = PremiumReservation(request_id="abc", reserved_micros=100, allowed=True)
+ assert r.request_id == "abc"
+ assert r.reserved_micros == 100
+ assert r.allowed is True
+
+
+# ----------------------------------------------------------- rate-limit guard
+
+
+@pytest.mark.parametrize(
+ "first_event_seen, recovered, requested_id, current_id, expected",
+ [
+ (False, False, 0, -1, True),
+ # Already recovered: no second pass.
+ (False, True, 0, -1, False),
+ # User explicitly picked a config: don't silently switch.
+ (False, False, 5, -1, False),
+ # Already on a database-backed (positive) id.
+ (False, False, 0, 7, False),
+ # User has already seen output: silent rebuild not possible.
+ (True, False, 0, -1, False),
+ ],
+)
+def test_can_recover_provider_rate_limit_truth_table(
+ first_event_seen: bool,
+ recovered: bool,
+ requested_id: int,
+ current_id: int,
+ expected: bool,
+) -> None:
+ # Use a known rate-limit-shaped exception so the helper's last condition
+ # is satisfied; the guard only short-circuits to False when one of the
+ # *other* preconditions fails.
+ exc = Exception('{"error":{"type":"rate_limit_error","message":"slow"}}')
+ assert (
+ can_recover_provider_rate_limit(
+ exc,
+ first_event_seen=first_event_seen,
+ runtime_rate_limit_recovered=recovered,
+ requested_llm_config_id=requested_id,
+ current_llm_config_id=current_id,
+ )
+ is expected
+ )
+
+
+def test_can_recover_provider_rate_limit_rejects_non_rate_limit_exception() -> None:
+ assert (
+ can_recover_provider_rate_limit(
+ ValueError("not a rate limit"),
+ first_event_seen=False,
+ runtime_rate_limit_recovered=False,
+ requested_llm_config_id=0,
+ current_llm_config_id=-1,
+ )
+ is False
+ )
+
+
+# --------------------------------------------------------- persistence spawn
+
+
+def test_spawn_set_ai_responding_bg_noop_without_user_id() -> None:
+ async def _run() -> set[asyncio.Task]:
+ background: set[asyncio.Task] = set()
+ spawn_set_ai_responding_bg(
+ chat_id=1, user_id=None, background_tasks=background
+ )
+ return background
+
+ bg = asyncio.run(_run())
+ assert bg == set()
+
+
+def test_spawn_persist_user_task_registers_and_self_unregisters() -> None:
+    """Task is tracked while in flight and dropped from the set once done."""
+
+    async def _run() -> tuple[int, int, Any]:
+        background: set[asyncio.Task] = set()
+        target = "app.tasks.chat.streaming.flows.new_chat.persistence_spawn.persist_user_turn"
+        with patch(target, new=AsyncMock(return_value=99)):
+            task = spawn_persist_user_task(
+                chat_id=1,
+                user_id="u",
+                turn_id="t",
+                user_query="hi",
+                user_image_data_urls=None,
+                mentioned_documents=None,
+                background_tasks=background,
+            )
+            size_before_await = len(background)
+            result = await asyncio.shield(task)
+            # Give the done-callback one event-loop tick to run.
+            await asyncio.sleep(0)
+        return size_before_await, len(background), result
+
+    size_before, size_after, result = asyncio.run(_run())
+    assert (size_before, size_after) == (1, 0)  # registered, then self-unregistered
+    assert result == 99
+
+
+def test_spawn_persist_assistant_shell_task_registers() -> None:
+ async def _run() -> int | None:
+ background: set[asyncio.Task] = set()
+ with patch(
+ "app.tasks.chat.streaming.flows.new_chat.persistence_spawn.persist_assistant_shell",
+ new=AsyncMock(return_value=42),
+ ):
+ task = spawn_persist_assistant_shell_task(
+ chat_id=1,
+ user_id="u",
+ turn_id="t",
+ background_tasks=background,
+ )
+ return await asyncio.shield(task)
+
+ assert asyncio.run(_run()) == 42
+
+
+def test_await_persist_task_returns_none_on_failure() -> None:
+ async def _run() -> int | None:
+ async def _boom() -> int:
+ raise RuntimeError("DB down")
+
+ task = asyncio.create_task(_boom())
+ return await await_persist_task(
+ task,
+ chat_id=1,
+ turn_id="t",
+ log_label="parity-failure",
+ )
+
+ assert asyncio.run(_run()) is None
+
+
+def test_await_persist_task_returns_none_for_none_input() -> None:
+ async def _run() -> int | None:
+ return await await_persist_task(
+ None,
+ chat_id=1,
+ turn_id="t",
+ log_label="parity-none",
+ )
+
+ assert asyncio.run(_run()) is None
From 123f0d3b5d7e5b56c1fccd9583ae4699e7e6bc62 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:30:21 +0200
Subject: [PATCH 16/87] docs(automation): add v2 design plan baseline
Track the initial v2 design document for the SurfSense automation feature.
This is the baseline snapshot of the design before applying the v1-minimum
scope narrowing (capability trimming, MCP deferral, queue-routing deferral).
Subsequent commits trim this down to the v1 scope.
---
automation-design-plan.md | 1395 +++++++++++++++++++++++++++++++++++++
1 file changed, 1395 insertions(+)
create mode 100644 automation-design-plan.md
diff --git a/automation-design-plan.md b/automation-design-plan.md
new file mode 100644
index 000000000..072f7ad99
--- /dev/null
+++ b/automation-design-plan.md
@@ -0,0 +1,1395 @@
+# SurfSense Automation Feature — Design Plan (v2)
+
+A generic, extensible automation system for SurfSense that lets users (and
+future SurfSense features) trigger agent work on a schedule, on an external
+event, or on demand — with the ability to author automations either by hand
+or from a natural-language description that yields an editable, structured
+definition.
+
+This document supersedes the v1 draft. It folds in the design audit pass and
+the corrections from working through worked examples (notably: removing the
+connector bias, clarifying the executor's role, integrating MCP cleanly, and
+committing to JSON Schema as the single declarative language).
+
+---
+
+## 1. The load-bearing principle
+
+> **The JSON definition is the program. Everything else is interpreter.**
+
+Every decision in this document serves that principle. If we ever face a
+design choice and one option lets some behavior leak out of the definition
+into the engine, we pick the other option.
+
+Three properties follow from this principle, and they're the reason the
+system will survive feature growth:
+
+- **Reproducibility** — same definition + same inputs → same observable
+ behavior, regardless of which version of the engine runs it.
+- **Portability** — definitions can be exported, imported, version-
+ controlled, code-reviewed, and shared across SurfSense instances.
+- **LLM tractability** — the NL authoring flow works because the LLM only
+ needs to produce a self-contained JSON document that validates against a
+ schema. It doesn't need to understand the engine.
+
+---
+
+## 2. The four-layer contract
+
+The system is structured as four layers. Layers 1, 2, and 4 are defined by
+SurfSense developers (at registration time). Layer 3 is what users write
+(or the NL generator produces). The runtime reads all four to do its job.
+
+| Layer | What it is | Defined by |
+| ----- | ---------- | ---------- |
+| **1. Capability registry** | What this SurfSense instance can do | Developers, at startup |
+| **2. Action contract** | Per-action input/output schema | Developers, at startup |
+| **3. Automation definition** | One concrete saved automation | Users (or NL generator) |
+| **4. Trigger contract** | Per-trigger config and payload schemas | Developers, at startup |
+
+Capabilities constrain actions; actions and triggers constrain definitions.
+The runtime reads all four layers but doesn't know what's in them ahead of
+time. That's how a new capability or trigger type becomes available across
+the engine without code changes outside its registration.
+
+### Schema language
+
+Every shape in every layer is described in **JSON Schema (draft 2020-12).**
+No exceptions, no parallel languages, no inline shorthand. Two documented
+extensions on top:
+
+- `default: "$some_token"` — runtime-resolved defaults. The vocabulary is
+ fixed: `$last_fired_at`, `$creator`, `$space_default`. The engine resolves
+ these to values before validation.
+- `x-surfsense-*` annotations — editor hints (widget type, autocomplete
+ source). The validator ignores them; the form editor reads them.
+
+---
+
+## 3. Capability registry (Layer 1)
+
+A `Capability` is one discrete thing the SurfSense backend exposes —
+"post a Slack message," "query the Search Space," "generate a podcast." It
+is the atomic unit of "things automations can do."
+
+```python
+@dataclass
+class Capability:
+ id: str # "slack.post_message"
+ name: str # "Post Slack message"
+ description: str # for the NL generator
+ input_schema: dict # JSON Schema
+ output_schema: dict # JSON Schema
+ required_credentials: list[CredSpec] # what creds the handler needs
+ side_effects: set[SideEffect] # READ, WRITE, EXTERNAL_WRITE,
+ # COST_INCURRING, USER_VISIBLE
+ expected_duration_seconds: int # estimate or upper bound
+ cost_estimate: Callable[[dict], Decimal] # f(input) → estimated USD
+ handler: AsyncHandler
+```
+
+### Where capabilities live: a two-tier registry
+
+The registry has two tiers with different storage requirements. **Native
+capabilities and MCP capabilities have different lifecycles**, so they're
+persisted differently (the MCP tier splits into a durable and a cached part):
+
+| Tier | What's there | Where it lives | Lifetime |
+| --- | --- | --- | --- |
+| **Native** | Capabilities defined in SurfSense's codebase (`search_space.query`, `agent.run`, etc.) | In-memory dict, populated at startup from `automations/capabilities/native.py` | Process lifetime, identical across all workers |
+| **MCP (durable)** | The fact that this SearchSpace has connected to this MCP server, the tool list it exposes, credentials | PostgreSQL: `mcp_connections` and `mcp_tools` tables | Persistent across restarts and across time |
+| **MCP (cached)** | Handler closures wrapping `(connection_id, tool_name)` | Per-worker in-memory cache, lazily built from the database on first reference | Process lifetime, rebuilt on demand |
+
+The reason this matters: **a user connects an MCP server on Monday, writes
+an automation on Tuesday, the automation runs on Friday.** Between Monday
+and Friday, workers will restart many times. Any state that only lives in
+worker memory is gone. The closures generated at connection time would
+not survive.
+
+So we split persistence by lifecycle:
+
+- Native capability handlers live in the codebase. Always available, no
+ need for the database.
+- MCP capability metadata lives in the database, so the knowledge "this
+ SearchSpace has these capabilities" survives any restart.
+- The actual closures are built on demand from the database state. They
+ live in worker memory only until the worker dies, at which point they
+ get rebuilt by the next worker that needs them.
+
+### MCP database schema
+
+```sql
+CREATE TABLE mcp_connections (
+ id UUID PRIMARY KEY,
+ search_space_id INT REFERENCES search_spaces(id),
+ server_url TEXT,
+ transport TEXT, -- "http", "stdio", etc.
+ name TEXT, -- "Slack (Acme workspace)"
+ access_token BYTEA, -- encrypted at rest
+ refresh_token BYTEA, -- encrypted at rest
+ expires_at TIMESTAMPTZ,
+ last_harvested_at TIMESTAMPTZ,
+ created_at TIMESTAMPTZ,
+ created_by INT REFERENCES users(id)
+);
+
+CREATE TABLE mcp_tools (
+ id UUID PRIMARY KEY,
+ connection_id UUID REFERENCES mcp_connections(id) ON DELETE CASCADE,
+ name TEXT, -- "post_message"
+ description TEXT,
+ input_schema JSONB,
+ output_schema JSONB,
+ side_effects TEXT[], -- inferred or admin-curated
+ UNIQUE (connection_id, name)
+);
+```
+
+### MCP lifecycle: connect, harvest, invoke
+
+Three phases, each with distinct concerns.
+
+**Phase 1 — Connect (one-time, on user action).** User clicks "Connect
+Slack MCP." OAuth flow completes. A row is added to `mcp_connections`
+with the encrypted tokens.
+
+**Phase 2 — Harvest (right after connect, also re-runnable).** SurfSense
+opens a temporary client to the MCP server, calls `tools/list`, and writes
+one row to `mcp_tools` per discovered tool. The temporary client is then
+discarded; only the database state persists.
+
+```python
+async def harvest_mcp_server(connection_id: UUID, ctx):
+ connection = await ctx.db.get(MCPConnection, connection_id)
+ client = build_temporary_client(connection)
+ tools = await client.list_tools()
+
+ # Replace existing tool rows for this connection
+ await ctx.db.execute(
+ delete(MCPTool).where(MCPTool.connection_id == connection_id)
+ )
+ for tool in tools:
+ ctx.db.add(MCPTool(
+ connection_id=connection_id,
+ name=tool.name,
+ description=tool.description,
+ input_schema=tool.inputSchema,
+ output_schema=tool.outputSchema,
+ side_effects=infer_side_effects(tool),
+ ))
+ connection.last_harvested_at = now()
+ await ctx.db.commit()
+```
+
+Harvesting can be re-run on a schedule (say, daily) or on user request,
+to pick up new tools the server has added.
+
+**Phase 3 — Invoke (every time a step references an MCP capability).**
+This is where the closure gets built. The executor calls
+`ctx.get_capability("slack.post_message")`. The worker's in-memory cache is
+checked; on miss, the database is queried:
+
+```python
+async def get_capability(capability_id: str, ctx: ActionContext) -> Capability:
+    """Resolve a capability ID to a Capability, cached per worker and SearchSpace."""
+    cached = _WORKER_CAPABILITY_CACHE.get((ctx.search_space.id, capability_id))
+    if cached is not None:
+        return cached
+
+    if is_native(capability_id):
+        capability = _NATIVE_REGISTRY[capability_id]
+    else:
+        # MCP path: `scalar` returns the MCPTool row (or None), not a Result.
+        tool_row = await ctx.db.scalar(
+            select(MCPTool)
+            .join(MCPConnection)
+            .where(MCPConnection.search_space_id == ctx.search_space.id)
+            .where(tool_qualified_name(MCPTool, MCPConnection) == capability_id)
+        )
+        if tool_row is None:
+            raise KeyError(capability_id)  # same failure mode as the native path
+        capability = Capability(
+            id=capability_id,
+            input_schema=tool_row.input_schema,
+            output_schema=tool_row.output_schema,
+            side_effects=set(tool_row.side_effects),
+            handler=make_mcp_handler(tool_row.connection_id, tool_row.name),
+        )
+
+    _WORKER_CAPABILITY_CACHE[(ctx.search_space.id, capability_id)] = capability
+    return capability
+```
+
+The closure created by `make_mcp_handler` captures only the connection ID
+and tool name. When invoked, it asks `ctx.resolve_mcp_client(connection_id)`
+to build an authenticated client from the connection record (including
+token refresh if needed). That client is also transient — built per call,
+discarded after.
+
+### Credentials: resolved at the moment of use
+
+The handler doesn't carry credentials and the closure doesn't capture them.
+When invoked, the handler asks `ActionContext` for what it needs:
+
+```python
+def make_mcp_handler(connection_id: UUID, tool_name: str):
+ async def handler(ctx: ActionContext, args: dict) -> Any:
+ # Credential resolution happens here, per call
+ client = await ctx.resolve_mcp_client(connection_id)
+ response = await client.call_tool(name=tool_name, arguments=args)
+ return response.content
+ return handler
+```
+
+`ctx.resolve_mcp_client(connection_id)`:
+1. Loads the `mcp_connections` row
+2. Decrypts the access token
+3. Refreshes the token if it's expired (using the refresh token)
+4. Constructs an `MCPClient` with the token set as a default authorization
+ header
+
+The HTTP library carries the auth header on every subsequent call the
+client makes — the handler doesn't think about it after construction.
+
+For native capabilities calling external APIs directly,
+`ctx.resolve_http_client(provider)` returns an authenticated `httpx`
+client. For LLM operations, `ctx.resolve_llm(provider)` returns a
+configured LLM client. **Three resolution methods, one pattern: the
+context returns a client already authenticated.**
+
+This gives us three properties:
+
+- **Credentials never appear in the automation definition.** The JSON
+ contains capability references and connection IDs, never tokens.
+- **Credentials never appear in the LLM's context.** Even during
+ `agent_task`, the LLM sees tool descriptions only; the host holds
+ credentials and uses them when executing the tools the LLM requests.
+- **Credentials are loaded per-call, not pre-loaded.** The credential
+ exists in memory only during the moment a handler is making a call. No
+ long-lived secrets in worker memory.
+
+---
+
+## 4. Action contract (Layer 2)
+
+An `Action` is what a user references in a plan step. Most actions are
+thin wrappers around one capability (e.g., `slack_post` wraps
+`slack.post_message`). Some compose: `agent_task` is one action whose
+handler invokes the LangGraph runtime, which in turn can call many
+capabilities.
+
+```python
+@dataclass
+class ActionDefinition:
+ type: str # "agent_task", "slack_post"
+ name: str # for the UI
+ description: str # for the NL generator
+ config_schema: dict # JSON Schema for action.config
+ output_contract: dict | DynamicOutput # what it produces
+ uses_capabilities: list[str] # IDs from the registry
+ produces_artifacts: list[ArtifactSpec] # see §8
+ handler: AsyncHandler
+```
+
+### Tight vs loose actions
+
+Two patterns coexist by design:
+
+- **Tight actions** (`slack_post`, `linear_create_issue`, `send_email`):
+ config_schema is fully specified, output_contract is fixed, handler is a
+ thin wrapper. ~20 LOC each. Used when the user knows exactly what they
+ want done — no LLM tokens spent on trivial work.
+
+- **Loose actions** (`agent_task`): config_schema accepts a `prompt` and a
+ `tools` allowlist; output_contract is *dynamic* — the user declares the
+ output shape they want via `output_schema` in the step config; the
+ handler asks the LLM to return that shape and validates. Used when
+ judgment is needed.
+
+The agent's tool list is **the same capabilities** that tight actions call
+directly. One registry, two invocation modes. Adding a new MCP server gives
+both modes access to its tools automatically.
+
+### How names in the definition become function calls
+
+The definition contains strings like `"action": "slack_post"`. The string is
+just a name — it does not point to a function. At runtime, the executor
+performs a **name-based lookup** against the action registry:
+
+```python
+# step.action is a string from the JSON definition, e.g. "slack_post"
+action_def = _ACTION_REGISTRY[step.action] # dict lookup
+handler = action_def.handler # Python callable
+result = await handler(ctx, resolved_config) # invocation
+```
+
+The registry is a Python dict (or a thin wrapper around one) populated at
+process startup. Each entry in `automations/actions/*.py` calls a
+`register_action(...)` function at module import time, putting its
+`ActionDefinition` (including the handler function reference) into the
+registry.
+
+The same pattern applies to capabilities. The definition references
+capabilities by ID (`"slack.post_message"`); the capability registry maps
+the ID to a `Capability` object holding the handler. Definitions never
+reference Python code directly — they reference names that the registry
+resolves to code.
+
+This separation is what makes the contract portable. The definition is
+pure data. The registry is the engine's runtime vocabulary. They meet at
+name-based lookup; nothing else crosses the boundary.
+
+### The full expressive spectrum
+
+The contract supports a continuous spectrum from purely deterministic to
+fully agentic. Six practical shapes worth recognizing:
+
+| Shape | Example | Cost / latency profile |
+| --- | --- | --- |
+| **1. Direct call** | `slack_post` with literal channel and template | No LLM. ~200ms. Fractions of a cent. |
+| **2. Direct call with computed inputs** | `linear_create_issue` using `{{summary.title}}` from a prior step | No LLM for this step. Cheap. |
+| **3. Single-domain agent task** | `agent_task` with `tools: ["slack.*"]` only | One LLM, bounded toolset. |
+| **4. Multi-domain agent task, narrow** | `agent_task` with `tools: ["github.list_pull_requests", "linear.create_issue"]` | One LLM, named capabilities. |
+| **5. Multi-domain agent task, broad** | `agent_task` with `tools: ["slack.*", "github.*", "linear.*"]` | One LLM, large toolset, most agentic. |
+| **6. Composed plan** | `agent_task` (narrow) for thinking → `slack_post` + `linear_create_issue` for acting | Best cost-to-power ratio. |
+
+Shape 6 is the underrated one and the cost-and-speed answer. The agent
+reasons once (Shape 3 or 4) and its structured output drives several
+deterministic actions. This is roughly 5–10x cheaper and 3–4x faster than
+forcing the agent to do everything (Shape 5) and produces the same outcome.
+
+**The NL generator's job is to propose Shape 6-style plans by default.**
+The Review LLM flags proposals that use `agent_task` for steps a
+deterministic action could handle. This is the discipline that keeps
+automations cheap at scale.
+
+The user navigates the spectrum by intent (describing what they want), not
+by mechanism — the shape selection is the engine's responsibility, not the
+user's.
+
+---
+
+## 5. Automation definition (Layer 3)
+
+This is the JSON the user writes (or the NL generator produces). Stored in
+`automations.definition` as JSONB.
+
+### Top-level shape
+
+```jsonc
+{
+ "schema_version": "1.0",
+ "name": "Daily competitor digest",
+ "goal": "Summarize new competitor content and post to Slack",
+
+ "inputs": {
+ "schema": {
+ "type": "object",
+ "required": ["since"],
+ "properties": {
+ "since": { "type": "string", "format": "date-time",
+ "default": "$last_fired_at" },
+ "tags": { "type": "array", "items": { "type": "string" },
+ "default": ["competitor"] }
+ }
+ }
+ },
+
+ "triggers": [
+ {
+ "type": "schedule",
+ "config": { "cron": "0 9 * * 1-5", "timezone": "Africa/Kigali" }
+ }
+ ],
+
+ "plan": [
+ {
+ "step_id": "research",
+ "action": "agent_task",
+ "config": {
+ "prompt": "Find documents tagged {{inputs.tags}} indexed since {{inputs.since}}. Return JSON with bullets and source_doc_ids.",
+ "tools": ["search_space.query", "search_space.fetch_document"],
+ "model": "anthropic/claude-sonnet-4-7",
+ "output_schema": {
+ "type": "object",
+ "required": ["bullets", "source_doc_ids"],
+ "properties": {
+ "bullets": { "type": "array", "items": { "type": "string" } },
+ "source_doc_ids": { "type": "array", "items": { "type": "string" } }
+ }
+ }
+ },
+ "output_as": "summary"
+ },
+ {
+ "step_id": "deliver",
+ "action": "slack_post",
+ "config": {
+ "channel_id": "C0123",
+ "message_template": "*Competitor digest*\n\n{% for b in summary.bullets %}• {{b}}\n{% endfor %}"
+ }
+ }
+ ],
+
+ "execution": {
+ "timeout_seconds": 600,
+ "max_retries": 2,
+ "retry_backoff": "exponential",
+ "concurrency": "drop_if_running",
+ "budget_cap_usd": 1.50,
+ "on_failure": [ /* steps to run if main plan fails after retries */ ]
+ },
+
+ "metadata": { "tags": ["digest"], "created_from_nl": true }
+}
+```
+
+### Plan steps
+
+```jsonc
+{
+ "step_id": "...", // unique within plan
+ "action": "...", // references an ActionDefinition.type
+ "when": "{{ ... }}", // optional Jinja expr → bool; false = skip
+ "config": { ... }, // validated against action's config_schema
+ "output_as": "...", // binds output to this name for later steps
+ "max_retries": 0, // optional, overrides automation default
+ "timeout_seconds": 1200 // optional, overrides automation default
+}
+```
+
+Steps run **sequentially**. No parallelism, no DAGs, no loops. If a user
+needs branching, they use `when:` on multiple steps. If they need
+parallelism or iteration, they use `agent_task` and let the agent reason
+about it, or they compose automations through events (§7.5).
+
+---
+
+## 6. Trigger contract (Layer 4)
+
+Three trigger types. That's the entire taxonomy.
+
+### `schedule`
+
+```python
+TriggerDefinition(
+ type="schedule",
+ config_schema={
+ "type": "object",
+ "required": ["cron", "timezone"],
+ "properties": {
+ "cron": { "type": "string" },
+ "timezone": { "type": "string", "format": "iana-timezone" }
+ }
+ },
+ payload_schema={
+ "type": "object",
+ "properties": {
+ "fired_at": { "type": "string", "format": "date-time" },
+ "scheduled_for": { "type": "string", "format": "date-time" },
+ "last_fired_at": { "type": "string", "format": "date-time" }
+ }
+ }
+)
+```
+
+Implementation: extends `app/utils/periodic_scheduler.py`, which already
+reads connector sync schedules. Adds a second source — `automation_triggers
+WHERE type='schedule'`. Same Celery Beat checker, two source tables.
+
+Minimum interval: 1 minute (the existing checker's resolution). When a user
+sets an interval under 15 minutes, the form editor warns that an event
+trigger is probably a better fit.
+
+### `webhook`
+
+```python
+TriggerDefinition(
+ type="webhook",
+ config_schema={
+ "type": "object",
+ "properties": {
+ "input_mapping": {
+ "type": "object",
+ "additionalProperties": { "type": "string" }
+ # values are JSONPath expressions
+ }
+ }
+ },
+ # payload is whatever the POST body is; user-defined shape via mapping
+)
+```
+
+Endpoint: `POST /api/v1/automations/{id}/fire`. Bearer token shown once,
+hashed at rest, rotatable, revocable. Returns `202 Accepted` with the
+created run's URL. Caller polls for status; we do not push callbacks in
+v1 (a `callback_webhook` action can be added later).
+
+Idempotency: honors `Idempotency-Key` header or `idempotency_key` in body.
+Dedups against runs in the last 24 hours.
+
+### `event`
+
+```python
+TriggerDefinition(
+ type="event",
+ config_schema={
+ "type": "object",
+ "required": ["event_type"],
+ "properties": {
+ "event_type": { "type": "string" }, # e.g. "drive.file_added"
+ # or "surfsense.podcast.generated"
+ "filters": { "$ref": "#/definitions/filter_expression" }
+ }
+ }
+ # payload shape is documented per event_type in a separate registry
+)
+```
+
+**Events absorb both connector events and internal SurfSense events.** A
+file added to Drive and a podcast finishing in SurfSense are both events
+in the same `domain_events` table, both subscribable by automations, both
+matched by the same dispatcher code. The engine doesn't distinguish.
+
+### Filter grammar
+
+Filters are JSON-structured operators, not expressions. This is the one
+place we deliberately don't use Jinja, because filters run on a hot path
+(every event matched against every subscribing trigger) and structured
+filters can be indexed and short-circuited.
+
+Vocabulary:
+- Equality: `equals`, `not_equals`
+- String: `starts_with`, `ends_with`, `contains`, `regex`
+- Numeric: `gt`, `gte`, `lt`, `lte`
+- Set: `in`, `not_in`
+- Existence: `exists`
+- Composition: `$and`, `$or`, `$not`
+
+Inspired by AWS EventBridge and MongoDB query syntax. The filter grammar
+itself is published as a JSON Schema, so users get inline error messages.
+
+---
+
+## 7. Runtime components
+
+Each component is distinct, replaceable, and has one job.
+
+### 7.1 Dispatcher
+
+What it does: matches firing triggers to automations, creates `AutomationRun`
+rows, enqueues executor tasks.
+
+For schedule triggers: Celery Beat polls the trigger table, computes due
+ones, fires.
+
+For webhook triggers: the FastAPI handler is the dispatcher entry point.
+Validates token, runs input_mapping, creates run.
+
+For event triggers: subscribes to the `domain_events` table. For each new
+event, evaluates all matching triggers' filters, fires the matches.
+
+Common path (after a trigger has fired):
+1. Resolve `inputs` from trigger payload and defaults
+2. Validate resolved inputs against the automation's input schema
+3. **Cost estimate** — sum capabilities' `cost_estimate(args)` for the plan;
+ refuse if exceeds `budget_cap_usd`
+4. **Idempotency check** — dedup against existing pending/running runs
+5. **Snapshot the resolved definition** into the run row (immutable history)
+6. Enqueue executor task on the appropriate Celery queue (per
+ `expected_duration_seconds`)
+
+### 7.2 Executor
+
+What it is: **a Celery task wrapping a single function that walks a plan
+step by step.** Not an agent, not a workflow engine, not a scheduler. A
+loop with bookkeeping. Maybe 200 lines.
+
+```python
+async def execute_run(run_id: int) -> None:
+ run = load_run(run_id); run.status = "running"; save(run)
+ context = build_run_context(run)
+ step_outputs = {}
+
+ for step in run.plan:
+ if step.when and not evaluate_predicate(step.when, context | step_outputs):
+ record_step_skipped(run, step); continue
+
+ resolved_config = render_config(step.config, context | step_outputs)
+ action = action_registry.get(step.action)
+ validate(resolved_config, action.config_schema)
+
+ try:
+ result = await with_retries(
+ action.handler,
+ ctx=build_action_context(run, action),
+ args=resolved_config,
+ policy=step.retry_policy or run.execution.retry_policy,
+ )
+ validate(result, step.output_schema)
+ if step.output_as:
+ step_outputs[step.output_as] = result
+ record_step_succeeded(run, step, result)
+ except Exception as e:
+ record_step_failed(run, step, e)
+ await run_on_failure(run, e)
+ return
+
+ run.status = "succeeded"; save(run)
+ publish_event("automation.run.succeeded", run) # see §7.5
+```
+
+Intelligence lives **inside handlers**, not in the executor. The most
+intelligent handler is `agent_task`, which spins up a LangGraph Deep Agent
+for one step and returns when the agent finishes. The executor sees a
+validated dict come back; it doesn't know that step was "smart."
+
+### 7.3 Action handlers
+
+One handler per `ActionDefinition.type`. Receives `(ctx, args)`, returns
+a dict matching `output_contract` (or matching the user-declared
+`output_schema` for dynamic-output actions like `agent_task`).
+
+Handlers handle their own credential resolution via `ctx.resolve_credentials`.
+They do not know about retries, timeouts, or budget caps — those are the
+executor's concern.
+
+### 7.4 Template engine
+
+#### Why it exists
+
+Most fields in an automation definition contain literal strings the user
+authored once — but the actual rendered value has to change per run, because
+it includes data from the trigger payload or from prior step outputs. The
+template engine is what turns `"Daily digest for {{run.started_at}}"` into
+`"Daily digest for 2026-05-26"` at run time.
+
+Three fields use it:
+- `*_template` strings in tight action configs (Slack messages, email bodies,
+ Linear titles, etc.)
+- `prompt` in `agent_task` configs (so the agent sees resolved values, not
+ `{{...}}` placeholders)
+- `when:` step predicates (which need to evaluate to a boolean)
+
+#### Public interface
+
+Single module, ~80 lines. Three public functions — everything else in the
+engine routes through these:
+
+```python
+def render_template(template: str, context: dict) -> str: ...
+def evaluate_predicate(expression: str, context: dict) -> bool: ...
+def build_run_context(run, step_outputs) -> dict: ...
+```
+
+Backed by Jinja2's `SandboxedEnvironment`. The whole module is the seam: if
+the template language is ever swapped, only this file changes.
+
+#### Security architecture: allowlist by default
+
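+Out of the box, Jinja's `SandboxedEnvironment` still carries the stock
+filters, tests, and a handful of globals (`range`, `dict`, `lipsum`,
+`cycler`, `joiner`, `namespace`). Our module clears all three registries at
+construction, so the environment we render with starts empty. An instance
+built that way gives a template access to: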
+- Variables in the context dict we pass in (`run`, `inputs`, prior step
+ outputs)
+- Public (non-underscore) attributes of those variables
+- Jinja's built-in control flow (`{% if %}`, `{% for %}`, `{% set %}`)
+
+Nothing else. No Python builtins, no modules, no I/O, no network, no
+filesystem. Everything beyond the above must be **explicitly registered.**
+This is the structurally important property: anything we didn't add is
+inaccessible. The risk surface equals the size of what we registered.
+
+The three sandbox rules that enforce this:
+1. **Attribute access is filtered** — names starting with underscore are
+ rejected. This blocks the entire family of `{{x.__class__.__mro__...}}`
+ Python escape paths in one rule.
+2. **Globals are allowlist-only** — `open`, `eval`, `exec`, `__import__`,
+ `getattr`, every module name, are all absent unless we register them.
+ We register zero globals.
+3. **Unsafe callables are blocked** — anything marked `unsafe_callable` is
+ refused, and `str.format` / `str.format_map` are intercepted and run
+ through the sandbox's own formatter (the fixes for CVE-2016-10745 and
+ CVE-2019-10906 respectively).
+
+#### What we register, exactly
+
+- **Filters: a curated 15**, no more. `join`, `length`, `default`, `upper`,
+ `lower`, `truncate`, `tojson`, `date`, `replace`, `trim`, `slugify`,
+ `first`, `last`, `sort`, `reverse`. Each one is audited for what it does
+ with its input; none of them takes a callable, runs `eval`, or reaches
+ into Python objects beyond simple data transformation.
+- **Globals: none.**
+- **Tests: only the safe built-ins** (`defined`, `none`, `number`, `string`,
+ `mapping`, `sequence`, `boolean`).
+
+Adding a new filter requires a deliberate code change and review: does this
+filter do anything dangerous with its input? If yes, don't add it. The list
+only grows by audited additions.
+
+#### Runtime limits (defense in depth)
+
+The sandbox handles the attack surface inside the template language. Three
+additional limits handle resource exhaustion that the language permits but
+the runtime shouldn't tolerate:
+
+- **Template source length capped at 8 KB.** Checked before parsing.
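+- **Render time capped at 100 ms per render.** Implemented via a watchdog
+ thread; a render that exceeds the cap is killed and the step fails.
+ Catches nested-loop bombs over large inputs. (`range` is not available to
+ templates because no globals are registered, and Jinja's sandbox would
+ reject `range(10**9)` as too large in any case.)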
+- **Output size capped at 1 MB.** A tiny template can produce a ~100 MB
+ string via `{{ 'A' * 10**8 }}`-style multiplication (and far larger with
+ a bigger exponent); this catches it.
+
+Plus `StrictUndefined`: any reference to a missing variable raises
+immediately rather than silently rendering empty, so misconfigurations
+fail fast.
+
+#### Threat model and residual risk
+
+The trust model from day one is:
+
+- Templates are generated by an LLM from a user's natural-language input
+ (see §10), or written/edited by humans in the editable form
+- A second LLM reviews the proposal and produces a plain-language summary
+ plus flagged anomalies for the user
+- The user reviews and approves before the automation runs
+- The Generator LLM's input is scoped (user prompt + schema + registry
+ only — no arbitrary document content), minimizing prompt-injection paths
+
+The sandbox + runtime limits + curated filter list protect against the
+malformed-template attack. Human review protects against the
+semantically-malicious-but-syntactically-valid attack. These are
+complementary layers, not redundant.
+
+Known residual risks, each genuinely small:
+
+- **Future Jinja CVEs.** Historical sandbox bypasses have existed and
+ been patched. This is a generic third-party-dependency risk, comparable
+ to bugs in any other library we rely on. Mitigation: subscribe to
+ security advisories, ship updates within a week of disclosure.
+- **Side channels via prompts to LLMs.** A template that renders into a
+ prompt can attempt prompt injection of the agent at run time. This is
+ not a sandbox concern but a separate concern in `agent_task`'s design.
+- **Operator deployments with long-lived secrets in worker env vars.**
+ Mitigation: credentials fetched per-handler-per-call via
+ `ActionContext.resolve_credentials`, never pre-loaded into worker
+ env vars accessible to templates.
+
+The sandbox-with-allowlist architecture means **the attack surface
+equals the set of things we registered.** With zero globals registered
+and 15 audited filters, the surface is small, bounded, and reviewable.
+This is the structural property that makes the architecture sound, and
+it doesn't depend on hypothetical assumptions about who authors templates.
+
+#### Pre-Phase-5 gate
+
+One trust-model change is documented in the roadmap: **Phase 5 introduces
+template sharing across SearchSpaces** (automation templates as
+exportable, importable artifacts). At that point, the *approver* of a
+template (the original author) is no longer the *runner* (the importer).
+The "human reviews before save" mitigation breaks down because the
+reviewer doesn't bear the risk.
+
+Before Phase 5 ships, this needs an explicit re-approval flow: importing
+a template triggers a fresh review pass by the importing user, with the
+flagged-anomalies output prominently displayed, and the import cannot
+complete without explicit per-template approval.
+
+This is a UX/flow decision, not a template-language migration. Jinja
+itself stays; what changes is the approval workflow at the import boundary.
+
+#### The `run.*` namespace exposed in every template
+
+```
+run.id, run.started_at, run.automation_id, run.automation_name,
+run.automation_version, run.trigger_type, run.trigger_id,
+run.search_space_id, run.creator_id, run.attempt,
+run.failed_step_id, run.error.* (only in on_failure context)
+```
+
+#### Default value rendering
+
+Non-string template values render as JSON by default (via the `finalize`
+hook): lists become `["a", "b"]`, dicts become `{"k": "v"}`, datetimes
+become ISO 8601. The `| join`, `| length`, `| tojson` filters give explicit
+control. Strings render as themselves with no quoting. `None` renders as
+empty string in templates, as `null` in JSON contexts.
+
+### 7.5 Event bus
+
+`domain_events` table, polled by Celery Beat alongside the existing
+scheduler. Both connector events and internal SurfSense events publish to
+it. Both are consumed by the dispatcher's event-trigger subscriber.
+
+**Automations themselves publish events.** Successful and failed runs emit
+`automation.run.succeeded` / `automation.run.failed` events with the run
+metadata. This makes automations composable through events — chain them by
+subscribing one automation's event trigger to another's run event. No new
+mechanism; the trigger filter and event publishing already exist.
+
+Upgrade path documented: when throughput or latency demands it, replace
+PostgreSQL polling with Redis Streams. The `events.publish()` and
+`events.subscribe()` interfaces stay the same. Nothing else changes.
+
+---
+
+## 8. Cross-cutting concerns
+
+### Concurrency policy
+
+Per-automation `concurrency` field controls what happens when a new fire
+occurs while a previous run is still running:
+
+- `drop_if_running` — silently skip the new fire
+- `queue` — execute serially, in arrival order
+- `allow_parallel` — start a new run independently
+
+The dispatcher enforces this before enqueueing.
+
+### Retry policy
+
+Three fields, per-automation defaults with optional per-step overrides:
+- `max_retries`: integer, 0–10
+- `retry_backoff`: `none` | `linear` | `exponential`
+- `timeout_seconds`: integer
+
+Retries on:
+- Capability handler exceptions
+- Output schema validation failures (for dynamic-output actions, the
+ validation error is fed back to the LLM in the retry)
+
+Not retried:
+- `when:` evaluation failures (these are user errors, surface immediately)
+- Input validation failures (caught at dispatch, never reach the executor)
+
+### Budget enforcement
+
+`budget_cap_usd` is per-run. The dispatcher refuses to enqueue if estimated
+cost exceeds it. The executor kills the run if accumulated cost crosses it
+mid-flight (the LLM ops handler reports tokens consumed back to the
+executor between calls).
+
+### On-failure handlers
+
+`execution.on_failure` is a list of steps that run after the main plan has
+failed and all retries are exhausted. Same step shape as the main plan.
+Cannot have their own `on_failure`. See `run.error.*` in the run context.
+
+### Artifacts
+
+Actions that produce artifacts declare `produces_artifacts: list[ArtifactSpec]`:
+
+```python
+@dataclass
+class ArtifactSpec:
+ kind: str # "audio", "document", "image", "data"
+ retention: str # "transient" | "default" | "permanent"
+ visibility: str # "private" | "search_space" | "shared"
+```
+
+The engine handles storage (writes to SurfSense's existing object storage),
+URL generation (signed, scoped to the run's permissions), and cleanup (a
+nightly Celery Beat task deletes expired artifacts).
+
+### Duration classes and queue routing
+
+Capabilities declare `expected_duration_seconds`. The dispatcher routes
+runs to Celery queues based on the longest-duration step:
+- < 10s → `automations_fast`
+- 10s – 5min → `automations_medium`
+- 5min – 1hr → `automations_long`
+
+Operators scale each queue's worker pool independently. A future "very
+long" queue is a config change, not a contract change.
+
+---
+
+## 9. Data model
+
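+Six tables. All scoped by `search_space_id` for RBAC — directly, or through
+the parent row for child tables (`automation_triggers` and `automation_runs`
+via `automations`, `mcp_tools` via `mcp_connections`).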
+
+The first four (`automations`, `automation_triggers`, `automation_runs`,
+`domain_events`) are the engine's own state. The last two
+(`mcp_connections`, `mcp_tools`) hold the durable knowledge that backs
+MCP-derived capabilities — see §3 for the lifecycle rationale.
+
+### `automations`
+
+| field | type | notes |
+| ----------------- | ----------------------------------- | -------------------------------------------------------------------------- |
+| `id` | int PK | |
+| `search_space_id` | FK → `search_spaces.id` | |
+| `created_by` | FK → `users.id` | runs execute as this identity |
+| `name` | str | |
+| `description` | str | |
+| `status` | enum | `active`, `paused`, `archived` |
+| `definition` | jsonb | the editable structured spec |
+| `version` | int | bumped on every edit |
+| `created_at` / `updated_at` | timestamps | |
+
+### `automation_triggers`
+
+| field | type | notes |
+| --------------- | ----------------------------------------------------------------------------- | ------------------------------------------- |
+| `id` | int PK | |
+| `automation_id` | FK | |
+| `type` | enum: `schedule`, `webhook`, `event` | |
+| `config` | jsonb | validated against trigger's `config_schema` |
+| `enabled` | bool | |
+| `secret_hash` | str / null | for webhook bearer tokens |
+| `last_fired_at` | timestamp | |
+
+### `automation_runs`
+
+| field | type | notes |
+| ----------------- | ---------------------------------------------------------------------------- | -------------------------------------------------- |
+| `id` | int PK | |
+| `automation_id` | FK | |
+| `trigger_id` | FK / null | null = manual via UI |
+| `status` | enum | `pending`, `running`, `succeeded`, `failed`, `cancelled`, `timed_out` |
+| `definition_snapshot` | jsonb | the definition as it was when this run fired |
+| `trigger_payload` | jsonb | |
+| `resolved_inputs` | jsonb | |
+| `step_results` | jsonb | array of per-step results with timing |
+| `output` | jsonb / null | |
+| `artifacts` | jsonb | references to created artifacts |
+| `error` | jsonb / null | |
+| `cost_usd` | decimal | accumulated cost |
+| `started_at` / `finished_at` | timestamps | |
+| `agent_session_id`| str / null | link to LangGraph trace if agent_task was used |
+
+### `domain_events`
+
+| field | type | notes |
+| ----------------- | ----------- | -------------------------------------------------- |
+| `id` | UUID PK | |
+| `search_space_id` | FK | scoping |
+| `event_type` | varchar | e.g. `drive.file_added`, `automation.run.succeeded` |
+| `source_id` | varchar | which connector/automation/etc. produced it |
+| `payload` | jsonb | matches the event type's documented schema |
+| `created_at` | timestamp | |
+| `consumed_by` | jsonb | array of consumer_ids, for tracking + replay |
+| `expires_at` | timestamp | auto-cleanup after 7 days |
+
+### `mcp_connections`
+
+Persistent record of MCP server connections per SearchSpace.
+
+| field | type | notes |
+| ------------------- | ----------- | -------------------------------------------------- |
+| `id` | UUID PK | |
+| `search_space_id` | FK | scoping |
+| `server_url` | text | the MCP server's endpoint |
+| `transport` | text | `"http"`, `"stdio"`, etc. |
+| `name` | text | human-readable label (e.g., "Slack — Acme") |
+| `access_token` | bytea | encrypted at rest |
+| `refresh_token` | bytea | encrypted at rest |
+| `expires_at` | timestamp | for OAuth tokens |
+| `last_harvested_at` | timestamp | when tool list was last refreshed |
+| `created_at` | timestamp | |
+| `created_by` | FK → users | |
+
+### `mcp_tools`
+
+The tool list each connected MCP server exposes. Acts as the durable
+source for MCP capabilities — definitions reference `mcp_tools` rows by
+qualified name, and worker processes lazily build handler closures from
+this state.
+
+| field | type | notes |
+| --------------- | ----------- | ------------------------------------------------ |
+| `id` | UUID PK | |
+| `connection_id` | FK → `mcp_connections.id` ON DELETE CASCADE | |
+| `name` | text | the tool name reported by the MCP server |
+| `description` | text | description for the NL generator and form editor |
+| `input_schema` | jsonb | JSON Schema for tool arguments |
+| `output_schema` | jsonb | JSON Schema for tool results |
+| `side_effects` | text[] | inferred from MCP hints + naming + admin override |
+| UNIQUE | | (connection_id, name) |
+
+NL drafts are **not** a core table. They live in a generic short-TTL store
+(Redis or a transient table) when the NL flow is built in Phase 1.
+
+---
+
+## 10. NL authoring flow
+
+**This is how the system is intended to be used from day one, not just a
+Phase 3 addition.** The product surface is: user describes intent in natural
+language, LLM produces a structured proposal, user reviews and edits in an
+auto-generated form, then saves. Hand-authoring JSON directly is supported
+but is not the primary path.
+
+This shapes the trust model. Templates are LLM-generated from day one, not
+hand-written by power users. The mitigation is human-in-the-loop review,
+not "trusted authors only."
+
+### Pass 1: Proposal generation
+
+User provides natural-language input. The Generator LLM is given:
+- The full schema set (input schema for definition, registry of action
+ types with their config_schemas, registry of trigger types, available
+ capabilities for this SearchSpace, list of allowed Jinja filters)
+- A tool to list available connectors, channels, and other SearchSpace
+ resources, so it doesn't invent names that don't exist
+- A few-shot set of examples
+
+**Scoped input.** The Generator does *not* receive arbitrary SearchSpace
+document content. Its context is the user's prompt plus the schema and
+registry information. This minimizes the prompt-injection surface — there's
+no document text in the context for an attacker to seed instructions into.
+
+If a user wants document-aware generation later ("create an automation
+that processes documents like this one"), that's a deliberate feature
+extension with its own prompt-injection mitigations, not the default flow.
+
+Output: a structured proposal matching the automation definition schema.
+
+### Pass 2: Deterministic validation
+
+Server-side, before the proposal reaches the user:
+- Validate against JSON Schema (shape correctness)
+- Verify every capability referenced exists in the registry (resource existence)
+- Verify every connector/channel/resource referenced exists in this SearchSpace
+- Validate every template against the sandbox's allowlist (no underscore
+ attributes, no unregistered filter names, length under cap)
+
+Failures here are deterministic errors, not warnings. A proposal that
+references a non-existent capability or includes a template using
+`{{x.__class__}}` is rejected before the user sees it; the Generator is
+re-prompted with the validation error and asked to fix the proposal.
+
+### Pass 2.5: Review pass
+
+A second LLM call — the **Review LLM** — examines the validated proposal and
+produces two outputs for the user:
+
+1. **A plain-language summary** of what the automation will do, in business
+ terms. "This automation will run every weekday at 9am. It reads documents
+ in this SearchSpace tagged 'competitor' that were indexed since the last
+ run, asks an agent to summarize them as 5 bullets, and posts the summary
+ to your #engineering-standup Slack channel. Estimated cost: $0.40 per
+ run."
+
+2. **A "things worth checking" list** flagging anything unusual:
+ - Templates with unusual attribute paths or filter usage
+ - Prompts containing instructions that look more like commands than
+ descriptions ("ignore previous instructions" style)
+ - Action sequences that touch external systems without obvious benefit
+ to the user
+ - Cost estimates that seem high relative to the goal
+ - References to capabilities the user hasn't used before
+ - Schedules tighter than 15 minutes (likely should be event triggers)
+
+The Review LLM is a **UX layer** that makes review actually useful. It is
+**not a security boundary.** The deterministic controls (sandbox, runtime
+limits, schema validator) are the security boundaries. The Review LLM
+helps users catch their own intent mismatches and surfaces anomalies for
+attention, but the sandbox would block dangerous templates even if the
+Review LLM missed them.
+
+This separation is important: two probabilistic controls compounding can
+create a false sense of security. The Review LLM is explicitly framed in
+the architecture as helper, not gatekeeper.
+
+### Pass 3: Editable review
+
+The user lands on a form pre-filled with the proposal. The page shows:
+- The plain-language summary from the Review pass
+- The flagged items, prominently displayed near the relevant fields
+- The full editable form, auto-generated from the JSON Schemas
+- Cost estimate and impact summary (which external systems get touched)
+
+**Every field is editable.** Clarifications appear as required fields.
+Templates are shown in code-styled fields with syntax highlighting and the
+filter palette visible. The user can edit any field; saving re-runs Pass 2
+(deterministic validation) before persisting.
+
+Hitting **Save** promotes the proposal to a row in the `automations` table.
+
+### Editing existing automations
+
+NL editing of an existing automation is a patch operation: the Generator
+LLM receives the current definition plus the NL instruction and produces a
+modified proposal. The same Pass 2 (validation) and Pass 2.5 (review) run
+against the modified version, and the user reviews the diff before saving.
+Existing run history is unaffected — only future runs use the new version.
+
+### Why human-in-the-loop is non-negotiable
+
+Deterministic validation, the sandbox, and the Review LLM's flagged
+anomalies are three layers that catch malformed or malicious proposals
+(only the first two are security boundaries; the Review LLM is a helper).
+The human approval step is the fourth and most important layer. It exists
+because:
+
+- LLMs can be prompt-injected; humans can spot text that asks them to
+ ignore instructions
+- LLMs can produce confident-but-wrong proposals; humans can catch
+ semantic mismatches between intent and output
+- The cost of a bad automation running unattended is high; the cost of a
+ user clicking "approve" after reading is low
+
+The architecture must never offer "auto-approve" or "skip review" options
+for LLM-generated proposals. Save requires human action on the proposal,
+always.
+
+---
+
+## 11. Repository layout
+
+```
+surfsense_backend/app/
+├── automations/ # NEW: the engine
+│ ├── __init__.py
+│ ├── models.py # SQLAlchemy models for 6 tables
+│ ├── schemas.py # Pydantic schemas (definition envelope, etc.)
+│ ├── routes.py # FastAPI router (/api/v1/automations)
+│ ├── service.py # CRUD + business logic
+│ ├── dispatcher.py # trigger matching, cost check, run creation
+│ ├── executor.py # the Celery task that runs a plan
+│ ├── templating.py # Jinja sandbox + filters
+│ ├── events.py # publish/subscribe for domain_events
+│ ├── filters.py # JSON filter grammar evaluator
+│ ├── actions/
+│ │ ├── registry.py
+│ │ ├── agent_task.py
+│ │ ├── transform_data.py
+│ │ ├── slack_post.py
+│ │ ├── send_email.py
+│ │ ├── notification.py
+│ │ └── (more in Phase 5: podcast_generation, report_generation, ...)
+│ ├── triggers/
+│ │ ├── registry.py
+│ │ ├── schedule.py # Celery Beat hookup
+│ │ ├── webhook.py # /fire endpoint
+│ │ └── event.py # subscribes to domain_events
+│ ├── capabilities/
+│ │ ├── registry.py
+│ │ ├── native.py # native capability registrations
+│ │ ├── mcp_harvester.py # registers MCP tools as capabilities (Phase 4)
+│ │ └── (LLM ops registered alongside)
+│ └── nl/ # Phase 1 — primary user path
+│ ├── generator.py # Generator LLM
+│ ├── reviewer.py # Review LLM (summary + flagged items)
+│ ├── validator.py # deterministic schema + resource checks
+│ └── prompts.py # system prompts for both LLMs
+│
+├── utils/
+│ └── periodic_scheduler.py # EXTENDED to scan automation_triggers
+│
+└── alembic/versions/
+ └── NN_add_automation_tables.py
+
+surfsense_web/app/(routes)/
+└── automations/ # NEW: UI
+ ├── page.tsx # list
+ ├── new/page.tsx # NL input + draft preview (Phase 1)
+ ├── [id]/page.tsx # editor (auto-generated forms)
+ └── [id]/runs/page.tsx # run history, streamed via Electric SQL
+```
+
+---
+
+## 12. Phased delivery
+
+Each phase delivers something usable. Each de-risks the next. **NL authoring
+is the primary user path from Phase 1** — what evolves across phases is
+which actions and triggers are available, not whether users can describe
+automations in natural language.
+
+### Phase 1 — Engine MVP with NL authoring
+- 4 tables + Alembic migration
+- Capability registry with native capabilities (`search_space.query`,
+ `search_space.fetch_document`, `agent.run`)
+- `agent_task` action only
+- `schedule` trigger + manual "Run now" endpoint
+- Executor with retries, timeouts, budget caps
+- Template engine (Jinja sandbox + 15 filters + 4 runtime limits)
+- **NL authoring flow**: Generator LLM, deterministic validator,
+ Review LLM, editable form
+- Run history UI with Electric SQL streaming
+
+**After Phase 1**: a user can describe an automation in natural language,
+review the proposal (with summary + flagged anomalies), edit any field,
+save, and watch it run on a schedule. The Claude Routines value
+proposition, on SurfSense's data, with NL-first authoring.
+
+### Phase 2 — Webhooks and delivery
+- `webhook` trigger with per-automation bearer tokens
+- Tight actions: `slack_post`, `send_email`, `notification`
+- `transform_data` action
+- `on_failure` hooks
+- Step-level retry/timeout overrides
+- Concurrency policy enforcement
+
+**After Phase 2**: external systems can drive automations, results go
+somewhere humans see, complex pipelines have proper error handling.
+
+### Phase 3 — NL authoring polish
+- NL patch flow for editing existing automations (diff-based)
+- Conversational refinement during proposal review ("change the schedule
+ to weekdays only," "add a Slack notification on failure")
+- Improved Review LLM coverage (more anomaly patterns,
+ cost-relative-to-goal heuristics)
+- Saved prompt templates and starter examples
+
+**After Phase 3**: NL authoring is the polished primary surface; edit
+flows are conversational rather than form-only.
+
+### Phase 4 — Event triggers
+- `domain_events` table and `events.py` module
+- Indexing pipeline publishes `connector.*` events (smallest change — just
+ add publish calls to the existing flow)
+- Automations publish `automation.run.*` events on completion
+- `event` trigger with filter grammar
+- MCP capability harvester (so MCP-backed events and tools both work)
+
+**After Phase 4**: "do X when Y happens" automations work, including
+automation-chaining through events.
+
+### Phase 5 — Wrapping existing features and sharing
+- Wrap existing SurfSense capabilities as actions: `podcast_generation`,
+ `report_generation`, `indexing_sweep`
+- Artifact lifecycle implementation
+- `expected_duration_seconds` based queue routing (split `automations_long`
+ from `automations_default`)
+- **Automation templates** (shareable, exportable, importable) — with
+ the import re-approval flow that handles the approver-≠-runner trust
+ shift documented in §7.4's pre-Phase-5 gate
+- Cross-automation composition examples in the docs
+
+**After Phase 5**: every existing SurfSense capability is automatable
+without any per-feature code, and automations can be shared between
+SearchSpaces and users.
+
+---
+
+## 13. Decisions locked
+
+For reference — every decision made through the design process, in one
+place.
+
+### Foundations
+1. ✅ JSON Schema 2020-12 is the single schema language for everything
+2. ✅ Definition is the program; infrastructure is the interpreter
+3. ✅ List of steps (not single action) in the plan, with `output_as` chaining
+4. ✅ One capability registry serving native + MCP + LLM operations through the same interface
+5. ✅ Capability IDs do not leak handler kind (`slack.post_message`, not `mcp.slack.post_message`)
+6. ✅ Name-based resolution: definitions reference actions and capabilities by string ID. The registry is the runtime's vocabulary; lookup is a dict access. No code references in definitions.
+7. ✅ The expressive spectrum runs from pure direct calls to broad agent_task; the NL generator proposes the cheapest shape that meets intent (Shape 6 from §4 by default)
+
+### Trigger taxonomy
+8. ✅ Three trigger types: `schedule`, `webhook`, `event`
+9. ✅ Events absorb both connector events and internal SurfSense events
+10. ✅ Filter grammar is JSON-structured operators (not Jinja)
+
+### Templating cluster
+11. ✅ Jinja2 `SandboxedEnvironment` for templates and `when:` predicates — but with the explicit understanding that the sandbox is an allowlist-by-default architecture, not a denylist
+12. ✅ Zero globals registered. Curated 15 filters only, each audited for safe behavior with hostile input. List grows only by reviewed addition
+13. ✅ Four runtime mitigations: `StrictUndefined`, 8 KB template source cap, 100 ms render time cap (watchdog-enforced), 1 MB output size cap
+14. ✅ Non-string template values render as JSON by default
+15. ✅ Fixed `run.*` namespace, documented
+16. ⏸ **Pre-Phase-5 gate**: template sharing across SearchSpaces breaks the approver-equals-runner trust model. Mitigation is a re-approval flow at the import boundary (UX-level), not a template-language migration. Jinja itself stays.
+
+### Execution
+17. ✅ Executor is a Celery task wrapping a sequential loop — not an agent
+18. ✅ `when:` is optional per step; false = skipped (not failed)
+19. ✅ No DAGs, no parallelism, no loops — composition via agent_task or events
+20. ✅ `on_failure` part of execution policy from v1
+21. ✅ Step-level retry and timeout overrides
+22. ✅ Budget cap enforced pre-enqueue and mid-flight
+
+### Components
+23. ✅ Dispatcher / executor / handlers / registry — distinct, each replaceable
+24. ✅ Side effects are a set, including `USER_VISIBLE`
+25. ✅ `expected_duration_seconds` integer drives queue routing
+26. ✅ `produces_artifacts` is a list of `ArtifactSpec`, not a bool
+27. ✅ Output schemas recommended on `agent_task`; editor warns when missing
+
+### Event bus
+28. ✅ `domain_events` table for v1, with upgrade path to Redis Streams
+29. ✅ Automations publish run events for composability
+30. ✅ Publish/subscribe behind interface — no direct table access elsewhere
+
+### Capability storage (two-tier persistence)
+31. ✅ Native capabilities registered in-memory at startup from the codebase. Identical across all workers.
+32. ✅ MCP capability metadata persisted in `mcp_connections` and `mcp_tools` tables. Survives restarts.
+33. ✅ MCP handler closures built lazily per worker from database state. Worker-local cache, rebuilt on demand.
+34. ✅ MCP server tool list re-harvested on a schedule (default: daily) and on user request.
+35. ✅ MCP tools harvested into the capability registry at connection time
+36. ✅ Side effects inferred from MCP hints + naming + admin overrides
+37. ✅ MCP tools callable directly (no agent required) when caller knows args
+
+### Credentials
+38. ✅ Credentials never appear in the automation definition — only connection IDs do
+39. ✅ Credentials never appear in the LLM's context — the host holds them and uses them on the LLM's behalf
+40. ✅ Credentials resolved per-call by `ActionContext`, not pre-loaded into worker environment
+41. ✅ Tokens encrypted at rest in the database; refresh handled automatically by `ActionContext.resolve_*_client`
+
+### NL authoring
+42. ✅ LLM-authored templates is the primary path from day one — not a Phase 3 addition. Hand-authoring JSON is supported but secondary
+43. ✅ Generator LLM produces JSON; deterministic schema + resource validation runs before user sees the proposal
+44. ✅ Review LLM produces plain-language summary + flagged anomalies for the user — UX layer, not a security boundary
+45. ✅ Generator LLM's input is scoped (user prompt + schema + registry only); arbitrary document content is not fed in
+46. ✅ Human approval is required before save — no auto-approval option, ever
+47. ✅ Every field editable in the proposal; unresolved questions surface as clarifications
+48. ✅ NL drafts are transient storage, not a core table
+
+### Data model
+49. ✅ Six tables total — four for engine state, two for MCP persistence
+50. ✅ Run rows snapshot the definition (immutable history)
+51. ✅ All entities scoped by `search_space_id` for RBAC
+52. ✅ Editing an automation bumps `version`; existing runs unaffected
+
+---
+
+## 14. Open questions deferred to implementation
+
+None of these block design; they're decisions a developer will make in
+context, with the principle from §1 as their guide.
+
+- Exact retry backoff formulas (multipliers, jitter, ceilings)
+- Webhook signature verification standards (HMAC scheme, header naming)
+- Whether to support inline JSON Schema `$ref` to external schemas, or
+ inline everything
+- Specific CDN/storage backend choices for artifacts (probably
+ whatever SurfSense already uses for podcasts)
+- Rate limits per SearchSpace and per user
+- Audit log retention policy
+
+---
+
+## 15. Why this is ready to build
+
+This document satisfies five tests:
+
+1. **The four worked examples** (digest, CI webhook, file-added-trigger,
+ weekly podcast) all express cleanly in the contract without special
+ cases. Each one was used to find gaps before the gaps reached code.
+
+2. **The audit pass identified six refinements**, all incorporated. No
+ pending audit items.
+
+3. **Every decision points back to the principle from §1.** When a future
+ feature request lands, "does it belong in the definition or in the
+ engine?" gives a clear answer.
+
+4. **The build is staged** so Phase 1 ships in weeks, not months, and
+ each subsequent phase delivers user value while de-risking the next.
+
+5. **Existing SurfSense infrastructure is reused**, not paralleled. Celery
+ Beat, PostgreSQL/JSONB, Electric SQL, SQLAlchemy/Alembic, the existing
+ `tools/registry.py` pattern, the existing Search Space scoping — all
+ continue to do what they already do. The automation engine is a new
+ directory, not a new system.
+
+The next document a developer needs is the Pydantic models and JSON
+Schemas spelled out concretely. Those follow mechanically from this plan.
+
+---
+
+*Sources consulted: Claude Code Routines documentation;
+NousResearch/hermes-agent (cron and skills subsystems); n8n
+documentation on node types and workflow data model; the SurfSense
+repository and DeepWiki architecture notes (FastAPI + Celery Beat +
+Electric SQL + LangGraph Deep Agents + Search Space RBAC); Model Context
+Protocol specification for capability harvesting; AWS EventBridge for
+filter grammar; workflow-pattern literature (van der Aalst et al.) for
+the trigger / action / concurrency vocabulary.*
From 16b661862930266418fb1bfda6b75e7fbf8f62c7 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:33:10 +0200
Subject: [PATCH 17/87] docs(automation): trim Capability dataclass to
v1-minimum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reduce the §3 Capability dataclass from ten fields to five:
id, description, input_schema, output_schema, handler. Removed
fields (name, required_credentials, side_effects,
expected_duration_seconds, cost_estimate) are reintroduced only when a
concrete consumer feature demands them. The v1 invariant is that a
Capability is a typed, named, callable unit and every consumer
(executor, agent tool layer, future HTTP API) sees the same five-field
shape.
---
automation-design-plan.md | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/automation-design-plan.md b/automation-design-plan.md
index 072f7ad99..ca7506446 100644
--- a/automation-design-plan.md
+++ b/automation-design-plan.md
@@ -76,18 +76,39 @@ is the atomic unit of "things automations can do."
@dataclass
class Capability:
id: str # "slack.post_message"
- name: str # "Post Slack message"
- description: str # for the NL generator
+ description: str # for the NL generator + UI label
input_schema: dict # JSON Schema
output_schema: dict # JSON Schema
- required_credentials: list[CredSpec] # what creds the handler needs
- side_effects: set[SideEffect] # READ, WRITE, EXTERNAL_WRITE,
- # COST_INCURRING, USER_VISIBLE
- expected_duration_seconds: int # estimate or upper bound
- cost_estimate: Callable[[dict], Decimal] # f(input) → estimated USD
handler: AsyncHandler
```
+### v1-minimum: five fields, nothing else
+
+The Capability is **deliberately five fields in v1**. Every additional field
+earlier drafts considered (`name`, `required_credentials`, `side_effects`,
+`expected_duration_seconds`, `cost_estimate`) has been removed; a field
+returns only when a concrete consumer feature demands it (`cost_estimate`
+never does). Authoring stays cheap and the registry trivial to introspect:
+
+- `name` → folded into `description`. The UI can render a short label from
+ the first line of `description` or fall back to `id`. No separate field
+ needed in v1.
+- `required_credentials` → returns when external-credential capabilities
+ ship (Phase 2). v1 capabilities run server-side with app config; nothing
+ to declare.
+- `side_effects` → returns when RBAC inside automations or
+ `READ_ONLY`-only agent tool gating arrives. v1 capabilities are
+ hand-picked and all trusted code.
+- `expected_duration_seconds` → returns when multi-queue routing ships.
+ Single Celery queue in v1.
+- `cost_estimate` → never returns as a declared field; cost is measured
+ per run from a ledger, aggregated per Capability, and surfaced as a
+ historical average. Pre-flight checks are deferred.
+
+The runtime invariant: a Capability is **a typed, named, callable thing
+the system can do.** Every consumer (executor, agent tool layer, future
+HTTP API) sees the same five-field shape and uses it the same way.
+
### Where capabilities live: a two-tier registry
The capability registry has different storage requirements for different
From b029c090bdfa7cbcadfab346e106a5599237ebbd Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:34:03 +0200
Subject: [PATCH 18/87] docs(automation): defer MCP integration to Phase 4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Remove the two-tier registry, MCP database schema, harvester pseudocode,
and the lazy per-worker closure cache from §3. v1 ships with a single
in-memory native registry; the MCP design is reintroduced in Phase 4
along with the rest of the integration-tooling surface.
The deferral is additive: the v1 registry interface is the same callable
surface a Phase-4 MCP harvester will register into. No design rewrite
between phases.
---
automation-design-plan.md | 155 ++++++--------------------------------
1 file changed, 23 insertions(+), 132 deletions(-)
diff --git a/automation-design-plan.md b/automation-design-plan.md
index ca7506446..07eca2049 100644
--- a/automation-design-plan.md
+++ b/automation-design-plan.md
@@ -109,143 +109,34 @@ The runtime invariant: a Capability is **a typed, named, callable thing
the system can do.** Every consumer (executor, agent tool layer, future
HTTP API) sees the same five-field shape and uses it the same way.
-### Where capabilities live: a two-tier registry
+### Where capabilities live (v1)
-The capability registry has different storage requirements for different
-kinds of capabilities. **Native capabilities and MCP capabilities have
-different lifecycles**, so they're persisted differently:
+In v1, the capability registry is a single in-memory dict, populated at
+process startup from native registrations in
+`automations/registries/capabilities/`. Identical across all workers.
+No database persistence, no closures rebuilt per worker.
-| Tier | What's there | Where it lives | Lifetime |
-| --- | --- | --- | --- |
-| **Native** | Capabilities defined in SurfSense's codebase (`search_space.query`, `agent.run`, etc.) | In-memory dict, populated at startup from `automations/capabilities/native.py` | Process lifetime, identical across all workers |
-| **MCP (durable)** | The fact that this SearchSpace has connected to this MCP server, the tool list it exposes, credentials | PostgreSQL: `mcp_connections` and `mcp_tools` tables | Persistent across restarts and across time |
-| **MCP (cached)** | Handler closures wrapping `(connection_id, tool_name)` | Per-worker in-memory cache, lazily built from the database on first reference | Process lifetime, rebuilt on demand |
+### MCP integration — deferred to Phase 4
-The reason this matters: **a user connects an MCP server on Monday, writes
-an automation on Tuesday, the automation runs on Friday.** Between Monday
-and Friday, workers will restart many times. Any state that only lives in
-worker memory is gone. The closures generated at connection time would
-not survive.
+The earlier two-tier registry (native + MCP-derived), the
+`mcp_connections` / `mcp_tools` tables, the harvester, and the lazy
+per-worker closure cache are **deferred to Phase 4** along with the
+rest of the integration-tooling surface. They are removed from v1
+because:
-So we split persistence by lifecycle:
+- v1 has no external connector capabilities (no Slack, Notion, Drive,
+ etc.). The only capabilities that will ship are server-side helpers
+ (search-space query / fetch) plus the loose `agent_task` action.
+- Without external connectors, the lifecycle mismatch that motivates
+ the two-tier design (connect Monday, run Friday, workers restarted
+ in between) doesn't arise. A startup-time dict is sufficient.
+- Phase 4 reintroduces this design as-is — the registry interface in
+ v1 is the same callable surface a Phase-4 MCP harvester will register
+ into. The deferral is additive, not a different design.
-- Native capability handlers live in the codebase. Always available, no
- need for the database.
-- MCP capability metadata lives in the database, so the knowledge "this
- SearchSpace has these capabilities" survives any restart.
-- The actual closures are built on demand from the database state. They
- live in worker memory only until the worker dies, at which point they
- get rebuilt by the next worker that needs them.
-
-### MCP database schema
-
-```sql
-CREATE TABLE mcp_connections (
- id UUID PRIMARY KEY,
- search_space_id INT REFERENCES search_spaces(id),
- server_url TEXT,
- transport TEXT, -- "http", "stdio", etc.
- name TEXT, -- "Slack (Acme workspace)"
- access_token BYTEA, -- encrypted at rest
- refresh_token BYTEA, -- encrypted at rest
- expires_at TIMESTAMPTZ,
- last_harvested_at TIMESTAMPTZ,
- created_at TIMESTAMPTZ,
- created_by INT REFERENCES users(id)
-);
-
-CREATE TABLE mcp_tools (
- id UUID PRIMARY KEY,
- connection_id UUID REFERENCES mcp_connections(id) ON DELETE CASCADE,
- name TEXT, -- "post_message"
- description TEXT,
- input_schema JSONB,
- output_schema JSONB,
- side_effects TEXT[], -- inferred or admin-curated
- UNIQUE (connection_id, name)
-);
-```
-
-### MCP lifecycle: connect, harvest, invoke
-
-Three phases, each with distinct concerns.
-
-**Phase 1 — Connect (one-time, on user action).** User clicks "Connect
-Slack MCP." OAuth flow completes. A row is added to `mcp_connections`
-with the encrypted tokens.
-
-**Phase 2 — Harvest (right after connect, also re-runnable).** SurfSense
-opens a temporary client to the MCP server, calls `tools/list`, and writes
-one row to `mcp_tools` per discovered tool. The temporary client is then
-discarded; only the database state persists.
-
-```python
-async def harvest_mcp_server(connection_id: UUID, ctx):
- connection = await ctx.db.get(MCPConnection, connection_id)
- client = build_temporary_client(connection)
- tools = await client.list_tools()
-
- # Replace existing tool rows for this connection
- await ctx.db.execute(
- delete(MCPTool).where(MCPTool.connection_id == connection_id)
- )
- for tool in tools:
- ctx.db.add(MCPTool(
- connection_id=connection_id,
- name=tool.name,
- description=tool.description,
- input_schema=tool.inputSchema,
- output_schema=tool.outputSchema,
- side_effects=infer_side_effects(tool),
- ))
- connection.last_harvested_at = now()
- await ctx.db.commit()
-```
-
-Harvesting can be re-run on a schedule (say, daily) or on user request,
-to pick up new tools the server has added.
-
-**Phase 3 — Invoke (every time a step references an MCP capability).**
-This is where the closure gets built. The executor calls
-`ctx.get_capability("slack.post_message")`. The worker's in-memory cache is
-checked; on miss, the database is queried:
-
-```python
-async def get_capability(capability_id: str, ctx: ActionContext) -> Capability:
- cached = _WORKER_CAPABILITY_CACHE.get((ctx.search_space.id, capability_id))
- if cached:
- return cached
-
- if is_native(capability_id):
- capability = _NATIVE_REGISTRY[capability_id]
- else:
- # MCP path: look up tool metadata
- tool_row = await ctx.db.execute(
- select(MCPTool)
- .join(MCPConnection)
- .where(MCPConnection.search_space_id == ctx.search_space.id)
- .where(tool_qualified_name(MCPTool, MCPConnection) == capability_id)
- )
- capability = Capability(
- id=capability_id,
- input_schema=tool_row.input_schema,
- output_schema=tool_row.output_schema,
- side_effects=set(tool_row.side_effects),
- handler=make_mcp_handler(
- connection_id=tool_row.connection_id,
- tool_name=tool_row.name,
- ),
- )
-
- _WORKER_CAPABILITY_CACHE[(ctx.search_space.id, capability_id)] = capability
- return capability
-```
-
-The closure created by `make_mcp_handler` captures only the connection ID
-and tool name. When invoked, it asks `ctx.resolve_mcp_client(connection_id)`
-to build an authenticated client from the connection record (including
-token refresh if needed). That client is also transient — built per call,
-discarded after.
+See archived design at `docs/automation/archived/mcp-registry.md` once
+v1 ships; for now the only consumer of the registry is the in-memory
+native path.
### Credentials: resolved at the moment of use
From 144d702c354f4985aea68a9af06d35d58ecc8f3e Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:35:37 +0200
Subject: [PATCH 19/87] docs(automation): defer credentials, cost,
queue-routing, side-effects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Update §3 (Credentials), §7.1 (Dispatcher common path), §8 (Duration
classes and queue routing), and §13 (Decisions locked) to reflect the
v1-minimum scope:
- Credentials block in §3 collapses to a deferred-to-Phase-2 note. The
three guarantees (no creds in definition, no creds in LLM context,
per-call resolution) return unchanged when Phase 2 ships external
capabilities.
- Cost-estimate pre-check in the dispatcher's common path is removed.
Mid-flight budget kill in the executor still enforces budget_cap_usd.
- Queue routing by expected_duration_seconds is deferred. Single
automations_default queue in v1.
- Decisions 24, 25, 26, 32-37, 38-41 marked deferred with explicit
return phase. Three new v1-minimum decisions added (5-field
Capability, measured-not-declared cost, single queue).
All deferrals are additive: the original designs return as-is when
warranted; nothing is rewritten between phases.
---
automation-design-plan.md | 122 +++++++++++++++++---------------------
1 file changed, 56 insertions(+), 66 deletions(-)
diff --git a/automation-design-plan.md b/automation-design-plan.md
index 07eca2049..9d738c6f7 100644
--- a/automation-design-plan.md
+++ b/automation-design-plan.md
@@ -138,47 +138,24 @@ See archived design at `docs/automation/archived/mcp-registry.md` once
v1 ships; for now the only consumer of the registry is the in-memory
native path.
-### Credentials: resolved at the moment of use
+### Credentials — deferred to Phase 2
-The handler doesn't carry credentials and the closure doesn't capture them.
-When invoked, the handler asks `ActionContext` for what it needs:
+The earlier per-call credential resolution pattern (`ctx.resolve_mcp_client`,
+`ctx.resolve_http_client`, `ctx.resolve_llm`) is **deferred to Phase 2**.
+v1 capabilities run server-side using app-level configuration; none of
+the seven v1 capabilities needs per-user or per-connection auth.
-```python
-def make_mcp_handler(connection_id: UUID, tool_name: str):
- async def handler(ctx: ActionContext, args: dict) -> Any:
- # Credential resolution happens here, per call
- client = await ctx.resolve_mcp_client(connection_id)
- response = await client.call_tool(name=tool_name, arguments=args)
- return response.content
- return handler
-```
+When Phase 2 ships external-credential capabilities (Slack, email, etc.),
+the three guarantees the original design promised are reintroduced
+unchanged:
-`ctx.resolve_mcp_client(connection_id)`:
-1. Loads the `mcp_connections` row
-2. Decrypts the access token
-3. Refreshes the token if it's expired (using the refresh token)
-4. Constructs an `MCPClient` with the token set as a default authorization
- header
+- Credentials never appear in the automation definition (connection IDs
+ only).
+- Credentials never appear in the LLM's context (the host holds them
+ and uses them on the LLM's behalf when executing tool calls).
+- Credentials are loaded per-call, not pre-loaded into worker memory.
-The HTTP library carries the auth header on every subsequent call the
-client makes — the handler doesn't think about it after construction.
-
-For native capabilities calling external APIs directly,
-`ctx.resolve_http_client(provider)` returns an authenticated `httpx`
-client. For LLM operations, `ctx.resolve_llm(provider)` returns a
-configured LLM client. **Three resolution methods, one pattern: the
-context returns a client already authenticated.**
-
-Three properties this gives us:
-
-- **Credentials never appear in the automation definition.** The JSON
- contains capability references and connection IDs, never tokens.
-- **Credentials never appear in the LLM's context.** Even during
- `agent_task`, the LLM sees tool descriptions only; the host holds
- credentials and uses them when executing the tools the LLM requests.
-- **Credentials are loaded per-call, not pre-loaded.** The credential
- exists in memory only during the moment a handler is making a call. No
- long-lived secrets in worker memory.
+The Phase-2 design returns as-is; only the v1 surface is simplified.
---
@@ -504,12 +481,18 @@ event, evaluates all matching triggers' filters, fires the matches.
Common path (after a trigger has fired):
1. Resolve `inputs` from trigger payload and defaults
2. Validate resolved inputs against the automation's input schema
-3. **Cost estimate** — sum capabilities' `cost_estimate(args)` for the plan;
- refuse if exceeds `budget_cap_usd`
-4. **Idempotency check** — dedup against existing pending/running runs
-5. **Snapshot the resolved definition** into the run row (immutable history)
-6. Enqueue executor task on the appropriate Celery queue (per
- `expected_duration_seconds`)
+3. **Idempotency check** — dedup against existing pending/running runs
+4. **Snapshot the resolved definition** into the run row (immutable history)
+5. Enqueue executor task on the single `automations_default` Celery queue
+
+The cost-estimate pre-check (originally step 3) is **deferred**.
+v1 capabilities do not declare `cost_estimate`; pre-flight budgeting
+returns when a historical-cost ledger exists. The mid-flight budget
+cap (§7.2) still kills the run if accumulated cost crosses
+`budget_cap_usd`.
+
+Queue routing by `expected_duration_seconds` is **deferred** until load
+patterns justify a second queue. v1 uses a single queue.
### 7.2 Executor
@@ -801,16 +784,18 @@ The engine handles storage (writes to SurfSense's existing object storage),
URL generation (signed, scoped to the run's permissions), and cleanup (a
nightly Celery Beat task deletes expired artifacts).
-### Duration classes and queue routing
+### Duration classes and queue routing — deferred
-Capabilities declare `expected_duration_seconds`. The dispatcher routes
-runs to Celery queues based on the longest-duration step:
-- < 10s → `automations_fast`
-- 10s – 5min → `automations_medium`
-- 5min – 1hr → `automations_long`
+The original design routed runs to multiple Celery queues based on each
+capability's declared `expected_duration_seconds`. v1 ships with **one
+queue** (`automations_default`) and capabilities do not declare a
+duration. Multi-queue routing returns when burst load on a single queue
+actually justifies the operational complexity of independent worker
+pools.
-Operators scale each queue's worker pool independently. A future "very
-long" queue is a config change, not a contract change.
+Adding the second queue is a config change plus reintroducing
+`expected_duration_seconds` on the `Capability` dataclass — both
+mechanical, additive, and free of design rewrite.
---
@@ -1210,9 +1195,9 @@ place.
### Components
23. ✅ Dispatcher / executor / handlers / registry — distinct, each replaceable
-24. ✅ Side effects are a set, including `USER_VISIBLE`
-25. ✅ `expected_duration_seconds` integer drives queue routing
-26. ✅ `produces_artifacts` is a list of `ArtifactSpec`, not a bool
+24. ⏸ Side effects are a set, including `USER_VISIBLE` — **deferred** until multi-user automation RBAC ships
+25. ⏸ `expected_duration_seconds` integer drives queue routing — **deferred** until a second Celery queue is needed
+26. ⏸ `produces_artifacts` is a list of `ArtifactSpec`, not a bool — **deferred** until artifacts beyond the deliverable handlers' own persistence are needed
27. ✅ Output schemas recommended on `agent_task`; editor warns when missing
### Event bus
@@ -1220,20 +1205,25 @@ place.
29. ✅ Automations publish run events for composability
30. ✅ Publish/subscribe behind interface — no direct table access elsewhere
-### Capability storage (two-tier persistence)
+### Capability storage
31. ✅ Native capabilities registered in-memory at startup from the codebase. Identical across all workers.
-32. ✅ MCP capability metadata persisted in `mcp_connections` and `mcp_tools` tables. Survives restarts.
-33. ✅ MCP handler closures built lazily per worker from database state. Worker-local cache, rebuilt on demand.
-34. ✅ MCP server tool list re-harvested on a schedule (default: daily) and on user request.
-35. ✅ MCP tools harvested into the capability registry at connection time
-36. ✅ Side effects inferred from MCP hints + naming + admin overrides
-37. ✅ MCP tools callable directly (no agent required) when caller knows args
+32. ⏸ MCP capability metadata persisted in `mcp_connections` and `mcp_tools` tables — **deferred to Phase 4**
+33. ⏸ MCP handler closures built lazily per worker from database state — **deferred to Phase 4**
+34. ⏸ MCP server tool list re-harvested on a schedule — **deferred to Phase 4**
+35. ⏸ MCP tools harvested into the capability registry at connection time — **deferred to Phase 4**
+36. ⏸ Side effects inferred from MCP hints + naming + admin overrides — **deferred to Phase 4**
+37. ⏸ MCP tools callable directly (no agent required) when caller knows args — **deferred to Phase 4**
-### Credentials
-38. ✅ Credentials never appear in the automation definition — only connection IDs do
-39. ✅ Credentials never appear in the LLM's context — the host holds them and uses them on the LLM's behalf
-40. ✅ Credentials resolved per-call by `ActionContext`, not pre-loaded into worker environment
-41. ✅ Tokens encrypted at rest in the database; refresh handled automatically by `ActionContext.resolve_*_client`
+### Credentials — all deferred to Phase 2
+38. ⏸ Credentials never appear in the automation definition — only connection IDs do — **Phase 2**
+39. ⏸ Credentials never appear in the LLM's context — the host holds them — **Phase 2**
+40. ⏸ Credentials resolved per-call by `ActionContext`, not pre-loaded into worker environment — **Phase 2**
+41. ⏸ Tokens encrypted at rest; refresh handled automatically by `ActionContext.resolve_*_client` — **Phase 2**
+
+### v1-minimum (new lock)
+v1. ✅ `Capability` is exactly five fields: `id`, `description`, `input_schema`, `output_schema`, `handler`. Additional fields are added only when a concrete consumer feature requires them.
+v2. ✅ Cost is **measured** from a per-run ledger, not declared. Pre-flight cost checks return when the ledger has enough history.
+v3. ✅ Single `automations_default` Celery queue in v1. Multi-queue routing returns when load justifies it.
### NL authoring
42. ✅ LLM-authored templates is the primary path from day one — not a Phase 3 addition. Hand-authoring JSON is supported but secondary
From db8c472664b301d38c75d30d96bf65136a11a981 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:37:05 +0200
Subject: [PATCH 20/87] docs(automation): narrow v1 data model + Phase 1 scope
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
§9 (Data model): drop from six tables to three. v1 ships automations,
automation_triggers, automation_runs only. domain_events deferred to
Phase 3 (event trigger); mcp_connections/mcp_tools deferred to Phase 4
(MCP integration). Remove the table definitions for the deferred ones
and replace with a deferred-tables note pointing to the consuming
phase.
automation_triggers.type enum narrowed to schedule|manual for v1.
Webhook and event types ship with their respective phases. secret_hash
column deferred to Phase 2 alongside the webhook trigger.
automation_runs.cost_usd column deferred until at least one v1
capability records token-level cost — additive when reintroduced.
§14 (Phase 1) reorganized into four explicit steps matching the work
we're about to do: scaffolding + schemas + empty registries (step 1),
then registry population (step 2), then executor (step 3), then NL
authoring + UI (step 4). The current commit batch lands step 1 only.
---
automation-design-plan.md | 126 ++++++++++++++++++--------------------
1 file changed, 59 insertions(+), 67 deletions(-)
diff --git a/automation-design-plan.md b/automation-design-plan.md
index 9d738c6f7..f57385e31 100644
--- a/automation-design-plan.md
+++ b/automation-design-plan.md
@@ -801,12 +801,20 @@ mechanical, additive, and free of design rewrite.
## 9. Data model
-Six tables. All scoped by `search_space_id` for RBAC.
+**v1 ships three tables:** `automations`, `automation_triggers`,
+`automation_runs`. All scoped by `search_space_id` for RBAC.
-The first four (`automations`, `automation_triggers`, `automation_runs`,
-`domain_events`) are the engine's own state. The last two
-(`mcp_connections`, `mcp_tools`) hold the durable knowledge that backs
-MCP-derived capabilities — see §3 for the lifecycle rationale.
+The other three tables described in earlier drafts are deferred:
+
+- `domain_events` → **deferred to Phase 3** (introduced with the event
+ trigger).
+- `mcp_connections`, `mcp_tools` → **deferred to Phase 4** (MCP
+ integration).
+
+The deferred tables ship as-is when their consuming feature lands;
+nothing in the v1 schema needs to change to accommodate them. The three
+v1 tables form the engine's persistent state — definitions, triggers,
+and an immutable run history.
### `automations`
@@ -828,12 +836,14 @@ MCP-derived capabilities — see §3 for the lifecycle rationale.
| --------------- | ----------------------------------------------------------------------------- | ------------------------------------------- |
| `id` | int PK | |
| `automation_id` | FK | |
-| `type` | enum: `schedule`, `webhook`, `event` | |
+| `type` | enum: `schedule`, `manual` (Phase 2/3 add `webhook`, `event`) | |
| `config` | jsonb | validated against trigger's `config_schema` |
| `enabled` | bool | |
-| `secret_hash` | str / null | for webhook bearer tokens |
| `last_fired_at` | timestamp | |
+`secret_hash` (for webhook bearer tokens) is **deferred to Phase 2** with
+the webhook trigger.
+
### `automation_runs`
| field | type | notes |
@@ -849,61 +859,25 @@ MCP-derived capabilities — see §3 for the lifecycle rationale.
| `output` | jsonb / null | |
| `artifacts` | jsonb | references to created artifacts |
| `error` | jsonb / null | |
-| `cost_usd` | decimal | accumulated cost |
| `started_at` / `finished_at` | timestamps | |
| `agent_session_id`| str / null | link to LangGraph trace if agent_task was used |
-### `domain_events`
+`cost_usd` (per-run accumulated cost) is **deferred** until at least one
+v1 capability records token-level cost. When reintroduced it lands as a
+column-only migration.
-| field | type | notes |
-| ----------------- | ----------- | -------------------------------------------------- |
-| `id` | UUID PK | |
-| `search_space_id` | FK | scoping |
-| `event_type` | varchar | e.g. `drive.file_added`, `automation.run.succeeded` |
-| `source_id` | varchar | which connector/automation/etc. produced it |
-| `payload` | jsonb | matches the event type's documented schema |
-| `created_at` | timestamp | |
-| `consumed_by` | jsonb | array of consumer_ids, for tracking + replay |
-| `expires_at` | timestamp | auto-cleanup after 7 days |
+### Deferred tables
-### `mcp_connections`
+- **`domain_events`** — the event bus backing event triggers. Ships in
+ Phase 3 with the event trigger. v1 only emits `automation.run.*`
+ events into application logs; the table is added when at least one
+ consumer needs to subscribe to them.
+- **`mcp_connections`** / **`mcp_tools`** — see §3. Both ship in Phase 4
+ alongside the MCP harvester and the two-tier registry.
-Persistent record of MCP server connections per SearchSpace.
-
-| field | type | notes |
-| ------------------- | ----------- | -------------------------------------------------- |
-| `id` | UUID PK | |
-| `search_space_id` | FK | scoping |
-| `server_url` | text | the MCP server's endpoint |
-| `transport` | text | `"http"`, `"stdio"`, etc. |
-| `name` | text | human-readable label (e.g., "Slack — Acme") |
-| `access_token` | bytea | encrypted at rest |
-| `refresh_token` | bytea | encrypted at rest |
-| `expires_at` | timestamp | for OAuth tokens |
-| `last_harvested_at` | timestamp | when tool list was last refreshed |
-| `created_at` | timestamp | |
-| `created_by` | FK → users | |
-
-### `mcp_tools`
-
-The tool list each connected MCP server exposes. Acts as the durable
-source for MCP capabilities — definitions reference `mcp_tools` rows by
-qualified name, and worker processes lazily build handler closures from
-this state.
-
-| field | type | notes |
-| --------------- | ----------- | ------------------------------------------------ |
-| `id` | UUID PK | |
-| `connection_id` | FK → `mcp_connections.id` ON DELETE CASCADE | |
-| `name` | text | the tool name reported by the MCP server |
-| `description` | text | description for the NL generator and form editor |
-| `input_schema` | jsonb | JSON Schema for tool arguments |
-| `output_schema` | jsonb | JSON Schema for tool results |
-| `side_effects` | text[] | inferred from MCP hints + naming + admin override |
-| UNIQUE | | (connection_id, name) |
-
-NL drafts are **not** a core table. They live in a generic short-TTL store
-(Redis or a transient table) when the NL flow is built in Phase 3.
+NL drafts are **not** a core table. They live in a generic short-TTL
+store (Redis or a transient table) when the NL flow is built in
+Phase 1 (Step 4).
---
@@ -1092,21 +1066,39 @@ which actions and triggers are available, not whether users can describe
automations in natural language.
### Phase 1 — Engine MVP with NL authoring
-- 4 tables + Alembic migration
-- Capability registry with native capabilities (`search_space.query`,
- `search_space.fetch_document`, `agent.run`)
-- `agent_task` action only
-- `schedule` trigger + manual "Run now" endpoint
-- Executor with retries, timeouts, budget caps
-- Template engine (Jinja sandbox + 15 filters + 4 runtime limits)
-- **NL authoring flow**: Generator LLM, deterministic validator,
- Review LLM, editable form
+
+**Step 1 (current scope, this batch of commits):**
+- 3 tables (`automations`, `automation_triggers`, `automation_runs`) +
+ Alembic migration
+- Empty Capability, Action, Trigger registries (concrete entries land in
+ later steps when the consuming feature lands)
+- Pydantic schemas for the automation definition envelope, the two v1
+ trigger configs (`schedule`, `manual`), and the one v1 action config
+ (`agent_task`)
+- Module structure under `app/automations/` (data/, schemas/,
+ registries/), fully isolated from the existing codebase
+
+**Step 2:**
+- Register the `agent_task` action and the `schedule` / `manual`
+ triggers in the registries
+- Capability registry populated with native deliverable-producing
+ capabilities (chosen when this step starts)
+
+**Step 3:**
+- Executor (single-queue Celery task) with retries, timeouts, budget
+ caps measured against the run's `cost_usd` ledger (the column deferred in §9 lands with this step)
+- Template engine (Jinja sandbox + the v1 filter allowlist + runtime
+ limits)
+- Manual "Run now" endpoint
+
+**Step 4:**
+- NL authoring flow: Generator LLM, deterministic validator, Review LLM,
+ editable form
- Run history UI with Electric SQL streaming
**After Phase 1**: a user can describe an automation in natural language,
review the proposal (with summary + flagged anomalies), edit any field,
-save, and watch it run on a schedule. The Claude Routines value
-proposition, on SurfSense's data, with NL-first authoring.
+save, and watch it run on a schedule.
### Phase 2 — Webhooks and delivery
- `webhook` trigger with per-automation bearer tokens
From 113748dfd5054aba1d3e16f7d7350297de0990ca Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:38:41 +0200
Subject: [PATCH 21/87] feat(automation): scaffold isolated module structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Create app/automations/ with the SRP-per-file / grouped-folders layout
that mirrors app/agents/multi_agent_chat/. Twelve __init__.py files,
each a thin re-export with a single-line docstring describing the
subpackage's role, no exports yet (filled in subsequent commits).
Tree:
app/automations/
├── persistence/
│ ├── enums/ (status / type enums; one per file)
│ └── models/ (SQLAlchemy tables; one per file)
├── schemas/
│ ├── definition/ (the JSON envelope, broken by concern)
│ ├── triggers/ (per-trigger config schemas)
│ └── actions/ (per-action config schemas)
└── registries/
├── capabilities/ (types.py + store.py)
├── actions/ (types.py + store.py)
└── triggers/ (types.py + store.py)
The persistence/ folder is named to avoid surfsense_backend/.gitignore's
data/ ignore rule, which silently masked the original data/ name and
its contents from version control.
Isolation invariant: the module imports only from app.db (foundational
Base + FK targets, unavoidable) and stdlib / SQLAlchemy / Pydantic.
No imports from app.agents.*, app.services.*, app.tasks.*, app.routes.*
or any other business-logic module. Confirmed importable with no side
effects.
---
surfsense_backend/app/automations/__init__.py | 5 +++++
surfsense_backend/app/automations/persistence/__init__.py | 5 +++++
.../app/automations/persistence/enums/__init__.py | 5 +++++
.../app/automations/persistence/models/__init__.py | 5 +++++
surfsense_backend/app/automations/registries/__init__.py | 5 +++++
.../app/automations/registries/actions/__init__.py | 5 +++++
.../app/automations/registries/capabilities/__init__.py | 5 +++++
.../app/automations/registries/triggers/__init__.py | 5 +++++
surfsense_backend/app/automations/schemas/__init__.py | 5 +++++
.../app/automations/schemas/actions/__init__.py | 5 +++++
.../app/automations/schemas/definition/__init__.py | 5 +++++
.../app/automations/schemas/triggers/__init__.py | 5 +++++
12 files changed, 60 insertions(+)
create mode 100644 surfsense_backend/app/automations/__init__.py
create mode 100644 surfsense_backend/app/automations/persistence/__init__.py
create mode 100644 surfsense_backend/app/automations/persistence/enums/__init__.py
create mode 100644 surfsense_backend/app/automations/persistence/models/__init__.py
create mode 100644 surfsense_backend/app/automations/registries/__init__.py
create mode 100644 surfsense_backend/app/automations/registries/actions/__init__.py
create mode 100644 surfsense_backend/app/automations/registries/capabilities/__init__.py
create mode 100644 surfsense_backend/app/automations/registries/triggers/__init__.py
create mode 100644 surfsense_backend/app/automations/schemas/__init__.py
create mode 100644 surfsense_backend/app/automations/schemas/actions/__init__.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/__init__.py
create mode 100644 surfsense_backend/app/automations/schemas/triggers/__init__.py
diff --git a/surfsense_backend/app/automations/__init__.py b/surfsense_backend/app/automations/__init__.py
new file mode 100644
index 000000000..edb7891ea
--- /dev/null
+++ b/surfsense_backend/app/automations/__init__.py
@@ -0,0 +1,5 @@
+"""Automations: scheduled / triggered runs of capabilities — see automation-design-plan.md."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/persistence/__init__.py b/surfsense_backend/app/automations/persistence/__init__.py
new file mode 100644
index 000000000..05c39014e
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/__init__.py
@@ -0,0 +1,5 @@
+"""Persistence layer: SQLAlchemy enums under ``enums/`` and models under ``models/``."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/persistence/enums/__init__.py b/surfsense_backend/app/automations/persistence/enums/__init__.py
new file mode 100644
index 000000000..f221687dc
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/enums/__init__.py
@@ -0,0 +1,5 @@
+"""SQLAlchemy / Python enums backing the three automation tables."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/persistence/models/__init__.py b/surfsense_backend/app/automations/persistence/models/__init__.py
new file mode 100644
index 000000000..da73c9e41
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/models/__init__.py
@@ -0,0 +1,5 @@
+"""SQLAlchemy models: one file per table (``automation.py``, ``trigger.py``, ``run.py``)."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/registries/__init__.py b/surfsense_backend/app/automations/registries/__init__.py
new file mode 100644
index 000000000..e7334cca8
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/__init__.py
@@ -0,0 +1,5 @@
+"""Three registries — ``capabilities/``, ``actions/``, ``triggers/`` — populated at import time."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/registries/actions/__init__.py b/surfsense_backend/app/automations/registries/actions/__init__.py
new file mode 100644
index 000000000..6b19b7091
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/actions/__init__.py
@@ -0,0 +1,5 @@
+"""Action registry: ``types.py`` (dataclass), ``store.py`` (dict + register fn)."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/registries/capabilities/__init__.py b/surfsense_backend/app/automations/registries/capabilities/__init__.py
new file mode 100644
index 000000000..77f9f88b7
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/capabilities/__init__.py
@@ -0,0 +1,5 @@
+"""Capability registry: ``types.py`` (dataclass), ``store.py`` (dict + register fn)."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/registries/triggers/__init__.py b/surfsense_backend/app/automations/registries/triggers/__init__.py
new file mode 100644
index 000000000..bc795b61a
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/triggers/__init__.py
@@ -0,0 +1,5 @@
+"""Trigger registry: ``types.py`` (dataclass), ``store.py`` (dict + register fn)."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/schemas/__init__.py b/surfsense_backend/app/automations/schemas/__init__.py
new file mode 100644
index 000000000..67211b898
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/__init__.py
@@ -0,0 +1,5 @@
+"""Pydantic schemas: definition envelope, trigger configs, action configs."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/schemas/actions/__init__.py b/surfsense_backend/app/automations/schemas/actions/__init__.py
new file mode 100644
index 000000000..1aa68b629
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/actions/__init__.py
@@ -0,0 +1,5 @@
+"""Per-action config schemas: one file per action type registered in v1."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/schemas/definition/__init__.py b/surfsense_backend/app/automations/schemas/definition/__init__.py
new file mode 100644
index 000000000..3fbda8cc8
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/__init__.py
@@ -0,0 +1,5 @@
+"""Automation definition envelope: the editable structured spec users author and run."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/surfsense_backend/app/automations/schemas/triggers/__init__.py b/surfsense_backend/app/automations/schemas/triggers/__init__.py
new file mode 100644
index 000000000..2da765bc3
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/triggers/__init__.py
@@ -0,0 +1,5 @@
+"""Per-trigger config schemas: one file per trigger type registered in v1."""
+
+from __future__ import annotations
+
+__all__: list[str] = []
From 05931375f4c5d92d5737c3c06bef718a1712d374 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:42:50 +0200
Subject: [PATCH 22/87] feat(automation): add SQLAlchemy models for the three
v1 tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Three enums (one file each) plus three models (one file each), all
under app/automations/persistence/. The module imports from app.db
only (Base/BaseModel/TimestampMixin and FK targets searchspaces.id /
user.id); no business-logic imports.
Enums:
- AutomationStatus: active | paused | archived
- RunStatus: pending | running | succeeded | failed | cancelled
| timed_out
- TriggerType: schedule | manual (Phase-2/3 add webhook | event)
Models:
- Automation: search_space-scoped, created_by_user_id (SET NULL),
name + description, status enum, definition JSONB, version int,
updated_at with onupdate.
- AutomationTrigger: FK → automations (CASCADE), type enum, config
JSONB, enabled bool, last_fired_at. Webhook secret_hash is omitted
until Phase 2.
- AutomationRun: FK → automations (CASCADE), nullable trigger_id
(SET NULL — null = manual via UI), status enum,
definition_snapshot for immutable history, trigger_payload /
resolved_inputs / step_results / output / artifacts / error JSONB
columns, started_at / finished_at timestamps, agent_session_id for
linking to the LangGraph trace. cost_usd column omitted until at
least one v1 capability records token-level cost.
Verified: Base.metadata exposes all three table names; columns and
enums introspect as documented; no linter errors.
---
.../app/automations/persistence/__init__.py | 12 ++-
.../automations/persistence/enums/__init__.py | 10 ++-
.../persistence/enums/automation_status.py | 18 +++++
.../persistence/enums/run_status.py | 28 +++++++
.../persistence/enums/trigger_type.py | 21 ++++++
.../persistence/models/__init__.py | 10 ++-
.../persistence/models/automation.py | 75 +++++++++++++++++++
.../app/automations/persistence/models/run.py | 72 ++++++++++++++++++
.../automations/persistence/models/trigger.py | 57 ++++++++++++++
9 files changed, 300 insertions(+), 3 deletions(-)
create mode 100644 surfsense_backend/app/automations/persistence/enums/automation_status.py
create mode 100644 surfsense_backend/app/automations/persistence/enums/run_status.py
create mode 100644 surfsense_backend/app/automations/persistence/enums/trigger_type.py
create mode 100644 surfsense_backend/app/automations/persistence/models/automation.py
create mode 100644 surfsense_backend/app/automations/persistence/models/run.py
create mode 100644 surfsense_backend/app/automations/persistence/models/trigger.py
diff --git a/surfsense_backend/app/automations/persistence/__init__.py b/surfsense_backend/app/automations/persistence/__init__.py
index 05c39014e..265742a85 100644
--- a/surfsense_backend/app/automations/persistence/__init__.py
+++ b/surfsense_backend/app/automations/persistence/__init__.py
@@ -2,4 +2,14 @@
from __future__ import annotations
-__all__: list[str] = []
+from .enums import AutomationStatus, RunStatus, TriggerType
+from .models import Automation, AutomationRun, AutomationTrigger
+
+__all__ = [
+ "Automation",
+ "AutomationRun",
+ "AutomationStatus",
+ "AutomationTrigger",
+ "RunStatus",
+ "TriggerType",
+]
diff --git a/surfsense_backend/app/automations/persistence/enums/__init__.py b/surfsense_backend/app/automations/persistence/enums/__init__.py
index f221687dc..cf9e7dd1b 100644
--- a/surfsense_backend/app/automations/persistence/enums/__init__.py
+++ b/surfsense_backend/app/automations/persistence/enums/__init__.py
@@ -2,4 +2,12 @@
from __future__ import annotations
-__all__: list[str] = []
+from .automation_status import AutomationStatus
+from .run_status import RunStatus
+from .trigger_type import TriggerType
+
+__all__ = [
+ "AutomationStatus",
+ "RunStatus",
+ "TriggerType",
+]
diff --git a/surfsense_backend/app/automations/persistence/enums/automation_status.py b/surfsense_backend/app/automations/persistence/enums/automation_status.py
new file mode 100644
index 000000000..3f2ca9621
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/enums/automation_status.py
@@ -0,0 +1,18 @@
+"""``AutomationStatus`` — lifecycle of a stored automation definition."""
+
+from __future__ import annotations
+
+from enum import StrEnum
+
+
+class AutomationStatus(StrEnum):
+ """Status of an automation in the registry.
+
+ ``active`` — eligible to fire from its triggers.
+ ``paused`` — definition retained, triggers do not fire.
+ ``archived`` — kept for run history only; no edits, no fires.
+ """
+
+ ACTIVE = "active"
+ PAUSED = "paused"
+ ARCHIVED = "archived"
diff --git a/surfsense_backend/app/automations/persistence/enums/run_status.py b/surfsense_backend/app/automations/persistence/enums/run_status.py
new file mode 100644
index 000000000..0f619bd82
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/enums/run_status.py
@@ -0,0 +1,28 @@
+"""``RunStatus`` — the state machine of a single ``AutomationRun``."""
+
+from __future__ import annotations
+
+from enum import StrEnum
+
+
+class RunStatus(StrEnum):
+ """Lifecycle states of an ``AutomationRun`` row.
+
+ Transitions are linear with four terminal branches:
+
+ pending → running → (succeeded | failed | cancelled | timed_out)
+
+ ``pending`` — row created, executor task enqueued, work not started.
+ ``running`` — executor has picked up the run.
+ ``succeeded`` — terminal: plan completed without error.
+ ``failed`` — terminal: at least one step raised an unrecoverable error.
+ ``cancelled`` — terminal: caller asked for cancellation.
+ ``timed_out`` — terminal: run exceeded its configured timeout.
+ """
+
+ PENDING = "pending"
+ RUNNING = "running"
+ SUCCEEDED = "succeeded"
+ FAILED = "failed"
+ CANCELLED = "cancelled"
+ TIMED_OUT = "timed_out"
diff --git a/surfsense_backend/app/automations/persistence/enums/trigger_type.py b/surfsense_backend/app/automations/persistence/enums/trigger_type.py
new file mode 100644
index 000000000..eb06fe773
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/enums/trigger_type.py
@@ -0,0 +1,21 @@
+"""``TriggerType`` — the trigger-kind discriminator (v1 = schedule, manual)."""
+
+from __future__ import annotations
+
+from enum import StrEnum
+
+
+class TriggerType(StrEnum):
+ """Kind of trigger an ``AutomationTrigger`` row represents.
+
+ v1 ships two kinds:
+
+ ``schedule`` — fires on a cron expression managed by Celery Beat.
+ ``manual`` — fires on demand from the UI's "Run now" affordance.
+
+ ``webhook`` and ``event`` are deferred to Phase 2 and Phase 3
+ respectively; adding them is an enum-value extension only.
+ """
+
+ SCHEDULE = "schedule"
+ MANUAL = "manual"
diff --git a/surfsense_backend/app/automations/persistence/models/__init__.py b/surfsense_backend/app/automations/persistence/models/__init__.py
index da73c9e41..4aca02a03 100644
--- a/surfsense_backend/app/automations/persistence/models/__init__.py
+++ b/surfsense_backend/app/automations/persistence/models/__init__.py
@@ -2,4 +2,12 @@
from __future__ import annotations
-__all__: list[str] = []
+from .automation import Automation
+from .run import AutomationRun
+from .trigger import AutomationTrigger
+
+__all__ = [
+ "Automation",
+ "AutomationRun",
+ "AutomationTrigger",
+]
diff --git a/surfsense_backend/app/automations/persistence/models/automation.py b/surfsense_backend/app/automations/persistence/models/automation.py
new file mode 100644
index 000000000..fc4a1ed93
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/models/automation.py
@@ -0,0 +1,75 @@
+"""``Automation`` table — the editable, versioned automation definition."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlalchemy import (
+ TIMESTAMP,
+ Column,
+ Enum as SQLAlchemyEnum,
+ ForeignKey,
+ Integer,
+ String,
+ Text,
+)
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+
+from app.db import BaseModel, TimestampMixin
+
+from ..enums.automation_status import AutomationStatus
+
+
+class Automation(BaseModel, TimestampMixin):
+ """The editable, versioned spec a user authors.
+
+ The ``definition`` JSON is what the user (or the NL generator) writes
+ and edits. Each save bumps ``version`` by one; the previous JSON is
+ not kept in this row — version history is reconstructed from the
+ ``definition_snapshot`` column on every ``AutomationRun`` that fired
+ against a given version.
+ """
+
+ __tablename__ = "automations"
+
+ search_space_id = Column(
+ Integer,
+ ForeignKey("searchspaces.id", ondelete="CASCADE"),
+ nullable=False,
+ index=True,
+ )
+
+ created_by_user_id = Column(
+ UUID(as_uuid=True),
+ ForeignKey("user.id", ondelete="SET NULL"),
+ nullable=True,
+ index=True,
+ )
+
+ name = Column(String(200), nullable=False)
+ description = Column(Text, nullable=True)
+
+ status = Column(
+ SQLAlchemyEnum(AutomationStatus, name="automation_status"),
+ nullable=False,
+ default=AutomationStatus.ACTIVE,
+ server_default=AutomationStatus.ACTIVE.value,
+ index=True,
+ )
+
+ definition = Column(JSONB, nullable=False)
+
+ version = Column(
+ Integer,
+ nullable=False,
+ default=1,
+ server_default="1",
+ )
+
+ updated_at = Column(
+ TIMESTAMP(timezone=True),
+ nullable=False,
+ default=lambda: datetime.now(UTC),
+ onupdate=lambda: datetime.now(UTC),
+ index=True,
+ )
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
new file mode 100644
index 000000000..5c6ec93ec
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -0,0 +1,72 @@
+"""``AutomationRun`` table — the immutable per-fire execution record."""
+
+from __future__ import annotations
+
+from sqlalchemy import (
+ TIMESTAMP,
+ Column,
+ Enum as SQLAlchemyEnum,
+ ForeignKey,
+ Integer,
+ String,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+
+from app.db import BaseModel, TimestampMixin
+
+from ..enums.run_status import RunStatus
+
+
+class AutomationRun(BaseModel, TimestampMixin):
+ """One execution of an automation.
+
+ Every fire of any trigger inserts exactly one row here. The row is
+ immutable from the user's perspective — the executor only updates
+ ``status``, ``step_results``, ``output``, ``artifacts``, ``error``,
+ ``started_at``, ``finished_at`` as the run progresses; the
+ ``definition_snapshot`` is locked at fire time so the user can always
+ see exactly what code path executed for any historical run.
+ """
+
+ __tablename__ = "automation_runs"
+
+ automation_id = Column(
+ Integer,
+ ForeignKey("automations.id", ondelete="CASCADE"),
+ nullable=False,
+ index=True,
+ )
+
+ trigger_id = Column(
+ Integer,
+ ForeignKey("automation_triggers.id", ondelete="SET NULL"),
+ nullable=True,
+ index=True,
+ )
+
+ status = Column(
+ SQLAlchemyEnum(RunStatus, name="automation_run_status"),
+ nullable=False,
+ default=RunStatus.PENDING,
+ server_default=RunStatus.PENDING.value,
+ index=True,
+ )
+
+ definition_snapshot = Column(JSONB, nullable=False)
+
+ trigger_payload = Column(JSONB, nullable=True)
+
+ resolved_inputs = Column(JSONB, nullable=False, server_default="{}")
+
+ step_results = Column(JSONB, nullable=False, server_default="[]")
+
+ output = Column(JSONB, nullable=True)
+
+ artifacts = Column(JSONB, nullable=False, server_default="[]")
+
+ error = Column(JSONB, nullable=True)
+
+ started_at = Column(TIMESTAMP(timezone=True), nullable=True)
+ finished_at = Column(TIMESTAMP(timezone=True), nullable=True)
+
+ agent_session_id = Column(String(200), nullable=True)
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
new file mode 100644
index 000000000..3173770d6
--- /dev/null
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -0,0 +1,57 @@
+"""``AutomationTrigger`` table — one row per (automation, trigger-instance) pair."""
+
+from __future__ import annotations
+
+from sqlalchemy import (
+ TIMESTAMP,
+ Boolean,
+ Column,
+ Enum as SQLAlchemyEnum,
+ ForeignKey,
+ Integer,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+
+from app.db import BaseModel, TimestampMixin
+
+from ..enums.trigger_type import TriggerType
+
+
+class AutomationTrigger(BaseModel, TimestampMixin):
+    """One trigger attached to an automation.
+
+    An automation may have multiple triggers — e.g. a ``schedule`` trigger
+    for the autonomous path and a ``manual`` trigger backing the UI's
+    "Run now" affordance. Each trigger's ``config`` is validated against
+    the registered ``TriggerDefinition.config_schema`` for its ``type``.
+    """
+
+    __tablename__ = "automation_triggers"
+
+    automation_id = Column(
+        Integer,
+        ForeignKey("automations.id", ondelete="CASCADE"),  # triggers are deleted with their automation
+        nullable=False,
+        index=True,
+    )
+
+    type = Column(  # persist member values, not names; assumes values equal the PG enum labels — confirm
+        SQLAlchemyEnum(TriggerType, name="automation_trigger_type", values_callable=lambda e: [m.value for m in e]),
+        nullable=False,
+        index=True,
+    )
+
+    config = Column(JSONB, nullable=False)  # per-type payload; shape depends on ``type``
+
+    enabled = Column(
+        Boolean,
+        nullable=False,
+        default=True,
+        server_default="true",
+        index=True,
+    )
+
+    last_fired_at = Column(
+        TIMESTAMP(timezone=True),
+        nullable=True,
+    )
From d9183464d9ec5e69f4630083993b0e3277eda7d4 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:44:33 +0200
Subject: [PATCH 23/87] feat(automation): add Alembic migration for the three
automation tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Migration 143 -> 144. Matches the SQLAlchemy models added in commit 7
and the v1 data model in automation-design-plan.md §9.
Up:
- CREATE TYPE automation_status / automation_trigger_type /
automation_run_status (PostgreSQL ENUMs created first because the
tables reference them).
- CREATE TABLE automations with FK to searchspaces (CASCADE) and
user (SET NULL); five indexes matching the SQLAlchemy model.
- CREATE TABLE automation_triggers with FK to automations
(CASCADE); four indexes.
- CREATE TABLE automation_runs with FK to automations (CASCADE) and
automation_triggers (SET NULL — null trigger_id == manual via UI);
four indexes.
Down: drops every index, table, and ENUM in reverse-dependency order
so the migration is reversible without ON DELETE side effects.
Verified: `alembic history` resolves 143 -> 144 (head) cleanly.
domain_events (Phase 3) and mcp_connections / mcp_tools (Phase 4) ship
in their own migrations when the consuming feature lands; this
migration only covers the three v1 tables.
---
.../versions/144_add_automation_tables.py | 167 ++++++++++++++++++
1 file changed, 167 insertions(+)
create mode 100644 surfsense_backend/alembic/versions/144_add_automation_tables.py
diff --git a/surfsense_backend/alembic/versions/144_add_automation_tables.py b/surfsense_backend/alembic/versions/144_add_automation_tables.py
new file mode 100644
index 000000000..6aa208dc1
--- /dev/null
+++ b/surfsense_backend/alembic/versions/144_add_automation_tables.py
@@ -0,0 +1,167 @@
+"""Add automation tables (automations, automation_triggers, automation_runs)
+
+Revision ID: 144
+Revises: 143
+Create Date: 2026-05-26
+
+Adds the three tables that back the v1 automation engine, plus the
+three PostgreSQL ENUM types they reference. Matches the SQLAlchemy
+models under ``app.automations.persistence.models`` and the v1 data
+model in ``automation-design-plan.md`` §9.
+
+v1 ships these three tables only. ``domain_events`` is deferred to
+Phase 3 with the event trigger; ``mcp_connections`` / ``mcp_tools``
+are deferred to Phase 4 with the MCP integration.
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+revision: str = "144"
+down_revision: str | None = "143"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+ # ENUM types (PostgreSQL requires types created before tables that use them)
+ op.execute(
+ """
+ CREATE TYPE automation_status AS ENUM (
+ 'active', 'paused', 'archived'
+ );
+ """
+ )
+ op.execute(
+ """
+ CREATE TYPE automation_trigger_type AS ENUM (
+ 'schedule', 'manual'
+ );
+ """
+ )
+ op.execute(
+ """
+ CREATE TYPE automation_run_status AS ENUM (
+ 'pending', 'running', 'succeeded', 'failed',
+ 'cancelled', 'timed_out'
+ );
+ """
+ )
+
+ # automations — the editable, versioned automation definition
+ op.execute(
+ """
+ CREATE TABLE automations (
+ id SERIAL PRIMARY KEY,
+ search_space_id INTEGER NOT NULL
+ REFERENCES searchspaces(id) ON DELETE CASCADE,
+ created_by_user_id UUID
+ REFERENCES "user"(id) ON DELETE SET NULL,
+ name VARCHAR(200) NOT NULL,
+ description TEXT,
+ status automation_status NOT NULL DEFAULT 'active',
+ definition JSONB NOT NULL,
+ version INTEGER NOT NULL DEFAULT 1,
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+ updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+ );
+ """
+ )
+ op.execute(
+ "CREATE INDEX ix_automations_search_space_id ON automations(search_space_id);"
+ )
+ op.execute(
+ "CREATE INDEX ix_automations_created_by_user_id ON automations(created_by_user_id);"
+ )
+ op.execute("CREATE INDEX ix_automations_status ON automations(status);")
+ op.execute("CREATE INDEX ix_automations_created_at ON automations(created_at);")
+ op.execute("CREATE INDEX ix_automations_updated_at ON automations(updated_at);")
+
+ # automation_triggers — one row per (automation, trigger-instance) pair
+ op.execute(
+ """
+ CREATE TABLE automation_triggers (
+ id SERIAL PRIMARY KEY,
+ automation_id INTEGER NOT NULL
+ REFERENCES automations(id) ON DELETE CASCADE,
+ type automation_trigger_type NOT NULL,
+ config JSONB NOT NULL,
+ enabled BOOLEAN NOT NULL DEFAULT true,
+ last_fired_at TIMESTAMP WITH TIME ZONE,
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+ );
+ """
+ )
+ op.execute(
+ "CREATE INDEX ix_automation_triggers_automation_id ON automation_triggers(automation_id);"
+ )
+ op.execute(
+ "CREATE INDEX ix_automation_triggers_type ON automation_triggers(type);"
+ )
+ op.execute(
+ "CREATE INDEX ix_automation_triggers_enabled ON automation_triggers(enabled);"
+ )
+ op.execute(
+ "CREATE INDEX ix_automation_triggers_created_at ON automation_triggers(created_at);"
+ )
+
+ # automation_runs — the immutable per-fire execution record
+ op.execute(
+ """
+ CREATE TABLE automation_runs (
+ id SERIAL PRIMARY KEY,
+ automation_id INTEGER NOT NULL
+ REFERENCES automations(id) ON DELETE CASCADE,
+ trigger_id INTEGER
+ REFERENCES automation_triggers(id) ON DELETE SET NULL,
+ status automation_run_status NOT NULL DEFAULT 'pending',
+ definition_snapshot JSONB NOT NULL,
+ trigger_payload JSONB,
+ resolved_inputs JSONB NOT NULL DEFAULT '{}'::jsonb,
+ step_results JSONB NOT NULL DEFAULT '[]'::jsonb,
+ output JSONB,
+ artifacts JSONB NOT NULL DEFAULT '[]'::jsonb,
+ error JSONB,
+ started_at TIMESTAMP WITH TIME ZONE,
+ finished_at TIMESTAMP WITH TIME ZONE,
+ agent_session_id VARCHAR(200),
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+ );
+ """
+ )
+ op.execute(
+ "CREATE INDEX ix_automation_runs_automation_id ON automation_runs(automation_id);"
+ )
+ op.execute(
+ "CREATE INDEX ix_automation_runs_trigger_id ON automation_runs(trigger_id);"
+ )
+ op.execute("CREATE INDEX ix_automation_runs_status ON automation_runs(status);")
+ op.execute(
+ "CREATE INDEX ix_automation_runs_created_at ON automation_runs(created_at);"
+ )
+
+
+def downgrade() -> None:
+ op.execute("DROP INDEX IF EXISTS ix_automation_runs_created_at;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_runs_status;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_runs_trigger_id;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_runs_automation_id;")
+ op.execute("DROP TABLE IF EXISTS automation_runs;")
+
+ op.execute("DROP INDEX IF EXISTS ix_automation_triggers_created_at;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_triggers_enabled;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_triggers_type;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_triggers_automation_id;")
+ op.execute("DROP TABLE IF EXISTS automation_triggers;")
+
+ op.execute("DROP INDEX IF EXISTS ix_automations_updated_at;")
+ op.execute("DROP INDEX IF EXISTS ix_automations_created_at;")
+ op.execute("DROP INDEX IF EXISTS ix_automations_status;")
+ op.execute("DROP INDEX IF EXISTS ix_automations_created_by_user_id;")
+ op.execute("DROP INDEX IF EXISTS ix_automations_search_space_id;")
+ op.execute("DROP TABLE IF EXISTS automations;")
+
+ op.execute("DROP TYPE IF EXISTS automation_run_status;")
+ op.execute("DROP TYPE IF EXISTS automation_trigger_type;")
+ op.execute("DROP TYPE IF EXISTS automation_status;")
From be4d43d6c9c41ea115c7a6e4a2bbf41afbf241b3 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:50:52 +0200
Subject: [PATCH 24/87] feat(automation): add Pydantic schemas for the
automation definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Three layers of Pydantic models under app/automations/schemas/, one
file per concern (SRP), matching the envelope in
automation-design-plan.md §5.
definition/ — the editable envelope persisted in
automations.definition:
- envelope.py AutomationDefinition (top-level shape)
- plan_step.py PlanStep (one step in the sequential plan)
- inputs.py InputsBlock (the inputs JSON Schema wrapper)
- execution.py ExecutionBlock (timeouts, retries, concurrency,
budget cap, on_failure plan)
- metadata.py MetadataBlock (tags + created_from_nl + extras)
- trigger_spec.py TriggerSpec (one entry in triggers[])
triggers/ — per-trigger config schemas, dispatched by registry on the
TriggerSpec.type discriminator:
- schedule.py ScheduleTriggerConfig(cron, timezone)
- manual.py ManualTriggerConfig() — empty in v1
actions/ — per-action config schemas, dispatched by registry on the
PlanStep.action discriminator:
- agent_task.py AgentTaskActionConfig(prompt, tools, model,
output_schema)
Design properties verified by an inline smoke test:
- The §5 worked example round-trips through model_validate_json /
model_dump_json byte-for-byte (InputsBlock uses
serialize_by_alias so the JSON key stays "schema" not
"schema_").
- Envelope rejects unknown top-level keys (extra="forbid").
- MetadataBlock tolerates unknown keys (extra="allow").
- ExecutionBlock defaults apply when the block is omitted.
- retry_backoff and concurrency are typed as Literal — bogus
values rejected at validation time.
- Per-type configs enforce their required fields (cron + timezone
on schedule; non-empty prompt on agent_task).
The envelope keeps trigger and action configs as untyped dicts on
purpose — per-type validation is a registry-driven dispatch (commit
10), keeping the envelope free of every-type-knows-every-type
coupling.
---
.../app/automations/schemas/__init__.py | 23 ++++-
.../automations/schemas/actions/__init__.py | 6 +-
.../automations/schemas/actions/agent_task.py | 66 ++++++++++++++
.../schemas/definition/__init__.py | 16 +++-
.../schemas/definition/envelope.py | 89 +++++++++++++++++++
.../schemas/definition/execution.py | 76 ++++++++++++++++
.../automations/schemas/definition/inputs.py | 43 +++++++++
.../schemas/definition/metadata.py | 36 ++++++++
.../schemas/definition/plan_step.py | 86 ++++++++++++++++++
.../schemas/definition/trigger_spec.py | 40 +++++++++
.../automations/schemas/triggers/__init__.py | 8 +-
.../automations/schemas/triggers/manual.py | 21 +++++
.../automations/schemas/triggers/schedule.py | 33 +++++++
13 files changed, 539 insertions(+), 4 deletions(-)
create mode 100644 surfsense_backend/app/automations/schemas/actions/agent_task.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/envelope.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/execution.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/inputs.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/metadata.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/plan_step.py
create mode 100644 surfsense_backend/app/automations/schemas/definition/trigger_spec.py
create mode 100644 surfsense_backend/app/automations/schemas/triggers/manual.py
create mode 100644 surfsense_backend/app/automations/schemas/triggers/schedule.py
diff --git a/surfsense_backend/app/automations/schemas/__init__.py b/surfsense_backend/app/automations/schemas/__init__.py
index 67211b898..83a95a2a8 100644
--- a/surfsense_backend/app/automations/schemas/__init__.py
+++ b/surfsense_backend/app/automations/schemas/__init__.py
@@ -2,4 +2,25 @@
from __future__ import annotations
-__all__: list[str] = []
+from .actions import AgentTaskActionConfig
+from .definition import (
+ AutomationDefinition,
+ ExecutionBlock,
+ InputsBlock,
+ MetadataBlock,
+ PlanStep,
+ TriggerSpec,
+)
+from .triggers import ManualTriggerConfig, ScheduleTriggerConfig
+
+__all__ = [
+ "AgentTaskActionConfig",
+ "AutomationDefinition",
+ "ExecutionBlock",
+ "InputsBlock",
+ "ManualTriggerConfig",
+ "MetadataBlock",
+ "PlanStep",
+ "ScheduleTriggerConfig",
+ "TriggerSpec",
+]
diff --git a/surfsense_backend/app/automations/schemas/actions/__init__.py b/surfsense_backend/app/automations/schemas/actions/__init__.py
index 1aa68b629..17c257562 100644
--- a/surfsense_backend/app/automations/schemas/actions/__init__.py
+++ b/surfsense_backend/app/automations/schemas/actions/__init__.py
@@ -2,4 +2,8 @@
from __future__ import annotations
-__all__: list[str] = []
+from .agent_task import AgentTaskActionConfig
+
+__all__ = [
+ "AgentTaskActionConfig",
+]
diff --git a/surfsense_backend/app/automations/schemas/actions/agent_task.py b/surfsense_backend/app/automations/schemas/actions/agent_task.py
new file mode 100644
index 000000000..74e41166a
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/actions/agent_task.py
@@ -0,0 +1,66 @@
+"""``AgentTaskActionConfig`` — config for the ``agent_task`` action type."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class AgentTaskActionConfig(BaseModel):
+ """Config for an ``agent_task`` plan step.
+
+ Validated against ``PlanStep.config`` whenever the step's
+ ``action`` is ``agent_task``. The step instructs the LangGraph
+ Deep Agent runtime to:
+
+ 1. Receive ``prompt`` (with all preceding-step outputs and inputs
+ already rendered by the template engine).
+ 2. Run the agent with access to *exactly* the capabilities named
+ in ``tools`` — nothing else from the registry is visible to
+ this agent invocation.
+ 3. Return a JSON object matching ``output_schema`` (recommended;
+ the executor validates and re-prompts on mismatch).
+
+ ``output_schema`` is the design's "dynamic output contract" —
+ instead of locking the output shape on the ActionDefinition (as
+ tight actions do), the user declares the shape they want for this
+ specific step, and the agent has to match it.
+ """
+
+ model_config = ConfigDict(extra="forbid")
+
+ prompt: str = Field(
+ ...,
+ description=(
+ "The task prompt rendered through the Jinja sandbox. May "
+ "reference automation inputs and prior-step outputs."
+ ),
+ min_length=1,
+ )
+ tools: list[str] = Field(
+ default_factory=list,
+ description=(
+ "Allowlist of capability IDs the agent may call (e.g., "
+ "'search_space.query'). Empty list = no tool access; the "
+ "agent must answer from the prompt alone."
+ ),
+ )
+ model: str | None = Field(
+ default=None,
+ description=(
+ "Optional LiteLLM model identifier (e.g., "
+ "'anthropic/claude-sonnet-4-7'). Omitted means the "
+ "automation falls back to the search space's default "
+ "agent_llm_id."
+ ),
+ )
+ output_schema: dict[str, Any] | None = Field(
+ default=None,
+ description=(
+ "Optional JSON Schema declaring the shape the agent must "
+ "return. Strongly recommended; the editor warns when "
+ "missing. Validated by the executor before binding to "
+ "``output_as``."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/definition/__init__.py b/surfsense_backend/app/automations/schemas/definition/__init__.py
index 3fbda8cc8..14040c20a 100644
--- a/surfsense_backend/app/automations/schemas/definition/__init__.py
+++ b/surfsense_backend/app/automations/schemas/definition/__init__.py
@@ -2,4 +2,18 @@
from __future__ import annotations
-__all__: list[str] = []
+from .envelope import AutomationDefinition
+from .execution import ExecutionBlock
+from .inputs import InputsBlock
+from .metadata import MetadataBlock
+from .plan_step import PlanStep
+from .trigger_spec import TriggerSpec
+
+__all__ = [
+ "AutomationDefinition",
+ "ExecutionBlock",
+ "InputsBlock",
+ "MetadataBlock",
+ "PlanStep",
+ "TriggerSpec",
+]
diff --git a/surfsense_backend/app/automations/schemas/definition/envelope.py b/surfsense_backend/app/automations/schemas/definition/envelope.py
new file mode 100644
index 000000000..ccf4c53df
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/envelope.py
@@ -0,0 +1,89 @@
+"""``AutomationDefinition`` — the top-level envelope persisted in ``automations.definition``."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from .execution import ExecutionBlock
+from .inputs import InputsBlock
+from .metadata import MetadataBlock
+from .plan_step import PlanStep
+from .trigger_spec import TriggerSpec
+
+
+class AutomationDefinition(BaseModel):
+ """The top-level JSON shape stored in ``automations.definition``.
+
+ This is the editable spec a user authors (or the NL generator
+ produces). The envelope is structural only — every nested
+ discriminator (``triggers[].type``, ``plan[].action``) is resolved
+ against the registries at validation time, so adding a new
+ trigger or action type does not require touching this schema.
+
+ See ``automation-design-plan.md`` §5 for the worked example and
+ rationale.
+ """
+
+ model_config = ConfigDict(extra="forbid")
+
+ schema_version: str = Field(
+ default="1.0",
+ description=(
+ "Schema version of the envelope itself. Migrations bump "
+ "this when the envelope shape changes; nested per-type "
+ "configs evolve independently via the registries."
+ ),
+ )
+ name: str = Field(
+ ...,
+ description="Short, user-facing name shown in lists.",
+ min_length=1,
+ max_length=200,
+ )
+ goal: str | None = Field(
+ default=None,
+ description=(
+ "Optional plain-language statement of what the "
+ "automation is for. Used by the NL generator's review "
+ "pass and by the UI's run dialog."
+ ),
+ )
+ inputs: InputsBlock | None = Field(
+ default=None,
+ description=(
+ "Optional input contract. When omitted, the automation "
+ "accepts no inputs at fire time."
+ ),
+ )
+ triggers: list[TriggerSpec] = Field(
+ default_factory=list,
+ description=(
+ "Triggers that fire this automation. Empty list means "
+ "the automation is only runnable via the manual "
+ "``Run now`` path."
+ ),
+ )
+ plan: list[PlanStep] = Field(
+ ...,
+ description=(
+ "Ordered sequence of steps. Executed in array order — "
+ "no parallelism, no DAGs, no loops at the envelope "
+ "level."
+ ),
+ min_length=1,
+ )
+ execution: ExecutionBlock = Field(
+ default_factory=ExecutionBlock,
+ description=(
+ "Execution defaults (timeouts, retries, concurrency, "
+ "budget). All fields default to safe values; the block "
+ "may be omitted entirely."
+ ),
+ )
+ metadata: MetadataBlock = Field(
+ default_factory=MetadataBlock,
+ description=(
+ "Free-form metadata (tags, NL-generator breadcrumbs, "
+ "UI annotations). Tolerates unknown keys by design."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/definition/execution.py b/surfsense_backend/app/automations/schemas/definition/execution.py
new file mode 100644
index 000000000..bb80e7281
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/execution.py
@@ -0,0 +1,76 @@
+"""``ExecutionBlock`` — the ``execution`` section of the automation definition."""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from .plan_step import PlanStep
+
+
+class ExecutionBlock(BaseModel):
+ """The ``execution`` block of an ``AutomationDefinition``.
+
+ Carries automation-wide defaults that individual ``PlanStep``s
+ can override. Every field has a sane default so an automation
+ definition may omit the block entirely; in that case all defaults
+ apply.
+
+ ``on_failure`` is a secondary plan that runs only when the main
+ ``plan`` fails after retries exhaust. It uses the same
+ ``PlanStep`` shape as the main plan and shares the same execution
+ semantics.
+ """
+
+ model_config = ConfigDict(extra="forbid")
+
+ timeout_seconds: int = Field(
+ default=600,
+ gt=0,
+ description=(
+ "Hard wall-clock cap for the entire run. The executor "
+ "transitions the run to ``timed_out`` when this is "
+ "exceeded."
+ ),
+ )
+ max_retries: int = Field(
+ default=2,
+ ge=0,
+ description=(
+ "Per-step retry budget applied when a step raises a "
+ "retryable error. Steps may override per-step."
+ ),
+ )
+ retry_backoff: Literal["exponential", "linear", "none"] = Field(
+ default="exponential",
+ description="Backoff policy between retries.",
+ )
+ concurrency: Literal[
+ "drop_if_running", "queue", "always"
+ ] = Field(
+ default="drop_if_running",
+ description=(
+ "Behaviour when a new fire arrives while a previous run "
+ "is still in progress. ``drop_if_running`` skips the new "
+ "fire, ``queue`` enqueues it, ``always`` runs it in "
+ "parallel."
+ ),
+ )
+ budget_cap_usd: float | None = Field(
+ default=None,
+ gt=0,
+ description=(
+ "Optional mid-flight cost cap in USD. The executor kills "
+ "the run when accumulated cost exceeds this value. v1 "
+ "treats this as an advisory because cost tracking lands "
+ "with the executor in a later step."
+ ),
+ )
+ on_failure: list[PlanStep] = Field(
+ default_factory=list,
+ description=(
+ "Secondary plan executed only when the main plan fails "
+ "after retries exhaust. Empty list means no fallback."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/definition/inputs.py b/surfsense_backend/app/automations/schemas/definition/inputs.py
new file mode 100644
index 000000000..279efc113
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/inputs.py
@@ -0,0 +1,43 @@
+"""``InputsBlock`` — the ``inputs`` section of the automation definition."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class InputsBlock(BaseModel):
+ """The ``inputs`` block of an ``AutomationDefinition``.
+
+ Holds a JSON Schema describing what data the automation accepts at
+ fire time. The same schema is used by:
+
+ - The form editor (to render the manual-run dialog).
+ - The dispatcher (to validate trigger payloads before enqueueing
+ executor work).
+ - The template engine (to expose ``{{ inputs.* }}`` references in
+ plan-step configs).
+
+ The ``schema`` value is the JSON-Schema dict itself, not a
+ Pydantic model — automations express their input contract in pure
+ JSON Schema so it round-trips losslessly through the database and
+ the NL generator.
+ """
+
+ model_config = ConfigDict(
+ extra="forbid",
+ populate_by_name=True,
+ serialize_by_alias=True,
+ )
+
+ schema_: dict[str, Any] = Field(
+ ...,
+ alias="schema",
+ description=(
+ "JSON Schema (draft-07 compatible) describing the inputs "
+ "this automation accepts. Properties may use the special "
+ "``$last_fired_at`` default literal to bind to the "
+ "trigger's last fire time."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/definition/metadata.py b/surfsense_backend/app/automations/schemas/definition/metadata.py
new file mode 100644
index 000000000..dc6541983
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/metadata.py
@@ -0,0 +1,36 @@
+"""``MetadataBlock`` — the ``metadata`` section of the automation definition."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class MetadataBlock(BaseModel):
+ """Free-form metadata attached to the automation definition.
+
+ Unlike the rest of the envelope this block tolerates unknown keys
+ (``extra='allow'``) — it's a deliberate extension point for
+ UI annotations, NL-generator breadcrumbs, custom tags, etc.
+
+ Two fields are first-class so the rest of the system can rely on
+ them without reaching into the loose extras:
+
+ ``tags`` — used by the UI for filtering and grouping.
+ ``created_from_nl`` — set by the NL generator so we can later
+ measure how many runs came from natural-language authoring.
+ """
+
+ model_config = ConfigDict(extra="allow")
+
+ tags: list[str] = Field(
+ default_factory=list,
+ description="UI-facing tags. No semantic meaning to the engine.",
+ )
+ created_from_nl: bool = Field(
+ default=False,
+ description=(
+ "True when the definition was produced by the NL "
+ "generator (set automatically by the generator path; "
+ "human-authored definitions keep this false)."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/definition/plan_step.py b/surfsense_backend/app/automations/schemas/definition/plan_step.py
new file mode 100644
index 000000000..6898a0914
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/plan_step.py
@@ -0,0 +1,86 @@
+"""``PlanStep`` — one entry in the envelope's ``plan`` array."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class PlanStep(BaseModel):
+ """One step in an automation's sequential plan.
+
+ Steps run in array order, no parallelism, no DAGs, no loops. The
+ ``when`` Jinja expression provides conditional skip; branching is
+ achieved by ``when`` clauses on multiple steps. For looping or
+ parallel work, the user routes through ``agent_task`` and lets the
+ agent reason about it.
+
+ ``config`` is dispatched against the action registry at
+ validation time — its shape is determined by
+ ``ActionDefinition.config_schema`` for the ``action`` value.
+
+ ``output_as`` binds the step's typed output into the template
+ namespace for later steps, e.g. ``output_as: 'summary'`` then
+ ``{{ summary.bullets }}`` in a downstream step's config.
+ """
+
+ model_config = ConfigDict(extra="forbid")
+
+ step_id: str = Field(
+ ...,
+ description=(
+ "Unique-within-plan identifier. Used in run logs and as "
+ "the default for ``output_as`` when not provided."
+ ),
+ min_length=1,
+ )
+ action: str = Field(
+ ...,
+ description=(
+ "Action-type discriminator (e.g., ``agent_task``). "
+ "Resolved against the action registry."
+ ),
+ min_length=1,
+ )
+ when: str | None = Field(
+ default=None,
+ description=(
+ "Optional Jinja expression evaluated against the run "
+ "context. Step is skipped when the expression is "
+ "falsy."
+ ),
+ )
+ config: dict[str, Any] = Field(
+ default_factory=dict,
+ description=(
+ "Action-type-specific config. Validated against the "
+ "registered ``ActionDefinition.config_schema`` for "
+ "``action`` at definition-save time. Jinja templates "
+ "inside config are rendered at step-execute time."
+ ),
+ )
+ output_as: str | None = Field(
+ default=None,
+ description=(
+ "Name to bind the step output under for downstream "
+ "steps. Defaults to ``step_id`` when omitted."
+ ),
+ )
+ max_retries: int | None = Field(
+ default=None,
+ ge=0,
+ description=(
+ "Per-step override of the automation-level ``max_retries``. "
+ "Omitted means inherit from execution block."
+ ),
+ )
+ timeout_seconds: int | None = Field(
+ default=None,
+ gt=0,
+ description=(
+ "Per-step override of the automation-level "
+ "``timeout_seconds``. Omitted means inherit from "
+ "execution block."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
new file mode 100644
index 000000000..827b0a315
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
@@ -0,0 +1,40 @@
+"""``TriggerSpec`` — one entry in the envelope's ``triggers`` array."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class TriggerSpec(BaseModel):
+ """One trigger attached to an automation, as it appears in the definition.
+
+ The envelope keeps ``config`` as an untyped JSON object on purpose
+ — the per-type config schemas live in
+ ``app.automations.schemas.triggers`` and are dispatched at
+ validation time by looking up ``type`` in the trigger registry.
+
+ This mirrors the design's "definitions are pure data" principle:
+ the envelope describes shape, the registry resolves names to
+ behaviour.
+ """
+
+ model_config = ConfigDict(extra="forbid")
+
+ type: str = Field(
+ ...,
+ description=(
+ "Trigger-type discriminator (e.g., ``schedule``, ``manual``). "
+ "Resolved against the trigger registry."
+ ),
+ min_length=1,
+ )
+ config: dict[str, Any] = Field(
+ default_factory=dict,
+ description=(
+ "Trigger-type-specific config. Validated against the "
+ "registered ``TriggerDefinition.config_schema`` for "
+ "``type`` at definition-save time."
+ ),
+ )
diff --git a/surfsense_backend/app/automations/schemas/triggers/__init__.py b/surfsense_backend/app/automations/schemas/triggers/__init__.py
index 2da765bc3..847c7443b 100644
--- a/surfsense_backend/app/automations/schemas/triggers/__init__.py
+++ b/surfsense_backend/app/automations/schemas/triggers/__init__.py
@@ -2,4 +2,10 @@
from __future__ import annotations
-__all__: list[str] = []
+from .manual import ManualTriggerConfig
+from .schedule import ScheduleTriggerConfig
+
+__all__ = [
+ "ManualTriggerConfig",
+ "ScheduleTriggerConfig",
+]
diff --git a/surfsense_backend/app/automations/schemas/triggers/manual.py b/surfsense_backend/app/automations/schemas/triggers/manual.py
new file mode 100644
index 000000000..6e04ba062
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/triggers/manual.py
@@ -0,0 +1,21 @@
+"""``ManualTriggerConfig`` — config for the ``manual`` trigger type (empty in v1)."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict
+
+
+class ManualTriggerConfig(BaseModel):
+ """Config for the UI-driven ``manual`` trigger.
+
+ Validated against ``AutomationTrigger.config`` whenever the
+ persisted ``type`` is ``manual``. v1 carries no configurable
+ fields — the "Run now" affordance simply fires this trigger with
+ an empty config object. The model exists so the registry dispatch
+ is uniform across all trigger types.
+
+ Future versions may add fields here (e.g., a fixed prompt to
+ pre-fill the run dialog with) without breaking v1 payloads.
+ """
+
+ model_config = ConfigDict(extra="forbid")
diff --git a/surfsense_backend/app/automations/schemas/triggers/schedule.py b/surfsense_backend/app/automations/schemas/triggers/schedule.py
new file mode 100644
index 000000000..e7c20da3a
--- /dev/null
+++ b/surfsense_backend/app/automations/schemas/triggers/schedule.py
@@ -0,0 +1,33 @@
+"""``ScheduleTriggerConfig`` — config for the ``schedule`` trigger type."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class ScheduleTriggerConfig(BaseModel):
+ """Config for a cron-driven trigger.
+
+ Validated against ``AutomationTrigger.config`` whenever the
+ persisted ``type`` is ``schedule``. The cron expression is
+ evaluated by Celery Beat's source; the timezone is an IANA name
+ (e.g., ``Africa/Kigali``) and is required so the user's cron is
+ unambiguous across DST boundaries.
+ """
+
+ model_config = ConfigDict(extra="forbid")
+
+ cron: str = Field(
+ ...,
+ description=(
+ "Five-field cron expression. Minimum resolution is one "
+ "minute; the form editor warns when intervals tighter "
+ "than 15 minutes are used."
+ ),
+ examples=["0 9 * * 1-5"],
+ )
+ timezone: str = Field(
+ ...,
+ description="IANA timezone name (e.g., 'Africa/Kigali', 'UTC').",
+ examples=["Africa/Kigali"],
+ )
From 7a96c0e29c2ff7521eab2289a618a95c0aa4a7c1 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 22:54:17 +0200
Subject: [PATCH 25/87] feat(automation): add empty Capability / Action /
Trigger registries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Three registries under app/automations/registries/, each as its own
folder with the same SRP-per-file split (types.py for the dataclass,
store.py for the in-memory dict + register/get/all functions). All
three start empty; concrete entries land when the user signs off on
which capabilities / actions / triggers to include (step 2).
Capability (locked at v1-minimum five fields — see commit 2):
- id, description, input_schema, output_schema, handler
- CapabilityHandler = Callable[[dict[str, Any]], Awaitable[Any]]
- Frozen, slotted dataclass (immutable post-registration).
ActionDefinition (v1-trim of design plan §4):
- type, name, description, config_schema, handler
- Defers output_contract (handled per-step by agent_task's
config.output_schema), uses_capabilities (no static analysis
needed until >1 action ships), and produces_artifacts (deferred
alongside the artifact pipeline).
TriggerDefinition (declarative, no handler):
- type, description, config_schema, payload_schema
- No handler field — firing is a single dispatcher's
responsibility, not a per-trigger one.
store.py contract for all three:
- register_*: called once per entry at process startup; raises ValueError on a duplicate id/type
- get_*: returns None on miss
- all_*: returns a defensive copy of the registry dict
Verified by an inline smoke test (10 checks): empty initial state,
registration and lookup work, duplicates raise, frozen dataclasses
reject mutation, snapshots are copies, handlers are awaitable.
Isolation invariant audit: grep across the full app/automations/
tree shows only three app.* imports, all of them
``from app.db import BaseModel, TimestampMixin`` in the model files.
No imports from app.agents.*, app.services.*, app.tasks.*,
app.routes.*, or any other business-logic module.
---
.../app/automations/registries/__init__.py | 38 +++++++++++++++-
.../registries/actions/__init__.py | 11 ++++-
.../automations/registries/actions/store.py | 33 ++++++++++++++
.../automations/registries/actions/types.py | 44 +++++++++++++++++++
.../registries/capabilities/__init__.py | 11 ++++-
.../registries/capabilities/store.py | 40 +++++++++++++++++
.../registries/capabilities/types.py | 40 +++++++++++++++++
.../registries/triggers/__init__.py | 10 ++++-
.../automations/registries/triggers/store.py | 33 ++++++++++++++
.../automations/registries/triggers/types.py | 35 +++++++++++++++
10 files changed, 291 insertions(+), 4 deletions(-)
create mode 100644 surfsense_backend/app/automations/registries/actions/store.py
create mode 100644 surfsense_backend/app/automations/registries/actions/types.py
create mode 100644 surfsense_backend/app/automations/registries/capabilities/store.py
create mode 100644 surfsense_backend/app/automations/registries/capabilities/types.py
create mode 100644 surfsense_backend/app/automations/registries/triggers/store.py
create mode 100644 surfsense_backend/app/automations/registries/triggers/types.py
diff --git a/surfsense_backend/app/automations/registries/__init__.py b/surfsense_backend/app/automations/registries/__init__.py
index e7334cca8..47023f903 100644
--- a/surfsense_backend/app/automations/registries/__init__.py
+++ b/surfsense_backend/app/automations/registries/__init__.py
@@ -2,4 +2,40 @@
from __future__ import annotations
-__all__: list[str] = []
+from .actions import (
+ ActionDefinition,
+ ActionHandler,
+ all_actions,
+ get_action,
+ register_action,
+)
+from .capabilities import (
+ Capability,
+ CapabilityHandler,
+ all_capabilities,
+ get_capability,
+ register_capability,
+)
+from .triggers import (
+ TriggerDefinition,
+ all_triggers,
+ get_trigger,
+ register_trigger,
+)
+
+__all__ = [
+ "ActionDefinition",
+ "ActionHandler",
+ "Capability",
+ "CapabilityHandler",
+ "TriggerDefinition",
+ "all_actions",
+ "all_capabilities",
+ "all_triggers",
+ "get_action",
+ "get_capability",
+ "get_trigger",
+ "register_action",
+ "register_capability",
+ "register_trigger",
+]
diff --git a/surfsense_backend/app/automations/registries/actions/__init__.py b/surfsense_backend/app/automations/registries/actions/__init__.py
index 6b19b7091..c6b550096 100644
--- a/surfsense_backend/app/automations/registries/actions/__init__.py
+++ b/surfsense_backend/app/automations/registries/actions/__init__.py
@@ -2,4 +2,13 @@
from __future__ import annotations
-__all__: list[str] = []
+from .store import all_actions, get_action, register_action
+from .types import ActionDefinition, ActionHandler
+
+__all__ = [
+ "ActionDefinition",
+ "ActionHandler",
+ "all_actions",
+ "get_action",
+ "register_action",
+]
diff --git a/surfsense_backend/app/automations/registries/actions/store.py b/surfsense_backend/app/automations/registries/actions/store.py
new file mode 100644
index 000000000..720243b83
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/actions/store.py
@@ -0,0 +1,33 @@
+"""Action registry: in-memory dict + ``register_action`` API."""
+
+from __future__ import annotations
+
+from .types import ActionDefinition
+
+_REGISTRY: dict[str, ActionDefinition] = {}
+
+
+def register_action(action: ActionDefinition) -> None:
+ """Add an action to the in-memory registry.
+
+ Raises ``ValueError`` on duplicate ``type`` — registration runs
+ once per process, so a duplicate is always a bug.
+ """
+
+ if action.type in _REGISTRY:
+ raise ValueError(
+ f"Action already registered: {action.type!r}"
+ )
+ _REGISTRY[action.type] = action
+
+
+def get_action(action_type: str) -> ActionDefinition | None:
+ """Look up one action by type. Returns ``None`` on miss."""
+
+ return _REGISTRY.get(action_type)
+
+
+def all_actions() -> dict[str, ActionDefinition]:
+ """Snapshot of the registry as a defensive copy."""
+
+ return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/actions/types.py b/surfsense_backend/app/automations/registries/actions/types.py
new file mode 100644
index 000000000..2ab2906b1
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/actions/types.py
@@ -0,0 +1,44 @@
+"""``ActionDefinition`` dataclass — the v1-minimum action shape."""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+from typing import Any
+
+ActionHandler = Callable[[dict[str, Any]], Awaitable[Any]]
+"""The signature every action handler must satisfy.
+
+Identical in shape to ``CapabilityHandler`` — both receive a
+caller-validated input dict and return an arbitrary output. The
+distinction is purely architectural: capabilities are the low-level
+"what SurfSense can do" surface, actions are the user-facing
+building blocks composed into a plan.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class ActionDefinition:
+ """A user-facing step type the plan editor can compose.
+
+ v1 trims the dataclass to the five fields necessary for
+ registry dispatch and form rendering. The full design (§4)
+ includes ``output_contract``, ``uses_capabilities``, and
+ ``produces_artifacts``; all three are deferred until a consumer
+ feature requires them:
+
+ - ``output_contract`` — the loose ``agent_task`` action declares
+ its output shape per-step via ``config.output_schema``, so the
+ action-level contract is not needed in v1.
+ - ``uses_capabilities`` — would let the NL generator do static
+ analysis of which capabilities each action invokes; deferred
+ because v1 ships a single (``agent_task``) action.
+ - ``produces_artifacts`` — deferred alongside the artifact
+ pipeline (see §13 decision 26).
+ """
+
+ type: str
+ name: str
+ description: str
+ config_schema: dict[str, Any]
+ handler: ActionHandler
diff --git a/surfsense_backend/app/automations/registries/capabilities/__init__.py b/surfsense_backend/app/automations/registries/capabilities/__init__.py
index 77f9f88b7..6fa2c8246 100644
--- a/surfsense_backend/app/automations/registries/capabilities/__init__.py
+++ b/surfsense_backend/app/automations/registries/capabilities/__init__.py
@@ -2,4 +2,13 @@
from __future__ import annotations
-__all__: list[str] = []
+from .store import all_capabilities, get_capability, register_capability
+from .types import Capability, CapabilityHandler
+
+__all__ = [
+ "Capability",
+ "CapabilityHandler",
+ "all_capabilities",
+ "get_capability",
+ "register_capability",
+]
diff --git a/surfsense_backend/app/automations/registries/capabilities/store.py b/surfsense_backend/app/automations/registries/capabilities/store.py
new file mode 100644
index 000000000..3c8822d76
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/capabilities/store.py
@@ -0,0 +1,40 @@
+"""Capability registry: in-memory dict + ``register_capability`` API."""
+
+from __future__ import annotations
+
+from .types import Capability
+
+_REGISTRY: dict[str, Capability] = {}
+
+
+def register_capability(capability: Capability) -> None:
+ """Add a capability to the in-memory registry.
+
+ Raises ``ValueError`` on duplicate ``id`` — registration is
+ idempotent only at the module level (a module's
+ ``register_capability`` call runs once per process), so a
+ duplicate is always a bug.
+ """
+
+ if capability.id in _REGISTRY:
+ raise ValueError(
+ f"Capability already registered: {capability.id!r}"
+ )
+ _REGISTRY[capability.id] = capability
+
+
+def get_capability(capability_id: str) -> Capability | None:
+ """Look up one capability by id. Returns ``None`` on miss."""
+
+ return _REGISTRY.get(capability_id)
+
+
+def all_capabilities() -> dict[str, Capability]:
+ """Snapshot of the registry as a defensive copy.
+
+ Returned dict is safe to iterate while other code calls
+ ``register_capability`` (which v1 never does post-startup, but
+ the contract holds anyway).
+ """
+
+ return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/capabilities/types.py b/surfsense_backend/app/automations/registries/capabilities/types.py
new file mode 100644
index 000000000..001f26ac1
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/capabilities/types.py
@@ -0,0 +1,40 @@
+"""``Capability`` dataclass — the v1-minimum five-field shape."""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+from typing import Any
+
+CapabilityHandler = Callable[[dict[str, Any]], Awaitable[Any]]
+"""The signature every capability handler must satisfy.
+
+The handler is a closure that already holds whatever runtime context
+it needs (DB session, search-space scope, logger, etc.). The
+registry only passes through the caller's input dict — the same dict
+that was validated against ``input_schema``.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class Capability:
+ """The unit of "what SurfSense can do," consumed by every layer.
+
+ v1 keeps the dataclass to exactly five fields. Earlier drafts
+ considered ``name``, ``required_credentials``, ``side_effects``,
+ ``expected_duration_seconds``, and ``cost_estimate``; every one
+ of those has been removed until a concrete consumer feature
+ requires it (see ``automation-design-plan.md`` §3, decision v1).
+
+ The handler is a ready-to-call function. It does not receive a
+ context argument — context is bound at registration time by the
+ factory that builds the closure (so a capability returned to an
+ agent's tool list looks identical to one returned to an
+ automation's action runtime).
+ """
+
+ id: str
+ description: str
+ input_schema: dict[str, Any]
+ output_schema: dict[str, Any]
+ handler: CapabilityHandler
diff --git a/surfsense_backend/app/automations/registries/triggers/__init__.py b/surfsense_backend/app/automations/registries/triggers/__init__.py
index bc795b61a..f69c6fe8d 100644
--- a/surfsense_backend/app/automations/registries/triggers/__init__.py
+++ b/surfsense_backend/app/automations/registries/triggers/__init__.py
@@ -2,4 +2,12 @@
from __future__ import annotations
-__all__: list[str] = []
+from .store import all_triggers, get_trigger, register_trigger
+from .types import TriggerDefinition
+
+__all__ = [
+ "TriggerDefinition",
+ "all_triggers",
+ "get_trigger",
+ "register_trigger",
+]
diff --git a/surfsense_backend/app/automations/registries/triggers/store.py b/surfsense_backend/app/automations/registries/triggers/store.py
new file mode 100644
index 000000000..0a5fbdadb
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/triggers/store.py
@@ -0,0 +1,33 @@
+"""Trigger registry: in-memory dict + ``register_trigger`` API."""
+
+from __future__ import annotations
+
+from .types import TriggerDefinition
+
+_REGISTRY: dict[str, TriggerDefinition] = {}
+
+
+def register_trigger(trigger: TriggerDefinition) -> None:
+ """Add a trigger to the in-memory registry.
+
+ Raises ``ValueError`` on duplicate ``type`` — registration runs
+ once per process, so a duplicate is always a bug.
+ """
+
+ if trigger.type in _REGISTRY:
+ raise ValueError(
+ f"Trigger already registered: {trigger.type!r}"
+ )
+ _REGISTRY[trigger.type] = trigger
+
+
+def get_trigger(trigger_type: str) -> TriggerDefinition | None:
+ """Look up one trigger by type. Returns ``None`` on miss."""
+
+ return _REGISTRY.get(trigger_type)
+
+
+def all_triggers() -> dict[str, TriggerDefinition]:
+ """Snapshot of the registry as a defensive copy."""
+
+ return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/triggers/types.py b/surfsense_backend/app/automations/registries/triggers/types.py
new file mode 100644
index 000000000..256944823
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/triggers/types.py
@@ -0,0 +1,35 @@
+"""``TriggerDefinition`` dataclass — declarative trigger metadata, no handler."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass(frozen=True, slots=True)
+class TriggerDefinition:
+ """A trigger type the dispatcher knows how to fire.
+
+ Triggers are purely declarative: the dispatcher (a single
+ process-wide component, not a per-type handler) reads the
+ ``automation_triggers`` table and decides when each row should
+ fire. The trigger's job here is to declare its input/output
+ contract:
+
+ - ``config_schema``: JSON Schema for the persisted
+ ``AutomationTrigger.config`` — used by the form editor and
+ validated on save.
+ - ``payload_schema``: JSON Schema for the payload the dispatcher
+ will deliver to the executor at fire time (e.g., a schedule
+ trigger emits ``fired_at`` / ``scheduled_for`` /
+ ``last_fired_at``).
+
+ No ``handler`` field — firing is a dispatcher responsibility,
+ not a per-trigger one. This keeps the dispatcher single and
+ leaves trigger types as pure metadata.
+ """
+
+ type: str
+ description: str
+ config_schema: dict[str, Any]
+ payload_schema: dict[str, Any]
From f0e00bd3ee0ec5366ea6cea039723a68d08b9a9e Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Tue, 26 May 2026 23:01:22 +0200
Subject: [PATCH 26/87] chore(automation): trim docstrings to intent only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cut the docstrings and Field(description=...) text across the entire
automations/ tree down to single-line intent statements, matching the
multi_agent_chat conciseness style:
- Module docstrings: one line stating what the file is.
- Class docstrings: deleted when the class name + module docstring
already cover intent; kept only where they add a constraint or
rationale not visible in the signature.
- Pydantic Field descriptions: short noun phrases / clauses, not
full sentences. Reasoning that belonged in the design plan moved
out of the code.
- Enum values: per-value docstrings replaced with terse inline
comments where the meaning isn't obvious from the name.
Behaviour is unchanged. The same 33 files, same public surface, same
imports — verified by re-running the 10-point registry smoke test and
the 8-point schema round-trip / constraint suite from commits 9 and
10.
LOC: 1180 → 691 (-41%).
---
surfsense_backend/app/automations/__init__.py | 2 +-
.../app/automations/persistence/__init__.py | 2 +-
.../automations/persistence/enums/__init__.py | 2 +-
.../persistence/enums/automation_status.py | 15 +---
.../persistence/enums/run_status.py | 16 +---
.../persistence/enums/trigger_type.py | 13 +--
.../persistence/models/__init__.py | 2 +-
.../persistence/models/automation.py | 18 +---
.../app/automations/persistence/models/run.py | 18 +---
.../automations/persistence/models/trigger.py | 15 +---
.../app/automations/registries/__init__.py | 2 +-
.../registries/actions/__init__.py | 2 +-
.../automations/registries/actions/store.py | 18 +---
.../automations/registries/actions/types.py | 28 +------
.../registries/capabilities/__init__.py | 2 +-
.../registries/capabilities/store.py | 25 +-----
.../registries/capabilities/types.py | 24 +-----
.../registries/triggers/__init__.py | 2 +-
.../automations/registries/triggers/store.py | 18 +---
.../automations/registries/triggers/types.py | 23 +----
.../app/automations/schemas/__init__.py | 2 +-
.../automations/schemas/actions/__init__.py | 2 +-
.../automations/schemas/actions/agent_task.py | 49 ++---------
.../schemas/definition/__init__.py | 2 +-
.../schemas/definition/envelope.py | 83 +++----------------
.../schemas/definition/execution.py | 65 ++-------------
.../automations/schemas/definition/inputs.py | 26 +-----
.../schemas/definition/metadata.py | 28 +------
.../schemas/definition/plan_step.py | 74 ++---------------
.../schemas/definition/trigger_spec.py | 29 +------
.../automations/schemas/triggers/__init__.py | 2 +-
.../automations/schemas/triggers/manual.py | 14 +---
.../automations/schemas/triggers/schedule.py | 25 +-----
33 files changed, 80 insertions(+), 568 deletions(-)
diff --git a/surfsense_backend/app/automations/__init__.py b/surfsense_backend/app/automations/__init__.py
index edb7891ea..a4ce8ecc9 100644
--- a/surfsense_backend/app/automations/__init__.py
+++ b/surfsense_backend/app/automations/__init__.py
@@ -1,4 +1,4 @@
-"""Automations: scheduled / triggered runs of capabilities — see automation-design-plan.md."""
+"""Automations engine — see automation-design-plan.md."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/__init__.py b/surfsense_backend/app/automations/persistence/__init__.py
index 265742a85..4c1ea3423 100644
--- a/surfsense_backend/app/automations/persistence/__init__.py
+++ b/surfsense_backend/app/automations/persistence/__init__.py
@@ -1,4 +1,4 @@
-"""Persistence layer: SQLAlchemy enums under ``enums/`` and models under ``models/``."""
+"""SQLAlchemy models and enums for the automation tables."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/enums/__init__.py b/surfsense_backend/app/automations/persistence/enums/__init__.py
index cf9e7dd1b..6c2cfcf1f 100644
--- a/surfsense_backend/app/automations/persistence/enums/__init__.py
+++ b/surfsense_backend/app/automations/persistence/enums/__init__.py
@@ -1,4 +1,4 @@
-"""SQLAlchemy / Python enums backing the three automation tables."""
+"""Enums for the automation tables."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/enums/automation_status.py b/surfsense_backend/app/automations/persistence/enums/automation_status.py
index 3f2ca9621..aff6f4683 100644
--- a/surfsense_backend/app/automations/persistence/enums/automation_status.py
+++ b/surfsense_backend/app/automations/persistence/enums/automation_status.py
@@ -1,4 +1,4 @@
-"""``AutomationStatus`` — lifecycle of a stored automation definition."""
+"""Automation lifecycle status."""
from __future__ import annotations
@@ -6,13 +6,6 @@ from enum import StrEnum
class AutomationStatus(StrEnum):
- """Status of an automation in the registry.
-
- ``active`` — eligible to fire from its triggers.
- ``paused`` — definition retained, triggers do not fire.
- ``archived`` — kept for run history only; no edits, no fires.
- """
-
- ACTIVE = "active"
- PAUSED = "paused"
- ARCHIVED = "archived"
+ ACTIVE = "active" # eligible to fire
+ PAUSED = "paused" # kept, but triggers don't fire
+ ARCHIVED = "archived" # read-only history
diff --git a/surfsense_backend/app/automations/persistence/enums/run_status.py b/surfsense_backend/app/automations/persistence/enums/run_status.py
index 0f619bd82..64dcd49e8 100644
--- a/surfsense_backend/app/automations/persistence/enums/run_status.py
+++ b/surfsense_backend/app/automations/persistence/enums/run_status.py
@@ -1,4 +1,4 @@
-"""``RunStatus`` — the state machine of a single ``AutomationRun``."""
+"""AutomationRun state machine: pending → running → (succeeded|failed|cancelled|timed_out)."""
from __future__ import annotations
@@ -6,20 +6,6 @@ from enum import StrEnum
class RunStatus(StrEnum):
- """Lifecycle states of an ``AutomationRun`` row.
-
- Transitions are linear with three terminal branches:
-
- pending → running → (succeeded | failed | cancelled | timed_out)
-
- ``pending`` — row created, executor task enqueued, work not started.
- ``running`` — executor has picked up the run.
- ``succeeded`` — terminal: plan completed without error.
- ``failed`` — terminal: at least one step raised an unrecoverable error.
- ``cancelled`` — terminal: caller asked for cancellation.
- ``timed_out`` — terminal: run exceeded its configured timeout.
- """
-
PENDING = "pending"
RUNNING = "running"
SUCCEEDED = "succeeded"
diff --git a/surfsense_backend/app/automations/persistence/enums/trigger_type.py b/surfsense_backend/app/automations/persistence/enums/trigger_type.py
index eb06fe773..8318bfdee 100644
--- a/surfsense_backend/app/automations/persistence/enums/trigger_type.py
+++ b/surfsense_backend/app/automations/persistence/enums/trigger_type.py
@@ -1,4 +1,4 @@
-"""``TriggerType`` — the trigger-kind discriminator (v1 = schedule, manual)."""
+"""Trigger-kind discriminator. v1: schedule | manual; webhook/event in Phase 2/3."""
from __future__ import annotations
@@ -6,16 +6,5 @@ from enum import StrEnum
class TriggerType(StrEnum):
- """Kind of trigger an ``AutomationTrigger`` row represents.
-
- v1 ships two kinds:
-
- ``schedule`` — fires on a cron expression managed by Celery Beat.
- ``manual`` — fires on demand from the UI's "Run now" affordance.
-
- ``webhook`` and ``event`` are deferred to Phase 2 and Phase 3
- respectively; adding them is an enum-value extension only.
- """
-
SCHEDULE = "schedule"
MANUAL = "manual"
diff --git a/surfsense_backend/app/automations/persistence/models/__init__.py b/surfsense_backend/app/automations/persistence/models/__init__.py
index 4aca02a03..4bc023ea3 100644
--- a/surfsense_backend/app/automations/persistence/models/__init__.py
+++ b/surfsense_backend/app/automations/persistence/models/__init__.py
@@ -1,4 +1,4 @@
-"""SQLAlchemy models: one file per table (``automation.py``, ``trigger.py``, ``run.py``)."""
+"""SQLAlchemy models, one per table."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/models/automation.py b/surfsense_backend/app/automations/persistence/models/automation.py
index fc4a1ed93..637fd2282 100644
--- a/surfsense_backend/app/automations/persistence/models/automation.py
+++ b/surfsense_backend/app/automations/persistence/models/automation.py
@@ -1,4 +1,4 @@
-"""``Automation`` table — the editable, versioned automation definition."""
+"""``automations`` table — editable, versioned automation definition."""
from __future__ import annotations
@@ -21,15 +21,6 @@ from ..enums.automation_status import AutomationStatus
class Automation(BaseModel, TimestampMixin):
- """The editable, versioned spec a user authors.
-
- The ``definition`` JSON is what the user (or the NL generator) writes
- and edits. Each save bumps ``version`` by one; the previous JSON is
- not kept in this row — version history is reconstructed from the
- ``definition_snapshot`` column on every ``AutomationRun`` that fired
- against a given version.
- """
-
__tablename__ = "automations"
search_space_id = Column(
@@ -59,12 +50,7 @@ class Automation(BaseModel, TimestampMixin):
definition = Column(JSONB, nullable=False)
- version = Column(
- Integer,
- nullable=False,
- default=1,
- server_default="1",
- )
+ version = Column(Integer, nullable=False, default=1, server_default="1")
updated_at = Column(
TIMESTAMP(timezone=True),
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index 5c6ec93ec..9291e5da0 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -1,4 +1,4 @@
-"""``AutomationRun`` table — the immutable per-fire execution record."""
+"""``automation_runs`` table — immutable per-fire execution record."""
from __future__ import annotations
@@ -18,16 +18,6 @@ from ..enums.run_status import RunStatus
class AutomationRun(BaseModel, TimestampMixin):
- """One execution of an automation.
-
- Every fire of any trigger inserts exactly one row here. The row is
- immutable from the user's perspective — the executor only updates
- ``status``, ``step_results``, ``output``, ``artifacts``, ``error``,
- ``started_at``, ``finished_at`` as the run progresses; the
- ``definition_snapshot`` is locked at fire time so the user can always
- see exactly what code path executed for any historical run.
- """
-
__tablename__ = "automation_runs"
automation_id = Column(
@@ -52,18 +42,14 @@ class AutomationRun(BaseModel, TimestampMixin):
index=True,
)
+ # locked at fire time so historical runs always show the exact code path
definition_snapshot = Column(JSONB, nullable=False)
trigger_payload = Column(JSONB, nullable=True)
-
resolved_inputs = Column(JSONB, nullable=False, server_default="{}")
-
step_results = Column(JSONB, nullable=False, server_default="[]")
-
output = Column(JSONB, nullable=True)
-
artifacts = Column(JSONB, nullable=False, server_default="[]")
-
error = Column(JSONB, nullable=True)
started_at = Column(TIMESTAMP(timezone=True), nullable=True)
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index 3173770d6..8dab48a6b 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -1,4 +1,4 @@
-"""``AutomationTrigger`` table — one row per (automation, trigger-instance) pair."""
+"""``automation_triggers`` table — one row per (automation, trigger-instance) pair."""
from __future__ import annotations
@@ -18,14 +18,6 @@ from ..enums.trigger_type import TriggerType
class AutomationTrigger(BaseModel, TimestampMixin):
- """One trigger attached to an automation.
-
- An automation may have multiple triggers — e.g. a ``schedule`` trigger
- for the autonomous path and a ``manual`` trigger backing the UI's
- "Run now" affordance. Each trigger's ``config`` is validated against
- the registered ``TriggerDefinition.config_schema`` for its ``type``.
- """
-
__tablename__ = "automation_triggers"
automation_id = Column(
@@ -51,7 +43,4 @@ class AutomationTrigger(BaseModel, TimestampMixin):
index=True,
)
- last_fired_at = Column(
- TIMESTAMP(timezone=True),
- nullable=True,
- )
+ last_fired_at = Column(TIMESTAMP(timezone=True), nullable=True)
diff --git a/surfsense_backend/app/automations/registries/__init__.py b/surfsense_backend/app/automations/registries/__init__.py
index 47023f903..a97595ced 100644
--- a/surfsense_backend/app/automations/registries/__init__.py
+++ b/surfsense_backend/app/automations/registries/__init__.py
@@ -1,4 +1,4 @@
-"""Three registries — ``capabilities/``, ``actions/``, ``triggers/`` — populated at import time."""
+"""Capability, action, and trigger registries — populated at process startup."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/registries/actions/__init__.py b/surfsense_backend/app/automations/registries/actions/__init__.py
index c6b550096..1bb3ae9cc 100644
--- a/surfsense_backend/app/automations/registries/actions/__init__.py
+++ b/surfsense_backend/app/automations/registries/actions/__init__.py
@@ -1,4 +1,4 @@
-"""Action registry: ``types.py`` (dataclass), ``store.py`` (dict + register fn)."""
+"""Action registry."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/registries/actions/store.py b/surfsense_backend/app/automations/registries/actions/store.py
index 720243b83..eff66c4c7 100644
--- a/surfsense_backend/app/automations/registries/actions/store.py
+++ b/surfsense_backend/app/automations/registries/actions/store.py
@@ -1,4 +1,4 @@
-"""Action registry: in-memory dict + ``register_action`` API."""
+"""In-memory action registry. Populated once at process startup."""
from __future__ import annotations
@@ -8,26 +8,16 @@ _REGISTRY: dict[str, ActionDefinition] = {}
def register_action(action: ActionDefinition) -> None:
- """Add an action to the in-memory registry.
-
- Raises ``ValueError`` on duplicate ``type`` — registration runs
- once per process, so a duplicate is always a bug.
- """
-
+ """Register an action. Raises on duplicate type."""
if action.type in _REGISTRY:
- raise ValueError(
- f"Action already registered: {action.type!r}"
- )
+ raise ValueError(f"Action already registered: {action.type!r}")
_REGISTRY[action.type] = action
def get_action(action_type: str) -> ActionDefinition | None:
- """Look up one action by type. Returns ``None`` on miss."""
-
return _REGISTRY.get(action_type)
def all_actions() -> dict[str, ActionDefinition]:
- """Snapshot of the registry as a defensive copy."""
-
+ """Defensive snapshot of the registry."""
return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/actions/types.py b/surfsense_backend/app/automations/registries/actions/types.py
index 2ab2906b1..13c826c66 100644
--- a/surfsense_backend/app/automations/registries/actions/types.py
+++ b/surfsense_backend/app/automations/registries/actions/types.py
@@ -1,4 +1,4 @@
-"""``ActionDefinition`` dataclass — the v1-minimum action shape."""
+"""``ActionDefinition`` dataclass and handler signature."""
from __future__ import annotations
@@ -7,36 +7,10 @@ from dataclasses import dataclass
from typing import Any
ActionHandler = Callable[[dict[str, Any]], Awaitable[Any]]
-"""The signature every action handler must satisfy.
-
-Identical in shape to ``CapabilityHandler`` — both receive a
-caller-validated input dict and return an arbitrary output. The
-distinction is purely architectural: capabilities are the low-level
-"what SurfSense can do" surface, actions are the user-facing
-building blocks composed into a plan.
-"""
@dataclass(frozen=True, slots=True)
class ActionDefinition:
- """A user-facing step type the plan editor can compose.
-
- v1 trims the dataclass to the five fields necessary for
- registry dispatch and form rendering. The full design (§4)
- includes ``output_contract``, ``uses_capabilities``, and
- ``produces_artifacts``; all three are deferred until a consumer
- feature requires them:
-
- - ``output_contract`` — the loose ``agent_task`` action declares
- its output shape per-step via ``config.output_schema``, so the
- action-level contract is not needed in v1.
- - ``uses_capabilities`` — would let the NL generator do static
- analysis of which capabilities each action invokes; deferred
- because v1 ships a single (``agent_task``) action.
- - ``produces_artifacts`` — deferred alongside the artifact
- pipeline (see §13 decision 26).
- """
-
type: str
name: str
description: str
diff --git a/surfsense_backend/app/automations/registries/capabilities/__init__.py b/surfsense_backend/app/automations/registries/capabilities/__init__.py
index 6fa2c8246..213303fc0 100644
--- a/surfsense_backend/app/automations/registries/capabilities/__init__.py
+++ b/surfsense_backend/app/automations/registries/capabilities/__init__.py
@@ -1,4 +1,4 @@
-"""Capability registry: ``types.py`` (dataclass), ``store.py`` (dict + register fn)."""
+"""Capability registry."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/registries/capabilities/store.py b/surfsense_backend/app/automations/registries/capabilities/store.py
index 3c8822d76..4d87abe47 100644
--- a/surfsense_backend/app/automations/registries/capabilities/store.py
+++ b/surfsense_backend/app/automations/registries/capabilities/store.py
@@ -1,4 +1,4 @@
-"""Capability registry: in-memory dict + ``register_capability`` API."""
+"""In-memory capability registry. Populated once at process startup."""
from __future__ import annotations
@@ -8,33 +8,16 @@ _REGISTRY: dict[str, Capability] = {}
def register_capability(capability: Capability) -> None:
- """Add a capability to the in-memory registry.
-
- Raises ``ValueError`` on duplicate ``id`` — registration is
- idempotent only at the module level (a module's
- ``register_capability`` call runs once per process), so a
- duplicate is always a bug.
- """
-
+ """Register a capability. Raises on duplicate id."""
if capability.id in _REGISTRY:
- raise ValueError(
- f"Capability already registered: {capability.id!r}"
- )
+ raise ValueError(f"Capability already registered: {capability.id!r}")
_REGISTRY[capability.id] = capability
def get_capability(capability_id: str) -> Capability | None:
- """Look up one capability by id. Returns ``None`` on miss."""
-
return _REGISTRY.get(capability_id)
def all_capabilities() -> dict[str, Capability]:
- """Snapshot of the registry as a defensive copy.
-
- Returned dict is safe to iterate while other code calls
- ``register_capability`` (which v1 never does post-startup, but
- the contract holds anyway).
- """
-
+ """Defensive snapshot of the registry."""
return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/capabilities/types.py b/surfsense_backend/app/automations/registries/capabilities/types.py
index 001f26ac1..2759bc809 100644
--- a/surfsense_backend/app/automations/registries/capabilities/types.py
+++ b/surfsense_backend/app/automations/registries/capabilities/types.py
@@ -1,4 +1,4 @@
-"""``Capability`` dataclass — the v1-minimum five-field shape."""
+"""``Capability`` dataclass and handler signature. Locked at five fields for v1."""
from __future__ import annotations
@@ -7,32 +7,10 @@ from dataclasses import dataclass
from typing import Any
CapabilityHandler = Callable[[dict[str, Any]], Awaitable[Any]]
-"""The signature every capability handler must satisfy.
-
-The handler is a closure that already holds whatever runtime context
-it needs (DB session, search-space scope, logger, etc.). The
-registry only passes through the caller's input dict — the same dict
-that was validated against ``input_schema``.
-"""
@dataclass(frozen=True, slots=True)
class Capability:
- """The unit of "what SurfSense can do," consumed by every layer.
-
- v1 keeps the dataclass to exactly five fields. Earlier drafts
- considered ``name``, ``required_credentials``, ``side_effects``,
- ``expected_duration_seconds``, and ``cost_estimate``; every one
- of those has been removed until a concrete consumer feature
- requires it (see ``automation-design-plan.md`` §3, decision v1).
-
- The handler is a ready-to-call function. It does not receive a
- context argument — context is bound at registration time by the
- factory that builds the closure (so a capability returned to an
- agent's tool list looks identical to one returned to an
- automation's action runtime).
- """
-
id: str
description: str
input_schema: dict[str, Any]
diff --git a/surfsense_backend/app/automations/registries/triggers/__init__.py b/surfsense_backend/app/automations/registries/triggers/__init__.py
index f69c6fe8d..843da5e70 100644
--- a/surfsense_backend/app/automations/registries/triggers/__init__.py
+++ b/surfsense_backend/app/automations/registries/triggers/__init__.py
@@ -1,4 +1,4 @@
-"""Trigger registry: ``types.py`` (dataclass), ``store.py`` (dict + register fn)."""
+"""Trigger registry."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/registries/triggers/store.py b/surfsense_backend/app/automations/registries/triggers/store.py
index 0a5fbdadb..af0fafac7 100644
--- a/surfsense_backend/app/automations/registries/triggers/store.py
+++ b/surfsense_backend/app/automations/registries/triggers/store.py
@@ -1,4 +1,4 @@
-"""Trigger registry: in-memory dict + ``register_trigger`` API."""
+"""In-memory trigger registry. Populated once at process startup."""
from __future__ import annotations
@@ -8,26 +8,16 @@ _REGISTRY: dict[str, TriggerDefinition] = {}
def register_trigger(trigger: TriggerDefinition) -> None:
- """Add a trigger to the in-memory registry.
-
- Raises ``ValueError`` on duplicate ``type`` — registration runs
- once per process, so a duplicate is always a bug.
- """
-
+ """Register a trigger. Raises on duplicate type."""
if trigger.type in _REGISTRY:
- raise ValueError(
- f"Trigger already registered: {trigger.type!r}"
- )
+ raise ValueError(f"Trigger already registered: {trigger.type!r}")
_REGISTRY[trigger.type] = trigger
def get_trigger(trigger_type: str) -> TriggerDefinition | None:
- """Look up one trigger by type. Returns ``None`` on miss."""
-
return _REGISTRY.get(trigger_type)
def all_triggers() -> dict[str, TriggerDefinition]:
- """Snapshot of the registry as a defensive copy."""
-
+ """Defensive snapshot of the registry."""
return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/triggers/types.py b/surfsense_backend/app/automations/registries/triggers/types.py
index 256944823..5da081343 100644
--- a/surfsense_backend/app/automations/registries/triggers/types.py
+++ b/surfsense_backend/app/automations/registries/triggers/types.py
@@ -1,4 +1,4 @@
-"""``TriggerDefinition`` dataclass — declarative trigger metadata, no handler."""
+"""``TriggerDefinition`` dataclass. Declarative; firing is the dispatcher's job."""
from __future__ import annotations
@@ -8,27 +8,6 @@ from typing import Any
@dataclass(frozen=True, slots=True)
class TriggerDefinition:
- """A trigger type the dispatcher knows how to fire.
-
- Triggers are purely declarative: the dispatcher (a single
- process-wide component, not a per-type handler) reads the
- ``automation_triggers`` table and decides when each row should
- fire. The trigger's job here is to declare its input/output
- contract:
-
- - ``config_schema``: JSON Schema for the persisted
- ``AutomationTrigger.config`` — used by the form editor and
- validated on save.
- - ``payload_schema``: JSON Schema for the payload the dispatcher
- will deliver to the executor at fire time (e.g., a schedule
- trigger emits ``fired_at`` / ``scheduled_for`` /
- ``last_fired_at``).
-
- No ``handler`` field — firing is a dispatcher responsibility,
- not a per-trigger one. This keeps the dispatcher single and
- leaves trigger types as pure metadata.
- """
-
type: str
description: str
config_schema: dict[str, Any]
diff --git a/surfsense_backend/app/automations/schemas/__init__.py b/surfsense_backend/app/automations/schemas/__init__.py
index 83a95a2a8..23f0232fb 100644
--- a/surfsense_backend/app/automations/schemas/__init__.py
+++ b/surfsense_backend/app/automations/schemas/__init__.py
@@ -1,4 +1,4 @@
-"""Pydantic schemas: definition envelope, trigger configs, action configs."""
+"""Pydantic schemas for the automation definition and per-type configs."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/schemas/actions/__init__.py b/surfsense_backend/app/automations/schemas/actions/__init__.py
index 17c257562..4149206d7 100644
--- a/surfsense_backend/app/automations/schemas/actions/__init__.py
+++ b/surfsense_backend/app/automations/schemas/actions/__init__.py
@@ -1,4 +1,4 @@
-"""Per-action config schemas: one file per action type registered in v1."""
+"""Per-action config schemas, one per action type."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/schemas/actions/agent_task.py b/surfsense_backend/app/automations/schemas/actions/agent_task.py
index 74e41166a..fe9d5fcef 100644
--- a/surfsense_backend/app/automations/schemas/actions/agent_task.py
+++ b/surfsense_backend/app/automations/schemas/actions/agent_task.py
@@ -8,59 +8,20 @@ from pydantic import BaseModel, ConfigDict, Field
class AgentTaskActionConfig(BaseModel):
- """Config for an ``agent_task`` plan step.
-
- Validated against ``PlanStep.config`` whenever the step's
- ``action`` is ``agent_task``. The step instructs the LangGraph
- Deep Agent runtime to:
-
- 1. Receive ``prompt`` (with all preceding-step outputs and inputs
- already rendered by the template engine).
- 2. Run the agent with access to *exactly* the capabilities named
- in ``tools`` — nothing else from the registry is visible to
- this agent invocation.
- 3. Return a JSON object matching ``output_schema`` (recommended;
- the executor validates and re-prompts on mismatch).
-
- ``output_schema`` is the design's "dynamic output contract" —
- instead of locking the output shape on the ActionDefinition (as
- tight actions do), the user declares the shape they want for this
- specific step, and the agent has to match it.
- """
+ """Run a LangGraph Deep Agent restricted to a scoped capability list."""
model_config = ConfigDict(extra="forbid")
- prompt: str = Field(
- ...,
- description=(
- "The task prompt rendered through the Jinja sandbox. May "
- "reference automation inputs and prior-step outputs."
- ),
- min_length=1,
- )
+ prompt: str = Field(..., min_length=1, description="Task prompt; Jinja-rendered.")
tools: list[str] = Field(
default_factory=list,
- description=(
- "Allowlist of capability IDs the agent may call (e.g., "
- "'search_space.query'). Empty list = no tool access; the "
- "agent must answer from the prompt alone."
- ),
+ description="Capability IDs the agent may call. Empty = no tool access.",
)
model: str | None = Field(
default=None,
- description=(
- "Optional LiteLLM model identifier (e.g., "
- "'anthropic/claude-sonnet-4-7'). Omitted means the "
- "automation falls back to the search space's default "
- "agent_llm_id."
- ),
+ description="LiteLLM model id. Defaults to the search space's agent_llm_id.",
)
output_schema: dict[str, Any] | None = Field(
default=None,
- description=(
- "Optional JSON Schema declaring the shape the agent must "
- "return. Strongly recommended; the editor warns when "
- "missing. Validated by the executor before binding to "
- "``output_as``."
- ),
+ description="JSON Schema the agent must return. Recommended.",
)
diff --git a/surfsense_backend/app/automations/schemas/definition/__init__.py b/surfsense_backend/app/automations/schemas/definition/__init__.py
index 14040c20a..838e72f86 100644
--- a/surfsense_backend/app/automations/schemas/definition/__init__.py
+++ b/surfsense_backend/app/automations/schemas/definition/__init__.py
@@ -1,4 +1,4 @@
-"""Automation definition envelope: the editable structured spec users author and run."""
+"""Automation definition envelope and its building blocks."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/schemas/definition/envelope.py b/surfsense_backend/app/automations/schemas/definition/envelope.py
index ccf4c53df..ccd76d612 100644
--- a/surfsense_backend/app/automations/schemas/definition/envelope.py
+++ b/surfsense_backend/app/automations/schemas/definition/envelope.py
@@ -1,4 +1,4 @@
-"""``AutomationDefinition`` — the top-level envelope persisted in ``automations.definition``."""
+"""``AutomationDefinition`` — top-level envelope persisted in ``automations.definition``."""
from __future__ import annotations
@@ -12,78 +12,15 @@ from .trigger_spec import TriggerSpec
class AutomationDefinition(BaseModel):
- """The top-level JSON shape stored in ``automations.definition``.
-
- This is the editable spec a user authors (or the NL generator
- produces). The envelope is structural only — every nested
- discriminator (``triggers[].type``, ``plan[].action``) is resolved
- against the registries at validation time, so adding a new
- trigger or action type does not require touching this schema.
-
- See ``automation-design-plan.md`` §5 for the worked example and
- rationale.
- """
+ """Top-level shape of an automation. See automation-design-plan.md §5."""
model_config = ConfigDict(extra="forbid")
- schema_version: str = Field(
- default="1.0",
- description=(
- "Schema version of the envelope itself. Migrations bump "
- "this when the envelope shape changes; nested per-type "
- "configs evolve independently via the registries."
- ),
- )
- name: str = Field(
- ...,
- description="Short, user-facing name shown in lists.",
- min_length=1,
- max_length=200,
- )
- goal: str | None = Field(
- default=None,
- description=(
- "Optional plain-language statement of what the "
- "automation is for. Used by the NL generator's review "
- "pass and by the UI's run dialog."
- ),
- )
- inputs: InputsBlock | None = Field(
- default=None,
- description=(
- "Optional input contract. When omitted, the automation "
- "accepts no inputs at fire time."
- ),
- )
- triggers: list[TriggerSpec] = Field(
- default_factory=list,
- description=(
- "Triggers that fire this automation. Empty list means "
- "the automation is only runnable via the manual "
- "``Run now`` path."
- ),
- )
- plan: list[PlanStep] = Field(
- ...,
- description=(
- "Ordered sequence of steps. Executed in array order — "
- "no parallelism, no DAGs, no loops at the envelope "
- "level."
- ),
- min_length=1,
- )
- execution: ExecutionBlock = Field(
- default_factory=ExecutionBlock,
- description=(
- "Execution defaults (timeouts, retries, concurrency, "
- "budget). All fields default to safe values; the block "
- "may be omitted entirely."
- ),
- )
- metadata: MetadataBlock = Field(
- default_factory=MetadataBlock,
- description=(
- "Free-form metadata (tags, NL-generator breadcrumbs, "
- "UI annotations). Tolerates unknown keys by design."
- ),
- )
+ schema_version: str = "1.0"
+ name: str = Field(..., min_length=1, max_length=200)
+ goal: str | None = None
+ inputs: InputsBlock | None = None
+ triggers: list[TriggerSpec] = Field(default_factory=list)
+ plan: list[PlanStep] = Field(..., min_length=1)
+ execution: ExecutionBlock = Field(default_factory=ExecutionBlock)
+ metadata: MetadataBlock = Field(default_factory=MetadataBlock)
diff --git a/surfsense_backend/app/automations/schemas/definition/execution.py b/surfsense_backend/app/automations/schemas/definition/execution.py
index bb80e7281..2fcbc611e 100644
--- a/surfsense_backend/app/automations/schemas/definition/execution.py
+++ b/surfsense_backend/app/automations/schemas/definition/execution.py
@@ -1,4 +1,4 @@
-"""``ExecutionBlock`` — the ``execution`` section of the automation definition."""
+"""``ExecutionBlock`` — automation-wide execution defaults (overridable per step)."""
from __future__ import annotations
@@ -10,67 +10,16 @@ from .plan_step import PlanStep
class ExecutionBlock(BaseModel):
- """The ``execution`` block of an ``AutomationDefinition``.
-
- Carries automation-wide defaults that individual ``PlanStep``s
- can override. Every field has a sane default so an automation
- definition may omit the block entirely; in that case all defaults
- apply.
-
- ``on_failure`` is a secondary plan that runs only when the main
- ``plan`` fails after retries exhaust. It uses the same
- ``PlanStep`` shape as the main plan and shares the same execution
- semantics.
- """
-
model_config = ConfigDict(extra="forbid")
- timeout_seconds: int = Field(
- default=600,
- gt=0,
- description=(
- "Hard wall-clock cap for the entire run. The executor "
- "transitions the run to ``timed_out`` when this is "
- "exceeded."
- ),
- )
- max_retries: int = Field(
- default=2,
- ge=0,
- description=(
- "Per-step retry budget applied when a step raises a "
- "retryable error. Steps may override per-step."
- ),
- )
- retry_backoff: Literal["exponential", "linear", "none"] = Field(
- default="exponential",
- description="Backoff policy between retries.",
- )
- concurrency: Literal[
- "drop_if_running", "queue", "always"
- ] = Field(
- default="drop_if_running",
- description=(
- "Behaviour when a new fire arrives while a previous run "
- "is still in progress. ``drop_if_running`` skips the new "
- "fire, ``queue`` enqueues it, ``always`` runs it in "
- "parallel."
- ),
- )
+ timeout_seconds: int = Field(default=600, gt=0, description="Wall-clock cap for the run.")
+ max_retries: int = Field(default=2, ge=0, description="Per-step retry budget.")
+ retry_backoff: Literal["exponential", "linear", "none"] = "exponential"
+ concurrency: Literal["drop_if_running", "queue", "always"] = "drop_if_running"
budget_cap_usd: float | None = Field(
- default=None,
- gt=0,
- description=(
- "Optional mid-flight cost cap in USD. The executor kills "
- "the run when accumulated cost exceeds this value. v1 "
- "treats this as an advisory because cost tracking lands "
- "with the executor in a later step."
- ),
+ default=None, gt=0, description="Kill the run when accumulated cost exceeds this."
)
on_failure: list[PlanStep] = Field(
default_factory=list,
- description=(
- "Secondary plan executed only when the main plan fails "
- "after retries exhaust. Empty list means no fallback."
- ),
+ description="Steps run when the main plan fails after retries.",
)
diff --git a/surfsense_backend/app/automations/schemas/definition/inputs.py b/surfsense_backend/app/automations/schemas/definition/inputs.py
index 279efc113..52aed4e90 100644
--- a/surfsense_backend/app/automations/schemas/definition/inputs.py
+++ b/surfsense_backend/app/automations/schemas/definition/inputs.py
@@ -1,4 +1,4 @@
-"""``InputsBlock`` — the ``inputs`` section of the automation definition."""
+"""``InputsBlock`` — JSON Schema for inputs an automation accepts at fire time."""
from __future__ import annotations
@@ -8,23 +8,6 @@ from pydantic import BaseModel, ConfigDict, Field
class InputsBlock(BaseModel):
- """The ``inputs`` block of an ``AutomationDefinition``.
-
- Holds a JSON Schema describing what data the automation accepts at
- fire time. The same schema is used by:
-
- - The form editor (to render the manual-run dialog).
- - The dispatcher (to validate trigger payloads before enqueueing
- executor work).
- - The template engine (to expose ``{{ inputs.* }}`` references in
- plan-step configs).
-
- The ``schema`` value is the JSON-Schema dict itself, not a
- Pydantic model — automations express their input contract in pure
- JSON Schema so it round-trips losslessly through the database and
- the NL generator.
- """
-
model_config = ConfigDict(
extra="forbid",
populate_by_name=True,
@@ -34,10 +17,5 @@ class InputsBlock(BaseModel):
schema_: dict[str, Any] = Field(
...,
alias="schema",
- description=(
- "JSON Schema (draft-07 compatible) describing the inputs "
- "this automation accepts. Properties may use the special "
- "``$last_fired_at`` default literal to bind to the "
- "trigger's last fire time."
- ),
+ description="JSON Schema (draft-07) for accepted inputs.",
)
diff --git a/surfsense_backend/app/automations/schemas/definition/metadata.py b/surfsense_backend/app/automations/schemas/definition/metadata.py
index dc6541983..61d7af390 100644
--- a/surfsense_backend/app/automations/schemas/definition/metadata.py
+++ b/surfsense_backend/app/automations/schemas/definition/metadata.py
@@ -1,4 +1,4 @@
-"""``MetadataBlock`` — the ``metadata`` section of the automation definition."""
+"""``MetadataBlock`` — free-form metadata on a definition. Extra keys allowed."""
from __future__ import annotations
@@ -6,31 +6,9 @@ from pydantic import BaseModel, ConfigDict, Field
class MetadataBlock(BaseModel):
- """Free-form metadata attached to the automation definition.
-
- Unlike the rest of the envelope this block tolerates unknown keys
- (``extra='allow'``) — it's a deliberate extension point for
- UI annotations, NL-generator breadcrumbs, custom tags, etc.
-
- Two fields are first-class so the rest of the system can rely on
- them without reaching into the loose extras:
-
- ``tags`` — used by the UI for filtering and grouping.
- ``created_from_nl`` — set by the NL generator so we can later
- measure how many runs came from natural-language authoring.
- """
-
model_config = ConfigDict(extra="allow")
- tags: list[str] = Field(
- default_factory=list,
- description="UI-facing tags. No semantic meaning to the engine.",
- )
+ tags: list[str] = Field(default_factory=list)
created_from_nl: bool = Field(
- default=False,
- description=(
- "True when the definition was produced by the NL "
- "generator (set automatically by the generator path; "
- "human-authored definitions keep this false)."
- ),
+ default=False, description="True when produced by the NL generator."
)
diff --git a/surfsense_backend/app/automations/schemas/definition/plan_step.py b/surfsense_backend/app/automations/schemas/definition/plan_step.py
index 6898a0914..6a0bf9a1b 100644
--- a/surfsense_backend/app/automations/schemas/definition/plan_step.py
+++ b/surfsense_backend/app/automations/schemas/definition/plan_step.py
@@ -1,4 +1,4 @@
-"""``PlanStep`` — one entry in the envelope's ``plan`` array."""
+"""``PlanStep`` — one step in the sequential plan."""
from __future__ import annotations
@@ -8,79 +8,21 @@ from pydantic import BaseModel, ConfigDict, Field
class PlanStep(BaseModel):
- """One step in an automation's sequential plan.
-
- Steps run in array order, no parallelism, no DAGs, no loops. The
- ``when`` Jinja expression provides conditional skip; branching is
- achieved by ``when`` clauses on multiple steps. For looping or
- parallel work, the user routes through ``agent_task`` and lets the
- agent reason about it.
-
- ``config`` is dispatched against the action registry at
- validation time — its shape is determined by
- ``ActionDefinition.config_schema`` for the ``action`` value.
-
- ``output_as`` binds the step's typed output into the template
- namespace for later steps, e.g. ``output_as: 'summary'`` then
- ``{{ summary.bullets }}`` in a downstream step's config.
- """
-
model_config = ConfigDict(extra="forbid")
- step_id: str = Field(
- ...,
- description=(
- "Unique-within-plan identifier. Used in run logs and as "
- "the default for ``output_as`` when not provided."
- ),
- min_length=1,
- )
- action: str = Field(
- ...,
- description=(
- "Action-type discriminator (e.g., ``agent_task``). "
- "Resolved against the action registry."
- ),
- min_length=1,
- )
+ step_id: str = Field(..., min_length=1, description="Unique within the plan.")
+ action: str = Field(..., min_length=1, description="Action type; resolved via registry.")
when: str | None = Field(
default=None,
- description=(
- "Optional Jinja expression evaluated against the run "
- "context. Step is skipped when the expression is "
- "falsy."
- ),
+ description="Optional Jinja expression; step is skipped when falsy.",
)
config: dict[str, Any] = Field(
default_factory=dict,
- description=(
- "Action-type-specific config. Validated against the "
- "registered ``ActionDefinition.config_schema`` for "
- "``action`` at definition-save time. Jinja templates "
- "inside config are rendered at step-execute time."
- ),
+ description="Action-type-specific config; Jinja-rendered at execute time.",
)
output_as: str | None = Field(
default=None,
- description=(
- "Name to bind the step output under for downstream "
- "steps. Defaults to ``step_id`` when omitted."
- ),
- )
- max_retries: int | None = Field(
- default=None,
- ge=0,
- description=(
- "Per-step override of the automation-level ``max_retries``. "
- "Omitted means inherit from execution block."
- ),
- )
- timeout_seconds: int | None = Field(
- default=None,
- gt=0,
- description=(
- "Per-step override of the automation-level "
- "``timeout_seconds``. Omitted means inherit from "
- "execution block."
- ),
+ description="Bind step output under this name. Defaults to step_id.",
)
+ max_retries: int | None = Field(default=None, ge=0)
+ timeout_seconds: int | None = Field(default=None, gt=0)
diff --git a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
index 827b0a315..0fdf1f35a 100644
--- a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
+++ b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
@@ -1,4 +1,4 @@
-"""``TriggerSpec`` — one entry in the envelope's ``triggers`` array."""
+"""``TriggerSpec`` — one entry in the definition's ``triggers[]`` array."""
from __future__ import annotations
@@ -8,33 +8,10 @@ from pydantic import BaseModel, ConfigDict, Field
class TriggerSpec(BaseModel):
- """One trigger attached to an automation, as it appears in the definition.
-
- The envelope keeps ``config`` as an untyped JSON object on purpose
- — the per-type config schemas live in
- ``app.automations.schemas.triggers`` and are dispatched at
- validation time by looking up ``type`` in the trigger registry.
-
- This mirrors the design's "definitions are pure data" principle:
- the envelope describes shape, the registry resolves names to
- behaviour.
- """
-
model_config = ConfigDict(extra="forbid")
- type: str = Field(
- ...,
- description=(
- "Trigger-type discriminator (e.g., ``schedule``, ``manual``). "
- "Resolved against the trigger registry."
- ),
- min_length=1,
- )
+ type: str = Field(..., min_length=1, description="Trigger type; resolved via registry.")
config: dict[str, Any] = Field(
default_factory=dict,
- description=(
- "Trigger-type-specific config. Validated against the "
- "registered ``TriggerDefinition.config_schema`` for "
- "``type`` at definition-save time."
- ),
+ description="Type-specific config; validated against the trigger's schema.",
)
diff --git a/surfsense_backend/app/automations/schemas/triggers/__init__.py b/surfsense_backend/app/automations/schemas/triggers/__init__.py
index 847c7443b..0cd8bc38e 100644
--- a/surfsense_backend/app/automations/schemas/triggers/__init__.py
+++ b/surfsense_backend/app/automations/schemas/triggers/__init__.py
@@ -1,4 +1,4 @@
-"""Per-trigger config schemas: one file per trigger type registered in v1."""
+"""Per-trigger config schemas, one per trigger type."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/schemas/triggers/manual.py b/surfsense_backend/app/automations/schemas/triggers/manual.py
index 6e04ba062..bf14f80b6 100644
--- a/surfsense_backend/app/automations/schemas/triggers/manual.py
+++ b/surfsense_backend/app/automations/schemas/triggers/manual.py
@@ -1,4 +1,4 @@
-"""``ManualTriggerConfig`` — config for the ``manual`` trigger type (empty in v1)."""
+"""``ManualTriggerConfig`` — config for the ``manual`` trigger (empty in v1)."""
from __future__ import annotations
@@ -6,16 +6,4 @@ from pydantic import BaseModel, ConfigDict
class ManualTriggerConfig(BaseModel):
- """Config for the UI-driven ``manual`` trigger.
-
- Validated against ``AutomationTrigger.config`` whenever the
- persisted ``type`` is ``manual``. v1 carries no configurable
- fields — the "Run now" affordance simply fires this trigger with
- an empty config object. The model exists so the registry dispatch
- is uniform across all trigger types.
-
- Future versions may add fields here (e.g., a fixed prompt to
- pre-fill the run dialog with) without breaking v1 payloads.
- """
-
model_config = ConfigDict(extra="forbid")
diff --git a/surfsense_backend/app/automations/schemas/triggers/schedule.py b/surfsense_backend/app/automations/schemas/triggers/schedule.py
index e7c20da3a..9d8c7d38d 100644
--- a/surfsense_backend/app/automations/schemas/triggers/schedule.py
+++ b/surfsense_backend/app/automations/schemas/triggers/schedule.py
@@ -6,28 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field
class ScheduleTriggerConfig(BaseModel):
- """Config for a cron-driven trigger.
-
- Validated against ``AutomationTrigger.config`` whenever the
- persisted ``type`` is ``schedule``. The cron expression is
- evaluated by Celery Beat's source; the timezone is an IANA name
- (e.g., ``Africa/Kigali``) and is required so the user's cron is
- unambiguous across DST boundaries.
- """
-
model_config = ConfigDict(extra="forbid")
- cron: str = Field(
- ...,
- description=(
- "Five-field cron expression. Minimum resolution is one "
- "minute; the form editor warns when intervals tighter "
- "than 15 minutes are used."
- ),
- examples=["0 9 * * 1-5"],
- )
- timezone: str = Field(
- ...,
- description="IANA timezone name (e.g., 'Africa/Kigali', 'UTC').",
- examples=["Africa/Kigali"],
- )
+ cron: str = Field(..., description="Five-field cron expression.", examples=["0 9 * * 1-5"])
+ timezone: str = Field(..., description="IANA timezone.", examples=["Africa/Kigali"])
From 35117a952dd9ec5b6709f668dff0109e3c3d0d0e Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 11:41:32 +0200
Subject: [PATCH 27/87] refactor(automation): drop agent_session_id from
AutomationRun
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A run can contain zero, one, or N agent_task steps. A single
agent_session_id at the run level holds at most one of them, so the
column is the wrong shape for the data.
Per-step session ids (LangGraph thread/checkpoint reference for an
agent_task step) live inside step_results[i] alongside the rest of
the per-step bag (status, timings, output). Each agent step records
its own; non-agent steps record nothing. Run-level "primary session"
is a UI concern, not a schema concern.
Trade-off: the trace -> run reverse lookup is now a JSONB query, not an
index hit. Traversal usually goes run -> trace; if the reverse lookup
becomes hot we can add a GIN index on step_results or a generated
column — both are additive changes.
Changes:
- AutomationRun: drop the agent_session_id column; module docstring
notes where per-step session ids now live.
- Migration 144: drop the column from the CREATE TABLE; downgrade
unchanged.
Safe to edit migration 144 in place (vs. add 145 with ALTER ... DROP):
this branch has not shipped and the table has never existed in any
deployed database.
---
.../alembic/versions/144_add_automation_tables.py | 1 -
.../app/automations/persistence/models/run.py | 10 ++++++----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/surfsense_backend/alembic/versions/144_add_automation_tables.py b/surfsense_backend/alembic/versions/144_add_automation_tables.py
index 6aa208dc1..acfa3db77 100644
--- a/surfsense_backend/alembic/versions/144_add_automation_tables.py
+++ b/surfsense_backend/alembic/versions/144_add_automation_tables.py
@@ -125,7 +125,6 @@ def upgrade() -> None:
error JSONB,
started_at TIMESTAMP WITH TIME ZONE,
finished_at TIMESTAMP WITH TIME ZONE,
- agent_session_id VARCHAR(200),
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
"""
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index 9291e5da0..43a1de07d 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -1,4 +1,9 @@
-"""``automation_runs`` table — immutable per-fire execution record."""
+"""``automation_runs`` table — immutable per-fire execution record.
+
+Per-step metadata (incl. any LangGraph session id for an ``agent_task`` step)
+lives inside ``step_results[i]``, since a single run may contain zero, one,
+or N agent steps.
+"""
from __future__ import annotations
@@ -8,7 +13,6 @@ from sqlalchemy import (
Enum as SQLAlchemyEnum,
ForeignKey,
Integer,
- String,
)
from sqlalchemy.dialects.postgresql import JSONB
@@ -54,5 +58,3 @@ class AutomationRun(BaseModel, TimestampMixin):
started_at = Column(TIMESTAMP(timezone=True), nullable=True)
finished_at = Column(TIMESTAMP(timezone=True), nullable=True)
-
- agent_session_id = Column(String(200), nullable=True)
From a4fbfd8c0d77bc9f11ef26d87c5d64b283109829 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 11:45:04 +0200
Subject: [PATCH 28/87] chore(automation): tighten run.py + envelope.py
docstrings
Re-apply the trim style after the prior refactor commit re-introduced
a multi-line module docstring in run.py.
- run.py (AutomationRun): drop the module-docstring paragraph
explaining where per-step session ids live; move the note to a
two-line inline comment right above ``step_results`` where it's
actionable.
- AutomationDefinition: drop the design-plan cross-reference; the
module docstring already establishes what the file is.
No behaviour change.
---
.../app/automations/persistence/models/run.py | 9 +++------
.../app/automations/schemas/definition/envelope.py | 2 +-
2 files changed, 4 insertions(+), 7 deletions(-)
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index 43a1de07d..45da6a39d 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -1,9 +1,4 @@
-"""``automation_runs`` table — immutable per-fire execution record.
-
-Per-step metadata (incl. any LangGraph session id for an ``agent_task`` step)
-lives inside ``step_results[i]``, since a single run may contain zero, one,
-or N agent steps.
-"""
+"""``automation_runs`` table — immutable per-fire execution record."""
from __future__ import annotations
@@ -51,6 +46,8 @@ class AutomationRun(BaseModel, TimestampMixin):
trigger_payload = Column(JSONB, nullable=True)
resolved_inputs = Column(JSONB, nullable=False, server_default="{}")
+ # one entry per executed step; agent_task entries carry their own
+ # `agent_session_id` (LangGraph thread reference) inside this JSONB
step_results = Column(JSONB, nullable=False, server_default="[]")
output = Column(JSONB, nullable=True)
artifacts = Column(JSONB, nullable=False, server_default="[]")
diff --git a/surfsense_backend/app/automations/schemas/definition/envelope.py b/surfsense_backend/app/automations/schemas/definition/envelope.py
index ccd76d612..ffc45a0cd 100644
--- a/surfsense_backend/app/automations/schemas/definition/envelope.py
+++ b/surfsense_backend/app/automations/schemas/definition/envelope.py
@@ -12,7 +12,7 @@ from .trigger_spec import TriggerSpec
class AutomationDefinition(BaseModel):
- """Top-level shape of an automation. See automation-design-plan.md §5."""
+ """Top-level shape of an automation."""
model_config = ConfigDict(extra="forbid")
From fe32cd35ed9e1f407ac8b9cc46b9acaf57742790 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:29:18 +0200
Subject: [PATCH 29/87] refactor(automation): rename trigger config column to
params
---
surfsense_backend/alembic/versions/144_add_automation_tables.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/surfsense_backend/alembic/versions/144_add_automation_tables.py b/surfsense_backend/alembic/versions/144_add_automation_tables.py
index acfa3db77..8b59ee969 100644
--- a/surfsense_backend/alembic/versions/144_add_automation_tables.py
+++ b/surfsense_backend/alembic/versions/144_add_automation_tables.py
@@ -86,7 +86,7 @@ def upgrade() -> None:
automation_id INTEGER NOT NULL
REFERENCES automations(id) ON DELETE CASCADE,
type automation_trigger_type NOT NULL,
- config JSONB NOT NULL,
+ params JSONB NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT true,
last_fired_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
From c8a89ccac8536d42d679020d3fcc70d8ca234184 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:29:22 +0200
Subject: [PATCH 30/87] refactor(automation): rename trigger model config to
params
---
surfsense_backend/app/automations/persistence/__init__.py | 2 +-
.../app/automations/persistence/models/__init__.py | 2 +-
surfsense_backend/app/automations/persistence/models/run.py | 2 +-
surfsense_backend/app/automations/persistence/models/trigger.py | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/surfsense_backend/app/automations/persistence/__init__.py b/surfsense_backend/app/automations/persistence/__init__.py
index 4c1ea3423..b10aef03d 100644
--- a/surfsense_backend/app/automations/persistence/__init__.py
+++ b/surfsense_backend/app/automations/persistence/__init__.py
@@ -1,4 +1,4 @@
-"""SQLAlchemy models and enums for the automation tables."""
+"""Models and enums for the automation tables."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/models/__init__.py b/surfsense_backend/app/automations/persistence/models/__init__.py
index 4bc023ea3..8b985f025 100644
--- a/surfsense_backend/app/automations/persistence/models/__init__.py
+++ b/surfsense_backend/app/automations/persistence/models/__init__.py
@@ -1,4 +1,4 @@
-"""SQLAlchemy models, one per table."""
+"""Models, one per table."""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index 45da6a39d..118085b1d 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -47,7 +47,7 @@ class AutomationRun(BaseModel, TimestampMixin):
trigger_payload = Column(JSONB, nullable=True)
resolved_inputs = Column(JSONB, nullable=False, server_default="{}")
# one entry per executed step; agent_task entries carry their own
- # `agent_session_id` (LangGraph thread reference) inside this JSONB
+ # `agent_session_id` inside their entry
step_results = Column(JSONB, nullable=False, server_default="[]")
output = Column(JSONB, nullable=True)
artifacts = Column(JSONB, nullable=False, server_default="[]")
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index 8dab48a6b..55affeabc 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -33,7 +33,7 @@ class AutomationTrigger(BaseModel, TimestampMixin):
index=True,
)
- config = Column(JSONB, nullable=False)
+ params = Column(JSONB, nullable=False)
enabled = Column(
Boolean,
From 9fa35f21cfea0d09c2a148502810fea7f31d0645 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:29:26 +0200
Subject: [PATCH 31/87] refactor(automation): rename schema config to params,
drop dead fields
---
.../app/automations/schemas/__init__.py | 12 ++++++------
.../app/automations/schemas/actions/__init__.py | 6 +++---
.../app/automations/schemas/actions/agent_task.py | 14 +++++++-------
.../automations/schemas/definition/execution.py | 3 ---
.../app/automations/schemas/definition/inputs.py | 2 +-
.../app/automations/schemas/definition/metadata.py | 3 ---
.../automations/schemas/definition/plan_step.py | 6 +++---
.../automations/schemas/definition/trigger_spec.py | 4 ++--
.../app/automations/schemas/triggers/__init__.py | 10 +++++-----
.../app/automations/schemas/triggers/manual.py | 4 ++--
.../app/automations/schemas/triggers/schedule.py | 4 ++--
11 files changed, 31 insertions(+), 37 deletions(-)
diff --git a/surfsense_backend/app/automations/schemas/__init__.py b/surfsense_backend/app/automations/schemas/__init__.py
index 23f0232fb..2bb0060ba 100644
--- a/surfsense_backend/app/automations/schemas/__init__.py
+++ b/surfsense_backend/app/automations/schemas/__init__.py
@@ -1,8 +1,8 @@
-"""Pydantic schemas for the automation definition and per-type configs."""
+"""Schemas for the automation definition and per-type configs."""
from __future__ import annotations
-from .actions import AgentTaskActionConfig
+from .actions import AgentTaskActionParams
from .definition import (
AutomationDefinition,
ExecutionBlock,
@@ -11,16 +11,16 @@ from .definition import (
PlanStep,
TriggerSpec,
)
-from .triggers import ManualTriggerConfig, ScheduleTriggerConfig
+from .triggers import ManualTriggerParams, ScheduleTriggerParams
__all__ = [
- "AgentTaskActionConfig",
+ "AgentTaskActionParams",
"AutomationDefinition",
"ExecutionBlock",
"InputsBlock",
- "ManualTriggerConfig",
+ "ManualTriggerParams",
"MetadataBlock",
"PlanStep",
- "ScheduleTriggerConfig",
+ "ScheduleTriggerParams",
"TriggerSpec",
]
diff --git a/surfsense_backend/app/automations/schemas/actions/__init__.py b/surfsense_backend/app/automations/schemas/actions/__init__.py
index 4149206d7..c51d33b6a 100644
--- a/surfsense_backend/app/automations/schemas/actions/__init__.py
+++ b/surfsense_backend/app/automations/schemas/actions/__init__.py
@@ -1,9 +1,9 @@
-"""Per-action config schemas, one per action type."""
+"""Per-action params schemas, one per action type."""
from __future__ import annotations
-from .agent_task import AgentTaskActionConfig
+from .agent_task import AgentTaskActionParams
__all__ = [
- "AgentTaskActionConfig",
+ "AgentTaskActionParams",
]
diff --git a/surfsense_backend/app/automations/schemas/actions/agent_task.py b/surfsense_backend/app/automations/schemas/actions/agent_task.py
index fe9d5fcef..348db8095 100644
--- a/surfsense_backend/app/automations/schemas/actions/agent_task.py
+++ b/surfsense_backend/app/automations/schemas/actions/agent_task.py
@@ -1,4 +1,4 @@
-"""``AgentTaskActionConfig`` — config for the ``agent_task`` action type."""
+"""``AgentTaskActionParams`` — params for the ``agent_task`` action type."""
from __future__ import annotations
@@ -7,21 +7,21 @@ from typing import Any
from pydantic import BaseModel, ConfigDict, Field
-class AgentTaskActionConfig(BaseModel):
- """Run a LangGraph Deep Agent restricted to a scoped capability list."""
+class AgentTaskActionParams(BaseModel):
+ """Run an agent task with a scoped tool allowlist."""
model_config = ConfigDict(extra="forbid")
- prompt: str = Field(..., min_length=1, description="Task prompt; Jinja-rendered.")
+ prompt: str = Field(..., min_length=1, description="Task prompt; rendered at execute time.")
tools: list[str] = Field(
default_factory=list,
- description="Capability IDs the agent may call. Empty = no tool access.",
+ description="Tool identifiers the agent may call. Empty = no tool access.",
)
model: str | None = Field(
default=None,
- description="LiteLLM model id. Defaults to the search space's agent_llm_id.",
+ description="Model identifier. Defaults to the search space's agent_llm_id.",
)
output_schema: dict[str, Any] | None = Field(
default=None,
- description="JSON Schema the agent must return. Recommended.",
+ description="JSON Schema (draft 2020-12) the agent must return. Recommended.",
)
diff --git a/surfsense_backend/app/automations/schemas/definition/execution.py b/surfsense_backend/app/automations/schemas/definition/execution.py
index 2fcbc611e..d5f31364c 100644
--- a/surfsense_backend/app/automations/schemas/definition/execution.py
+++ b/surfsense_backend/app/automations/schemas/definition/execution.py
@@ -16,9 +16,6 @@ class ExecutionBlock(BaseModel):
max_retries: int = Field(default=2, ge=0, description="Per-step retry budget.")
retry_backoff: Literal["exponential", "linear", "none"] = "exponential"
concurrency: Literal["drop_if_running", "queue", "always"] = "drop_if_running"
- budget_cap_usd: float | None = Field(
- default=None, gt=0, description="Kill the run when accumulated cost exceeds this."
- )
on_failure: list[PlanStep] = Field(
default_factory=list,
description="Steps run when the main plan fails after retries.",
diff --git a/surfsense_backend/app/automations/schemas/definition/inputs.py b/surfsense_backend/app/automations/schemas/definition/inputs.py
index 52aed4e90..b0b1a9414 100644
--- a/surfsense_backend/app/automations/schemas/definition/inputs.py
+++ b/surfsense_backend/app/automations/schemas/definition/inputs.py
@@ -17,5 +17,5 @@ class InputsBlock(BaseModel):
schema_: dict[str, Any] = Field(
...,
alias="schema",
- description="JSON Schema (draft-07) for accepted inputs.",
+ description="JSON Schema (draft 2020-12) for accepted inputs.",
)
diff --git a/surfsense_backend/app/automations/schemas/definition/metadata.py b/surfsense_backend/app/automations/schemas/definition/metadata.py
index 61d7af390..9b3722430 100644
--- a/surfsense_backend/app/automations/schemas/definition/metadata.py
+++ b/surfsense_backend/app/automations/schemas/definition/metadata.py
@@ -9,6 +9,3 @@ class MetadataBlock(BaseModel):
model_config = ConfigDict(extra="allow")
tags: list[str] = Field(default_factory=list)
- created_from_nl: bool = Field(
- default=False, description="True when produced by the NL generator."
- )
diff --git a/surfsense_backend/app/automations/schemas/definition/plan_step.py b/surfsense_backend/app/automations/schemas/definition/plan_step.py
index 6a0bf9a1b..5d16f1f3e 100644
--- a/surfsense_backend/app/automations/schemas/definition/plan_step.py
+++ b/surfsense_backend/app/automations/schemas/definition/plan_step.py
@@ -14,11 +14,11 @@ class PlanStep(BaseModel):
action: str = Field(..., min_length=1, description="Action type; resolved via registry.")
when: str | None = Field(
default=None,
- description="Optional Jinja expression; step is skipped when falsy.",
+ description="Optional predicate; step is skipped when falsy.",
)
- config: dict[str, Any] = Field(
+ params: dict[str, Any] = Field(
default_factory=dict,
- description="Action-type-specific config; Jinja-rendered at execute time.",
+ description="Action-type-specific params; rendered at execute time.",
)
output_as: str | None = Field(
default=None,
diff --git a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
index 0fdf1f35a..a359a2f63 100644
--- a/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
+++ b/surfsense_backend/app/automations/schemas/definition/trigger_spec.py
@@ -11,7 +11,7 @@ class TriggerSpec(BaseModel):
model_config = ConfigDict(extra="forbid")
type: str = Field(..., min_length=1, description="Trigger type; resolved via registry.")
- config: dict[str, Any] = Field(
+ params: dict[str, Any] = Field(
default_factory=dict,
- description="Type-specific config; validated against the trigger's schema.",
+ description="Type-specific params; validated against the trigger's schema.",
)
diff --git a/surfsense_backend/app/automations/schemas/triggers/__init__.py b/surfsense_backend/app/automations/schemas/triggers/__init__.py
index 0cd8bc38e..3ddd26f95 100644
--- a/surfsense_backend/app/automations/schemas/triggers/__init__.py
+++ b/surfsense_backend/app/automations/schemas/triggers/__init__.py
@@ -1,11 +1,11 @@
-"""Per-trigger config schemas, one per trigger type."""
+"""Per-trigger params schemas, one per trigger type."""
from __future__ import annotations
-from .manual import ManualTriggerConfig
-from .schedule import ScheduleTriggerConfig
+from .manual import ManualTriggerParams
+from .schedule import ScheduleTriggerParams
__all__ = [
- "ManualTriggerConfig",
- "ScheduleTriggerConfig",
+ "ManualTriggerParams",
+ "ScheduleTriggerParams",
]
diff --git a/surfsense_backend/app/automations/schemas/triggers/manual.py b/surfsense_backend/app/automations/schemas/triggers/manual.py
index bf14f80b6..577655086 100644
--- a/surfsense_backend/app/automations/schemas/triggers/manual.py
+++ b/surfsense_backend/app/automations/schemas/triggers/manual.py
@@ -1,9 +1,9 @@
-"""``ManualTriggerConfig`` — config for the ``manual`` trigger (empty in v1)."""
+"""``ManualTriggerParams`` — params for the ``manual`` trigger (empty in v1)."""
from __future__ import annotations
from pydantic import BaseModel, ConfigDict
-class ManualTriggerConfig(BaseModel):
+class ManualTriggerParams(BaseModel):
model_config = ConfigDict(extra="forbid")
diff --git a/surfsense_backend/app/automations/schemas/triggers/schedule.py b/surfsense_backend/app/automations/schemas/triggers/schedule.py
index 9d8c7d38d..0418bd1d9 100644
--- a/surfsense_backend/app/automations/schemas/triggers/schedule.py
+++ b/surfsense_backend/app/automations/schemas/triggers/schedule.py
@@ -1,11 +1,11 @@
-"""``ScheduleTriggerConfig`` — config for the ``schedule`` trigger type."""
+"""``ScheduleTriggerParams`` — params for the ``schedule`` trigger type."""
from __future__ import annotations
from pydantic import BaseModel, ConfigDict, Field
-class ScheduleTriggerConfig(BaseModel):
+class ScheduleTriggerParams(BaseModel):
model_config = ConfigDict(extra="forbid")
cron: str = Field(..., description="Five-field cron expression.", examples=["0 9 * * 1-5"])
From 7ac99b89a0ddf4f8f0d6f0d08b4fac66b7e2c1a2 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:29:30 +0200
Subject: [PATCH 32/87] refactor(automation): drop Capability registry
---
.../app/automations/registries/__init__.py | 14 +----------
.../automations/registries/actions/types.py | 2 +-
.../registries/capabilities/__init__.py | 14 -----------
.../registries/capabilities/store.py | 23 -------------------
.../registries/capabilities/types.py | 18 ---------------
.../automations/registries/triggers/types.py | 2 +-
6 files changed, 3 insertions(+), 70 deletions(-)
delete mode 100644 surfsense_backend/app/automations/registries/capabilities/__init__.py
delete mode 100644 surfsense_backend/app/automations/registries/capabilities/store.py
delete mode 100644 surfsense_backend/app/automations/registries/capabilities/types.py
diff --git a/surfsense_backend/app/automations/registries/__init__.py b/surfsense_backend/app/automations/registries/__init__.py
index a97595ced..f497caf59 100644
--- a/surfsense_backend/app/automations/registries/__init__.py
+++ b/surfsense_backend/app/automations/registries/__init__.py
@@ -1,4 +1,4 @@
-"""Capability, action, and trigger registries — populated at process startup."""
+"""Action and trigger registries — populated at process startup."""
from __future__ import annotations
@@ -9,13 +9,6 @@ from .actions import (
get_action,
register_action,
)
-from .capabilities import (
- Capability,
- CapabilityHandler,
- all_capabilities,
- get_capability,
- register_capability,
-)
from .triggers import (
TriggerDefinition,
all_triggers,
@@ -26,16 +19,11 @@ from .triggers import (
__all__ = [
"ActionDefinition",
"ActionHandler",
- "Capability",
- "CapabilityHandler",
"TriggerDefinition",
"all_actions",
- "all_capabilities",
"all_triggers",
"get_action",
- "get_capability",
"get_trigger",
"register_action",
- "register_capability",
"register_trigger",
]
diff --git a/surfsense_backend/app/automations/registries/actions/types.py b/surfsense_backend/app/automations/registries/actions/types.py
index 13c826c66..99f94ae7c 100644
--- a/surfsense_backend/app/automations/registries/actions/types.py
+++ b/surfsense_backend/app/automations/registries/actions/types.py
@@ -14,5 +14,5 @@ class ActionDefinition:
type: str
name: str
description: str
- config_schema: dict[str, Any]
+ params_schema: dict[str, Any]
handler: ActionHandler
diff --git a/surfsense_backend/app/automations/registries/capabilities/__init__.py b/surfsense_backend/app/automations/registries/capabilities/__init__.py
deleted file mode 100644
index 213303fc0..000000000
--- a/surfsense_backend/app/automations/registries/capabilities/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Capability registry."""
-
-from __future__ import annotations
-
-from .store import all_capabilities, get_capability, register_capability
-from .types import Capability, CapabilityHandler
-
-__all__ = [
- "Capability",
- "CapabilityHandler",
- "all_capabilities",
- "get_capability",
- "register_capability",
-]
diff --git a/surfsense_backend/app/automations/registries/capabilities/store.py b/surfsense_backend/app/automations/registries/capabilities/store.py
deleted file mode 100644
index 4d87abe47..000000000
--- a/surfsense_backend/app/automations/registries/capabilities/store.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""In-memory capability registry. Populated once at process startup."""
-
-from __future__ import annotations
-
-from .types import Capability
-
-_REGISTRY: dict[str, Capability] = {}
-
-
-def register_capability(capability: Capability) -> None:
- """Register a capability. Raises on duplicate id."""
- if capability.id in _REGISTRY:
- raise ValueError(f"Capability already registered: {capability.id!r}")
- _REGISTRY[capability.id] = capability
-
-
-def get_capability(capability_id: str) -> Capability | None:
- return _REGISTRY.get(capability_id)
-
-
-def all_capabilities() -> dict[str, Capability]:
- """Defensive snapshot of the registry."""
- return dict(_REGISTRY)
diff --git a/surfsense_backend/app/automations/registries/capabilities/types.py b/surfsense_backend/app/automations/registries/capabilities/types.py
deleted file mode 100644
index 2759bc809..000000000
--- a/surfsense_backend/app/automations/registries/capabilities/types.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""``Capability`` dataclass and handler signature. Locked at five fields for v1."""
-
-from __future__ import annotations
-
-from collections.abc import Awaitable, Callable
-from dataclasses import dataclass
-from typing import Any
-
-CapabilityHandler = Callable[[dict[str, Any]], Awaitable[Any]]
-
-
-@dataclass(frozen=True, slots=True)
-class Capability:
- id: str
- description: str
- input_schema: dict[str, Any]
- output_schema: dict[str, Any]
- handler: CapabilityHandler
diff --git a/surfsense_backend/app/automations/registries/triggers/types.py b/surfsense_backend/app/automations/registries/triggers/types.py
index 5da081343..783bd7842 100644
--- a/surfsense_backend/app/automations/registries/triggers/types.py
+++ b/surfsense_backend/app/automations/registries/triggers/types.py
@@ -10,5 +10,5 @@ from typing import Any
class TriggerDefinition:
type: str
description: str
- config_schema: dict[str, Any]
+ params_schema: dict[str, Any]
payload_schema: dict[str, Any]
From 7f4c1c25abad3531ecb54191aef9b9323feaacf9 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:45:32 +0200
Subject: [PATCH 33/87] feat(automation): wire SQLAlchemy relationships on both
sides
---
.../persistence/models/automation.py | 16 ++++++++++
.../app/automations/persistence/models/run.py | 4 +++
.../automations/persistence/models/trigger.py | 8 +++++
surfsense_backend/app/db.py | 32 +++++++++++++++++++
4 files changed, 60 insertions(+)
diff --git a/surfsense_backend/app/automations/persistence/models/automation.py b/surfsense_backend/app/automations/persistence/models/automation.py
index 637fd2282..ee86851c1 100644
--- a/surfsense_backend/app/automations/persistence/models/automation.py
+++ b/surfsense_backend/app/automations/persistence/models/automation.py
@@ -14,6 +14,7 @@ from sqlalchemy import (
Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import relationship
from app.db import BaseModel, TimestampMixin
@@ -59,3 +60,18 @@ class Automation(BaseModel, TimestampMixin):
onupdate=lambda: datetime.now(UTC),
index=True,
)
+
+ search_space = relationship("SearchSpace", back_populates="automations")
+ created_by = relationship("User", back_populates="automations")
+ triggers = relationship(
+ "AutomationTrigger",
+ back_populates="automation",
+ cascade="all, delete-orphan",
+ passive_deletes=True,
+ )
+ runs = relationship(
+ "AutomationRun",
+ back_populates="automation",
+ cascade="all, delete-orphan",
+ passive_deletes=True,
+ )
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index 118085b1d..fdc355e8f 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -10,6 +10,7 @@ from sqlalchemy import (
Integer,
)
from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import relationship
from app.db import BaseModel, TimestampMixin
@@ -55,3 +56,6 @@ class AutomationRun(BaseModel, TimestampMixin):
started_at = Column(TIMESTAMP(timezone=True), nullable=True)
finished_at = Column(TIMESTAMP(timezone=True), nullable=True)
+
+ automation = relationship("Automation", back_populates="runs")
+ trigger = relationship("AutomationTrigger", back_populates="runs")
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index 55affeabc..7582234d4 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -11,6 +11,7 @@ from sqlalchemy import (
Integer,
)
from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import relationship
from app.db import BaseModel, TimestampMixin
@@ -44,3 +45,10 @@ class AutomationTrigger(BaseModel, TimestampMixin):
)
last_fired_at = Column(TIMESTAMP(timezone=True), nullable=True)
+
+ automation = relationship("Automation", back_populates="triggers")
+ runs = relationship(
+ "AutomationRun",
+ back_populates="trigger",
+ passive_deletes=True,
+ )
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index 9fc27fb1f..71466495b 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -1533,6 +1533,14 @@ class SearchSpace(BaseModel, TimestampMixin):
cascade="all, delete-orphan",
)
+ automations = relationship(
+ "Automation",
+ back_populates="search_space",
+ order_by="Automation.id",
+ cascade="all, delete-orphan",
+ passive_deletes=True,
+ )
+
# RBAC relationships
roles = relationship(
"SearchSpaceRole",
@@ -2125,6 +2133,13 @@ if config.AUTH_TYPE == "GOOGLE":
passive_deletes=True,
)
+ # Automations created by this user
+ automations = relationship(
+ "Automation",
+ back_populates="created_by",
+ passive_deletes=True,
+ )
+
# Incentive tasks completed by this user
incentive_tasks = relationship(
"UserIncentiveTask",
@@ -2257,6 +2272,13 @@ else:
passive_deletes=True,
)
+ # Automations created by this user
+ automations = relationship(
+ "Automation",
+ back_populates="created_by",
+ passive_deletes=True,
+ )
+
# Incentive tasks completed by this user
incentive_tasks = relationship(
"UserIncentiveTask",
@@ -2560,6 +2582,16 @@ class RefreshToken(Base, TimestampMixin):
return not self.is_expired and not self.is_revoked
+# Register model packages that live outside this file so their classes
+# are present in Base.metadata before configure_mappers() resolves any
+# string-based relationship() references.
+from app.automations.persistence import ( # noqa: E402, F401
+ Automation,
+ AutomationRun,
+ AutomationTrigger,
+)
+
+
engine = create_async_engine(
DATABASE_URL,
pool_size=30,
From 56b3e1bfc4961493e4ad774c1ad25997e3171ae7 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:48:41 +0200
Subject: [PATCH 34/87] refactor(automation): drop Block suffix from definition
components
---
.../app/automations/schemas/__init__.py | 12 ++++++------
.../app/automations/schemas/definition/__init__.py | 14 +++++++-------
.../app/automations/schemas/definition/envelope.py | 12 ++++++------
.../automations/schemas/definition/execution.py | 4 ++--
.../app/automations/schemas/definition/inputs.py | 4 ++--
.../app/automations/schemas/definition/metadata.py | 4 ++--
6 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/surfsense_backend/app/automations/schemas/__init__.py b/surfsense_backend/app/automations/schemas/__init__.py
index 2bb0060ba..8659ac9c9 100644
--- a/surfsense_backend/app/automations/schemas/__init__.py
+++ b/surfsense_backend/app/automations/schemas/__init__.py
@@ -5,9 +5,9 @@ from __future__ import annotations
from .actions import AgentTaskActionParams
from .definition import (
AutomationDefinition,
- ExecutionBlock,
- InputsBlock,
- MetadataBlock,
+ Execution,
+ Inputs,
+ Metadata,
PlanStep,
TriggerSpec,
)
@@ -16,10 +16,10 @@ from .triggers import ManualTriggerParams, ScheduleTriggerParams
__all__ = [
"AgentTaskActionParams",
"AutomationDefinition",
- "ExecutionBlock",
- "InputsBlock",
+ "Execution",
+ "Inputs",
"ManualTriggerParams",
- "MetadataBlock",
+ "Metadata",
"PlanStep",
"ScheduleTriggerParams",
"TriggerSpec",
diff --git a/surfsense_backend/app/automations/schemas/definition/__init__.py b/surfsense_backend/app/automations/schemas/definition/__init__.py
index 838e72f86..3fb0a739b 100644
--- a/surfsense_backend/app/automations/schemas/definition/__init__.py
+++ b/surfsense_backend/app/automations/schemas/definition/__init__.py
@@ -1,19 +1,19 @@
-"""Automation definition envelope and its building blocks."""
+"""Automation definition envelope and its components."""
from __future__ import annotations
from .envelope import AutomationDefinition
-from .execution import ExecutionBlock
-from .inputs import InputsBlock
-from .metadata import MetadataBlock
+from .execution import Execution
+from .inputs import Inputs
+from .metadata import Metadata
from .plan_step import PlanStep
from .trigger_spec import TriggerSpec
__all__ = [
"AutomationDefinition",
- "ExecutionBlock",
- "InputsBlock",
- "MetadataBlock",
+ "Execution",
+ "Inputs",
+ "Metadata",
"PlanStep",
"TriggerSpec",
]
diff --git a/surfsense_backend/app/automations/schemas/definition/envelope.py b/surfsense_backend/app/automations/schemas/definition/envelope.py
index ffc45a0cd..f919b2abb 100644
--- a/surfsense_backend/app/automations/schemas/definition/envelope.py
+++ b/surfsense_backend/app/automations/schemas/definition/envelope.py
@@ -4,9 +4,9 @@ from __future__ import annotations
from pydantic import BaseModel, ConfigDict, Field
-from .execution import ExecutionBlock
-from .inputs import InputsBlock
-from .metadata import MetadataBlock
+from .execution import Execution
+from .inputs import Inputs
+from .metadata import Metadata
from .plan_step import PlanStep
from .trigger_spec import TriggerSpec
@@ -19,8 +19,8 @@ class AutomationDefinition(BaseModel):
schema_version: str = "1.0"
name: str = Field(..., min_length=1, max_length=200)
goal: str | None = None
- inputs: InputsBlock | None = None
+ inputs: Inputs | None = None
triggers: list[TriggerSpec] = Field(default_factory=list)
plan: list[PlanStep] = Field(..., min_length=1)
- execution: ExecutionBlock = Field(default_factory=ExecutionBlock)
- metadata: MetadataBlock = Field(default_factory=MetadataBlock)
+ execution: Execution = Field(default_factory=Execution)
+ metadata: Metadata = Field(default_factory=Metadata)
diff --git a/surfsense_backend/app/automations/schemas/definition/execution.py b/surfsense_backend/app/automations/schemas/definition/execution.py
index d5f31364c..61861f8d8 100644
--- a/surfsense_backend/app/automations/schemas/definition/execution.py
+++ b/surfsense_backend/app/automations/schemas/definition/execution.py
@@ -1,4 +1,4 @@
-"""``ExecutionBlock`` — automation-wide execution defaults (overridable per step)."""
+"""``Execution`` — automation-wide execution defaults (overridable per step)."""
from __future__ import annotations
@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict, Field
from .plan_step import PlanStep
-class ExecutionBlock(BaseModel):
+class Execution(BaseModel):
model_config = ConfigDict(extra="forbid")
timeout_seconds: int = Field(default=600, gt=0, description="Wall-clock cap for the run.")
diff --git a/surfsense_backend/app/automations/schemas/definition/inputs.py b/surfsense_backend/app/automations/schemas/definition/inputs.py
index b0b1a9414..619fd16cd 100644
--- a/surfsense_backend/app/automations/schemas/definition/inputs.py
+++ b/surfsense_backend/app/automations/schemas/definition/inputs.py
@@ -1,4 +1,4 @@
-"""``InputsBlock`` — JSON Schema for inputs an automation accepts at fire time."""
+"""``Inputs`` — JSON Schema for inputs an automation accepts at fire time."""
from __future__ import annotations
@@ -7,7 +7,7 @@ from typing import Any
from pydantic import BaseModel, ConfigDict, Field
-class InputsBlock(BaseModel):
+class Inputs(BaseModel):
model_config = ConfigDict(
extra="forbid",
populate_by_name=True,
diff --git a/surfsense_backend/app/automations/schemas/definition/metadata.py b/surfsense_backend/app/automations/schemas/definition/metadata.py
index 9b3722430..3ac341d2e 100644
--- a/surfsense_backend/app/automations/schemas/definition/metadata.py
+++ b/surfsense_backend/app/automations/schemas/definition/metadata.py
@@ -1,11 +1,11 @@
-"""``MetadataBlock`` — free-form metadata on a definition. Extra keys allowed."""
+"""``Metadata`` — free-form metadata on a definition. Extra keys allowed."""
from __future__ import annotations
from pydantic import BaseModel, ConfigDict, Field
-class MetadataBlock(BaseModel):
+class Metadata(BaseModel):
model_config = ConfigDict(extra="allow")
tags: list[str] = Field(default_factory=list)
From 99fd1a1338d870316e24d727380a9b1e46169e0a Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 13:58:57 +0200
Subject: [PATCH 35/87] feat(automation): register agent_task action and
schedule/manual triggers
---
.../registries/actions/__init__.py | 3 +++
.../registries/actions/agent_task.py | 27 +++++++++++++++++++
.../registries/triggers/__init__.py | 3 +++
.../automations/registries/triggers/manual.py | 17 ++++++++++++
.../registries/triggers/schedule.py | 21 +++++++++++++++
5 files changed, 71 insertions(+)
create mode 100644 surfsense_backend/app/automations/registries/actions/agent_task.py
create mode 100644 surfsense_backend/app/automations/registries/triggers/manual.py
create mode 100644 surfsense_backend/app/automations/registries/triggers/schedule.py
diff --git a/surfsense_backend/app/automations/registries/actions/__init__.py b/surfsense_backend/app/automations/registries/actions/__init__.py
index 1bb3ae9cc..68e507133 100644
--- a/surfsense_backend/app/automations/registries/actions/__init__.py
+++ b/surfsense_backend/app/automations/registries/actions/__init__.py
@@ -12,3 +12,6 @@ __all__ = [
"get_action",
"register_action",
]
+
+# Built-in actions self-register at import time.
+from . import agent_task # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/registries/actions/agent_task.py b/surfsense_backend/app/automations/registries/actions/agent_task.py
new file mode 100644
index 000000000..9acc11c2c
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/actions/agent_task.py
@@ -0,0 +1,27 @@
+"""Built-in ``agent_task`` action. Self-registers at import time."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.automations.schemas.actions import AgentTaskActionParams
+
+from .store import register_action
+from .types import ActionDefinition
+
+
+async def _handle_agent_task(args: dict[str, Any]) -> dict[str, Any]:
+    """Stub until the executor is wired in: validates ``args`` and returns a fixed payload."""
+    AgentTaskActionParams.model_validate(args)  # validation only; the parsed model is discarded
+    return {"status": "stubbed"}
+
+
+AGENT_TASK_ACTION = ActionDefinition(
+    type="agent_task",
+    name="Agent task",
+    description="Run an agent task with a scoped tool allowlist.",
+    params_schema=AgentTaskActionParams.model_json_schema(),  # generated from the params model
+    handler=_handle_agent_task,
+)
+
+register_action(AGENT_TASK_ACTION)  # import-time side effect: importing this module registers it
diff --git a/surfsense_backend/app/automations/registries/triggers/__init__.py b/surfsense_backend/app/automations/registries/triggers/__init__.py
index 843da5e70..e08dcce76 100644
--- a/surfsense_backend/app/automations/registries/triggers/__init__.py
+++ b/surfsense_backend/app/automations/registries/triggers/__init__.py
@@ -11,3 +11,6 @@ __all__ = [
"get_trigger",
"register_trigger",
]
+
+# Built-in triggers self-register at import time.
+from . import manual, schedule # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/registries/triggers/manual.py b/surfsense_backend/app/automations/registries/triggers/manual.py
new file mode 100644
index 000000000..173c38655
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/triggers/manual.py
@@ -0,0 +1,17 @@
+"""Built-in ``manual`` trigger. Self-registers at import time."""
+
+from __future__ import annotations
+
+from app.automations.schemas.triggers import ManualTriggerParams
+
+from .store import register_trigger
+from .types import TriggerDefinition
+
+MANUAL_TRIGGER = TriggerDefinition(
+    type="manual",
+    description="Fire on a user-initiated 'Run now' invocation.",
+    params_schema=ManualTriggerParams.model_json_schema(),
+    payload_schema={"type": "object"},  # any JSON object is a valid payload
+)
+
+register_trigger(MANUAL_TRIGGER)  # import-time side effect: importing this module registers it
diff --git a/surfsense_backend/app/automations/registries/triggers/schedule.py b/surfsense_backend/app/automations/registries/triggers/schedule.py
new file mode 100644
index 000000000..0a6575f39
--- /dev/null
+++ b/surfsense_backend/app/automations/registries/triggers/schedule.py
@@ -0,0 +1,21 @@
+"""Built-in ``schedule`` trigger. Self-registers at import time."""
+
+from __future__ import annotations
+
+from app.automations.schemas.triggers import ScheduleTriggerParams
+
+from .store import register_trigger
+from .types import TriggerDefinition
+
+SCHEDULE_TRIGGER = TriggerDefinition(
+    type="schedule",
+    description="Fire on a cron schedule in a given timezone.",
+    params_schema=ScheduleTriggerParams.model_json_schema(),
+    payload_schema={
+        "type": "object",
+        "additionalProperties": False,  # no properties declared, so only ``{}`` validates
+        "properties": {},
+    },
+)
+
+register_trigger(SCHEDULE_TRIGGER)  # import-time side effect: importing this module registers it
From b4e5bf95a46a0f9025403be1c587f2822d18030e Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 14:23:17 +0200
Subject: [PATCH 36/87] feat(automation): add template filter and test
allowlist
---
.../app/automations/templating/allowlist.py | 31 +++++++++++++++++++
1 file changed, 31 insertions(+)
create mode 100644 surfsense_backend/app/automations/templating/allowlist.py
diff --git a/surfsense_backend/app/automations/templating/allowlist.py b/surfsense_backend/app/automations/templating/allowlist.py
new file mode 100644
index 000000000..ed0103c8f
--- /dev/null
+++ b/surfsense_backend/app/automations/templating/allowlist.py
@@ -0,0 +1,31 @@
+"""Filter and test names admitted into the sandboxed environment."""
+
+from __future__ import annotations
+
+ALLOWED_FILTERS: tuple[str, ...] = (
+    "default",
+    "first",
+    "join",
+    "last",
+    "length",
+    "lower",
+    "replace",
+    "reverse",
+    "sort",
+    "tojson",
+    "trim",
+    "truncate",
+    "upper",
+    "date",  # custom filter, not a Jinja built-in
+    "slugify",  # custom filter, not a Jinja built-in
+)
+
+ALLOWED_TESTS: tuple[str, ...] = (
+    "defined",
+    "none",
+    "number",
+    "string",
+    "mapping",
+    "sequence",
+    "boolean",
+)
From 08e94ac5ca4056709220bfb9808b43966608610d Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 14:23:17 +0200
Subject: [PATCH 37/87] feat(automation): add custom template filters
---
.../app/automations/templating/filters.py | 29 +++++++++++++++++++
1 file changed, 29 insertions(+)
create mode 100644 surfsense_backend/app/automations/templating/filters.py
diff --git a/surfsense_backend/app/automations/templating/filters.py b/surfsense_backend/app/automations/templating/filters.py
new file mode 100644
index 000000000..65f66eb37
--- /dev/null
+++ b/surfsense_backend/app/automations/templating/filters.py
@@ -0,0 +1,29 @@
+"""Custom Jinja filters registered into the sandboxed environment."""
+
+from __future__ import annotations
+
+import re
+from typing import Any
+
+
+def filter_date(value: Any, fmt: str = "%Y-%m-%d") -> str:
+    """Format ``value`` via ``strftime(fmt)``; ``None`` -> ``""``, strings pass through."""
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    if not hasattr(value, "strftime"):
+        raise ValueError(f"date filter requires datetime-like, got {type(value).__name__}")
+    return value.strftime(fmt)
+
+
+_SLUG_NONALNUM = re.compile(r"[^a-z0-9]+")
+_SLUG_DASHES = re.compile(r"-+")
+
+
+def filter_slugify(value: Any) -> str:
+    """Slugify ``str(value)``: lowercase, hyphenate non-``[a-z0-9]`` runs, strip edge hyphens."""
+    lowered = str(value).lower()
+    hyphenated = _SLUG_NONALNUM.sub("-", lowered)
+    # Each run is already a single hyphen here; the collapse pass is kept as a safeguard.
+    return _SLUG_DASHES.sub("-", hyphenated).strip("-")
From 8345e79f6d71c1b7355038c1e1302bd51f042fec Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 14:23:17 +0200
Subject: [PATCH 38/87] feat(automation): add sandboxed template environment
---
.../app/automations/templating/environment.py | 43 +++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 surfsense_backend/app/automations/templating/environment.py
diff --git a/surfsense_backend/app/automations/templating/environment.py b/surfsense_backend/app/automations/templating/environment.py
new file mode 100644
index 000000000..6ac5f7361
--- /dev/null
+++ b/surfsense_backend/app/automations/templating/environment.py
@@ -0,0 +1,43 @@
+"""SandboxedEnvironment construction with the audited filter/test allowlist."""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from typing import Any
+
+from jinja2 import StrictUndefined
+from jinja2.sandbox import SandboxedEnvironment
+
+from .allowlist import ALLOWED_FILTERS, ALLOWED_TESTS
+from .filters import filter_date, filter_slugify
+
+
+def _finalize(value: Any) -> Any:
+    """Output hook: ``None`` -> ``""``, datetimes -> ISO 8601, lists/dicts -> JSON text."""
+    if isinstance(value, (list, dict)):
+        # default=str covers nested values json cannot encode natively (e.g. datetimes).
+        return json.dumps(value, ensure_ascii=False, default=str)
+    if isinstance(value, datetime):
+        return value.isoformat()
+    if value is None:
+        return ""
+    # Strings and every remaining type are handed back unchanged.
+    return value
+
+
+def _build_env() -> SandboxedEnvironment:
+    env = SandboxedEnvironment(
+        autoescape=False,
+        undefined=StrictUndefined,  # undefined variables raise instead of rendering as empty
+        finalize=_finalize,
+    )
+    env.globals.clear()  # remove every default global from the template namespace
+    env.filters = {k: v for k, v in env.filters.items() if k in ALLOWED_FILTERS}
+    env.filters["date"] = filter_date  # custom filters are added after the built-ins are pruned
+    env.filters["slugify"] = filter_slugify
+    env.tests = {k: v for k, v in env.tests.items() if k in ALLOWED_TESTS}
+    return env
+
+
+ENV: SandboxedEnvironment = _build_env()  # module-level instance, built once at import
From de6da1b775708edd11e370e90d786604162527f4 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 14:23:17 +0200
Subject: [PATCH 39/87] feat(automation): add template render and predicate
evaluation
---
.../app/automations/templating/render.py | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
create mode 100644 surfsense_backend/app/automations/templating/render.py
diff --git a/surfsense_backend/app/automations/templating/render.py b/surfsense_backend/app/automations/templating/render.py
new file mode 100644
index 000000000..374095cd2
--- /dev/null
+++ b/surfsense_backend/app/automations/templating/render.py
@@ -0,0 +1,18 @@
+"""Render templates and evaluate predicates against the sandboxed environment."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from .environment import ENV
+
+
+def render_template(template: str, context: Mapping[str, Any]) -> str:
+    """Compile ``template`` with the shared ``ENV`` and render it; ``context`` keys are variables."""
+    return ENV.from_string(template).render(**context)
+
+
+def evaluate_predicate(expression: str, context: Mapping[str, Any]) -> bool:
+    """Evaluate a Jinja expression (not a template body) in ``ENV`` and coerce the result to bool."""
+    return bool(ENV.compile_expression(expression)(**context))
From cb42b3a84f7b03ab611b233cbfd3c67f6e9cf67c Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 14:23:18 +0200
Subject: [PATCH 40/87] feat(automation): add template run context builder
---
.../app/automations/templating/__init__.py | 12 ++++++
.../app/automations/templating/context.py | 41 +++++++++++++++++++
2 files changed, 53 insertions(+)
create mode 100644 surfsense_backend/app/automations/templating/__init__.py
create mode 100644 surfsense_backend/app/automations/templating/context.py
diff --git a/surfsense_backend/app/automations/templating/__init__.py b/surfsense_backend/app/automations/templating/__init__.py
new file mode 100644
index 000000000..8a00ec5ff
--- /dev/null
+++ b/surfsense_backend/app/automations/templating/__init__.py
@@ -0,0 +1,12 @@
+"""Sandboxed template engine for automation definitions."""
+
+from __future__ import annotations
+
+from .context import build_run_context
+from .render import evaluate_predicate, render_template
+
+__all__ = [
+ "build_run_context",
+ "evaluate_predicate",
+ "render_template",
+]
diff --git a/surfsense_backend/app/automations/templating/context.py b/surfsense_backend/app/automations/templating/context.py
new file mode 100644
index 000000000..3ca87694c
--- /dev/null
+++ b/surfsense_backend/app/automations/templating/context.py
@@ -0,0 +1,41 @@
+"""Builder for the ``{run, inputs, steps}`` namespace exposed to every template."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from datetime import datetime
+from typing import Any
+
+
+def build_run_context(
+    *,
+    run_id: int,
+    automation_id: int,
+    automation_name: str | None,
+    automation_version: int | None,
+    search_space_id: int | None,
+    creator_id: Any,
+    trigger_id: int | None,
+    trigger_type: str | None,
+    started_at: datetime | None,
+    attempt: int,
+    resolved_inputs: Mapping[str, Any],
+    step_outputs: Mapping[str, Any],
+) -> dict[str, Any]:
+    """Assemble the template namespace: ``run`` metadata plus copies of ``inputs`` and ``steps``."""
+    return {
+        "run": {
+            "id": run_id,
+            "automation_id": automation_id,
+            "automation_name": automation_name,
+            "automation_version": automation_version,
+            "search_space_id": search_space_id,
+            "creator_id": creator_id,
+            "trigger_id": trigger_id,
+            "trigger_type": trigger_type,
+            "started_at": started_at,  # passed through as given (datetime or None), not a string
+            "attempt": attempt,
+        },
+        "inputs": dict(resolved_inputs),  # shallow copy of the caller's mapping
+        "steps": dict(step_outputs),  # shallow copy of the caller's mapping
+    }
From 8b87d179e92eb52b247e3e910074b15d6f54ade9 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 41/87] feat(automation): add recursive render_value to
templating
---
.../app/automations/templating/__init__.py | 3 ++-
.../app/automations/templating/render.py | 11 +++++++++++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/surfsense_backend/app/automations/templating/__init__.py b/surfsense_backend/app/automations/templating/__init__.py
index 8a00ec5ff..1df1809c7 100644
--- a/surfsense_backend/app/automations/templating/__init__.py
+++ b/surfsense_backend/app/automations/templating/__init__.py
@@ -3,10 +3,11 @@
from __future__ import annotations
from .context import build_run_context
-from .render import evaluate_predicate, render_template
+from .render import evaluate_predicate, render_template, render_value
__all__ = [
"build_run_context",
"evaluate_predicate",
"render_template",
+ "render_value",
]
diff --git a/surfsense_backend/app/automations/templating/render.py b/surfsense_backend/app/automations/templating/render.py
index 374095cd2..42721ddeb 100644
--- a/surfsense_backend/app/automations/templating/render.py
+++ b/surfsense_backend/app/automations/templating/render.py
@@ -16,3 +16,14 @@ def render_template(template: str, context: Mapping[str, Any]) -> str:
def evaluate_predicate(expression: str, context: Mapping[str, Any]) -> bool:
"""Evaluate a Jinja expression (not a template body) and coerce to bool."""
return bool(ENV.compile_expression(expression)(**context))
+
+
+def render_value(value: Any, context: Mapping[str, Any]) -> Any:
+    """Recursively render every string in a JSON-like value; other leaves are returned as-is."""
+    if isinstance(value, str):
+        return render_template(value, context)
+    if isinstance(value, dict):
+        return {k: render_value(v, context) for k, v in value.items()}  # keys are not rendered
+    if isinstance(value, list):
+        return [render_value(v, context) for v in value]
+    return value  # non-str scalars and other containers (e.g. tuples) are left untouched
From 924a82c0b1b92e7bca10a2f4e7f5dfd5e39b7fcc Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 42/87] feat(automation): add retry policy helper
---
.../app/automations/runtime/retries.py | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 surfsense_backend/app/automations/runtime/retries.py
diff --git a/surfsense_backend/app/automations/runtime/retries.py b/surfsense_backend/app/automations/runtime/retries.py
new file mode 100644
index 000000000..d5bfb15ca
--- /dev/null
+++ b/surfsense_backend/app/automations/runtime/retries.py
@@ -0,0 +1,36 @@
+"""Retry policy enforcement for action handlers."""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Awaitable, Callable
+
+
+async def with_retries[T](
+    coro_factory: Callable[[], Awaitable[T]],
+    *,
+    max_retries: int,
+    backoff: str,
+    timeout: int | None,
+) -> tuple[T, int]:
+    """Call ``coro_factory`` up to ``1 + max_retries`` times. Return ``(result, attempts)``."""
+    total = 1 + max(0, max_retries)  # a negative max_retries still allows one attempt
+    for attempt in range(1, total + 1):
+        try:
+            coro = coro_factory()  # each attempt awaits a fresh awaitable
+            if timeout is not None and timeout > 0:  # None or <= 0 means no per-attempt limit
+                return await asyncio.wait_for(coro, timeout=timeout), attempt
+            return await coro, attempt
+        except Exception:  # includes the timeout error raised by wait_for
+            if attempt >= total:
+                raise  # out of attempts: propagate the last error unchanged
+            await asyncio.sleep(_backoff_seconds(backoff, attempt))
+    raise RuntimeError("with_retries exhausted without raising or returning")
+
+
+def _backoff_seconds(strategy: str, attempt: int) -> float:
+    """Seconds to wait after failed ``attempt`` (1-based); unknown strategies wait 0."""
+    if strategy == "linear":
+        return float(attempt)
+    # "exponential" doubles per attempt: 1, 2, 4, ...
+    return float(2 ** (attempt - 1)) if strategy == "exponential" else 0.0
From f71a02db2f27fe56467794a0f53a6d331414a273 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 43/87] feat(automation): add automation run repository
---
.../app/automations/runtime/repository.py | 62 +++++++++++++++++++
1 file changed, 62 insertions(+)
create mode 100644 surfsense_backend/app/automations/runtime/repository.py
diff --git a/surfsense_backend/app/automations/runtime/repository.py b/surfsense_backend/app/automations/runtime/repository.py
new file mode 100644
index 000000000..a8bdbc55a
--- /dev/null
+++ b/surfsense_backend/app/automations/runtime/repository.py
@@ -0,0 +1,62 @@
+"""Persistence operations on ``AutomationRun``. Pure SQL, no business logic."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
+
+from app.automations.persistence.enums.run_status import RunStatus
+from app.automations.persistence.models.run import AutomationRun
+
+
+async def load_run(session: AsyncSession, run_id: int) -> AutomationRun | None:
+    """Load run ``run_id`` with ``automation`` and ``trigger`` eager-loaded, or ``None``."""
+    stmt = (
+        select(AutomationRun)
+        .where(AutomationRun.id == run_id)
+        .options(
+            selectinload(AutomationRun.automation),  # loaded up front, not lazily on access
+            selectinload(AutomationRun.trigger),
+        )
+    )
+    result = await session.execute(stmt)
+    return result.scalar_one_or_none()  # None when no row has this id
+
+
+async def mark_running(session: AsyncSession, run: AutomationRun) -> None:
+    run.status = RunStatus.RUNNING
+    run.started_at = datetime.now(UTC)  # timezone-aware UTC timestamp
+    await session.flush()  # flushes without committing
+
+
+async def mark_succeeded(session: AsyncSession, run: AutomationRun) -> None:
+    run.status = RunStatus.SUCCEEDED
+    run.finished_at = datetime.now(UTC)  # timezone-aware UTC timestamp
+    await session.flush()  # flushes without committing
+
+
+async def mark_failed(
+    session: AsyncSession,
+    run: AutomationRun,
+    error: dict[str, Any] | None,
+) -> None:
+    run.status = RunStatus.FAILED
+    run.finished_at = datetime.now(UTC)  # timezone-aware UTC timestamp
+    run.error = error  # stored as given, including None
+    await session.flush()  # flushes without committing
+
+
+async def append_step_result(
+    session: AsyncSession,
+    run: AutomationRun,
+    step_result: dict[str, Any],
+) -> None:
+    """Append one step result. Reassigns the list so SQLAlchemy detects the change."""
+    current = list(run.step_results or [])  # copy; a None/empty value starts a new list
+    current.append(step_result)
+    run.step_results = current
+    await session.flush()  # flushes without committing
From 0a329e5a69bd2604471730f9108a4fa9ee8d5a13 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 44/87] feat(automation): add per-step execution
---
.../app/automations/runtime/step.py | 92 +++++++++++++++++++
1 file changed, 92 insertions(+)
create mode 100644 surfsense_backend/app/automations/runtime/step.py
diff --git a/surfsense_backend/app/automations/runtime/step.py b/surfsense_backend/app/automations/runtime/step.py
new file mode 100644
index 000000000..07b894a91
--- /dev/null
+++ b/surfsense_backend/app/automations/runtime/step.py
@@ -0,0 +1,92 @@
+"""Execute one plan step: when-predicate, params render, handler dispatch, retries."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from datetime import UTC, datetime
+from typing import Any
+
+from app.automations.registries import get_action
+from app.automations.schemas.definition.plan_step import PlanStep
+from app.automations.templating import evaluate_predicate, render_value
+
+from .retries import with_retries
+
+
+async def execute_step(
+    *,
+    step: PlanStep,
+    template_context: Mapping[str, Any],
+    default_max_retries: int,
+    default_retry_backoff: str,
+    default_timeout_seconds: int,
+) -> dict[str, Any]:
+    """Run one step and return its result entry; step errors are captured as ``failed``."""
+    started_at = datetime.now(UTC)
+
+    if step.when is not None:
+        try:
+            should_run = evaluate_predicate(step.when, template_context)
+        except Exception as exc:
+            return _result(step, "failed", started_at, attempts=0, error=_error(exc, "when"))
+        if not should_run:
+            return _result(step, "skipped", started_at, attempts=0)
+
+    try:
+        resolved_params = render_value(step.params, template_context)
+    except Exception as exc:
+        return _result(step, "failed", started_at, attempts=0, error=_error(exc, "render"))
+
+    action = get_action(step.action)
+    if action is None:
+        return _result(
+            step,
+            "failed",
+            started_at,
+            attempts=0,
+            error={"message": f"action not registered: {step.action}", "type": "ActionNotFound"},
+        )
+
+    max_retries = step.max_retries if step.max_retries is not None else default_max_retries
+    max_retries = max(0, max_retries)  # same clamp as with_retries, so reported attempts agree
+    timeout = step.timeout_seconds or default_timeout_seconds  # falsy step value -> default
+    try:
+        result, attempts = await with_retries(
+            lambda: action.handler(resolved_params),
+            max_retries=max_retries,
+            backoff=default_retry_backoff,
+            timeout=timeout,  # applied per attempt by with_retries
+        )
+    except Exception as exc:
+        return _result(step, "failed", started_at, attempts=max_retries + 1, error=_error(exc))
+
+    return _result(step, "succeeded", started_at, attempts=attempts, result=result)
+
+
+def _result(
+    step: PlanStep,
+    status: str,
+    started_at: datetime,
+    *,
+    attempts: int,
+    result: Any = None,
+    error: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Assemble a step's result entry; ``finished_at`` is stamped at call time."""
+    entry: dict[str, Any] = {
+        "step_id": step.step_id,
+        "action": step.action,
+        "status": status,
+        "started_at": started_at.isoformat(),
+        "finished_at": datetime.now(UTC).isoformat(),
+        "attempts": attempts,
+    }
+    optional = {"result": result, "error": error}
+    # Optional keys are only emitted when they carry a value.
+    entry.update({key: val for key, val in optional.items() if val is not None})
+    return entry
+
+
+def _error(exc: Exception, phase: str | None = None) -> dict[str, Any]:
+    prefix = f"{phase}: " if phase else ""  # an empty phase is treated like no phase
+    return {"message": f"{prefix}{exc}", "type": type(exc).__name__}
From d3cda121917268f198a927ebfcff633c2b4cd8e3 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 45/87] feat(automation): add automation run executor
---
.../app/automations/runtime/executor.py | 105 ++++++++++++++++++
1 file changed, 105 insertions(+)
create mode 100644 surfsense_backend/app/automations/runtime/executor.py
diff --git a/surfsense_backend/app/automations/runtime/executor.py b/surfsense_backend/app/automations/runtime/executor.py
new file mode 100644
index 000000000..51c4417e3
--- /dev/null
+++ b/surfsense_backend/app/automations/runtime/executor.py
@@ -0,0 +1,105 @@
+"""Walk an ``AutomationRun``'s snapshot plan to terminal state."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.persistence.enums.run_status import RunStatus
+from app.automations.persistence.models.run import AutomationRun
+from app.automations.schemas.definition.envelope import AutomationDefinition
+from app.automations.templating import build_run_context
+
+from . import repository
+from .step import execute_step
+
+
+async def execute_run(session: AsyncSession, run_id: int) -> None:
+    """Run PENDING run ``run_id`` to a terminal state; raise ``ValueError`` if it is missing."""
+    run = await repository.load_run(session, run_id)
+    if run is None:
+        raise ValueError(f"automation_run {run_id} not found")
+
+    if run.status != RunStatus.PENDING:  # only PENDING runs are executed; others are a no-op
+        return
+
+    try:
+        definition = AutomationDefinition.model_validate(run.definition_snapshot)
+    except Exception as exc:
+        await repository.mark_failed(
+            session,
+            run,
+            {"message": f"definition_snapshot invalid: {exc}", "type": type(exc).__name__},
+        )
+        await session.commit()
+        return
+
+    await repository.mark_running(session, run)
+    await session.commit()  # NOTE(review): errors raised past here leave the run RUNNING
+
+    step_outputs: dict[str, Any] = {}
+
+    for step in definition.plan:
+        ctx = _build_ctx(run, step_outputs)  # rebuilt per step so earlier outputs are visible
+        result = await execute_step(
+            step=step,
+            template_context=ctx,
+            default_max_retries=definition.execution.max_retries,
+            default_retry_backoff=definition.execution.retry_backoff,
+            default_timeout_seconds=definition.execution.timeout_seconds,
+        )
+        await repository.append_step_result(session, run, result)
+        await session.commit()  # each step result is committed before the next step starts
+
+        if result["status"] == "failed":  # first failed step ends the run
+            await _run_on_failure(session, run, definition)
+            await repository.mark_failed(session, run, result.get("error"))
+            await session.commit()
+            return
+
+        if result["status"] == "succeeded":  # skipped steps publish no output
+            step_outputs[step.output_as or step.step_id] = result.get("result")
+
+    await repository.mark_succeeded(session, run)
+    await session.commit()
+
+
+async def _run_on_failure(
+    session: AsyncSession,
+    run: AutomationRun,
+    definition: AutomationDefinition,
+) -> None:
+    """Run the on_failure steps. Their failures don't recurse into more on_failure."""
+    if not definition.execution.on_failure:
+        return
+    ctx = _build_ctx(run, step_outputs={})  # these steps see no outputs from the main plan
+    for step in definition.execution.on_failure:
+        result = await execute_step(
+            step=step,
+            template_context=ctx,
+            default_max_retries=definition.execution.max_retries,
+            default_retry_backoff=definition.execution.retry_backoff,
+            default_timeout_seconds=definition.execution.timeout_seconds,
+        )
+        await repository.append_step_result(session, run, result)  # status is not inspected
+        await session.commit()
+
+
+def _build_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dict[str, Any]:
+    automation = run.automation  # may be falsy; the automation-derived fields then become None
+    trigger = run.trigger
+    return build_run_context(
+        run_id=run.id,
+        automation_id=run.automation_id,
+        automation_name=automation.name if automation else None,
+        automation_version=automation.version if automation else None,
+        search_space_id=automation.search_space_id if automation else None,
+        creator_id=automation.created_by_user_id if automation else None,
+        trigger_id=run.trigger_id,
+        trigger_type=trigger.type.value if trigger else None,
+        started_at=run.started_at,
+        attempt=1,  # NOTE(review): always 1 — confirm whether run-level attempts are tracked
+        resolved_inputs=run.resolved_inputs or {},
+        step_outputs=step_outputs,
+    )
From 273b98f350981dcd348527573d0569ab8e192590 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 46/87] feat(automation): expose runtime package surface
---
surfsense_backend/app/automations/runtime/__init__.py | 7 +++++++
1 file changed, 7 insertions(+)
create mode 100644 surfsense_backend/app/automations/runtime/__init__.py
diff --git a/surfsense_backend/app/automations/runtime/__init__.py b/surfsense_backend/app/automations/runtime/__init__.py
new file mode 100644
index 000000000..0650882b2
--- /dev/null
+++ b/surfsense_backend/app/automations/runtime/__init__.py
@@ -0,0 +1,7 @@
+"""Automation run executor: plan walker, step dispatch, retries, persistence."""
+
+from __future__ import annotations
+
+from .executor import execute_run
+
+__all__ = ["execute_run"]
From b26bf0bbcf0858839e188cacee4b7ace6a27b50c Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:02:36 +0200
Subject: [PATCH 47/87] feat(automation): register automation run celery task
---
.../app/automations/tasks/__init__.py | 3 ++
.../app/automations/tasks/execute_run.py | 33 +++++++++++++++++++
surfsense_backend/app/celery_app.py | 1 +
3 files changed, 37 insertions(+)
create mode 100644 surfsense_backend/app/automations/tasks/__init__.py
create mode 100644 surfsense_backend/app/automations/tasks/execute_run.py
diff --git a/surfsense_backend/app/automations/tasks/__init__.py b/surfsense_backend/app/automations/tasks/__init__.py
new file mode 100644
index 000000000..6fe0d62e8
--- /dev/null
+++ b/surfsense_backend/app/automations/tasks/__init__.py
@@ -0,0 +1,3 @@
+"""Celery task wrappers for the automation runtime."""
+
+from __future__ import annotations
diff --git a/surfsense_backend/app/automations/tasks/execute_run.py b/surfsense_backend/app/automations/tasks/execute_run.py
new file mode 100644
index 000000000..5fc84698b
--- /dev/null
+++ b/surfsense_backend/app/automations/tasks/execute_run.py
@@ -0,0 +1,33 @@
+"""Celery task that runs one automation. Thin wrapper over ``runtime.executor``."""
+
+from __future__ import annotations
+
+import logging
+
+from app.automations.runtime import execute_run
+from app.celery_app import celery_app
+from app.tasks.celery_tasks import (
+ get_celery_session_maker,
+ run_async_celery_task,
+)
+
+logger = logging.getLogger(__name__)
+
+TASK_NAME = "automation_run_execute"
+
+
+@celery_app.task(name=TASK_NAME, bind=True)
+def automation_run_execute(self, run_id: int) -> None:  # noqa: ARG001 — Celery bind
+    """Celery entry point for one ``AutomationRun``. Idempotent: terminal runs no-op."""
+    return run_async_celery_task(lambda: _impl(run_id))  # presumably drives the coroutine; verify
+
+
+async def _impl(run_id: int) -> None:
+    session_maker = get_celery_session_maker()
+    async with session_maker() as session:  # one session for the whole run
+        try:
+            await execute_run(session, run_id)
+        except Exception:
+            logger.exception("automation_run %d failed unexpectedly", run_id)
+            await session.rollback()  # discards uncommitted work only
+            raise  # re-raised so the failure reaches the Celery task
diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py
index 5b45baca1..569178239 100644
--- a/surfsense_backend/app/celery_app.py
+++ b/surfsense_backend/app/celery_app.py
@@ -188,6 +188,7 @@ celery_app = Celery(
"app.tasks.celery_tasks.document_reindex_tasks",
"app.tasks.celery_tasks.stale_notification_cleanup_task",
"app.tasks.celery_tasks.stripe_reconciliation_task",
+ "app.automations.tasks.execute_run",
],
)
From 1366c8a711b5cfd17caaa3f328d130bc7038ad27 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:30:34 +0200
Subject: [PATCH 48/87] feat(rbac): add automations permission family
---
surfsense_backend/app/db.py | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index 71466495b..ac880ded5 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -439,6 +439,13 @@ class Permission(StrEnum):
PUBLIC_SHARING_CREATE = "public_sharing:create"
PUBLIC_SHARING_DELETE = "public_sharing:delete"
+ # Automations
+ AUTOMATIONS_CREATE = "automations:create"
+ AUTOMATIONS_READ = "automations:read"
+ AUTOMATIONS_UPDATE = "automations:update"
+ AUTOMATIONS_DELETE = "automations:delete"
+ AUTOMATIONS_EXECUTE = "automations:execute"
+
# Full access wildcard
FULL_ACCESS = "*"
@@ -494,6 +501,11 @@ DEFAULT_ROLE_PERMISSIONS = {
# Public Sharing (can create and view, no delete)
Permission.PUBLIC_SHARING_VIEW.value,
Permission.PUBLIC_SHARING_CREATE.value,
+ # Automations (no delete)
+ Permission.AUTOMATIONS_CREATE.value,
+ Permission.AUTOMATIONS_READ.value,
+ Permission.AUTOMATIONS_UPDATE.value,
+ Permission.AUTOMATIONS_EXECUTE.value,
],
"Viewer": [
# Documents (read only)
@@ -525,6 +537,8 @@ DEFAULT_ROLE_PERMISSIONS = {
Permission.SETTINGS_VIEW.value,
# Public Sharing (view only)
Permission.PUBLIC_SHARING_VIEW.value,
+ # Automations (read only)
+ Permission.AUTOMATIONS_READ.value,
],
}
From 3bb02d8889559d0cd46f661e7f51f576a39600c9 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:30:41 +0200
Subject: [PATCH 49/87] feat(automations): add manual dispatch service
---
.../app/automations/dispatch/__init__.py | 8 ++
.../app/automations/dispatch/manual.py | 107 ++++++++++++++++++
2 files changed, 115 insertions(+)
create mode 100644 surfsense_backend/app/automations/dispatch/__init__.py
create mode 100644 surfsense_backend/app/automations/dispatch/manual.py
diff --git a/surfsense_backend/app/automations/dispatch/__init__.py b/surfsense_backend/app/automations/dispatch/__init__.py
new file mode 100644
index 000000000..4a549a4ce
--- /dev/null
+++ b/surfsense_backend/app/automations/dispatch/__init__.py
@@ -0,0 +1,8 @@
+"""Public dispatch surface for firing automations."""
+
+from .manual import DispatchError, dispatch_manual_run
+
+__all__ = [
+ "DispatchError",
+ "dispatch_manual_run",
+]
diff --git a/surfsense_backend/app/automations/dispatch/manual.py b/surfsense_backend/app/automations/dispatch/manual.py
new file mode 100644
index 000000000..221d6a3e2
--- /dev/null
+++ b/surfsense_backend/app/automations/dispatch/manual.py
@@ -0,0 +1,107 @@
+"""Manual ``Run now`` dispatch: validate inputs, snapshot the definition, enqueue."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import jsonschema
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.persistence.enums.automation_status import AutomationStatus
+from app.automations.persistence.enums.run_status import RunStatus
+from app.automations.persistence.enums.trigger_type import TriggerType
+from app.automations.persistence.models.automation import Automation
+from app.automations.persistence.models.run import AutomationRun
+from app.automations.persistence.models.trigger import AutomationTrigger
+from app.automations.schemas.definition.envelope import AutomationDefinition
+from app.automations.tasks.execute_run import automation_run_execute
+
+
+class DispatchError(Exception):
+ """A manual dispatch could not proceed (missing trigger, invalid inputs, ...)."""
+
+
+async def dispatch_manual_run(
+ *,
+ session: AsyncSession,
+ automation_id: int,
+ payload: dict[str, Any] | None,
+) -> AutomationRun:
+ """Validate, snapshot, persist, and enqueue an ``AutomationRun``."""
+ automation = await _load_automation(session, automation_id)
+ if automation is None:
+ raise DispatchError(f"automation {automation_id} not found")
+
+ if automation.status != AutomationStatus.ACTIVE:
+ raise DispatchError(
+ f"automation {automation_id} is {automation.status.value}, not active"
+ )
+
+ try:
+ definition = AutomationDefinition.model_validate(automation.definition)
+ except Exception as exc:
+ raise DispatchError(f"invalid automation definition: {exc}") from exc
+
+ trigger = await _find_manual_trigger(session, automation_id)
+ if trigger is None:
+ raise DispatchError(
+ f"automation {automation_id} has no enabled manual trigger"
+ )
+
+ resolved_inputs = _validate_inputs(definition, payload or {})
+ snapshot = definition.model_dump(mode="json", by_alias=True)
+
+ run = AutomationRun(
+ automation_id=automation_id,
+ trigger_id=trigger.id,
+ status=RunStatus.PENDING,
+ definition_snapshot=snapshot,
+ trigger_payload=payload,
+ resolved_inputs=resolved_inputs,
+ step_results=[],
+ artifacts=[],
+ )
+ session.add(run)
+ await session.commit()
+ await session.refresh(run)
+
+ automation_run_execute.apply_async(
+ args=[run.id],
+ time_limit=definition.execution.timeout_seconds,
+ )
+ return run
+
+
+async def _load_automation(
+ session: AsyncSession, automation_id: int
+) -> Automation | None:
+ stmt = select(Automation).where(Automation.id == automation_id)
+ return (await session.execute(stmt)).scalar_one_or_none()
+
+
+async def _find_manual_trigger(
+ session: AsyncSession, automation_id: int
+) -> AutomationTrigger | None:
+ stmt = (
+ select(AutomationTrigger)
+ .where(
+ AutomationTrigger.automation_id == automation_id,
+ AutomationTrigger.type == TriggerType.MANUAL,
+ AutomationTrigger.enabled.is_(True),
+ )
+ .limit(1)
+ )
+ return (await session.execute(stmt)).scalar_one_or_none()
+
+
+def _validate_inputs(
+ definition: AutomationDefinition, payload: dict[str, Any]
+) -> dict[str, Any]:
+ if definition.inputs is None or not definition.inputs.schema_:
+ return {}
+ try:
+ jsonschema.validate(instance=payload, schema=definition.inputs.schema_)
+ except jsonschema.ValidationError as exc:
+ raise DispatchError(f"inputs: {exc.message}") from exc
+ return payload
From cfbe2a7fe025cbf476f3ba1ada33ef3dcdcdbd35 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:30:45 +0200
Subject: [PATCH 50/87] feat(automations): expose POST /automations/{id}/run
---
surfsense_backend/app/routes/__init__.py | 2 +
.../app/routes/automations_routes.py | 55 +++++++++++++++++++
2 files changed, 57 insertions(+)
create mode 100644 surfsense_backend/app/routes/automations_routes.py
diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py
index ec4d1650f..1d3ca2141 100644
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@@ -7,6 +7,7 @@ from .agent_revert_route import router as agent_revert_router
from .airtable_add_connector_route import (
router as airtable_add_connector_router,
)
+from .automations_routes import router as automations_router
from .chat_comments_routes import router as chat_comments_router
from .circleback_webhook_route import router as circleback_webhook_router
from .clickup_add_connector_route import router as clickup_add_connector_router
@@ -119,3 +120,4 @@ router.include_router(youtube_router) # YouTube playlist resolution
router.include_router(prompts_router)
router.include_router(memory_router) # User personal memory (memory.md style)
router.include_router(team_memory_router) # Search-space team memory
+router.include_router(automations_router) # Automations (manual run-now)
diff --git a/surfsense_backend/app/routes/automations_routes.py b/surfsense_backend/app/routes/automations_routes.py
new file mode 100644
index 000000000..02c019625
--- /dev/null
+++ b/surfsense_backend/app/routes/automations_routes.py
@@ -0,0 +1,55 @@
+"""Routes for automations. v1: manual ``Run now``."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import APIRouter, Body, Depends, HTTPException
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.dispatch import DispatchError, dispatch_manual_run
+from app.automations.persistence.models.automation import Automation
+from app.db import Permission, User, get_async_session
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+router = APIRouter()
+
+
+@router.post("/automations/{automation_id}/run")
+async def run_automation_now(
+ automation_id: int,
+ payload: dict[str, Any] | None = Body(default=None),
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+) -> dict[str, Any]:
+ """Fire an automation manually. Returns the new run id and status."""
+ search_space_id = (
+ await session.execute(
+ select(Automation.search_space_id).where(Automation.id == automation_id)
+ )
+ ).scalar_one_or_none()
+ if search_space_id is None:
+ raise HTTPException(
+ status_code=404, detail=f"automation {automation_id} not found"
+ )
+
+ await check_permission(
+ session,
+ user,
+ search_space_id,
+ Permission.AUTOMATIONS_EXECUTE.value,
+ "You don't have permission to execute automations in this search space",
+ )
+
+ try:
+ run = await dispatch_manual_run(
+ session=session,
+ automation_id=automation_id,
+ payload=payload,
+ )
+ except DispatchError as exc:
+ raise HTTPException(status_code=422, detail=str(exc)) from exc
+
+ return {"run_id": run.id, "status": run.status.value}
From f646b5cbab2d69c061b70caca8664e33dcaf379c Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 15:37:25 +0200
Subject: [PATCH 51/87] feat(rbac): backfill automations permissions on
existing roles
---
...45_add_automations_permissions_to_roles.py | 87 +++++++++++++++++++
1 file changed, 87 insertions(+)
create mode 100644 surfsense_backend/alembic/versions/145_add_automations_permissions_to_roles.py
diff --git a/surfsense_backend/alembic/versions/145_add_automations_permissions_to_roles.py b/surfsense_backend/alembic/versions/145_add_automations_permissions_to_roles.py
new file mode 100644
index 000000000..779656b44
--- /dev/null
+++ b/surfsense_backend/alembic/versions/145_add_automations_permissions_to_roles.py
@@ -0,0 +1,87 @@
+"""Add automations permissions to existing Editor/Viewer roles
+
+Revision ID: 145
+Revises: 144
+Create Date: 2026-05-27
+
+Owners already have ``*`` and need no backfill. Custom (non-system) roles
+are left untouched on purpose: workspace admins manage those explicitly.
+"""
+
+from collections.abc import Sequence
+
+from sqlalchemy import text
+
+from alembic import op
+
+revision: str = "145"
+down_revision: str | None = "144"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+_EDITOR_PERMISSIONS = (
+ "automations:create",
+ "automations:read",
+ "automations:update",
+ "automations:execute",
+)
+_VIEWER_PERMISSIONS = ("automations:read",)
+
+
+def upgrade():
+ connection = op.get_bind()
+
+ for permission in _EDITOR_PERMISSIONS:
+ connection.execute(
+ text(
+ """
+ UPDATE search_space_roles
+ SET permissions = array_append(permissions, :permission)
+ WHERE name = 'Editor'
+ AND NOT (:permission = ANY(permissions))
+ """
+ ),
+ {"permission": permission},
+ )
+
+ for permission in _VIEWER_PERMISSIONS:
+ connection.execute(
+ text(
+ """
+ UPDATE search_space_roles
+ SET permissions = array_append(permissions, :permission)
+ WHERE name = 'Viewer'
+ AND NOT (:permission = ANY(permissions))
+ """
+ ),
+ {"permission": permission},
+ )
+
+
+def downgrade():
+ connection = op.get_bind()
+
+ for permission in _EDITOR_PERMISSIONS:
+ connection.execute(
+ text(
+ """
+ UPDATE search_space_roles
+ SET permissions = array_remove(permissions, :permission)
+ WHERE name = 'Editor'
+ """
+ ),
+ {"permission": permission},
+ )
+
+ for permission in _VIEWER_PERMISSIONS:
+ connection.execute(
+ text(
+ """
+ UPDATE search_space_roles
+ SET permissions = array_remove(permissions, :permission)
+ WHERE name = 'Viewer'
+ """
+ ),
+ {"permission": permission},
+ )
From 7ec3468113fe28ffb7c634aee790bfd91c625766 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 16:29:32 +0200
Subject: [PATCH 52/87] refactor(automations): bind action handlers via
ActionContext factory
---
.../app/automations/registries/__init__.py | 4 +++
.../registries/actions/__init__.py | 4 ++-
.../registries/actions/agent_task.py | 17 +++++++----
.../automations/registries/actions/types.py | 20 +++++++++++--
.../app/automations/runtime/executor.py | 29 +++++++++++++++----
.../app/automations/runtime/step.py | 6 +++-
6 files changed, 65 insertions(+), 15 deletions(-)
diff --git a/surfsense_backend/app/automations/registries/__init__.py b/surfsense_backend/app/automations/registries/__init__.py
index f497caf59..f6af3817b 100644
--- a/surfsense_backend/app/automations/registries/__init__.py
+++ b/surfsense_backend/app/automations/registries/__init__.py
@@ -3,8 +3,10 @@
from __future__ import annotations
from .actions import (
+ ActionContext,
ActionDefinition,
ActionHandler,
+ ActionHandlerFactory,
all_actions,
get_action,
register_action,
@@ -17,8 +19,10 @@ from .triggers import (
)
__all__ = [
+ "ActionContext",
"ActionDefinition",
"ActionHandler",
+ "ActionHandlerFactory",
"TriggerDefinition",
"all_actions",
"all_triggers",
diff --git a/surfsense_backend/app/automations/registries/actions/__init__.py b/surfsense_backend/app/automations/registries/actions/__init__.py
index 68e507133..b95c634f2 100644
--- a/surfsense_backend/app/automations/registries/actions/__init__.py
+++ b/surfsense_backend/app/automations/registries/actions/__init__.py
@@ -3,11 +3,13 @@
from __future__ import annotations
from .store import all_actions, get_action, register_action
-from .types import ActionDefinition, ActionHandler
+from .types import ActionContext, ActionDefinition, ActionHandler, ActionHandlerFactory
__all__ = [
+ "ActionContext",
"ActionDefinition",
"ActionHandler",
+ "ActionHandlerFactory",
"all_actions",
"get_action",
"register_action",
diff --git a/surfsense_backend/app/automations/registries/actions/agent_task.py b/surfsense_backend/app/automations/registries/actions/agent_task.py
index 9acc11c2c..beba455cc 100644
--- a/surfsense_backend/app/automations/registries/actions/agent_task.py
+++ b/surfsense_backend/app/automations/registries/actions/agent_task.py
@@ -7,13 +7,18 @@ from typing import Any
from app.automations.schemas.actions import AgentTaskActionParams
from .store import register_action
-from .types import ActionDefinition
+from .types import ActionContext, ActionDefinition, ActionHandler
-async def _handle_agent_task(args: dict[str, Any]) -> dict[str, Any]:
- """Stub. Validates params; real wiring lands with the executor."""
- AgentTaskActionParams.model_validate(args)
- return {"status": "stubbed"}
+def _build_handler(ctx: ActionContext) -> ActionHandler:
+ """Bind run/session context to the agent_task handler. Real wiring lands in Phase 4b."""
+ del ctx # ignored by the stub; real handler will consume it
+
+ async def handle(params: dict[str, Any]) -> dict[str, Any]:
+ AgentTaskActionParams.model_validate(params)
+ return {"status": "stubbed"}
+
+ return handle
AGENT_TASK_ACTION = ActionDefinition(
@@ -21,7 +26,7 @@ AGENT_TASK_ACTION = ActionDefinition(
name="Agent task",
description="Run an agent task with a scoped tool allowlist.",
params_schema=AgentTaskActionParams.model_json_schema(),
- handler=_handle_agent_task,
+ build_handler=_build_handler,
)
register_action(AGENT_TASK_ACTION)
diff --git a/surfsense_backend/app/automations/registries/actions/types.py b/surfsense_backend/app/automations/registries/actions/types.py
index 99f94ae7c..433c60841 100644
--- a/surfsense_backend/app/automations/registries/actions/types.py
+++ b/surfsense_backend/app/automations/registries/actions/types.py
@@ -1,12 +1,28 @@
-"""``ActionDefinition`` dataclass and handler signature."""
+"""``ActionDefinition``, ``ActionContext``, and handler/factory signatures."""
from __future__ import annotations
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
+from uuid import UUID
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+@dataclass(frozen=True, slots=True)
+class ActionContext:
+ """Per-invocation dependencies bound to an action handler at execute time."""
+
+ session: AsyncSession
+ run_id: int
+ step_id: str
+ search_space_id: int
+ creator_user_id: UUID | None
+
ActionHandler = Callable[[dict[str, Any]], Awaitable[Any]]
+ActionHandlerFactory = Callable[[ActionContext], ActionHandler]
@dataclass(frozen=True, slots=True)
@@ -15,4 +31,4 @@ class ActionDefinition:
name: str
description: str
params_schema: dict[str, Any]
- handler: ActionHandler
+ build_handler: ActionHandlerFactory
diff --git a/surfsense_backend/app/automations/runtime/executor.py b/surfsense_backend/app/automations/runtime/executor.py
index 51c4417e3..e9e55b02d 100644
--- a/surfsense_backend/app/automations/runtime/executor.py
+++ b/surfsense_backend/app/automations/runtime/executor.py
@@ -8,7 +8,9 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.automations.persistence.enums.run_status import RunStatus
from app.automations.persistence.models.run import AutomationRun
+from app.automations.registries.actions.types import ActionContext
from app.automations.schemas.definition.envelope import AutomationDefinition
+from app.automations.schemas.definition.plan_step import PlanStep
from app.automations.templating import build_run_context
from . import repository
@@ -41,10 +43,12 @@ async def execute_run(session: AsyncSession, run_id: int) -> None:
step_outputs: dict[str, Any] = {}
for step in definition.plan:
- ctx = _build_ctx(run, step_outputs)
+ template_ctx = _build_template_ctx(run, step_outputs)
+ action_ctx = _build_action_ctx(session, run, step)
result = await execute_step(
step=step,
- template_context=ctx,
+ template_context=template_ctx,
+ action_context=action_ctx,
default_max_retries=definition.execution.max_retries,
default_retry_backoff=definition.execution.retry_backoff,
default_timeout_seconds=definition.execution.timeout_seconds,
@@ -73,11 +77,13 @@ async def _run_on_failure(
"""Run the on_failure steps. Their failures don't recurse into more on_failure."""
if not definition.execution.on_failure:
return
- ctx = _build_ctx(run, step_outputs={})
+ template_ctx = _build_template_ctx(run, step_outputs={})
for step in definition.execution.on_failure:
+ action_ctx = _build_action_ctx(session, run, step)
result = await execute_step(
step=step,
- template_context=ctx,
+ template_context=template_ctx,
+ action_context=action_ctx,
default_max_retries=definition.execution.max_retries,
default_retry_backoff=definition.execution.retry_backoff,
default_timeout_seconds=definition.execution.timeout_seconds,
@@ -86,7 +92,7 @@ async def _run_on_failure(
await session.commit()
-def _build_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dict[str, Any]:
+def _build_template_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dict[str, Any]:
automation = run.automation
trigger = run.trigger
return build_run_context(
@@ -103,3 +109,16 @@ def _build_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dict[str, An
resolved_inputs=run.resolved_inputs or {},
step_outputs=step_outputs,
)
+
+
+def _build_action_ctx(
+ session: AsyncSession, run: AutomationRun, step: PlanStep
+) -> ActionContext:
+ automation = run.automation
+ return ActionContext(
+ session=session,
+ run_id=run.id,
+ step_id=step.step_id,
+ search_space_id=automation.search_space_id,
+ creator_user_id=automation.created_by_user_id,
+ )
diff --git a/surfsense_backend/app/automations/runtime/step.py b/surfsense_backend/app/automations/runtime/step.py
index 07b894a91..76e3ba171 100644
--- a/surfsense_backend/app/automations/runtime/step.py
+++ b/surfsense_backend/app/automations/runtime/step.py
@@ -7,6 +7,7 @@ from datetime import UTC, datetime
from typing import Any
from app.automations.registries import get_action
+from app.automations.registries.actions.types import ActionContext
from app.automations.schemas.definition.plan_step import PlanStep
from app.automations.templating import evaluate_predicate, render_value
@@ -17,6 +18,7 @@ async def execute_step(
*,
step: PlanStep,
template_context: Mapping[str, Any],
+ action_context: ActionContext,
default_max_retries: int,
default_retry_backoff: str,
default_timeout_seconds: int,
@@ -47,12 +49,14 @@ async def execute_step(
error={"message": f"action not registered: {step.action}", "type": "ActionNotFound"},
)
+ handler = action.build_handler(action_context)
+
max_retries = step.max_retries if step.max_retries is not None else default_max_retries
timeout = step.timeout_seconds or default_timeout_seconds
try:
result, attempts = await with_retries(
- lambda: action.handler(resolved_params),
+ lambda: handler(resolved_params),
max_retries=max_retries,
backoff=default_retry_backoff,
timeout=timeout,
From ce45e110096aa560f5e53393a911d07fdffd0d30 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 17:02:44 +0200
Subject: [PATCH 53/87] feat(automations): wire agent_task to multi_agent_chat
with auto-approve loop
---
.../app/automations/actions/__init__.py | 1 +
.../actions/agent_task/__init__.py | 7 ++
.../actions/agent_task/auto_decide.py | 39 ++++++++
.../actions/agent_task/dependencies.py | 58 +++++++++++
.../automations/actions/agent_task/factory.py | 27 ++++++
.../actions/agent_task/finalize.py | 44 +++++++++
.../automations/actions/agent_task/invoke.py | 97 +++++++++++++++++++
.../registries/actions/agent_task.py | 21 +---
.../automations/schemas/actions/agent_task.py | 22 ++---
9 files changed, 285 insertions(+), 31 deletions(-)
create mode 100644 surfsense_backend/app/automations/actions/__init__.py
create mode 100644 surfsense_backend/app/automations/actions/agent_task/__init__.py
create mode 100644 surfsense_backend/app/automations/actions/agent_task/auto_decide.py
create mode 100644 surfsense_backend/app/automations/actions/agent_task/dependencies.py
create mode 100644 surfsense_backend/app/automations/actions/agent_task/factory.py
create mode 100644 surfsense_backend/app/automations/actions/agent_task/finalize.py
create mode 100644 surfsense_backend/app/automations/actions/agent_task/invoke.py
diff --git a/surfsense_backend/app/automations/actions/__init__.py b/surfsense_backend/app/automations/actions/__init__.py
new file mode 100644
index 000000000..2a518c1db
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/__init__.py
@@ -0,0 +1 @@
+"""Action implementations. One subpackage per built-in action type."""
diff --git a/surfsense_backend/app/automations/actions/agent_task/__init__.py b/surfsense_backend/app/automations/actions/agent_task/__init__.py
new file mode 100644
index 000000000..ecf79b448
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/agent_task/__init__.py
@@ -0,0 +1,7 @@
+"""``agent_task`` action: spin up multi_agent_chat for one rendered query."""
+
+from __future__ import annotations
+
+from .factory import build_handler
+
+__all__ = ["build_handler"]
diff --git a/surfsense_backend/app/automations/actions/agent_task/auto_decide.py b/surfsense_backend/app/automations/actions/agent_task/auto_decide.py
new file mode 100644
index 000000000..357eeb565
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/agent_task/auto_decide.py
@@ -0,0 +1,39 @@
+"""Synthesize HITL decisions for every pending interrupt (approve-all or reject-all)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def build_auto_decisions(
+ state: Any, decision: str
+) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]:
+ """Return ``(lg_resume_map, surfsense_resume_value)`` covering every pending interrupt.
+
+ ``lg_resume_map`` is keyed by ``Interrupt.id`` for ``Command(resume=...)``;
+ ``surfsense_resume_value`` is keyed by ``tool_call_id`` for the subagent
+ middleware bridge. Action count is read from ``value.action_requests`` when
+ present and falls back to ``1`` for wrapped scalar interrupts.
+ """
+ lg_resume_map: dict[str, dict[str, Any]] = {}
+ routed: dict[str, dict[str, Any]] = {}
+
+ for interrupt_obj in getattr(state, "interrupts", ()) or ():
+ value = getattr(interrupt_obj, "value", None)
+ if not isinstance(value, dict):
+ continue
+ interrupt_id = getattr(interrupt_obj, "id", None)
+ if not isinstance(interrupt_id, str):
+ continue
+
+ action_requests = value.get("action_requests")
+ count = len(action_requests) if isinstance(action_requests, list) else 1
+ decisions = [{"type": decision} for _ in range(count)]
+
+ lg_resume_map[interrupt_id] = {"decisions": decisions}
+
+ tool_call_id = value.get("tool_call_id")
+ if isinstance(tool_call_id, str):
+ routed[tool_call_id] = {"decisions": decisions}
+
+ return lg_resume_map, routed
diff --git a/surfsense_backend/app/automations/actions/agent_task/dependencies.py b/surfsense_backend/app/automations/actions/agent_task/dependencies.py
new file mode 100644
index 000000000..12273aa0f
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/agent_task/dependencies.py
@@ -0,0 +1,58 @@
+"""Build the per-invocation dependencies the multi_agent_chat factory needs."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.tasks.chat.streaming.flows.shared.llm_bundle import load_llm_bundle
+from app.tasks.chat.streaming.flows.shared.pre_stream_setup import (
+ get_chat_checkpointer,
+ setup_connector_and_firecrawl,
+)
+
+
+class DependencyError(Exception):
+ """An external dependency (LLM config, checkpointer, ...) refused to load."""
+
+
+@dataclass(frozen=True, slots=True)
+class AgentDependencies:
+ """Everything ``create_multi_agent_chat_deep_agent`` needs from the environment."""
+
+ llm: Any
+ agent_config: Any
+ connector_service: Any
+ firecrawl_api_key: str | None
+ checkpointer: Any
+
+
+async def build_dependencies(
+ *,
+ session: AsyncSession,
+ search_space_id: int,
+) -> AgentDependencies:
+ """Load the LLM bundle, connector service, and checkpointer for one invoke.
+
+ Uses the search space's default LLM config (``config_id=-1``). Per-step
+ model overrides land in a future iteration alongside the ``model`` param.
+ """
+ llm, agent_config, err = await load_llm_bundle(
+ session, config_id=-1, search_space_id=search_space_id
+ )
+ if err is not None or llm is None:
+ raise DependencyError(err or "failed to load default LLM config")
+
+ connector_service, firecrawl_api_key = await setup_connector_and_firecrawl(
+ session, search_space_id=search_space_id
+ )
+ checkpointer = await get_chat_checkpointer()
+ return AgentDependencies(
+ llm=llm,
+ agent_config=agent_config,
+ connector_service=connector_service,
+ firecrawl_api_key=firecrawl_api_key,
+ checkpointer=checkpointer,
+ )
diff --git a/surfsense_backend/app/automations/actions/agent_task/factory.py b/surfsense_backend/app/automations/actions/agent_task/factory.py
new file mode 100644
index 000000000..a0d867f38
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/agent_task/factory.py
@@ -0,0 +1,27 @@
+"""Bind ``ActionContext`` to a callable that runs one ``agent_task`` step."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.automations.registries.actions.types import (
+ ActionContext,
+ ActionHandler,
+)
+from app.automations.schemas.actions import AgentTaskActionParams
+
+from .invoke import run_agent_task
+
+
+def build_handler(ctx: ActionContext) -> ActionHandler:
+ """Return a handler closure that validates params and runs the agent task."""
+
+ async def handle(params: dict[str, Any]) -> dict[str, Any]:
+ validated = AgentTaskActionParams.model_validate(params)
+ return await run_agent_task(
+ ctx=ctx,
+ query=validated.query,
+ auto_approve_all=validated.auto_approve_all,
+ )
+
+ return handle
diff --git a/surfsense_backend/app/automations/actions/agent_task/finalize.py b/surfsense_backend/app/automations/actions/agent_task/finalize.py
new file mode 100644
index 000000000..d5f1f95f6
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/agent_task/finalize.py
@@ -0,0 +1,44 @@
+"""Extract the agent's final assistant text from the terminal invoke result."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from langchain_core.messages import AIMessage
+
+
+def extract_final_assistant_message(result: Any) -> str | None:
+ """Return the last ``AIMessage`` text content, or ``None`` if there isn't one.
+
+ Multi-part messages (content lists) are flattened by concatenating ``text``
+ parts in order. Non-string content (tool calls, images) is skipped.
+ """
+ if not isinstance(result, dict):
+ return None
+ messages = result.get("messages")
+ if not isinstance(messages, list):
+ return None
+
+ for msg in reversed(messages):
+ if not isinstance(msg, AIMessage):
+ continue
+ return _content_to_text(msg.content)
+ return None
+
+
+def _content_to_text(content: Any) -> str | None:
+ if isinstance(content, str):
+ text = content.strip()
+ return text or None
+ if isinstance(content, list):
+ parts: list[str] = []
+ for part in content:
+ if isinstance(part, str):
+ parts.append(part)
+ elif isinstance(part, dict) and part.get("type") == "text":
+ text = part.get("text")
+ if isinstance(text, str):
+ parts.append(text)
+ joined = "".join(parts).strip()
+ return joined or None
+ return None
diff --git a/surfsense_backend/app/automations/actions/agent_task/invoke.py b/surfsense_backend/app/automations/actions/agent_task/invoke.py
new file mode 100644
index 000000000..aa849d7e2
--- /dev/null
+++ b/surfsense_backend/app/automations/actions/agent_task/invoke.py
@@ -0,0 +1,97 @@
+"""Run one ``agent_task`` invocation: ainvoke + auto-decision resume loop."""
+
+from __future__ import annotations
+
+import time
+import uuid
+from typing import Any
+
+from langchain_core.messages import HumanMessage
+from langgraph.types import Command
+
+from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
+from app.automations.registries.actions.types import ActionContext
+from app.db import ChatVisibility, async_session_maker
+
+from .auto_decide import build_auto_decisions
+from .dependencies import build_dependencies
+from .finalize import extract_final_assistant_message
+
+# Cap on HITL resume iterations. The agent should not need this many turns in one
+# step; treat overshoot as a runaway and fail the step.
+_MAX_RESUMES = 50
+
+
+async def run_agent_task(
+ *,
+ ctx: ActionContext,
+ query: str,
+ auto_approve_all: bool,
+) -> dict[str, Any]:
+ """Invoke multi_agent_chat for one rendered query and return its outcome.
+
+ Opens its own DB session so the executor's bookkeeping session isn't tied
+ up for the entire invocation. The LangGraph ``thread_id`` (a fresh UUID)
+ is returned as ``agent_session_id`` for later inspection.
+ """
+ agent_session_id = str(uuid.uuid4())
+ user_id = str(ctx.creator_user_id) if ctx.creator_user_id else None
+ decision = "approve" if auto_approve_all else "reject"
+
+ async with async_session_maker() as agent_session:
+ deps = await build_dependencies(
+ session=agent_session,
+ search_space_id=ctx.search_space_id,
+ )
+
+ agent = await create_multi_agent_chat_deep_agent(
+ llm=deps.llm,
+ search_space_id=ctx.search_space_id,
+ db_session=agent_session,
+ connector_service=deps.connector_service,
+ checkpointer=deps.checkpointer,
+ user_id=user_id,
+ thread_id=None,
+ agent_config=deps.agent_config,
+ firecrawl_api_key=deps.firecrawl_api_key,
+ thread_visibility=ChatVisibility.PRIVATE,
+ )
+
+ request_id = f"automation:{ctx.run_id}:{ctx.step_id}"
+ turn_id = f"{request_id}:{int(time.time() * 1000)}"
+ input_state: dict[str, Any] = {
+ "messages": [HumanMessage(content=query)],
+ "search_space_id": ctx.search_space_id,
+ "request_id": request_id,
+ "turn_id": turn_id,
+ }
+ config: dict[str, Any] = {
+ "configurable": {
+ "thread_id": agent_session_id,
+ "request_id": request_id,
+ "turn_id": turn_id,
+ },
+ "recursion_limit": 10_000,
+ }
+
+ result = await agent.ainvoke(input_state, config=config)
+
+ resumes = 0
+ while True:
+ state = await agent.aget_state(config)
+ if not getattr(state, "interrupts", None):
+ break
+ if resumes >= _MAX_RESUMES:
+ raise RuntimeError(
+ f"agent_task exceeded {_MAX_RESUMES} HITL resume iterations"
+ )
+ lg_resume_map, routed = build_auto_decisions(state, decision)
+ config["configurable"]["surfsense_resume_value"] = routed
+ result = await agent.ainvoke(Command(resume=lg_resume_map), config=config)
+ resumes += 1
+
+ return {
+ "agent_session_id": agent_session_id,
+ "final_message": extract_final_assistant_message(result),
+ "resumes": resumes,
+ }
diff --git a/surfsense_backend/app/automations/registries/actions/agent_task.py b/surfsense_backend/app/automations/registries/actions/agent_task.py
index beba455cc..51ee0eb7f 100644
--- a/surfsense_backend/app/automations/registries/actions/agent_task.py
+++ b/surfsense_backend/app/automations/registries/actions/agent_task.py
@@ -2,31 +2,18 @@
from __future__ import annotations
-from typing import Any
-
+from app.automations.actions.agent_task import build_handler
from app.automations.schemas.actions import AgentTaskActionParams
from .store import register_action
-from .types import ActionContext, ActionDefinition, ActionHandler
-
-
-def _build_handler(ctx: ActionContext) -> ActionHandler:
- """Bind run/session context to the agent_task handler. Real wiring lands in Phase 4b."""
- del ctx # ignored by the stub; real handler will consume it
-
- async def handle(params: dict[str, Any]) -> dict[str, Any]:
- AgentTaskActionParams.model_validate(params)
- return {"status": "stubbed"}
-
- return handle
-
+from .types import ActionDefinition
AGENT_TASK_ACTION = ActionDefinition(
type="agent_task",
name="Agent task",
- description="Run an agent task with a scoped tool allowlist.",
+ description="Run a multi_agent_chat turn from an automation step.",
params_schema=AgentTaskActionParams.model_json_schema(),
- build_handler=_build_handler,
+ build_handler=build_handler,
)
register_action(AGENT_TASK_ACTION)
diff --git a/surfsense_backend/app/automations/schemas/actions/agent_task.py b/surfsense_backend/app/automations/schemas/actions/agent_task.py
index 348db8095..b0e99a78b 100644
--- a/surfsense_backend/app/automations/schemas/actions/agent_task.py
+++ b/surfsense_backend/app/automations/schemas/actions/agent_task.py
@@ -2,26 +2,20 @@
from __future__ import annotations
-from typing import Any
-
from pydantic import BaseModel, ConfigDict, Field
class AgentTaskActionParams(BaseModel):
- """Run an agent task with a scoped tool allowlist."""
+ """Run a multi_agent_chat turn from an automation step."""
model_config = ConfigDict(extra="forbid")
- prompt: str = Field(..., min_length=1, description="Task prompt; rendered at execute time.")
- tools: list[str] = Field(
- default_factory=list,
- description="Tool identifiers the agent may call. Empty = no tool access.",
+ query: str = Field(
+ ...,
+ min_length=1,
+ description="User query for the agent; rendered at execute time.",
)
- model: str | None = Field(
- default=None,
- description="Model identifier. Defaults to the search space's agent_llm_id.",
- )
- output_schema: dict[str, Any] | None = Field(
- default=None,
- description="JSON Schema (draft 2020-12) the agent must return. Recommended.",
+ auto_approve_all: bool = Field(
+ default=False,
+ description="If true, every HITL approval is auto-approved; otherwise rejected.",
)
From 8c32455818094287faf3f3bfa6e602b98616d138 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 17:07:20 +0200
Subject: [PATCH 54/87] refactor(automations): vertical-slice actions and
triggers by domain
---
.../app/automations/actions/__init__.py | 25 +++++++++++++-
.../actions/agent_task/__init__.py | 12 +++++--
.../agent_task/definition.py} | 11 +++----
.../automations/actions/agent_task/factory.py | 8 ++---
.../automations/actions/agent_task/invoke.py | 3 +-
.../agent_task/params.py} | 0
.../{registries => }/actions/store.py | 0
.../{registries => }/actions/types.py | 0
.../app/automations/registries/__init__.py | 33 -------------------
.../registries/actions/__init__.py | 19 -----------
.../app/automations/runtime/executor.py | 2 +-
.../app/automations/runtime/step.py | 4 +--
.../app/automations/schemas/__init__.py | 13 ++++----
.../automations/schemas/actions/__init__.py | 9 -----
.../automations/schemas/triggers/__init__.py | 11 -------
.../{registries => }/triggers/__init__.py | 6 +++-
.../automations/triggers/manual/__init__.py | 10 ++++++
.../manual/definition.py} | 9 +++--
.../manual.py => triggers/manual/params.py} | 0
.../automations/triggers/schedule/__init__.py | 10 ++++++
.../schedule/definition.py} | 9 +++--
.../schedule/params.py} | 0
.../{registries => }/triggers/store.py | 0
.../{registries => }/triggers/types.py | 0
24 files changed, 86 insertions(+), 108 deletions(-)
rename surfsense_backend/app/automations/{registries/actions/agent_task.py => actions/agent_task/definition.py} (54%)
rename surfsense_backend/app/automations/{schemas/actions/agent_task.py => actions/agent_task/params.py} (100%)
rename surfsense_backend/app/automations/{registries => }/actions/store.py (100%)
rename surfsense_backend/app/automations/{registries => }/actions/types.py (100%)
delete mode 100644 surfsense_backend/app/automations/registries/__init__.py
delete mode 100644 surfsense_backend/app/automations/registries/actions/__init__.py
delete mode 100644 surfsense_backend/app/automations/schemas/actions/__init__.py
delete mode 100644 surfsense_backend/app/automations/schemas/triggers/__init__.py
rename surfsense_backend/app/automations/{registries => }/triggers/__init__.py (61%)
create mode 100644 surfsense_backend/app/automations/triggers/manual/__init__.py
rename surfsense_backend/app/automations/{registries/triggers/manual.py => triggers/manual/definition.py} (58%)
rename surfsense_backend/app/automations/{schemas/triggers/manual.py => triggers/manual/params.py} (100%)
create mode 100644 surfsense_backend/app/automations/triggers/schedule/__init__.py
rename surfsense_backend/app/automations/{registries/triggers/schedule.py => triggers/schedule/definition.py} (64%)
rename surfsense_backend/app/automations/{schemas/triggers/schedule.py => triggers/schedule/params.py} (100%)
rename surfsense_backend/app/automations/{registries => }/triggers/store.py (100%)
rename surfsense_backend/app/automations/{registries => }/triggers/types.py (100%)
diff --git a/surfsense_backend/app/automations/actions/__init__.py b/surfsense_backend/app/automations/actions/__init__.py
index 2a518c1db..9ef091cb3 100644
--- a/surfsense_backend/app/automations/actions/__init__.py
+++ b/surfsense_backend/app/automations/actions/__init__.py
@@ -1 +1,24 @@
-"""Action implementations. One subpackage per built-in action type."""
+"""Actions domain: registry surface + built-in action packages.
+
+Each action lives in its own subpackage (``agent_task/``, ...) and self-registers
+at import time via its ``definition`` module. Side-effect imports below ensure
+the registry is populated whenever anyone touches the actions package.
+"""
+
+from __future__ import annotations
+
+from .store import all_actions, get_action, register_action
+from .types import ActionContext, ActionDefinition, ActionHandler, ActionHandlerFactory
+
+__all__ = [
+ "ActionContext",
+ "ActionDefinition",
+ "ActionHandler",
+ "ActionHandlerFactory",
+ "all_actions",
+ "get_action",
+ "register_action",
+]
+
+# Built-in actions self-register at import time.
+from . import agent_task # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/actions/agent_task/__init__.py b/surfsense_backend/app/automations/actions/agent_task/__init__.py
index ecf79b448..308812211 100644
--- a/surfsense_backend/app/automations/actions/agent_task/__init__.py
+++ b/surfsense_backend/app/automations/actions/agent_task/__init__.py
@@ -1,7 +1,15 @@
-"""``agent_task`` action: spin up multi_agent_chat for one rendered query."""
+"""``agent_task`` action: spin up multi_agent_chat for one rendered query.
+
+Imports ``definition`` for its side effect (self-registration on the actions
+registry) and re-exports ``build_handler`` and ``AgentTaskActionParams``.
+"""
from __future__ import annotations
from .factory import build_handler
+from .params import AgentTaskActionParams
-__all__ = ["build_handler"]
+__all__ = ["AgentTaskActionParams", "build_handler"]
+
+# Side-effect: register on the actions store.
+from . import definition # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/registries/actions/agent_task.py b/surfsense_backend/app/automations/actions/agent_task/definition.py
similarity index 54%
rename from surfsense_backend/app/automations/registries/actions/agent_task.py
rename to surfsense_backend/app/automations/actions/agent_task/definition.py
index 51ee0eb7f..d7db5cfcd 100644
--- a/surfsense_backend/app/automations/registries/actions/agent_task.py
+++ b/surfsense_backend/app/automations/actions/agent_task/definition.py
@@ -1,12 +1,11 @@
-"""Built-in ``agent_task`` action. Self-registers at import time."""
+"""``agent_task`` ``ActionDefinition`` registration."""
from __future__ import annotations
-from app.automations.actions.agent_task import build_handler
-from app.automations.schemas.actions import AgentTaskActionParams
-
-from .store import register_action
-from .types import ActionDefinition
+from ..store import register_action
+from ..types import ActionDefinition
+from .factory import build_handler
+from .params import AgentTaskActionParams
AGENT_TASK_ACTION = ActionDefinition(
type="agent_task",
diff --git a/surfsense_backend/app/automations/actions/agent_task/factory.py b/surfsense_backend/app/automations/actions/agent_task/factory.py
index a0d867f38..18a408e13 100644
--- a/surfsense_backend/app/automations/actions/agent_task/factory.py
+++ b/surfsense_backend/app/automations/actions/agent_task/factory.py
@@ -4,13 +4,9 @@ from __future__ import annotations
from typing import Any
-from app.automations.registries.actions.types import (
- ActionContext,
- ActionHandler,
-)
-from app.automations.schemas.actions import AgentTaskActionParams
-
+from ..types import ActionContext, ActionHandler
from .invoke import run_agent_task
+from .params import AgentTaskActionParams
def build_handler(ctx: ActionContext) -> ActionHandler:
diff --git a/surfsense_backend/app/automations/actions/agent_task/invoke.py b/surfsense_backend/app/automations/actions/agent_task/invoke.py
index aa849d7e2..a37e9beed 100644
--- a/surfsense_backend/app/automations/actions/agent_task/invoke.py
+++ b/surfsense_backend/app/automations/actions/agent_task/invoke.py
@@ -10,9 +10,10 @@ from langchain_core.messages import HumanMessage
from langgraph.types import Command
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
-from app.automations.registries.actions.types import ActionContext
from app.db import ChatVisibility, async_session_maker
+from ..types import ActionContext
+
from .auto_decide import build_auto_decisions
from .dependencies import build_dependencies
from .finalize import extract_final_assistant_message
diff --git a/surfsense_backend/app/automations/schemas/actions/agent_task.py b/surfsense_backend/app/automations/actions/agent_task/params.py
similarity index 100%
rename from surfsense_backend/app/automations/schemas/actions/agent_task.py
rename to surfsense_backend/app/automations/actions/agent_task/params.py
diff --git a/surfsense_backend/app/automations/registries/actions/store.py b/surfsense_backend/app/automations/actions/store.py
similarity index 100%
rename from surfsense_backend/app/automations/registries/actions/store.py
rename to surfsense_backend/app/automations/actions/store.py
diff --git a/surfsense_backend/app/automations/registries/actions/types.py b/surfsense_backend/app/automations/actions/types.py
similarity index 100%
rename from surfsense_backend/app/automations/registries/actions/types.py
rename to surfsense_backend/app/automations/actions/types.py
diff --git a/surfsense_backend/app/automations/registries/__init__.py b/surfsense_backend/app/automations/registries/__init__.py
deleted file mode 100644
index f6af3817b..000000000
--- a/surfsense_backend/app/automations/registries/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""Action and trigger registries — populated at process startup."""
-
-from __future__ import annotations
-
-from .actions import (
- ActionContext,
- ActionDefinition,
- ActionHandler,
- ActionHandlerFactory,
- all_actions,
- get_action,
- register_action,
-)
-from .triggers import (
- TriggerDefinition,
- all_triggers,
- get_trigger,
- register_trigger,
-)
-
-__all__ = [
- "ActionContext",
- "ActionDefinition",
- "ActionHandler",
- "ActionHandlerFactory",
- "TriggerDefinition",
- "all_actions",
- "all_triggers",
- "get_action",
- "get_trigger",
- "register_action",
- "register_trigger",
-]
diff --git a/surfsense_backend/app/automations/registries/actions/__init__.py b/surfsense_backend/app/automations/registries/actions/__init__.py
deleted file mode 100644
index b95c634f2..000000000
--- a/surfsense_backend/app/automations/registries/actions/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""Action registry."""
-
-from __future__ import annotations
-
-from .store import all_actions, get_action, register_action
-from .types import ActionContext, ActionDefinition, ActionHandler, ActionHandlerFactory
-
-__all__ = [
- "ActionContext",
- "ActionDefinition",
- "ActionHandler",
- "ActionHandlerFactory",
- "all_actions",
- "get_action",
- "register_action",
-]
-
-# Built-in actions self-register at import time.
-from . import agent_task # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/runtime/executor.py b/surfsense_backend/app/automations/runtime/executor.py
index e9e55b02d..ced44fb9b 100644
--- a/surfsense_backend/app/automations/runtime/executor.py
+++ b/surfsense_backend/app/automations/runtime/executor.py
@@ -8,7 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.automations.persistence.enums.run_status import RunStatus
from app.automations.persistence.models.run import AutomationRun
-from app.automations.registries.actions.types import ActionContext
+from app.automations.actions.types import ActionContext
from app.automations.schemas.definition.envelope import AutomationDefinition
from app.automations.schemas.definition.plan_step import PlanStep
from app.automations.templating import build_run_context
diff --git a/surfsense_backend/app/automations/runtime/step.py b/surfsense_backend/app/automations/runtime/step.py
index 76e3ba171..ac18b5e1f 100644
--- a/surfsense_backend/app/automations/runtime/step.py
+++ b/surfsense_backend/app/automations/runtime/step.py
@@ -6,8 +6,8 @@ from collections.abc import Mapping
from datetime import UTC, datetime
from typing import Any
-from app.automations.registries import get_action
-from app.automations.registries.actions.types import ActionContext
+from app.automations.actions import get_action
+from app.automations.actions.types import ActionContext
from app.automations.schemas.definition.plan_step import PlanStep
from app.automations.templating import evaluate_predicate, render_value
diff --git a/surfsense_backend/app/automations/schemas/__init__.py b/surfsense_backend/app/automations/schemas/__init__.py
index 8659ac9c9..2e2d60f12 100644
--- a/surfsense_backend/app/automations/schemas/__init__.py
+++ b/surfsense_backend/app/automations/schemas/__init__.py
@@ -1,8 +1,13 @@
-"""Schemas for the automation definition and per-type configs."""
+"""Schemas for the automation definition envelope.
+
+Per-action and per-trigger params schemas live with the action/trigger
+implementations (``app.automations.actions.<action>.params`` /
+``app.automations.triggers.<trigger>.params``); only the cross-cutting envelope
+lives here.
+"""
from __future__ import annotations
-from .actions import AgentTaskActionParams
from .definition import (
AutomationDefinition,
Execution,
@@ -11,16 +16,12 @@ from .definition import (
PlanStep,
TriggerSpec,
)
-from .triggers import ManualTriggerParams, ScheduleTriggerParams
__all__ = [
- "AgentTaskActionParams",
"AutomationDefinition",
"Execution",
"Inputs",
- "ManualTriggerParams",
"Metadata",
"PlanStep",
- "ScheduleTriggerParams",
"TriggerSpec",
]
diff --git a/surfsense_backend/app/automations/schemas/actions/__init__.py b/surfsense_backend/app/automations/schemas/actions/__init__.py
deleted file mode 100644
index c51d33b6a..000000000
--- a/surfsense_backend/app/automations/schemas/actions/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""Per-action params schemas, one per action type."""
-
-from __future__ import annotations
-
-from .agent_task import AgentTaskActionParams
-
-__all__ = [
- "AgentTaskActionParams",
-]
diff --git a/surfsense_backend/app/automations/schemas/triggers/__init__.py b/surfsense_backend/app/automations/schemas/triggers/__init__.py
deleted file mode 100644
index 3ddd26f95..000000000
--- a/surfsense_backend/app/automations/schemas/triggers/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""Per-trigger params schemas, one per trigger type."""
-
-from __future__ import annotations
-
-from .manual import ManualTriggerParams
-from .schedule import ScheduleTriggerParams
-
-__all__ = [
- "ManualTriggerParams",
- "ScheduleTriggerParams",
-]
diff --git a/surfsense_backend/app/automations/registries/triggers/__init__.py b/surfsense_backend/app/automations/triggers/__init__.py
similarity index 61%
rename from surfsense_backend/app/automations/registries/triggers/__init__.py
rename to surfsense_backend/app/automations/triggers/__init__.py
index e08dcce76..258b2fda9 100644
--- a/surfsense_backend/app/automations/registries/triggers/__init__.py
+++ b/surfsense_backend/app/automations/triggers/__init__.py
@@ -1,4 +1,8 @@
-"""Trigger registry."""
+"""Triggers domain: registry surface + built-in trigger packages.
+
+Each trigger lives in its own subpackage (``manual/``, ``schedule/``, ...) and
+self-registers at import time via its ``definition`` module.
+"""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/triggers/manual/__init__.py b/surfsense_backend/app/automations/triggers/manual/__init__.py
new file mode 100644
index 000000000..bd9b8bf43
--- /dev/null
+++ b/surfsense_backend/app/automations/triggers/manual/__init__.py
@@ -0,0 +1,10 @@
+"""``manual`` trigger: fired by a user clicking ``Run now``."""
+
+from __future__ import annotations
+
+from .params import ManualTriggerParams
+
+__all__ = ["ManualTriggerParams"]
+
+# Side-effect: register on the triggers store.
+from . import definition # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/registries/triggers/manual.py b/surfsense_backend/app/automations/triggers/manual/definition.py
similarity index 58%
rename from surfsense_backend/app/automations/registries/triggers/manual.py
rename to surfsense_backend/app/automations/triggers/manual/definition.py
index 173c38655..9eb0282af 100644
--- a/surfsense_backend/app/automations/registries/triggers/manual.py
+++ b/surfsense_backend/app/automations/triggers/manual/definition.py
@@ -1,11 +1,10 @@
-"""Built-in ``manual`` trigger. Self-registers at import time."""
+"""``manual`` ``TriggerDefinition`` registration."""
from __future__ import annotations
-from app.automations.schemas.triggers import ManualTriggerParams
-
-from .store import register_trigger
-from .types import TriggerDefinition
+from ..store import register_trigger
+from ..types import TriggerDefinition
+from .params import ManualTriggerParams
MANUAL_TRIGGER = TriggerDefinition(
type="manual",
diff --git a/surfsense_backend/app/automations/schemas/triggers/manual.py b/surfsense_backend/app/automations/triggers/manual/params.py
similarity index 100%
rename from surfsense_backend/app/automations/schemas/triggers/manual.py
rename to surfsense_backend/app/automations/triggers/manual/params.py
diff --git a/surfsense_backend/app/automations/triggers/schedule/__init__.py b/surfsense_backend/app/automations/triggers/schedule/__init__.py
new file mode 100644
index 000000000..e24750850
--- /dev/null
+++ b/surfsense_backend/app/automations/triggers/schedule/__init__.py
@@ -0,0 +1,10 @@
+"""``schedule`` trigger: fired on a cron schedule in a given timezone."""
+
+from __future__ import annotations
+
+from .params import ScheduleTriggerParams
+
+__all__ = ["ScheduleTriggerParams"]
+
+# Side-effect: register on the triggers store.
+from . import definition # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/registries/triggers/schedule.py b/surfsense_backend/app/automations/triggers/schedule/definition.py
similarity index 64%
rename from surfsense_backend/app/automations/registries/triggers/schedule.py
rename to surfsense_backend/app/automations/triggers/schedule/definition.py
index 0a6575f39..3f86d767c 100644
--- a/surfsense_backend/app/automations/registries/triggers/schedule.py
+++ b/surfsense_backend/app/automations/triggers/schedule/definition.py
@@ -1,11 +1,10 @@
-"""Built-in ``schedule`` trigger. Self-registers at import time."""
+"""``schedule`` ``TriggerDefinition`` registration."""
from __future__ import annotations
-from app.automations.schemas.triggers import ScheduleTriggerParams
-
-from .store import register_trigger
-from .types import TriggerDefinition
+from ..store import register_trigger
+from ..types import TriggerDefinition
+from .params import ScheduleTriggerParams
SCHEDULE_TRIGGER = TriggerDefinition(
type="schedule",
diff --git a/surfsense_backend/app/automations/schemas/triggers/schedule.py b/surfsense_backend/app/automations/triggers/schedule/params.py
similarity index 100%
rename from surfsense_backend/app/automations/schemas/triggers/schedule.py
rename to surfsense_backend/app/automations/triggers/schedule/params.py
diff --git a/surfsense_backend/app/automations/registries/triggers/store.py b/surfsense_backend/app/automations/triggers/store.py
similarity index 100%
rename from surfsense_backend/app/automations/registries/triggers/store.py
rename to surfsense_backend/app/automations/triggers/store.py
diff --git a/surfsense_backend/app/automations/registries/triggers/types.py b/surfsense_backend/app/automations/triggers/types.py
similarity index 100%
rename from surfsense_backend/app/automations/registries/triggers/types.py
rename to surfsense_backend/app/automations/triggers/types.py
From 861b91004d4e5a0f992c8bbc0b2cb2be50e3ec73 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 17:20:23 +0200
Subject: [PATCH 55/87] refactor(automations): extract dispatch_run; move
manual adapter under triggers/manual/dispatch.py
---
.../app/automations/dispatch/__init__.py | 12 ++--
.../app/automations/dispatch/errors.py | 7 ++
.../app/automations/dispatch/run.py | 72 +++++++++++++++++++
.../automations/triggers/manual/__init__.py | 3 +-
.../manual.py => triggers/manual/dispatch.py} | 56 +++------------
.../app/routes/automations_routes.py | 3 +-
6 files changed, 97 insertions(+), 56 deletions(-)
create mode 100644 surfsense_backend/app/automations/dispatch/errors.py
create mode 100644 surfsense_backend/app/automations/dispatch/run.py
rename surfsense_backend/app/automations/{dispatch/manual.py => triggers/manual/dispatch.py} (51%)
diff --git a/surfsense_backend/app/automations/dispatch/__init__.py b/surfsense_backend/app/automations/dispatch/__init__.py
index 4a549a4ce..be8a36581 100644
--- a/surfsense_backend/app/automations/dispatch/__init__.py
+++ b/surfsense_backend/app/automations/dispatch/__init__.py
@@ -1,8 +1,8 @@
-"""Public dispatch surface for firing automations."""
+"""Generic dispatch primitives shared across trigger types."""
-from .manual import DispatchError, dispatch_manual_run
+from __future__ import annotations
-__all__ = [
- "DispatchError",
- "dispatch_manual_run",
-]
+from .errors import DispatchError
+from .run import dispatch_run
+
+__all__ = ["DispatchError", "dispatch_run"]
diff --git a/surfsense_backend/app/automations/dispatch/errors.py b/surfsense_backend/app/automations/dispatch/errors.py
new file mode 100644
index 000000000..75640a987
--- /dev/null
+++ b/surfsense_backend/app/automations/dispatch/errors.py
@@ -0,0 +1,7 @@
+"""Dispatch errors raised when a fire request cannot be turned into a run."""
+
+from __future__ import annotations
+
+
+class DispatchError(Exception):
+    """Raised when a run cannot be dispatched (missing trigger, invalid inputs, ...)."""
diff --git a/surfsense_backend/app/automations/dispatch/run.py b/surfsense_backend/app/automations/dispatch/run.py
new file mode 100644
index 000000000..fd5107a18
--- /dev/null
+++ b/surfsense_backend/app/automations/dispatch/run.py
@@ -0,0 +1,72 @@
+"""Generic run dispatch: validate, snapshot, persist, enqueue. Shared by every trigger."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import jsonschema
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.persistence.enums.run_status import RunStatus
+from app.automations.persistence.models.automation import Automation
+from app.automations.persistence.models.run import AutomationRun
+from app.automations.persistence.models.trigger import AutomationTrigger
+from app.automations.schemas.definition.envelope import AutomationDefinition
+from app.automations.tasks.execute_run import automation_run_execute
+
+from .errors import DispatchError
+
+
+async def dispatch_run(
+    *,
+    session: AsyncSession,
+    automation: Automation,
+    trigger: AutomationTrigger,
+    payload: dict[str, Any] | None,
+) -> AutomationRun:
+    """Record one fire as a ``PENDING`` ``AutomationRun`` and enqueue its execution.
+
+    Trigger-specific adapters resolve ``automation`` and ``trigger`` and apply
+    their own ``ACTIVE`` / ``enabled`` guards before calling this; only the
+    trigger-agnostic steps live here. Raises ``DispatchError`` when the stored
+    definition or the payload does not validate.
+    """
+    try:
+        parsed = AutomationDefinition.model_validate(automation.definition)
+    except Exception as err:
+        raise DispatchError(f"invalid automation definition: {err}") from err
+
+    inputs = _validate_inputs(parsed, payload or {})
+    frozen_definition = parsed.model_dump(mode="json", by_alias=True)
+
+    new_run = AutomationRun(
+        automation_id=automation.id,
+        trigger_id=trigger.id,
+        status=RunStatus.PENDING,
+        definition_snapshot=frozen_definition,
+        trigger_payload=payload,  # stored as received, including None
+        resolved_inputs=inputs,
+        step_results=[],
+        artifacts=[],
+    )
+    session.add(new_run)
+    await session.commit()
+    await session.refresh(new_run)
+
+    # The run is committed first; the task is handed only its id.
+    automation_run_execute.apply_async(
+        args=[new_run.id], time_limit=parsed.execution.timeout_seconds
+    )
+    return new_run
+
+
+def _validate_inputs(
+    definition: AutomationDefinition, payload: dict[str, Any]
+) -> dict[str, Any]:
+    if definition.inputs is None or not definition.inputs.schema_:
+        return {}  # no inputs schema declared: nothing is resolved
+    try:  # validate() checks the schema itself first, then the payload
+        jsonschema.validate(instance=payload, schema=definition.inputs.schema_)
+    except (jsonschema.ValidationError, jsonschema.SchemaError) as exc:
+        raise DispatchError(f"inputs: {exc.message}") from exc
+    return payload
diff --git a/surfsense_backend/app/automations/triggers/manual/__init__.py b/surfsense_backend/app/automations/triggers/manual/__init__.py
index bd9b8bf43..65cca9270 100644
--- a/surfsense_backend/app/automations/triggers/manual/__init__.py
+++ b/surfsense_backend/app/automations/triggers/manual/__init__.py
@@ -2,9 +2,10 @@
from __future__ import annotations
+from .dispatch import dispatch_manual_run
from .params import ManualTriggerParams
-__all__ = ["ManualTriggerParams"]
+__all__ = ["ManualTriggerParams", "dispatch_manual_run"]
# Side-effect: register on the triggers store.
from . import definition # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/dispatch/manual.py b/surfsense_backend/app/automations/triggers/manual/dispatch.py
similarity index 51%
rename from surfsense_backend/app/automations/dispatch/manual.py
rename to surfsense_backend/app/automations/triggers/manual/dispatch.py
index 221d6a3e2..750c99937 100644
--- a/surfsense_backend/app/automations/dispatch/manual.py
+++ b/surfsense_backend/app/automations/triggers/manual/dispatch.py
@@ -1,25 +1,18 @@
-"""Manual ``Run now`` dispatch: validate inputs, snapshot the definition, enqueue."""
+"""Manual ``Run now`` dispatch adapter: load + guard, then call generic dispatch."""
from __future__ import annotations
from typing import Any
-import jsonschema
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
+from app.automations.dispatch import DispatchError, dispatch_run
from app.automations.persistence.enums.automation_status import AutomationStatus
-from app.automations.persistence.enums.run_status import RunStatus
from app.automations.persistence.enums.trigger_type import TriggerType
from app.automations.persistence.models.automation import Automation
from app.automations.persistence.models.run import AutomationRun
from app.automations.persistence.models.trigger import AutomationTrigger
-from app.automations.schemas.definition.envelope import AutomationDefinition
-from app.automations.tasks.execute_run import automation_run_execute
-
-
-class DispatchError(Exception):
- """A manual dispatch could not proceed (missing trigger, invalid inputs, ...)."""
async def dispatch_manual_run(
@@ -28,7 +21,7 @@ async def dispatch_manual_run(
automation_id: int,
payload: dict[str, Any] | None,
) -> AutomationRun:
- """Validate, snapshot, persist, and enqueue an ``AutomationRun``."""
+ """Find the automation + its enabled manual trigger, then run the generic dispatch."""
automation = await _load_automation(session, automation_id)
if automation is None:
raise DispatchError(f"automation {automation_id} not found")
@@ -38,39 +31,18 @@ async def dispatch_manual_run(
f"automation {automation_id} is {automation.status.value}, not active"
)
- try:
- definition = AutomationDefinition.model_validate(automation.definition)
- except Exception as exc:
- raise DispatchError(f"invalid automation definition: {exc}") from exc
-
trigger = await _find_manual_trigger(session, automation_id)
if trigger is None:
raise DispatchError(
f"automation {automation_id} has no enabled manual trigger"
)
- resolved_inputs = _validate_inputs(definition, payload or {})
- snapshot = definition.model_dump(mode="json", by_alias=True)
-
- run = AutomationRun(
- automation_id=automation_id,
- trigger_id=trigger.id,
- status=RunStatus.PENDING,
- definition_snapshot=snapshot,
- trigger_payload=payload,
- resolved_inputs=resolved_inputs,
- step_results=[],
- artifacts=[],
+ return await dispatch_run(
+ session=session,
+ automation=automation,
+ trigger=trigger,
+ payload=payload,
)
- session.add(run)
- await session.commit()
- await session.refresh(run)
-
- automation_run_execute.apply_async(
- args=[run.id],
- time_limit=definition.execution.timeout_seconds,
- )
- return run
async def _load_automation(
@@ -93,15 +65,3 @@ async def _find_manual_trigger(
.limit(1)
)
return (await session.execute(stmt)).scalar_one_or_none()
-
-
-def _validate_inputs(
- definition: AutomationDefinition, payload: dict[str, Any]
-) -> dict[str, Any]:
- if definition.inputs is None or not definition.inputs.schema_:
- return {}
- try:
- jsonschema.validate(instance=payload, schema=definition.inputs.schema_)
- except jsonschema.ValidationError as exc:
- raise DispatchError(f"inputs: {exc.message}") from exc
- return payload
diff --git a/surfsense_backend/app/routes/automations_routes.py b/surfsense_backend/app/routes/automations_routes.py
index 02c019625..6c169b199 100644
--- a/surfsense_backend/app/routes/automations_routes.py
+++ b/surfsense_backend/app/routes/automations_routes.py
@@ -8,8 +8,9 @@ from fastapi import APIRouter, Body, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
-from app.automations.dispatch import DispatchError, dispatch_manual_run
+from app.automations.dispatch import DispatchError
from app.automations.persistence.models.automation import Automation
+from app.automations.triggers.manual import dispatch_manual_run
from app.db import Permission, User, get_async_session
from app.users import current_active_user
from app.utils.rbac import check_permission
From f08b3164417f841644afe355edc794aa1a64f851 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 17:55:58 +0200
Subject: [PATCH 56/87] add next_fire_at to automation_triggers and croniter
dep
---
.../alembic/versions/144_add_automation_tables.py | 13 +++++++++++++
.../app/automations/persistence/models/trigger.py | 5 +++++
surfsense_backend/pyproject.toml | 1 +
surfsense_backend/uv.lock | 14 ++++++++++++++
4 files changed, 33 insertions(+)
diff --git a/surfsense_backend/alembic/versions/144_add_automation_tables.py b/surfsense_backend/alembic/versions/144_add_automation_tables.py
index 8b59ee969..6daf4075f 100644
--- a/surfsense_backend/alembic/versions/144_add_automation_tables.py
+++ b/surfsense_backend/alembic/versions/144_add_automation_tables.py
@@ -89,6 +89,7 @@ def upgrade() -> None:
params JSONB NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT true,
last_fired_at TIMESTAMP WITH TIME ZONE,
+ next_fire_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
"""
@@ -105,6 +106,17 @@ def upgrade() -> None:
op.execute(
"CREATE INDEX ix_automation_triggers_created_at ON automation_triggers(created_at);"
)
+ # Partial index for the schedule tick: only enabled schedule triggers
+ # with a scheduled next fire are ever scanned for due rows.
+ op.execute(
+ """
+ CREATE INDEX ix_automation_triggers_due
+ ON automation_triggers (next_fire_at)
+ WHERE enabled = true
+ AND type = 'schedule'
+ AND next_fire_at IS NOT NULL;
+ """
+ )
# automation_runs — the immutable per-fire execution record
op.execute(
@@ -148,6 +160,7 @@ def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS ix_automation_runs_automation_id;")
op.execute("DROP TABLE IF EXISTS automation_runs;")
+ op.execute("DROP INDEX IF EXISTS ix_automation_triggers_due;")
op.execute("DROP INDEX IF EXISTS ix_automation_triggers_created_at;")
op.execute("DROP INDEX IF EXISTS ix_automation_triggers_enabled;")
op.execute("DROP INDEX IF EXISTS ix_automation_triggers_type;")
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index 7582234d4..b09bc3419 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -46,6 +46,11 @@ class AutomationTrigger(BaseModel, TimestampMixin):
last_fired_at = Column(TIMESTAMP(timezone=True), nullable=True)
+ # Precomputed next fire moment in UTC; advanced after each fire by the
+ # schedule tick. NULL means the trigger has never been scheduled (the
+ # tick self-heals on first sight). Manual triggers leave this NULL.
+ next_fire_at = Column(TIMESTAMP(timezone=True), nullable=True)
+
automation = relationship("Automation", back_populates="triggers")
runs = relationship(
"AutomationRun",
diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml
index 71c53caae..2ed0acca4 100644
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@@ -87,6 +87,7 @@ dependencies = [
"opentelemetry-instrumentation-httpx>=0.61b0",
"opentelemetry-instrumentation-celery>=0.61b0",
"opentelemetry-instrumentation-logging>=0.61b0",
+ "croniter>=2.0.0",
]
[dependency-groups]
diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock
index b902363dc..ba88153c5 100644
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@@ -1265,6 +1265,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848, upload-time = "2024-10-29T16:40:18.325Z" },
]
+[[package]]
+name = "croniter"
+version = "6.2.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "python-dateutil" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/de/5832661ed55107b8a09af3f0a2e71e0957226a59eb1dcf0a445cce6daf20/croniter-6.2.2.tar.gz", hash = "sha256:ba60832a5ec8e12e51b8691c3309a113d1cf6526bdf1a48150ce8ec7a532d0ab", size = 113762, upload-time = "2026-03-15T08:43:48.112Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d0/39/783980e78cb92c2d7bdb1fc7dbc86e94ccc6d58224d76a7f1f51b6c51e30/croniter-6.2.2-py3-none-any.whl", hash = "sha256:a5d17b1060974d36251ea4faf388233eca8acf0d09cbd92d35f4c4ac8f279960", size = 45422, upload-time = "2026-03-15T08:43:46.626Z" },
+]
+
[[package]]
name = "cryptography"
version = "46.0.6"
@@ -8132,6 +8144,7 @@ dependencies = [
{ name = "celery", extra = ["redis"] },
{ name = "chonkie", extra = ["all"] },
{ name = "composio" },
+ { name = "croniter" },
{ name = "datasets" },
{ name = "daytona" },
{ name = "deepagents" },
@@ -8228,6 +8241,7 @@ requires-dist = [
{ name = "celery", extras = ["redis"], specifier = ">=5.5.3" },
{ name = "chonkie", extras = ["all"], specifier = ">=1.5.0" },
{ name = "composio", specifier = ">=0.10.9" },
+ { name = "croniter", specifier = ">=2.0.0" },
{ name = "datasets", specifier = ">=2.21.0" },
{ name = "daytona", specifier = ">=0.146.0" },
{ name = "deepagents", specifier = ">=0.4.12,<0.5" },
From 3b1d7c4389b0224f86dd38c7ea121e94d78f35ee Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 17:56:02 +0200
Subject: [PATCH 57/87] add cron-based schedule trigger
---
.../automations/triggers/schedule/__init__.py | 10 +++-
.../app/automations/triggers/schedule/cron.py | 37 ++++++++++++++
.../automations/triggers/schedule/dispatch.py | 48 +++++++++++++++++++
.../automations/triggers/schedule/params.py | 12 ++++-
4 files changed, 105 insertions(+), 2 deletions(-)
create mode 100644 surfsense_backend/app/automations/triggers/schedule/cron.py
create mode 100644 surfsense_backend/app/automations/triggers/schedule/dispatch.py
diff --git a/surfsense_backend/app/automations/triggers/schedule/__init__.py b/surfsense_backend/app/automations/triggers/schedule/__init__.py
index e24750850..5587692b9 100644
--- a/surfsense_backend/app/automations/triggers/schedule/__init__.py
+++ b/surfsense_backend/app/automations/triggers/schedule/__init__.py
@@ -2,9 +2,17 @@
from __future__ import annotations
+from .cron import InvalidCronError, compute_next_fire_at, validate_cron
+from .dispatch import dispatch_schedule_run
from .params import ScheduleTriggerParams
-__all__ = ["ScheduleTriggerParams"]
+__all__ = [
+ "InvalidCronError",
+ "ScheduleTriggerParams",
+ "compute_next_fire_at",
+ "dispatch_schedule_run",
+ "validate_cron",
+]
# Side-effect: register on the triggers store.
from . import definition # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/triggers/schedule/cron.py b/surfsense_backend/app/automations/triggers/schedule/cron.py
new file mode 100644
index 000000000..7155bab33
--- /dev/null
+++ b/surfsense_backend/app/automations/triggers/schedule/cron.py
@@ -0,0 +1,37 @@
+"""Cron math for the ``schedule`` trigger: validate + advance ``next_fire_at``."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+from croniter import CroniterBadCronError, croniter
+
+
+class InvalidCronError(ValueError):
+ """Raised when a cron expression or timezone fails validation.
+
+ Subclasses ``ValueError``, so handlers that catch ``ValueError`` also
+ catch this error.
+ """
+
+
+def validate_cron(cron: str, timezone: str) -> None:
+    """Raise ``InvalidCronError`` if ``cron`` or ``timezone`` is unusable.
+
+    The timezone is checked first by constructing a ``ZoneInfo``; the cron
+    expression is then checked by constructing a ``croniter``. Returns
+    ``None`` when both succeed.
+
+    Raises:
+        InvalidCronError: if the timezone key is unknown or malformed, or if
+            croniter rejects the expression.
+    """
+    try:
+        ZoneInfo(timezone)
+    except (ZoneInfoNotFoundError, ValueError) as exc:
+        # ZoneInfo raises a plain ValueError (not ZoneInfoNotFoundError) for
+        # malformed keys such as "" or "../UTC"; report those the same way so
+        # callers only ever have to handle InvalidCronError.
+        raise InvalidCronError(f"unknown timezone {timezone!r}") from exc
+
+    try:
+        croniter(cron)
+    except (CroniterBadCronError, ValueError) as exc:
+        raise InvalidCronError(f"invalid cron {cron!r}: {exc}") from exc
+
+
+def compute_next_fire_at(cron: str, timezone: str, *, after: datetime) -> datetime:
+    """Return the next moment matching ``cron`` in ``timezone`` strictly after ``after``.
+
+    The result is normalized to UTC for storage. ``after`` is converted into the
+    given timezone before evaluation so DST and IANA rules apply correctly; a
+    naive ``after`` is interpreted as UTC.
+
+    Raises:
+        InvalidCronError: if the timezone cannot be loaded, or if croniter
+            rejects the expression or cannot produce a next match. Callers
+            (the schedule tick) rely on this type to disable broken triggers.
+    """
+    try:
+        tz = ZoneInfo(timezone)
+    except (ZoneInfoNotFoundError, ValueError) as exc:
+        # Unknown keys raise ZoneInfoNotFoundError; malformed keys raise a
+        # plain ValueError. Both mean the stored timezone is unusable.
+        raise InvalidCronError(f"unknown timezone {timezone!r}") from exc
+
+    base = after.astimezone(tz) if after.tzinfo else after.replace(tzinfo=UTC).astimezone(tz)
+    try:
+        # Both constructing the iterator and asking for the next match can
+        # fail; croniter's errors derive from ValueError.
+        nxt: datetime = croniter(cron, base).get_next(datetime)
+    except (CroniterBadCronError, ValueError) as exc:
+        raise InvalidCronError(f"invalid cron {cron!r}: {exc}") from exc
+    return nxt.astimezone(UTC)
diff --git a/surfsense_backend/app/automations/triggers/schedule/dispatch.py b/surfsense_backend/app/automations/triggers/schedule/dispatch.py
new file mode 100644
index 000000000..fb4fcf686
--- /dev/null
+++ b/surfsense_backend/app/automations/triggers/schedule/dispatch.py
@@ -0,0 +1,48 @@
+"""Schedule dispatch adapter: load + guard, then call generic dispatch."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.dispatch import DispatchError, dispatch_run
+from app.automations.persistence.enums.automation_status import AutomationStatus
+from app.automations.persistence.models.automation import Automation
+from app.automations.persistence.models.run import AutomationRun
+from app.automations.persistence.models.trigger import AutomationTrigger
+
+
+async def dispatch_schedule_run(
+ *,
+ session: AsyncSession,
+ trigger: AutomationTrigger,
+) -> AutomationRun:
+ """Fire one scheduled run for ``trigger``.
+
+ The caller (the schedule tick) is responsible for selecting due triggers
+ and advancing ``next_fire_at`` / ``last_fired_at`` before invoking this.
+
+ Args:
+ session: async session used to load the automation and passed on to
+ ``dispatch_run``.
+ trigger: the schedule trigger being fired; its ``automation_id``
+ selects the automation to run.
+
+ Returns:
+ Whatever ``dispatch_run`` returns for this automation and trigger.
+
+ Raises:
+ DispatchError: if the automation row does not exist, or if its status
+ is not ``AutomationStatus.ACTIVE``.
+ """
+ automation = await _load_automation(session, trigger.automation_id)
+ if automation is None:
+ raise DispatchError(
+ f"automation {trigger.automation_id} not found for trigger {trigger.id}"
+ )
+
+ # Guard: only active automations may be fired by the schedule.
+ if automation.status != AutomationStatus.ACTIVE:
+ raise DispatchError(
+ f"automation {trigger.automation_id} is {automation.status.value}, not active"
+ )
+
+ # Scheduled fires pass no payload to the generic dispatch.
+ return await dispatch_run(
+ session=session,
+ automation=automation,
+ trigger=trigger,
+ payload=None,
+ )
+
+
+async def _load_automation(
+ session: AsyncSession, automation_id: int
+) -> Automation | None:
+ """Return the ``Automation`` whose id equals ``automation_id``, or ``None`` if no row matches."""
+ stmt = select(Automation).where(Automation.id == automation_id)
+ return (await session.execute(stmt)).scalar_one_or_none()
diff --git a/surfsense_backend/app/automations/triggers/schedule/params.py b/surfsense_backend/app/automations/triggers/schedule/params.py
index 0418bd1d9..21da84f68 100644
--- a/surfsense_backend/app/automations/triggers/schedule/params.py
+++ b/surfsense_backend/app/automations/triggers/schedule/params.py
@@ -2,7 +2,9 @@
from __future__ import annotations
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+
+from .cron import InvalidCronError, validate_cron
class ScheduleTriggerParams(BaseModel):
@@ -10,3 +12,11 @@ class ScheduleTriggerParams(BaseModel):
cron: str = Field(..., description="Five-field cron expression.", examples=["0 9 * * 1-5"])
timezone: str = Field(..., description="IANA timezone.", examples=["Africa/Kigali"])
+
+ @model_validator(mode="after")
+ def _validate(self) -> ScheduleTriggerParams:
+ """Check ``cron`` and ``timezone`` together once the fields are populated.
+
+ Delegates to ``validate_cron`` and re-raises its ``InvalidCronError`` as
+ a plain ``ValueError`` carrying the same message (presumably so pydantic
+ reports it as a validation error -- confirm). Returns ``self`` unchanged
+ on success.
+ """
+ try:
+ validate_cron(self.cron, self.timezone)
+ except InvalidCronError as exc:
+ raise ValueError(str(exc)) from exc
+ return self
From d84240a630f93ba1bd815f54e743c8b6d1acdca1 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 17:56:07 +0200
Subject: [PATCH 58/87] add schedule tick task and beat entry
---
.../app/automations/tasks/schedule_tick.py | 159 ++++++++++++++++++
surfsense_backend/app/celery_app.py | 11 ++
2 files changed, 170 insertions(+)
create mode 100644 surfsense_backend/app/automations/tasks/schedule_tick.py
diff --git a/surfsense_backend/app/automations/tasks/schedule_tick.py b/surfsense_backend/app/automations/tasks/schedule_tick.py
new file mode 100644
index 000000000..cade621c7
--- /dev/null
+++ b/surfsense_backend/app/automations/tasks/schedule_tick.py
@@ -0,0 +1,159 @@
+"""Celery Beat tick that fires due ``schedule`` triggers.
+
+Runs every minute. Each tick performs two passes:
+
+1. **Self-heal**: enabled schedule triggers with NULL ``next_fire_at`` get
+ it computed from their ``cron`` + ``timezone`` (e.g. fresh inserts or
+ rows restored from backup).
+2. **Claim & fire**: due rows are locked with ``FOR UPDATE SKIP LOCKED``,
+ their ``next_fire_at`` is advanced and ``last_fired_at`` is set, and
+ ``dispatch_schedule_run`` is invoked for each. Dispatch errors are
+ logged; a missed fire stays missed (matches K8s CronJob / Airflow
+ ``catchup=False`` semantics).
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.persistence.enums.trigger_type import TriggerType
+from app.automations.persistence.models.trigger import AutomationTrigger
+from app.automations.triggers.schedule import (
+ InvalidCronError,
+ compute_next_fire_at,
+ dispatch_schedule_run,
+)
+from app.celery_app import celery_app
+from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task
+
+logger = logging.getLogger(__name__)
+
+TASK_NAME = "automation_schedule_tick"
+
+# Cap rows touched per tick so a backlog of due triggers can't starve the
+# worker; remaining rows fire on the next tick.
+_TICK_BATCH = 200
+
+
+@celery_app.task(name=TASK_NAME)
+def automation_schedule_tick() -> None:
+ """Tick once: self-heal NULL next_fire_at, claim due rows, fire each.
+
+ Celery entry point registered under ``TASK_NAME``; it hands the async
+ ``_tick`` coroutine function to ``run_async_celery_task`` and returns
+ that call's result.
+ """
+ return run_async_celery_task(_tick)
+
+
+async def _tick() -> None:
+ """Run one schedule tick inside a single session.
+
+ Order of work: backfill missing ``next_fire_at`` values, claim the due
+ triggers, then dispatch one run per claimed trigger id, sequentially.
+ """
+ session_maker = get_celery_session_maker()
+ async with session_maker() as session:
+ # One timestamp is taken up front and shared by both passes.
+ now = datetime.now(UTC)
+
+ await _self_heal_null_next_fire(session, now=now)
+
+ claimed_ids = await _claim_due_triggers(session, now=now)
+ if not claimed_ids:
+ return
+
+ for trigger_id in claimed_ids:
+ await _fire_one(session, trigger_id=trigger_id)
+
+
+async def _self_heal_null_next_fire(session: AsyncSession, *, now: datetime) -> None:
+    """Backfill ``next_fire_at`` for enabled schedule triggers missing it.
+
+    At most ``_TICK_BATCH`` rows are handled per call. A trigger whose params
+    cannot produce a next fire time is disabled instead of being scheduled.
+    Changes are committed before returning; nothing is committed when no
+    rows match.
+
+    Args:
+        session: async session used for the query and the commit.
+        now: reference moment; the next fire is computed strictly after it.
+    """
+    stmt = (
+        select(AutomationTrigger)
+        .where(
+            AutomationTrigger.type == TriggerType.SCHEDULE,
+            AutomationTrigger.enabled.is_(True),
+            AutomationTrigger.next_fire_at.is_(None),
+        )
+        .limit(_TICK_BATCH)
+    )
+    triggers = (await session.execute(stmt)).scalars().all()
+    if not triggers:
+        return
+
+    for trigger in triggers:
+        try:
+            trigger.next_fire_at = compute_next_fire_at(
+                trigger.params["cron"],
+                trigger.params["timezone"],
+                after=now,
+            )
+        # ValueError is caught alongside InvalidCronError: croniter's own
+        # errors (e.g. CroniterBadCronError) derive from ValueError, and one
+        # bad stored cron must not abort the tick for every other trigger.
+        except (InvalidCronError, ValueError, KeyError, TypeError) as exc:
+            logger.warning(
+                "automation_trigger %d has invalid schedule params, disabling: %s",
+                trigger.id,
+                exc,
+            )
+            trigger.enabled = False
+
+    await session.commit()
+
+
+async def _claim_due_triggers(
+    session: AsyncSession, *, now: datetime
+) -> list[int]:
+    """Lock and advance due rows; return claimed trigger ids.
+
+    Selects up to ``_TICK_BATCH`` enabled schedule triggers whose
+    ``next_fire_at`` is at or before ``now``, oldest first, using
+    ``FOR UPDATE SKIP LOCKED``. Each claimed trigger gets ``next_fire_at``
+    advanced past ``now`` and ``last_fired_at`` set to ``now``. A trigger
+    whose params cannot produce a next fire time is disabled and not claimed.
+    Changes are committed before returning.
+
+    Args:
+        session: async session used for the locking query and the commit.
+        now: reference moment that defines "due".
+
+    Returns:
+        Ids of the triggers that were advanced, in query order; an empty
+        list when nothing is due.
+    """
+    stmt = (
+        select(AutomationTrigger)
+        .where(
+            AutomationTrigger.type == TriggerType.SCHEDULE,
+            AutomationTrigger.enabled.is_(True),
+            AutomationTrigger.next_fire_at.isnot(None),
+            AutomationTrigger.next_fire_at <= now,
+        )
+        .order_by(AutomationTrigger.next_fire_at)
+        .limit(_TICK_BATCH)
+        .with_for_update(skip_locked=True)
+    )
+    triggers = (await session.execute(stmt)).scalars().all()
+    if not triggers:
+        return []
+
+    claimed: list[int] = []
+    for trigger in triggers:
+        try:
+            trigger.next_fire_at = compute_next_fire_at(
+                trigger.params["cron"],
+                trigger.params["timezone"],
+                after=now,
+            )
+        # ValueError is caught alongside InvalidCronError: croniter's own
+        # errors (e.g. CroniterBadCronError) derive from ValueError, and one
+        # bad stored cron must not abort the claim for the whole batch.
+        except (InvalidCronError, ValueError, KeyError, TypeError) as exc:
+            logger.warning(
+                "automation_trigger %d has invalid schedule params, disabling: %s",
+                trigger.id,
+                exc,
+            )
+            trigger.enabled = False
+            continue
+
+        trigger.last_fired_at = now
+        claimed.append(trigger.id)
+
+    await session.commit()
+    return claimed
+
+
+async def _fire_one(session: AsyncSession, *, trigger_id: int) -> None:
+ """Reload the trigger post-commit and dispatch a run for it.
+
+ Returns without doing anything if the trigger can no longer be loaded.
+ Any exception raised while dispatching is logged with its traceback and
+ the session is rolled back; the exception is not re-raised, so the
+ caller's loop continues with the next trigger.
+ """
+ trigger = await session.get(AutomationTrigger, trigger_id)
+ if trigger is None:
+ return
+
+ try:
+ run = await dispatch_schedule_run(session=session, trigger=trigger)
+ logger.info(
+ "scheduled fire: trigger=%d automation=%d run=%d",
+ trigger_id,
+ trigger.automation_id,
+ run.id,
+ )
+ except Exception:
+ # Deliberately broad: a failure for one trigger is logged and isolated.
+ logger.exception(
+ "scheduled fire failed for trigger %d (next attempt at next match)",
+ trigger_id,
+ )
+ await session.rollback()
diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py
index 569178239..9169592fd 100644
--- a/surfsense_backend/app/celery_app.py
+++ b/surfsense_backend/app/celery_app.py
@@ -189,6 +189,7 @@ celery_app = Celery(
"app.tasks.celery_tasks.stale_notification_cleanup_task",
"app.tasks.celery_tasks.stripe_reconciliation_task",
"app.automations.tasks.execute_run",
+ "app.automations.tasks.schedule_tick",
],
)
@@ -283,4 +284,14 @@ celery_app.conf.beat_schedule = {
"expires": 60,
},
},
+ # Fire due automation schedule triggers. Ticks every minute; per-row cron
+ # math is precomputed (next_fire_at column) so the tick is an indexed
+ # lookup, not N cron evaluations.
+ "automation-schedule-tick": {
+ "task": "automation_schedule_tick",
+ "schedule": crontab(minute="*"),
+ "options": {
+ "expires": 50,
+ },
+ },
}
From dd6bc30f98a5fa82848b86d8a621ad78f5042ba6 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 18:56:16 +0200
Subject: [PATCH 59/87] move automations api into vertical slice with service
layer
---
.../app/automations/api/__init__.py | 12 ++++
.../app/automations/api/automation.py | 22 +++++++
.../app/automations/services/__init__.py | 7 ++
.../app/automations/services/automation.py | 65 +++++++++++++++++++
surfsense_backend/app/routes/__init__.py | 2 +-
.../app/routes/automations_routes.py | 56 ----------------
6 files changed, 107 insertions(+), 57 deletions(-)
create mode 100644 surfsense_backend/app/automations/api/__init__.py
create mode 100644 surfsense_backend/app/automations/api/automation.py
create mode 100644 surfsense_backend/app/automations/services/__init__.py
create mode 100644 surfsense_backend/app/automations/services/automation.py
delete mode 100644 surfsense_backend/app/routes/automations_routes.py
diff --git a/surfsense_backend/app/automations/api/__init__.py b/surfsense_backend/app/automations/api/__init__.py
new file mode 100644
index 000000000..459c6c1b4
--- /dev/null
+++ b/surfsense_backend/app/automations/api/__init__.py
@@ -0,0 +1,12 @@
+"""HTTP layer for the automations feature."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter
+
+from .automation import router as automation_router
+
+router = APIRouter()
+router.include_router(automation_router)
+
+__all__ = ["router"]
diff --git a/surfsense_backend/app/automations/api/automation.py b/surfsense_backend/app/automations/api/automation.py
new file mode 100644
index 000000000..42163f74d
--- /dev/null
+++ b/surfsense_backend/app/automations/api/automation.py
@@ -0,0 +1,22 @@
+"""Routes for the ``Automation`` resource."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import APIRouter, Body, Depends
+
+from app.automations.services import AutomationService, get_automation_service
+
+router = APIRouter()
+
+
+@router.post("/automations/{automation_id}/run")
+async def run_automation_now(
+ automation_id: int,
+ payload: dict[str, Any] | None = Body(default=None),
+ service: AutomationService = Depends(get_automation_service),
+) -> dict[str, Any]:
+ """Fire a manual run."""
+ run = await service.run_now(automation_id=automation_id, payload=payload)
+ return {"run_id": run.id, "status": run.status.value}
diff --git a/surfsense_backend/app/automations/services/__init__.py b/surfsense_backend/app/automations/services/__init__.py
new file mode 100644
index 000000000..f0a97d216
--- /dev/null
+++ b/surfsense_backend/app/automations/services/__init__.py
@@ -0,0 +1,7 @@
+"""Service layer for the automations feature."""
+
+from __future__ import annotations
+
+from .automation import AutomationService, get_automation_service
+
+__all__ = ["AutomationService", "get_automation_service"]
diff --git a/surfsense_backend/app/automations/services/automation.py b/surfsense_backend/app/automations/services/automation.py
new file mode 100644
index 000000000..2a921e331
--- /dev/null
+++ b/surfsense_backend/app/automations/services/automation.py
@@ -0,0 +1,65 @@
+"""``AutomationService`` — orchestration for the ``Automation`` resource."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import Depends, HTTPException
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.dispatch import DispatchError
+from app.automations.persistence.models.automation import Automation
+from app.automations.persistence.models.run import AutomationRun
+from app.automations.triggers.manual import dispatch_manual_run
+from app.db import Permission, User, get_async_session
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+
+class AutomationService:
+ """Service for the ``Automation`` resource.
+
+ Holds the database session and the acting user for the lifetime of one
+ instance; both are supplied by the caller at construction.
+ """
+
+ def __init__(self, *, session: AsyncSession, user: User) -> None:
+ self.session = session
+ self.user = user
+
+ async def run_now(
+ self,
+ *,
+ automation_id: int,
+ payload: dict[str, Any] | None,
+ ) -> AutomationRun:
+ """Fire a manual run for ``automation_id``.
+
+ Loads the automation, checks ``AUTOMATIONS_EXECUTE`` on its search
+ space via ``check_permission`` (failure behaviour is defined by that
+ helper, not here), then delegates to ``dispatch_manual_run``.
+
+ Raises:
+ HTTPException: 404 if the automation does not exist; 422 with the
+ error text if dispatch raises ``DispatchError``.
+ """
+ automation = await self._get_automation_or_raise(automation_id)
+ await check_permission(
+ self.session,
+ self.user,
+ automation.search_space_id,
+ Permission.AUTOMATIONS_EXECUTE.value,
+ "You don't have permission to execute automations in this search space",
+ )
+
+ try:
+ return await dispatch_manual_run(
+ session=self.session,
+ automation_id=automation_id,
+ payload=payload,
+ )
+ except DispatchError as exc:
+ # Translate the domain error into an HTTP 422 for the API layer.
+ raise HTTPException(status_code=422, detail=str(exc)) from exc
+
+ async def _get_automation_or_raise(self, automation_id: int) -> Automation:
+ """Get the automation by id; 404 if missing."""
+ automation = await self.session.get(Automation, automation_id)
+ if automation is None:
+ raise HTTPException(
+ status_code=404, detail=f"automation {automation_id} not found"
+ )
+ return automation
+
+
+def get_automation_service(
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+) -> AutomationService:
+ """FastAPI dependency: build an ``AutomationService`` from the injected session and active user."""
+ return AutomationService(session=session, user=user)
diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py
index 1d3ca2141..64c8c6585 100644
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@@ -7,7 +7,7 @@ from .agent_revert_route import router as agent_revert_router
from .airtable_add_connector_route import (
router as airtable_add_connector_router,
)
-from .automations_routes import router as automations_router
+from app.automations.api import router as automations_router
from .chat_comments_routes import router as chat_comments_router
from .circleback_webhook_route import router as circleback_webhook_router
from .clickup_add_connector_route import router as clickup_add_connector_router
diff --git a/surfsense_backend/app/routes/automations_routes.py b/surfsense_backend/app/routes/automations_routes.py
deleted file mode 100644
index 6c169b199..000000000
--- a/surfsense_backend/app/routes/automations_routes.py
+++ /dev/null
@@ -1,56 +0,0 @@
-"""Routes for automations. v1: manual ``Run now``."""
-
-from __future__ import annotations
-
-from typing import Any
-
-from fastapi import APIRouter, Body, Depends, HTTPException
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.automations.dispatch import DispatchError
-from app.automations.persistence.models.automation import Automation
-from app.automations.triggers.manual import dispatch_manual_run
-from app.db import Permission, User, get_async_session
-from app.users import current_active_user
-from app.utils.rbac import check_permission
-
-router = APIRouter()
-
-
-@router.post("/automations/{automation_id}/run")
-async def run_automation_now(
- automation_id: int,
- payload: dict[str, Any] | None = Body(default=None),
- session: AsyncSession = Depends(get_async_session),
- user: User = Depends(current_active_user),
-) -> dict[str, Any]:
- """Fire an automation manually. Returns the new run id and status."""
- search_space_id = (
- await session.execute(
- select(Automation.search_space_id).where(Automation.id == automation_id)
- )
- ).scalar_one_or_none()
- if search_space_id is None:
- raise HTTPException(
- status_code=404, detail=f"automation {automation_id} not found"
- )
-
- await check_permission(
- session,
- user,
- search_space_id,
- Permission.AUTOMATIONS_EXECUTE.value,
- "You don't have permission to execute automations in this search space",
- )
-
- try:
- run = await dispatch_manual_run(
- session=session,
- automation_id=automation_id,
- payload=payload,
- )
- except DispatchError as exc:
- raise HTTPException(status_code=422, detail=str(exc)) from exc
-
- return {"run_id": run.id, "status": run.status.value}
From 84d99f19a253ecf34c5e53285ce6cb65cd2d98c7 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 19:10:20 +0200
Subject: [PATCH 60/87] automations(api): API request/response schemas
---
.../app/automations/api/automation.py | 7 +-
.../app/automations/api/schemas/__init__.py | 28 ++++++++
.../app/automations/api/schemas/automation.py | 64 +++++++++++++++++++
.../app/automations/api/schemas/run.py | 50 +++++++++++++++
.../app/automations/api/schemas/trigger.py | 43 +++++++++++++
5 files changed, 189 insertions(+), 3 deletions(-)
create mode 100644 surfsense_backend/app/automations/api/schemas/__init__.py
create mode 100644 surfsense_backend/app/automations/api/schemas/automation.py
create mode 100644 surfsense_backend/app/automations/api/schemas/run.py
create mode 100644 surfsense_backend/app/automations/api/schemas/trigger.py
diff --git a/surfsense_backend/app/automations/api/automation.py b/surfsense_backend/app/automations/api/automation.py
index 42163f74d..4d0ce7209 100644
--- a/surfsense_backend/app/automations/api/automation.py
+++ b/surfsense_backend/app/automations/api/automation.py
@@ -6,17 +6,18 @@ from typing import Any
from fastapi import APIRouter, Body, Depends
+from app.automations.api.schemas import RunDispatched
from app.automations.services import AutomationService, get_automation_service
router = APIRouter()
-@router.post("/automations/{automation_id}/run")
+@router.post("/automations/{automation_id}/run", response_model=RunDispatched)
async def run_automation_now(
automation_id: int,
payload: dict[str, Any] | None = Body(default=None),
service: AutomationService = Depends(get_automation_service),
-) -> dict[str, Any]:
+) -> RunDispatched:
"""Fire a manual run."""
run = await service.run_now(automation_id=automation_id, payload=payload)
- return {"run_id": run.id, "status": run.status.value}
+ return RunDispatched(run_id=run.id, status=run.status)
diff --git a/surfsense_backend/app/automations/api/schemas/__init__.py b/surfsense_backend/app/automations/api/schemas/__init__.py
new file mode 100644
index 000000000..a8a010a2c
--- /dev/null
+++ b/surfsense_backend/app/automations/api/schemas/__init__.py
@@ -0,0 +1,28 @@
+"""Request/response schemas for the automations HTTP layer."""
+
+from __future__ import annotations
+
+from .automation import (
+ AutomationCreate,
+ AutomationDetail,
+ AutomationList,
+ AutomationSummary,
+ AutomationUpdate,
+)
+from .run import RunDetail, RunDispatched, RunList, RunSummary
+from .trigger import TriggerCreate, TriggerDetail, TriggerUpdate
+
+__all__ = [
+ "AutomationCreate",
+ "AutomationDetail",
+ "AutomationList",
+ "AutomationSummary",
+ "AutomationUpdate",
+ "RunDetail",
+ "RunDispatched",
+ "RunList",
+ "RunSummary",
+ "TriggerCreate",
+ "TriggerDetail",
+ "TriggerUpdate",
+]
diff --git a/surfsense_backend/app/automations/api/schemas/automation.py b/surfsense_backend/app/automations/api/schemas/automation.py
new file mode 100644
index 000000000..c1defd417
--- /dev/null
+++ b/surfsense_backend/app/automations/api/schemas/automation.py
@@ -0,0 +1,64 @@
+"""Request/response schemas for the ``Automation`` resource."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from app.automations.persistence.enums.automation_status import AutomationStatus
+from app.automations.schemas.definition import AutomationDefinition
+
+from .trigger import TriggerCreate, TriggerDetail
+
+
+class AutomationCreate(BaseModel):
+ """Create an automation, optionally with initial triggers (atomic)."""
+
+ model_config = ConfigDict(extra="forbid")
+
+ search_space_id: int
+ name: str = Field(..., min_length=1, max_length=200)
+ description: str | None = None
+ definition: AutomationDefinition
+ triggers: list[TriggerCreate] = Field(default_factory=list)
+
+
+class AutomationUpdate(BaseModel):
+ """Partial update of an automation. Triggers are managed separately."""
+
+ model_config = ConfigDict(extra="forbid")
+
+ name: str | None = Field(default=None, min_length=1, max_length=200)
+ description: str | None = None
+ status: AutomationStatus | None = None
+ definition: AutomationDefinition | None = None
+
+
+class AutomationSummary(BaseModel):
+ """Lightweight automation view for list endpoints."""
+
+ model_config = ConfigDict(from_attributes=True)
+
+ id: int
+ search_space_id: int
+ name: str
+ description: str | None = None
+ status: AutomationStatus
+ version: int
+ created_at: datetime
+ updated_at: datetime
+
+
+class AutomationDetail(AutomationSummary):
+ """Full automation view including definition and attached triggers."""
+
+ definition: AutomationDefinition
+ triggers: list[TriggerDetail] = Field(default_factory=list)
+
+
+class AutomationList(BaseModel):
+ """Paginated list of automations."""
+
+ items: list[AutomationSummary]
+ total: int
diff --git a/surfsense_backend/app/automations/api/schemas/run.py b/surfsense_backend/app/automations/api/schemas/run.py
new file mode 100644
index 000000000..789b6f674
--- /dev/null
+++ b/surfsense_backend/app/automations/api/schemas/run.py
@@ -0,0 +1,50 @@
+"""Response schemas for run sub-resources and run dispatch."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict
+
+from app.automations.persistence.enums.run_status import RunStatus
+
+
+class RunSummary(BaseModel):
+ """Lightweight run view for list endpoints."""
+
+ model_config = ConfigDict(from_attributes=True)
+
+ id: int
+ automation_id: int
+ trigger_id: int | None = None
+ status: RunStatus
+ started_at: datetime | None = None
+ finished_at: datetime | None = None
+ created_at: datetime
+
+
+class RunDetail(RunSummary):
+ """Full run view including snapshot, results and artifacts."""
+
+ definition_snapshot: dict[str, Any]
+ trigger_payload: dict[str, Any] | None = None
+ resolved_inputs: dict[str, Any]
+ step_results: list[dict[str, Any]]
+ output: dict[str, Any] | None = None
+ artifacts: list[dict[str, Any]]
+ error: dict[str, Any] | None = None
+
+
+class RunList(BaseModel):
+ """Paginated list of runs."""
+
+ items: list[RunSummary]
+ total: int
+
+
+class RunDispatched(BaseModel):
+ """Response of a successful run dispatch."""
+
+ run_id: int
+ status: RunStatus
diff --git a/surfsense_backend/app/automations/api/schemas/trigger.py b/surfsense_backend/app/automations/api/schemas/trigger.py
new file mode 100644
index 000000000..32afe7c60
--- /dev/null
+++ b/surfsense_backend/app/automations/api/schemas/trigger.py
@@ -0,0 +1,43 @@
+"""Request/response schemas for trigger sub-resources."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from app.automations.persistence.enums.trigger_type import TriggerType
+
+
+class TriggerCreate(BaseModel):
+ """Attach a trigger to an automation."""
+
+ model_config = ConfigDict(extra="forbid")
+
+ type: TriggerType
+ params: dict[str, Any] = Field(default_factory=dict)
+ enabled: bool = True
+
+
+class TriggerUpdate(BaseModel):
+ """Partial update of an existing trigger."""
+
+ model_config = ConfigDict(extra="forbid")
+
+ enabled: bool | None = None
+ params: dict[str, Any] | None = None
+
+
+class TriggerDetail(BaseModel):
+ """Trigger as returned to clients."""
+
+ model_config = ConfigDict(from_attributes=True)
+
+ id: int
+ type: TriggerType
+ params: dict[str, Any]
+ enabled: bool
+ last_fired_at: datetime | None = None
+ next_fire_at: datetime | None = None
+ created_at: datetime
From 27ab367a13492426f315fc7e40c8d987d6f28b74 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 21:21:43 +0200
Subject: [PATCH 61/87] feat(automations): static_inputs on triggers +
vertical-slice api/services
---
automation-design-plan.md | 489 ++++++++----------
.../versions/144_add_automation_tables.py | 4 +-
.../actions/agent_task/definition.py | 2 +-
.../app/automations/actions/types.py | 8 +-
.../app/automations/api/__init__.py | 4 +
.../app/automations/api/automation.py | 83 ++-
surfsense_backend/app/automations/api/run.py | 71 +++
.../app/automations/api/trigger.py | 55 ++
.../app/automations/dispatch/run.py | 18 +-
.../app/automations/persistence/models/run.py | 5 +-
.../automations/persistence/models/trigger.py | 4 +
.../app/automations/runtime/executor.py | 2 +-
.../{api/schemas => schemas/api}/__init__.py | 0
.../schemas => schemas/api}/automation.py | 0
.../{api/schemas => schemas/api}/run.py | 3 +-
.../{api/schemas => schemas/api}/trigger.py | 3 +
.../app/automations/services/__init__.py | 13 +-
.../app/automations/services/automation.py | 159 +++++-
.../app/automations/services/run.py | 93 ++++
.../app/automations/services/trigger.py | 143 +++++
.../app/automations/tasks/schedule_tick.py | 56 +-
.../app/automations/templating/context.py | 4 +-
.../automations/triggers/manual/definition.py | 3 +-
.../automations/triggers/manual/dispatch.py | 11 +-
.../triggers/schedule/definition.py | 7 +-
.../automations/triggers/schedule/dispatch.py | 21 +-
.../app/automations/triggers/types.py | 10 +-
27 files changed, 915 insertions(+), 356 deletions(-)
create mode 100644 surfsense_backend/app/automations/api/run.py
create mode 100644 surfsense_backend/app/automations/api/trigger.py
rename surfsense_backend/app/automations/{api/schemas => schemas/api}/__init__.py (100%)
rename surfsense_backend/app/automations/{api/schemas => schemas/api}/automation.py (100%)
rename surfsense_backend/app/automations/{api/schemas => schemas/api}/run.py (92%)
rename surfsense_backend/app/automations/{api/schemas => schemas/api}/trigger.py (87%)
create mode 100644 surfsense_backend/app/automations/services/run.py
create mode 100644 surfsense_backend/app/automations/services/trigger.py
diff --git a/automation-design-plan.md b/automation-design-plan.md
index f57385e31..db5f7a23c 100644
--- a/automation-design-plan.md
+++ b/automation-design-plan.md
@@ -34,24 +34,27 @@ system will survive feature growth:
---
-## 2. The four-layer contract
+## 2. The three-layer contract
-The system is structured as four layers. Layers 1, 2, and 4 are defined by
-SurfSense developers (at registration time). Layer 3 is what users write
-(or the NL generator produces). The runtime reads all four to do its job.
+The system is structured as three layers. Layers 1 and 3 are defined by
+SurfSense developers (at registration time). Layer 2 is what users write
+(or the NL generator produces). The runtime reads all three to do its job.
| Layer | What it is | Defined by |
| ----- | ---------- | ---------- |
-| **1. Capability registry** | What this SurfSense instance can do | Developers, at startup |
-| **2. Action contract** | Per-action input/output schema | Developers, at startup |
-| **3. Automation definition** | One concrete saved automation | Users (or NL generator) |
-| **4. Trigger contract** | Per-trigger config and payload schemas | Developers, at startup |
+| **1. Action contract** | Per-action params and output schema | Developers, at startup |
+| **2. Automation definition** | One concrete saved automation | Users (or NL generator) |
+| **3. Trigger contract** | Per-trigger params and payload schemas | Developers, at startup |
-Each layer constrains the one above. The runtime reads all four but doesn't
-know what's in them ahead of time. That's how a new capability or trigger
+Each layer constrains the next. The runtime reads all three but doesn't
+know what's in them ahead of time. That's how a new action or trigger
type becomes available across the engine without code changes outside its
registration.
+A unification layer below Layer 1 — one catalog of "things this SurfSense
+instance can do," shared by automations, agents, and future surfaces — was
+considered and deferred (§3). v1 actions are stand-alone.
+
### Schema language
Every shape in every layer is described in **JSON Schema (draft 2020-12).**
@@ -66,167 +69,126 @@ extensions on top:
---
-## 3. Capability registry (Layer 1)
+## 3. Capability unification layer — deferred to post-v1
-A `Capability` is one discrete thing the SurfSense backend exposes —
-"post a Slack message," "query the Search Space," "generate a podcast." It
-is the atomic unit of "things automations can do."
+Earlier drafts introduced a `Capability` registry as Layer 1: one catalog
+of "things this SurfSense instance can do," shared by the automation
+engine (as actions), the agent (as tools), and any future HTTP surface.
+The motivation is real — one source of truth beats N parallel registries —
+but v1 has a single action (`agent_task`) and a single consumer (the
+automation engine). The five-field shape sketched earlier (`id`,
+`description`, `input_schema`, `output_schema`, `handler`) cannot safely
+host any non-trivial capability: it carries no caller identity, no
+search-space scoping, and no authorization gate on tool delegation.
+Building the abstraction with one consumer would lock in a shape that
+doesn't survive the second consumer.
-```python
-@dataclass
-class Capability:
- id: str # "slack.post_message"
- description: str # for the NL generator + UI label
- input_schema: dict # JSON Schema
- output_schema: dict # JSON Schema
- handler: AsyncHandler
-```
+The unification layer returns when the second consumer lands (Phase 2
+tight actions or Phase 4 MCP), redesigned from the start with:
-### v1-minimum: five fields, nothing else
+- A `CallContext` carrying caller user id, search space id, and run id,
+ passed to every handler invocation.
+- Explicit scope declarations per capability (e.g. `reads:documents`,
+ `writes:slack`, `destructive`) for the authorization layer to read.
+- A per-user, per-search-space filter consulted at both definition save
+ time (validating `agent_task.tools`) and run time (scoping the agent's
+ tool list to what the automation creator can delegate).
-The Capability is **deliberately five fields in v1**. Every additional field
-that earlier drafts considered (`name`, `required_credentials`,
-`side_effects`, `expected_duration_seconds`, `cost_estimate`) has been
-removed until a concrete consumer feature demands it. Authoring stays cheap
-and the registry stays trivial to introspect:
+Until then:
-- `name` → folded into `description`. The UI can render a short label from
- the first line of `description` or fall back to `id`. No separate field
- needed in v1.
-- `required_credentials` → returns when external-credential capabilities
- ship (Phase 2). v1 capabilities run server-side with app config; nothing
- to declare.
-- `side_effects` → returns when RBAC inside automations or
- `READ_ONLY`-only agent tool gating arrives. v1 capabilities are
- hand-picked and all trusted code.
-- `expected_duration_seconds` → returns when multi-queue routing ships.
- Single Celery queue in v1.
-- `cost_estimate` → never returns as a declared field; cost is measured
- per run from a ledger, aggregated per Capability, and surfaced as a
- historical average. Pre-flight checks are deferred.
-
-The runtime invariant: a Capability is **a typed, named, callable thing
-the system can do.** Every consumer (executor, agent tool layer, future
-HTTP API) sees the same five-field shape and uses it the same way.
-
-### Where capabilities live (v1)
-
-In v1, the capability registry is a single in-memory dict, populated at
-process startup from native registrations in
-`automations/registries/capabilities/`. Identical across all workers.
-No database persistence, no closures rebuilt per worker.
-
-### MCP integration — deferred to Phase 4
-
-The earlier two-tier registry (native + MCP-derived), the
-`mcp_connections` / `mcp_tools` tables, the harvester, and the lazy
-per-worker closure cache are **deferred to Phase 4** along with the
-rest of the integration-tooling surface. They are removed from v1
-because:
-
-- v1 has no external connector capabilities (no Slack, Notion, Drive,
- etc.). The only capabilities that will ship are server-side helpers
- (search-space query / fetch) plus the loose `agent_task` action.
-- Without external connectors, the lifecycle mismatch that motivates
- the two-tier design (connect Monday, run Friday, workers restarted
- in between) doesn't arise. A startup-time dict is sufficient.
-- Phase 4 reintroduces this design as-is — the registry interface in
- v1 is the same callable surface a Phase-4 MCP harvester will register
- into. The deferral is additive, not a different design.
-
-See archived design at `docs/automation/archived/mcp-registry.md` once
-v1 ships; for now the only consumer of the registry is the in-memory
-native path.
+- v1 actions are stand-alone units (Layer 1 below); the automation engine
+ reads its own action registry, nothing else.
+- `agent_task.params.tools` is a forward-looking allowlist field with no
+ v1 semantics beyond "list of string identifiers." The handler's tool
+ resolution is opaque to the automation contract.
### Credentials — deferred to Phase 2
-The earlier per-call credential resolution pattern (`ctx.resolve_mcp_client`,
-`ctx.resolve_http_client`, `ctx.resolve_llm`) is **deferred to Phase 2**.
-v1 capabilities run server-side using app-level configuration; none of
-the seven v1 capabilities needs per-user or per-connection auth.
+External-credential handlers (Slack, email, etc.) require per-user or
+per-connection auth. v1 actions run server-side with app-level
+configuration. When tight actions ship in Phase 2, the credential design
+lands as part of the unification redesign: connection IDs in the
+definition (never tokens); credentials loaded per-call by the handler
+context (never pre-loaded into worker memory); credentials never enter
+LLM context.
-When Phase 2 ships external-credential capabilities (Slack, email, etc.),
-the three guarantees the original design promised are reintroduced
-unchanged:
+### MCP — deferred to Phase 4
-- Credentials never appear in the automation definition (connection IDs
- only).
-- Credentials never appear in the LLM's context (the host holds them
- and uses them on the LLM's behalf when executing tool calls).
-- Credentials are loaded per-call, not pre-loaded into worker memory.
-
-The Phase-2 design returns as-is; only the v1 surface is simplified.
+External tool servers feeding tools into a shared registry land with the
+rest of the integration tooling in Phase 4, after the unification layer
+is in place. The two-tier registry, `mcp_connections` and `mcp_tools`
+tables, and the harvester arrive as a single coherent step then.
---
-## 4. Action contract (Layer 2)
+## 4. Action contract
-An `Action` is what a user references in a plan step. Most actions are
-thin wrappers around one capability (e.g., `slack_post` wraps
-`slack.post_message`). Some compose: `agent_task` is one action whose
-handler invokes the LangGraph runtime, which in turn can call many
-capabilities.
+An `Action` is what a user references in a plan step. Some actions are
+deterministic single-purpose handlers (`slack_post`, `send_email`); one
+action (`agent_task`) hosts an LLM and a tool allowlist for cases where
+judgment is needed. The contract is the same in both cases — only the
+handler differs.
```python
-@dataclass
+@dataclass(frozen=True, slots=True)
class ActionDefinition:
- type: str # "agent_task", "slack_post"
- name: str # for the UI
- description: str # for the NL generator
- config_schema: dict # JSON Schema for action.config
- output_contract: dict | DynamicOutput # what it produces
- uses_capabilities: list[str] # IDs from the registry
- produces_artifacts: list[ArtifactSpec] # see §8
- handler: AsyncHandler
+ type: str # "agent_task", "slack_post"
+ name: str # short UI label
+ description: str # for the NL generator and the UI
+ params_schema: dict # JSON Schema for step.params
+ handler: ActionHandler
```
+This is the v1 shape: five fields, no handler context, no output
+contract, no artifact declaration. The deferrals are intentional:
+
+- **`output_contract`** — Phase 2. Deterministic handlers will return
+ a fixed shape; v1's only action (`agent_task`) takes an
+ `output_schema` inside `params` and validates against that instead.
+- **`produces_artifacts`** — Phase 5. Artifact lifecycle (storage,
+ signed URLs, retention) is its own design step; v1 handlers
+ persist their own outputs.
+- **Handler context** — paired with the unification redesign (§3).
+ v1 handlers receive `(args)` only; per-user / per-search-space
+ behavior is not yet a v1 concern.
+
### Tight vs loose actions
Two patterns coexist by design:
-- **Tight actions** (`slack_post`, `linear_create_issue`, `send_email`):
- config_schema is fully specified, output_contract is fixed, handler is a
- thin wrapper. ~20 LOC each. Used when the user knows exactly what they
- want done — no LLM tokens spent on trivial work.
+- **Tight actions** (`slack_post`, `linear_create_issue`,
+ `send_email`) — deterministic single-purpose handlers. ~20 LOC
+ each. **Phase 2.**
+- **Loose actions** (`agent_task`) — params_schema accepts a `prompt`,
+ a `tools` allowlist, and an optional `output_schema` declaring what
+ the agent must return; the handler validates the agent's output
+ against it. **v1.**
-- **Loose actions** (`agent_task`): config_schema accepts a `prompt` and a
- `tools` allowlist; output_contract is *dynamic* — the user declares the
- output shape they want via `output_schema` in the step config; the
- handler asks the LLM to return that shape and validates. Used when
- judgment is needed.
-
-The agent's tool list is **the same capabilities** that tight actions call
-directly. One registry, two invocation modes. Adding a new MCP server gives
-both modes access to its tools automatically.
+The agent's `tools` allowlist resolves opaquely in v1; the redesigned
+unification layer (§3) will give both invocation modes access to the
+same vocabulary, with per-user authorization gating both.
### How names in the definition become function calls
-The definition contains strings like `"action": "slack_post"`. The string is
-just a name — it does not point to a function. At runtime, the executor
-performs a **name-based lookup** against the action registry:
+The definition contains strings like `"action": "agent_task"`. The
+string is just a name — it does not point to a function. At runtime,
+the executor performs a **name-based lookup** against the action
+registry:
```python
-# step.action is a string from the JSON definition, e.g. "slack_post"
-action_def = _ACTION_REGISTRY[step.action] # dict lookup
-handler = action_def.handler # Python callable
-result = await handler(ctx, resolved_config) # invocation
+action_def = action_registry.get(step.action) # dict lookup
+handler = action_def.handler # Python callable
+result = await handler(resolved_params) # invocation
```
-The registry is a Python dict (or a thin wrapper around one) populated at
-process startup. Each entry in `automations/actions/*.py` calls a
-`register_action(...)` function at module import time, putting its
-`ActionDefinition` (including the handler function reference) into the
-registry.
+The registry is a Python dict populated at process startup. Each module
+in `automations/registries/actions/*.py` calls `register_action(...)`
+at module import time, putting its `ActionDefinition` (including the
+handler function reference) into the registry.
-The same pattern applies to capabilities. The definition references
-capabilities by ID (`"slack.post_message"`); the capability registry maps
-the ID to a `Capability` object holding the handler. Definitions never
-reference Python code directly — they reference names that the registry
-resolves to code.
-
-This separation is what makes the contract portable. The definition is
-pure data. The registry is the engine's runtime vocabulary. They meet at
-name-based lookup; nothing else crosses the boundary.
+The definition is pure data. The registry is the engine's runtime
+vocabulary. They meet at name-based lookup; nothing else crosses the
+boundary.
### The full expressive spectrum
@@ -238,7 +200,7 @@ fully agentic. Six practical shapes worth recognizing:
| **1. Direct call** | `slack_post` with literal channel and template | No LLM. ~200ms. Fractions of a cent. |
| **2. Direct call with computed inputs** | `linear_create_issue` using `{{summary.title}}` from a prior step | No LLM for this step. Cheap. |
| **3. Single-domain agent task** | `agent_task` with `tools: ["slack.*"]` only | One LLM, bounded toolset. |
-| **4. Multi-domain agent task, narrow** | `agent_task` with `tools: ["github.list_pull_requests", "linear.create_issue"]` | One LLM, named capabilities. |
+| **4. Multi-domain agent task, narrow** | `agent_task` with `tools: ["github.list_pull_requests", "linear.create_issue"]` | One LLM, named tools. |
| **5. Multi-domain agent task, broad** | `agent_task` with `tools: ["slack.*", "github.*", "linear.*"]` | One LLM, large toolset, most agentic. |
| **6. Composed plan** | `agent_task` (narrow) for thinking → `slack_post` + `linear_create_issue` for acting | Best cost-to-power ratio. |
@@ -258,7 +220,7 @@ user's.
---
-## 5. Automation definition (Layer 3)
+## 5. Automation definition
This is the JSON the user writes (or the NL generator produces). Stored in
`automations.definition` as JSONB.
@@ -287,7 +249,7 @@ This is the JSON the user writes (or the NL generator produces). Stored in
"triggers": [
{
"type": "schedule",
- "config": { "cron": "0 9 * * 1-5", "timezone": "Africa/Kigali" }
+ "params": { "cron": "0 9 * * 1-5", "timezone": "Africa/Kigali" }
}
],
@@ -295,7 +257,7 @@ This is the JSON the user writes (or the NL generator produces). Stored in
{
"step_id": "research",
"action": "agent_task",
- "config": {
+ "params": {
"prompt": "Find documents tagged {{inputs.tags}} indexed since {{inputs.since}}. Return JSON with bullets and source_doc_ids.",
"tools": ["search_space.query", "search_space.fetch_document"],
"model": "anthropic/claude-sonnet-4-7",
@@ -313,7 +275,7 @@ This is the JSON the user writes (or the NL generator produces). Stored in
{
"step_id": "deliver",
"action": "slack_post",
- "config": {
+ "params": {
"channel_id": "C0123",
"message_template": "*Competitor digest*\n\n{% for b in summary.bullets %}• {{b}}\n{% endfor %}"
}
@@ -325,11 +287,10 @@ This is the JSON the user writes (or the NL generator produces). Stored in
"max_retries": 2,
"retry_backoff": "exponential",
"concurrency": "drop_if_running",
- "budget_cap_usd": 1.50,
"on_failure": [ /* steps to run if main plan fails after retries */ ]
},
- "metadata": { "tags": ["digest"], "created_from_nl": true }
+ "metadata": { "tags": ["digest"] }
}
```
@@ -340,7 +301,7 @@ This is the JSON the user writes (or the NL generator produces). Stored in
"step_id": "...", // unique within plan
"action": "...", // references an ActionDefinition.type
"when": "{{ ... }}", // optional Jinja expr → bool; false = skip
- "config": { ... }, // validated against action's config_schema
+ "params": { ... }, // validated against action's params_schema
"output_as": "...", // binds output to this name for later steps
"max_retries": 0, // optional, overrides automation default
"timeout_seconds": 1200 // optional, overrides automation default
@@ -354,7 +315,7 @@ about it, or they compose automations through events (§7.5).
---
-## 6. Trigger contract (Layer 4)
+## 6. Trigger contract
Three trigger types. That's the entire taxonomy.
@@ -363,23 +324,12 @@ Three trigger types. That's the entire taxonomy.
```python
TriggerDefinition(
type="schedule",
- config_schema={
- "type": "object",
- "required": ["cron", "timezone"],
- "properties": {
- "cron": { "type": "string" },
- "timezone": { "type": "string", "format": "iana-timezone" }
- }
- },
- payload_schema={
- "type": "object",
- "properties": {
- "fired_at": { "type": "string", "format": "date-time" },
- "scheduled_for": { "type": "string", "format": "date-time" },
- "last_fired_at": { "type": "string", "format": "date-time" }
- }
- }
+ params_model=ScheduleTriggerParams, # cron + timezone
)
+# At fire time the schedule producer emits runtime inputs
+# (fired_at, scheduled_for, last_fired_at) which are merged with the
+# trigger row's static_inputs (static wins) and validated against
+# automation.definition.inputs.schema_.
```
Implementation: extends `app/utils/periodic_scheduler.py`, which already
@@ -395,7 +345,7 @@ want an event trigger instead.
```python
TriggerDefinition(
type="webhook",
- config_schema={
+ params_schema={
"type": "object",
"properties": {
"input_mapping": {
@@ -422,7 +372,7 @@ Dedups against runs in the last 24 hours.
```python
TriggerDefinition(
type="event",
- config_schema={
+ params_schema={
"type": "object",
"required": ["event_type"],
"properties": {
@@ -485,11 +435,13 @@ Common path (after a trigger has fired):
4. **Snapshot the resolved definition** into the run row (immutable history)
5. Enqueue executor task on the single `automations_default` Celery queue
-The cost-estimate pre-check (originally step 3) is **deferred**.
-v1 capabilities do not declare `cost_estimate`; pre-flight budgeting
-returns when a historical-cost ledger exists. The mid-flight budget
-cap (§7.2) still kills the run if accumulated cost crosses
-`budget_cap_usd`.
+The cost-estimate pre-check (originally step 3) is **deferred**. v1
+actions do not declare cost estimates, the run row has no `cost_usd`
+column, and no handler reports tokens used — so neither pre-flight
+prediction nor mid-flight accumulation can be enforced. `Execution`
+therefore does not expose `budget_cap_usd` in v1; it returns as a single
+field addition the day the cost ledger ships: per-action cost reporting,
+an `automation_runs.cost_usd` column, and executor accumulation.
Queue routing by `expected_duration_seconds` is **deferred** until load
patterns justify a second queue. v1 uses a single queue.
@@ -510,15 +462,15 @@ async def execute_run(run_id: int) -> None:
if step.when and not evaluate_predicate(step.when, context | step_outputs):
record_step_skipped(run, step); continue
- resolved_config = render_config(step.config, context | step_outputs)
+ resolved_params = render_params(step.params, context | step_outputs)
action = action_registry.get(step.action)
- validate(resolved_config, action.config_schema)
+ validate(resolved_params, action.params_schema)
try:
result = await with_retries(
action.handler,
ctx=build_action_context(run, action),
- args=resolved_config,
+ args=resolved_params,
policy=step.retry_policy or run.execution.retry_policy,
)
validate(result, step.output_schema)
@@ -541,14 +493,20 @@ validated dict come back; it doesn't know that step was "smart."
### 7.3 Action handlers
-One handler per `ActionDefinition.type`. Receives `(ctx, args)`, returns
-a dict matching `output_contract` (or matching the user-declared
-`output_schema` for dynamic-output actions like `agent_task`).
+One handler per `ActionDefinition.type`. Receives the validated `args`
+dict and returns whatever the step's output validates against (a fixed
+shape declared by tight actions, or a dynamic shape declared via
+`output_schema` in the step params for `agent_task`).
-Handlers handle their own credential resolution via `ctx.resolve_credentials`.
-They do not know about retries, timeouts, or budget caps — those are the
+Handlers do not know about retries or timeouts — those are the
executor's concern.
+In v1, handlers take `(args)` only. The `CallContext` parameter sketched
+in §7.2's pseudo-code (caller user id, search space id, run id,
+credential resolver) arrives with the unification layer redesign (§3);
+v1's single action (`agent_task`) reads what it needs from app-level
+configuration.
+
### 7.4 Template engine
#### Why it exists
@@ -747,7 +705,7 @@ Three fields, per-automation defaults with optional per-step overrides:
- `timeout_seconds`: integer
Retries on:
-- Capability handler exceptions
+- Action handler exceptions
- Output schema validation failures (for dynamic-output actions, the
validation error is fed back to the LLM in the retry)
@@ -755,12 +713,21 @@ Not retries:
- `when:` evaluation failures (these are user errors, surface immediately)
- Input validation failures (caught at dispatch, never reach the executor)
-### Budget enforcement
+### Budget enforcement *(deferred — not in v1)*
-`budget_cap_usd` is per-run. The dispatcher refuses to enqueue if estimated
-cost exceeds it. The executor kills the run if accumulated cost crosses it
-mid-flight (the LLM ops handler reports tokens consumed back to the
-executor between calls).
+Future shape: `budget_cap_usd` on `Execution`, dispatcher refuses to
+enqueue if estimated cost exceeds it, executor kills the run if
+accumulated cost crosses it mid-flight (the LLM ops handler reports
+tokens consumed back to the executor between calls).
+
+Prerequisites before this can land:
+- Each action declares cost reporting (tokens × model price, API call
+ charges) — `ActionDefinition` has no such field today.
+- An `automation_runs.cost_usd` column, with the executor accumulating cost per step.
+- A historical-cost ledger so pre-flight estimation can return useful
+ numbers (otherwise the dispatcher gate is guessing).
+
+Until all three exist, v1 has no surface for budget enforcement.
### On-failure handlers
@@ -787,14 +754,13 @@ nightly Celery Beat task deletes expired artifacts).
### Duration classes and queue routing — deferred
The original design routed runs to multiple Celery queues based on each
-capability's declared `expected_duration_seconds`. v1 ships with **one
-queue** (`automations_default`) and capabilities do not declare a
-duration. Multi-queue routing returns when burst load on a single queue
-actually justifies the operational complexity of independent worker
-pools.
+action's declared `expected_duration_seconds`. v1 ships with **one
+queue** (`automations_default`) and actions do not declare a duration.
+Multi-queue routing returns when burst load on a single queue actually
+justifies the operational complexity of independent worker pools.
Adding the second queue is a config change plus reintroducing
-`expected_duration_seconds` on the `Capability` dataclass — both
+`expected_duration_seconds` on the `ActionDefinition` dataclass — both
mechanical, additive, and free of design rewrite.
---
@@ -832,14 +798,16 @@ and an immutable run history.
### `automation_triggers`
-| field | type | notes |
-| --------------- | ----------------------------------------------------------------------------- | ------------------------------------------- |
-| `id` | int PK | |
-| `automation_id` | FK | |
-| `type` | enum: `schedule`, `manual` (Phase 2/3 add `webhook`, `event`) | |
-| `config` | jsonb | validated against trigger's `config_schema` |
-| `enabled` | bool | |
-| `last_fired_at` | timestamp | |
+| field | type | notes |
+| --------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------- |
+| `id` | int PK | |
+| `automation_id` | FK | |
+| `type` | enum: `schedule`, `manual` (Phase 2/3 add `webhook`, `event`) | |
+| `params` | jsonb | trigger-type config, validated against trigger's `params_schema` |
+| `static_inputs` | jsonb | per-attachment domain values merged into every run (static wins on collision) |
+| `enabled` | bool | |
+| `last_fired_at` | timestamp | |
+| `next_fire_at` | timestamp / null | precomputed next fire moment for schedule triggers |
`secret_hash` (for webhook bearer tokens) is **deferred to Phase 2** with
the webhook trigger.
@@ -853,8 +821,7 @@ the webhook trigger.
| `trigger_id` | FK / null | null = manual via UI |
| `status` | enum | `pending`, `running`, `succeeded`, `failed`, `cancelled`, `timed_out` |
| `definition_snapshot` | jsonb | the definition as it was when this run fired |
-| `trigger_payload` | jsonb | |
-| `resolved_inputs` | jsonb | |
+| `inputs` | jsonb | merged & validated inputs (trigger.static_inputs ∪ producer runtime data, static wins) |
| `step_results` | jsonb | array of per-step results with timing |
| `output` | jsonb / null | |
| `artifacts` | jsonb | references to created artifacts |
@@ -863,7 +830,7 @@ the webhook trigger.
| `agent_session_id`| str / null | link to LangGraph trace if agent_task was used |
`cost_usd` (per-run accumulated cost) is **deferred** until at least one
-v1 capability records token-level cost. When reintroduced it lands as a
+action records token-level cost. When reintroduced it lands as a
column-only migration.
### Deferred tables
@@ -897,8 +864,8 @@ not "trusted authors only."
User provides natural-language input. The Generator LLM is given:
- The full schema set (input schema for definition, registry of action
- types with their config_schemas, registry of trigger types, available
- capabilities for this SearchSpace, list of allowed Jinja filters)
+ types with their params_schemas, registry of trigger types, list of
+ allowed Jinja filters)
- A tool to list available connectors, channels, and other SearchSpace
resources, so it doesn't invent names that don't exist
- A few-shot set of examples
@@ -918,13 +885,13 @@ Output: a structured proposal matching the automation definition schema.
Server-side, before the proposal reaches the user:
- Validate against JSON Schema (shape correctness)
-- Verify every capability referenced exists in the registry (resource existence)
+- Verify every action and trigger type referenced exists in the registry
- Verify every connector/channel/resource referenced exists in this SearchSpace
- Validate every template against the sandbox's allowlist (no underscore
attributes, no unregistered filter names, length under cap)
Failures here are deterministic errors, not warnings. A proposal that
-references a non-existent capability or includes a template using
+references a non-existent action or includes a template using
`{{x.__class__}}` is rejected before the user sees it; the Generator is
re-prompted with the validation error and asked to fix the proposal.
@@ -947,7 +914,7 @@ produces two outputs for the user:
- Action sequences that touch external systems without obvious benefit
to the user
- Cost estimates that seem high relative to the goal
- - References to capabilities the user hasn't used before
+ - References to actions the user hasn't used before
- Schedules tighter than 15 minutes (likely should be event triggers)
The Review LLM is a **UX layer** that makes review actually useful. It is
@@ -1009,33 +976,18 @@ always.
surfsense_backend/app/
├── automations/ # NEW: the engine
│ ├── __init__.py
-│ ├── models.py # SQLAlchemy models for 6 tables
-│ ├── schemas.py # Pydantic schemas (definition envelope, etc.)
+│ ├── persistence/ # SQLAlchemy models + enums for 3 tables
+│ ├── schemas/ # Pydantic schemas (definition envelope, etc.)
│ ├── routes.py # FastAPI router (/api/v1/automations)
│ ├── service.py # CRUD + business logic
-│ ├── dispatcher.py # trigger matching, cost check, run creation
+│ ├── dispatcher.py # trigger matching, run creation
│ ├── executor.py # the Celery task that runs a plan
│ ├── templating.py # Jinja sandbox + filters
│ ├── events.py # publish/subscribe for domain_events
│ ├── filters.py # JSON filter grammar evaluator
-│ ├── actions/
-│ │ ├── registry.py
-│ │ ├── agent_task.py
-│ │ ├── transform_data.py
-│ │ ├── slack_post.py
-│ │ ├── send_email.py
-│ │ ├── notification.py
-│ │ └── (more in Phase 5: podcast_generation, report_generation, ...)
-│ ├── triggers/
-│ │ ├── registry.py
-│ │ ├── schedule.py # Celery Beat hookup
-│ │ ├── webhook.py # /fire endpoint
-│ │ └── event.py # subscribes to domain_events
-│ ├── capabilities/
-│ │ ├── registry.py
-│ │ ├── native.py # native capability registrations
-│ │ ├── mcp_harvester.py # registers MCP tools as capabilities (Phase 4)
-│ │ └── (LLM ops registered alongside)
+│ ├── registries/ # action and trigger registries
+│ │ ├── actions/ # ActionDefinition + handler registration
+│ │ └── triggers/ # TriggerDefinition
│ └── nl/ # Phase 1 — primary user path
│ ├── generator.py # Generator LLM
│ ├── reviewer.py # Review LLM (summary + flagged items)
@@ -1070,23 +1022,22 @@ automations in natural language.
**Step 1 (current scope, this batch of commits):**
- 3 tables (`automations`, `automation_triggers`, `automation_runs`) +
Alembic migration
-- Empty Capability, Action, Trigger registries (concrete entries land in
- later steps when the consuming feature lands)
+- Empty action and trigger registries under
+ `app/automations/registries/` (concrete entries land in later steps)
- Pydantic schemas for the automation definition envelope, the two v1
- trigger configs (`schedule`, `manual`), and the one v1 action config
- (`agent_task`)
-- Module structure under `app/automations/` (data/, schemas/,
+ trigger params shapes (`schedule`, `manual`), and the one v1 action
+ params shape (`agent_task`)
+- Module structure under `app/automations/` (persistence/, schemas/,
registries/), fully isolated from the existing codebase
**Step 2:**
-- Register the `agent_task` action and the `schedule` / `manual`
- triggers in the registries
-- Capability registry populated with native deliverable-producing
- capabilities (chosen when this step starts)
+- The `agent_task` action handler and the `schedule` / `manual` triggers
+ registered in `app/automations/registries/`. Tool resolution for
+ `agent_task.params.tools` is opaque to the contract — the handler
+ decides what string identifiers it accepts and how they resolve.
**Step 3:**
-- Executor (single-queue Celery task) with retries, timeouts, budget
- caps measured against `cost_usd` ledger on the run
+- Executor (single-queue Celery task) with retries and timeouts
- Template engine (Jinja sandbox + the v1 filter allowlist + runtime
limits)
- Manual "Run now" endpoint
@@ -1122,19 +1073,23 @@ somewhere humans see, complex pipelines have proper error handling.
**After Phase 3**: NL authoring is the polished primary surface; edit
flows are conversational rather than form-only.
-### Phase 4 — Event triggers
+### Phase 4 — Event triggers + integration tooling
- `domain_events` table and `events.py` module
- Indexing pipeline publishes `connector.*` events (smallest change — just
add publish calls to the existing flow)
- Automations publish `automation.run.*` events on completion
- `event` trigger with filter grammar
-- MCP capability harvester (so MCP-backed events and tools both work)
+- The unification layer redesign (see §3) — `CallContext`, scope
+ declarations, per-user authorization gating
+- MCP integration on top of the unification layer (external tool servers
+ harvested into the shared catalog)
**After Phase 4**: "do X when Y happens" automations work, including
-automation-chaining through events.
+automation-chaining through events; external MCP tools and SurfSense
+actions share one vocabulary.
### Phase 5 — Wrapping existing features and sharing
-- Wrap existing SurfSense capabilities as actions: `podcast_generation`,
+- Wrap existing SurfSense features as actions: `podcast_generation`,
`report_generation`, `indexing_sweep`
- Artifact lifecycle implementation
- `expected_duration_seconds` based queue routing (split `automations_long`
@@ -1144,7 +1099,7 @@ automation-chaining through events.
shift documented in §7.4's pre-Phase-5 gate
- Cross-automation composition examples in the docs
-**After Phase 5**: every existing SurfSense capability is automatable
+**After Phase 5**: every existing SurfSense feature is automatable
without any per-feature code, and automations can be shared between
SearchSpaces and users.
@@ -1156,13 +1111,12 @@ For reference — every decision made through the design process, in one
place.
### Foundations
-1. ✅ JSON Schema 2020-12 is the single schema language for everything
+1. ✅ JSON Schema (draft 2020-12) is the single schema language for everything
2. ✅ Definition is the program; infrastructure is the interpreter
3. ✅ List of steps (not single action) in the plan, with `output_as` chaining
-4. ✅ One capability registry serving native + MCP + LLM operations through the same interface
-5. ✅ Capability IDs do not leak handler kind (`slack.post_message`, not `mcp.slack.post_message`)
-6. ✅ Name-based resolution: definitions reference actions and capabilities by string ID. The registry is the runtime's vocabulary; lookup is a dict access. No code references in definitions.
-7. ✅ The expressive spectrum runs from pure direct calls to broad agent_task; the NL generator proposes the cheapest shape that meets intent (Shape 6 from §4 by default)
+4. ⏸ Capability unification layer (one catalog shared by automations, agents, and future surfaces) — **deferred to post-v1** (see §3). v1 ships actions only.
+5. ✅ Name-based resolution: definitions reference action and trigger types by string ID. The registry is the runtime's vocabulary; lookup is a dict access. No code references in definitions.
+6. ✅ The expressive spectrum runs from pure direct calls to broad agent_task; the NL generator proposes the cheapest shape that meets intent (Shape 6 from §4 by default)
### Trigger taxonomy
8. ✅ Three trigger types: `schedule`, `webhook`, `event`
@@ -1183,7 +1137,7 @@ place.
19. ✅ No DAGs, no parallelism, no loops — composition via agent_task or events
20. ✅ `on_failure` part of execution policy from v1
21. ✅ Step-level retry and timeout overrides
-22. ✅ Budget cap enforced pre-enqueue and mid-flight
+22. ⏸ Budget cap enforced pre-enqueue and mid-flight — **deferred** until the cost ledger ships (see §8 Budget enforcement)
### Components
23. ✅ Dispatcher / executor / handlers / registry — distinct, each replaceable
@@ -1197,25 +1151,22 @@ place.
29. ✅ Automations publish run events for composability
30. ✅ Publish/subscribe behind interface — no direct table access elsewhere
-### Capability storage
-31. ✅ Native capabilities registered in-memory at startup from the codebase. Identical across all workers.
-32. ⏸ MCP capability metadata persisted in `mcp_connections` and `mcp_tools` tables — **deferred to Phase 4**
-33. ⏸ MCP handler closures built lazily per worker from database state — **deferred to Phase 4**
-34. ⏸ MCP server tool list re-harvested on a schedule — **deferred to Phase 4**
-35. ⏸ MCP tools harvested into the capability registry at connection time — **deferred to Phase 4**
-36. ⏸ Side effects inferred from MCP hints + naming + admin overrides — **deferred to Phase 4**
-37. ⏸ MCP tools callable directly (no agent required) when caller knows args — **deferred to Phase 4**
+### Capability unification — all deferred to post-v1
+31. ⏸ One shared catalog of "things this SurfSense instance can do" — **deferred**, see §3
+32. ⏸ Handler `CallContext` (caller user id, search space id, run id) — **deferred** with unification
+33. ⏸ Per-capability scope declarations driving authorization — **deferred** with unification
+34. ⏸ MCP integration on top of the unification layer (`mcp_connections`, `mcp_tools`, harvester) — **deferred to Phase 4**
### Credentials — all deferred to Phase 2
-38. ⏸ Credentials never appear in the automation definition — only connection IDs do — **Phase 2**
-39. ⏸ Credentials never appear in the LLM's context — the host holds them — **Phase 2**
-40. ⏸ Credentials resolved per-call by `ActionContext`, not pre-loaded into worker environment — **Phase 2**
-41. ⏸ Tokens encrypted at rest; refresh handled automatically by `ActionContext.resolve_*_client` — **Phase 2**
+35. ⏸ Credentials never appear in the automation definition — only connection IDs do — **Phase 2**
+36. ⏸ Credentials never appear in the LLM's context — the host holds them — **Phase 2**
+37. ⏸ Credentials resolved per-call by the handler context, not pre-loaded into worker environment — **Phase 2**
+38. ⏸ Tokens encrypted at rest; refresh handled automatically by the handler context — **Phase 2**
-### v1-minimum (new lock)
-v1. ✅ `Capability` is exactly five fields: `id`, `description`, `input_schema`, `output_schema`, `handler`. Additional fields are added only when a concrete consumer feature requires them.
-v2. ✅ Cost is **measured** from a per-run ledger, not declared. Pre-flight cost checks return when the ledger has enough history.
-v3. ✅ Single `automations_default` Celery queue in v1. Multi-queue routing returns when load justifies it.
+### v1-minimum
+39. ✅ v1 ships actions only — no separate capability layer. `ActionDefinition` is five fields: `type`, `name`, `description`, `params_model`, `build_handler`; the JSON Schema is exposed as the derived `params_schema` property. Additional fields are added only when a concrete consumer feature requires them.
+40. ✅ Cost is **measured** from a per-run ledger, not declared. Pre-flight cost checks return when the ledger has enough history.
+41. ✅ Single `automations_default` Celery queue in v1. Multi-queue routing returns when load justifies it.
### NL authoring
42. ✅ LLM-authored templates is the primary path from day one — not a Phase 3 addition. Hand-authoring JSON is supported but secondary
@@ -1227,7 +1178,7 @@ v3. ✅ Single `automations_default` Celery queue in v1. Multi-queue routing ret
48. ✅ NL drafts are transient storage, not a core table
### Data model
-49. ✅ Six tables total — four for engine state, two for MCP persistence
+49. ✅ v1 ships three tables (`automations`, `automation_triggers`, `automation_runs`). `domain_events` lands in Phase 4, alongside `mcp_connections` and `mcp_tools`.
50. ✅ Run rows snapshot the definition (immutable history)
51. ✅ All entities scoped by `search_space_id` for RBAC
52. ✅ Editing an automation bumps `version`; existing runs unaffected
@@ -1283,7 +1234,7 @@ Schemas spelled out concretely. Those follow mechanically from this plan.
agent (cron and skills subsystems); n8n documentation on node types and
workflow data model; the SurfSense repository and DeepWiki architecture
notes (FastAPI + Celery Beat + Electric SQL + LangGraph Deep Agents +
-Search Space RBAC); Model Context Protocol specification for capability
-harvesting; AWS EventBridge for filter grammar; workflow-pattern
+Search Space RBAC); Model Context Protocol specification for external
+tool harvesting; AWS EventBridge for filter grammar; workflow-pattern
literature (van der Aalst et al.) for the trigger / action / concurrency
vocabulary.*
diff --git a/surfsense_backend/alembic/versions/144_add_automation_tables.py b/surfsense_backend/alembic/versions/144_add_automation_tables.py
index 6daf4075f..8d836095d 100644
--- a/surfsense_backend/alembic/versions/144_add_automation_tables.py
+++ b/surfsense_backend/alembic/versions/144_add_automation_tables.py
@@ -87,6 +87,7 @@ def upgrade() -> None:
REFERENCES automations(id) ON DELETE CASCADE,
type automation_trigger_type NOT NULL,
params JSONB NOT NULL,
+ static_inputs JSONB NOT NULL DEFAULT '{}'::jsonb,
enabled BOOLEAN NOT NULL DEFAULT true,
last_fired_at TIMESTAMP WITH TIME ZONE,
next_fire_at TIMESTAMP WITH TIME ZONE,
@@ -129,8 +130,7 @@ def upgrade() -> None:
REFERENCES automation_triggers(id) ON DELETE SET NULL,
status automation_run_status NOT NULL DEFAULT 'pending',
definition_snapshot JSONB NOT NULL,
- trigger_payload JSONB,
- resolved_inputs JSONB NOT NULL DEFAULT '{}'::jsonb,
+ inputs JSONB NOT NULL DEFAULT '{}'::jsonb,
step_results JSONB NOT NULL DEFAULT '[]'::jsonb,
output JSONB,
artifacts JSONB NOT NULL DEFAULT '[]'::jsonb,
diff --git a/surfsense_backend/app/automations/actions/agent_task/definition.py b/surfsense_backend/app/automations/actions/agent_task/definition.py
index d7db5cfcd..7d14dc49e 100644
--- a/surfsense_backend/app/automations/actions/agent_task/definition.py
+++ b/surfsense_backend/app/automations/actions/agent_task/definition.py
@@ -11,7 +11,7 @@ AGENT_TASK_ACTION = ActionDefinition(
type="agent_task",
name="Agent task",
description="Run a multi_agent_chat turn from an automation step.",
- params_schema=AgentTaskActionParams.model_json_schema(),
+ params_model=AgentTaskActionParams,
build_handler=build_handler,
)
diff --git a/surfsense_backend/app/automations/actions/types.py b/surfsense_backend/app/automations/actions/types.py
index 433c60841..2c4ffad8d 100644
--- a/surfsense_backend/app/automations/actions/types.py
+++ b/surfsense_backend/app/automations/actions/types.py
@@ -7,6 +7,7 @@ from dataclasses import dataclass
from typing import Any
from uuid import UUID
+from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
@@ -30,5 +31,10 @@ class ActionDefinition:
type: str
name: str
description: str
- params_schema: dict[str, Any]
+ params_model: type[BaseModel]
build_handler: ActionHandlerFactory
+
+ @property
+ def params_schema(self) -> dict[str, Any]:
+ """JSON Schema (draft 2020-12) derived from ``params_model``."""
+ return self.params_model.model_json_schema()
diff --git a/surfsense_backend/app/automations/api/__init__.py b/surfsense_backend/app/automations/api/__init__.py
index 459c6c1b4..a18e91a95 100644
--- a/surfsense_backend/app/automations/api/__init__.py
+++ b/surfsense_backend/app/automations/api/__init__.py
@@ -5,8 +5,12 @@ from __future__ import annotations
from fastapi import APIRouter
from .automation import router as automation_router
+from .run import router as run_router
+from .trigger import router as trigger_router
router = APIRouter()
router.include_router(automation_router)
+router.include_router(trigger_router)
+router.include_router(run_router)
__all__ = ["router"]
diff --git a/surfsense_backend/app/automations/api/automation.py b/surfsense_backend/app/automations/api/automation.py
index 4d0ce7209..b67f0af09 100644
--- a/surfsense_backend/app/automations/api/automation.py
+++ b/surfsense_backend/app/automations/api/automation.py
@@ -1,23 +1,80 @@
-"""Routes for the ``Automation`` resource."""
+"""HTTP routes for the ``Automation`` resource."""
from __future__ import annotations
-from typing import Any
+from fastapi import APIRouter, Depends, Query, status
-from fastapi import APIRouter, Body, Depends
-
-from app.automations.api.schemas import RunDispatched
+from app.automations.schemas.api import (
+ AutomationCreate,
+ AutomationDetail,
+ AutomationList,
+ AutomationSummary,
+ AutomationUpdate,
+)
from app.automations.services import AutomationService, get_automation_service
router = APIRouter()
-@router.post("/automations/{automation_id}/run", response_model=RunDispatched)
-async def run_automation_now(
- automation_id: int,
- payload: dict[str, Any] | None = Body(default=None),
+@router.post(
+ "/automations",
+ response_model=AutomationDetail,
+ status_code=status.HTTP_201_CREATED,
+)
+async def create_automation(
+ payload: AutomationCreate,
service: AutomationService = Depends(get_automation_service),
-) -> RunDispatched:
- """Fire a manual run."""
- run = await service.run_now(automation_id=automation_id, payload=payload)
- return RunDispatched(run_id=run.id, status=run.status)
+) -> AutomationDetail:
+ """Create an automation, optionally with initial triggers (atomic)."""
+ automation = await service.create(payload)
+ return AutomationDetail.model_validate(automation)
+
+
+@router.get("/automations", response_model=AutomationList)
+async def list_automations(
+ search_space_id: int = Query(...),
+ limit: int = Query(default=50, ge=1, le=200),
+ offset: int = Query(default=0, ge=0),
+ service: AutomationService = Depends(get_automation_service),
+) -> AutomationList:
+ """List automations in a search space."""
+ items, total = await service.list(
+ search_space_id=search_space_id, limit=limit, offset=offset
+ )
+ return AutomationList(
+ items=[AutomationSummary.model_validate(a) for a in items],
+ total=total,
+ )
+
+
+@router.get("/automations/{automation_id}", response_model=AutomationDetail)
+async def get_automation(
+ automation_id: int,
+ service: AutomationService = Depends(get_automation_service),
+) -> AutomationDetail:
+ """Get one automation with its definition and triggers."""
+ automation = await service.get(automation_id)
+ return AutomationDetail.model_validate(automation)
+
+
+@router.patch("/automations/{automation_id}", response_model=AutomationDetail)
+async def update_automation(
+ automation_id: int,
+ patch: AutomationUpdate,
+ service: AutomationService = Depends(get_automation_service),
+) -> AutomationDetail:
+ """Partially update an automation. Triggers are managed separately."""
+ automation = await service.update(automation_id, patch)
+ return AutomationDetail.model_validate(automation)
+
+
+@router.delete(
+ "/automations/{automation_id}",
+ status_code=status.HTTP_204_NO_CONTENT,
+)
+async def delete_automation(
+ automation_id: int,
+ service: AutomationService = Depends(get_automation_service),
+) -> None:
+ """Delete an automation; triggers and runs are removed by FK cascade."""
+ await service.delete(automation_id)
diff --git a/surfsense_backend/app/automations/api/run.py b/surfsense_backend/app/automations/api/run.py
new file mode 100644
index 000000000..d0d4bbfb7
--- /dev/null
+++ b/surfsense_backend/app/automations/api/run.py
@@ -0,0 +1,71 @@
+"""HTTP routes for automation runs (dispatch + history)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import APIRouter, Body, Depends, Query, status
+
+from app.automations.schemas.api import (
+ RunDetail,
+ RunDispatched,
+ RunList,
+ RunSummary,
+)
+from app.automations.services import RunService, get_run_service
+
+router = APIRouter()
+
+
+@router.post(
+ "/automations/{automation_id}/run",
+ response_model=RunDispatched,
+ status_code=status.HTTP_202_ACCEPTED,
+)
+async def run_automation_now(
+ automation_id: int,
+ inputs: dict[str, Any] | None = Body(default=None),
+ service: RunService = Depends(get_run_service),
+) -> RunDispatched:
+ """Fire a manual run.
+
+ ``inputs`` is the runtime payload supplied by the caller; it is merged with
+ the manual trigger's ``static_inputs`` (static wins) and validated against
+ the automation's input schema.
+ """
+ run = await service.dispatch_manual(automation_id=automation_id, runtime_inputs=inputs)
+ return RunDispatched(run_id=run.id, status=run.status)
+
+
+@router.get(
+ "/automations/{automation_id}/runs",
+ response_model=RunList,
+)
+async def list_runs(
+ automation_id: int,
+ limit: int = Query(default=50, ge=1, le=200),
+ offset: int = Query(default=0, ge=0),
+ service: RunService = Depends(get_run_service),
+) -> RunList:
+ """List run history for an automation, newest first."""
+ items, total = await service.list(
+ automation_id=automation_id, limit=limit, offset=offset
+ )
+ return RunList(
+ items=[RunSummary.model_validate(r) for r in items],
+ total=total,
+ )
+
+
+@router.get(
+ "/automations/{automation_id}/runs/{run_id}",
+ response_model=RunDetail,
+)
+async def get_run(
+ automation_id: int,
+ run_id: int,
+ service: RunService = Depends(get_run_service),
+) -> RunDetail:
+ """Get the full record of a single run, including step results and artifacts."""
+ run = await service.get(automation_id=automation_id, run_id=run_id)
+ return RunDetail.model_validate(run)
diff --git a/surfsense_backend/app/automations/api/trigger.py b/surfsense_backend/app/automations/api/trigger.py
new file mode 100644
index 000000000..40e47a86b
--- /dev/null
+++ b/surfsense_backend/app/automations/api/trigger.py
@@ -0,0 +1,55 @@
+"""HTTP routes for triggers attached to an automation."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, Depends, status
+
+from app.automations.schemas.api import TriggerCreate, TriggerDetail, TriggerUpdate
+from app.automations.services import TriggerService, get_trigger_service
+
+router = APIRouter()
+
+
+@router.post(
+ "/automations/{automation_id}/triggers",
+ response_model=TriggerDetail,
+ status_code=status.HTTP_201_CREATED,
+)
+async def add_trigger(
+ automation_id: int,
+ payload: TriggerCreate,
+ service: TriggerService = Depends(get_trigger_service),
+) -> TriggerDetail:
+ """Attach a new trigger to an automation."""
+ trigger = await service.add(automation_id=automation_id, payload=payload)
+ return TriggerDetail.model_validate(trigger)
+
+
+@router.patch(
+ "/automations/{automation_id}/triggers/{trigger_id}",
+ response_model=TriggerDetail,
+)
+async def update_trigger(
+ automation_id: int,
+ trigger_id: int,
+ patch: TriggerUpdate,
+ service: TriggerService = Depends(get_trigger_service),
+) -> TriggerDetail:
+ """Toggle ``enabled`` or replace ``params``. Trigger type is immutable."""
+ trigger = await service.update(
+ automation_id=automation_id, trigger_id=trigger_id, patch=patch
+ )
+ return TriggerDetail.model_validate(trigger)
+
+
+@router.delete(
+ "/automations/{automation_id}/triggers/{trigger_id}",
+ status_code=status.HTTP_204_NO_CONTENT,
+)
+async def remove_trigger(
+ automation_id: int,
+ trigger_id: int,
+ service: TriggerService = Depends(get_trigger_service),
+) -> None:
+ """Detach a trigger from an automation."""
+ await service.remove(automation_id=automation_id, trigger_id=trigger_id)
diff --git a/surfsense_backend/app/automations/dispatch/run.py b/surfsense_backend/app/automations/dispatch/run.py
index fd5107a18..e317a13b9 100644
--- a/surfsense_backend/app/automations/dispatch/run.py
+++ b/surfsense_backend/app/automations/dispatch/run.py
@@ -22,10 +22,14 @@ async def dispatch_run(
session: AsyncSession,
automation: Automation,
trigger: AutomationTrigger,
- payload: dict[str, Any] | None,
+ runtime_inputs: dict[str, Any] | None = None,
) -> AutomationRun:
"""Validate, snapshot the definition, persist an ``AutomationRun``, enqueue execution.
+ Final inputs = ``trigger.static_inputs`` merged with ``runtime_inputs``,
+ static winning on key collision. The merged dict is validated against
+ ``automation.definition.inputs.schema_`` and stored on the run.
+
Callers (trigger-specific adapters) are responsible for resolving
``automation`` and ``trigger`` and for the trigger-side ``ACTIVE`` /
``enabled`` guards. This function only handles what's identical across
@@ -36,7 +40,8 @@ async def dispatch_run(
except Exception as exc:
raise DispatchError(f"invalid automation definition: {exc}") from exc
- resolved_inputs = _validate_inputs(definition, payload or {})
+ merged_inputs = {**(runtime_inputs or {}), **(trigger.static_inputs or {})}
+ validated_inputs = _validate_inputs(definition, merged_inputs)
snapshot = definition.model_dump(mode="json", by_alias=True)
run = AutomationRun(
@@ -44,8 +49,7 @@ async def dispatch_run(
trigger_id=trigger.id,
status=RunStatus.PENDING,
definition_snapshot=snapshot,
- trigger_payload=payload,
- resolved_inputs=resolved_inputs,
+ inputs=validated_inputs,
step_results=[],
artifacts=[],
)
@@ -61,12 +65,12 @@ async def dispatch_run(
def _validate_inputs(
- definition: AutomationDefinition, payload: dict[str, Any]
+ definition: AutomationDefinition, inputs: dict[str, Any]
) -> dict[str, Any]:
if definition.inputs is None or not definition.inputs.schema_:
return {}
try:
- jsonschema.validate(instance=payload, schema=definition.inputs.schema_)
+ jsonschema.validate(instance=inputs, schema=definition.inputs.schema_)
except jsonschema.ValidationError as exc:
raise DispatchError(f"inputs: {exc.message}") from exc
- return payload
+ return inputs
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index fdc355e8f..81b33c37c 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -45,8 +45,9 @@ class AutomationRun(BaseModel, TimestampMixin):
# locked at fire time so historical runs always show the exact code path
definition_snapshot = Column(JSONB, nullable=False)
- trigger_payload = Column(JSONB, nullable=True)
- resolved_inputs = Column(JSONB, nullable=False, server_default="{}")
+ # merged & validated inputs the run was dispatched with
+ # (trigger.static_inputs ∪ producer runtime data, static wins on collision)
+ inputs = Column(JSONB, nullable=False, server_default="{}")
# one entry per executed step; agent_task entries carry their own
# `agent_session_id` inside their entry
step_results = Column(JSONB, nullable=False, server_default="[]")
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index b09bc3419..72d1d8d07 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -36,6 +36,10 @@ class AutomationTrigger(BaseModel, TimestampMixin):
params = Column(JSONB, nullable=False)
+ # Per-attachment domain values merged into every dispatched run's inputs.
+ # Static wins over runtime data on key collision.
+ static_inputs = Column(JSONB, nullable=False, server_default="{}")
+
enabled = Column(
Boolean,
nullable=False,
diff --git a/surfsense_backend/app/automations/runtime/executor.py b/surfsense_backend/app/automations/runtime/executor.py
index ced44fb9b..b8a377e5b 100644
--- a/surfsense_backend/app/automations/runtime/executor.py
+++ b/surfsense_backend/app/automations/runtime/executor.py
@@ -106,7 +106,7 @@ def _build_template_ctx(run: AutomationRun, step_outputs: dict[str, Any]) -> dic
trigger_type=trigger.type.value if trigger else None,
started_at=run.started_at,
attempt=1,
- resolved_inputs=run.resolved_inputs or {},
+ inputs=run.inputs or {},
step_outputs=step_outputs,
)
diff --git a/surfsense_backend/app/automations/api/schemas/__init__.py b/surfsense_backend/app/automations/schemas/api/__init__.py
similarity index 100%
rename from surfsense_backend/app/automations/api/schemas/__init__.py
rename to surfsense_backend/app/automations/schemas/api/__init__.py
diff --git a/surfsense_backend/app/automations/api/schemas/automation.py b/surfsense_backend/app/automations/schemas/api/automation.py
similarity index 100%
rename from surfsense_backend/app/automations/api/schemas/automation.py
rename to surfsense_backend/app/automations/schemas/api/automation.py
diff --git a/surfsense_backend/app/automations/api/schemas/run.py b/surfsense_backend/app/automations/schemas/api/run.py
similarity index 92%
rename from surfsense_backend/app/automations/api/schemas/run.py
rename to surfsense_backend/app/automations/schemas/api/run.py
index 789b6f674..42ea7ac14 100644
--- a/surfsense_backend/app/automations/api/schemas/run.py
+++ b/surfsense_backend/app/automations/schemas/api/run.py
@@ -28,8 +28,7 @@ class RunDetail(RunSummary):
"""Full run view including snapshot, results and artifacts."""
definition_snapshot: dict[str, Any]
- trigger_payload: dict[str, Any] | None = None
- resolved_inputs: dict[str, Any]
+ inputs: dict[str, Any]
step_results: list[dict[str, Any]]
output: dict[str, Any] | None = None
artifacts: list[dict[str, Any]]
diff --git a/surfsense_backend/app/automations/api/schemas/trigger.py b/surfsense_backend/app/automations/schemas/api/trigger.py
similarity index 87%
rename from surfsense_backend/app/automations/api/schemas/trigger.py
rename to surfsense_backend/app/automations/schemas/api/trigger.py
index 32afe7c60..35176fb9f 100644
--- a/surfsense_backend/app/automations/api/schemas/trigger.py
+++ b/surfsense_backend/app/automations/schemas/api/trigger.py
@@ -17,6 +17,7 @@ class TriggerCreate(BaseModel):
type: TriggerType
params: dict[str, Any] = Field(default_factory=dict)
+ static_inputs: dict[str, Any] = Field(default_factory=dict)
enabled: bool = True
@@ -27,6 +28,7 @@ class TriggerUpdate(BaseModel):
enabled: bool | None = None
params: dict[str, Any] | None = None
+ static_inputs: dict[str, Any] | None = None
class TriggerDetail(BaseModel):
@@ -37,6 +39,7 @@ class TriggerDetail(BaseModel):
id: int
type: TriggerType
params: dict[str, Any]
+ static_inputs: dict[str, Any]
enabled: bool
last_fired_at: datetime | None = None
next_fire_at: datetime | None = None
diff --git a/surfsense_backend/app/automations/services/__init__.py b/surfsense_backend/app/automations/services/__init__.py
index f0a97d216..597aca98a 100644
--- a/surfsense_backend/app/automations/services/__init__.py
+++ b/surfsense_backend/app/automations/services/__init__.py
@@ -1,7 +1,16 @@
-"""Service layer for the automations feature."""
+"""Services for the automations HTTP layer (one service per resource)."""
from __future__ import annotations
from .automation import AutomationService, get_automation_service
+from .run import RunService, get_run_service
+from .trigger import TriggerService, get_trigger_service
-__all__ = ["AutomationService", "get_automation_service"]
+__all__ = [
+ "AutomationService",
+ "RunService",
+ "TriggerService",
+ "get_automation_service",
+ "get_run_service",
+ "get_trigger_service",
+]
diff --git a/surfsense_backend/app/automations/services/automation.py b/surfsense_backend/app/automations/services/automation.py
index 2a921e331..9140da3b5 100644
--- a/surfsense_backend/app/automations/services/automation.py
+++ b/surfsense_backend/app/automations/services/automation.py
@@ -2,54 +2,111 @@
from __future__ import annotations
-from typing import Any
+from datetime import UTC, datetime
from fastapi import Depends, HTTPException
+from pydantic import ValidationError
+from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
-from app.automations.dispatch import DispatchError
+from app.automations.schemas.api import (
+ AutomationCreate,
+ AutomationUpdate,
+ TriggerCreate,
+)
+from app.automations.persistence.enums.trigger_type import TriggerType
from app.automations.persistence.models.automation import Automation
-from app.automations.persistence.models.run import AutomationRun
-from app.automations.triggers.manual import dispatch_manual_run
+from app.automations.persistence.models.trigger import AutomationTrigger
+from app.automations.triggers import get_trigger
+from app.automations.triggers.schedule import compute_next_fire_at
from app.db import Permission, User, get_async_session
from app.users import current_active_user
from app.utils.rbac import check_permission
class AutomationService:
- """Service for the ``Automation`` resource."""
+ """Lifecycle of the ``Automation`` resource."""
def __init__(self, *, session: AsyncSession, user: User) -> None:
self.session = session
self.user = user
- async def run_now(
+ async def create(self, payload: AutomationCreate) -> Automation:
+ """Create an automation and its initial triggers in one transaction."""
+ await self._authorize(payload.search_space_id, Permission.AUTOMATIONS_CREATE.value)
+
+ automation = Automation(
+ search_space_id=payload.search_space_id,
+ created_by_user_id=self.user.id,
+ name=payload.name,
+ description=payload.description,
+ definition=payload.definition.model_dump(mode="json", by_alias=True),
+ version=1,
+ )
+ for spec in payload.triggers:
+ automation.triggers.append(_build_trigger(spec))
+
+ self.session.add(automation)
+ await self.session.commit()
+ return await self._get_with_triggers_or_raise(automation.id)
+
+ async def list(
self,
*,
- automation_id: int,
- payload: dict[str, Any] | None,
- ) -> AutomationRun:
- """Fire a manual run for ``automation_id``."""
- automation = await self._get_automation_or_raise(automation_id)
- await check_permission(
- self.session,
- self.user,
- automation.search_space_id,
- Permission.AUTOMATIONS_EXECUTE.value,
- "You don't have permission to execute automations in this search space",
+ search_space_id: int,
+ limit: int,
+ offset: int,
+ ) -> tuple[list[Automation], int]:
+ """Return a page of automations and the total count."""
+ await self._authorize(search_space_id, Permission.AUTOMATIONS_READ.value)
+
+ base = select(Automation).where(Automation.search_space_id == search_space_id)
+ total = await self.session.scalar(
+ select(func.count()).select_from(base.subquery())
)
- try:
- return await dispatch_manual_run(
- session=self.session,
- automation_id=automation_id,
- payload=payload,
+ rows = (
+ await self.session.execute(
+ base.order_by(Automation.created_at.desc()).limit(limit).offset(offset)
)
- except DispatchError as exc:
- raise HTTPException(status_code=422, detail=str(exc)) from exc
+ ).scalars().all()
+ return list(rows), int(total or 0)
- async def _get_automation_or_raise(self, automation_id: int) -> Automation:
- """Get the automation by id; 404 if missing."""
+ async def get(self, automation_id: int) -> Automation:
+ """Get an automation with its triggers loaded."""
+ automation = await self._get_with_triggers_or_raise(automation_id)
+ await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_READ.value)
+ return automation
+
+ async def update(self, automation_id: int, patch: AutomationUpdate) -> Automation:
+ """Patch fields. Bumps ``version`` when ``definition`` changes."""
+ automation = await self._get_with_triggers_or_raise(automation_id)
+ await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_UPDATE.value)
+
+ data = patch.model_dump(exclude_unset=True)
+
+ if "name" in data:
+ automation.name = data["name"]
+ if "description" in data:
+ automation.description = data["description"]
+ if "status" in data:
+ automation.status = data["status"]
+ if "definition" in data:
+ automation.definition = patch.definition.model_dump(mode="json", by_alias=True)
+ automation.version += 1
+
+ await self.session.commit()
+ return await self._get_with_triggers_or_raise(automation_id)
+
+ async def delete(self, automation_id: int) -> None:
+ """Delete an automation; FK cascades remove triggers and runs."""
+ automation = await self._get_or_raise(automation_id)
+ await self._authorize(automation.search_space_id, Permission.AUTOMATIONS_DELETE.value)
+ await self.session.delete(automation)
+ await self.session.commit()
+
+ async def _get_or_raise(self, automation_id: int) -> Automation:
automation = await self.session.get(Automation, automation_id)
if automation is None:
raise HTTPException(
@@ -57,6 +114,56 @@ class AutomationService:
)
return automation
+ async def _get_with_triggers_or_raise(self, automation_id: int) -> Automation:
+ stmt = (
+ select(Automation)
+ .where(Automation.id == automation_id)
+ .options(selectinload(Automation.triggers))
+ )
+ automation = (await self.session.execute(stmt)).scalar_one_or_none()
+ if automation is None:
+ raise HTTPException(
+ status_code=404, detail=f"automation {automation_id} not found"
+ )
+ return automation
+
+ async def _authorize(self, search_space_id: int, permission: str) -> None:
+ await check_permission(
+ self.session,
+ self.user,
+ search_space_id,
+ permission,
+ f"You don't have permission to {permission.split(':')[1]} automations in this search space",
+ )
+
+
+def _build_trigger(spec: TriggerCreate) -> AutomationTrigger:
+ """Validate trigger params via its registered Pydantic model and build the ORM row."""
+ definition = get_trigger(spec.type.value)
+ if definition is None:
+ raise HTTPException(status_code=422, detail=f"unknown trigger type {spec.type.value!r}")
+
+ try:
+ validated = definition.params_model.model_validate(spec.params)
+ except ValidationError as exc:
+ raise HTTPException(status_code=422, detail=str(exc)) from exc
+
+ params = validated.model_dump(mode="json")
+
+ next_fire_at = None
+ if spec.type == TriggerType.SCHEDULE and spec.enabled:
+ next_fire_at = compute_next_fire_at(
+ params["cron"], params["timezone"], after=datetime.now(UTC)
+ )
+
+ return AutomationTrigger(
+ type=spec.type,
+ params=params,
+ static_inputs=spec.static_inputs,
+ enabled=spec.enabled,
+ next_fire_at=next_fire_at,
+ )
+
def get_automation_service(
session: AsyncSession = Depends(get_async_session),
diff --git a/surfsense_backend/app/automations/services/run.py b/surfsense_backend/app/automations/services/run.py
new file mode 100644
index 000000000..92d79e9bc
--- /dev/null
+++ b/surfsense_backend/app/automations/services/run.py
@@ -0,0 +1,93 @@
+"""``RunService`` — dispatch and history of automation runs."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import Depends, HTTPException
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.dispatch import DispatchError
+from app.automations.persistence.models.automation import Automation
+from app.automations.persistence.models.run import AutomationRun
+from app.automations.triggers.manual import dispatch_manual_run
+from app.db import Permission, User, get_async_session
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+
+class RunService:
+ """Lifecycle of the ``AutomationRun`` resource."""
+
+ def __init__(self, *, session: AsyncSession, user: User) -> None:
+ self.session = session
+ self.user = user
+
+ async def dispatch_manual(
+ self,
+ *,
+ automation_id: int,
+ runtime_inputs: dict[str, Any] | None,
+ ) -> AutomationRun:
+ """Fire a manual run via the registered manual trigger."""
+ await self._authorize(automation_id, Permission.AUTOMATIONS_EXECUTE.value)
+ try:
+ return await dispatch_manual_run(
+ session=self.session,
+ automation_id=automation_id,
+ runtime_inputs=runtime_inputs,
+ )
+ except DispatchError as exc:
+ raise HTTPException(status_code=422, detail=str(exc)) from exc
+
+ async def list(
+ self,
+ *,
+ automation_id: int,
+ limit: int,
+ offset: int,
+ ) -> tuple[list[AutomationRun], int]:
+ """Return a page of runs for an automation, newest first."""
+ await self._authorize(automation_id, Permission.AUTOMATIONS_READ.value)
+
+ base = select(AutomationRun).where(AutomationRun.automation_id == automation_id)
+ total = await self.session.scalar(
+ select(func.count()).select_from(base.subquery())
+ )
+
+ rows = (
+ await self.session.execute(
+ base.order_by(AutomationRun.created_at.desc()).limit(limit).offset(offset)
+ )
+ ).scalars().all()
+ return list(rows), int(total or 0)
+
+ async def get(self, *, automation_id: int, run_id: int) -> AutomationRun:
+ await self._authorize(automation_id, Permission.AUTOMATIONS_READ.value)
+ run = await self.session.get(AutomationRun, run_id)
+ if run is None or run.automation_id != automation_id:
+ raise HTTPException(status_code=404, detail=f"run {run_id} not found")
+ return run
+
+ async def _authorize(self, automation_id: int, permission: str) -> Automation:
+ automation = await self.session.get(Automation, automation_id)
+ if automation is None:
+ raise HTTPException(
+ status_code=404, detail=f"automation {automation_id} not found"
+ )
+ await check_permission(
+ self.session,
+ self.user,
+ automation.search_space_id,
+ permission,
+ f"You don't have permission to {permission.split(':')[1]} automations in this search space",
+ )
+ return automation
+
+
+def get_run_service(
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+) -> RunService:
+ return RunService(session=session, user=user)
diff --git a/surfsense_backend/app/automations/services/trigger.py b/surfsense_backend/app/automations/services/trigger.py
new file mode 100644
index 000000000..33e9c1386
--- /dev/null
+++ b/surfsense_backend/app/automations/services/trigger.py
@@ -0,0 +1,143 @@
+"""``TriggerService`` — lifecycle of triggers attached to an automation."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from fastapi import Depends, HTTPException
+from pydantic import ValidationError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.automations.schemas.api import TriggerCreate, TriggerUpdate
+from app.automations.persistence.enums.trigger_type import TriggerType
+from app.automations.persistence.models.automation import Automation
+from app.automations.persistence.models.trigger import AutomationTrigger
+from app.automations.triggers import get_trigger
+from app.automations.triggers.schedule import compute_next_fire_at
+from app.db import Permission, User, get_async_session
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+
+class TriggerService:
+ """Lifecycle of the ``AutomationTrigger`` sub-resource."""
+
+ def __init__(self, *, session: AsyncSession, user: User) -> None:
+ self.session = session
+ self.user = user
+
+ async def add(
+ self, *, automation_id: int, payload: TriggerCreate
+ ) -> AutomationTrigger:
+ automation = await self._authorize_automation(
+ automation_id, Permission.AUTOMATIONS_UPDATE.value
+ )
+
+ validated_params = _validate_params(payload.type, payload.params)
+ trigger = AutomationTrigger(
+ automation_id=automation.id,
+ type=payload.type,
+ params=validated_params,
+ static_inputs=payload.static_inputs,
+ enabled=payload.enabled,
+ next_fire_at=_initial_next_fire(payload.type, validated_params, payload.enabled),
+ )
+ self.session.add(trigger)
+ await self.session.commit()
+ await self.session.refresh(trigger)
+ return trigger
+
+ async def update(
+ self,
+ *,
+ automation_id: int,
+ trigger_id: int,
+ patch: TriggerUpdate,
+ ) -> AutomationTrigger:
+ await self._authorize_automation(automation_id, Permission.AUTOMATIONS_UPDATE.value)
+ trigger = await self._get_trigger_or_raise(automation_id, trigger_id)
+
+ data = patch.model_dump(exclude_unset=True)
+
+ if "params" in data:
+ trigger.params = _validate_params(trigger.type, data["params"])
+
+ if "static_inputs" in data:
+ trigger.static_inputs = data["static_inputs"]
+
+ if "enabled" in data:
+ trigger.enabled = data["enabled"]
+
+ # Recompute next_fire_at when schedule timing changed or the trigger was
+ # toggled back on. Manual triggers always have NULL next_fire_at.
+ if trigger.type == TriggerType.SCHEDULE:
+ trigger.next_fire_at = _initial_next_fire(
+ trigger.type, trigger.params, trigger.enabled
+ )
+
+ await self.session.commit()
+ await self.session.refresh(trigger)
+ return trigger
+
+ async def remove(self, *, automation_id: int, trigger_id: int) -> None:
+ await self._authorize_automation(automation_id, Permission.AUTOMATIONS_UPDATE.value)
+ trigger = await self._get_trigger_or_raise(automation_id, trigger_id)
+ await self.session.delete(trigger)
+ await self.session.commit()
+
+ async def _authorize_automation(
+ self, automation_id: int, permission: str
+ ) -> Automation:
+ automation = await self.session.get(Automation, automation_id)
+ if automation is None:
+ raise HTTPException(
+ status_code=404, detail=f"automation {automation_id} not found"
+ )
+ await check_permission(
+ self.session,
+ self.user,
+ automation.search_space_id,
+ permission,
+ f"You don't have permission to {permission.split(':')[1]} automations in this search space",
+ )
+ return automation
+
+ async def _get_trigger_or_raise(
+ self, automation_id: int, trigger_id: int
+ ) -> AutomationTrigger:
+ trigger = await self.session.get(AutomationTrigger, trigger_id)
+ if trigger is None or trigger.automation_id != automation_id:
+ raise HTTPException(
+ status_code=404, detail=f"trigger {trigger_id} not found"
+ )
+ return trigger
+
+
+def _validate_params(trigger_type: TriggerType, raw: dict) -> dict:
+ definition = get_trigger(trigger_type.value)
+ if definition is None:
+ raise HTTPException(
+ status_code=422, detail=f"unknown trigger type {trigger_type.value!r}"
+ )
+ try:
+ validated = definition.params_model.model_validate(raw)
+ except ValidationError as exc:
+ raise HTTPException(status_code=422, detail=str(exc)) from exc
+ return validated.model_dump(mode="json")
+
+
+def _initial_next_fire(
+ trigger_type: TriggerType, params: dict, enabled: bool
+) -> datetime | None:
+ if trigger_type != TriggerType.SCHEDULE or not enabled:
+ return None
+ return compute_next_fire_at(
+ params["cron"], params["timezone"], after=datetime.now(UTC)
+ )
+
+
+def get_trigger_service(
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+) -> TriggerService:
+ return TriggerService(session=session, user=user)
diff --git a/surfsense_backend/app/automations/tasks/schedule_tick.py b/surfsense_backend/app/automations/tasks/schedule_tick.py
index cade621c7..385bd7242 100644
--- a/surfsense_backend/app/automations/tasks/schedule_tick.py
+++ b/surfsense_backend/app/automations/tasks/schedule_tick.py
@@ -15,6 +15,7 @@ Runs every minute. Each tick performs two passes:
from __future__ import annotations
import logging
+from dataclasses import dataclass
from datetime import UTC, datetime
from sqlalchemy import select
@@ -39,6 +40,15 @@ TASK_NAME = "automation_schedule_tick"
_TICK_BATCH = 200
+@dataclass(frozen=True, slots=True)
+class _Claim:
+ """Per-trigger fire context captured before row state is mutated."""
+
+ trigger_id: int
+ scheduled_for: datetime
+ previous_last_fired_at: datetime | None
+
+
@celery_app.task(name=TASK_NAME)
def automation_schedule_tick() -> None:
"""Tick once: self-heal NULL next_fire_at, claim due rows, fire each."""
@@ -52,12 +62,12 @@ async def _tick() -> None:
await _self_heal_null_next_fire(session, now=now)
- claimed_ids = await _claim_due_triggers(session, now=now)
- if not claimed_ids:
+ claims = await _claim_due_triggers(session, now=now)
+ if not claims:
return
- for trigger_id in claimed_ids:
- await _fire_one(session, trigger_id=trigger_id)
+ for claim in claims:
+ await _fire_one(session, claim=claim, fired_at=now)
async def _self_heal_null_next_fire(session: AsyncSession, *, now: datetime) -> None:
@@ -95,8 +105,8 @@ async def _self_heal_null_next_fire(session: AsyncSession, *, now: datetime) ->
async def _claim_due_triggers(
session: AsyncSession, *, now: datetime
-) -> list[int]:
- """Lock and advance due rows; return claimed trigger ids."""
+) -> list[_Claim]:
+ """Lock and advance due rows; return per-trigger fire context."""
stmt = (
select(AutomationTrigger)
.where(
@@ -113,8 +123,12 @@ async def _claim_due_triggers(
if not triggers:
return []
- claimed: list[int] = []
+ claims: list[_Claim] = []
for trigger in triggers:
+ # Snapshot fire-context BEFORE we advance the row.
+ scheduled_for = trigger.next_fire_at
+ previous_last_fired_at = trigger.last_fired_at
+
try:
trigger.next_fire_at = compute_next_fire_at(
trigger.params["cron"],
@@ -131,29 +145,43 @@ async def _claim_due_triggers(
continue
trigger.last_fired_at = now
- claimed.append(trigger.id)
+ claims.append(
+ _Claim(
+ trigger_id=trigger.id,
+ scheduled_for=scheduled_for,
+ previous_last_fired_at=previous_last_fired_at,
+ )
+ )
await session.commit()
- return claimed
+ return claims
-async def _fire_one(session: AsyncSession, *, trigger_id: int) -> None:
+async def _fire_one(
+ session: AsyncSession, *, claim: _Claim, fired_at: datetime
+) -> None:
"""Reload the trigger post-commit and dispatch a run for it."""
- trigger = await session.get(AutomationTrigger, trigger_id)
+ trigger = await session.get(AutomationTrigger, claim.trigger_id)
if trigger is None:
return
try:
- run = await dispatch_schedule_run(session=session, trigger=trigger)
+ run = await dispatch_schedule_run(
+ session=session,
+ trigger=trigger,
+ fired_at=fired_at,
+ scheduled_for=claim.scheduled_for,
+ previous_last_fired_at=claim.previous_last_fired_at,
+ )
logger.info(
"scheduled fire: trigger=%d automation=%d run=%d",
- trigger_id,
+ claim.trigger_id,
trigger.automation_id,
run.id,
)
except Exception:
logger.exception(
"scheduled fire failed for trigger %d (next attempt at next match)",
- trigger_id,
+ claim.trigger_id,
)
await session.rollback()
diff --git a/surfsense_backend/app/automations/templating/context.py b/surfsense_backend/app/automations/templating/context.py
index 3ca87694c..96fdb02e9 100644
--- a/surfsense_backend/app/automations/templating/context.py
+++ b/surfsense_backend/app/automations/templating/context.py
@@ -19,7 +19,7 @@ def build_run_context(
trigger_type: str | None,
started_at: datetime | None,
attempt: int,
- resolved_inputs: Mapping[str, Any],
+ inputs: Mapping[str, Any],
step_outputs: Mapping[str, Any],
) -> dict[str, Any]:
"""Build the ``{run, inputs, steps}`` namespace exposed to every template."""
@@ -36,6 +36,6 @@ def build_run_context(
"started_at": started_at,
"attempt": attempt,
},
- "inputs": dict(resolved_inputs),
+ "inputs": dict(inputs),
"steps": dict(step_outputs),
}
diff --git a/surfsense_backend/app/automations/triggers/manual/definition.py b/surfsense_backend/app/automations/triggers/manual/definition.py
index 9eb0282af..5a3529116 100644
--- a/surfsense_backend/app/automations/triggers/manual/definition.py
+++ b/surfsense_backend/app/automations/triggers/manual/definition.py
@@ -9,8 +9,7 @@ from .params import ManualTriggerParams
MANUAL_TRIGGER = TriggerDefinition(
type="manual",
description="Fire on a user-initiated 'Run now' invocation.",
- params_schema=ManualTriggerParams.model_json_schema(),
- payload_schema={"type": "object"},
+ params_model=ManualTriggerParams,
)
register_trigger(MANUAL_TRIGGER)
diff --git a/surfsense_backend/app/automations/triggers/manual/dispatch.py b/surfsense_backend/app/automations/triggers/manual/dispatch.py
index 750c99937..6c92317d0 100644
--- a/surfsense_backend/app/automations/triggers/manual/dispatch.py
+++ b/surfsense_backend/app/automations/triggers/manual/dispatch.py
@@ -19,9 +19,14 @@ async def dispatch_manual_run(
*,
session: AsyncSession,
automation_id: int,
- payload: dict[str, Any] | None,
+ runtime_inputs: dict[str, Any] | None,
) -> AutomationRun:
- """Find the automation + its enabled manual trigger, then run the generic dispatch."""
+ """Find the automation + its enabled manual trigger, then run the generic dispatch.
+
+ ``runtime_inputs`` is the caller-supplied payload (e.g. an HTTP body for a
+ "Run now" API call); it is merged with the trigger's ``static_inputs`` by
+ the generic dispatcher, with static winning on key collision.
+ """
automation = await _load_automation(session, automation_id)
if automation is None:
raise DispatchError(f"automation {automation_id} not found")
@@ -41,7 +46,7 @@ async def dispatch_manual_run(
session=session,
automation=automation,
trigger=trigger,
- payload=payload,
+ runtime_inputs=runtime_inputs,
)
diff --git a/surfsense_backend/app/automations/triggers/schedule/definition.py b/surfsense_backend/app/automations/triggers/schedule/definition.py
index 3f86d767c..605765307 100644
--- a/surfsense_backend/app/automations/triggers/schedule/definition.py
+++ b/surfsense_backend/app/automations/triggers/schedule/definition.py
@@ -9,12 +9,7 @@ from .params import ScheduleTriggerParams
SCHEDULE_TRIGGER = TriggerDefinition(
type="schedule",
description="Fire on a cron schedule in a given timezone.",
- params_schema=ScheduleTriggerParams.model_json_schema(),
- payload_schema={
- "type": "object",
- "additionalProperties": False,
- "properties": {},
- },
+ params_model=ScheduleTriggerParams,
)
register_trigger(SCHEDULE_TRIGGER)
diff --git a/surfsense_backend/app/automations/triggers/schedule/dispatch.py b/surfsense_backend/app/automations/triggers/schedule/dispatch.py
index fb4fcf686..6d3d5fcb9 100644
--- a/surfsense_backend/app/automations/triggers/schedule/dispatch.py
+++ b/surfsense_backend/app/automations/triggers/schedule/dispatch.py
@@ -2,6 +2,8 @@
from __future__ import annotations
+from datetime import datetime
+
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -16,9 +18,18 @@ async def dispatch_schedule_run(
*,
session: AsyncSession,
trigger: AutomationTrigger,
+ fired_at: datetime,
+ scheduled_for: datetime,
+ previous_last_fired_at: datetime | None,
) -> AutomationRun:
"""Fire one scheduled run for ``trigger``.
+ Emits calendar context as runtime inputs:
+
+ - ``fired_at`` — actual fire time
+ - ``scheduled_for`` — cron-derived target time for this fire
+ - ``last_fired_at`` — fire time of the previous run, or null on first fire
+
The caller (the schedule tick) is responsible for selecting due triggers
and advancing ``next_fire_at`` / ``last_fired_at`` before invoking this.
"""
@@ -33,11 +44,19 @@ async def dispatch_schedule_run(
f"automation {trigger.automation_id} is {automation.status.value}, not active"
)
+ runtime_inputs = {
+ "fired_at": fired_at.isoformat(),
+ "scheduled_for": scheduled_for.isoformat(),
+ "last_fired_at": (
+ previous_last_fired_at.isoformat() if previous_last_fired_at else None
+ ),
+ }
+
return await dispatch_run(
session=session,
automation=automation,
trigger=trigger,
- payload=None,
+ runtime_inputs=runtime_inputs,
)
diff --git a/surfsense_backend/app/automations/triggers/types.py b/surfsense_backend/app/automations/triggers/types.py
index 783bd7842..aa2808e4d 100644
--- a/surfsense_backend/app/automations/triggers/types.py
+++ b/surfsense_backend/app/automations/triggers/types.py
@@ -5,10 +5,16 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Any
+from pydantic import BaseModel
+
@dataclass(frozen=True, slots=True)
class TriggerDefinition:
type: str
description: str
- params_schema: dict[str, Any]
- payload_schema: dict[str, Any]
+ params_model: type[BaseModel]
+
+ @property
+ def params_schema(self) -> dict[str, Any]:
+ """JSON Schema (draft 2020-12) derived from ``params_model``."""
+ return self.params_model.model_json_schema()
From 8fb65d7188c05d8193a211af4cfbf96d6b0327c4 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 21:53:07 +0200
Subject: [PATCH 62/87] fix(automations): use enum values not names for
postgres enum columns
---
.../app/automations/persistence/models/automation.py | 6 +++++-
surfsense_backend/app/automations/persistence/models/run.py | 6 +++++-
.../app/automations/persistence/models/trigger.py | 6 +++++-
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/surfsense_backend/app/automations/persistence/models/automation.py b/surfsense_backend/app/automations/persistence/models/automation.py
index ee86851c1..cb0b2ed31 100644
--- a/surfsense_backend/app/automations/persistence/models/automation.py
+++ b/surfsense_backend/app/automations/persistence/models/automation.py
@@ -42,7 +42,11 @@ class Automation(BaseModel, TimestampMixin):
description = Column(Text, nullable=True)
status = Column(
- SQLAlchemyEnum(AutomationStatus, name="automation_status"),
+ SQLAlchemyEnum(
+ AutomationStatus,
+ name="automation_status",
+ values_callable=lambda x: [e.value for e in x],
+ ),
nullable=False,
default=AutomationStatus.ACTIVE,
server_default=AutomationStatus.ACTIVE.value,
diff --git a/surfsense_backend/app/automations/persistence/models/run.py b/surfsense_backend/app/automations/persistence/models/run.py
index 81b33c37c..262e4c2bf 100644
--- a/surfsense_backend/app/automations/persistence/models/run.py
+++ b/surfsense_backend/app/automations/persistence/models/run.py
@@ -35,7 +35,11 @@ class AutomationRun(BaseModel, TimestampMixin):
)
status = Column(
- SQLAlchemyEnum(RunStatus, name="automation_run_status"),
+ SQLAlchemyEnum(
+ RunStatus,
+ name="automation_run_status",
+ values_callable=lambda x: [e.value for e in x],
+ ),
nullable=False,
default=RunStatus.PENDING,
server_default=RunStatus.PENDING.value,
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index 72d1d8d07..f73a8f350 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -29,7 +29,11 @@ class AutomationTrigger(BaseModel, TimestampMixin):
)
type = Column(
- SQLAlchemyEnum(TriggerType, name="automation_trigger_type"),
+ SQLAlchemyEnum(
+ TriggerType,
+ name="automation_trigger_type",
+ values_callable=lambda x: [e.value for e in x],
+ ),
nullable=False,
index=True,
)
From c0232fdcfe346f4521c0b5aa578b3ac178bcdc6f Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 27 May 2026 22:29:51 +0200
Subject: [PATCH 63/87] refactor(automations): park manual trigger pending
Run-now redesign
Manual-as-a-standalone-trigger conflates "user clicks Run now" with the
trigger model and forces ad-hoc input plumbing on the caller. Remove the
unreachable surface so the tree reflects reality (schedule is the only
v1 trigger).
- Unregister `manual`: drop import from triggers/__init__.py
- Delete `app/automations/triggers/manual/`
- Drop `RunService.dispatch_manual` (RunService is now read-only)
- Drop `POST /automations/{id}/run` and `RunDispatched` schema
- Keep `TriggerType.MANUAL` Python + PG enum value (reserved, documented)
to avoid an Alembic round-trip when Run-now is redesigned
---
surfsense_backend/app/automations/api/run.py | 33 +--------
.../persistence/enums/trigger_type.py | 7 +-
.../automations/persistence/models/trigger.py | 2 +-
.../app/automations/schemas/api/__init__.py | 3 +-
.../app/automations/schemas/api/run.py | 9 +--
.../app/automations/services/run.py | 25 +------
.../app/automations/services/trigger.py | 2 +-
.../app/automations/triggers/__init__.py | 4 +-
.../automations/triggers/manual/__init__.py | 11 ---
.../automations/triggers/manual/definition.py | 15 ----
.../automations/triggers/manual/dispatch.py | 72 -------------------
.../app/automations/triggers/manual/params.py | 9 ---
surfsense_backend/app/routes/__init__.py | 2 +-
13 files changed, 18 insertions(+), 176 deletions(-)
delete mode 100644 surfsense_backend/app/automations/triggers/manual/__init__.py
delete mode 100644 surfsense_backend/app/automations/triggers/manual/definition.py
delete mode 100644 surfsense_backend/app/automations/triggers/manual/dispatch.py
delete mode 100644 surfsense_backend/app/automations/triggers/manual/params.py
diff --git a/surfsense_backend/app/automations/api/run.py b/surfsense_backend/app/automations/api/run.py
index d0d4bbfb7..b662a5943 100644
--- a/surfsense_backend/app/automations/api/run.py
+++ b/surfsense_backend/app/automations/api/run.py
@@ -1,42 +1,15 @@
-"""HTTP routes for automation runs (dispatch + history)."""
+"""HTTP routes for automation run history."""
from __future__ import annotations
-from typing import Any
+from fastapi import APIRouter, Depends, Query
-from fastapi import APIRouter, Body, Depends, Query, status
-
-from app.automations.schemas.api import (
- RunDetail,
- RunDispatched,
- RunList,
- RunSummary,
-)
+from app.automations.schemas.api import RunDetail, RunList, RunSummary
from app.automations.services import RunService, get_run_service
router = APIRouter()
-@router.post(
- "/automations/{automation_id}/run",
- response_model=RunDispatched,
- status_code=status.HTTP_202_ACCEPTED,
-)
-async def run_automation_now(
- automation_id: int,
- inputs: dict[str, Any] | None = Body(default=None),
- service: RunService = Depends(get_run_service),
-) -> RunDispatched:
- """Fire a manual run.
-
- ``inputs`` is the runtime payload supplied by the caller; it is merged with
- the manual trigger's ``static_inputs`` (static wins) and validated against
- the automation's input schema.
- """
- run = await service.dispatch_manual(automation_id=automation_id, runtime_inputs=inputs)
- return RunDispatched(run_id=run.id, status=run.status)
-
-
@router.get(
"/automations/{automation_id}/runs",
response_model=RunList,
diff --git a/surfsense_backend/app/automations/persistence/enums/trigger_type.py b/surfsense_backend/app/automations/persistence/enums/trigger_type.py
index 8318bfdee..a583b1bd6 100644
--- a/surfsense_backend/app/automations/persistence/enums/trigger_type.py
+++ b/surfsense_backend/app/automations/persistence/enums/trigger_type.py
@@ -1,4 +1,9 @@
-"""Trigger-kind discriminator. v1: schedule | manual; webhook/event in Phase 2/3."""
+"""Trigger-kind discriminator.
+
+v1 only registers ``schedule``. ``manual`` is reserved in the enum (mirrors the
+postgres enum) but is intentionally unregistered pending a redesign of the
+"Run now" UX.
+"""
from __future__ import annotations
diff --git a/surfsense_backend/app/automations/persistence/models/trigger.py b/surfsense_backend/app/automations/persistence/models/trigger.py
index f73a8f350..de1078acf 100644
--- a/surfsense_backend/app/automations/persistence/models/trigger.py
+++ b/surfsense_backend/app/automations/persistence/models/trigger.py
@@ -56,7 +56,7 @@ class AutomationTrigger(BaseModel, TimestampMixin):
# Precomputed next fire moment in UTC; advanced after each fire by the
# schedule tick. NULL means the trigger has never been scheduled (the
- # tick self-heals on first sight). Manual triggers leave this NULL.
+ # tick self-heals on first sight).
next_fire_at = Column(TIMESTAMP(timezone=True), nullable=True)
automation = relationship("Automation", back_populates="triggers")
diff --git a/surfsense_backend/app/automations/schemas/api/__init__.py b/surfsense_backend/app/automations/schemas/api/__init__.py
index a8a010a2c..f49e5c589 100644
--- a/surfsense_backend/app/automations/schemas/api/__init__.py
+++ b/surfsense_backend/app/automations/schemas/api/__init__.py
@@ -9,7 +9,7 @@ from .automation import (
AutomationSummary,
AutomationUpdate,
)
-from .run import RunDetail, RunDispatched, RunList, RunSummary
+from .run import RunDetail, RunList, RunSummary
from .trigger import TriggerCreate, TriggerDetail, TriggerUpdate
__all__ = [
@@ -19,7 +19,6 @@ __all__ = [
"AutomationSummary",
"AutomationUpdate",
"RunDetail",
- "RunDispatched",
"RunList",
"RunSummary",
"TriggerCreate",
diff --git a/surfsense_backend/app/automations/schemas/api/run.py b/surfsense_backend/app/automations/schemas/api/run.py
index 42ea7ac14..3f6eaab82 100644
--- a/surfsense_backend/app/automations/schemas/api/run.py
+++ b/surfsense_backend/app/automations/schemas/api/run.py
@@ -1,4 +1,4 @@
-"""Response schemas for run sub-resources and run dispatch."""
+"""Response schemas for run sub-resources."""
from __future__ import annotations
@@ -40,10 +40,3 @@ class RunList(BaseModel):
items: list[RunSummary]
total: int
-
-
-class RunDispatched(BaseModel):
- """Response of a successful run dispatch."""
-
- run_id: int
- status: RunStatus
diff --git a/surfsense_backend/app/automations/services/run.py b/surfsense_backend/app/automations/services/run.py
index 92d79e9bc..ac9970241 100644
--- a/surfsense_backend/app/automations/services/run.py
+++ b/surfsense_backend/app/automations/services/run.py
@@ -1,46 +1,25 @@
-"""``RunService`` — dispatch and history of automation runs."""
+"""``RunService`` — read-only access to automation run history."""
from __future__ import annotations
-from typing import Any
-
from fastapi import Depends, HTTPException
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
-from app.automations.dispatch import DispatchError
from app.automations.persistence.models.automation import Automation
from app.automations.persistence.models.run import AutomationRun
-from app.automations.triggers.manual import dispatch_manual_run
from app.db import Permission, User, get_async_session
from app.users import current_active_user
from app.utils.rbac import check_permission
class RunService:
- """Lifecycle of the ``AutomationRun`` resource."""
+ """Read-only access to ``AutomationRun`` history."""
def __init__(self, *, session: AsyncSession, user: User) -> None:
self.session = session
self.user = user
- async def dispatch_manual(
- self,
- *,
- automation_id: int,
- runtime_inputs: dict[str, Any] | None,
- ) -> AutomationRun:
- """Fire a manual run via the registered manual trigger."""
- await self._authorize(automation_id, Permission.AUTOMATIONS_EXECUTE.value)
- try:
- return await dispatch_manual_run(
- session=self.session,
- automation_id=automation_id,
- runtime_inputs=runtime_inputs,
- )
- except DispatchError as exc:
- raise HTTPException(status_code=422, detail=str(exc)) from exc
-
async def list(
self,
*,
diff --git a/surfsense_backend/app/automations/services/trigger.py b/surfsense_backend/app/automations/services/trigger.py
index 33e9c1386..c76cc0740 100644
--- a/surfsense_backend/app/automations/services/trigger.py
+++ b/surfsense_backend/app/automations/services/trigger.py
@@ -69,7 +69,7 @@ class TriggerService:
trigger.enabled = data["enabled"]
# Recompute next_fire_at when schedule timing changed or the trigger was
- # toggled back on. Manual triggers always have NULL next_fire_at.
+ # toggled back on.
if trigger.type == TriggerType.SCHEDULE:
trigger.next_fire_at = _initial_next_fire(
trigger.type, trigger.params, trigger.enabled
diff --git a/surfsense_backend/app/automations/triggers/__init__.py b/surfsense_backend/app/automations/triggers/__init__.py
index 258b2fda9..d7abb6b5d 100644
--- a/surfsense_backend/app/automations/triggers/__init__.py
+++ b/surfsense_backend/app/automations/triggers/__init__.py
@@ -1,6 +1,6 @@
"""Triggers domain: registry surface + built-in trigger packages.
-Each trigger lives in its own subpackage (``manual/``, ``schedule/``, ...) and
+Each trigger lives in its own subpackage (``schedule/``, ...) and
self-registers at import time via its ``definition`` module.
"""
@@ -17,4 +17,4 @@ __all__ = [
]
# Built-in triggers self-register at import time.
-from . import manual, schedule # noqa: E402, F401
+from . import schedule # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/triggers/manual/__init__.py b/surfsense_backend/app/automations/triggers/manual/__init__.py
deleted file mode 100644
index 65cca9270..000000000
--- a/surfsense_backend/app/automations/triggers/manual/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""``manual`` trigger: fired by a user clicking ``Run now``."""
-
-from __future__ import annotations
-
-from .dispatch import dispatch_manual_run
-from .params import ManualTriggerParams
-
-__all__ = ["ManualTriggerParams", "dispatch_manual_run"]
-
-# Side-effect: register on the triggers store.
-from . import definition # noqa: E402, F401
diff --git a/surfsense_backend/app/automations/triggers/manual/definition.py b/surfsense_backend/app/automations/triggers/manual/definition.py
deleted file mode 100644
index 5a3529116..000000000
--- a/surfsense_backend/app/automations/triggers/manual/definition.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""``manual`` ``TriggerDefinition`` registration."""
-
-from __future__ import annotations
-
-from ..store import register_trigger
-from ..types import TriggerDefinition
-from .params import ManualTriggerParams
-
-MANUAL_TRIGGER = TriggerDefinition(
- type="manual",
- description="Fire on a user-initiated 'Run now' invocation.",
- params_model=ManualTriggerParams,
-)
-
-register_trigger(MANUAL_TRIGGER)
diff --git a/surfsense_backend/app/automations/triggers/manual/dispatch.py b/surfsense_backend/app/automations/triggers/manual/dispatch.py
deleted file mode 100644
index 6c92317d0..000000000
--- a/surfsense_backend/app/automations/triggers/manual/dispatch.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""Manual ``Run now`` dispatch adapter: load + guard, then call generic dispatch."""
-
-from __future__ import annotations
-
-from typing import Any
-
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.automations.dispatch import DispatchError, dispatch_run
-from app.automations.persistence.enums.automation_status import AutomationStatus
-from app.automations.persistence.enums.trigger_type import TriggerType
-from app.automations.persistence.models.automation import Automation
-from app.automations.persistence.models.run import AutomationRun
-from app.automations.persistence.models.trigger import AutomationTrigger
-
-
-async def dispatch_manual_run(
- *,
- session: AsyncSession,
- automation_id: int,
- runtime_inputs: dict[str, Any] | None,
-) -> AutomationRun:
- """Find the automation + its enabled manual trigger, then run the generic dispatch.
-
- ``runtime_inputs`` is the caller-supplied payload (e.g. an HTTP body for a
- "Run now" API call); it is merged with the trigger's ``static_inputs`` by
- the generic dispatcher, with static winning on key collision.
- """
- automation = await _load_automation(session, automation_id)
- if automation is None:
- raise DispatchError(f"automation {automation_id} not found")
-
- if automation.status != AutomationStatus.ACTIVE:
- raise DispatchError(
- f"automation {automation_id} is {automation.status.value}, not active"
- )
-
- trigger = await _find_manual_trigger(session, automation_id)
- if trigger is None:
- raise DispatchError(
- f"automation {automation_id} has no enabled manual trigger"
- )
-
- return await dispatch_run(
- session=session,
- automation=automation,
- trigger=trigger,
- runtime_inputs=runtime_inputs,
- )
-
-
-async def _load_automation(
- session: AsyncSession, automation_id: int
-) -> Automation | None:
- stmt = select(Automation).where(Automation.id == automation_id)
- return (await session.execute(stmt)).scalar_one_or_none()
-
-
-async def _find_manual_trigger(
- session: AsyncSession, automation_id: int
-) -> AutomationTrigger | None:
- stmt = (
- select(AutomationTrigger)
- .where(
- AutomationTrigger.automation_id == automation_id,
- AutomationTrigger.type == TriggerType.MANUAL,
- AutomationTrigger.enabled.is_(True),
- )
- .limit(1)
- )
- return (await session.execute(stmt)).scalar_one_or_none()
diff --git a/surfsense_backend/app/automations/triggers/manual/params.py b/surfsense_backend/app/automations/triggers/manual/params.py
deleted file mode 100644
index 577655086..000000000
--- a/surfsense_backend/app/automations/triggers/manual/params.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""``ManualTriggerParams`` — params for the ``manual`` trigger (empty in v1)."""
-
-from __future__ import annotations
-
-from pydantic import BaseModel, ConfigDict
-
-
-class ManualTriggerParams(BaseModel):
- model_config = ConfigDict(extra="forbid")
diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py
index 64c8c6585..ef1c9312a 100644
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@@ -120,4 +120,4 @@ router.include_router(youtube_router) # YouTube playlist resolution
router.include_router(prompts_router)
router.include_router(memory_router) # User personal memory (memory.md style)
router.include_router(team_memory_router) # Search-space team memory
-router.include_router(automations_router) # Automations (manual run-now)
+router.include_router(automations_router) # Automations CRUD + run history
From 2b7d91aa0323a757b169294302c3ab12a4b39881 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 00:12:02 +0200
Subject: [PATCH 64/87] =?UTF-8?q?feat(automations):=20add=20create=5Fautom?=
=?UTF-8?q?ation=20HITL=20tool=20(NL=20=E2=86=92=20draft=20=E2=86=92=20app?=
=?UTF-8?q?rove=20=E2=86=92=20save)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Single tool exposed to the main agent. The main agent passes a natural-language
`intent`; a focused drafter sub-LLM turns it into a full AutomationCreate JSON;
that JSON is surfaced via request_approval (action_type "automation_create") so
the user can edit/approve it on a frontend card; on approval the tool persists
via AutomationService. Three phases, one tool call.
Scope split:
- main agent sees only `intent: str` (no schema knowledge leaks into the calling
graph) — prompt fragments scoped accordingly.
- drafter sub-LLM owns the schema + few-shot intent→JSON examples — lives in
the generating graph's prompt (tools/automation/prompt.py).
Files:
- main_agent/tools/automation/{create.py, prompt.py, __init__.py}: new tool
+ drafter system prompt with two few-shot intent→JSON examples.
- system_prompt/prompts/tools/create_automation/{description.md, example.md}:
intent-only guidance for the main agent.
- main_agent/tools/index.py: add create_automation to the main-agent allowlist.
- new_chat/tools/registry.py: deferred-import factory to break the
multi_agent_chat ↔ registry cycle; one ToolDefinition entry.
---
.../tools/create_automation/__init__.py | 1 +
.../tools/create_automation/description.md | 31 +++
.../tools/create_automation/example.md | 13 ++
.../main_agent/tools/automation/__init__.py | 7 +
.../main_agent/tools/automation/create.py | 203 ++++++++++++++++++
.../main_agent/tools/automation/prompt.py | 179 +++++++++++++++
.../main_agent/tools/index.py | 1 +
.../app/agents/new_chat/tools/registry.py | 37 ++++
8 files changed, 472 insertions(+)
create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/__init__.py
create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/example.md
create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/__init__.py
create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/__init__.py
new file mode 100644
index 000000000..30699a4a1
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/__init__.py
@@ -0,0 +1 @@
+"""``create_automation`` — description + few-shot examples."""
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
new file mode 100644
index 000000000..25b4eec47
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
@@ -0,0 +1,31 @@
+- `create_automation` — Draft and author a new automation. You describe the
+ user's intent; a focused drafter inside the tool turns it into the full
+ automation JSON; the user reviews and edits it on an approval card; on
+ approval it's saved. All three phases happen in a single tool call.
+ - Call when the user wants SurfSense to do something on its own: anything
+ recurring or scheduled ("every morning…", "each Monday…", "weekly
+ recap…").
+ - Args:
+ - `intent` (string): restate the user's request **concretely**, in one
+ paragraph. Cover three things:
+ - **What** should run (the action: summarize, recap, post, draft, …).
+ - **When** it should run (schedule + timezone if the user mentioned one;
+ otherwise leave the timezone for the drafter to default to UTC).
+ - **Static values** the automation needs (folder ids, channel names,
+ project keys, parent page ids, …) — list them with their values.
+ If the user did NOT supply one the automation needs, say so
+ explicitly ("the Notion parent page id was not specified") so the
+ drafter leaves a placeholder.
+ - Do NOT prompt the user to confirm before calling — the approval card
+ IS the confirmation. The user can edit any field on the card.
+ - Returns:
+ - `{status: "saved", automation_id, name}` — confirm briefly to the
+ user ("Saved as automation #N — runs <schedule>."). Don't dump JSON back.
+ - `{status: "rejected", message}` — the user declined on the card.
+ Acknowledge once ("Understood, I didn't create it.") and stop. Do
+ NOT retry or pitch variants.
+ - `{status: "invalid", issues, raw?}` — drafting/validation failed
+ before the card was shown. Read the issues, refine your `intent`
+ with the missing details, call again.
+ - `{status: "error", message}` — surface the message verbatim and
+ offer to retry.
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/example.md
new file mode 100644
index 000000000..19311bef0
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/example.md
@@ -0,0 +1,13 @@
+<example>
+user: "Every weekday at 9am, summarize new documents in folder 12 and post the summary to Slack channel #daily-digest."
+→ create_automation(intent="Every weekday at 09:00 UTC, summarize documents added to folder_id=12 since the last run, then post the summary to Slack channel '#daily-digest'. Static inputs: folder_id=12, slack_channel='#daily-digest'.")
+tool returns: {"status": "saved", "automation_id": 42, "name": "Daily folder 12 digest"}
+(Reply briefly: "Saved as automation #42 — runs weekdays at 9am UTC.")
+</example>
+
+<example>
+user: "Once a week on Mondays at 7am Paris time, draft a Notion page recapping last week's Jira tickets in project CORE."
+→ create_automation(intent="Every Monday at 07:00 Europe/Paris, read last week's Jira issues in project CORE, then draft a Notion page recapping them. Static inputs: jira_project_key='CORE'. The user did NOT specify which Notion page the recap should sit under — leave notion_parent_page_id as a placeholder.")
+tool returns: {"status": "saved", "automation_id": 51, "name": "Weekly CORE Jira recap"}
+(Reply: "Saved as automation #51. I left the Notion parent page id as a placeholder — set it on the automation before next Monday.")
+</example>
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/__init__.py
new file mode 100644
index 000000000..d47bbac7e
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/__init__.py
@@ -0,0 +1,7 @@
+"""``create_automation`` — author + persist an automation via a HITL card."""
+
+from __future__ import annotations
+
+from .create import create_create_automation_tool
+
+__all__ = ["create_create_automation_tool"]
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
new file mode 100644
index 000000000..78fedde22
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
@@ -0,0 +1,203 @@
+"""``create_automation`` — NL intent → drafted JSON → HITL approval card → persisted.
+
+Single tool that:
+
+1. Drafts a structured automation from the user's intent via a focused sub-LLM
+ (system prompt in :mod:`.prompt`).
+2. Surfaces the validated draft in a HITL approval card
+ (``action_type="automation_create"``).
+3. On approval, validates the (possibly edited) payload again and persists
+ it via :class:`AutomationService`.
+
+The main agent only restates the user's request as a single ``intent`` string.
+The drafting sub-LLM owns the JSON shape; the HITL card is the user's review.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from typing import Any
+from uuid import UUID
+
+from fastapi import HTTPException
+from langchain_core.messages import HumanMessage
+from langchain_core.tools import tool
+from pydantic import ValidationError
+
+from app.agents.new_chat.tools.hitl import request_approval
+from app.automations.schemas.api import AutomationCreate
+from app.automations.services.automation import AutomationService
+from app.db import User, async_session_maker
+from app.utils.content_utils import extract_text_content
+
+from .prompt import build_draft_prompt
+
+logger = logging.getLogger(__name__)
+
+_JSON_FENCE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL)
+
+
+def create_create_automation_tool(
+ *,
+ search_space_id: int,
+ user_id: str | UUID,
+ llm: Any,
+):
+ """Factory for the ``create_automation`` tool.
+
+ ``search_space_id`` is injected from the chat session (the model never
+ has to guess it). ``llm`` is the drafting sub-model — we reuse the main
+ agent's LLM and tag the call so it's identifiable in traces. A fresh
+ ``AsyncSession`` is opened per call to avoid stale sessions on
+ compiled-agent cache hits (same pattern as the Notion / memory tools).
+ """
+ # Normalise the id once at build time; every tool call below looks the user
+ # up with ``session.get(User, uid)``.
+ uid = UUID(user_id) if isinstance(user_id, str) else user_id
+
+ @tool
+ async def create_automation(intent: str) -> dict[str, Any]:
+ """Draft + save an automation from a natural-language intent.
+
+ Use this when the user wants SurfSense to do something on its own
+ on a schedule (e.g. "every morning summarize folder 12 to Slack").
+ Restate the user's request as ONE concrete ``intent`` string: what
+ should run, when, and which static values (folder ids, channel
+ names, …) it needs.
+
+ The tool drafts the full automation JSON internally, shows the user
+ an approval card for review, and persists on approval. Do NOT
+ prompt the user to confirm before calling — the card IS the
+ confirmation. The user can edit any field there.
+
+ Args:
+ intent: Concrete restatement of the user's request. Include
+ the schedule (with timezone if mentioned), the action to
+ take, and any static values. Example: "Every weekday at
+ 09:00 UTC, summarize new docs added to folder_id=12 since
+ the last run, then post the summary to Slack channel
+ '#daily-digest'."
+
+ Returns:
+ ``{"status": "saved", "automation_id": int, "name": str}`` on
+ approval + save.
+ ``{"status": "rejected", "message": "..."}`` when the user
+ declines on the card.
+ ``{"status": "invalid", "issues": [...], "raw": ...}`` when
+ the drafter produced output that did not validate (call again
+ with a more precise intent).
+ ``{"status": "error", "message": "..."}`` on drafter or
+ persistence failure.
+
+ IMPORTANT: when status is ``"rejected"`` the user explicitly
+ declined. Acknowledge once and stop — do NOT retry or pitch
+ variants without a fresh user request.
+ """
+ # --- 1. Draft via sub-LLM ---
+ prompt = build_draft_prompt(search_space_id=search_space_id, intent=intent)
+ try:
+ response = await llm.ainvoke(
+ [HumanMessage(content=prompt)],
+ config={"tags": ["surfsense:internal", "automation-draft"]},
+ )
+ except Exception as exc:
+ # Any drafting failure is reported to the calling agent as a result
+ # dict instead of propagating out of the tool.
+ logger.exception("create_automation drafting LLM call failed")
+ return {"status": "error", "message": f"drafting failed: {exc}"}
+
+ # Reduce the response content to text, then pull a JSON object out of it.
+ raw_text = extract_text_content(response.content).strip()
+ draft = _extract_json(raw_text)
+ if draft is None:
+ # Nothing parseable: hand the raw text back alongside the issue.
+ return {
+ "status": "invalid",
+ "issues": ["model output was not parseable JSON"],
+ "raw": raw_text,
+ }
+
+ # search_space_id is injected here so the sub-LLM never has to guess.
+ draft["search_space_id"] = search_space_id
+ try:
+ validated_draft = AutomationCreate.model_validate(draft)
+ except ValidationError as exc:
+ return {
+ "status": "invalid",
+ "issues": _format_validation_issues(exc),
+ "raw": draft,
+ }
+
+ # --- 2. HITL approval card ---
+ # NOTE(review): the GraphInterrupt re-raise at the bottom suggests that
+ # request_approval pauses the graph via a LangGraph interrupt. If so,
+ # confirm how the drafting call above behaves when the tool body is
+ # re-entered on resume.
+ try:
+ card_params = validated_draft.model_dump(mode="json", by_alias=True)
+ # search_space_id is session-scoped, not user-editable.
+ card_params.pop("search_space_id", None)
+
+ result = request_approval(
+ action_type="automation_create",
+ tool_name="create_automation",
+ params=card_params,
+ context={"search_space_id": search_space_id},
+ )
+
+ if result.rejected:
+ return {
+ "status": "rejected",
+ "message": "User declined. Do not retry or suggest alternatives.",
+ }
+
+ # --- 3. Persist (re-validate in case the user edited) ---
+ # Re-attach the session-scoped id that was stripped from the card params.
+ final_payload = {**result.params, "search_space_id": search_space_id}
+ try:
+ final_validated = AutomationCreate.model_validate(final_payload)
+ except ValidationError as exc:
+ return {
+ "status": "invalid",
+ "issues": _format_validation_issues(exc),
+ }
+
+ async with async_session_maker() as session:
+ user = await session.get(User, uid)
+ if user is None:
+ return {
+ "status": "error",
+ "message": "user not found in this session",
+ }
+ service = AutomationService(session=session, user=user)
+ created = await service.create(final_validated)
+ return {
+ "status": "saved",
+ "automation_id": created.id,
+ "name": created.name,
+ }
+
+ except HTTPException as exc:
+ # Report the HTTPException's ``detail`` as the tool error message
+ # (presumably raised by AutomationService — verify against the service).
+ return {"status": "error", "message": exc.detail}
+ except Exception as exc:
+ from langgraph.errors import GraphInterrupt
+
+ # GraphInterrupt must propagate, so it is re-raised rather than
+ # being reported as an error result.
+ if isinstance(exc, GraphInterrupt):
+ raise
+ logger.exception("create_automation failed")
+ return {"status": "error", "message": f"persistence failed: {exc}"}
+
+ return create_automation
+
+
+def _extract_json(text: str) -> dict[str, Any] | None:
+    """Decode the model reply as a JSON object; a ``` fence, if present, wins."""
+    if not text:
+        return None
+    fenced = _JSON_FENCE.search(text)
+    payload = text if fenced is None else fenced.group(1)
+    try:
+        decoded = json.loads(payload)
+    except json.JSONDecodeError:
+        return None
+    # Valid JSON that is not an object (list, number, string, …) is not a
+    # usable draft, so it is treated the same as unparseable output.
+    return decoded if isinstance(decoded, dict) else None
+
+
+def _format_validation_issues(exc: ValidationError) -> list[str]:
+    """Flatten each validation error into a ``dotted.location: message`` string."""
+    def _render(err: Any) -> str:
+        return ".".join(map(str, err["loc"])) + f": {err['msg']}"
+    return list(map(_render, exc.errors()))
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py
new file mode 100644
index 000000000..45870e768
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/prompt.py
@@ -0,0 +1,179 @@
+"""System prompt for the drafting sub-LLM inside ``create_automation``.
+
+Converts a natural-language ``intent`` into a structured ``AutomationCreate``
+JSON object. That object becomes the payload the HITL approval card surfaces.
+
+Scope split:
+ Real automation JSONs live here — this is the graph that *generates*
+ the JSON. The main agent's prompt fragments (``description.md`` /
+ ``example.md``) only carry intent-string examples; the main agent
+ never sees the schema.
+
+Layout:
+ The prompt is concatenated from four format-safe pieces. ``_HEADER`` /
+ ``_FOOTER`` carry the only ``str.format`` placeholders; ``_SCHEMA`` and
+ ``_FEW_SHOTS`` are plain strings so their JSON literals (and the
+ ``{{ inputs.X }}`` Jinja references in queries) can stay readable
+ without doubled-brace escaping.
+
+Catalog handling:
+ v1 hard-codes the action/trigger catalog (one action, one trigger).
+ When new types ship, swap the inline lines for a render-time pull
+ from ``app.automations.actions`` / ``app.automations.triggers`` via
+ lazy imports inside :func:`build_draft_prompt` so this module never
+ participates in the ``multi_agent_chat`` import cycle.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+
+# Opening section of the drafter prompt. ``build_draft_prompt`` renders it with
+# ``str.format``; its placeholders are ``{now}`` and ``{search_space_id}``.
+_HEADER = """\
+You are the SurfSense automation drafter. Convert the user intent below
+into a SINGLE JSON object matching the AutomationCreate schema. Output
+ONLY that JSON object — no prose, no markdown fence, no commentary.
+
+Current UTC time (for cron context): {now}
+Target search_space_id: {search_space_id}
+"""
+
+
+# Target JSON shape, v1 catalog and authoring conventions for the drafter.
+# Concatenated verbatim by ``build_draft_prompt`` (never passed through
+# ``str.format``), so single braces and ``{{ inputs.X }}`` need no escaping.
+# Every ``<...>`` token is a placeholder the drafter must replace with a value.
+_SCHEMA = """
+Required JSON shape:
+{
+  "name": "<1-200 char identifier>",
+  "description": "<short human-readable summary>",
+  "definition": {
+    "schema_version": "1.0",
+    "name": "<same as the top-level name>",
+    "goal": "<one sentence describing the outcome>",
+    "plan": [
+      {
+        "step_id": "<snake_case step identifier>",
+        "action": "agent_task",
+        "params": {
+          "query": "<Jinja-templated instruction for the agent>",
+          "auto_approve_all": true
+        }
+      }
+    ],
+    "metadata": {"tags": ["..."]}
+  },
+  "triggers": [
+    {
+      "type": "schedule",
+      "params": {"cron": "<5-field cron>", "timezone": "<IANA timezone>"},
+      "static_inputs": {"<key>": <value>, ...},
+      "enabled": true
+    }
+  ]
+}
+
+v1 catalog (only these are valid):
+- Actions: agent_task — params: query (string, Jinja), auto_approve_all (bool).
+- Triggers: schedule — params: cron (5-field), timezone (IANA, e.g. "UTC",
+  "Europe/Paris"). Has static_inputs (object).
+
+Conventions:
+- Whatever the plan references via {{ inputs.X }} MUST appear either in a
+  trigger's static_inputs OR in definition.inputs.schema_.properties so the
+  executor can resolve it at fire time.
+- static_inputs carries values that stay the same across every fire
+  (folder ids, channel names, project keys, parent page ids). Put them on
+  the trigger that supplies them, not in the plan.
+- If the user did NOT supply a value the plan needs, put "REPLACE_ME" in
+  static_inputs. Do NOT invent ids, channels, or paths.
+- Cron is 5-field (minute hour day-of-month month day-of-week). Use the
+  timezone the user mentioned; default "UTC" when unspecified.
+- Templating variables available at fire time: inputs.* (merged
+  static_inputs + runtime), inputs.fired_at, inputs.last_fired_at.
+"""
+
+
+_FEW_SHOTS = """
+Few-shot examples (intent → JSON output):
+
+### Example 1 — schedule with all static values supplied
+intent: "Every weekday at 09:00 UTC, summarize documents added to folder_id=12 since the last run, then post the summary to Slack channel '#daily-digest'. Static inputs: folder_id=12, slack_channel='#daily-digest'."
+output:
+{
+ "name": "Daily folder 12 digest",
+ "description": "Weekday 09:00 UTC summary of folder 12 documents posted to #daily-digest",
+ "definition": {
+ "schema_version": "1.0",
+ "name": "Daily folder 12 digest",
+ "goal": "Summarize new docs in folder 12 since the last run and post to #daily-digest",
+ "plan": [
+ {
+ "step_id": "summarize_and_post",
+ "action": "agent_task",
+ "params": {
+ "query": "Summarize documents added to folder {{ inputs.folder_id }} since {{ inputs.last_fired_at or 'yesterday' }}, then send the summary to Slack channel {{ inputs.slack_channel }}.",
+ "auto_approve_all": true
+ }
+ }
+ ],
+ "metadata": {"tags": ["daily", "digest", "slack"]}
+ },
+ "triggers": [
+ {
+ "type": "schedule",
+ "params": {"cron": "0 9 * * 1-5", "timezone": "UTC"},
+ "static_inputs": {"folder_id": 12, "slack_channel": "#daily-digest"},
+ "enabled": true
+ }
+ ]
+}
+
+### Example 2 — schedule with a missing value (REPLACE_ME placeholder)
+intent: "Every Monday at 07:00 Europe/Paris, read last week's Jira issues in project CORE, then draft a Notion page recapping them. Static inputs: jira_project_key='CORE'. The user did NOT specify the Notion parent page id — leave it as a placeholder."
+output:
+{
+ "name": "Weekly CORE Jira recap",
+ "description": "Monday 07:00 Europe/Paris recap of last week's CORE Jira issues, drafted to Notion",
+ "definition": {
+ "schema_version": "1.0",
+ "name": "Weekly CORE Jira recap",
+ "goal": "Recap last week's CORE Jira issues into a Notion page",
+ "plan": [
+ {
+ "step_id": "recap",
+ "action": "agent_task",
+ "params": {
+ "query": "List Jira issues in project {{ inputs.jira_project_key }} updated in the 7 days before {{ inputs.fired_at }}. Draft a Notion page under parent id {{ inputs.notion_parent_page_id }} titled 'CORE recap — week of {{ inputs.fired_at }}'.",
+ "auto_approve_all": true
+ }
+ }
+ ],
+ "metadata": {"tags": ["weekly", "recap", "jira", "notion"]}
+ },
+ "triggers": [
+ {
+ "type": "schedule",
+ "params": {"cron": "0 7 * * 1", "timezone": "Europe/Paris"},
+ "static_inputs": {"jira_project_key": "CORE", "notion_parent_page_id": "REPLACE_ME"},
+ "enabled": true
+ }
+ ]
+}
+"""
+
+
+# Closing section of the drafter prompt. ``build_draft_prompt`` renders it with
+# ``str.format``; its only placeholder is ``{intent}``.
+_FOOTER = """
+User intent:
+{intent}
+"""
+
+
+def build_draft_prompt(*, search_space_id: int, intent: str) -> str:
+    """Assemble the drafting prompt: header, schema, few-shots, then the intent.
+
+    Only the header and footer are ``str.format`` templates. The schema and
+    few-shot sections are appended verbatim, so their JSON braces and
+    ``{{ inputs.X }}`` references pass through unchanged.
+    """
+    timestamp = datetime.now(UTC).isoformat(timespec="seconds")
+    opening = _HEADER.format(now=timestamp, search_space_id=search_space_id)
+    closing = _FOOTER.format(intent=intent.strip())
+    return "".join((opening, _SCHEMA, _FEW_SHOTS, closing))
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py
index 5d309261c..88509eda7 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/index.py
@@ -10,6 +10,7 @@ MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
"web_search",
"scrape_webpage",
"update_memory",
+ "create_automation",
)
MAIN_AGENT_SURFSENSE_TOOL_NAMES: frozenset[str] = frozenset(
diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py
index b842d7a20..8c263ca20 100644
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@@ -150,6 +150,28 @@ class ToolDefinition:
reverse: Callable[[dict[str, Any], Any], dict[str, Any]] | None = None
+# =============================================================================
+# Deferred-import factories
+# =============================================================================
+# Used for tools whose impls live under ``multi_agent_chat``. Importing those
+# at module-load time would cycle (``multi_agent_chat`` middleware imports
+# this registry). The import inside the factory runs only when
+# ``build_tools`` is called, by which point ``multi_agent_chat`` is fully
+# initialised.
+
+
+def _build_create_automation_tool(deps: dict[str, Any]) -> BaseTool:
+    """Instantiate ``create_automation`` from the resolved dependency map."""
+    # Call-time import: loading ``multi_agent_chat`` at module import would cycle.
+    from app.agents.multi_agent_chat.main_agent.tools.automation import (
+        create_create_automation_tool as make_tool,
+    )
+
+    wanted = ("search_space_id", "user_id", "llm")
+    factory_kwargs = {key: deps[key] for key in wanted}
+    return make_tool(**factory_kwargs)
+
+
# =============================================================================
# Built-in Tools Registry
# =============================================================================
@@ -261,6 +283,21 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
requires=["db_session", "search_space_id", "user_id"],
),
# =========================================================================
+ # AUTOMATION AUTHORING - single HITL tool. The tool takes an NL ``intent``
+ # from the main agent, drafts the full AutomationCreate JSON via a focused
+ # sub-LLM, surfaces it on an approval card, and persists on approval. The
+ # factory defers its import because the impl lives under ``multi_agent_chat``
+ # and that package transitively pulls this registry via middleware;
+ # deferring to ``build_tools`` call-time breaks the cycle without a
+ # parallel registry.
+ # =========================================================================
+ ToolDefinition(
+ name="create_automation",
+ description="Draft an automation from an NL intent; user approves the card; tool saves",
+ factory=_build_create_automation_tool,
+ requires=["search_space_id", "user_id", "llm"],
+ ),
+ # =========================================================================
# MEMORY TOOL - single update_memory, private or team by thread_visibility
# =========================================================================
ToolDefinition(
From 79f0218360e713ad552d2d8138cd744efc1687ca Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 00:30:40 +0200
Subject: [PATCH 65/87] rbac: surface automations permissions in the UI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Backend already defined automations:create/read/update/delete/execute and
seeded them on Owner/Editor/Viewer roles, but the Settings → Roles UI was
missing the metadata to render them properly.
- backend: add PERMISSION_DESCRIPTIONS entries for the 5 automations perms so
the role editor stops falling back to "Permission for automations:create".
- frontend: add automations to CATEGORY_CONFIG (Workflow icon, slotted between
podcasts and connectors) so the role editor groups them as a real section.
- frontend: extend the three ROLE_PRESETS — Editor and Contributor get
create/read/update/execute (mirroring backend Editor); Viewer gets read.
Prep work for the automations frontend; canPerform/usePermissionGate already
handle the runtime gating, so no new hook is needed.
---
surfsense_backend/app/routes/rbac_routes.py | 6 ++++++
.../components/settings/roles-manager.tsx | 16 ++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/surfsense_backend/app/routes/rbac_routes.py b/surfsense_backend/app/routes/rbac_routes.py
index 38ae31269..3b91e456d 100644
--- a/surfsense_backend/app/routes/rbac_routes.py
+++ b/surfsense_backend/app/routes/rbac_routes.py
@@ -107,6 +107,12 @@ PERMISSION_DESCRIPTIONS = {
"settings:view": "View search space settings",
"settings:update": "Modify search space settings",
"settings:delete": "Delete the entire search space",
+ # Automations
+ "automations:create": "Create automations from chat or JSON",
+ "automations:read": "View automations, their triggers, and run history",
+ "automations:update": "Edit automations and manage their triggers",
+ "automations:delete": "Remove automations from the search space",
+ "automations:execute": "Manually fire automations",
# Full access
"*": "Full access to all features and settings",
}
diff --git a/surfsense_web/components/settings/roles-manager.tsx b/surfsense_web/components/settings/roles-manager.tsx
index 88595e748..5c034470d 100644
--- a/surfsense_web/components/settings/roles-manager.tsx
+++ b/surfsense_web/components/settings/roles-manager.tsx
@@ -23,6 +23,7 @@ import {
Unplug,
Users,
Video,
+ Workflow,
} from "lucide-react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { toast } from "sonner";
@@ -126,6 +127,12 @@ const CATEGORY_CONFIG: Record<
description: "Generate AI podcasts from content",
order: 5,
},
+ automations: {
+ label: "Automations",
+ icon: Workflow,
+ description: "Scheduled and event-driven agent tasks",
+ order: 5.5,
+ },
connectors: {
label: "Connectors",
icon: Unplug,
@@ -200,6 +207,10 @@ const ROLE_PRESETS = {
"podcasts:create",
"podcasts:read",
"podcasts:update",
+ "automations:create",
+ "automations:read",
+ "automations:update",
+ "automations:execute",
"connectors:create",
"connectors:read",
"connectors:update",
@@ -220,6 +231,7 @@ const ROLE_PRESETS = {
"comments:read",
"llm_configs:read",
"podcasts:read",
+ "automations:read",
"connectors:read",
"logs:read",
"members:view",
@@ -240,6 +252,10 @@ const ROLE_PRESETS = {
"comments:read",
"llm_configs:read",
"podcasts:read",
+ "automations:create",
+ "automations:read",
+ "automations:update",
+ "automations:execute",
"connectors:read",
"logs:read",
"members:view",
From d48bb2033be03f4a86d12ebff79468c1f1913314 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 00:55:46 +0200
Subject: [PATCH 66/87] fix(web): handle 204 No Content responses in base API
service
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
DELETE endpoints in the automations API return 204; calling .json() on
an empty body throws SyntaxError. Treat 204 as data=null and skip
schema validation so callers can opt out of response bodies without
errors or spurious schema-mismatch warnings.
Also drops a pre-existing 'unknown → BodyInit' type error on the
non-JSON body branch via a narrow cast (caller is responsible for
passing a real BodyInit when Content-Type isn't application/json).
---
surfsense_web/lib/apis/base-api.service.ts | 57 +++++++++++++---------
1 file changed, 33 insertions(+), 24 deletions(-)
diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts
index 0819cbc7c..a0039b63a 100644
--- a/surfsense_web/lib/apis/base-api.service.ts
+++ b/surfsense_web/lib/apis/base-api.service.ts
@@ -1,4 +1,5 @@
import type { ZodType } from "zod";
+import { BACKEND_URL } from "@/lib/env-config";
import { getClientPlatform } from "../agent-filesystem";
import { getBearerToken, handleUnauthorized, refreshAccessToken } from "../auth-utils";
import {
@@ -9,7 +10,7 @@ import {
NetworkError,
NotFoundError,
} from "../error";
-import { BACKEND_URL } from "@/lib/env-config";
+
enum ResponseType {
JSON = "json",
TEXT = "text",
@@ -122,8 +123,9 @@ class BaseApiService {
if (contentType === "application/json" && typeof mergedOptions.body === "object") {
fetchOptions.body = JSON.stringify(mergedOptions.body);
} else {
- // Pass body as-is for other content types (e.g., form data, already stringified)
- fetchOptions.body = mergedOptions.body;
+ // Pass body as-is for other content types (form data, already stringified).
+ // Caller is responsible for passing a real BodyInit when Content-Type is not JSON.
+ fetchOptions.body = mergedOptions.body as BodyInit;
}
}
@@ -210,32 +212,39 @@ class BaseApiService {
let data;
const responseType = mergedOptions.responseType;
- try {
- switch (responseType) {
- case ResponseType.JSON:
- data = await response.json();
- break;
- case ResponseType.TEXT:
- data = await response.text();
- break;
- case ResponseType.BLOB:
- data = await response.blob();
- break;
- case ResponseType.ARRAY_BUFFER:
- data = await response.arrayBuffer();
- break;
- // Add more cases as needed
- default:
- data = await response.json();
+ if (response.status === 204) {
+ // 204 No Content has no body; .json() would throw SyntaxError.
+ // Leave data as null and skip schema validation below so endpoints
+ // that opt out of bodies (REST-style DELETE) don't error on success.
+ data = null;
+ } else {
+ try {
+ switch (responseType) {
+ case ResponseType.JSON:
+ data = await response.json();
+ break;
+ case ResponseType.TEXT:
+ data = await response.text();
+ break;
+ case ResponseType.BLOB:
+ data = await response.blob();
+ break;
+ case ResponseType.ARRAY_BUFFER:
+ data = await response.arrayBuffer();
+ break;
+ // Add more cases as needed
+ default:
+ data = await response.json();
+ }
+ } catch (error) {
+ console.error("Failed to parse response as JSON:", error);
+ throw new AppError("Failed to parse response", response.status, response.statusText);
}
- } catch (error) {
- console.error("Failed to parse response as JSON:", error);
- throw new AppError("Failed to parse response", response.status, response.statusText);
}
// Validate response
if (responseType === ResponseType.JSON) {
- if (!responseSchema) {
+ if (!responseSchema || response.status === 204) {
return data;
}
const parsedData = responseSchema.safeParse(data);
From b18a5fdca92ba7fcfd9e3240747e47dc2adedbf0 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 00:55:57 +0200
Subject: [PATCH 67/87] feat(web): automations contracts, API client, atoms and
hooks
Foundation for the v1 automations UI. Mirrors backend Pydantic schemas
into Zod and wires the data layer end-to-end so feature surfaces can
be built on top.
contracts/types/automation.types.ts:
- AutomationStatus, TriggerType, RunStatus enums.
- AutomationDefinition envelope (PlanStep, TriggerSpec, Execution,
Metadata, Inputs).
- AutomationCreate/Update/Detail/Summary/List + listParams.
- TriggerCreate/Update/Detail.
- RunSummary/Detail/List + runListParams.
lib/apis/automations-api.service.ts:
- list/get/create/update/delete automations.
- add/update/remove triggers (sub-resource).
- list/get runs (read-only sub-resource).
- safeParse on every write, 204-safe deletes.
atoms/automations/:
- automationsListAtom (active search space, first page).
- 6 mutation atoms with toast + cache invalidation.
hooks/:
- use-automations.ts wraps the list atom.
- use-automation.ts: parameterized detail by id.
- use-automation-runs.ts: useAutomationRuns + useAutomationRun.
lib/query-client/cache-keys.ts: automations namespace (list, detail,
runs, run) keyed by (id, limit, offset) where relevant.
Smoke: zod round-trip OK on backend-shape payloads (Automation,
AutomationCreate, Trigger, Run); typecheck clean for new files;
biome clean.
---
.../automations/automations-mutation.atoms.ts | 127 ++++++++++++
.../automations/automations-query.atoms.ts | 31 +++
.../contracts/types/automation.types.ts | 193 ++++++++++++++++++
surfsense_web/hooks/use-automation-runs.ts | 42 ++++
surfsense_web/hooks/use-automation.ts | 19 ++
surfsense_web/hooks/use-automations.ts | 24 +++
.../lib/apis/automations-api.service.ts | 102 +++++++++
surfsense_web/lib/query-client/cache-keys.ts | 10 +
8 files changed, 548 insertions(+)
create mode 100644 surfsense_web/atoms/automations/automations-mutation.atoms.ts
create mode 100644 surfsense_web/atoms/automations/automations-query.atoms.ts
create mode 100644 surfsense_web/contracts/types/automation.types.ts
create mode 100644 surfsense_web/hooks/use-automation-runs.ts
create mode 100644 surfsense_web/hooks/use-automation.ts
create mode 100644 surfsense_web/hooks/use-automations.ts
create mode 100644 surfsense_web/lib/apis/automations-api.service.ts
diff --git a/surfsense_web/atoms/automations/automations-mutation.atoms.ts b/surfsense_web/atoms/automations/automations-mutation.atoms.ts
new file mode 100644
index 000000000..f5e4fd5f4
--- /dev/null
+++ b/surfsense_web/atoms/automations/automations-mutation.atoms.ts
@@ -0,0 +1,127 @@
+import { atomWithMutation } from "jotai-tanstack-query";
+import { toast } from "sonner";
+import type {
+ AutomationCreateRequest,
+ AutomationUpdateRequest,
+ TriggerCreateRequest,
+ TriggerUpdateRequest,
+} from "@/contracts/types/automation.types";
+import { automationsApiService } from "@/lib/apis/automations-api.service";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+import { queryClient } from "@/lib/query-client/client";
+
+// Cache invalidation strategy:
+// - Automation writes invalidate the search-space list + the touched detail.
+// - Trigger writes only invalidate the parent automation detail (triggers
+// come back inline in AutomationDetail).
+// We deliberately invalidate by the ["automations", "list", searchSpaceId]
+// prefix because list is keyed by (searchSpaceId, limit, offset) and we don't
+// track the active pagination in this layer.
+
+function invalidateList(searchSpaceId: number) {
+ queryClient.invalidateQueries({ queryKey: ["automations", "list", searchSpaceId] });
+}
+
+function invalidateDetail(automationId: number) {
+ queryClient.invalidateQueries({
+ queryKey: cacheKeys.automations.detail(automationId),
+ });
+}
+
+export const createAutomationMutationAtom = atomWithMutation(() => ({
+ meta: { suppressGlobalErrorToast: true },
+ mutationFn: async (request: AutomationCreateRequest) => {
+ return automationsApiService.createAutomation(request);
+ },
+ onSuccess: (_, variables) => {
+ invalidateList(variables.search_space_id);
+ toast.success("Automation created");
+ },
+ onError: (error: Error) => {
+ console.error("Error creating automation:", error);
+ toast.error("Failed to create automation");
+ },
+}));
+
+export const updateAutomationMutationAtom = atomWithMutation(() => ({
+ meta: { suppressGlobalErrorToast: true },
+ mutationFn: async (vars: { automationId: number; patch: AutomationUpdateRequest }) => {
+ return automationsApiService.updateAutomation(vars.automationId, vars.patch);
+ },
+ onSuccess: (automation, vars) => {
+ invalidateDetail(vars.automationId);
+ invalidateList(automation.search_space_id);
+ toast.success("Automation updated");
+ },
+ onError: (error: Error) => {
+ console.error("Error updating automation:", error);
+ toast.error("Failed to update automation");
+ },
+}));
+
+export const deleteAutomationMutationAtom = atomWithMutation(() => ({
+ meta: { suppressGlobalErrorToast: true },
+ mutationFn: async (vars: { automationId: number; searchSpaceId: number }) => {
+ await automationsApiService.deleteAutomation(vars.automationId);
+ return vars;
+ },
+ onSuccess: (vars) => {
+ invalidateList(vars.searchSpaceId);
+ invalidateDetail(vars.automationId);
+ toast.success("Automation deleted");
+ },
+ onError: (error: Error) => {
+ console.error("Error deleting automation:", error);
+ toast.error("Failed to delete automation");
+ },
+}));
+
+export const addTriggerMutationAtom = atomWithMutation(() => ({
+ meta: { suppressGlobalErrorToast: true },
+ mutationFn: async (vars: { automationId: number; payload: TriggerCreateRequest }) => {
+ return automationsApiService.addTrigger(vars.automationId, vars.payload);
+ },
+ onSuccess: (_, vars) => {
+ invalidateDetail(vars.automationId);
+ toast.success("Trigger added");
+ },
+ onError: (error: Error) => {
+ console.error("Error adding trigger:", error);
+ toast.error("Failed to add trigger");
+ },
+}));
+
+export const updateTriggerMutationAtom = atomWithMutation(() => ({
+ meta: { suppressGlobalErrorToast: true },
+ mutationFn: async (vars: {
+ automationId: number;
+ triggerId: number;
+ patch: TriggerUpdateRequest;
+ }) => {
+ return automationsApiService.updateTrigger(vars.automationId, vars.triggerId, vars.patch);
+ },
+ onSuccess: (_, vars) => {
+ invalidateDetail(vars.automationId);
+ toast.success("Trigger updated");
+ },
+ onError: (error: Error) => {
+ console.error("Error updating trigger:", error);
+ toast.error("Failed to update trigger");
+ },
+}));
+
+export const removeTriggerMutationAtom = atomWithMutation(() => ({
+ meta: { suppressGlobalErrorToast: true },
+ mutationFn: async (vars: { automationId: number; triggerId: number }) => {
+ await automationsApiService.removeTrigger(vars.automationId, vars.triggerId);
+ return vars;
+ },
+ onSuccess: (vars) => {
+ invalidateDetail(vars.automationId);
+ toast.success("Trigger removed");
+ },
+ onError: (error: Error) => {
+ console.error("Error removing trigger:", error);
+ toast.error("Failed to remove trigger");
+ },
+}));
diff --git a/surfsense_web/atoms/automations/automations-query.atoms.ts b/surfsense_web/atoms/automations/automations-query.atoms.ts
new file mode 100644
index 000000000..4117f9bc8
--- /dev/null
+++ b/surfsense_web/atoms/automations/automations-query.atoms.ts
@@ -0,0 +1,31 @@
+import { atomWithQuery } from "jotai-tanstack-query";
+import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { automationsApiService } from "@/lib/apis/automations-api.service";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+
+// First page of the active search space's automations.
+// Detail + paginated/parameterized reads live in hooks (see use-automation.ts,
+// use-automation-runs.ts) so atoms stay tied to "current scope" and don't
+// proliferate atom families for every (id, limit, offset) tuple.
+const DEFAULT_LIMIT = 50;
+const DEFAULT_OFFSET = 0;
+
+export const automationsListAtom = atomWithQuery((get) => {
+ const searchSpaceId = get(activeSearchSpaceIdAtom);
+
+ return {
+ queryKey: cacheKeys.automations.list(Number(searchSpaceId ?? 0), DEFAULT_LIMIT, DEFAULT_OFFSET),
+ enabled: !!searchSpaceId,
+ staleTime: 60 * 1000,
+ queryFn: async () => {
+ if (!searchSpaceId) {
+ return { items: [], total: 0 };
+ }
+ return automationsApiService.listAutomations({
+ search_space_id: Number(searchSpaceId),
+ limit: DEFAULT_LIMIT,
+ offset: DEFAULT_OFFSET,
+ });
+ },
+ };
+});
diff --git a/surfsense_web/contracts/types/automation.types.ts b/surfsense_web/contracts/types/automation.types.ts
new file mode 100644
index 000000000..a93249735
--- /dev/null
+++ b/surfsense_web/contracts/types/automation.types.ts
@@ -0,0 +1,193 @@
+import { z } from "zod";
+
+// =============================================================================
+// Enums — mirror app/automations/persistence/enums/*
+// =============================================================================
+
+export const automationStatus = z.enum(["active", "paused", "archived"]);
+export type AutomationStatus = z.infer<typeof automationStatus>;
+
+export const triggerType = z.enum(["schedule", "manual"]);
+export type TriggerType = z.infer<typeof triggerType>;
+
+export const runStatus = z.enum([
+ "pending",
+ "running",
+ "succeeded",
+ "failed",
+ "cancelled",
+ "timed_out",
+]);
+export type RunStatus = z.infer<typeof runStatus>;
+
+// =============================================================================
+// Definition envelope — mirror app/automations/schemas/definition/*
+// =============================================================================
+
+export const planStep = z.object({
+ step_id: z.string().min(1),
+ action: z.string().min(1),
+ when: z.string().nullable().optional(),
+ params: z.record(z.string(), z.any()).default({}),
+ output_as: z.string().nullable().optional(),
+ max_retries: z.number().int().min(0).nullable().optional(),
+ timeout_seconds: z.number().int().positive().nullable().optional(),
+});
+export type PlanStep = z.infer<typeof planStep>;
+
+export const definitionTriggerSpec = z.object({
+ type: z.string().min(1),
+ params: z.record(z.string(), z.any()).default({}),
+});
+export type DefinitionTriggerSpec = z.infer<typeof definitionTriggerSpec>;
+
+export const execution = z.object({
+ timeout_seconds: z.number().int().positive().default(600),
+ max_retries: z.number().int().min(0).default(2),
+ retry_backoff: z.enum(["exponential", "linear", "none"]).default("exponential"),
+ concurrency: z.enum(["drop_if_running", "queue", "always"]).default("drop_if_running"),
+ on_failure: z.array(planStep).default([]),
+});
+export type Execution = z.infer<typeof execution>;
+
+// Backend ``Metadata`` is ``extra="allow"`` — keep ``tags`` typed, accept arbitrary keys.
+export const metadata = z.object({ tags: z.array(z.string()).default([]) }).catchall(z.any());
+export type Metadata = z.infer<typeof metadata>;
+
+// Backend ``Inputs`` serializes its ``schema_`` field as ``schema`` (alias).
+export const inputs = z.object({
+ schema: z.record(z.string(), z.any()),
+});
+export type Inputs = z.infer<typeof inputs>;
+
+export const automationDefinition = z.object({
+ schema_version: z.string().default("1.0"),
+ name: z.string().min(1).max(200),
+ goal: z.string().nullable().optional(),
+ inputs: inputs.nullable().optional(),
+ triggers: z.array(definitionTriggerSpec).default([]),
+ plan: z.array(planStep).min(1),
+ execution: execution.default(execution.parse({})),
+ metadata: metadata.default(metadata.parse({})),
+});
+export type AutomationDefinition = z.infer<typeof automationDefinition>;
+
+// =============================================================================
+// Triggers (sub-resource) — mirror app/automations/schemas/api/trigger.py
+// =============================================================================
+
+export const triggerCreateRequest = z.object({
+ type: triggerType,
+ params: z.record(z.string(), z.any()).default({}),
+ static_inputs: z.record(z.string(), z.any()).default({}),
+ enabled: z.boolean().default(true),
+});
+export type TriggerCreateRequest = z.infer<typeof triggerCreateRequest>;
+
+export const triggerUpdateRequest = z.object({
+ enabled: z.boolean().nullable().optional(),
+ params: z.record(z.string(), z.any()).nullable().optional(),
+ static_inputs: z.record(z.string(), z.any()).nullable().optional(),
+});
+export type TriggerUpdateRequest = z.infer<typeof triggerUpdateRequest>;
+
+export const trigger = z.object({
+ id: z.number(),
+ type: triggerType,
+ params: z.record(z.string(), z.any()),
+ static_inputs: z.record(z.string(), z.any()),
+ enabled: z.boolean(),
+ last_fired_at: z.string().nullable().optional(),
+ next_fire_at: z.string().nullable().optional(),
+ created_at: z.string(),
+});
+export type Trigger = z.infer<typeof trigger>;
+
+// =============================================================================
+// Automations — mirror app/automations/schemas/api/automation.py
+// =============================================================================
+
+export const automationCreateRequest = z.object({
+ search_space_id: z.number(),
+ name: z.string().min(1).max(200),
+ description: z.string().nullable().optional(),
+ definition: automationDefinition,
+ triggers: z.array(triggerCreateRequest).default([]),
+});
+export type AutomationCreateRequest = z.infer<typeof automationCreateRequest>;
+
+export const automationUpdateRequest = z.object({
+ name: z.string().min(1).max(200).nullable().optional(),
+ description: z.string().nullable().optional(),
+ status: automationStatus.nullable().optional(),
+ definition: automationDefinition.nullable().optional(),
+});
+export type AutomationUpdateRequest = z.infer<typeof automationUpdateRequest>;
+
+export const automationSummary = z.object({
+ id: z.number(),
+ search_space_id: z.number(),
+ name: z.string(),
+ description: z.string().nullable().optional(),
+ status: automationStatus,
+ version: z.number(),
+ created_at: z.string(),
+ updated_at: z.string(),
+});
+export type AutomationSummary = z.infer<typeof automationSummary>;
+
+export const automation = automationSummary.extend({
+ definition: automationDefinition,
+ triggers: z.array(trigger).default([]),
+});
+export type Automation = z.infer<typeof automation>;
+
+export const automationListResponse = z.object({
+ items: z.array(automationSummary),
+ total: z.number(),
+});
+export type AutomationListResponse = z.infer<typeof automationListResponse>;
+
+export const automationListParams = z.object({
+ search_space_id: z.number(),
+ limit: z.number().int().min(1).max(200).default(50),
+ offset: z.number().int().min(0).default(0),
+});
+export type AutomationListParams = z.infer<typeof automationListParams>;
+
+// =============================================================================
+// Runs (sub-resource) — mirror app/automations/schemas/api/run.py
+// =============================================================================
+
+export const runSummary = z.object({
+ id: z.number(),
+ automation_id: z.number(),
+ trigger_id: z.number().nullable().optional(),
+ status: runStatus,
+ started_at: z.string().nullable().optional(),
+ finished_at: z.string().nullable().optional(),
+ created_at: z.string(),
+});
+export type RunSummary = z.infer<typeof runSummary>;
+
+export const run = runSummary.extend({
+ definition_snapshot: z.record(z.string(), z.any()),
+ inputs: z.record(z.string(), z.any()),
+ step_results: z.array(z.record(z.string(), z.any())),
+ output: z.record(z.string(), z.any()).nullable().optional(),
+ artifacts: z.array(z.record(z.string(), z.any())),
+ error: z.record(z.string(), z.any()).nullable().optional(),
+});
+export type Run = z.infer<typeof run>;
+
+export const runListResponse = z.object({
+ items: z.array(runSummary),
+ total: z.number(),
+});
+export type RunListResponse = z.infer<typeof runListResponse>;
+
+export const runListParams = z.object({
+ limit: z.number().int().min(1).max(200).default(50),
+ offset: z.number().int().min(0).default(0),
+});
+export type RunListParams = z.infer<typeof runListParams>;
diff --git a/surfsense_web/hooks/use-automation-runs.ts b/surfsense_web/hooks/use-automation-runs.ts
new file mode 100644
index 000000000..c91c7bd6e
--- /dev/null
+++ b/surfsense_web/hooks/use-automation-runs.ts
@@ -0,0 +1,42 @@
+"use client";
+import { useQuery } from "@tanstack/react-query";
+import type { Run, RunListResponse } from "@/contracts/types/automation.types";
+import { automationsApiService } from "@/lib/apis/automations-api.service";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+
+const DEFAULT_LIMIT = 50;
+const DEFAULT_OFFSET = 0;
+
+export interface UseAutomationRunsOptions {
+ limit?: number;
+ offset?: number;
+ enabled?: boolean;
+}
+
+/** Paginated run history for one automation. Newest-first per backend. */
+export function useAutomationRuns(
+ automationId: number | undefined,
+ { limit = DEFAULT_LIMIT, offset = DEFAULT_OFFSET, enabled = true }: UseAutomationRunsOptions = {}
+) {
+ return useQuery<RunListResponse>({
+ queryKey: cacheKeys.automations.runs(automationId ?? 0, limit, offset),
+ queryFn: () => automationsApiService.listRuns(automationId as number, { limit, offset }),
+ enabled: enabled && !!automationId,
+ staleTime: 30_000,
+ });
+}
+
+/** Single run with the full snapshot, step results, output and artifacts. */
+export function useAutomationRun(
+ automationId: number | undefined,
+ runId: number | undefined,
+ options: { enabled?: boolean } = {}
+) {
+ const { enabled = true } = options;
+ return useQuery<Run>({
+ queryKey: cacheKeys.automations.run(automationId ?? 0, runId ?? 0),
+ queryFn: () => automationsApiService.getRun(automationId as number, runId as number),
+ enabled: enabled && !!automationId && !!runId,
+ staleTime: 30_000,
+ });
+}
diff --git a/surfsense_web/hooks/use-automation.ts b/surfsense_web/hooks/use-automation.ts
new file mode 100644
index 000000000..d49ec03a1
--- /dev/null
+++ b/surfsense_web/hooks/use-automation.ts
@@ -0,0 +1,19 @@
+"use client";
+import { useQuery } from "@tanstack/react-query";
+import type { Automation } from "@/contracts/types/automation.types";
+import { automationsApiService } from "@/lib/apis/automations-api.service";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+
+/**
+ * Fetch a single automation with its definition and triggers.
+ * Lives outside the jotai atom layer because it's keyed by id, not by the
+ * "current scope" the atom layer assumes.
+ */
+export function useAutomation(automationId: number | undefined) {
+ return useQuery<Automation>({
+ queryKey: cacheKeys.automations.detail(automationId ?? 0),
+ queryFn: () => automationsApiService.getAutomation(automationId as number),
+ enabled: !!automationId,
+ staleTime: 60_000,
+ });
+}
diff --git a/surfsense_web/hooks/use-automations.ts b/surfsense_web/hooks/use-automations.ts
new file mode 100644
index 000000000..945e91866
--- /dev/null
+++ b/surfsense_web/hooks/use-automations.ts
@@ -0,0 +1,24 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { automationsListAtom } from "@/atoms/automations/automations-query.atoms";
+
+/**
+ * List automations in the active search space (first page).
+ * No pagination knobs are exposed here (unlike useAutomationRuns); v1 surfaces
+ * only the first page since automation counts are expected to be small.
+ */
+export function useAutomations() {
+ const { data, isLoading, error, refetch } = useAutomationsRaw();
+ return {
+ automations: data?.items ?? [],
+ total: data?.total ?? 0,
+ loading: isLoading,
+ error,
+ refresh: refetch,
+ };
+}
+
+// Exposed for callers that prefer the raw react-query result shape.
+export function useAutomationsRaw() {
+ return useAtomValue(automationsListAtom);
+}
diff --git a/surfsense_web/lib/apis/automations-api.service.ts b/surfsense_web/lib/apis/automations-api.service.ts
new file mode 100644
index 000000000..ebe72bea5
--- /dev/null
+++ b/surfsense_web/lib/apis/automations-api.service.ts
@@ -0,0 +1,102 @@
+import {
+ type AutomationCreateRequest,
+ type AutomationListParams,
+ type AutomationUpdateRequest,
+ automation,
+ automationCreateRequest,
+ automationListResponse,
+ automationUpdateRequest,
+ type RunListParams,
+ run,
+ runListResponse,
+ type TriggerCreateRequest,
+ type TriggerUpdateRequest,
+ trigger,
+ triggerCreateRequest,
+ triggerUpdateRequest,
+} from "@/contracts/types/automation.types";
+import { ValidationError } from "../error";
+import { baseApiService } from "./base-api.service";
+
+const BASE = "/api/v1/automations";
+
+function rejectIfInvalid<T>(
+ parsed: { success: true; data: T } | { success: false; error: { issues: { message: string }[] } }
+): T {
+ if (!parsed.success) {
+ throw new ValidationError(
+ `Invalid request: ${parsed.error.issues.map((i) => i.message).join(", ")}`
+ );
+ }
+ return parsed.data;
+}
+
+class AutomationsApiService {
+ // ---- Automations ---------------------------------------------------------
+
+ listAutomations = async (params: AutomationListParams) => {
+ const qs = new URLSearchParams({
+ search_space_id: String(params.search_space_id),
+ limit: String(params.limit),
+ offset: String(params.offset),
+ });
+ return baseApiService.get(`${BASE}?${qs.toString()}`, automationListResponse);
+ };
+
+ getAutomation = async (automationId: number) => {
+ return baseApiService.get(`${BASE}/${automationId}`, automation);
+ };
+
+ createAutomation = async (request: AutomationCreateRequest) => {
+ const data = rejectIfInvalid(automationCreateRequest.safeParse(request));
+ return baseApiService.post(BASE, automation, { body: data });
+ };
+
+ updateAutomation = async (automationId: number, request: AutomationUpdateRequest) => {
+ const data = rejectIfInvalid(automationUpdateRequest.safeParse(request));
+ return baseApiService.patch(`${BASE}/${automationId}`, automation, { body: data });
+ };
+
+ // Server returns 204; baseApiService now resolves to null and skips schema validation.
+ deleteAutomation = async (automationId: number) => {
+ return baseApiService.delete(`${BASE}/${automationId}`);
+ };
+
+ // ---- Triggers (sub-resource) --------------------------------------------
+
+ addTrigger = async (automationId: number, request: TriggerCreateRequest) => {
+ const data = rejectIfInvalid(triggerCreateRequest.safeParse(request));
+ return baseApiService.post(`${BASE}/${automationId}/triggers`, trigger, { body: data });
+ };
+
+ updateTrigger = async (
+ automationId: number,
+ triggerId: number,
+ request: TriggerUpdateRequest
+ ) => {
+ const data = rejectIfInvalid(triggerUpdateRequest.safeParse(request));
+ return baseApiService.patch(`${BASE}/${automationId}/triggers/${triggerId}`, trigger, {
+ body: data,
+ });
+ };
+
+ removeTrigger = async (automationId: number, triggerId: number) => {
+ return baseApiService.delete(`${BASE}/${automationId}/triggers/${triggerId}`);
+ };
+
+ // ---- Runs (sub-resource, read-only) -------------------------------------
+
+ listRuns = async (automationId: number, params: RunListParams) => {
+ const qs = new URLSearchParams({
+ limit: String(params.limit),
+ offset: String(params.offset),
+ });
+ return baseApiService.get(`${BASE}/${automationId}/runs?${qs.toString()}`, runListResponse);
+ };
+
+ getRun = async (automationId: number, runId: number) => {
+ return baseApiService.get(`${BASE}/${automationId}/runs/${runId}`, run);
+ };
+}
+
+export const automationsApiService = new AutomationsApiService();
diff --git a/surfsense_web/lib/query-client/cache-keys.ts b/surfsense_web/lib/query-client/cache-keys.ts
index ce45ee143..8943d6842 100644
--- a/surfsense_web/lib/query-client/cache-keys.ts
+++ b/surfsense_web/lib/query-client/cache-keys.ts
@@ -126,4 +126,14 @@ export const cacheKeys = {
batchUnreadCounts: (searchSpaceId: number | null) =>
["notifications", "unread-counts-batch", searchSpaceId] as const,
},
+ automations: {
+ // list endpoint is keyed by pagination too so distinct pages don't collide
+ list: (searchSpaceId: number, limit: number, offset: number) =>
+ ["automations", "list", searchSpaceId, limit, offset] as const,
+ detail: (automationId: number) => ["automations", "detail", automationId] as const,
+ runs: (automationId: number, limit: number, offset: number) =>
+ ["automations", "runs", automationId, limit, offset] as const,
+ run: (automationId: number, runId: number) =>
+ ["automations", "runs", automationId, runId] as const,
+ },
};
From fe28833ad47bcff944a44ecdafde394d3247de96 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 01:02:48 +0200
Subject: [PATCH 68/87] feat(web): automations list page with status,
pause/resume and delete
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Vertical slice at /dashboard/[id]/automations. The page is read-only by
default; every action gates on backend automations:* permissions via a
co-located permissions hook so adding/removing surfaces stays a
one-file change.
Route:
- page.tsx — server boundary; extracts search_space_id.
- automations-content.tsx — client orchestrator (loading / no-access /
error / empty / table branches).
Components (one concern per file):
- automations-header.tsx — title + count + "Create via chat" CTA.
- automations-table.tsx + automation-row.tsx — name/status/updated
columns; row name links to detail (PR4).
- automation-status-badge.tsx — active / paused / archived pill.
- automation-row-actions.tsx — ⋯ menu with pause/resume + delete,
gated on canUpdate / canDelete. Archived rows hide the toggle.
- delete-automation-dialog.tsx — destructive confirm; mentions FK
cascade explicitly so users know triggers/runs go too.
- automations-empty-state.tsx — zero-state pointing to chat (creation
is intent-driven via the create_automation HITL tool, not a form).
- automations-loading.tsx — skeleton rows in the same shell so the
layout doesn't shift on data arrival.
- automation-triggers-summary.tsx — small cron-describer (daily,
weekdays, weekly, monthly, hourly) + timezone for the detail page.
Kept inline since v1 only registers schedule.
Hooks:
- use-automation-permissions.ts — single source of truth for the
slice's canCreate/canRead/canUpdate/canDelete/canExecute gates,
backed by myAccessAtom.
Pause/resume and delete reuse the PR2 mutation atoms, so list +
detail caches stay coherent without bespoke invalidation.
Out of scope (later PRs):
- detail route (definition viewer + triggers manager) — PR4
- raw JSON editor — PR5
- nav entry / sidebar wiring — small follow-up PR
---
.../automations/automations-content.tsx | 101 +++++++++++++++
.../components/automation-row-actions.tsx | 98 ++++++++++++++
.../automations/components/automation-row.tsx | 61 +++++++++
.../components/automation-status-badge.tsx | 49 +++++++
.../automation-triggers-summary.tsx | 120 ++++++++++++++++++
.../components/automations-empty-state.tsx | 42 ++++++
.../components/automations-header.tsx | 44 +++++++
.../components/automations-loading.tsx | 36 ++++++
.../components/automations-table.tsx | 73 +++++++++++
.../components/delete-automation-dialog.tsx | 80 ++++++++++++
.../hooks/use-automation-permissions.ts | 37 ++++++
.../[search_space_id]/automations/page.tsx | 15 +++
12 files changed, 756 insertions(+)
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row-actions.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-status-badge.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-loading.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-table.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/hooks/use-automation-permissions.ts
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/page.tsx
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx
new file mode 100644
index 000000000..fa1caff96
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx
@@ -0,0 +1,101 @@
+"use client";
+import { ShieldAlert } from "lucide-react";
+import { useAutomations } from "@/hooks/use-automations";
+import { AutomationsEmptyState } from "./components/automations-empty-state";
+import { AutomationsHeader } from "./components/automations-header";
+import { AutomationsTable } from "./components/automations-table";
+import { useAutomationPermissions } from "./hooks/use-automation-permissions";
+
+interface AutomationsContentProps {
+ searchSpaceId: number;
+}
+
+/**
+ * Client orchestrator for the automations list page. Pulls the active
+ * search space's first page (via ``useAutomations`` → ``automationsListAtom``)
+ * and the user's permissions, then decides between empty / loading / table.
+ *
+ * Read access is mandatory; anything else is hidden behind RBAC. The
+ * permissions hook is co-located in this slice so adding/removing
+ * surfaces is a one-file change.
+ */
+export function AutomationsContent({ searchSpaceId }: AutomationsContentProps) {
+ const { automations, total, loading, error } = useAutomations();
+ const perms = useAutomationPermissions();
+
+ if (perms.loading) {
+ // Permissions gate the entire page; defer everything until we know.
+ return (
+ <>
+
+
+ >
+ );
+ }
+
+ if (!perms.canRead) {
+ return (
+
+
+
Access denied
+
+ You don't have permission to view automations in this search space.
+
+
+ );
+ }
+
+ if (error) {
+ return (
+ <>
+
+
+
Couldn't load automations. {error.message}
+
+ >
+ );
+ }
+
+ if (!loading && automations.length === 0) {
+ return (
+ <>
+
+
+ >
+ );
+ }
+
+ return (
+ <>
+
+
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row-actions.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row-actions.tsx
new file mode 100644
index 000000000..229a417dc
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row-actions.tsx
@@ -0,0 +1,98 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { MoreHorizontal, Pause, Play, Trash2 } from "lucide-react";
+import { useState } from "react";
+import { updateAutomationMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { Button } from "@/components/ui/button";
+import {
+ DropdownMenu,
+ DropdownMenuContent,
+ DropdownMenuItem,
+ DropdownMenuSeparator,
+ DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import type { AutomationSummary } from "@/contracts/types/automation.types";
+import { DeleteAutomationDialog } from "./delete-automation-dialog";
+
+interface AutomationRowActionsProps {
+ automation: AutomationSummary;
+ searchSpaceId: number;
+ canUpdate: boolean;
+ canDelete: boolean;
+}
+
+/**
+ * Three-dot menu on each row: pause/resume (if updatable) and delete
+ * (if deletable). The menu itself is hidden when the user has neither
+ * permission so we don't render an empty trigger.
+ */
+export function AutomationRowActions({
+ automation,
+ searchSpaceId,
+ canUpdate,
+ canDelete,
+}: AutomationRowActionsProps) {
+ const { mutateAsync: updateAutomation, isPending: updating } = useAtomValue(
+ updateAutomationMutationAtom
+ );
+ const [deleteOpen, setDeleteOpen] = useState(false);
+
+ if (!canUpdate && !canDelete) return null;
+
+ const nextStatus = automation.status === "active" ? "paused" : "active";
+ const pauseLabel = automation.status === "active" ? "Pause" : "Resume";
+ const PauseIcon = automation.status === "active" ? Pause : Play;
+ const canToggle = canUpdate && automation.status !== "archived";
+
+ async function handleTogglePause() {
+ await updateAutomation({
+ automationId: automation.id,
+ patch: { status: nextStatus },
+ });
+ }
+
+ return (
+ <>
+
+
+
+
+
+
+
+ {canToggle && (
+
+
+ {pauseLabel}
+
+ )}
+ {canToggle && canDelete && }
+ {canDelete && (
+ setDeleteOpen(true)}
+ className="text-destructive focus:text-destructive"
+ >
+
+ Delete
+
+ )}
+
+
+
+ {canDelete && (
+
+ )}
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row.tsx
new file mode 100644
index 000000000..a59fb4527
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-row.tsx
@@ -0,0 +1,61 @@
+"use client";
+import Link from "next/link";
+import { TableCell, TableRow } from "@/components/ui/table";
+import type { AutomationSummary } from "@/contracts/types/automation.types";
+import { formatRelativeDate } from "@/lib/format-date";
+import { AutomationRowActions } from "./automation-row-actions";
+import { AutomationStatusBadge } from "./automation-status-badge";
+
+interface AutomationRowProps {
+ automation: AutomationSummary;
+ searchSpaceId: number;
+ canUpdate: boolean;
+ canDelete: boolean;
+}
+
+/**
+ * One row in the automations table. The name links to the detail page;
+ * actions are gated by ``canUpdate`` / ``canDelete``. Trigger summary
+ * is intentionally left to the detail page — list responses don't
+ * include triggers and we want to avoid N+1 detail fetches.
+ */
+export function AutomationRow({
+ automation,
+ searchSpaceId,
+ canUpdate,
+ canDelete,
+}: AutomationRowProps) {
+ return (
+
+
+
+
+ {automation.name}
+
+ {automation.description && (
+
+ {automation.description}
+
+ )}
+
+
+
+
+
+
+ {formatRelativeDate(automation.updated_at)}
+
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-status-badge.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-status-badge.tsx
new file mode 100644
index 000000000..ecf171e78
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-status-badge.tsx
@@ -0,0 +1,49 @@
+"use client";
+import { Archive, CircleDot, Pause } from "lucide-react";
+import type { AutomationStatus } from "@/contracts/types/automation.types";
+import { cn } from "@/lib/utils";
+
+interface AutomationStatusBadgeProps {
+ status: AutomationStatus;
+ className?: string;
+}
+
+// Color + icon per status. Active = green, paused = amber, archived = muted.
+// Keyed by every AutomationStatus value, so the lookup in
+// AutomationStatusBadge (STATUS_STYLES[status]) always yields an entry.
+// Each entry carries the visible label, the lucide icon component, and the
+// Tailwind classes (light + dark variants) applied to the badge.
+const STATUS_STYLES: Record<
+ AutomationStatus,
+ { label: string; icon: typeof CircleDot; classes: string }
+> = {
+ active: {
+ label: "Active",
+ icon: CircleDot,
+ classes:
+ "bg-emerald-50 text-emerald-700 border border-emerald-200 dark:bg-emerald-950/40 dark:text-emerald-300 dark:border-emerald-900/50",
+ },
+ paused: {
+ label: "Paused",
+ icon: Pause,
+ classes:
+ "bg-amber-50 text-amber-700 border border-amber-200 dark:bg-amber-950/40 dark:text-amber-300 dark:border-amber-900/50",
+ },
+ archived: {
+ label: "Archived",
+ icon: Archive,
+ classes: "bg-muted text-muted-foreground border border-border/60",
+ },
+};
+
+export function AutomationStatusBadge({ status, className }: AutomationStatusBadgeProps) {
+ const { label, icon: Icon, classes } = STATUS_STYLES[status];
+ return (
+
+
+ {label}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
new file mode 100644
index 000000000..ac27b01e2
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
@@ -0,0 +1,120 @@
+"use client";
+import { CalendarClock, Pause } from "lucide-react";
+import type { Trigger } from "@/contracts/types/automation.types";
+
+interface AutomationTriggersSummaryProps {
+ triggers: Trigger[];
+}
+
+/**
+ * One-line summary of an automation's triggers for the list view.
+ *
+ * v1 only registers ``schedule`` so this stays compact:
+ * - 0 triggers → "No triggers"
+ * - 1 schedule trigger → "Mon–Fri at 09:00 · UTC" + disabled badge if off
+ * - >1 → "N triggers"
+ *
+ * The detail page renders the full per-trigger editor.
+ */
+export function AutomationTriggersSummary({ triggers }: AutomationTriggersSummaryProps) {
+ if (triggers.length === 0) {
+ return No triggers ;
+ }
+
+ if (triggers.length > 1) {
+ return {triggers.length} triggers ;
+ }
+
+ const [trigger] = triggers;
+
+ if (trigger.type === "schedule") {
+ const cron = typeof trigger.params.cron === "string" ? trigger.params.cron : undefined;
+ const tz = typeof trigger.params.timezone === "string" ? trigger.params.timezone : "UTC";
+ const human = cron ? describeCron(cron) : "Schedule";
+
+ return (
+
+
+ {human}
+ · {tz}
+ {!trigger.enabled && (
+
+
+ Off
+
+ )}
+
+ );
+ }
+
+ return {trigger.type} ;
+}
+
+// ----------------------------------------------------------------------------
+// Minimal cron describer for the common 5-field patterns SurfSense automations
+// surface today. Falls back to the raw expression when unrecognized so the user
+// still sees something honest instead of a guess.
+//
+// Kept inline (not a library) because:
+// - v1 only needs to recognize a small set of patterns produced by the
+// drafter LLM (hourly/daily/weekdays/weekly/monthly).
+// - All current consumers live in this slice. If reuse grows, lift to
+// ``lib/cron-describe.ts``.
+// ----------------------------------------------------------------------------
+
+const DAY_NAMES = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
+
+/**
+ * Describe a 5-field cron expression (minute, hour, day-of-month, month,
+ * day-of-week) as a short English phrase.
+ *
+ * Recognized shapes — all require month = "*" and a purely numeric minute:
+ *   - hourly             "0 * * * *"      → "Every hour" / "Every hour at :MM"
+ *   - daily              "0 9 * * *"      → "Daily at 09:00"
+ *   - weekdays           "0 9 * * 1-5"    → "Mon–Fri at 09:00"
+ *   - specific weekdays  "0 9 * * 1,3,5"  → "Mon, Wed, Fri at 09:00"
+ *   - monthly            "0 9 1 * *"      → "Day 1 of each month at 09:00"
+ *
+ * @param cron - Raw cron expression.
+ * @returns The phrase, or `cron` unchanged when it does not have exactly five
+ *   fields or matches none of the shapes above.
+ */
+function describeCron(cron: string): string {
+	const parts = cron.trim().split(/\s+/);
+	if (parts.length !== 5) return cron;
+
+	const [minute, hour, dom, month, dow] = parts;
+	const isNumber = (field: string): boolean => /^\d+$/.test(field);
+
+	// Every supported pattern runs in all months at a fixed minute.
+	if (month !== "*" || !isNumber(minute)) return cron;
+
+	// Hourly ("0 * * * *")
+	if (hour === "*") {
+		if (dom !== "*" || dow !== "*") return cron;
+		return minute === "0" ? "Every hour" : `Every hour at :${minute.padStart(2, "0")}`;
+	}
+
+	if (!isNumber(hour)) return cron;
+	const time = formatTime(hour, minute);
+
+	if (dom === "*") {
+		// Daily at H:MM ("0 9 * * *")
+		if (dow === "*") return `Daily at ${time}`;
+		// Weekdays at H:MM ("0 9 * * 1-5")
+		if (dow === "1-5") return `Mon–Fri at ${time}`;
+		// Specific weekday(s) ("0 9 * * 1" or "0 9 * * 1,3,5"). Anything we
+		// cannot name with certainty falls back to the raw expression.
+		const days = describeWeekdays(dow);
+		return days === undefined ? cron : `${days} at ${time}`;
+	}
+
+	// Monthly on day N ("0 9 1 * *")
+	if (dow === "*" && isNumber(dom)) {
+		return `Day ${dom} of each month at ${time}`;
+	}
+
+	return cron;
+}
+
+/**
+ * Convert a comma-separated day-of-week list into day names ("1,3" → "Mon, Wed").
+ *
+ * Returns `undefined` when any entry is empty, non-numeric, or greater than 7,
+ * so the caller shows the raw cron instead of a wrong day. Both 0 and 7 map
+ * to Sunday via the modulo.
+ */
+function describeWeekdays(dow: string): string | undefined {
+	const names: string[] = [];
+	for (const token of dow.split(",")) {
+		if (!/^\d+$/.test(token)) return undefined;
+		const index = Number(token);
+		if (index > 7) return undefined;
+		const name = DAY_NAMES[index % 7];
+		if (name === undefined) return undefined;
+		names.push(name);
+	}
+	return names.join(", ");
+}
+
+/** Zero-pad hour and minute to two digits each and join them as "HH:MM". */
+function formatTime(hour: string, minute: string): string {
+	return `${hour.padStart(2, "0")}:${minute.padStart(2, "0")}`;
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx
new file mode 100644
index 000000000..4004cce9b
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx
@@ -0,0 +1,42 @@
+"use client";
+import { MessageSquarePlus, Workflow } from "lucide-react";
+import Link from "next/link";
+import { Button } from "@/components/ui/button";
+
+interface AutomationsEmptyStateProps {
+ searchSpaceId: number;
+ canCreate: boolean;
+}
+
+/**
+ * Zero-state for the automations list. The primary CTA points to a new
+ * chat — creation happens via the ``create_automation`` HITL tool, not a
+ * "new automation" form. We surface the chat path explicitly so users
+ * don't go hunting for an "add" button that doesn't exist.
+ */
+export function AutomationsEmptyState({ searchSpaceId, canCreate }: AutomationsEmptyStateProps) {
+ return (
+
+
+
+
+
No automations yet
+
+ Automations let SurfSense run agent tasks on a schedule. Describe what you want in chat and
+ SurfSense drafts the automation for your approval.
+
+ {canCreate ? (
+
+
+
+ Create via chat
+
+
+ ) : (
+
+ You don't have permission to create automations in this search space.
+
+ )}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
new file mode 100644
index 000000000..b938825a6
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
@@ -0,0 +1,44 @@
+"use client";
+import { MessageSquarePlus } from "lucide-react";
+import Link from "next/link";
+import { Button } from "@/components/ui/button";
+
+interface AutomationsHeaderProps {
+ searchSpaceId: number;
+ total: number;
+ loading: boolean;
+ canCreate: boolean;
+}
+
+/**
+ * Page header: title + count + "Create via chat" CTA. Creation is intent-driven
+ * (the create_automation tool runs inside chat with a HITL approval card), so
+ * the CTA links to a new chat rather than opening a form.
+ */
+export function AutomationsHeader({
+ searchSpaceId,
+ total,
+ loading,
+ canCreate,
+}: AutomationsHeaderProps) {
+ return (
+
+
+
Automations
+ {!loading && (
+
+ {total} {total === 1 ? "automation" : "automations"}
+
+ )}
+
+ {canCreate && (
+
+
+
+ Create via chat
+
+
+ )}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-loading.tsx
new file mode 100644
index 000000000..1156be3f6
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-loading.tsx
@@ -0,0 +1,36 @@
+"use client";
+import { Skeleton } from "@/components/ui/skeleton";
+import { TableCell, TableRow } from "@/components/ui/table";
+
+const ROW_KEYS = ["sk-1", "sk-2", "sk-3"];
+
+/**
+ * Skeleton rows for the automations table. Number of rows is fixed since
+ * we don't know the count ahead of time and three placeholders is enough
+ * to communicate "loading" without flashing too much chrome.
+ */
+export function AutomationsLoadingRows() {
+ return (
+ <>
+ {ROW_KEYS.map((key) => (
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ))}
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-table.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-table.tsx
new file mode 100644
index 000000000..ec3aeeef5
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-table.tsx
@@ -0,0 +1,73 @@
+"use client";
+import { Activity, CalendarDays, Workflow } from "lucide-react";
+import { Table, TableBody, TableHead, TableHeader, TableRow } from "@/components/ui/table";
+import type { AutomationSummary } from "@/contracts/types/automation.types";
+import { AutomationRow } from "./automation-row";
+import { AutomationsLoadingRows } from "./automations-loading";
+
+interface AutomationsTableProps {
+ automations: AutomationSummary[];
+ searchSpaceId: number;
+ loading: boolean;
+ canUpdate: boolean;
+ canDelete: boolean;
+}
+
+/**
+ * Table shell + header. Rows render below — loading state renders skeleton
+ * rows in the same shell so the layout doesn't shift on data arrival.
+ */
+export function AutomationsTable({
+ automations,
+ searchSpaceId,
+ loading,
+ canUpdate,
+ canDelete,
+}: AutomationsTableProps) {
+ return (
+
+
+
+
+
+
+
+ Name
+
+
+
+
+
+ Status
+
+
+
+
+
+ Updated
+
+
+
+ Actions
+
+
+
+
+ {loading ? (
+
+ ) : (
+ automations.map((automation) => (
+
+ ))
+ )}
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx
new file mode 100644
index 000000000..db73ddad5
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx
@@ -0,0 +1,80 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { useState } from "react";
+import { deleteAutomationMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import {
+ AlertDialog,
+ AlertDialogAction,
+ AlertDialogCancel,
+ AlertDialogContent,
+ AlertDialogDescription,
+ AlertDialogFooter,
+ AlertDialogHeader,
+ AlertDialogTitle,
+} from "@/components/ui/alert-dialog";
+import { Spinner } from "@/components/ui/spinner";
+
+interface DeleteAutomationDialogProps {
+ open: boolean;
+ onOpenChange: (open: boolean) => void;
+ automationId: number;
+ automationName: string;
+ searchSpaceId: number;
+}
+
+/**
+ * Confirm + delete one automation. FK cascade on the backend wipes attached
+ * triggers and runs, so we mention it explicitly. List re-fetch is handled
+ * by the mutation atom's onSuccess.
+ */
+export function DeleteAutomationDialog({
+ open,
+ onOpenChange,
+ automationId,
+ automationName,
+ searchSpaceId,
+}: DeleteAutomationDialogProps) {
+ const { mutateAsync: deleteAutomation } = useAtomValue(deleteAutomationMutationAtom);
+ const [submitting, setSubmitting] = useState(false);
+
+ async function handleConfirm() {
+ setSubmitting(true);
+ try {
+ await deleteAutomation({ automationId, searchSpaceId });
+ onOpenChange(false);
+ } finally {
+ setSubmitting(false);
+ }
+ }
+
+ return (
+
+
+
+ Delete this automation?
+
+ {automationName} and all of its
+ triggers and run history will be removed. This cannot be undone.
+
+
+
+ Cancel
+
+ {submitting ? (
+
+
+ Deleting…
+
+ ) : (
+ "Delete"
+ )}
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/hooks/use-automation-permissions.ts b/surfsense_web/app/dashboard/[search_space_id]/automations/hooks/use-automation-permissions.ts
new file mode 100644
index 000000000..293688710
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/hooks/use-automation-permissions.ts
@@ -0,0 +1,37 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { useMemo } from "react";
+import { canPerform, myAccessAtom } from "@/atoms/members/members-query.atoms";
+
+/**
+ * Centralized RBAC gates for the automations slice. Co-located with the
+ * route so adding/removing surfaces stays a one-file change. Backed by
+ * the same ``myAccessAtom`` the rest of the app uses; owners short-circuit
+ * to ``true`` for every action.
+ *
+ * Mirrors backend permissions in ``app.db.permissions`` (automations:*).
+ */
+export interface AutomationPermissions {
+ loading: boolean;
+ canCreate: boolean;
+ canRead: boolean;
+ canUpdate: boolean;
+ canDelete: boolean;
+ canExecute: boolean;
+}
+
+/**
+ * Resolve the current user's automation permissions from ``myAccessAtom``.
+ *
+ * Every ``can*`` flag is the result of ``canPerform`` for the matching
+ * ``automations:*`` action; ``loading`` mirrors the access query's
+ * ``isLoading``. The returned object is memoized and only rebuilt when the
+ * access data or the loading flag changes.
+ */
+export function useAutomationPermissions(): AutomationPermissions {
+	const accessQuery = useAtomValue(myAccessAtom);
+	const access = accessQuery.data;
+	const isLoading = accessQuery.isLoading;
+
+	return useMemo(() => {
+		const permissions: AutomationPermissions = {
+			loading: isLoading,
+			canCreate: canPerform(access, "automations:create"),
+			canRead: canPerform(access, "automations:read"),
+			canUpdate: canPerform(access, "automations:update"),
+			canDelete: canPerform(access, "automations:delete"),
+			canExecute: canPerform(access, "automations:execute"),
+		};
+		return permissions;
+	}, [access, isLoading]);
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/page.tsx
new file mode 100644
index 000000000..b77cb20f4
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/page.tsx
@@ -0,0 +1,15 @@
+import { AutomationsContent } from "./automations-content";
+
+export default async function AutomationsPage({
+ params,
+}: {
+ params: Promise<{ search_space_id: string }>;
+}) {
+ const { search_space_id } = await params;
+
+ return (
+
+ );
+}
From 7bc52dcdc04124dfff9b598eee5c33dfed488044 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 01:11:20 +0200
Subject: [PATCH 69/87] feat(web): surface Automations in the sidebar under
Inbox
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Adds an "Automations" nav entry rendered explicitly between Inbox and
(on mobile) Documents, mirroring how those two are pulled out of the
nav list and rendered above the chat sections. The icon is Workflow
to match settings/RBAC labelling.
LayoutDataProvider:
- Adds the entry to navItems pointing at /dashboard/[id]/automations.
- Marks isActive via pathname so the row highlights on the route.
- Tags /automations as a workspace-panel page so it renders in the
centered settings-style viewport (same chrome as Team / settings).
Sidebar:
- Pulls out automationsItem alongside inboxItem and documentsItem.
- Renders it between them.
- Excludes its URL from footerNavItems so it doesn't double-render.
Page-level RBAC still gates the actual view; the sidebar entry is
always visible (consistent with Inbox/Documents which are also not
gated at the nav layer).
Anonymous (FreeLayoutDataProvider) intentionally not touched —
automations is an authenticated feature.
---
.../layout/providers/LayoutDataProvider.tsx | 34 ++++++++++++++-----
.../components/layout/ui/sidebar/Sidebar.tsx | 28 ++++++++++++---
2 files changed, 50 insertions(+), 12 deletions(-)
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 917d1c6e1..67971e435 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -2,7 +2,7 @@
import { useQuery, useQueryClient } from "@tanstack/react-query";
import { useAtom, useAtomValue, useSetAtom } from "jotai";
-import { AlertTriangle, Inbox, LibraryBig } from "lucide-react";
+import { AlertTriangle, Inbox, LibraryBig, Workflow } from "lucide-react";
import { useParams, usePathname, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import { useTheme } from "next-themes";
@@ -335,9 +335,10 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid
}, [threadsData, searchSpaceId]);
// Navigation items
- // Inbox is rendered explicitly below "New chat" in the sidebar (it is also
- // surfaced in the icon rail's collapsed mode via this list). Announcements
- // has been moved to the avatar dropdown and is no longer a nav item.
+ // Inbox, Automations, and Documents are rendered explicitly below "New chat"
+ // in the sidebar (also surfaced in the icon rail's collapsed mode via this
+ // list). Announcements has been moved to the avatar dropdown.
+ const isAutomationsActive = pathname?.includes("/automations") === true;
const navItems: NavItem[] = useMemo(
() =>
(
@@ -349,6 +350,12 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid
isActive: isInboxSidebarOpen,
badge: totalUnreadCount > 0 ? formatInboxCount(totalUnreadCount) : undefined,
},
+ {
+ title: "Automations",
+ url: `/dashboard/${searchSpaceId}/automations`,
+ icon: Workflow,
+ isActive: isAutomationsActive,
+ },
isMobile
? {
title: "Documents",
@@ -359,7 +366,14 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid
: null,
] as (NavItem | null)[]
).filter((item): item is NavItem => item !== null),
- [isMobile, isInboxSidebarOpen, isDocumentsSidebarOpen, totalUnreadCount]
+ [
+ isMobile,
+ isInboxSidebarOpen,
+ isDocumentsSidebarOpen,
+ totalUnreadCount,
+ searchSpaceId,
+ isAutomationsActive,
+ ]
);
// Handlers
@@ -660,12 +674,14 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid
const isUserSettingsPage = pathname?.includes("/user-settings") === true;
const isSearchSpaceSettingsPage = pathname?.includes("/search-space-settings") === true;
const isTeamPage = pathname?.endsWith("/team") === true;
+ const isAutomationsPage = pathname?.includes("/automations") === true;
const useWorkspacePanel =
pathname?.endsWith("/buy-more") === true ||
pathname?.endsWith("/more-pages") === true ||
isUserSettingsPage ||
isSearchSpaceSettingsPage ||
- isTeamPage;
+ isTeamPage ||
+ isAutomationsPage;
return (
<>
@@ -705,12 +721,14 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid
isChatPage={isChatPage}
useWorkspacePanel={useWorkspacePanel}
workspacePanelViewportClassName={
- isUserSettingsPage || isSearchSpaceSettingsPage || isTeamPage
+ isUserSettingsPage || isSearchSpaceSettingsPage || isTeamPage || isAutomationsPage
? "items-start justify-center px-6 py-8 md:px-10 md:py-10"
: undefined
}
workspacePanelContentClassName={
- isUserSettingsPage || isSearchSpaceSettingsPage || isTeamPage ? "max-w-5xl" : undefined
+ isUserSettingsPage || isSearchSpaceSettingsPage || isTeamPage || isAutomationsPage
+ ? "max-w-5xl"
+ : undefined
}
isLoadingChats={isLoadingThreads}
activeSlideoutPanel={activeSlideoutPanel}
diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
index e0cb3072a..805f8bfd3 100644
--- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx
@@ -140,16 +140,26 @@ export function Sidebar({
const t = useTranslations("sidebar");
const [openDropdownChatId, setOpenDropdownChatId] = useState(null);
- // Inbox and Documents are rendered explicitly right below New Chat. Pull
- // them out of the nav items list so they don't also appear in the bottom
- // NavSection. Documents is only present in navItems on mobile.
+ // Inbox, Automations, and Documents are rendered explicitly right below
+ // New Chat. Pull them out of the nav items list so they don't also appear
+ // in the bottom NavSection. Documents is only present in navItems on
+ // mobile; Automations is identified by URL suffix so the same code path
+ // works across search spaces.
const inboxItem = useMemo(() => navItems.find((item) => item.url === "#inbox"), [navItems]);
+ const automationsItem = useMemo(
+ () => navItems.find((item) => item.url.endsWith("/automations")),
+ [navItems]
+ );
const documentsItem = useMemo(
() => navItems.find((item) => item.url === "#documents"),
[navItems]
);
const footerNavItems = useMemo(
- () => navItems.filter((item) => item.url !== "#inbox" && item.url !== "#documents"),
+ () =>
+ navItems.filter(
+ (item) =>
+ item.url !== "#inbox" && item.url !== "#documents" && !item.url.endsWith("/automations")
+ ),
[navItems]
);
@@ -227,6 +237,16 @@ export function Sidebar({
}
/>
)}
+ {automationsItem && (
+ onNavItemClick?.(automationsItem)}
+ isCollapsed={isCollapsed}
+ isActive={automationsItem.isActive}
+ tooltipContent={isCollapsed ? automationsItem.title : undefined}
+ />
+ )}
{documentsItem && (
Date: Thu, 28 May 2026 01:14:10 +0200
Subject: [PATCH 70/87] fix(web): hide header Create CTA on the automations
empty state
The empty-state card already hosts the primary "Create via chat" CTA;
keeping the header button on the same screen showed two identical
buttons. Adds an optional ``showCreateCta`` prop to AutomationsHeader
(default true) and turns it off only in the empty branch so the card
stays the focal point.
---
.../automations/automations-content.tsx | 1 +
.../automations/components/automations-header.tsx | 9 ++++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx
index fa1caff96..756221d38 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/automations-content.tsx
@@ -75,6 +75,7 @@ export function AutomationsContent({ searchSpaceId }: AutomationsContentProps) {
total={0}
loading={false}
canCreate={perms.canCreate}
+ showCreateCta={false}
/>
>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
index b938825a6..22ea60664 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
@@ -8,6 +8,12 @@ interface AutomationsHeaderProps {
total: number;
loading: boolean;
canCreate: boolean;
+ /**
+ * Render the header's Create CTA. Defaults to true; the empty state owns
+ * the primary CTA on its own card, so the orchestrator turns this off
+ * there to avoid a duplicate button.
+ */
+ showCreateCta?: boolean;
}
/**
@@ -20,6 +26,7 @@ export function AutomationsHeader({
total,
loading,
canCreate,
+ showCreateCta = true,
}: AutomationsHeaderProps) {
return (
@@ -31,7 +38,7 @@ export function AutomationsHeader({
)}
- {canCreate && (
+ {canCreate && showCreateCta && (
From c0a9ea368f43af57e12342a49d17d102c2642b85 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 01:21:54 +0200
Subject: [PATCH 71/87] feat(web): automations detail page (definition viewer +
trigger manager)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Vertical slice at /dashboard/[id]/automations/[automation_id]. Branches
in the orchestrator are: perms loading → skeleton, no-access → access
denied panel, bad id → not-found, fetch loading → skeleton, fetch
error → not-found, loaded → header + definition + triggers.
Route:
- page.tsx — server boundary; extracts both ids.
- automation-detail-content.tsx — client orchestrator.
Header:
- automation-detail-header.tsx — back link, name, status badge,
description, pause/resume + delete actions. Delete navigates back to
the list via a new onDeleted hook on DeleteAutomationDialog so the
list page (where the row just vanishes) stays unaffected.
- automation-not-found.tsx — 404/403/NaN-id panel. We don't
distinguish missing vs. forbidden in the UI.
Definition (read-only in v1):
- automation-definition-section.tsx — wrapper Card; renders goal +
tags + execution defaults + inputs schema (if present) + plan.
- plan-step-card.tsx — one step (when, output_as, retries, timeout,
params JSON).
- execution-summary.tsx — timeout / max_retries / backoff /
concurrency + on_failure step count.
- inputs-schema-preview.tsx — formatted JSON of inputs.schema; only
rendered when the definition declares inputs.
Triggers:
- automation-triggers-section.tsx — wrapper Card, "Add via chat" CTA
(creation is intent-driven, same philosophy as automations).
- trigger-card.tsx — schedule + timezone + cron, last/next fire
hints, static_inputs JSON, enable Switch and remove button.
- delete-trigger-dialog.tsx — confirm + mutation atom.
Shared:
- lib/describe-cron.ts — moved out of automation-triggers-summary.tsx
so both list and detail can describe schedules consistently
(daily/weekdays/weekly/monthly/hourly, raw cron fallback).
Loading:
- automation-detail-loading.tsx — same shell as the loaded view so the
layout doesn't jump on data arrival.
RBAC: each interactive surface is independently gated
(canUpdate/canDelete/canCreate) so the orchestrator stays thin and the
component tree is self-documenting about what each action requires.
Out of scope (later PRs):
- Editing definition / trigger params (raw-JSON path) — PR5
- Run history — PR6
---
.../automation-detail-content.tsx | 86 ++++++++++
.../automation-definition-section.tsx | 99 ++++++++++++
.../components/automation-detail-header.tsx | 129 +++++++++++++++
.../components/automation-detail-loading.tsx | 42 +++++
.../components/automation-not-found.tsx | 34 ++++
.../automation-triggers-section.tsx | 75 +++++++++
.../components/delete-trigger-dialog.tsx | 80 +++++++++
.../components/execution-summary.tsx | 37 +++++
.../components/inputs-schema-preview.tsx | 20 +++
.../components/plan-step-card.tsx | 73 +++++++++
.../components/trigger-card.tsx | 152 ++++++++++++++++++
.../automations/[automation_id]/page.tsx | 18 +++
.../automation-triggers-summary.tsx | 70 +-------
.../components/delete-automation-dialog.tsx | 8 +
.../automations/lib/describe-cron.ts | 66 ++++++++
15 files changed, 920 insertions(+), 69 deletions(-)
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-not-found.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/delete-trigger-dialog.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/execution-summary.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/inputs-schema-preview.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/page.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
new file mode 100644
index 000000000..a82887721
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
@@ -0,0 +1,86 @@
+"use client";
+import { ShieldAlert } from "lucide-react";
+import { useAutomation } from "@/hooks/use-automation";
+import { useAutomationPermissions } from "../hooks/use-automation-permissions";
+import { AutomationDefinitionSection } from "./components/automation-definition-section";
+import { AutomationDetailHeader } from "./components/automation-detail-header";
+import { AutomationDetailLoading } from "./components/automation-detail-loading";
+import { AutomationNotFound } from "./components/automation-not-found";
+import { AutomationTriggersSection } from "./components/automation-triggers-section";
+
+interface AutomationDetailContentProps {
+ searchSpaceId: number;
+ automationId: number;
+}
+
+/**
+ * Client orchestrator for one automation's detail view. Branches:
+ * - permissions loading → skeleton
+ * - no read permission → access denied panel
+ * - bad id (NaN, non-integer, or <= 0) → not-found panel
+ * - detail fetching → skeleton
+ * - detail error / null → not-found panel (we don't distinguish 404
+ * from 403 in the UI)
+ * - detail loaded → header + definition + triggers
+ *
+ * Each child component is gated independently on the relevant permission
+ * so the orchestrator stays thin.
+ */
+export function AutomationDetailContent({
+ searchSpaceId,
+ automationId,
+}: AutomationDetailContentProps) {
+ const perms = useAutomationPermissions();
+ const validId = Number.isInteger(automationId) && automationId > 0;
+ const { data: automation, isLoading, error } = useAutomation(validId ? automationId : undefined);
+
+ if (perms.loading) {
+ return ;
+ }
+
+ if (!perms.canRead) {
+ return (
+
+
+
Access denied
+
+ You don't have permission to view automations in this search space.
+
+
+ );
+ }
+
+ if (!validId) {
+ return ;
+ }
+
+ if (isLoading) {
+ return ;
+ }
+
+ if (error || !automation) {
+ return ;
+ }
+
+ return (
+ <>
+
+
+
+
+
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
new file mode 100644
index 000000000..9545f363b
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
@@ -0,0 +1,99 @@
+"use client";
+import { ListOrdered, Settings2, Tag, Target } from "lucide-react";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import type { AutomationDefinition } from "@/contracts/types/automation.types";
+import { ExecutionSummary } from "./execution-summary";
+import { InputsSchemaPreview } from "./inputs-schema-preview";
+import { PlanStepCard } from "./plan-step-card";
+
+interface AutomationDefinitionSectionProps {
+ definition: AutomationDefinition;
+}
+
+/**
+ * The Definition card. Read-only in v1 — editing definitions happens via
+ * chat (re-run create_automation with a refined intent) or, later, via
+ * the raw-JSON path. Layout is top-down:
+ * goal → tags → execution defaults → inputs schema (if any) → plan
+ *
+ * The schema_version is rendered as a small badge next to the section
+ * title so it's discoverable but doesn't fight for attention.
+ */
+export function AutomationDefinitionSection({ definition }: AutomationDefinitionSectionProps) {
+ const hasTags = definition.metadata.tags.length > 0;
+ const hasInputs = !!definition.inputs;
+
+ return (
+
+
+ Definition
+
+ v{definition.schema_version}
+
+
+
+ {definition.goal && (
+
+ {definition.goal}
+
+ )}
+
+ {hasTags && (
+
+
+ {definition.metadata.tags.map((tag) => (
+
+ {tag}
+
+ ))}
+
+
+ )}
+
+
+
+
+
+ {hasInputs && (
+
+ {definition.inputs && }
+
+ )}
+
+
+
+ {definition.plan.map((step, idx) => (
+
+ ))}
+
+
+
+
+ );
+}
+
+function Field({
+ icon: Icon,
+ label,
+ children,
+}: {
+ icon: typeof Target;
+ label: string;
+ children: React.ReactNode;
+}) {
+ return (
+
+
+
+ {label}
+
+ {children}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx
new file mode 100644
index 000000000..4cf3efcc1
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx
@@ -0,0 +1,129 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { ArrowLeft, Pause, Play, Trash2 } from "lucide-react";
+import Link from "next/link";
+import { useRouter } from "next/navigation";
+import { useCallback, useState } from "react";
+import { updateAutomationMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { Button } from "@/components/ui/button";
+import { Spinner } from "@/components/ui/spinner";
+import type { Automation } from "@/contracts/types/automation.types";
+import { AutomationStatusBadge } from "../../components/automation-status-badge";
+import { DeleteAutomationDialog } from "../../components/delete-automation-dialog";
+
+interface AutomationDetailHeaderProps {
+ automation: Automation;
+ searchSpaceId: number;
+ canUpdate: boolean;
+ canDelete: boolean;
+}
+
+/**
+ * Title bar for the detail page: back link, name, status badge,
+ * description, and the two destructive-ish primary actions (pause /
+ * resume + delete). Same mutation atoms as the list-row actions to
+ * keep caches coherent.
+ *
+ * Archived automations hide the pause/resume toggle (we don't unarchive
+ * here — that flow comes later if we need it).
+ */
+export function AutomationDetailHeader({
+ automation,
+ searchSpaceId,
+ canUpdate,
+ canDelete,
+}: AutomationDetailHeaderProps) {
+ const router = useRouter();
+ const { mutateAsync: updateAutomation, isPending: updating } = useAtomValue(
+ updateAutomationMutationAtom
+ );
+ const [deleteOpen, setDeleteOpen] = useState(false);
+
+ const canToggle = canUpdate && automation.status !== "archived";
+ const nextStatus = automation.status === "active" ? "paused" : "active";
+ const pauseLabel = automation.status === "active" ? "Pause" : "Resume";
+ const PauseIcon = automation.status === "active" ? Pause : Play;
+
+ const handleDeleted = useCallback(() => {
+ router.push(`/dashboard/${searchSpaceId}/automations`);
+ }, [router, searchSpaceId]);
+
+ async function handleTogglePause() {
+ await updateAutomation({
+ automationId: automation.id,
+ patch: { status: nextStatus },
+ });
+ }
+
+ return (
+ <>
+
+
+
+
+ Back to automations
+
+
+
+
+
+
+
+ {automation.name}
+
+
+
+ {automation.description && (
+
{automation.description}
+ )}
+
+
+
+ {canToggle && (
+
+ {updating ? (
+
+ ) : (
+
+ )}
+ {pauseLabel}
+
+ )}
+ {canDelete && (
+
setDeleteOpen(true)}
+ className="text-destructive hover:text-destructive hover:bg-destructive/10"
+ >
+
+ Delete
+
+ )}
+
+
+
+
+ {canDelete && (
+
+ )}
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx
new file mode 100644
index 000000000..1d01305ee
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx
@@ -0,0 +1,42 @@
+"use client";
+import { Card, CardContent, CardHeader } from "@/components/ui/card";
+import { Skeleton } from "@/components/ui/skeleton";
+
+/**
+ * Skeleton for the detail page. Same shell as the loaded view (header +
+ * two stacked cards) so the layout doesn't jump on data arrival.
+ */
+export function AutomationDetailLoading() {
+ return (
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-not-found.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-not-found.tsx
new file mode 100644
index 000000000..1681caf25
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-not-found.tsx
@@ -0,0 +1,34 @@
+"use client";
+import { ArrowLeft, FileWarning } from "lucide-react";
+import Link from "next/link";
+import { Button } from "@/components/ui/button";
+
+interface AutomationNotFoundProps {
+ searchSpaceId: number;
+ error?: Error | null;
+}
+
+/**
+ * Rendered when the detail fetch fails (404 / 403 / network) or the id
+ * is not a positive integer. We don't distinguish "missing" from "forbidden" in the
+ * UI on purpose — leaking that an id exists you can't read is worse than
+ * a vague message.
+ */
+export function AutomationNotFound({ searchSpaceId, error }: AutomationNotFoundProps) {
+ return (
+
+
+
Automation not found
+
+ This automation doesn't exist or you don't have access to it.
+ {error?.message ? ` (${error.message})` : null}
+
+
+
+
+ Back to automations
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
new file mode 100644
index 000000000..8cc62f5c8
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
@@ -0,0 +1,75 @@
+"use client";
+import { CalendarClock, MessageSquarePlus } from "lucide-react";
+import Link from "next/link";
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import type { Trigger } from "@/contracts/types/automation.types";
+import { TriggerCard } from "./trigger-card";
+
+interface AutomationTriggersSectionProps {
+ triggers: Trigger[];
+ automationId: number;
+ searchSpaceId: number;
+ canUpdate: boolean;
+ canDelete: boolean;
+ canCreate: boolean;
+}
+
+/**
+ * The Triggers card. Lists each attached trigger with its own enable
+ * toggle and remove button. Adding a new trigger is intent-driven (via
+ * chat) for v1 — same philosophy as creating an automation, so the
+ * empty/add CTA links to a new chat rather than opening a form.
+ */
+export function AutomationTriggersSection({
+ triggers,
+ automationId,
+ searchSpaceId,
+ canUpdate,
+ canDelete,
+ canCreate,
+}: AutomationTriggersSectionProps) {
+ return (
+
+
+
+
Triggers
+
+ When this automation fires. v1 supports scheduled triggers only.
+
+
+ {canCreate && (
+
+
+
+ Add via chat
+
+
+ )}
+
+
+ {triggers.length === 0 ? (
+
+
+
No triggers attached
+
+ This automation can still be invoked, but nothing will fire it on its own.
+
+
+ ) : (
+
+ {triggers.map((trigger) => (
+
+ ))}
+
+ )}
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/delete-trigger-dialog.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/delete-trigger-dialog.tsx
new file mode 100644
index 000000000..71e905724
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/delete-trigger-dialog.tsx
@@ -0,0 +1,80 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { useState } from "react";
+import { removeTriggerMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import {
+ AlertDialog,
+ AlertDialogAction,
+ AlertDialogCancel,
+ AlertDialogContent,
+ AlertDialogDescription,
+ AlertDialogFooter,
+ AlertDialogHeader,
+ AlertDialogTitle,
+} from "@/components/ui/alert-dialog";
+import { Spinner } from "@/components/ui/spinner";
+
+interface DeleteTriggerDialogProps {
+ open: boolean;
+ onOpenChange: (open: boolean) => void;
+ automationId: number;
+ triggerId: number;
+ triggerLabel: string;
+}
+
+/**
+ * Confirm + detach one trigger from its automation. The automation itself
+ * is untouched; only this trigger row is removed. The mutation atom
+ * invalidates the parent automation detail so the page rerenders.
+ */
+export function DeleteTriggerDialog({
+ open,
+ onOpenChange,
+ automationId,
+ triggerId,
+ triggerLabel,
+}: DeleteTriggerDialogProps) {
+ const { mutateAsync: removeTrigger } = useAtomValue(removeTriggerMutationAtom);
+ const [submitting, setSubmitting] = useState(false);
+
+ async function handleConfirm() {
+ setSubmitting(true);
+ try {
+ await removeTrigger({ automationId, triggerId });
+ onOpenChange(false);
+ } finally {
+ setSubmitting(false);
+ }
+ }
+
+ return (
+
+
+
+ Remove this trigger?
+
+ {triggerLabel} will be detached.
+ The automation itself stays, but it won't fire on this trigger anymore.
+
+
+
+ Cancel
+
+ {submitting ? (
+
+
+ Removing…
+
+ ) : (
+ "Remove"
+ )}
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/execution-summary.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/execution-summary.tsx
new file mode 100644
index 000000000..5c4dc381c
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/execution-summary.tsx
@@ -0,0 +1,37 @@
+"use client";
+import type { Execution } from "@/contracts/types/automation.types";
+
+interface ExecutionSummaryProps {
+ execution: Execution;
+}
+
+/**
+ * Compact view of an automation's execution defaults (wall-clock cap,
+ * retries, backoff, concurrency, on_failure presence). Per-step overrides
+ * are shown inside each PlanStepCard, not here.
+ */
+export function ExecutionSummary({ execution }: ExecutionSummaryProps) {
+ return (
+
+
+
+
+
+ {execution.on_failure.length > 0 && (
+
+ )}
+
+ );
+}
+
+function Item({ label, value }: { label: string; value: string }) {
+ return (
+
+
{label}
+ {value}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/inputs-schema-preview.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/inputs-schema-preview.tsx
new file mode 100644
index 000000000..bf2db8986
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/inputs-schema-preview.tsx
@@ -0,0 +1,20 @@
+"use client";
+import type { Inputs } from "@/contracts/types/automation.types";
+
+interface InputsSchemaPreviewProps {
+ inputs: Inputs;
+}
+
+/**
+ * Read-only JSON preview of an automation's accepted-inputs schema.
+ * Most automations don't define inputs (defaults are baked into the
+ * trigger's static_inputs), so the parent skips rendering this card
+ * when ``inputs`` is null.
+ */
+export function InputsSchemaPreview({ inputs }: InputsSchemaPreviewProps) {
+ return (
+
+ {JSON.stringify(inputs.schema, null, 2)}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
new file mode 100644
index 000000000..3feb77712
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
@@ -0,0 +1,73 @@
+"use client";
+import { ArrowRightCircle, GitCommitHorizontal } from "lucide-react";
+import type { PlanStep } from "@/contracts/types/automation.types";
+
+interface PlanStepCardProps {
+ step: PlanStep;
+ index: number;
+}
+
+/**
+ * Read-only view of one plan step. Renders the step_id + action prominently,
+ * then a definition list of the per-step knobs, and finally the params as
+ * formatted JSON. Editable mode is out of scope here — definition edits live
+ * on the (future) raw-JSON path.
+ */
+export function PlanStepCard({ step, index }: PlanStepCardProps) {
+ return (
+
+
+
+ {index + 1}
+
+
{step.step_id}
+
+
{step.action}
+
+
+
+ {(step.when ||
+ step.output_as ||
+ step.max_retries != null ||
+ step.timeout_seconds != null) && (
+
+ {step.when && (
+ {step.when}} />
+ )}
+ {step.output_as && (
+ {step.output_as}}
+ />
+ )}
+ {step.max_retries != null && (
+
+ )}
+ {step.timeout_seconds != null && (
+
+ )}
+
+ )}
+
+
+
+
+ Params
+
+
+ {JSON.stringify(step.params, null, 2)}
+
+
+
+
+ );
+}
+
+function DefRow({ label, value }: { label: string; value: React.ReactNode }) {
+ return (
+
+
{label}:
+ {value}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
new file mode 100644
index 000000000..0caaf968f
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
@@ -0,0 +1,152 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { CalendarClock, Clock, Trash2 } from "lucide-react";
+import { useState } from "react";
+import { updateTriggerMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { Button } from "@/components/ui/button";
+import { Switch } from "@/components/ui/switch";
+import type { Trigger } from "@/contracts/types/automation.types";
+import { formatRelativeDate } from "@/lib/format-date";
+import { describeCron } from "../../lib/describe-cron";
+import { DeleteTriggerDialog } from "./delete-trigger-dialog";
+
+interface TriggerCardProps {
+ trigger: Trigger;
+ automationId: number;
+ canUpdate: boolean;
+ canDelete: boolean;
+}
+
+/**
+ * One trigger row in the Triggers section of the detail page. Renders:
+ * - type icon + human-readable schedule + timezone
+ * - last_fired_at / next_fire_at hints
+ * - static_inputs as formatted JSON (when present)
+ * - enable toggle + remove button (each gated independently)
+ *
+ * Editing params (cron, timezone, static_inputs) lives behind the future
+ * raw-JSON path; this card stays read-only-except-for-toggle for v1.
+ */
+export function TriggerCard({ trigger, automationId, canUpdate, canDelete }: TriggerCardProps) {
+ const { mutateAsync: updateTrigger, isPending: updating } =
+ useAtomValue(updateTriggerMutationAtom);
+ const [deleteOpen, setDeleteOpen] = useState(false);
+
+ const cron = typeof trigger.params.cron === "string" ? trigger.params.cron : undefined;
+ const tz = typeof trigger.params.timezone === "string" ? trigger.params.timezone : "UTC";
+ const human = cron ? describeCron(cron) : trigger.type;
+ const triggerLabel = cron ? `${human} · ${tz}` : trigger.type;
+ const hasStaticInputs = Object.keys(trigger.static_inputs ?? {}).length > 0;
+
+ async function handleToggle(checked: boolean) {
+ await updateTrigger({
+ automationId,
+ triggerId: trigger.id,
+ patch: { enabled: checked },
+ });
+ }
+
+ return (
+ <>
+
+
+
+
+
+
+ {human}
+ · {tz}
+
+ {cron &&
{cron}}
+
+
+
+
+ {canUpdate && (
+
+
+ {trigger.enabled ? "Enabled" : "Off"}
+
+
+
+ )}
+ {canDelete && (
+
setDeleteOpen(true)}
+ aria-label="Remove trigger"
+ >
+
+
+ )}
+
+
+
+
+ {(trigger.last_fired_at || trigger.next_fire_at) && (
+
+ {trigger.next_fire_at && (
+
+ )}
+ {trigger.last_fired_at && }
+
+ )}
+
+ {hasStaticInputs && (
+
+
Static inputs
+
+ {JSON.stringify(trigger.static_inputs, null, 2)}
+
+
+ )}
+
+
+
+ {canDelete && (
+
+ )}
+ >
+ );
+}
+
+function TimeRow({
+ label,
+ iso,
+ highlight = false,
+}: {
+ label: string;
+ iso: string;
+ highlight?: boolean;
+}) {
+ return (
+
+
+
+ {label}:
+
+
+ {formatRelativeDate(iso)}
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/page.tsx
new file mode 100644
index 000000000..dbaceecdd
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/page.tsx
@@ -0,0 +1,18 @@
+import { AutomationDetailContent } from "./automation-detail-content";
+
+export default async function AutomationDetailPage({
+ params,
+}: {
+ params: Promise<{ search_space_id: string; automation_id: string }>;
+}) {
+ const { search_space_id, automation_id } = await params;
+
+ return (
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
index ac27b01e2..8b61a1e02 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
@@ -1,6 +1,7 @@
"use client";
import { CalendarClock, Pause } from "lucide-react";
import type { Trigger } from "@/contracts/types/automation.types";
+import { describeCron } from "../lib/describe-cron";
interface AutomationTriggersSummaryProps {
triggers: Trigger[];
@@ -49,72 +50,3 @@ export function AutomationTriggersSummary({ triggers }: AutomationTriggersSummar
return {trigger.type} ;
}
-
-// ----------------------------------------------------------------------------
-// Minimal cron describer for the common 5-field patterns SurfSense automations
-// surface today. Falls back to the raw expression when unrecognized so the user
-// still sees something honest instead of a guess.
-//
-// Kept inline (not a library) because:
-// - v1 only needs to recognize a small set of patterns produced by the
-// drafter LLM (hourly/daily/weekdays/weekly/monthly).
-// - All current consumers live in this slice. If reuse grows, lift to
-// ``lib/cron-describe.ts``.
-// ----------------------------------------------------------------------------
-
-const DAY_NAMES = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
-
-function describeCron(cron: string): string {
- const parts = cron.trim().split(/\s+/);
- if (parts.length !== 5) return cron;
-
- const [minute, hour, dom, month, dow] = parts;
-
- // Daily at H:MM (matches the very common "0 9 * * *")
- if (month === "*" && dom === "*" && dow === "*" && /^\d+$/.test(minute) && /^\d+$/.test(hour)) {
- return `Daily at ${formatTime(hour, minute)}`;
- }
-
- // Weekdays at H:MM ("0 9 * * 1-5")
- if (month === "*" && dom === "*" && dow === "1-5" && /^\d+$/.test(minute) && /^\d+$/.test(hour)) {
- return `Mon–Fri at ${formatTime(hour, minute)}`;
- }
-
- // Specific weekday(s) ("0 9 * * 1" or "0 9 * * 1,3,5")
- if (
- month === "*" &&
- dom === "*" &&
- /^\d+$/.test(minute) &&
- /^\d+$/.test(hour) &&
- /^[\d,]+$/.test(dow)
- ) {
- const days = dow
- .split(",")
- .map((d) => DAY_NAMES[Number(d) % 7])
- .filter(Boolean)
- .join(", ");
- if (days) return `${days} at ${formatTime(hour, minute)}`;
- }
-
- // Monthly on day N ("0 9 1 * *")
- if (
- month === "*" &&
- dow === "*" &&
- /^\d+$/.test(dom) &&
- /^\d+$/.test(hour) &&
- /^\d+$/.test(minute)
- ) {
- return `Day ${dom} of each month at ${formatTime(hour, minute)}`;
- }
-
- // Hourly ("0 * * * *")
- if (month === "*" && dom === "*" && dow === "*" && hour === "*" && /^\d+$/.test(minute)) {
- return minute === "0" ? "Every hour" : `Every hour at :${minute.padStart(2, "0")}`;
- }
-
- return cron;
-}
-
-function formatTime(hour: string, minute: string): string {
- return `${hour.padStart(2, "0")}:${minute.padStart(2, "0")}`;
-}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx
index db73ddad5..23fc522ca 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/delete-automation-dialog.tsx
@@ -20,6 +20,12 @@ interface DeleteAutomationDialogProps {
automationId: number;
automationName: string;
searchSpaceId: number;
+ /**
+ * Fired after a successful delete, before the dialog closes. The detail
+ * page uses this to navigate back to the list (the row simply vanishes
+ * on the list page so no callback is needed there).
+ */
+ onDeleted?: () => void;
}
/**
@@ -33,6 +39,7 @@ export function DeleteAutomationDialog({
automationId,
automationName,
searchSpaceId,
+ onDeleted,
}: DeleteAutomationDialogProps) {
const { mutateAsync: deleteAutomation } = useAtomValue(deleteAutomationMutationAtom);
const [submitting, setSubmitting] = useState(false);
@@ -41,6 +48,7 @@ export function DeleteAutomationDialog({
setSubmitting(true);
try {
await deleteAutomation({ automationId, searchSpaceId });
+ onDeleted?.();
onOpenChange(false);
} finally {
setSubmitting(false);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts b/surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts
new file mode 100644
index 000000000..e10a99a44
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts
@@ -0,0 +1,66 @@
+/**
+ * Minimal cron describer for the 5-field patterns the SurfSense drafter LLM
+ * actually produces (daily, weekdays, weekly, monthly, hourly). Falls back
+ * to the raw expression when unrecognized so the user still sees something
+ * honest instead of a guess.
+ *
+ * Lives in the automations slice because it's a UI display concern with no
+ * consumers outside it. If reuse grows, lift to ``lib/cron-describe.ts``.
+ */
+
+const DAY_NAMES = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
+
+/**
+ * Describe a 5-field cron expression in plain English.
+ *
+ * Recognizes daily, weekdays-only (Mon-Fri), weekday-list, monthly and hourly
+ * patterns. Anything else, including numeric fields outside their cron range
+ * or a malformed weekday list, is returned verbatim rather than guessed at.
+ *
+ * @param cron - Raw cron expression, e.g. "0 9 * * 1-5".
+ * @returns A short human-readable description, or `cron` unchanged.
+ */
+export function describeCron(cron: string): string {
+	const parts = cron.trim().split(/\s+/);
+	if (parts.length !== 5) return cron;
+
+	const [minute, hour, dom, month, dow] = parts;
+	// One- or two-digit number within [lo, hi]; rejects "*", ranges, steps, lists.
+	const inRange = (field: string, lo: number, hi: number): boolean =>
+		/^\d{1,2}$/.test(field) && Number(field) >= lo && Number(field) <= hi;
+	const minuteOk = inRange(minute, 0, 59);
+	const fixedTime = minuteOk && inRange(hour, 0, 23);
+	const anyDate = month === "*" && dom === "*";
+
+	// Daily at HH:MM ("0 9 * * *")
+	if (anyDate && dow === "*" && fixedTime) return `Daily at ${formatTime(hour, minute)}`;
+
+	// Weekdays at HH:MM ("0 9 * * 1-5")
+	if (anyDate && dow === "1-5" && fixedTime) return `Mon–Fri at ${formatTime(hour, minute)}`;
+
+	// Specific weekday(s) ("0 9 * * 1" or "0 9 * * 1,3,5"). Every entry must be
+	// 0-7 (7 is an alias for Sunday); empty or out-of-range entries such as
+	// "1,,3" or "9" fall through to the raw expression instead of being
+	// mislabelled as a real day.
+	const dowList = dow.split(",");
+	if (anyDate && fixedTime && dowList.every((d) => /^[0-7]$/.test(d))) {
+		const days = dowList.map((d) => DAY_NAMES[Number(d) % 7]).join(", ");
+		return `${days} at ${formatTime(hour, minute)}`;
+	}
+
+	// Monthly on day N ("0 9 1 * *")
+	if (month === "*" && dow === "*" && inRange(dom, 1, 31) && fixedTime) {
+		return `Day ${dom} of each month at ${formatTime(hour, minute)}`;
+	}
+
+	// Hourly ("0 * * * *"); compare numerically so "00" also reads "Every hour".
+	if (anyDate && dow === "*" && hour === "*" && minuteOk) {
+		return Number(minute) === 0 ? "Every hour" : `Every hour at :${minute.padStart(2, "0")}`;
+	}
+
+	return cron;
+}
+
+function formatTime(hour: string, minute: string): string {
+ return `${hour.padStart(2, "0")}:${minute.padStart(2, "0")}`;
+}
From 2e572d781855292aa58981417ade345bb4f6fe21 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 01:32:04 +0200
Subject: [PATCH 72/87] feat(web): create_automation HITL approval card in chat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Closes the create loop in chat: the agent describes user intent → the
drafter sub-LLM produces an AutomationCreate JSON → this card surfaces
a structured preview → approve persists; reject cancels. Edits flow
through chat refinement (re-call with a refined intent), not in-card,
so the card stays simple and the multi-turn checkpointer carries the
context.
Tool UI (components/tool-ui/automation/):
- create-automation.tsx — entry dispatcher + ApprovalCard chrome
(pending/processing/complete/rejected via useHitlPhase) + SavedCard
(links to the detail page) + InvalidCard (lists drafter validation
issues) + ErrorCard (verbatim message). Rejection result is hidden
because the approval card itself shows the rejected phase inline.
- automation-draft-preview.tsx — structured preview body: name +
description + goal, triggers (humanised cron + tz + static-input
keys), plan steps (step_id → action), and a collapsible raw JSON
for power users.
Wiring:
- components/tool-ui/index.ts — re-export.
- features/chat-messages/timeline/tool-registry/registry.ts —
register create_automation → CreateAutomationToolUI (dynamic import,
same pattern as other connector tools).
- contracts/enums/toolIcons.tsx — Workflow icon + "Create automation"
display name so fallback chrome (and timeline headers) are honest.
Shared util:
- lib/automations/describe-cron.ts — lifted from the route slice's
lib/ folder since both the dashboard slice and the new approval card
now render schedule descriptions. Slice imports updated; the now-
empty slice lib/ folder is gone.
Backend prompt fragments:
- main_agent/system_prompt/.../create_automation/description.md and
the tool's docstring no longer promise in-card edits. They make the
refinement path explicit: if the user wants changes after seeing the
draft, they reply in chat and the agent calls the tool again with a
refined intent.
v1 deliberately excludes:
- In-card edit form / right-side edit panel — defer until we see real
demand. The chat refinement loop covers the common case.
- approve_always / persistent allow rules — automations are a single
artifact, not a repeated mutation, so the "trust this kind of call"
affordance doesn't apply.
---
.../tools/create_automation/description.md | 11 +-
.../main_agent/tools/automation/create.py | 8 +-
.../components/trigger-card.tsx | 2 +-
.../automation-triggers-summary.tsx | 2 +-
.../automation/automation-draft-preview.tsx | 183 ++++++++++
.../tool-ui/automation/create-automation.tsx | 328 ++++++++++++++++++
.../components/tool-ui/automation/index.ts | 1 +
surfsense_web/components/tool-ui/index.ts | 1 +
surfsense_web/contracts/enums/toolIcons.tsx | 5 +
.../timeline/tool-registry/registry.ts | 6 +
.../lib => lib/automations}/describe-cron.ts | 5 +-
11 files changed, 541 insertions(+), 11 deletions(-)
create mode 100644 surfsense_web/components/tool-ui/automation/automation-draft-preview.tsx
create mode 100644 surfsense_web/components/tool-ui/automation/create-automation.tsx
create mode 100644 surfsense_web/components/tool-ui/automation/index.ts
rename surfsense_web/{app/dashboard/[search_space_id]/automations/lib => lib/automations}/describe-cron.ts (88%)
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
index 25b4eec47..ce6562c97 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/create_automation/description.md
@@ -1,7 +1,7 @@
- `create_automation` — Draft and author a new automation. You describe the
user's intent; a focused drafter inside the tool turns it into the full
- automation JSON; the user reviews and edits it on an approval card; on
- approval it's saved. All three phases happen in a single tool call.
+ automation JSON; the user sees a preview on an approval card and chooses
+ approve or reject. All three phases happen in a single tool call.
- Call when the user wants SurfSense to do something on its own: anything
recurring or scheduled ("every morning…", "each Monday…", "weekly
recap…").
@@ -17,13 +17,16 @@
explicitly ("the Notion parent page id was not specified") so the
drafter leaves a placeholder.
- Do NOT prompt the user to confirm before calling — the approval card
- IS the confirmation. The user can edit any field on the card.
+ IS the confirmation. The card shows a structured preview plus the raw
+ JSON; it offers approve/reject only. If the user wants changes after
+ seeing the draft, they reply in chat and you call this tool again with
+ a refined `intent` — that's the edit path.
- Returns:
- `{status: "saved", automation_id, name}` — confirm briefly to the
user ("Saved as automation #N — runs <schedule>."). Don't dump JSON back.
- `{status: "rejected", message}` — the user declined on the card.
Acknowledge once ("Understood, I didn't create it.") and stop. Do
- NOT retry or pitch variants.
+ NOT retry or pitch variants without a fresh user request.
- `{status: "invalid", issues, raw?}` — drafting/validation failed
before the card was shown. Read the issues, refine your `intent`
with the missing details, call again.
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
index 78fedde22..07b579f3b 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
@@ -66,9 +66,11 @@ def create_create_automation_tool(
names, …) it needs.
The tool drafts the full automation JSON internally, shows the user
- an approval card for review, and persists on approval. Do NOT
- prompt the user to confirm before calling — the card IS the
- confirmation. The user can edit any field there.
+ a structured preview on an approval card, and persists on approval.
+ The card supports approve/reject only — if the user wants edits
+ after seeing the draft, they say so in chat and you call this tool
+ again with a refined intent. Do NOT prompt the user to confirm
+ before calling — the card IS the confirmation.
Args:
intent: Concrete restatement of the user's request. Include
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
index 0caaf968f..afadf589a 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
@@ -6,8 +6,8 @@ import { updateTriggerMutationAtom } from "@/atoms/automations/automations-mutat
import { Button } from "@/components/ui/button";
import { Switch } from "@/components/ui/switch";
import type { Trigger } from "@/contracts/types/automation.types";
+import { describeCron } from "@/lib/automations/describe-cron";
import { formatRelativeDate } from "@/lib/format-date";
-import { describeCron } from "../../lib/describe-cron";
import { DeleteTriggerDialog } from "./delete-trigger-dialog";
interface TriggerCardProps {
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
index 8b61a1e02..270a1f844 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automation-triggers-summary.tsx
@@ -1,7 +1,7 @@
"use client";
import { CalendarClock, Pause } from "lucide-react";
import type { Trigger } from "@/contracts/types/automation.types";
-import { describeCron } from "../lib/describe-cron";
+import { describeCron } from "@/lib/automations/describe-cron";
interface AutomationTriggersSummaryProps {
triggers: Trigger[];
diff --git a/surfsense_web/components/tool-ui/automation/automation-draft-preview.tsx b/surfsense_web/components/tool-ui/automation/automation-draft-preview.tsx
new file mode 100644
index 000000000..b0b5c8f78
--- /dev/null
+++ b/surfsense_web/components/tool-ui/automation/automation-draft-preview.tsx
@@ -0,0 +1,183 @@
+"use client";
+import { CalendarClock, ChevronDown, ChevronRight, ListOrdered, Target } from "lucide-react";
+import { useState } from "react";
+import { describeCron } from "@/lib/automations/describe-cron";
+
+interface DraftTrigger {
+ type: string;
+ params: Record;
+ static_inputs: Record;
+ enabled: boolean;
+}
+
+interface DraftPlanStep {
+ step_id: string;
+ action: string;
+ when?: string | null;
+}
+
+interface AutomationDraft {
+ name: string;
+ description?: string | null;
+ definition: {
+ goal?: string | null;
+ plan: DraftPlanStep[];
+ };
+ triggers: DraftTrigger[];
+}
+
+interface AutomationDraftPreviewProps {
+ draft: AutomationDraft;
+ /** Full unmodified args dict — surfaced as the "raw JSON" escape hatch. */
+ raw: Record;
+}
+
+/**
+ * Structured preview of a drafted automation rendered inside the chat
+ * approval card.
+ *
+ * Three layers, top to bottom:
+ * 1. Name + description (and goal when present).
+ * 2. Triggers — humanised cron string + timezone + static_inputs hint.
+ * 3. Plan steps — ordered list of ``step_id → action``.
+ *
+ * A "View raw JSON" toggle reveals the full payload for power users who
+ * want to inspect every field; it's collapsed by default so the card
+ * stays scannable for the common case.
+ */
+export function AutomationDraftPreview({ draft, raw }: AutomationDraftPreviewProps) {
+ const [showRaw, setShowRaw] = useState(false);
+
+ return (
+
+
+
{draft.name}
+ {draft.description &&
{draft.description}
}
+
+
+ {draft.definition.goal && (
+
+ {draft.definition.goal}
+
+ )}
+
+
+ {draft.triggers.length === 0 ? (
+
+ No triggers — automation will need one before it can run.
+
+ ) : (
+
+ {draft.triggers.map((trigger) => (
+
+
+
+ ))}
+
+ )}
+
+
+
+
+ {draft.definition.plan.map((step, idx) => (
+
+
+ {idx + 1}
+
+
+ {step.step_id}
+ →
+ {step.action}
+ {step.when && when {step.when} }
+
+
+ ))}
+
+
+
+
setShowRaw((value) => !value)}
+ className="inline-flex items-center gap-1 text-xs text-muted-foreground hover:text-foreground"
+ >
+ {showRaw ? (
+
+ ) : (
+
+ )}
+ {showRaw ? "Hide raw JSON" : "View raw JSON"}
+
+ {showRaw && (
+
+ {JSON.stringify(raw, null, 2)}
+
+ )}
+
+ );
+}
+
+/**
+ * Stable key derived from the trigger's identifying fields. Drafts are
+ * static snapshots so collisions only happen if the LLM emits two literally
+ * identical triggers — harmless in practice.
+ */
+// Builds the React list key for a draft trigger as "type|cron|timezone".
+// A cron or timezone param that is not a string contributes an empty segment.
+// Only these three fields feed the key, so triggers that agree on all of them
+// share a key.
+function triggerKey(trigger: DraftTrigger): string {
+ const textOrEmpty = (value: unknown): string => (typeof value === "string" ? value : "");
+ const { cron, timezone } = trigger.params;
+ return `${trigger.type}|${textOrEmpty(cron)}|${textOrEmpty(timezone)}`;
+}
+
+function TriggerLine({ trigger }: { trigger: DraftTrigger }) {
+ if (trigger.type === "schedule") {
+ const cron = typeof trigger.params.cron === "string" ? trigger.params.cron : undefined;
+ const tz = typeof trigger.params.timezone === "string" ? trigger.params.timezone : "UTC";
+ const human = cron ? describeCron(cron) : "Schedule";
+ const staticKeys = Object.keys(trigger.static_inputs ?? {});
+ return (
+
+
+ {human}
+ · {tz}
+ {!trigger.enabled && (
+
+ Disabled
+
+ )}
+
+ {cron &&
{cron}}
+ {staticKeys.length > 0 && (
+
+ Static inputs: {staticKeys.join(", ")}
+
+ )}
+
+ );
+ }
+ return {trigger.type} ;
+}
+
+function Section({
+ icon: Icon,
+ label,
+ children,
+}: {
+ icon: typeof Target;
+ label: string;
+ children: React.ReactNode;
+}) {
+ return (
+
+
+
+ {label}
+
+ {children}
+
+ );
+}
diff --git a/surfsense_web/components/tool-ui/automation/create-automation.tsx b/surfsense_web/components/tool-ui/automation/create-automation.tsx
new file mode 100644
index 000000000..713c5fd46
--- /dev/null
+++ b/surfsense_web/components/tool-ui/automation/create-automation.tsx
@@ -0,0 +1,328 @@
+"use client";
+
+import type { ToolCallMessagePartProps } from "@assistant-ui/react";
+import { useAtomValue } from "jotai";
+import { CornerDownLeftIcon, ExternalLink, Workflow } from "lucide-react";
+import Link from "next/link";
+import { useCallback, useEffect, useMemo } from "react";
+import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { TextShimmerLoader } from "@/components/prompt-kit/loader";
+import { Button } from "@/components/ui/button";
+import type { HitlDecision, InterruptResult } from "@/features/chat-messages/hitl";
+import { isInterruptResult, useHitlDecision, useHitlPhase } from "@/features/chat-messages/hitl";
+import { AutomationDraftPreview } from "./automation-draft-preview";
+
+// ----------------------------------------------------------------------------
+// Result discrimination — mirrors the backend return shapes in
+// app/agents/multi_agent_chat/main_agent/tools/automation/create.py.
+// ----------------------------------------------------------------------------
+
+type AutomationCreateContext = {
+ search_space_id?: number;
+};
+
+interface SavedResult {
+ status: "saved";
+ automation_id: number;
+ name: string;
+}
+
+interface RejectedResult {
+ status: "rejected";
+ message?: string;
+}
+
+interface InvalidResult {
+ status: "invalid";
+ issues: string[];
+ raw?: unknown;
+}
+
+interface ErrorResult {
+ status: "error";
+ message: string;
+}
+
+type CreateAutomationResult =
+ | InterruptResult
+ | SavedResult
+ | RejectedResult
+ | InvalidResult
+ | ErrorResult;
+
+/**
+ * Reports whether `value` is a non-null object whose `status` property is
+ * strictly equal to `status`. Anything else (primitives, `null`, objects
+ * without a `status` key) yields `false`.
+ */
+function hasStatus(value: unknown, status: string): boolean {
+ if (typeof value !== "object" || value === null) return false;
+ if (!("status" in value)) return false;
+ const actual = (value as { status: unknown }).status;
+ return actual === status;
+}
+
+// ----------------------------------------------------------------------------
+// Approval card — pending → processing → complete / rejected.
+//
+// v1 deliberately supports only approve/reject. The drafted JSON is complex
+// (full plan + triggers) and we already have a multi-turn refinement path via
+// chat ("make it run at 10am instead" → the agent re-calls the tool with a
+// refined intent). An in-card edit form would duplicate that flow and add UX
+// surface area we don't need yet — leave it for the raw-JSON path on the
+// detail page.
+// ----------------------------------------------------------------------------
+
+interface ApprovalCardProps {
+ args: Record;
+ interruptData: InterruptResult;
+ onDecision: (decision: HitlDecision) => void;
+}
+
+/**
+ * Approve/reject card shown while a `create_automation` call is interrupted.
+ *
+ * Approving forwards the drafted `args` unchanged as the approved action;
+ * rejecting sends a fixed rejection message. Both handlers are no-ops unless
+ * the card is still in the "pending" phase and the decision is listed in the
+ * interrupt's `allowed_decisions` (approve and reject are assumed when the
+ * list is absent). A bare Enter keypress also approves, subject to the guards
+ * in the keyboard effect below.
+ */
+function ApprovalCard({ args, interruptData, onDecision }: ApprovalCardProps) {
+ const { phase, setProcessing, setRejected } = useHitlPhase(interruptData);
+
+ const reviewConfig = interruptData.review_configs[0];
+ const allowedDecisions = reviewConfig?.allowed_decisions ?? ["approve", "reject"];
+ const canApprove = allowedDecisions.includes("approve");
+ const canReject = allowedDecisions.includes("reject");
+
+ const draft = useMemo(() => extractDraft(args), [args]);
+
+ const handleApprove = useCallback(() => {
+ if (phase !== "pending" || !canApprove) return;
+ setProcessing();
+ onDecision({
+ type: "approve",
+ edited_action: {
+ name: interruptData.action_requests[0]?.name ?? "create_automation",
+ args,
+ },
+ });
+ }, [phase, canApprove, setProcessing, onDecision, interruptData, args]);
+
+ const handleReject = useCallback(() => {
+ if (phase !== "pending" || !canReject) return;
+ setRejected();
+ onDecision({ type: "reject", message: "User rejected the automation draft." });
+ }, [phase, canReject, setRejected, onDecision]);
+
+ useEffect(() => {
+ // True when Enter belongs to a text field the user has typed into, e.g. a
+ // chat reply asking for changes. An empty field keeps the shortcut alive.
+ const hasTypedText = (el: HTMLElement): boolean => {
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
+ return el.value.trim() !== "";
+ }
+ return el.isContentEditable && (el.textContent ?? "").trim() !== "";
+ };
+ const handler = (e: KeyboardEvent) => {
+ if (e.key !== "Enter" || e.shiftKey || e.ctrlKey || e.metaKey) return;
+ // Skip events another handler already consumed, auto-repeat from a held
+ // key, and the Enter that confirms an IME composition.
+ if (e.defaultPrevented || e.repeat || e.isComposing) return;
+ const target = e.target;
+ if (target instanceof HTMLElement) {
+ // The listener is window-wide: Enter on a focused button or link must
+ // only activate that control. Without this, Enter on "Reject" would
+ // approve first and then reject.
+ if (target.closest("button, a[href], [role='button']")) return;
+ if (hasTypedText(target)) return;
+ }
+ handleApprove();
+ };
+ window.addEventListener("keydown", handler);
+ return () => window.removeEventListener("keydown", handler);
+ }, [handleApprove]);
+
+ return (
+
+
+
+
+
+ {phase === "rejected"
+ ? "Automation cancelled"
+ : phase === "processing"
+ ? "Saving automation"
+ : phase === "complete"
+ ? "Automation saved"
+ : "Create automation"}
+
+ {phase === "processing" ? (
+
+ ) : phase === "complete" ? (
+
+ Automation created from this draft
+
+ ) : phase === "rejected" ? (
+
+ No automation was saved — ask in chat to refine and try again.
+
+ ) : (
+
+ Review and approve to save. To change anything, reply in chat — I'll redraft.
+
+ )}
+
+
+
+
+
+
+ {phase === "pending" && (
+ <>
+
+
+ {canApprove && (
+
+ Approve
+
+
+ )}
+ {canReject && (
+
+ Reject
+
+ )}
+
+ >
+ )}
+
+ );
+}
+
+// ----------------------------------------------------------------------------
+// Terminal result cards.
+// ----------------------------------------------------------------------------
+
+function SavedCard({ result }: { result: SavedResult }) {
+ const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
+ const detailHref = searchSpaceId
+ ? `/dashboard/${searchSpaceId}/automations/${result.automation_id}`
+ : null;
+
+ return (
+
+
+
+
+
Automation saved
+
{result.name}
+
+
+ {detailHref && (
+ <>
+
+
+
+
+ Open automation #{result.automation_id}
+
+
+ >
+ )}
+
+ );
+}
+
+function InvalidCard({ result }: { result: InvalidResult }) {
+ return (
+
+
+
Couldn't draft this automation
+
+ The drafter produced output that didn't validate. I'll refine and retry.
+
+
+ {result.issues.length > 0 && (
+ <>
+
+
+ {result.issues.map((issue) => (
+ {issue}
+ ))}
+
+ >
+ )}
+
+ );
+}
+
+function ErrorCard({ result }: { result: ErrorResult }) {
+ return (
+
+
+
Failed to create automation
+
+
+
+
+ );
+}
+
+// ----------------------------------------------------------------------------
+// Entry — dispatches between the approval card and terminal result cards.
+//
+// Rejection is special: we hide the standalone "rejected" card because the
+// approval card itself already transitions to a "rejected" phase inline. A
+// second message in the timeline would be noisy.
+// ----------------------------------------------------------------------------
+
+export const CreateAutomationToolUI = ({
+ args,
+ result,
+}: ToolCallMessagePartProps<{ intent: string }, CreateAutomationResult>) => {
+ const { dispatch } = useHitlDecision();
+
+ if (!result) return null;
+
+ if (isInterruptResult(result)) {
+ return (
+ }
+ interruptData={result as InterruptResult}
+ onDecision={(decision) => dispatch([decision])}
+ />
+ );
+ }
+
+ if (hasStatus(result, "rejected")) return null;
+ if (hasStatus(result, "saved")) return ;
+ if (hasStatus(result, "invalid")) return ;
+ if (hasStatus(result, "error")) return ;
+
+ return null;
+};
+
+// ----------------------------------------------------------------------------
+// Helpers.
+// ----------------------------------------------------------------------------
+
+/**
+ * Project raw args into the shape ``AutomationDraftPreview`` expects.
+ *
+ * The args dict is the full ``AutomationCreate`` payload (minus
+ * ``search_space_id`` which is injected server-side), so we trust the
+ * top-level fields but defend against missing nested defaults.
+ */
+function extractDraft(args: Record) {
+ const definition = (args.definition ?? {}) as Record;
+ const planSteps = Array.isArray(definition.plan)
+ ? (definition.plan as Array>).map((step) => ({
+ step_id: String(step.step_id ?? "(unnamed)"),
+ action: String(step.action ?? ""),
+ when: typeof step.when === "string" ? step.when : null,
+ }))
+ : [];
+
+ const triggers = Array.isArray(args.triggers)
+ ? (args.triggers as Array>).map((trigger) => ({
+ type: String(trigger.type ?? "schedule"),
+ params: (trigger.params ?? {}) as Record,
+ static_inputs: (trigger.static_inputs ?? {}) as Record,
+ enabled: trigger.enabled !== false,
+ }))
+ : [];
+
+ return {
+ name: String(args.name ?? "(unnamed automation)"),
+ description: typeof args.description === "string" ? args.description : null,
+ definition: {
+ goal: typeof definition.goal === "string" ? definition.goal : null,
+ plan: planSteps,
+ },
+ triggers,
+ };
+}
diff --git a/surfsense_web/components/tool-ui/automation/index.ts b/surfsense_web/components/tool-ui/automation/index.ts
new file mode 100644
index 000000000..50cf1a478
--- /dev/null
+++ b/surfsense_web/components/tool-ui/automation/index.ts
@@ -0,0 +1 @@
+export { CreateAutomationToolUI } from "./create-automation";
diff --git a/surfsense_web/components/tool-ui/index.ts b/surfsense_web/components/tool-ui/index.ts
index 4d885a38c..ee5072dad 100644
--- a/surfsense_web/components/tool-ui/index.ts
+++ b/surfsense_web/components/tool-ui/index.ts
@@ -7,6 +7,7 @@
*/
export { Audio } from "./audio";
+export { CreateAutomationToolUI } from "./automation";
export { CreateDropboxFileToolUI, DeleteDropboxFileToolUI } from "./dropbox";
export {
type GenerateImageArgs,
diff --git a/surfsense_web/contracts/enums/toolIcons.tsx b/surfsense_web/contracts/enums/toolIcons.tsx
index bb87be0ba..668cb51cd 100644
--- a/surfsense_web/contracts/enums/toolIcons.tsx
+++ b/surfsense_web/contracts/enums/toolIcons.tsx
@@ -25,6 +25,7 @@ import {
SearchCheck,
Send,
Trash2,
+ Workflow,
Wrench,
} from "lucide-react";
@@ -47,6 +48,8 @@ const TOOL_ICONS: Record = {
scrape_webpage: ScanLine,
web_search: Globe,
search_surfsense_docs: BookOpen,
+ // Automations
+ create_automation: Workflow,
// Memory
update_memory: Brain,
// Filesystem (built-in deepagent + middleware)
@@ -150,6 +153,8 @@ const TOOL_DISPLAY_NAMES: Record = {
scrape_webpage: "Read webpage",
web_search: "Search the web",
search_surfsense_docs: "Search knowledge base",
+ // Automations
+ create_automation: "Create automation",
// Memory
update_memory: "Update memory",
// Calendar
diff --git a/surfsense_web/features/chat-messages/timeline/tool-registry/registry.ts b/surfsense_web/features/chat-messages/timeline/tool-registry/registry.ts
index 8acc6b4fa..c4cfe7cd3 100644
--- a/surfsense_web/features/chat-messages/timeline/tool-registry/registry.ts
+++ b/surfsense_web/features/chat-messages/timeline/tool-registry/registry.ts
@@ -17,6 +17,11 @@ const UpdateMemoryToolUI = dynamic(
() => import("@/components/tool-ui/user-memory").then((m) => ({ default: m.UpdateMemoryToolUI })),
{ ssr: false }
);
+const CreateAutomationToolUI = dynamic(
+ () =>
+ import("@/components/tool-ui/automation").then((m) => ({ default: m.CreateAutomationToolUI })),
+ { ssr: false }
+);
const SandboxExecuteToolUI = dynamic(
() =>
import("@/components/tool-ui/sandbox-execute").then((m) => ({
@@ -184,6 +189,7 @@ const NullTimelineBody: TimelineToolComponent = () => null;
*/
const TOOLS_BY_NAME = {
task: NullTimelineBody,
+ create_automation: CreateAutomationToolUI,
update_memory: UpdateMemoryToolUI,
execute: SandboxExecuteToolUI,
execute_code: SandboxExecuteToolUI,
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts b/surfsense_web/lib/automations/describe-cron.ts
similarity index 88%
rename from surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts
rename to surfsense_web/lib/automations/describe-cron.ts
index e10a99a44..19f7ff991 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/lib/describe-cron.ts
+++ b/surfsense_web/lib/automations/describe-cron.ts
@@ -4,8 +4,9 @@
* to the raw expression when unrecognized so the user still sees something
* honest instead of a guess.
*
- * Lives in the automations slice because it's a UI display concern with no
- * consumers outside it. If reuse grows, lift to ``lib/cron-describe.ts``.
+ * Lives under ``lib/automations/`` because both the dashboard slice and the
+ * chat ``create_automation`` approval card render schedule descriptions —
+ * keeping the helper outside either feature avoids a layering violation.
*/
const DAY_NAMES = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
From 4625bd937e02d5f1f6eb7b62fe25fa7a0b657894 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 01:35:48 +0200
Subject: [PATCH 73/87] feat(web): run history section on automations detail
page
Recent runs card under triggers. Each row expands lazily to fetch the
full run (step results, output, artifacts, error). 20-row cap for now;
real pagination lands if usage demands it.
---
.../automation-detail-content.tsx | 3 +
.../components/automation-runs-section.tsx | 67 ++++++++++
.../components/run-details-panel.tsx | 116 ++++++++++++++++++
.../[automation_id]/components/run-row.tsx | 75 +++++++++++
.../components/run-status-badge.tsx | 57 +++++++++
.../components/runs-loading.tsx | 23 ++++
6 files changed, 341 insertions(+)
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-status-badge.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
index a82887721..253d6ae67 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
@@ -6,6 +6,7 @@ import { AutomationDefinitionSection } from "./components/automation-definition-
import { AutomationDetailHeader } from "./components/automation-detail-header";
import { AutomationDetailLoading } from "./components/automation-detail-loading";
import { AutomationNotFound } from "./components/automation-not-found";
+import { AutomationRunsSection } from "./components/automation-runs-section";
import { AutomationTriggersSection } from "./components/automation-triggers-section";
interface AutomationDetailContentProps {
@@ -81,6 +82,8 @@ export function AutomationDetailContent({
canDelete={perms.canDelete}
canCreate={perms.canCreate}
/>
+
+
>
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx
new file mode 100644
index 000000000..b6158cab2
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx
@@ -0,0 +1,67 @@
+"use client";
+import { History } from "lucide-react";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { useAutomationRuns } from "@/hooks/use-automation-runs";
+import { RunRow } from "./run-row";
+import { RunsLoading } from "./runs-loading";
+
+interface AutomationRunsSectionProps {
+ automationId: number;
+}
+
+const LIMIT = 20;
+
+/**
+ * Run history card. Shows the most recent ``LIMIT`` runs; pagination is
+ * intentionally deferred — for the foreseeable v1 surface (one-trigger
+ * automations firing daily), 20 covers ~3 weeks of history which is
+ * enough to tell whether things are working. Real "load more" lands if
+ * we see usage spike past that.
+ */
+export function AutomationRunsSection({ automationId }: AutomationRunsSectionProps) {
+ const { data, isLoading, error } = useAutomationRuns(automationId, { limit: LIMIT });
+ const runs = data?.items ?? [];
+
+ return (
+
+
+
+
+
+ Recent runs
+
+
+ Most recent first. Click a row to inspect step results, output and artifacts.
+
+
+ {!isLoading && !error && data && (
+ {data.total} total
+ )}
+
+
+ {isLoading ? (
+
+ ) : error ? (
+
+ Couldn't load runs{error.message ? `: ${error.message}` : "."}
+
+ ) : runs.length === 0 ? (
+
+
+
No runs yet
+
+ This automation hasn't fired. Once a trigger fires (or you invoke it manually), runs
+ will appear here.
+
+
+ ) : (
+
+ {runs.map((run) => (
+
+ ))}
+
+ )}
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
new file mode 100644
index 000000000..d1d46900a
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
@@ -0,0 +1,116 @@
+"use client";
+import { AlertCircle, FileOutput, GitCommitHorizontal, Package, Settings2 } from "lucide-react";
+import { Skeleton } from "@/components/ui/skeleton";
+import { useAutomationRun } from "@/hooks/use-automation-runs";
+
+interface RunDetailsPanelProps {
+ automationId: number;
+ runId: number;
+}
+
+/**
+ * Expanded view of a single run. Fetches lazily — the parent only renders
+ * this once the row is opened, so the list view stays cheap.
+ *
+ * We surface the five most actionable sections (error first when present,
+ * then output, step results, artifacts, inputs). The full
+ * ``definition_snapshot`` is omitted because it usually mirrors the live
+ * definition — surfacing it would dominate the panel without informing
+ * what the user is trying to learn ("did this work? what did it do?").
+ */
+export function RunDetailsPanel({ automationId, runId }: RunDetailsPanelProps) {
+ const { data: run, isLoading, error } = useAutomationRun(automationId, runId);
+
+ if (isLoading) {
+ return (
+
+
+
+
+ );
+ }
+
+ if (error || !run) {
+ return (
+
+ Couldn't load run details{error?.message ? `: ${error.message}` : "."}
+
+ );
+ }
+
+ const hasError = run.error && Object.keys(run.error).length > 0;
+ const hasOutput = run.output && Object.keys(run.output).length > 0;
+ const hasInputs = Object.keys(run.inputs ?? {}).length > 0;
+
+ return (
+
+ {hasError && (
+
+ )}
+
+ {hasOutput && (
+
+ )}
+
+
+ {run.step_results.length === 0 ? (
+ No steps recorded.
+ ) : (
+
+ )}
+
+
+ {run.artifacts.length > 0 && (
+
+ )}
+
+ {hasInputs && (
+
+ )}
+
+ );
+}
+
+function Section({
+ icon: Icon,
+ label,
+ tone = "default",
+ children,
+}: {
+ icon: typeof AlertCircle;
+ label: string;
+ tone?: "default" | "destructive";
+ children: React.ReactNode;
+}) {
+ return (
+
+
+
+ {label}
+
+ {children}
+
+ );
+}
+
+function JsonBlock({ value }: { value: unknown }) {
+ return (
+
+ {JSON.stringify(value, null, 2)}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx
new file mode 100644
index 000000000..b8d2bcc8b
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx
@@ -0,0 +1,75 @@
+"use client";
+import { ChevronDown, ChevronRight, Hand } from "lucide-react";
+import { useState } from "react";
+import type { RunSummary } from "@/contracts/types/automation.types";
+import { formatRelativeDate } from "@/lib/format-date";
+import { RunDetailsPanel } from "./run-details-panel";
+import { RunStatusBadge } from "./run-status-badge";
+
+interface RunRowProps {
+ run: RunSummary;
+ automationId: number;
+}
+
+/**
+ * One run row. Click to expand → fetches the full run and shows the
+ * details panel inline. State is local to each row so multiple panels
+ * can be open at once (or none).
+ */
+export function RunRow({ run, automationId }: RunRowProps) {
+ const [open, setOpen] = useState(false);
+ const duration = computeDuration(run.started_at, run.finished_at);
+ const startedLabel = run.started_at
+ ? formatRelativeDate(run.started_at)
+ : formatRelativeDate(run.created_at);
+
+ return (
+
+
setOpen((value) => !value)}
+ className="flex w-full items-center justify-between gap-4 px-4 py-3 text-left hover:bg-muted/30 transition-colors"
+ aria-expanded={open}
+ >
+
+ {open ? (
+
+ ) : (
+
+ )}
+
+ {startedLabel}
+
+
+ {duration && {duration} }
+
+
+
+
+ {open &&
}
+
+ );
+}
+
+function TriggerSource({ triggerId }: { triggerId: number | null }) {
+ if (triggerId == null) {
+ return (
+
+
+ Manual
+
+ );
+ }
+ return via trigger #{triggerId} ;
+}
+
+/**
+ * Formats the elapsed time between two timestamps for display.
+ *
+ * @param started - Start timestamp parseable by `Date`; may be missing.
+ * @param finished - Finish timestamp parseable by `Date`; may be missing.
+ * @returns `"<n>ms"` below one second, `"<s.t>s"` below one minute and
+ * `"<m>m <s>s"` otherwise; `null` when either timestamp is missing or
+ * unparseable, or when `finished` precedes `started`.
+ */
+function computeDuration(started: string | null | undefined, finished: string | null | undefined) {
+ if (!started || !finished) return null;
+ const ms = new Date(finished).getTime() - new Date(started).getTime();
+ // An unparseable timestamp yields NaN, which fails the finiteness check.
+ if (!Number.isFinite(ms) || ms < 0) return null;
+ if (ms < 1000) return `${ms}ms`;
+ // Truncate to tenths rather than round: rounding showed "60.0s" for values
+ // just under a minute, while the branch below floors its seconds.
+ if (ms < 60_000) return `${(Math.floor(ms / 100) / 10).toFixed(1)}s`;
+ const minutes = Math.floor(ms / 60_000);
+ const seconds = Math.floor((ms % 60_000) / 1000);
+ return `${minutes}m ${seconds}s`;
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-status-badge.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-status-badge.tsx
new file mode 100644
index 000000000..e5532a500
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-status-badge.tsx
@@ -0,0 +1,57 @@
+"use client";
+import { AlertCircle, CheckCircle2, Clock, Loader2, TimerOff, XCircle } from "lucide-react";
+import type { RunStatus } from "@/contracts/types/automation.types";
+import { cn } from "@/lib/utils";
+
+const STATUS_STYLES: Record<
+ RunStatus,
+ { label: string; icon: typeof CheckCircle2; classes: string; spin?: boolean }
+> = {
+ pending: {
+ label: "Pending",
+ icon: Clock,
+ classes: "bg-muted text-muted-foreground border-border/60",
+ },
+ running: {
+ label: "Running",
+ icon: Loader2,
+ classes: "bg-blue-500/10 text-blue-600 border-blue-500/20",
+ spin: true,
+ },
+ succeeded: {
+ label: "Succeeded",
+ icon: CheckCircle2,
+ classes: "bg-emerald-500/10 text-emerald-600 border-emerald-500/20",
+ },
+ failed: {
+ label: "Failed",
+ icon: XCircle,
+ classes: "bg-destructive/10 text-destructive border-destructive/20",
+ },
+ cancelled: {
+ label: "Cancelled",
+ icon: AlertCircle,
+ classes: "bg-muted text-muted-foreground border-border/60",
+ },
+ timed_out: {
+ label: "Timed out",
+ icon: TimerOff,
+ classes: "bg-amber-500/10 text-amber-600 border-amber-500/20",
+ },
+};
+
+export function RunStatusBadge({ status, className }: { status: RunStatus; className?: string }) {
+ const { label, icon: Icon, classes, spin } = STATUS_STYLES[status];
+ return (
+
+
+ {label}
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx
new file mode 100644
index 000000000..5cab18f4c
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx
@@ -0,0 +1,23 @@
+"use client";
+import { Skeleton } from "@/components/ui/skeleton";
+
+const ROW_KEYS = ["a", "b", "c"] as const;
+
+export function RunsLoading() {
+ return (
+
+ {ROW_KEYS.map((key) => (
+
+ ))}
+
+ );
+}
From ed8d56aa16dc1beba7dfee1e5a93c18a49b29725 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 01:44:13 +0200
Subject: [PATCH 74/87] feat(web): create automation via raw JSON
---
.../components/automations-empty-state.tsx | 22 +++-
.../components/automations-header.tsx | 22 +++-
.../new/automation-new-content.tsx | 42 ++++++
.../new/components/automation-json-form.tsx | 122 ++++++++++++++++++
.../new/components/automation-new-header.tsx | 42 ++++++
.../automations/new/page.tsx | 15 +++
.../lib/automations/default-template.ts | 44 +++++++
7 files changed, 295 insertions(+), 14 deletions(-)
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/new/automation-new-content.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-new-header.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/new/page.tsx
create mode 100644 surfsense_web/lib/automations/default-template.ts
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx
index 4004cce9b..83fa52fa8 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-empty-state.tsx
@@ -1,5 +1,5 @@
"use client";
-import { MessageSquarePlus, Workflow } from "lucide-react";
+import { FileJson, MessageSquarePlus, Workflow } from "lucide-react";
import Link from "next/link";
import { Button } from "@/components/ui/button";
@@ -26,12 +26,20 @@ export function AutomationsEmptyState({ searchSpaceId, canCreate }: AutomationsE
SurfSense drafts the automation for your approval.
{canCreate ? (
-
-
-
- Create via chat
-
-
+
+
+
+
+ Create via chat
+
+
+
+
+
+ Create via JSON
+
+
+
) : (
You don't have permission to create automations in this search space.
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
index 22ea60664..544c6b7ac 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/components/automations-header.tsx
@@ -1,5 +1,5 @@
"use client";
-import { MessageSquarePlus } from "lucide-react";
+import { FileJson, MessageSquarePlus } from "lucide-react";
import Link from "next/link";
import { Button } from "@/components/ui/button";
@@ -39,12 +39,20 @@ export function AutomationsHeader({
)}
{canCreate && showCreateCta && (
-
-
-
- Create via chat
-
-
+
+
+
+
+ Create via JSON
+
+
+
+
+
+ Create via chat
+
+
+
)}
);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/new/automation-new-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/new/automation-new-content.tsx
new file mode 100644
index 000000000..f03b3f4c8
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/new/automation-new-content.tsx
@@ -0,0 +1,42 @@
+"use client";
+import { ShieldAlert } from "lucide-react";
+import { useAutomationPermissions } from "../hooks/use-automation-permissions";
+import { AutomationJsonForm } from "./components/automation-json-form";
+import { AutomationNewHeader } from "./components/automation-new-header";
+
+interface AutomationNewContentProps {
+ searchSpaceId: number;
+}
+
+/**
+ * Orchestrator for the raw-JSON create route. Gates on
+ * ``automations:create`` so users who can't create don't even see the
+ * form; same panel as the detail page's access-denied state for
+ * consistency.
+ */
+export function AutomationNewContent({ searchSpaceId }: AutomationNewContentProps) {
+ const perms = useAutomationPermissions();
+
+ if (perms.loading) {
+ return
;
+ }
+
+ if (!perms.canCreate) {
+ return (
+
+
+
Access denied
+
+ You don't have permission to create automations in this search space.
+
+
+ );
+ }
+
+ return (
+ <>
+
+
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
new file mode 100644
index 000000000..845d95166
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
@@ -0,0 +1,122 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { AlertCircle, Code, FileJson, Save } from "lucide-react";
+import { useRouter } from "next/navigation";
+import { useState } from "react";
+import { createAutomationMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Spinner } from "@/components/ui/spinner";
+import { automationCreateRequest } from "@/contracts/types/automation.types";
+import { DEFAULT_AUTOMATION_TEMPLATE } from "@/lib/automations/default-template";
+
+interface AutomationJsonFormProps {
+ searchSpaceId: number;
+}
+
+/**
+ * Raw-JSON create form. Lets power users skip the chat drafter when they
+ * already know the shape they want. Flow:
+ * parse JSON → inject search_space_id → Zod validate → POST → navigate
+ *
+ * ``search_space_id`` is injected here rather than required in the pasted
+ * payload — the user shouldn't have to know their numeric id, and it
+ * keeps the template copy-paste-friendly across search spaces.
+ */
+export function AutomationJsonForm({ searchSpaceId }: AutomationJsonFormProps) {
+ const router = useRouter();
+ const { mutateAsync: createAutomation, isPending } = useAtomValue(createAutomationMutationAtom);
+ const [text, setText] = useState(() => JSON.stringify(DEFAULT_AUTOMATION_TEMPLATE, null, 2));
+ const [issues, setIssues] = useState([]);
+
+ function handleFormat() {
+ try {
+ const parsed = JSON.parse(text);
+ setText(JSON.stringify(parsed, null, 2));
+ setIssues([]);
+ } catch (err) {
+ setIssues([`Cannot format — not valid JSON: ${(err as Error).message}`]);
+ }
+ }
+
+ async function handleSubmit() {
+ setIssues([]);
+
+ let parsed: unknown;
+ try {
+ parsed = JSON.parse(text);
+ } catch (err) {
+ setIssues([`Invalid JSON: ${(err as Error).message}`]);
+ return;
+ }
+
+ if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+ setIssues(["Root must be a JSON object."]);
+ return;
+ }
+
+ const payload = { ...(parsed as Record), search_space_id: searchSpaceId };
+ const result = automationCreateRequest.safeParse(payload);
+ if (!result.success) {
+ setIssues(
+ result.error.issues.map((issue) => `${issue.path.join(".") || "(root)"}: ${issue.message}`)
+ );
+ return;
+ }
+
+ try {
+ const created = await createAutomation(result.data);
+ router.push(`/dashboard/${searchSpaceId}/automations/${created.id}`);
+ } catch (err) {
+ setIssues([(err as Error).message ?? "Submit failed"]);
+ }
+ }
+
+ const hasIssues = issues.length > 0;
+
+ return (
+
+
+
+
+ Definition + triggers
+
+
+
+ Format
+
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-new-header.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-new-header.tsx
new file mode 100644
index 000000000..aef2744d5
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-new-header.tsx
@@ -0,0 +1,42 @@
+"use client";
+import { ArrowLeft, MessageSquarePlus } from "lucide-react";
+import Link from "next/link";
+import { Button } from "@/components/ui/button";
+
+interface AutomationNewHeaderProps {
+ searchSpaceId: number;
+}
+
+export function AutomationNewHeader({ searchSpaceId }: AutomationNewHeaderProps) {
+ return (
+
+
+
+
+ Back to automations
+
+
+
+
+
+
+ New automation · raw JSON
+
+
+ Paste an ``AutomationCreate`` payload and submit. Validated against the schema before
+ save. Prefer natural language? Use chat instead.
+
+
+
+
+
+ Switch to chat
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/new/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/new/page.tsx
new file mode 100644
index 000000000..f6e8e0008
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/new/page.tsx
@@ -0,0 +1,15 @@
+import { AutomationNewContent } from "./automation-new-content";
+
+export default async function NewAutomationPage({
+ params,
+}: {
+ params: Promise<{ search_space_id: string }>;
+}) {
+ const { search_space_id } = await params;
+
+ return (
+
+ );
+}
diff --git a/surfsense_web/lib/automations/default-template.ts b/surfsense_web/lib/automations/default-template.ts
new file mode 100644
index 000000000..8963992cb
--- /dev/null
+++ b/surfsense_web/lib/automations/default-template.ts
@@ -0,0 +1,44 @@
+/**
+ * Minimal valid ``AutomationCreate`` skeleton used to seed the raw-JSON
+ * create form. ``search_space_id`` is omitted on purpose — the form
+ * injects it from the route so users never have to know their id.
+ *
+ * The shape matches the Pydantic ``AutomationCreate`` model less the
+ * search_space_id field; Zod validates the merged payload before submit.
+ */
+export const DEFAULT_AUTOMATION_TEMPLATE = {
+ name: "My automation",
+ description: null,
+ definition: {
+ name: "My automation",
+ goal: null,
+ plan: [
+ {
+ step_id: "step_1",
+ action: "agent_task",
+ params: {
+ query: "Summarize new docs added to folder 12 since the last run.",
+ },
+ },
+ ],
+ execution: {
+ timeout_seconds: 600,
+ max_retries: 2,
+ retry_backoff: "exponential",
+ concurrency: "drop_if_running",
+ on_failure: [],
+ },
+ metadata: { tags: [] },
+ },
+ triggers: [
+ {
+ type: "schedule",
+ params: {
+ cron: "0 9 * * 1-5",
+ timezone: "UTC",
+ },
+ static_inputs: {},
+ enabled: true,
+ },
+ ],
+} as const;
From 91962ba879a677e0fcf43bc2e1dc7abe0732b462 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 02:48:47 +0200
Subject: [PATCH 75/87] fix automation run inputs, hitl routing, and detail UI
polish
---
.../main_agent/tools/automation/create.py | 11 +-
.../hitl/approvals/self_gated/request.py | 7 +
.../app/automations/dispatch/run.py | 9 +-
.../automation-detail-content.tsx | 2 -
.../automation-triggers-section.tsx | 33 +--
.../components/trigger-card.tsx | 29 ++-
.../tool-ui/automation/create-automation.tsx | 213 ++++++++++++++----
surfsense_web/lib/format-date.ts | 40 +++-
8 files changed, 258 insertions(+), 86 deletions(-)
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
index 07b579f3b..173d302e5 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/tools/automation/create.py
@@ -22,11 +22,14 @@ from typing import Any
from uuid import UUID
from fastapi import HTTPException
+from langchain.tools import ToolRuntime
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from pydantic import ValidationError
-from app.agents.new_chat.tools.hitl import request_approval
+from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
+ request_approval,
+)
from app.automations.schemas.api import AutomationCreate
from app.automations.services.automation import AutomationService
from app.db import User, async_session_maker
@@ -56,7 +59,7 @@ def create_create_automation_tool(
uid = UUID(user_id) if isinstance(user_id, str) else user_id
@tool
- async def create_automation(intent: str) -> dict[str, Any]:
+ async def create_automation(intent: str, runtime: ToolRuntime) -> dict[str, Any]:
"""Draft + save an automation from a natural-language intent.
Use this when the user wants SurfSense to do something on its own
@@ -137,6 +140,7 @@ def create_create_automation_tool(
tool_name="create_automation",
params=card_params,
context={"search_space_id": search_space_id},
+ tool_call_id=runtime.tool_call_id,
)
if result.rejected:
@@ -200,6 +204,5 @@ def _extract_json(text: str) -> dict[str, Any] | None:
def _format_validation_issues(exc: ValidationError) -> list[str]:
return [
- f"{'.'.join(str(p) for p in err['loc'])}: {err['msg']}"
- for err in exc.errors()
+ f"{'.'.join(str(p) for p in err['loc'])}: {err['msg']}" for err in exc.errors()
]
diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/hitl/approvals/self_gated/request.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/hitl/approvals/self_gated/request.py
index 8729ea85b..2f7e3cd35 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/hitl/approvals/self_gated/request.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/shared/hitl/approvals/self_gated/request.py
@@ -49,6 +49,7 @@ def request_approval(
params: dict[str, Any],
context: dict[str, Any] | None = None,
trusted_tools: list[str] | None = None,
+ tool_call_id: str | None = None,
) -> HITLResult:
"""Pause the graph for user approval and return the user's decision.
@@ -64,6 +65,10 @@ def request_approval(
forwarded verbatim to the FE for richer card chrome.
trusted_tools: Per-session allowlist; when ``tool_name`` is in it the
interrupt is skipped and the tool runs immediately.
+ tool_call_id: Caller's LangChain tool-call id. Required for tools
+ running directly on the main agent; subagent-mounted tools omit
+ it (the ``task`` chokepoint stamps it on re-raise — see
+ :mod:`...checkpointed_subagent_middleware.propagation`).
Returns:
:class:`HITLResult` with ``rejected=True`` if the user declined or
@@ -90,6 +95,8 @@ def request_approval(
interrupt_type=action_type,
context=context,
)
+ if tool_call_id:
+ payload["tool_call_id"] = tool_call_id
approval = interrupt(payload)
parsed = parse_lc_envelope(approval)
diff --git a/surfsense_backend/app/automations/dispatch/run.py b/surfsense_backend/app/automations/dispatch/run.py
index e317a13b9..02d0b0356 100644
--- a/surfsense_backend/app/automations/dispatch/run.py
+++ b/surfsense_backend/app/automations/dispatch/run.py
@@ -67,8 +67,15 @@ async def dispatch_run(
def _validate_inputs(
definition: AutomationDefinition, inputs: dict[str, Any]
) -> dict[str, Any]:
+ """Validate merged inputs against the optional declared schema.
+
+ No declared schema → pass through (runtime inputs like ``fired_at`` /
+ ``last_fired_at`` and trigger ``static_inputs`` must still reach the
+ template context). Returning ``{}`` here strips them and makes Jinja
+ blow up on any ``{{ inputs.* }}`` reference.
+ """
if definition.inputs is None or not definition.inputs.schema_:
- return {}
+ return inputs
try:
jsonschema.validate(instance=inputs, schema=definition.inputs.schema_)
except jsonschema.ValidationError as exc:
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
index 253d6ae67..49df3633e 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
@@ -77,10 +77,8 @@ export function AutomationDetailContent({
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
index 8cc62f5c8..33c8373a1 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
@@ -1,7 +1,5 @@
"use client";
-import { CalendarClock, MessageSquarePlus } from "lucide-react";
-import Link from "next/link";
-import { Button } from "@/components/ui/button";
+import { CalendarClock } from "lucide-react";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import type { Trigger } from "@/contracts/types/automation.types";
import { TriggerCard } from "./trigger-card";
@@ -9,43 +7,28 @@ import { TriggerCard } from "./trigger-card";
interface AutomationTriggersSectionProps {
triggers: Trigger[];
automationId: number;
- searchSpaceId: number;
canUpdate: boolean;
canDelete: boolean;
- canCreate: boolean;
}
/**
* The Triggers card. Lists each attached trigger with its own enable
- * toggle and remove button. Adding a new trigger is intent-driven (via
- * chat) for v1 — same philosophy as creating an automation, so the
- * empty/add CTA links to a new chat rather than opening a form.
+ * toggle and remove button. v1 attaches triggers at automation-creation
+ * time only; there is no in-place "add trigger" affordance here.
*/
export function AutomationTriggersSection({
triggers,
automationId,
- searchSpaceId,
canUpdate,
canDelete,
- canCreate,
}: AutomationTriggersSectionProps) {
return (
-
-
-
Triggers
-
- When this automation fires. v1 supports scheduled triggers only.
-
-
- {canCreate && (
-
-
-
- Add via chat
-
-
- )}
+
+ Triggers
+
+ When this automation fires. v1 supports scheduled triggers only.
+
{triggers.length === 0 ? (
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
index afadf589a..ec0246e49 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
@@ -7,7 +7,7 @@ import { Button } from "@/components/ui/button";
import { Switch } from "@/components/ui/switch";
import type { Trigger } from "@/contracts/types/automation.types";
import { describeCron } from "@/lib/automations/describe-cron";
-import { formatRelativeDate } from "@/lib/format-date";
+import { formatRelativeDate, formatRelativeFutureDate } from "@/lib/format-date";
import { DeleteTriggerDialog } from "./delete-trigger-dialog";
interface TriggerCardProps {
@@ -91,11 +91,18 @@ export function TriggerCard({ trigger, automationId, canUpdate, canDelete }: Tri
{(trigger.last_fired_at || trigger.next_fire_at) && (
-
+
{trigger.next_fire_at && (
-
+
+ )}
+ {trigger.last_fired_at && (
+
)}
- {trigger.last_fired_at && }
)}
@@ -126,17 +133,20 @@ export function TriggerCard({ trigger, automationId, canUpdate, canDelete }: Tri
function TimeRow({
label,
iso,
+ tense,
highlight = false,
}: {
label: string;
iso: string;
+ tense: "past" | "future";
highlight?: boolean;
}) {
+ const formatted = tense === "future" ? formatRelativeFutureDate(iso) : formatRelativeDate(iso);
return (
-
-
+ <>
+
- {label}:
+ {label}
- {formatRelativeDate(iso)}
+ {formatted}
-
+ >
);
}
diff --git a/surfsense_web/components/tool-ui/automation/create-automation.tsx b/surfsense_web/components/tool-ui/automation/create-automation.tsx
index 713c5fd46..00b120d38 100644
--- a/surfsense_web/components/tool-ui/automation/create-automation.tsx
+++ b/surfsense_web/components/tool-ui/automation/create-automation.tsx
@@ -2,16 +2,26 @@
import type { ToolCallMessagePartProps } from "@assistant-ui/react";
import { useAtomValue } from "jotai";
-import { CornerDownLeftIcon, ExternalLink, Workflow } from "lucide-react";
+import {
+ AlertCircle,
+ Code,
+ CornerDownLeftIcon,
+ ExternalLink,
+ Pencil,
+ Workflow,
+} from "lucide-react";
import Link from "next/link";
-import { useCallback, useEffect, useMemo } from "react";
+import { useCallback, useEffect, useMemo, useState } from "react";
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { TextShimmerLoader } from "@/components/prompt-kit/loader";
import { Button } from "@/components/ui/button";
+import { automationCreateRequest } from "@/contracts/types/automation.types";
import type { HitlDecision, InterruptResult } from "@/features/chat-messages/hitl";
import { isInterruptResult, useHitlDecision, useHitlPhase } from "@/features/chat-messages/hitl";
import { AutomationDraftPreview } from "./automation-draft-preview";
+const editArgsSchema = automationCreateRequest.omit({ search_space_id: true });
+
// ----------------------------------------------------------------------------
// Result discrimination — mirrors the backend return shapes in
// app/agents/multi_agent_chat/main_agent/tools/automation/create.py.
@@ -62,12 +72,11 @@ function hasStatus(value: unknown, status: string): boolean {
// ----------------------------------------------------------------------------
// Approval card — pending → processing → complete / rejected.
//
-// v1 deliberately supports only approve/reject. The drafted JSON is complex
-// (full plan + triggers) and we already have a multi-turn refinement path via
-// chat ("make it run at 10am instead" → the agent re-calls the tool with a
-// refined intent). An in-card edit form would duplicate that flow and add UX
-// surface area we don't need yet — leave it for the raw-JSON path on the
-// detail page.
+// Edit toggle reuses the same primitives as the Create-via-JSON page: raw
+// textarea, Format, Zod validation against ``AutomationCreate`` (minus the
+// ``search_space_id`` field, which the backend injects). Approve dispatches
+// an ``edit`` decision with the parsed args when edits are pending, otherwise
+// a plain ``approve``. Multi-turn chat refinement still works as a fallback.
// ----------------------------------------------------------------------------
interface ApprovalCardProps {
@@ -83,28 +92,34 @@ function ApprovalCard({ args, interruptData, onDecision }: ApprovalCardProps) {
const allowedDecisions = reviewConfig?.allowed_decisions ?? ["approve", "reject"];
const canApprove = allowedDecisions.includes("approve");
const canReject = allowedDecisions.includes("reject");
+ const canEdit = allowedDecisions.includes("edit");
- const draft = useMemo(() => extractDraft(args), [args]);
+ const [pendingEdits, setPendingEdits] = useState | null>(null);
+ const [isEditing, setIsEditing] = useState(false);
+
+ const effectiveArgs = pendingEdits ?? args;
+ const draft = useMemo(() => extractDraft(effectiveArgs), [effectiveArgs]);
const handleApprove = useCallback(() => {
- if (phase !== "pending" || !canApprove) return;
+ if (phase !== "pending" || !canApprove || isEditing) return;
setProcessing();
onDecision({
- type: "approve",
+ type: pendingEdits ? "edit" : "approve",
edited_action: {
name: interruptData.action_requests[0]?.name ?? "create_automation",
- args,
+ args: pendingEdits ?? args,
},
});
- }, [phase, canApprove, setProcessing, onDecision, interruptData, args]);
+ }, [phase, canApprove, isEditing, setProcessing, onDecision, interruptData, args, pendingEdits]);
const handleReject = useCallback(() => {
- if (phase !== "pending" || !canReject) return;
+ if (phase !== "pending" || !canReject || isEditing) return;
setRejected();
onDecision({ type: "reject", message: "User rejected the automation draft." });
- }, [phase, canReject, setRejected, onDecision]);
+ }, [phase, canReject, isEditing, setRejected, onDecision]);
useEffect(() => {
+ if (isEditing) return;
const handler = (e: KeyboardEvent) => {
if (e.key === "Enter" && !e.shiftKey && !e.ctrlKey && !e.metaKey) {
handleApprove();
@@ -112,46 +127,77 @@ function ApprovalCard({ args, interruptData, onDecision }: ApprovalCardProps) {
};
window.addEventListener("keydown", handler);
return () => window.removeEventListener("keydown", handler);
- }, [handleApprove]);
+ }, [handleApprove, isEditing]);
return (
-
-
-
-
- {phase === "rejected"
- ? "Automation cancelled"
- : phase === "processing"
- ? "Saving automation"
- : phase === "complete"
- ? "Automation saved"
- : "Create automation"}
-
- {phase === "processing" ? (
-
- ) : phase === "complete" ? (
-
- Automation created from this draft
+
+
+
+
+
+ {phase === "rejected"
+ ? "Automation cancelled"
+ : phase === "processing"
+ ? "Saving automation"
+ : phase === "complete"
+ ? "Automation saved"
+ : "Create automation"}
- ) : phase === "rejected" ? (
-
- No automation was saved — ask in chat to refine and try again.
-
- ) : (
-
- Review and approve to save. To change anything, reply in chat — I'll redraft.
-
- )}
+ {phase === "processing" ? (
+
+ ) : phase === "complete" ? (
+
+ {pendingEdits
+ ? "Automation saved with your edits"
+ : "Automation created from this draft"}
+
+ ) : phase === "rejected" ? (
+
+ No automation was saved — ask in chat to refine and try again.
+
+ ) : (
+
+ {pendingEdits
+ ? "Showing your edits. Approve to save, or edit again."
+ : "Review and approve to save. Edit for fine-tuning, or reply in chat for a redraft."}
+
+ )}
+
+ {phase === "pending" && canEdit && !isEditing && (
+
setIsEditing(true)}
+ >
+
+ Edit
+
+ )}
-
+ {isEditing ? (
+
{
+ setPendingEdits(parsed);
+ setIsEditing(false);
+ }}
+ onCancel={() => setIsEditing(false)}
+ />
+ ) : (
+
+ )}
- {phase === "pending" && (
+ {phase === "pending" && !isEditing && (
<>
@@ -178,6 +224,85 @@ function ApprovalCard({ args, interruptData, onDecision }: ApprovalCardProps) {
);
}
+interface JsonEditorProps {
+ initialValue: Record
;
+ onSave: (parsed: Record) => void;
+ onCancel: () => void;
+}
+
+function JsonEditor({ initialValue, onSave, onCancel }: JsonEditorProps) {
+ const [text, setText] = useState(() => JSON.stringify(initialValue, null, 2));
+ const [issues, setIssues] = useState([]);
+
+ function handleFormat() {
+ try {
+ setText(JSON.stringify(JSON.parse(text), null, 2));
+ setIssues([]);
+ } catch (err) {
+ setIssues([`Cannot format — not valid JSON: ${(err as Error).message}`]);
+ }
+ }
+
+ function handleSave() {
+ setIssues([]);
+ let parsed: unknown;
+ try {
+ parsed = JSON.parse(text);
+ } catch (err) {
+ setIssues([`Invalid JSON: ${(err as Error).message}`]);
+ return;
+ }
+ const result = editArgsSchema.safeParse(parsed);
+ if (!result.success) {
+ setIssues(
+ result.error.issues.map((issue) => `${issue.path.join(".") || "(root)"}: ${issue.message}`)
+ );
+ return;
+ }
+ onSave(result.data as unknown as Record);
+ }
+
+ return (
+
+ );
+}
+
// ----------------------------------------------------------------------------
// Terminal result cards.
// ----------------------------------------------------------------------------
diff --git a/surfsense_web/lib/format-date.ts b/surfsense_web/lib/format-date.ts
index 9decd3402..c2f445537 100644
--- a/surfsense_web/lib/format-date.ts
+++ b/surfsense_web/lib/format-date.ts
@@ -1,4 +1,12 @@
-import { differenceInDays, differenceInMinutes, format, isToday, isYesterday } from "date-fns";
+import {
+ differenceInDays,
+ differenceInMinutes,
+ format,
+ isThisYear,
+ isToday,
+ isTomorrow,
+ isYesterday,
+} from "date-fns";
/**
* Format a date string as a human-readable relative time
@@ -23,6 +31,36 @@ export function formatRelativeDate(dateString: string): string {
return format(date, "MMM d, yyyy");
}
+/**
+ * Format a future date string as a human-readable countdown.
+ * - < 1 min: "Any moment"
+ * - < 60 min: "in 15m"
+ * - Today: "Today, 2:30 PM"
+ * - Tomorrow: "Tomorrow, 2:30 PM"
+ * - < 7 days: "in 3d"
+ * - This year: "May 30, 2:30 PM"
+ * - Later: "Jan 15, 2027"
+ *
+ * Mirrors {@link formatRelativeDate} for moments strictly after now; any
+ * timestamp at or before now (e.g. a stale "next_fire_at") is delegated
+ * to that past-relative formatter instead.
+ */
+export function formatRelativeFutureDate(dateString: string): string {
+ const date = new Date(dateString);
+ const now = new Date();
+ const minutesAhead = differenceInMinutes(date, now);
+ const daysAhead = differenceInDays(date, now);
+ // Past/future is decided on raw timestamps: minutesAhead is a whole number, so a sub-minute lead reads as 0.
+ if (date.getTime() <= now.getTime()) return formatRelativeDate(dateString);
+ if (minutesAhead < 1) return "Any moment";
+ if (minutesAhead < 60) return `in ${minutesAhead}m`;
+ if (isToday(date)) return `Today, ${format(date, "h:mm a")}`;
+ if (isTomorrow(date)) return `Tomorrow, ${format(date, "h:mm a")}`;
+ if (daysAhead < 7) return `in ${daysAhead}d`;
+ if (isThisYear(date)) return format(date, "MMM d, h:mm a");
+ return format(date, "MMM d, yyyy");
+}
+
/**
* Format a thread's last-updated timestamp for the chats sidebars.
* Example: "Mar 23, 2026 at 4:30 PM"
From b90bed2dbdfe7941f1a886481c45121309076501 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 15:38:57 +0200
Subject: [PATCH 76/87] chore: drop local design plan
---
automation-design-plan.md | 1240 -------------------------------------
1 file changed, 1240 deletions(-)
delete mode 100644 automation-design-plan.md
diff --git a/automation-design-plan.md b/automation-design-plan.md
deleted file mode 100644
index db5f7a23c..000000000
--- a/automation-design-plan.md
+++ /dev/null
@@ -1,1240 +0,0 @@
-# SurfSense Automation Feature — Design Plan (v2)
-
-A generic, extensible automation system for SurfSense that lets users (and
-future SurfSense features) trigger agent work on a schedule, on an external
-event, or on demand — with the ability to author automations either by hand
-or from a natural-language description that yields an editable, structured
-definition.
-
-This document supersedes the v1 draft. It folds in the design audit pass and
-the corrections from working through worked examples (notably: removing the
-connector bias, clarifying the executor's role, integrating MCP cleanly, and
-committing to JSON Schema as the single declarative language).
-
----
-
-## 1. The load-bearing principle
-
-> **The JSON definition is the program. Everything else is interpreter.**
-
-Every decision in this document serves that principle. If we ever face a
-design choice and one option lets some behavior leak out of the definition
-into the engine, we pick the other option.
-
-Three properties follow from this principle, and they're the reason the
-system will survive feature growth:
-
-- **Reproducibility** — same definition + same inputs → same observable
- behavior, regardless of which version of the engine runs it.
-- **Portability** — definitions can be exported, imported, version-
- controlled, code-reviewed, and shared across SurfSense instances.
-- **LLM tractability** — the NL authoring flow works because the LLM only
- needs to produce a self-contained JSON document that validates against a
- schema. It doesn't need to understand the engine.
-
----
-
-## 2. The three-layer contract
-
-The system is structured as three layers. Layers 1 and 3 are defined by
-SurfSense developers (at registration time). Layer 2 is what users write
-(or the NL generator produces). The runtime reads all three to do its job.
-
-| Layer | What it is | Defined by |
-| ----- | ---------- | ---------- |
-| **1. Action contract** | Per-action params and output schema | Developers, at startup |
-| **2. Automation definition** | One concrete saved automation | Users (or NL generator) |
-| **3. Trigger contract** | Per-trigger params and payload schemas | Developers, at startup |
-
-Each layer constrains the next. The runtime reads all three but doesn't
-know what's in them ahead of time. That's how a new action or trigger
-type becomes available across the engine without code changes outside its
-registration.
-
-A unification layer below Layer 1 — one catalog of "things this SurfSense
-instance can do," shared by automations, agents, and future surfaces — was
-considered and deferred (§3). v1 actions are stand-alone.
-
-### Schema language
-
-Every shape in every layer is described in **JSON Schema (draft 2020-12).**
-No exceptions, no parallel languages, no inline shorthand. Two documented
-extensions on top:
-
-- `default: "$some_token"` — runtime-resolved defaults. The vocabulary is
- fixed: `$last_fired_at`, `$creator`, `$space_default`. The engine resolves
- these to values before validation.
-- `x-surfsense-*` annotations — editor hints (widget type, autocomplete
- source). The validator ignores them; the form editor reads them.
-
----
-
-## 3. Capability unification layer — deferred to post-v1
-
-Earlier drafts introduced a `Capability` registry as Layer 1: one catalog
-of "things this SurfSense instance can do," shared by the automation
-engine (as actions), the agent (as tools), and any future HTTP surface.
-The motivation is real — one source of truth beats N parallel registries —
-but v1 has a single action (`agent_task`) and a single consumer (the
-automation engine). The five-field shape sketched earlier (`id`,
-`description`, `input_schema`, `output_schema`, `handler`) cannot safely
-host any non-trivial capability: it carries no caller identity, no
-search-space scoping, and no authorization gate on tool delegation.
-Building the abstraction with one consumer would lock in a shape that
-doesn't survive the second consumer.
-
-The unification layer returns when the second consumer lands (Phase 2
-tight actions or Phase 4 MCP), redesigned from the start with:
-
-- A `CallContext` carrying caller user id, search space id, and run id,
- passed to every handler invocation.
-- Explicit scope declarations per capability (e.g. `reads:documents`,
- `writes:slack`, `destructive`) for the authorization layer to read.
-- A per-user, per-search-space filter consulted at both definition save
- time (validating `agent_task.tools`) and run time (scoping the agent's
- tool list to what the automation creator can delegate).
-
-Until then:
-
-- v1 actions are stand-alone units (Layer 1 below); the automation engine
- reads its own action registry, nothing else.
-- `agent_task.params.tools` is a forward-looking allowlist field with no
- v1 semantics beyond "list of string identifiers." The handler's tool
- resolution is opaque to the automation contract.
-
-### Credentials — deferred to Phase 2
-
-External-credential handlers (Slack, email, etc.) require per-user or
-per-connection auth. v1 actions run server-side with app-level
-configuration. When tight actions ship in Phase 2, the credential design
-lands as part of the unification redesign: connection IDs in the
-definition (never tokens); credentials loaded per-call by the handler
-context (never pre-loaded into worker memory); credentials never enter
-LLM context.
-
-### MCP — deferred to Phase 4
-
-External tool servers feeding tools into a shared registry land with the
-rest of the integration tooling in Phase 4, after the unification layer
-is in place. The two-tier registry, `mcp_connections` and `mcp_tools`
-tables, and the harvester arrive as a single coherent step then.
-
----
-
-## 4. Action contract
-
-An `Action` is what a user references in a plan step. Some actions are
-deterministic single-purpose handlers (`slack_post`, `send_email`); one
-action (`agent_task`) hosts an LLM and a tool allowlist for cases where
-judgment is needed. The contract is the same in both cases — only the
-handler differs.
-
-```python
-@dataclass(frozen=True, slots=True)
-class ActionDefinition:
- type: str # "agent_task", "slack_post"
- name: str # short UI label
- description: str # for the NL generator and the UI
- params_schema: dict # JSON Schema for step.params
- handler: ActionHandler
-```
-
-This is the v1 shape: five fields, no handler context, no output
-contract, no artifact declaration. The deferrals are intentional:
-
-- **`output_contract`** — Phase 2. Deterministic handlers will return
- a fixed shape; v1's only action (`agent_task`) takes an
- `output_schema` inside `params` and validates against that instead.
-- **`produces_artifacts`** — Phase 5. Artifact lifecycle (storage,
- signed URLs, retention) is its own design step; v1 handlers
- persist their own outputs.
-- **Handler context** — paired with the unification redesign (§3).
- v1 handlers receive `(args)` only; per-user / per-search-space
- behavior is not yet a v1 concern.
-
-### Tight vs loose actions
-
-Two patterns coexist by design:
-
-- **Tight actions** (`slack_post`, `linear_create_issue`,
- `send_email`) — deterministic single-purpose handlers. ~20 LOC
- each. **Phase 2.**
-- **Loose actions** (`agent_task`) — params_schema accepts a `prompt`,
- a `tools` allowlist, and an optional `output_schema` declaring what
- the agent must return; the handler validates the agent's output
- against it. **v1.**
-
-The agent's `tools` allowlist resolves opaquely in v1; the redesigned
-unification layer (§3) will give both invocation modes access to the
-same vocabulary, with per-user authorization gating both.
-
-### How names in the definition become function calls
-
-The definition contains strings like `"action": "agent_task"`. The
-string is just a name — it does not point to a function. At runtime,
-the executor performs a **name-based lookup** against the action
-registry:
-
-```python
-action_def = action_registry.get(step.action) # dict lookup
-handler = action_def.handler # Python callable
-result = await handler(resolved_params) # invocation
-```
-
-The registry is a Python dict populated at process startup. Each entry
-in `automations/registries/actions/*.py` calls `register_action(...)`
-at module import time, putting its `ActionDefinition` (including the
-handler function reference) into the registry.
-
-The definition is pure data. The registry is the engine's runtime
-vocabulary. They meet at name-based lookup; nothing else crosses the
-boundary.
-
-### The full expressive spectrum
-
-The contract supports a continuous spectrum from purely deterministic to
-fully agentic. Six practical shapes worth recognizing:
-
-| Shape | Example | Cost / latency profile |
-| --- | --- | --- |
-| **1. Direct call** | `slack_post` with literal channel and template | No LLM. ~200ms. Fractions of a cent. |
-| **2. Direct call with computed inputs** | `linear_create_issue` using `{{summary.title}}` from a prior step | No LLM for this step. Cheap. |
-| **3. Single-domain agent task** | `agent_task` with `tools: ["slack.*"]` only | One LLM, bounded toolset. |
-| **4. Multi-domain agent task, narrow** | `agent_task` with `tools: ["github.list_pull_requests", "linear.create_issue"]` | One LLM, named tools. |
-| **5. Multi-domain agent task, broad** | `agent_task` with `tools: ["slack.*", "github.*", "linear.*"]` | One LLM, large toolset, most agentic. |
-| **6. Composed plan** | `agent_task` (narrow) for thinking → `slack_post` + `linear_create_issue` for acting | Best cost-to-power ratio. |
-
-Shape 6 is the underrated one and the cost-and-speed answer. The agent
-reasons once (Shape 3 or 4) and its structured output drives several
-deterministic actions. This is roughly 5–10x cheaper and 3–4x faster than
-forcing the agent to do everything (Shape 5) and produces the same outcome.
-
-**The NL generator's job is to propose Shape 6-style plans by default.**
-The Review LLM flags proposals that use `agent_task` for steps a
-deterministic action could handle. This is the discipline that keeps
-automations cheap at scale.
-
-The user navigates the spectrum by intent (describing what they want), not
-by mechanism — the shape selection is the engine's responsibility, not the
-user's.
-
----
-
-## 5. Automation definition
-
-This is the JSON the user writes (or the NL generator produces). Stored in
-`automations.definition` as JSONB.
-
-### Top-level shape
-
-```jsonc
-{
- "schema_version": "1.0",
- "name": "Daily competitor digest",
- "goal": "Summarize new competitor content and post to Slack",
-
- "inputs": {
- "schema": {
- "type": "object",
- "required": ["since"],
- "properties": {
- "since": { "type": "string", "format": "date-time",
- "default": "$last_fired_at" },
- "tags": { "type": "array", "items": { "type": "string" },
- "default": ["competitor"] }
- }
- }
- },
-
- "triggers": [
- {
- "type": "schedule",
- "params": { "cron": "0 9 * * 1-5", "timezone": "Africa/Kigali" }
- }
- ],
-
- "plan": [
- {
- "step_id": "research",
- "action": "agent_task",
- "params": {
- "prompt": "Find documents tagged {{inputs.tags}} indexed since {{inputs.since}}. Return JSON with bullets and source_doc_ids.",
- "tools": ["search_space.query", "search_space.fetch_document"],
- "model": "anthropic/claude-sonnet-4-7",
- "output_schema": {
- "type": "object",
- "required": ["bullets", "source_doc_ids"],
- "properties": {
- "bullets": { "type": "array", "items": { "type": "string" } },
- "source_doc_ids": { "type": "array", "items": { "type": "string" } }
- }
- }
- },
- "output_as": "summary"
- },
- {
- "step_id": "deliver",
- "action": "slack_post",
- "params": {
- "channel_id": "C0123",
- "message_template": "*Competitor digest*\n\n{% for b in summary.bullets %}• {{b}}\n{% endfor %}"
- }
- }
- ],
-
- "execution": {
- "timeout_seconds": 600,
- "max_retries": 2,
- "retry_backoff": "exponential",
- "concurrency": "drop_if_running",
- "on_failure": [ /* steps to run if main plan fails after retries */ ]
- },
-
- "metadata": { "tags": ["digest"] }
-}
-```
-
-### Plan steps
-
-```jsonc
-{
- "step_id": "...", // unique within plan
- "action": "...", // references an ActionDefinition.type
- "when": "{{ ... }}", // optional Jinja expr → bool; false = skip
- "params": { ... }, // validated against action's params_schema
- "output_as": "...", // binds output to this name for later steps
- "max_retries": 0, // optional, overrides automation default
- "timeout_seconds": 1200 // optional, overrides automation default
-}
-```
-
-Steps run **sequentially**. No parallelism, no DAGs, no loops. If a user
-needs branching, they use `when:` on multiple steps. If they need
-parallelism or iteration, they use `agent_task` and let the agent reason
-about it, or they compose automations through events (§7.5).
-
----
-
-## 6. Trigger contract
-
-Three trigger types. That's the entire taxonomy.
-
-### `schedule`
-
-```python
-TriggerDefinition(
- type="schedule",
- params_model=ScheduleTriggerParams, # cron + timezone
-)
-# At fire time the schedule producer emits runtime inputs
-# (fired_at, scheduled_for, last_fired_at) which are merged with the
-# trigger row's static_inputs (static wins) and validated against
-# automation.definition.inputs.schema_.
-```
-
-Implementation: extends `app/utils/periodic_scheduler.py`, which already
-reads connector sync schedules. Adds a second source — `automation_triggers
-WHERE type='schedule'`. Same Celery Beat checker, two source tables.
-
-Minimum interval: 1 minute (the existing checker's resolution). The form
-editor warns when users set intervals under 15 minutes that they probably
-want an event trigger instead.
-
-### `webhook`
-
-```python
-TriggerDefinition(
- type="webhook",
- params_schema={
- "type": "object",
- "properties": {
- "input_mapping": {
- "type": "object",
- "additionalProperties": { "type": "string" }
- # values are JSONPath expressions
- }
- }
- },
- # payload is whatever the POST body is; user-defined shape via mapping
-)
-```
-
-Endpoint: `POST /api/v1/automations/{id}/fire`. Bearer token shown once,
-hashed at rest, rotatable, revocable. Returns `202 Accepted` with the
-created run's URL. Caller polls for status; we do not push callbacks in
-v1 (a `callback_webhook` action can be added later).
-
-Idempotency: honors `Idempotency-Key` header or `idempotency_key` in body.
-Dedups against runs in the last 24 hours.
-
-### `event`
-
-```python
-TriggerDefinition(
- type="event",
- params_schema={
- "type": "object",
- "required": ["event_type"],
- "properties": {
- "event_type": { "type": "string" }, # e.g. "drive.file_added"
- # or "surfsense.podcast.generated"
- "filters": { "$ref": "#/definitions/filter_expression" }
- }
- }
- # payload shape is documented per event_type in a separate registry
-)
-```
-
-**Events absorb both connector events and internal SurfSense events.** A
-file added to Drive and a podcast finishing in SurfSense are both events
-in the same `domain_events` table, both subscribable by automations, both
-matched by the same dispatcher code. The engine doesn't distinguish.
-
-### Filter grammar
-
-Filters are JSON-structured operators, not expressions. This is the one
-place we deliberately don't use Jinja, because filters run on a hot path
-(every event matched against every subscribing trigger) and structured
-filters can be indexed and short-circuited.
-
-Vocabulary:
-- Equality: `equals`, `not_equals`
-- String: `starts_with`, `ends_with`, `contains`, `regex`
-- Numeric: `gt`, `gte`, `lt`, `lte`
-- Set: `in`, `not_in`
-- Existence: `exists`
-- Composition: `$and`, `$or`, `$not`
-
-Inspired by AWS EventBridge and MongoDB query syntax. The filter grammar
-itself is published as a JSON Schema, so users get inline error messages.
-
----
-
-## 7. Runtime components
-
-Each component is distinct, replaceable, and has one job.
-
-### 7.1 Dispatcher
-
-What it does: matches firing triggers to automations, creates `AutomationRun`
-rows, enqueues executor tasks.
-
-For schedule triggers: Celery Beat polls the trigger table, computes due
-ones, fires.
-
-For webhook triggers: the FastAPI handler is the dispatcher entry point.
-Validates token, runs input_mapping, creates run.
-
-For event triggers: subscribes to the `domain_events` table. For each new
-event, evaluates all matching triggers' filters, fires the matches.
-
-Common path (after a trigger has fired):
-1. Resolve `inputs` from trigger payload and defaults
-2. Validate resolved inputs against the automation's input schema
-3. **Idempotency check** — dedup against existing pending/running runs
-4. **Snapshot the resolved definition** into the run row (immutable history)
-5. Enqueue executor task on the single `automations_default` Celery queue
-
-The cost-estimate pre-check (originally step 3) is **deferred**. v1
-actions do not declare cost estimates, the run row has no `cost_usd`
-column, and no handler reports tokens used — so neither pre-flight
-prediction nor mid-flight accumulation can be enforced. `Execution`
-therefore does not expose `budget_cap_usd` in v1; it returns as a single
-field addition the day the cost ledger ships (per-action cost reporting
-+ `automation_runs.cost_usd` column + executor accumulation).
-
-Queue routing by `expected_duration_seconds` is **deferred** until load
-patterns justify a second queue. v1 uses a single queue.
-
-### 7.2 Executor
-
-What it is: **a Celery task wrapping a single function that walks a plan
-step by step.** Not an agent, not a workflow engine, not a scheduler. A
-loop with bookkeeping. Maybe 200 lines.
-
-```python
-async def execute_run(run_id: int) -> None:
- run = load_run(run_id); run.status = "running"; save(run)
- context = build_run_context(run)
- step_outputs = {}
-
- for step in run.plan:
- if step.when and not evaluate_predicate(step.when, context | step_outputs):
- record_step_skipped(run, step); continue
-
- resolved_params = render_params(step.params, context | step_outputs)
- action = action_registry.get(step.action)
- validate(resolved_params, action.params_schema)
-
- try:
- result = await with_retries(
- action.handler,
- ctx=build_action_context(run, action),
- args=resolved_params,
- policy=step.retry_policy or run.execution.retry_policy,
- )
- validate(result, step.output_schema)
- if step.output_as:
- step_outputs[step.output_as] = result
- record_step_succeeded(run, step, result)
- except Exception as e:
- record_step_failed(run, step, e)
- await run_on_failure(run, e)
- return
-
- run.status = "succeeded"; save(run)
- publish_event("automation.run.succeeded", run) # see §7.5
-```
-
-Intelligence lives **inside handlers**, not in the executor. The most
-intelligent handler is `agent_task`, which spins up a LangGraph Deep Agent
-for one step and returns when the agent finishes. The executor sees a
-validated dict come back; it doesn't know that step was "smart."
-
-### 7.3 Action handlers
-
-One handler per `ActionDefinition.type`. Receives the validated `args`
-dict and returns whatever the step's output validates against (a fixed
-shape declared by tight actions, or a dynamic shape declared via
-`output_schema` in the step params for `agent_task`).
-
-Handlers do not know about retries or timeouts — those are the
-executor's concern.
-
-In v1, handlers take `(args)` only. The `CallContext` parameter sketched
-in §7.2's pseudo-code (caller user id, search space id, run id,
-credential resolver) arrives with the unification layer redesign (§3);
-v1's single action (`agent_task`) reads what it needs from app-level
-configuration.
-
-### 7.4 Template engine
-
-#### Why it exists
-
-Most fields in an automation definition contain literal strings the user
-authored once — but the actual rendered value has to change per run, because
-it includes data from the trigger payload or from prior step outputs. The
-template engine is what turns `"Daily digest for {{run.started_at}}"` into
-`"Daily digest for 2026-05-26"` at run time.
-
-Three fields use it:
-- `*_template` strings in tight action configs (Slack messages, email bodies,
- Linear titles, etc.)
-- `prompt` in `agent_task` configs (so the agent sees resolved values, not
- `{{...}}` placeholders)
-- `when:` step predicates (which need to evaluate to a boolean)
-
-#### Public interface
-
-Single module, ~80 lines. Three public functions — everything else in the
-engine routes through these:
-
-```python
-def render_template(template: str, context: dict) -> str: ...
-def evaluate_predicate(expression: str, context: dict) -> bool: ...
-def build_run_context(run, step_outputs) -> dict: ...
-```
-
-Backed by Jinja2's `SandboxedEnvironment`. The whole module is the seam: if
-the template language is ever swapped, only this file changes.
-
-#### Security architecture: allowlist by default
-
-`SandboxedEnvironment` starts empty. A freshly-created instance gives a
-template access to:
-- Variables in the context dict we pass in (`run`, `inputs`, prior step
- outputs)
-- Public (non-underscore) attributes of those variables
-- Jinja's built-in control flow (`{% if %}`, `{% for %}`, `{% set %}`)
-
-Nothing else. No Python builtins, no modules, no I/O, no network, no
-filesystem. Everything beyond the above must be **explicitly registered.**
-This is the structurally important property: anything we didn't add is
-inaccessible. The risk surface equals the size of what we registered.
-
-The three sandbox rules that enforce this:
-1. **Attribute access is filtered** — names starting with underscore are
- rejected. This blocks the entire family of `{{x.__class__.__mro__...}}`
- Python escape paths in one rule.
-2. **Globals are allowlist-only** — `open`, `eval`, `exec`, `__import__`,
- `getattr`, every module name, are all absent unless we register them.
- We register zero globals.
-3. **Unsafe callables are blocked** — `str.format` and `str.format_map`
- specifically (due to CVE-2016-10745), plus anything marked
- `unsafe_callable`.
-
-#### What we register, exactly
-
-- **Filters: a curated 15**, no more. `join`, `length`, `default`, `upper`,
- `lower`, `truncate`, `tojson`, `date`, `replace`, `trim`, `slugify`,
- `first`, `last`, `sort`, `reverse`. Each one is audited for what it does
- with its input; none of them takes a callable, runs `eval`, or reaches
- into Python objects beyond simple data transformation.
-- **Globals: none.**
-- **Tests: only the safe built-ins** (`defined`, `none`, `number`, `string`,
- `mapping`, `sequence`, `boolean`).
-
-Adding a new filter requires a deliberate code change and review: does this
-filter do anything dangerous with its input? If yes, don't add it. The list
-only grows by audited additions.
-
-#### Runtime limits (defense in depth)
-
-The sandbox handles the attack surface inside the template language. Three
-additional limits handle resource exhaustion that the language permits but
-the runtime shouldn't tolerate:
-
-- **Template source length capped at 8 KB.** Checked before parsing.
-- **Render time capped at 100 ms per render.** Implemented via a watchdog
- thread; renders that exceed are killed and the step fails. Catches
- `{% for i in range(10**9) %}` and nested loop bombs.
-- **Output size capped at 1 MB.** A small template can produce a multi-GB
- string via `{{ 'A' * 10**8 }}`-style multiplication; this catches it.
-
-Plus `StrictUndefined`: any reference to a missing variable raises
-immediately rather than silently rendering empty, so misconfigurations
-fail fast.
-
-#### Threat model and residual risk
-
-The trust model from day one is:
-
-- Templates are generated by an LLM from a user's natural-language input
- (see §10), or written/edited by humans in the editable form
-- A second LLM reviews the proposal and produces a plain-language summary
- plus flagged anomalies for the user
-- The user reviews and approves before the automation runs
-- The Generator LLM's input is scoped (user prompt + schema + registry
- only — no arbitrary document content), minimizing prompt-injection paths
-
-The sandbox + runtime limits + curated filter list protect against the
-malformed-template attack. Human review protects against the
-semantically-malicious-but-syntactically-valid attack. These are
-complementary layers, not redundant.
-
-Known residual risks, each genuinely small:
-
-- **Future Jinja CVEs.** Historical sandbox bypasses have existed and
- been patched. This is a generic third-party-dependency risk, comparable
- to bugs in any other library we rely on. Mitigation: subscribe to
- security advisories, ship updates within a week of disclosure.
-- **Side channels via prompts to LLMs.** A template that renders into a
- prompt can attempt prompt injection of the agent at run time. This is
- not a sandbox concern but a separate concern in `agent_task`'s design.
-- **Operator deployments with long-lived secrets in worker env vars.**
- Mitigation: credentials fetched per-handler-per-call via
- `ActionContext.resolve_credentials`, never pre-loaded into worker
- env vars accessible to templates.
-
-The sandbox-with-allowlist architecture means **the attack surface
-equals the set of things we registered.** With zero globals registered
-and 15 audited filters, the surface is small, bounded, and reviewable.
-This is the structural property that makes the architecture sound, and
-it doesn't depend on hypothetical assumptions about who authors templates.
-
-#### Pre-Phase-5 gate
-
-One trust-model change is documented in the roadmap: **Phase 5 introduces
-template sharing across SearchSpaces** (automation templates as
-exportable, importable artifacts). At that point, the *approver* of a
-template (the original author) is no longer the *runner* (the importer).
-The "human reviews before save" mitigation breaks down because the
-reviewer doesn't bear the risk.
-
-Before Phase 5 ships, this needs an explicit re-approval flow: importing
-a template triggers a fresh review pass by the importing user, with the
-flagged-anomalies output prominently displayed, and the import cannot
-complete without explicit per-template approval.
-
-This is a UX/flow decision, not a template-language migration. Jinja
-itself stays; what changes is the approval workflow at the import boundary.
-
-#### The `run.*` namespace exposed in every template
-
-```
-run.id, run.started_at, run.automation_id, run.automation_name,
-run.automation_version, run.trigger_type, run.trigger_id,
-run.search_space_id, run.creator_id, run.attempt,
-run.failed_step_id, run.error.* (only in on_failure context)
-```
-
-#### Default value rendering
-
-Non-string template values render as JSON by default (via the `finalize`
-hook): lists become `["a", "b"]`, dicts become `{"k": "v"}`, datetimes
-become ISO 8601. The `| join`, `| length`, `| tojson` filters give explicit
-control. Strings render as themselves with no quoting. `None` renders as
-empty string in templates, as `null` in JSON contexts.
-
-### 7.5 Event bus
-
-`domain_events` table, polled by Celery Beat alongside the existing
-scheduler. Both connector events and internal SurfSense events publish to
-it. Both are consumed by the dispatcher's event-trigger subscriber.
-
-**Automations themselves publish events.** Successful and failed runs emit
-`automation.run.succeeded` / `automation.run.failed` events with the run
-metadata. This makes automations composable through events — chain them by
-subscribing one automation's event trigger to another's run event. No new
-mechanism; the trigger filter and event publishing already exist.
-
-Upgrade path documented: when throughput or latency demands it, replace
-PostgreSQL polling with Redis Streams. The `events.publish()` and
-`events.subscribe()` interfaces stay the same. Nothing else changes.
-
----
-
-## 8. Cross-cutting concerns
-
-### Concurrency policy
-
-Per-automation `concurrency` field controls what happens when a new fire
-occurs while a previous run is still running:
-
-- `drop_if_running` — silently skip the new fire
-- `queue` — execute serially, in arrival order
-- `allow_parallel` — start a new run independently
-
-The dispatcher enforces this before enqueueing.
-
-### Retry policy
-
-Three fields, per-automation defaults with optional per-step overrides:
-- `max_retries`: integer, 0–10
-- `retry_backoff`: `none` | `linear` | `exponential`
-- `timeout_seconds`: integer
-
-Retries on:
-- Action handler exceptions
-- Output schema validation failures (for dynamic-output actions, the
- validation error is fed back to the LLM in the retry)
-
-Not retries:
-- `when:` evaluation failures (these are user errors, surface immediately)
-- Input validation failures (caught at dispatch, never reach the executor)
-
-### Budget enforcement *(deferred — not in v1)*
-
-Future shape: `budget_cap_usd` on `Execution`, dispatcher refuses to
-enqueue if estimated cost exceeds it, executor kills the run if
-accumulated cost crosses it mid-flight (the LLM ops handler reports
-tokens consumed back to the executor between calls).
-
-Prerequisites before this can land:
-- Each action declares cost reporting (tokens × model price, API call
- charges) — `ActionDefinition` has no such field today.
-- `automation_runs.cost_usd` column + executor accumulates per step.
-- A historical-cost ledger so pre-flight estimation can return useful
- numbers (otherwise the dispatcher gate is guessing).
-
-Until all three exist, v1 has no surface for budget enforcement.
-
-### On-failure handlers
-
-`execution.on_failure` is a list of steps that run after the main plan has
-failed and all retries are exhausted. Same step shape as the main plan.
-Cannot have their own `on_failure`. See `run.error.*` in the run context.
-
-### Artifacts
-
-Actions that produce artifacts declare `produces_artifacts: list[ArtifactSpec]`:
-
-```python
-@dataclass
-class ArtifactSpec:
- kind: str # "audio", "document", "image", "data"
- retention: str # "transient" | "default" | "permanent"
- visibility: str # "private" | "search_space" | "shared"
-```
-
-The engine handles storage (writes to SurfSense's existing object storage),
-URL generation (signed, scoped to the run's permissions), and cleanup (a
-nightly Celery Beat task deletes expired artifacts).
-
-### Duration classes and queue routing — deferred
-
-The original design routed runs to multiple Celery queues based on each
-action's declared `expected_duration_seconds`. v1 ships with **one
-queue** (`automations_default`) and actions do not declare a duration.
-Multi-queue routing returns when burst load on a single queue actually
-justifies the operational complexity of independent worker pools.
-
-Adding the second queue is a config change plus reintroducing
-`expected_duration_seconds` on the `ActionDefinition` dataclass — both
-mechanical, additive, and free of design rewrite.
-
----
-
-## 9. Data model
-
-**v1 ships three tables:** `automations`, `automation_triggers`,
-`automation_runs`. All scoped by `search_space_id` for RBAC.
-
-The other three tables described in earlier drafts are deferred:
-
-- `domain_events` → **deferred to Phase 3** (introduced with the event
- trigger).
-- `mcp_connections`, `mcp_tools` → **deferred to Phase 4** (MCP
- integration).
-
-The deferred tables ship as-is when their consuming feature lands;
-nothing in the v1 schema needs to change to accommodate them. The three
-v1 tables form the engine's persistent state — definitions, triggers,
-and an immutable run history.
-
-### `automations`
-
-| field | type | notes |
-| ----------------- | ----------------------------------- | -------------------------------------------------------------------------- |
-| `id` | int PK | |
-| `search_space_id` | FK → `search_spaces.id` | |
-| `created_by` | FK → `users.id` | runs execute as this identity |
-| `name` | str | |
-| `description` | str | |
-| `status` | enum | `active`, `paused`, `archived` |
-| `definition` | jsonb | the editable structured spec |
-| `version` | int | bumped on every edit |
-| `created_at` / `updated_at` | timestamps | |
-
-### `automation_triggers`
-
-| field | type | notes |
-| --------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------- |
-| `id` | int PK | |
-| `automation_id` | FK | |
-| `type` | enum: `schedule`, `manual` (Phase 2 adds `webhook`, Phase 4 adds `event`) | |
-| `params` | jsonb | trigger-type config, validated against trigger's `params_schema` |
-| `static_inputs` | jsonb | per-attachment domain values merged into every run (static wins on collision) |
-| `enabled` | bool | |
-| `last_fired_at` | timestamp | |
-| `next_fire_at` | timestamp / null | precomputed next fire moment for schedule triggers |
-
-`secret_hash` (for webhook bearer tokens) is **deferred to Phase 2** with
-the webhook trigger.
-
-### `automation_runs`
-
-| field | type | notes |
-| ----------------- | ---------------------------------------------------------------------------- | -------------------------------------------------- |
-| `id` | int PK | |
-| `automation_id` | FK | |
-| `trigger_id` | FK / null | null = manual via UI |
-| `status` | enum | `pending`, `running`, `succeeded`, `failed`, `cancelled`, `timed_out` |
-| `definition_snapshot` | jsonb | the definition as it was when this run fired |
-| `inputs` | jsonb | merged & validated inputs (trigger.static_inputs ∪ producer runtime data, static wins) |
-| `step_results` | jsonb | array of per-step results with timing |
-| `output` | jsonb / null | |
-| `artifacts` | jsonb | references to created artifacts |
-| `error` | jsonb / null | |
-| `started_at` / `finished_at` | timestamps | |
-| `agent_session_id`| str / null | link to LangGraph trace if agent_task was used |
-
-`cost_usd` (per-run accumulated cost) is **deferred** until at least one
-action records token-level cost. When reintroduced it lands as a
-column-only migration.
-
-### Deferred tables
-
-- **`domain_events`** — the event bus backing event triggers. Ships in
- Phase 4 with the event trigger. v1 only emits `automation.run.*`
- events into application logs; the table is added when at least one
- consumer needs to subscribe to them.
-- **`mcp_connections`** / **`mcp_tools`** — see §3. Both ship in Phase 4
- alongside the MCP harvester and the two-tier registry.
-
-NL drafts are **not** a core table. They live in a generic short-TTL
-store (Redis or a transient table) that is introduced when the NL
-authoring flow is built (Phase 1, Step 4 — see §12).
-
----
-
-## 10. NL authoring flow
-
-**This is how the system is intended to be used from day one, not just a
-Phase 3 addition.** The product surface is: user describes intent in natural
-language, LLM produces a structured proposal, user reviews and edits in an
-auto-generated form, then saves. Hand-authoring JSON directly is supported
-but is not the primary path.
-
-This shapes the trust model. Templates are LLM-generated from day one, not
-hand-written by power users. The mitigation is human-in-the-loop review,
-not "trusted authors only."
-
-### Pass 1: Proposal generation
-
-User provides natural-language input. The Generator LLM is given:
-- The full schema set (input schema for definition, registry of action
- types with their params_schemas, registry of trigger types, list of
- allowed Jinja filters)
-- A tool to list available connectors, channels, and other SearchSpace
- resources, so it doesn't invent names that don't exist
-- A few-shot set of examples
-
-**Scoped input.** The Generator does *not* receive arbitrary SearchSpace
-document content. Its context is the user's prompt plus the schema and
-registry information. This minimizes the prompt-injection surface — there's
-no document text in the context for an attacker to seed instructions into.
-
-If a user wants document-aware generation later ("create an automation
-that processes documents like this one"), that's a deliberate feature
-extension with its own prompt-injection mitigations, not the default flow.
-
-Output: a structured proposal matching the automation definition schema.
-
-### Pass 2: Deterministic validation
-
-Server-side, before the proposal reaches the user:
-- Validate against JSON Schema (shape correctness)
-- Verify every action and trigger type referenced exists in the registry
-- Verify every connector/channel/resource referenced exists in this SearchSpace
-- Validate every template against the sandbox's allowlist (no underscore
- attributes, no unregistered filter names, length under cap)
-
-Failures here are deterministic errors, not warnings. A proposal that
-references a non-existent action or includes a template using
-`{{x.__class__}}` is rejected before the user sees it; the Generator is
-re-prompted with the validation error and asked to fix the proposal.
-
-### Pass 2.5: Review pass
-
-A second LLM call — the **Review LLM** — examines the validated proposal and
-produces two outputs for the user:
-
-1. **A plain-language summary** of what the automation will do, in business
- terms. "This automation will run every weekday at 9am. It reads documents
- in this SearchSpace tagged 'competitor' that were indexed since the last
- run, asks an agent to summarize them as 5 bullets, and posts the summary
- to your #engineering-standup Slack channel. Estimated cost: $0.40 per
- run."
-
-2. **A "things worth checking" list** flagging anything unusual:
- - Templates with unusual attribute paths or filter usage
- - Prompts containing instructions that look more like commands than
- descriptions ("ignore previous instructions" style)
- - Action sequences that touch external systems without obvious benefit
- to the user
- - Cost estimates that seem high relative to the goal
- - References to actions the user hasn't used before
- - Schedules tighter than 15 minutes (likely should be event triggers)
-
-The Review LLM is a **UX layer** that makes review actually useful. It is
-**not a security boundary.** The deterministic controls (sandbox, runtime
-limits, schema validator) are the security boundaries. The Review LLM
-helps users catch their own intent mismatches and surfaces anomalies for
-attention, but the sandbox would block dangerous templates even if the
-Review LLM missed them.
-
-This separation is important: two probabilistic controls compounding can
-create a false sense of security. The Review LLM is explicitly framed in
-the architecture as a helper, not a gatekeeper.
-
-### Pass 3: Editable review
-
-The user lands on a form pre-filled with the proposal. The page shows:
-- The plain-language summary from the Review pass
-- The flagged items, prominently displayed near the relevant fields
-- The full editable form, auto-generated from the JSON Schemas
-- Cost estimate and impact summary (which external systems get touched)
-
-**Every field is editable.** Clarifications appear as required fields.
-Templates are shown in code-styled fields with syntax highlighting and the
-filter palette visible. The user can edit any field; saving re-runs Pass 2
-(deterministic validation) before persisting.
-
-Hitting **Save** promotes the proposal to an `automation` row.
-
-### Editing existing automations
-
-NL editing of an existing automation is a patch operation: the Generator
-LLM receives the current definition plus the NL instruction and produces a
-modified proposal. The same Pass 2 (validation) and Pass 2.5 (review) run
-against the modified version, and the user reviews the diff before saving.
-Existing run history is unaffected — only future runs use the new version.
-
-### Why human-in-the-loop is non-negotiable
-
-The Generator LLM, the Review LLM, and the sandbox are three layers of
-defense against malformed or malicious proposals. The human approval step
-is the fourth and most important layer. It exists because:
-
-- LLMs can be prompt-injected; humans can spot text that asks them to
- ignore instructions
-- LLMs can produce confident-but-wrong proposals; humans can catch
- semantic mismatches between intent and output
-- The cost of a bad automation running unattended is high; the cost of a
- user clicking "approve" after reading is low
-
-The architecture must never offer "auto-approve" or "skip review" options
-for LLM-generated proposals. Save requires human action on the proposal,
-always.
-
----
-
-## 11. Repository layout
-
-```
-surfsense_backend/app/
-├── automations/ # NEW: the engine
-│ ├── __init__.py
-│ ├── persistence/ # SQLAlchemy models + enums for 3 tables
-│ ├── schemas/ # Pydantic schemas (definition envelope, etc.)
-│ ├── routes.py # FastAPI router (/api/v1/automations)
-│ ├── service.py # CRUD + business logic
-│ ├── dispatcher.py # trigger matching, run creation
-│ ├── executor.py # the Celery task that runs a plan
-│ ├── templating.py # Jinja sandbox + filters
-│ ├── events.py # publish/subscribe for domain_events
-│ ├── filters.py # JSON filter grammar evaluator
-│ ├── registries/ # action and trigger registries
-│ │ ├── actions/ # ActionDefinition + handler registration
-│ │ └── triggers/ # TriggerDefinition
-│ └── nl/ # Phase 1 — primary user path
-│ ├── generator.py # Generator LLM
-│ ├── reviewer.py # Review LLM (summary + flagged items)
-│ ├── validator.py # deterministic schema + resource checks
-│ └── prompts.py # system prompts for both LLMs
-│
-├── utils/
-│ └── periodic_scheduler.py # EXTENDED to scan automation_triggers
-│
-└── alembic/versions/
- └── NN_add_automation_tables.py
-
-surfsense_web/app/(routes)/
-└── automations/ # NEW: UI
- ├── page.tsx # list
- ├── new/page.tsx # NL input + draft preview (Phase 1)
- ├── [id]/page.tsx # editor (auto-generated forms)
- └── [id]/runs/page.tsx # run history, streamed via Electric SQL
-```
-
----
-
-## 12. Phased delivery
-
-Each phase delivers something usable. Each de-risks the next. **NL authoring
-is the primary user path from Phase 1** — what evolves across phases is
-which actions and triggers are available, not whether users can describe
-automations in natural language.
-
-### Phase 1 — Engine MVP with NL authoring
-
-**Step 1 (current scope, this batch of commits):**
-- 3 tables (`automations`, `automation_triggers`, `automation_runs`) +
- Alembic migration
-- Empty action and trigger registries under
- `app/automations/registries/` (concrete entries land in later steps)
-- Pydantic schemas for the automation definition envelope, the two v1
- trigger params shapes (`schedule`, `manual`), and the one v1 action
- params shape (`agent_task`)
-- Module structure under `app/automations/` (persistence/, schemas/,
- registries/), fully isolated from the existing codebase
-
-**Step 2:**
-- The `agent_task` action handler and the `schedule` / `manual` triggers
- registered in `app/automations/registries/`. Tool resolution for
- `agent_task.params.tools` is opaque to the contract — the handler
- decides what string identifiers it accepts and how they resolve.
-
-**Step 3:**
-- Executor (single-queue Celery task) with retries and timeouts
-- Template engine (Jinja sandbox + the v1 filter allowlist + runtime
- limits)
-- Manual "Run now" endpoint
-
-**Step 4:**
-- NL authoring flow: Generator LLM, deterministic validator, Review LLM,
- editable form
-- Run history UI with Electric SQL streaming
-
-**After Phase 1**: a user can describe an automation in natural language,
-review the proposal (with summary + flagged anomalies), edit any field,
-save, and watch it run on a schedule.
-
-### Phase 2 — Webhooks and delivery
-- `webhook` trigger with per-automation bearer tokens
-- Tight actions: `slack_post`, `send_email`, `notification`
-- `transform_data` action
-- `on_failure` hooks
-- Step-level retry/timeout overrides
-- Concurrency policy enforcement
-
-**After Phase 2**: external systems can drive automations, results go
-somewhere humans see, complex pipelines have proper error handling.
-
-### Phase 3 — NL authoring polish
-- NL patch flow for editing existing automations (diff-based)
-- Conversational refinement during proposal review ("change the schedule
- to weekdays only," "add a Slack notification on failure")
-- Improved Review LLM coverage (more anomaly patterns, cost-relative-to-
- goal heuristics)
-- Saved prompt templates and starter examples
-
-**After Phase 3**: NL authoring is the polished primary surface; edit
-flows are conversational rather than form-only.
-
-### Phase 4 — Event triggers + integration tooling
-- `domain_events` table and `events.py` module
-- Indexing pipeline publishes `connector.*` events (smallest change — just
- add publish calls to the existing flow)
-- Automations publish `automation.run.*` events on completion
-- `event` trigger with filter grammar
-- The unification layer redesign (see §3) — `CallContext`, scope
- declarations, per-user authorization gating
-- MCP integration on top of the unification layer (external tool servers
- harvested into the shared catalog)
-
-**After Phase 4**: "do X when Y happens" automations work, including
-automation-chaining through events; external MCP tools and SurfSense
-actions share one vocabulary.
-
-### Phase 5 — Wrapping existing features and sharing
-- Wrap existing SurfSense features as actions: `podcast_generation`,
- `report_generation`, `indexing_sweep`
-- Artifact lifecycle implementation
-- `expected_duration_seconds` based queue routing (split `automations_long`
- from `automations_default`)
-- **Automation templates** (shareable, exportable, importable) — with
- the import re-approval flow that handles the approver-≠-runner trust
- shift documented in §7.4's pre-Phase-5 gate
-- Cross-automation composition examples in the docs
-
-**After Phase 5**: every existing SurfSense feature is automatable
-without any per-feature code, and automations can be shared between
-SearchSpaces and users.
-
----
-
-## 13. Decisions locked
-
-For reference — every decision made through the design process, in one
-place.
-
-### Foundations
-1. ✅ JSON Schema (draft 2020-12) is the single schema language for everything
-2. ✅ Definition is the program; infrastructure is the interpreter
-3. ✅ List of steps (not single action) in the plan, with `output_as` chaining
-4. ⏸ Capability unification layer (one catalog shared by automations, agents, and future surfaces) — **deferred to post-v1** (see §3). v1 ships actions only.
-5. ✅ Name-based resolution: definitions reference action and trigger types by string ID. The registry is the runtime's vocabulary; lookup is a dict access. No code references in definitions.
-6. ✅ The expressive spectrum runs from pure direct calls to broad agent_task; the NL generator proposes the cheapest shape that meets intent (Shape 6 from §4 by default)
-
-### Trigger taxonomy
-8. ✅ Three trigger types: `schedule`, `webhook`, `event`
-9. ✅ Events absorb both connector events and internal SurfSense events
-10. ✅ Filter grammar is JSON-structured operators (not Jinja)
-
-### Templating cluster
-11. ✅ Jinja2 `SandboxedEnvironment` for templates and `when:` predicates — but with the explicit understanding that the sandbox is an allowlist-by-default architecture, not a denylist
-12. ✅ Zero globals registered. Curated 15 filters only, each audited for safe behavior with hostile input. List grows only by reviewed addition
-13. ✅ Four runtime mitigations: `StrictUndefined`, 8 KB template source cap, 100 ms render time cap (watchdog-enforced), 1 MB output size cap
-14. ✅ Non-string template values render as JSON by default
-15. ✅ Fixed `run.*` namespace, documented
-16. ⏸ **Pre-Phase-5 gate**: template sharing across SearchSpaces breaks the approver-equals-runner trust model. Mitigation is a re-approval flow at the import boundary (UX-level), not a template-language migration. Jinja itself stays.
-
-### Execution
-17. ✅ Executor is a Celery task wrapping a sequential loop — not an agent
-18. ✅ `when:` is optional per step; false = skipped (not failed)
-19. ✅ No DAGs, no parallelism, no loops — composition via agent_task or events
-20. ✅ `on_failure` part of execution policy from v1
-21. ✅ Step-level retry and timeout overrides
-22. ⏸ Budget cap enforced pre-enqueue and mid-flight — **deferred** until the cost ledger ships (see §8 Budget enforcement)
-
-### Components
-23. ✅ Dispatcher / executor / handlers / registry — distinct, each replaceable
-24. ⏸ Side effects are a set, including `USER_VISIBLE` — **deferred** until multi-user automation RBAC ships
-25. ⏸ `expected_duration_seconds` integer drives queue routing — **deferred** until a second Celery queue is needed
-26. ⏸ `produces_artifacts` is a list of `ArtifactSpec`, not a bool — **deferred** until artifacts beyond the deliverable handlers' own persistence are needed
-27. ✅ Output schemas recommended on `agent_task`; editor warns when missing
-
-### Event bus
-28. ✅ `domain_events` table for v1, with upgrade path to Redis Streams
-29. ✅ Automations publish run events for composability
-30. ✅ Publish/subscribe behind interface — no direct table access elsewhere
-
-### Capability unification — all deferred to post-v1
-31. ⏸ One shared catalog of "things this SurfSense instance can do" — **deferred**, see §3
-32. ⏸ Handler `CallContext` (caller user id, search space id, run id) — **deferred** with unification
-33. ⏸ Per-capability scope declarations driving authorization — **deferred** with unification
-34. ⏸ MCP integration on top of the unification layer (`mcp_connections`, `mcp_tools`, harvester) — **deferred to Phase 4**
-
-### Credentials — all deferred to Phase 2
-35. ⏸ Credentials never appear in the automation definition — only connection IDs do — **Phase 2**
-36. ⏸ Credentials never appear in the LLM's context — the host holds them — **Phase 2**
-37. ⏸ Credentials resolved per-call by the handler context, not pre-loaded into worker environment — **Phase 2**
-38. ⏸ Tokens encrypted at rest; refresh handled automatically by the handler context — **Phase 2**
-
-### v1-minimum
-39. ✅ v1 ships actions only — no separate capability layer. `ActionDefinition` is five fields: `type`, `name`, `description`, `params_schema`, `handler`. Additional fields are added only when a concrete consumer feature requires them.
-40. ✅ Cost is **measured** from a per-run ledger, not declared. Pre-flight cost checks return when the ledger has enough history.
-41. ✅ Single `automations_default` Celery queue in v1. Multi-queue routing returns when load justifies it.
-
-### NL authoring
-42. ✅ LLM-authored templates are the primary path from day one — not a Phase 3 addition. Hand-authoring JSON is supported but secondary
-43. ✅ Generator LLM produces JSON; deterministic schema + resource validation runs before user sees the proposal
-44. ✅ Review LLM produces plain-language summary + flagged anomalies for the user — UX layer, not a security boundary
-45. ✅ Generator LLM's input is scoped (user prompt + schema + registry only); arbitrary document content is not fed in
-46. ✅ Human approval is required before save — no auto-approval option, ever
-47. ✅ Every field editable in the proposal; unresolved questions surface as clarifications
-48. ✅ NL drafts are transient storage, not a core table
-
-### Data model
-49. ✅ v1 ships three tables (`automations`, `automation_triggers`, `automation_runs`). `domain_events` and `mcp_connections` / `mcp_tools` all land in Phase 4 (see §12).
-50. ✅ Run rows snapshot the definition (immutable history)
-51. ✅ All entities scoped by `search_space_id` for RBAC
-52. ✅ Editing an automation bumps `version`; existing runs unaffected
-
----
-
-## 14. Open questions deferred to implementation
-
-None of these block design; they're decisions a developer will make in
-context, with the principle from §1 as their guide.
-
-- Exact retry backoff formulas (multipliers, jitter, ceilings)
-- Webhook signature verification standards (HMAC scheme, header naming)
-- Whether to support inline JSON Schema `$ref` to external schemas, or
- inline everything
-- Specific CDN/storage backend choices for artifacts (probably
- whatever SurfSense already uses for podcasts)
-- Rate limits per SearchSpace and per user
-- Audit log retention policy
-
----
-
-## 15. Why this is ready to build
-
-This document satisfies five tests:
-
-1. **The four worked examples** (digest, CI webhook, file-added-trigger,
- weekly podcast) all express cleanly in the contract without special
- cases. Each one was used to find gaps before the gaps reached code.
-
-2. **The audit pass identified six refinements**, all incorporated. No
- pending audit items.
-
-3. **Every decision points back to the principle from §1.** When a future
- feature request lands, "does it belong in the definition or in the
- engine?" gives a clear answer.
-
-4. **The build is staged** so Phase 1 ships in weeks, not months, and
- each subsequent phase delivers user value while de-risking the next.
-
-5. **Existing SurfSense infrastructure is reused**, not paralleled. Celery
- Beat, PostgreSQL/JSONB, Electric SQL, SQLAlchemy/Alembic, the existing
- `tools/registry.py` pattern, the existing Search Space scoping — all
- continue to do what they already do. The automation engine is a new
- directory, not a new system.
-
-The next document a developer needs is the Pydantic models and JSON
-Schemas spelled out concretely. Those follow mechanically from this plan.
-
----
-
-*Sources consulted: Claude Code Routines documentation; NousResearch/hermes-
-agent (cron and skills subsystems); n8n documentation on node types and
-workflow data model; the SurfSense repository and DeepWiki architecture
-notes (FastAPI + Celery Beat + Electric SQL + LangGraph Deep Agents +
-Search Space RBAC); Model Context Protocol specification for external
-tool harvesting; AWS EventBridge for filter grammar; workflow-pattern
-literature (van der Aalst et al.) for the trigger / action / concurrency
-vocabulary.*
From 2d8d42bd9cee790c8884a75cbc1ea5f9aa113126 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 15:40:18 +0200
Subject: [PATCH 77/87] refactor(web): polish automations detail view
---
.../automation-detail-content.tsx | 24 ++++----
.../automation-definition-section.tsx | 2 +-
.../components/automation-detail-loading.tsx | 60 ++++++++++++-------
.../components/automation-runs-section.tsx | 2 +-
.../automation-triggers-section.tsx | 2 +-
.../components/plan-step-card.tsx | 2 +-
.../components/run-details-panel.tsx | 2 +-
.../[automation_id]/components/run-row.tsx | 2 +-
.../components/runs-loading.tsx | 2 +-
.../components/trigger-card.tsx | 2 +-
.../new/components/automation-json-form.tsx | 2 +-
.../layout/providers/LayoutDataProvider.tsx | 8 ++-
12 files changed, 65 insertions(+), 45 deletions(-)
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
index 49df3633e..4085d47a8 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/automation-detail-content.tsx
@@ -72,16 +72,20 @@ export function AutomationDetailContent({
canDelete={perms.canDelete}
/>
-
-
-
-
-
+
>
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
index 9545f363b..e8721d9b0 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
@@ -24,7 +24,7 @@ export function AutomationDefinitionSection({ definition }: AutomationDefinition
const hasInputs = !!definition.inputs;
return (
-
+
Definition
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx
index 1d01305ee..0d6ba3110 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-loading.tsx
@@ -3,12 +3,13 @@ import { Card, CardContent, CardHeader } from "@/components/ui/card";
import { Skeleton } from "@/components/ui/skeleton";
/**
- * Skeleton for the detail page. Same shell as the loaded view (header +
- * two stacked cards) so the layout doesn't jump on data arrival.
+ * Skeleton for the detail page. Mirrors the loaded view's main/sidebar
+ * grid (Definition + Runs on the left, Triggers on the right) so layout
+ * doesn't reflow when data arrives.
*/
export function AutomationDetailLoading() {
return (
-
+ <>
@@ -18,25 +19,38 @@ export function AutomationDetailLoading() {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ >
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx
index b6158cab2..d31bd696d 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-runs-section.tsx
@@ -23,7 +23,7 @@ export function AutomationRunsSection({ automationId }: AutomationRunsSectionPro
const runs = data?.items ?? [];
return (
-
+
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
index 33c8373a1..558a089ac 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-triggers-section.tsx
@@ -23,7 +23,7 @@ export function AutomationTriggersSection({
canDelete,
}: AutomationTriggersSectionProps) {
return (
-
+
Triggers
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
index 3feb77712..b9fda00db 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
@@ -15,7 +15,7 @@ interface PlanStepCardProps {
*/
export function PlanStepCard({ step, index }: PlanStepCardProps) {
return (
-
+
{index + 1}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
index d1d46900a..94a96b199 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
@@ -109,7 +109,7 @@ function Section({
function JsonBlock({ value }: { value: unknown }) {
return (
-
+
{JSON.stringify(value, null, 2)}
);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx
index b8d2bcc8b..02ca0569c 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-row.tsx
@@ -24,7 +24,7 @@ export function RunRow({ run, automationId }: RunRowProps) {
: formatRelativeDate(run.created_at);
return (
-
+
setOpen((value) => !value)}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx
index 5cab18f4c..61ce25e32 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/runs-loading.tsx
@@ -9,7 +9,7 @@ export function RunsLoading() {
{ROW_KEYS.map((key) => (
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
index ec0246e49..200a15f57 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
@@ -48,7 +48,7 @@ export function TriggerCard({ trigger, automationId, canUpdate, canDelete }: Tri
return (
<>
-
+
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
index 845d95166..8fe065295 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
@@ -75,7 +75,7 @@ export function AutomationJsonForm({ searchSpaceId }: AutomationJsonFormProps) {
const hasIssues = issues.length > 0;
return (
-
+
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
index 67971e435..663e4b96f 100644
--- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx
@@ -726,9 +726,11 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid
: undefined
}
workspacePanelContentClassName={
- isUserSettingsPage || isSearchSpaceSettingsPage || isTeamPage || isAutomationsPage
- ? "max-w-5xl"
- : undefined
+ isAutomationsPage
+ ? "max-w-none"
+ : isUserSettingsPage || isSearchSpaceSettingsPage || isTeamPage
+ ? "max-w-5xl"
+ : undefined
}
isLoadingChats={isLoadingThreads}
activeSlideoutPanel={activeSlideoutPanel}
From fa0cdb9760f0b08bfb5db4596c79ffbefdbc845b Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 28 May 2026 16:07:54 +0200
Subject: [PATCH 78/87] feat(web): unified json viewer/editor + edit existing
automation
---
.../automation-definition-section.tsx | 5 +-
.../components/automation-detail-header.tsx | 10 +-
.../components/inputs-schema-preview.tsx | 15 +-
.../components/plan-step-card.tsx | 7 +-
.../components/run-details-panel.tsx | 7 +-
.../components/trigger-card.tsx | 7 +-
.../edit/automation-edit-content.tsx | 56 ++++++
.../edit/components/automation-edit-form.tsx | 121 +++++++++++++
.../automations/[automation_id]/edit/page.tsx | 18 ++
.../new/components/automation-json-form.tsx | 62 +++----
.../components/json-metadata-viewer.tsx | 11 +-
surfsense_web/components/json-view.tsx | 93 ++++++++++
.../tool-ui/automation/create-automation.tsx | 50 ++----
surfsense_web/package.json | 2 +-
surfsense_web/pnpm-lock.yaml | 159 ++++++++++++++++--
15 files changed, 504 insertions(+), 119 deletions(-)
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/automation-edit-content.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/components/automation-edit-form.tsx
create mode 100644 surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/page.tsx
create mode 100644 surfsense_web/components/json-view.tsx
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
index e8721d9b0..4ff9b8b8c 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-definition-section.tsx
@@ -11,9 +11,8 @@ interface AutomationDefinitionSectionProps {
}
/**
- * The Definition card. Read-only in v1 — editing definitions happens via
- * chat (re-run create_automation with a refined intent) or, later, via
- * the raw-JSON path. Layout is top-down:
+ * The Definition card. Read view; editing happens on the sibling /edit
+ * route (Edit button in the header). Layout is top-down:
* goal → tags → execution defaults → inputs schema (if any) → plan
*
* The schema_version is rendered as a small badge next to the section
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx
index 4cf3efcc1..0bce3fa2d 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/automation-detail-header.tsx
@@ -1,6 +1,6 @@
"use client";
import { useAtomValue } from "jotai";
-import { ArrowLeft, Pause, Play, Trash2 } from "lucide-react";
+import { ArrowLeft, Pause, Pencil, Play, Trash2 } from "lucide-react";
import Link from "next/link";
import { useRouter } from "next/navigation";
import { useCallback, useState } from "react";
@@ -82,6 +82,14 @@ export function AutomationDetailHeader({
+ {canUpdate && (
+
+
+
+ Edit
+
+
+ )}
{canToggle && (
- {JSON.stringify(inputs.schema, null, 2)}
-
+
+
+
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
index b9fda00db..27cecf3bf 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/plan-step-card.tsx
@@ -1,5 +1,6 @@
"use client";
import { ArrowRightCircle, GitCommitHorizontal } from "lucide-react";
+import { JsonView } from "@/components/json-view";
import type { PlanStep } from "@/contracts/types/automation.types";
interface PlanStepCardProps {
@@ -54,9 +55,9 @@ export function PlanStepCard({ step, index }: PlanStepCardProps) {
Params
-
- {JSON.stringify(step.params, null, 2)}
-
+
+
+
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
index 94a96b199..f9c6fbb5a 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/run-details-panel.tsx
@@ -1,5 +1,6 @@
"use client";
import { AlertCircle, FileOutput, GitCommitHorizontal, Package, Settings2 } from "lucide-react";
+import { JsonView } from "@/components/json-view";
import { Skeleton } from "@/components/ui/skeleton";
import { useAutomationRun } from "@/hooks/use-automation-runs";
@@ -109,8 +110,8 @@ function Section({
function JsonBlock({ value }: { value: unknown }) {
return (
-
- {JSON.stringify(value, null, 2)}
-
+
+
+
);
}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
index 200a15f57..a1d84d2d7 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/components/trigger-card.tsx
@@ -3,6 +3,7 @@ import { useAtomValue } from "jotai";
import { CalendarClock, Clock, Trash2 } from "lucide-react";
import { useState } from "react";
import { updateTriggerMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { JsonView } from "@/components/json-view";
import { Button } from "@/components/ui/button";
import { Switch } from "@/components/ui/switch";
import type { Trigger } from "@/contracts/types/automation.types";
@@ -109,9 +110,9 @@ export function TriggerCard({ trigger, automationId, canUpdate, canDelete }: Tri
{hasStaticInputs && (
Static inputs
-
- {JSON.stringify(trigger.static_inputs, null, 2)}
-
+
+
+
)}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/automation-edit-content.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/automation-edit-content.tsx
new file mode 100644
index 000000000..219552a1a
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/automation-edit-content.tsx
@@ -0,0 +1,56 @@
+"use client";
+import { ShieldAlert } from "lucide-react";
+import { useAutomation } from "@/hooks/use-automation";
+import { useAutomationPermissions } from "../../hooks/use-automation-permissions";
+import { AutomationDetailLoading } from "../components/automation-detail-loading";
+import { AutomationNotFound } from "../components/automation-not-found";
+import { AutomationEditForm } from "./components/automation-edit-form";
+
+interface AutomationEditContentProps {
+ searchSpaceId: number;
+ automationId: number; // only a positive integer is fetched; anything else is treated as an invalid id
+}
+
+/**
+ * Client orchestrator for the edit route. Mirrors detail-content's branch
+ * structure but gates on ``canUpdate`` instead of ``canRead``: a user who
+ * can read but not update is bounced to the access-denied panel.
+ *
+ * Both hooks run on every render, ahead of the guards; ``useAutomation``
+ * is handed ``undefined`` rather than an id that is not a positive integer.
+ *
+ * Guards are evaluated in this order, each returning early: permissions
+ * still loading → ``canUpdate`` missing → ``automationId`` invalid →
+ * automation query loading → query error or no automation. Only when all
+ * of them pass is the final return reached.
+ */
+export function AutomationEditContent({
+ searchSpaceId,
+ automationId,
+}: AutomationEditContentProps) {
+ const perms = useAutomationPermissions();
+ const validId = Number.isInteger(automationId) && automationId > 0;
+ const { data: automation, isLoading, error } = useAutomation(validId ? automationId : undefined);
+
+ if (perms.loading) {
+ return
;
+ }
+
+ if (!perms.canUpdate) {
+ return (
+
+
+
Access denied
+
+ You don't have permission to edit automations in this search space.
+
+
+ );
+ }
+
+ if (!validId) {
+ return
;
+ }
+
+ if (isLoading) {
+ return
;
+ }
+
+ if (error || !automation) {
+ return
;
+ }
+
+ return
;
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/components/automation-edit-form.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/components/automation-edit-form.tsx
new file mode 100644
index 000000000..86b355838
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/components/automation-edit-form.tsx
@@ -0,0 +1,121 @@
+"use client";
+import { useAtomValue } from "jotai";
+import { AlertCircle, ArrowLeft, Save } from "lucide-react";
+import Link from "next/link";
+import { useRouter } from "next/navigation";
+import { useState } from "react";
+import { updateAutomationMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { JsonView } from "@/components/json-view";
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Spinner } from "@/components/ui/spinner";
+import {
+ type Automation,
+ automationUpdateRequest,
+} from "@/contracts/types/automation.types";
+
+interface AutomationEditFormProps {
+ automation: Automation; // seeds the editable name/description/status/definition; its id targets the update
+ searchSpaceId: number; // used to build the detail-page URL
+}
+
+/**
+ * Edit-existing-automation form. Surfaces the four mutable fields
+ * (name, description, status, definition) as one editable JSON tree;
+ * triggers stay on the detail page where they have their own management
+ * UI. On save, validates the whole tree with the ``automationUpdateRequest``
+ * Zod schema, then sends the parsed result (all four fields, not a diff) as the patch.
+ */
+export function AutomationEditForm({ automation, searchSpaceId }: AutomationEditFormProps) {
+ const router = useRouter();
+ const { mutateAsync: updateAutomation, isPending } = useAtomValue(updateAutomationMutationAtom);
+ const detailHref = `/dashboard/${searchSpaceId}/automations/${automation.id}`;
+
+ const [value, setValue] = useState(() => ({
+ name: automation.name,
+ description: automation.description ?? null,
+ status: automation.status,
+ definition: automation.definition,
+ }));
+ const [issues, setIssues] = useState
([]);
+
+	/**
+	 * Validates the edited tree and, when it parses, submits it as the update
+	 * patch for this automation.
+	 *
+	 * Validation problems and submit failures are both reported through
+	 * ``issues``. Navigation to the detail page happens only after the update
+	 * call resolves.
+	 */
+	async function handleSave() {
+		// Clear anything left over from a previous attempt.
+		setIssues([]);
+		const result = automationUpdateRequest.safeParse(value);
+		if (!result.success) {
+			// One line per schema issue, prefixed with its dotted path.
+			setIssues(
+				result.error.issues.map((issue) => `${issue.path.join(".") || "(root)"}: ${issue.message}`)
+			);
+			return;
+		}
+		try {
+			await updateAutomation({ automationId: automation.id, patch: result.data });
+			router.push(detailHref);
+		} catch (err) {
+			// A rejection is not guaranteed to be an Error: narrow instead of
+			// asserting, so a null/undefined/string rejection cannot throw from
+			// inside this handler, and an empty message still shows something.
+			const message = err instanceof Error && err.message ? err.message : "Update failed";
+			setIssues([message]);
+		}
+	}
+
+ return (
+ <>
+
+
+
+
+ Back to automation
+
+
+
+
+ Edit automation
+
+
{automation.name}
+
+
+
+
+
+ Definition
+
+
+
+ setValue(next as typeof value)}
+ collapsed={false}
+ />
+
+
+ {issues.length > 0 && (
+
+
+
+ {issues.length === 1 ? "1 issue" : `${issues.length} issues`}
+
+
+ {issues.map((issue) => (
+ {issue}
+ ))}
+
+
+ )}
+
+
+
+ Cancel
+
+
+ {isPending ? (
+
+ ) : (
+
+ )}
+ Save changes
+
+
+
+
+ >
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/page.tsx
new file mode 100644
index 000000000..8477b9e12
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/[automation_id]/edit/page.tsx
@@ -0,0 +1,18 @@
+import { AutomationEditContent } from "./automation-edit-content";
+
+export default async function AutomationEditPage({
+ params,
+}: {
+ params: Promise<{ search_space_id: string; automation_id: string }>; // both route segments arrive as strings
+}) {
+ const { search_space_id, automation_id } = await params; // params is a Promise here, so it is awaited before destructuring
+
+ return (
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
index 8fe065295..94b608b8f 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/automations/new/components/automation-json-form.tsx
@@ -1,9 +1,10 @@
"use client";
import { useAtomValue } from "jotai";
-import { AlertCircle, Code, FileJson, Save } from "lucide-react";
+import { AlertCircle, FileJson, Save } from "lucide-react";
import { useRouter } from "next/navigation";
import { useState } from "react";
import { createAutomationMutationAtom } from "@/atoms/automations/automations-mutation.atoms";
+import { JsonView } from "@/components/json-view";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Spinner } from "@/components/ui/spinner";
@@ -17,45 +18,24 @@ interface AutomationJsonFormProps {
/**
* Raw-JSON create form. Lets power users skip the chat drafter when they
* already know the shape they want. Flow:
- * parse JSON → inject search_space_id → Zod validate → POST → navigate
+ * edit tree → inject search_space_id → Zod validate → POST → navigate
*
- * ``search_space_id`` is injected here rather than required in the pasted
- * payload — the user shouldn't have to know their numeric id, and it
- * keeps the template copy-paste-friendly across search spaces.
+ * ``search_space_id`` is injected here rather than required in the edited
+ * tree — the user shouldn't have to know their numeric id, and it keeps
+ * the template copy-paste-friendly across search spaces.
*/
export function AutomationJsonForm({ searchSpaceId }: AutomationJsonFormProps) {
const router = useRouter();
const { mutateAsync: createAutomation, isPending } = useAtomValue(createAutomationMutationAtom);
- const [text, setText] = useState(() => JSON.stringify(DEFAULT_AUTOMATION_TEMPLATE, null, 2));
+ const [value, setValue] = useState>(
+ () => DEFAULT_AUTOMATION_TEMPLATE as Record
+ );
const [issues, setIssues] = useState([]);
- function handleFormat() {
- try {
- const parsed = JSON.parse(text);
- setText(JSON.stringify(parsed, null, 2));
- setIssues([]);
- } catch (err) {
- setIssues([`Cannot format — not valid JSON: ${(err as Error).message}`]);
- }
- }
-
async function handleSubmit() {
setIssues([]);
- let parsed: unknown;
- try {
- parsed = JSON.parse(text);
- } catch (err) {
- setIssues([`Invalid JSON: ${(err as Error).message}`]);
- return;
- }
-
- if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
- setIssues(["Root must be a JSON object."]);
- return;
- }
-
- const payload = { ...(parsed as Record), search_space_id: searchSpaceId };
+ const payload = { ...value, search_space_id: searchSpaceId };
const result = automationCreateRequest.safeParse(payload);
if (!result.success) {
setIssues(
@@ -76,25 +56,21 @@ export function AutomationJsonForm({ searchSpaceId }: AutomationJsonFormProps) {
return (
-
+
Definition + triggers
-
-
- Format
-
-