Merge remote-tracking branch 'upstream/dev' into feat/whatsapp-gateway-integration

This commit is contained in:
Anish Sarkar 2026-06-02 00:29:32 +05:30
commit e3de7c4667
465 changed files with 29171 additions and 6994 deletions

View file

@ -385,3 +385,50 @@ LANGSMITH_PROJECT=surfsense
# updates and deletes — the TTL only bounds staleness for bulk-import
# paths that bypass the ORM. Set to 0 to disable the cache.
# SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS=30
# -----------------------------------------------------------------------------
# `task` boundary controls (Hermes-inspired improvements)
# -----------------------------------------------------------------------------
# Wall-clock budget for a single ``task(subagent, ...)`` invocation in
# seconds. Subagents that run hot (slow image vendors, sluggish embedders,
# wedged MCP servers) would otherwise pin the orchestrator until the next
# checkpoint heartbeat fires. On timeout the runtime cancels the underlying
# coroutine and synthesizes a ToolMessage telling the orchestrator to treat
# the result as ``status=error``. Set to 0 to disable the cap entirely.
# Default: 300.0
# SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS=300
# Batch-mode (``task(tasks=[...])``) concurrency cap and max batch size.
# Concurrency is enforced via an ``asyncio.Semaphore`` so a runaway fanout
# cannot starve unrelated subagents (each child still owns an LLM call and
# its own DB session). Max-size is a hard safety net for prompt-injection /
# runaway loops; the orchestrator rarely needs more than a handful of
# concurrent specialists. Set concurrency to 1 to effectively serialise
# batches without changing the schema.
# SURFSENSE_TASK_BATCH_CONCURRENCY=3
# SURFSENSE_TASK_BATCH_MAX_SIZE=8
# Soft per-turn cap on cumulative ``task(...)`` invocations across all
# subagents. Once the sum of ``state['billable_calls']`` crosses this
# number, the runtime appends a one-shot warning ToolMessage telling the
# orchestrator to wrap up rather than launching more specialists. Tunable
# so heavy-research turns (15+ legitimate specialist calls) don't trip the
# alarm in production. Set to 0 to disable the warning entirely.
# SURFSENSE_SUBAGENT_BILLABLE_THRESHOLD=15
# Per-workspace spawn-paused kill switch — set via Redis at runtime, not
# this env var. The env var below only disables the check itself (useful
# for local dev without Redis). To pause a workspace in production:
# redis-cli SET surfsense:spawn_paused:<search_space_id> 1 EX 600
# redis-cli DEL surfsense:spawn_paused:<search_space_id>
# The check is fail-open: a Redis blip never blocks ``task(...)``.
# SURFSENSE_TASK_SPAWN_PAUSED_DISABLED=false
# Note on Celery-backed deliverables (generate_podcast,
# generate_video_presentation): these tools poll the artefact row until
# it reaches a terminal status — they do NOT use an internal wall-clock
# budget. The effective ceiling is SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS
# (above, default 300s) in multi-agent mode and the chat's HTTP / process
# lifetime in single-agent mode. If your podcasts or videos routinely
# exceed 5 minutes, raise SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS (or
# set it to 0 to disable that ceiling entirely).

View file

@ -0,0 +1,177 @@
"""Add automation tables (automations, automation_triggers, automation_runs)
Revision ID: 144
Revises: 143
Create Date: 2026-05-26
Adds the three tables that back the v1 automation engine, plus the
three PostgreSQL ENUM types they reference. Matches the SQLAlchemy
models under ``app.automations.persistence.models`` and the v1 data
model in ``automation-design-plan.md`` §9.
v1 ships these three tables only. ``domain_events`` is deferred to
Phase 3 with the event trigger; ``mcp_connections`` / ``mcp_tools``
are deferred to Phase 4 with the MCP integration.
"""
from collections.abc import Sequence
from alembic import op
# Revision identifiers, used by Alembic.
revision: str = "144"
down_revision: str | None = "143"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the automation ENUM types, tables, and indexes.

    Statements run in dependency order: the three ENUM types first, then
    ``automations``, then ``automation_triggers`` and ``automation_runs``,
    which both reference ``automations`` (and runs reference triggers).
    """

    def _index_each_column(table: str, columns: tuple[str, ...]) -> None:
        # One plain index per column, named ``ix_<table>_<column>``.
        for column in columns:
            op.execute(f"CREATE INDEX ix_{table}_{column} ON {table}({column});")

    # ENUM types (PostgreSQL requires types created before tables that use them)
    enum_statements = (
        (
            "\n"
            "CREATE TYPE automation_status AS ENUM (\n"
            "'active', 'paused', 'archived'\n"
            ");\n"
        ),
        (
            "\n"
            "CREATE TYPE automation_trigger_type AS ENUM (\n"
            "'schedule', 'manual'\n"
            ");\n"
        ),
        (
            "\n"
            "CREATE TYPE automation_run_status AS ENUM (\n"
            "'pending', 'running', 'succeeded', 'failed',\n"
            "'cancelled', 'timed_out'\n"
            ");\n"
        ),
    )
    for enum_sql in enum_statements:
        op.execute(enum_sql)

    # automations — the editable, versioned automation definition
    op.execute(
        "\n"
        "CREATE TABLE automations (\n"
        "id SERIAL PRIMARY KEY,\n"
        "search_space_id INTEGER NOT NULL\n"
        "REFERENCES searchspaces(id) ON DELETE CASCADE,\n"
        "created_by_user_id UUID\n"
        'REFERENCES "user"(id) ON DELETE SET NULL,\n'
        "name VARCHAR(200) NOT NULL,\n"
        "description TEXT,\n"
        "status automation_status NOT NULL DEFAULT 'active',\n"
        "definition JSONB NOT NULL,\n"
        "version INTEGER NOT NULL DEFAULT 1,\n"
        "created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),\n"
        "updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()\n"
        ");\n"
    )
    _index_each_column(
        "automations",
        (
            "search_space_id",
            "created_by_user_id",
            "status",
            "created_at",
            "updated_at",
        ),
    )

    # automation_triggers — one row per (automation, trigger-instance) pair
    op.execute(
        "\n"
        "CREATE TABLE automation_triggers (\n"
        "id SERIAL PRIMARY KEY,\n"
        "automation_id INTEGER NOT NULL\n"
        "REFERENCES automations(id) ON DELETE CASCADE,\n"
        "type automation_trigger_type NOT NULL,\n"
        "params JSONB NOT NULL,\n"
        "static_inputs JSONB NOT NULL DEFAULT '{}'::jsonb,\n"
        "enabled BOOLEAN NOT NULL DEFAULT true,\n"
        "last_fired_at TIMESTAMP WITH TIME ZONE,\n"
        "next_fire_at TIMESTAMP WITH TIME ZONE,\n"
        "created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()\n"
        ");\n"
    )
    _index_each_column(
        "automation_triggers",
        ("automation_id", "type", "enabled", "created_at"),
    )

    # Partial index for the schedule tick: only enabled schedule triggers
    # with a scheduled next fire are ever scanned for due rows.
    op.execute(
        "\n"
        "CREATE INDEX ix_automation_triggers_due\n"
        "ON automation_triggers (next_fire_at)\n"
        "WHERE enabled = true\n"
        "AND type = 'schedule'\n"
        "AND next_fire_at IS NOT NULL;\n"
    )

    # automation_runs — the immutable per-fire execution record
    op.execute(
        "\n"
        "CREATE TABLE automation_runs (\n"
        "id SERIAL PRIMARY KEY,\n"
        "automation_id INTEGER NOT NULL\n"
        "REFERENCES automations(id) ON DELETE CASCADE,\n"
        "trigger_id INTEGER\n"
        "REFERENCES automation_triggers(id) ON DELETE SET NULL,\n"
        "status automation_run_status NOT NULL DEFAULT 'pending',\n"
        "definition_snapshot JSONB NOT NULL,\n"
        "inputs JSONB NOT NULL DEFAULT '{}'::jsonb,\n"
        "step_results JSONB NOT NULL DEFAULT '[]'::jsonb,\n"
        "output JSONB,\n"
        "artifacts JSONB NOT NULL DEFAULT '[]'::jsonb,\n"
        "error JSONB,\n"
        "started_at TIMESTAMP WITH TIME ZONE,\n"
        "finished_at TIMESTAMP WITH TIME ZONE,\n"
        "created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()\n"
        ");\n"
    )
    _index_each_column(
        "automation_runs",
        ("automation_id", "trigger_id", "status", "created_at"),
    )
def downgrade() -> None:
    """Drop the automation indexes, tables, and ENUM types.

    Runs in reverse dependency order: ``automation_runs`` first, then
    ``automation_triggers``, then ``automations``, and finally the ENUM
    types the tables used. Every statement uses ``IF EXISTS``.
    """
    # (table, index-name suffixes) in the exact order the drops are issued.
    teardown_plan = (
        (
            "automation_runs",
            ("created_at", "status", "trigger_id", "automation_id"),
        ),
        (
            "automation_triggers",
            ("due", "created_at", "enabled", "type", "automation_id"),
        ),
        (
            "automations",
            (
                "updated_at",
                "created_at",
                "status",
                "created_by_user_id",
                "search_space_id",
            ),
        ),
    )
    for table, index_suffixes in teardown_plan:
        for suffix in index_suffixes:
            op.execute(f"DROP INDEX IF EXISTS ix_{table}_{suffix};")
        op.execute(f"DROP TABLE IF EXISTS {table};")

    # Types can only go once no table column references them.
    for enum_type in (
        "automation_run_status",
        "automation_trigger_type",
        "automation_status",
    ):
        op.execute(f"DROP TYPE IF EXISTS {enum_type};")

View file

@ -0,0 +1,87 @@
"""Add automations permissions to existing Editor/Viewer roles
Revision ID: 145
Revises: 144
Create Date: 2026-05-27
Owners already have ``*`` and need no backfill. Custom (non-system) roles
are left untouched on purpose: workspace admins manage those explicitly.
"""
from collections.abc import Sequence
from sqlalchemy import text
from alembic import op
# Revision identifiers, used by Alembic.
revision: str = "145"
down_revision: str | None = "144"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Permissions appended to roles named 'Editor' on upgrade and removed again on
# downgrade.
_EDITOR_PERMISSIONS = (
    "automations:create",
    "automations:read",
    "automations:update",
    "automations:execute",
)
# Permissions appended to roles named 'Viewer' on upgrade and removed again on
# downgrade.
_VIEWER_PERMISSIONS = ("automations:read",)
def upgrade():
    """Append the automations permissions to the Editor and Viewer roles.

    Each permission is appended only to rows whose ``permissions`` array does
    not already contain it, so re-running does not create duplicates.
    """
    bind = op.get_bind()
    grants = (
        ("Editor", _EDITOR_PERMISSIONS),
        ("Viewer", _VIEWER_PERMISSIONS),
    )
    for role_name, permissions in grants:
        # role_name comes from the hard-coded tuple above, never from input;
        # the permission itself is always passed as a bound parameter.
        # NOTE(review): rows are matched on name only. The module docstring
        # says custom roles are left untouched — confirm a custom role cannot
        # be named 'Editor' or 'Viewer'.
        statement = text(
            "\n"
            "UPDATE search_space_roles\n"
            "SET permissions = array_append(permissions, :permission)\n"
            f"WHERE name = '{role_name}'\n"
            "AND NOT (:permission = ANY(permissions))\n"
        )
        for permission in permissions:
            bind.execute(statement, {"permission": permission})
def downgrade():
    """Remove the automations permissions from the Editor and Viewer roles.

    ``array_remove`` is applied to every row with the matching role name,
    once per permission in the corresponding tuple.
    """
    bind = op.get_bind()
    revocations = (
        ("Editor", _EDITOR_PERMISSIONS),
        ("Viewer", _VIEWER_PERMISSIONS),
    )
    for role_name, permissions in revocations:
        # role_name comes from the hard-coded tuple above, never from input;
        # the permission itself is always passed as a bound parameter.
        statement = text(
            "\n"
            "UPDATE search_space_roles\n"
            "SET permissions = array_remove(permissions, :permission)\n"
            f"WHERE name = '{role_name}'\n"
        )
        for permission in permissions:
            bind.execute(statement, {"permission": permission})

View file

@ -0,0 +1,129 @@
"""Drop Surfsense docs tables (feature removed end to end)
Revision ID: 146
Revises: 145
Create Date: 2026-05-28
Removes the SurfSense product-documentation feature: the
``surfsense_docs_documents`` and ``surfsense_docs_chunks`` tables (created
in revision 60) and the GIN trigram index on the title column (added in
revision 67). The docs were seeded at startup from local MDX files, so no
user data is lost. Downgrade recreates the tables and indexes.
"""
from collections.abc import Sequence
from alembic import op
from app.config import config
# Revision identifiers, used by Alembic.
revision: str = "146"
down_revision: str | None = "145"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Embedding dimension is required to recreate the vector columns on downgrade.
# NOTE(review): this is read from the app config when the module is imported,
# so it is evaluated for upgrade-only runs too — confirm that is acceptable.
EMBEDDING_DIM = config.embedding_model_instance.dimension
def upgrade() -> None:
    """Drop the surfsense docs indexes, then the two tables.

    Every statement uses ``IF EXISTS``, so objects that are already gone are
    skipped rather than raising.
    """
    index_names = (
        # Trigram index from revision 67
        "idx_surfsense_docs_title_trgm",
        # Full-text search indexes
        "surfsense_docs_chunks_search_index",
        "surfsense_docs_documents_search_index",
        # Vector indexes
        "surfsense_docs_chunks_vector_index",
        "surfsense_docs_documents_vector_index",
        # B-tree indexes
        "ix_surfsense_docs_chunks_document_id",
        "ix_surfsense_docs_documents_updated_at",
        "ix_surfsense_docs_documents_content_hash",
        "ix_surfsense_docs_documents_source",
    )
    for index_name in index_names:
        op.execute(f"DROP INDEX IF EXISTS {index_name}")

    # Tables (chunks first due to FK)
    for table_name in ("surfsense_docs_chunks", "surfsense_docs_documents"):
        op.execute(f"DROP TABLE IF EXISTS {table_name}")
def downgrade() -> None:
    """Recreate the surfsense docs tables and their indexes.

    Reverses the drops in ``upgrade`` (objects originally introduced by
    revisions 60 and 67). Vector columns are sized with ``EMBEDDING_DIM``.
    All statements use ``IF NOT EXISTS``.
    """
    documents = "surfsense_docs_documents"
    chunks = "surfsense_docs_chunks"

    # Parent table first: chunks carries a foreign key to documents.
    op.execute(
        "\n"
        "CREATE TABLE IF NOT EXISTS surfsense_docs_documents (\n"
        "id SERIAL PRIMARY KEY,\n"
        "created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),\n"
        "source VARCHAR NOT NULL UNIQUE,\n"
        "title VARCHAR NOT NULL,\n"
        "content TEXT NOT NULL,\n"
        "content_hash VARCHAR NOT NULL,\n"
        f"embedding vector({EMBEDDING_DIM}),\n"
        "updated_at TIMESTAMP WITH TIME ZONE\n"
        ");\n"
    )
    op.execute(
        "\n"
        "CREATE TABLE IF NOT EXISTS surfsense_docs_chunks (\n"
        "id SERIAL PRIMARY KEY,\n"
        "created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),\n"
        "content TEXT NOT NULL,\n"
        f"embedding vector({EMBEDDING_DIM}),\n"
        "document_id INTEGER NOT NULL REFERENCES surfsense_docs_documents(id) ON DELETE CASCADE\n"
        ");\n"
    )

    # B-tree indexes
    btree_targets = (
        (documents, "source"),
        (documents, "content_hash"),
        (documents, "updated_at"),
        (chunks, "document_id"),
    )
    for table, column in btree_targets:
        op.execute(
            f"CREATE INDEX IF NOT EXISTS ix_{table}_{column} ON {table}({column})"
        )

    # Vector indexes
    for table in (documents, chunks):
        op.execute(
            "\n"
            f"CREATE INDEX IF NOT EXISTS {table}_vector_index\n"
            f"ON {table} USING hnsw (embedding public.vector_cosine_ops);\n"
        )

    # Full-text search indexes
    for table in (documents, chunks):
        op.execute(
            "\n"
            f"CREATE INDEX IF NOT EXISTS {table}_search_index\n"
            f"ON {table} USING gin (to_tsvector('english', content));\n"
        )

    # Trigram index from revision 67
    op.execute(
        "\n"
        "CREATE INDEX IF NOT EXISTS idx_surfsense_docs_title_trgm\n"
        "ON surfsense_docs_documents USING gin (title gin_trgm_ops);\n"
    )

View file

@ -0,0 +1,47 @@
"""Add 'event' to automation_trigger_type enum
Revision ID: 147
Revises: 146
Create Date: 2026-05-29
Adds the ``event`` value to the ``automation_trigger_type`` enum so automations
can be triggered by published domain events, alongside the existing
``schedule`` and ``manual`` triggers.
"""
from collections.abc import Sequence
from alembic import op
# Revision identifiers, used by Alembic.
revision: str = "147"
down_revision: str | None = "146"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Enum type to extend and the label added to it by ``upgrade``.
ENUM_NAME = "automation_trigger_type"
NEW_VALUE = "event"
def upgrade() -> None:
    """Add the ``event`` label to ``automation_trigger_type`` if it is missing.

    Uses PostgreSQL's built-in ``ADD VALUE IF NOT EXISTS`` rather than a
    hand-written ``pg_type``/``pg_enum`` lookup. The catalog lookup matched on
    type name alone, so an enum with the same name in a different schema that
    already had the label would cause the addition to be skipped; the built-in
    form resolves the type the same way the ``ALTER TYPE`` itself does.
    """
    # ENUM_NAME / NEW_VALUE are module constants, not user input.
    op.execute(f"ALTER TYPE {ENUM_NAME} ADD VALUE IF NOT EXISTS '{NEW_VALUE}'")
def downgrade() -> None:
    """Do nothing: PostgreSQL does not support removing enum values.

    The ``event`` label therefore stays on the enum after a downgrade.
    """

View file

@ -57,6 +57,7 @@ async def build_agent_with_cache(
mcp_tools_by_agent: dict[str, list[BaseTool]],
disabled_tools: list[str] | None,
config_id: str | None,
image_generation_config_id_override: int | None = None,
) -> Any:
"""Compile the multi-agent graph, serving from cache when key components are stable."""
@ -91,7 +92,7 @@ async def build_agent_with_cache(
# the key, otherwise a hit will leak state across threads. Bump the schema
# version when the component list changes shape.
cache_key = stable_hash(
"multi-agent-v1",
"multi-agent-v2",
config_id,
thread_id,
user_id,
@ -109,6 +110,10 @@ async def build_agent_with_cache(
system_prompt_hash(final_system_prompt),
max_input_tokens,
sorted(disabled_tools) if disabled_tools else None,
# Bound into the generate_image subagent tool at construction time, so it
# must key the compiled-agent cache to avoid leaking one automation's
# image model into another with the same config_id/search_space.
image_generation_config_id_override,
)
return await get_cache().get_or_build(cache_key, builder=_build)

View file

@ -62,8 +62,14 @@ async def create_multi_agent_chat_deep_agent(
mentioned_document_ids: list[int] | None = None,
anon_session_id: str | None = None,
filesystem_selection: FilesystemSelection | None = None,
image_generation_config_id: int | None = None,
):
"""Deep agent with SurfSense tools/middleware; registry route subagents behind ``task`` when enabled."""
"""Deep agent with SurfSense tools/middleware; registry route subagents behind ``task`` when enabled.
``image_generation_config_id`` overrides the search space's image model for
this invocation (used by automations to run on their captured model). When
``None``, the ``generate_image`` tool resolves the live search-space pref.
"""
_t_agent_total = time.perf_counter()
apply_litellm_prompt_caching(llm, agent_config=agent_config, thread_id=thread_id)
@ -129,6 +135,9 @@ async def create_multi_agent_chat_deep_agent(
"available_document_types": available_document_types,
"max_input_tokens": _max_input_tokens,
"llm": llm,
# Per-invocation image model override (automations run on their captured
# model). Reaches the generate_image subagent tool via subagent_dependencies.
"image_generation_config_id_override": image_generation_config_id,
}
_t0 = time.perf_counter()
@ -285,6 +294,7 @@ async def create_multi_agent_chat_deep_agent(
mcp_tools_by_agent=mcp_tools_by_agent,
disabled_tools=disabled_tools,
config_id=config_id,
image_generation_config_id_override=image_generation_config_id,
)
_perf_log.info(
"[create_agent] Middleware stack + graph compiled in %.3fs",

View file

@ -4,8 +4,8 @@ never invent ids you didn't see. Citation ids are resolved by exact-match
lookup; a wrong id silently breaks the link, so when in doubt, omit.
### Channel A — chunk blocks injected this turn
When `search_surfsense_docs` or `web_search` returns `<document>` /
`<chunk id='…'>` blocks in this turn:
When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
turn:
1. For each factual statement taken from those chunks, add
`[citation:chunk_id]` using the **exact** id from a visible

View file

@ -20,8 +20,8 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap",
delegating to a specialist.
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
Each chunk carries a stable `id` attribute.
by KB search (backing `<priority_documents>`). Each chunk carries a stable
`id` attribute.
If a block doesn't appear this turn, work from the conversation alone.
</dynamic_context>

View file

@ -20,8 +20,8 @@ week's planning notes") into concrete document references before delegating
to a specialist.
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
Each chunk carries a stable `id` attribute.
by KB search (backing `<priority_documents>`). Each chunk carries a stable
`id` attribute.
If a block doesn't appear this turn, work from the conversation alone.
</dynamic_context>

View file

@ -1,19 +1,21 @@
<knowledge_base_first>
CRITICAL — ground factual answers in what you actually receive this turn:
- injected workspace context (see `<dynamic_context>`),
- results from your own tool calls (`search_surfsense_docs`, `web_search`,
`scrape_webpage`),
- results from your own tool calls (`web_search`, `scrape_webpage`),
- or substantive summaries returned by a `task` specialist you invoked.
Do **not** answer factual or informational questions from general knowledge
unless the user explicitly authorises it after you say you couldn't find
enough in those sources. The flow when nothing is found:
1. Say you couldn't find enough in their workspace, docs, or tool output.
1. Say you couldn't find enough in their workspace or tool output.
2. Ask: *"Would you like me to answer from my general knowledge instead?"*
3. Only answer from general knowledge after a clear yes.
This rule does NOT apply to: casual conversation · meta-questions about
SurfSense ("what can you do?") · formatting or analysis of content already
in chat · clear rewrite/edit instructions · lightweight web research.
For "how do I use SurfSense" / product-documentation questions, point the
user to https://www.surfsense.com/docs.
</knowledge_base_first>

View file

@ -5,7 +5,7 @@ Structured reasoning:
- For non-trivial work, `<thinking>` / short `<plan>` before tool calls is fine.
Professional objectivity:
- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — dont invent connector access.
- Accuracy over flattery; verify with **web_search**, **scrape_webpage**, or **task** when unsure — don't invent connector access.
Task management:
- For 3+ steps, use todo tooling; update statuses promptly.

View file

@ -13,6 +13,6 @@ Attribution:
Tool calls:
- Parallelise independent calls.
- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask.
- For SurfSense docs/product questions, point the user to https://www.surfsense.com/docs.
- Don't invent paths, chunk ids, or URLs — only values from tools or the user.
</provider_hints>

View file

@ -7,7 +7,7 @@ Output style:
- GitHub-flavoured Markdown; monospace-friendly.
Workflow (Understand → Plan → Act → Verify):
1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing.
1. **Understand:** parse the ask; use injected workspace context before guessing.
2. **Plan:** for multi-step work, a short plan first.
3. **Act:** only with tools you actually have on this agent (see `<tools>` and `<tool_routing>`). Connector work → **task**.
4. **Verify:** re-read or re-search only when it materially reduces risk.

View file

@ -15,6 +15,7 @@ Output style:
Tool calls:
- Parallelise independent calls in one turn.
- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage**
for fresh public facts; integrations and heavy workflows → **task**.
- For SurfSense-product questions, point the user to https://www.surfsense.com/docs;
use **web_search** / **scrape_webpage** for fresh public facts; integrations and
heavy workflows → **task**.
</provider_hints>

View file

@ -3,10 +3,7 @@ You have two execution channels. Pick the one that owns the work — never
simulate one with the other.
### 1. Direct tools (you call them yourself)
- `search_surfsense_docs` — SurfSense product docs (setup, configuration,
connector docs, feature behavior).
- `web_search` — search the public web (anything outside SurfSense docs and
the workspace KB).
- `web_search` — search the public web (anything outside the workspace KB).
- `scrape_webpage` — fetch the body of a specific public URL.
- `update_memory` — curate persistent memory (see `<memory_protocol>`).
- `write_todos` — maintain a structured plan when the turn series spans
@ -14,6 +11,10 @@ simulate one with the other.
`in_progress` **before** the `task` call that handles it, `completed`
once the call returns. Skip for single-step requests.
**Questions about how to use SurfSense itself** (setup, configuration,
connectors, feature behavior) — point the user to the documentation:
https://www.surfsense.com/docs. There is no docs-search tool; give the link.
**You have NO filesystem tools.** Any read, write, edit, move, rename, or
search inside the user's workspace goes through `task(knowledge_base, …)` —
never via `write_file`, `ls`, or any direct file operation.
@ -33,6 +34,15 @@ Rules for `task`:
- Neither's prompt references the other's output, and
- They target different specialists, OR the same specialist with
non-overlapping scopes (e.g. reading two unrelated paths).
- **Batch shape for many-shot fanout.** When a single user request expands
to **3 or more independent specialist calls** (e.g. "create five issues
from this list"), prefer the batch shape:
`task(tasks=[{description, subagent_type}, ...])`. The runtime fans them
out concurrently under a small semaphore and aggregates one ToolMessage
per child prefixed with `[task <index>]`. Batched children **do not
support human-in-the-loop interrupts** — if one needs approval it surfaces
an error and you re-dispatch it as a single (non-batched) `task(...)` call.
For 1–2 independent calls, just emit two separate `task(...)` calls.
- **Serialise dependent work across turns.** If one specialist's output
must inform another's input (e.g. "find the roadmap in my KB, then
email it to Maya"), invoke them on consecutive turns — first finishes,
@ -93,4 +103,65 @@ user: "Find my Q2 roadmap doc in the KB and email a summary to Maya."
task(gmail, "Send an email to Maya with subject 'Q2 roadmap summary'
and the following body: <summary returned by knowledge_base>.")
</example>
<example>
user: "Create issues in Linear for each of these five bugs: <list>"
→ Many-shot independent fanout — use the batch shape:
task(tasks=[
{subagent_type: "linear", description: "Create a Linear issue titled
'<bug 1 title>' with body '<bug 1 body>'. Return the issue URL."},
{subagent_type: "linear", description: "Create a Linear issue titled
'<bug 2 title>' with body '<bug 2 body>'. Return the issue URL."},
{subagent_type: "linear", description: "Create a Linear issue titled
'<bug 3 title>' with body '<bug 3 body>'. Return the issue URL."},
{subagent_type: "linear", description: "Create a Linear issue titled
'<bug 4 title>' with body '<bug 4 body>'. Return the issue URL."},
{subagent_type: "linear", description: "Create a Linear issue titled
'<bug 5 title>' with body '<bug 5 body>'. Return the issue URL."},
])
Read back the `[task 0]` through `[task 4]` blocks in the combined ToolMessage and
verify each via its Receipt's `verifiable_url` per the `<verification>`
teaching before confirming to the user.
</example>
<example>
user: "Make a 30-second podcast of this conversation."
→ Celery-backed deliverable. The `deliverables` subagent dispatches the
Celery job and then **waits for it to finish** before returning. The
call may take 10-60 seconds (or longer for video presentations) —
that is intentional, not a hang. You always get back one of two
Receipt shapes:
task(deliverables, "Generate a podcast titled '<title>' from the
following content. Use a 30-second style brief. Return the podcast
id and title.\n\n<source content>")
Outcomes:
- **`status="success"`**: the audio is saved. Tell the user the
podcast is **ready** and quote the `external_id` / `preview` so
they can find it in the podcast panel.
- **`status="failed"`**: surface the Receipt's `error` field
verbatim. Do NOT silently re-dispatch — the backend already tried
and reported a real error.
Same two-way pattern applies to video presentations (which take
longer to render, but still return a terminal status). If a
`task(deliverables, ...)` invocation itself times out at the subagent
layer (separate from the Receipt), that's an operator-side problem
with the subagent invoke timeout, not a deliverable failure — pass
the message through and stop.
</example>
<example>
user: "Post the launch announcement to #general and let me know when it's up."
→ Mutating subagent + user wants external confirmation. Apply the
`<verification>` teaching: the slack subagent's reply is a self-report;
check its `evidence.receipts` for a Receipt with `status="success"` and
a `verifiable_url`, then fetch that URL to confirm before reporting back.
This turn:
task(slack, "Post '<launch announcement text>' to #general.
Return the message permalink.")
Next turn (with the receipt's `verifiable_url` in hand):
scrape_webpage(url=<verifiable_url from slack receipt>)
→ confirm the post is live, then tell the user it's up with the URL.
If the slack reply has NO Receipt with `status="success"`, treat it as a
silent failure: surface the error verbatim, do not retry.
</example>
</routing>

View file

@ -0,0 +1 @@
"""``create_automation`` — description + few-shot examples."""

View file

@ -0,0 +1,34 @@
- `create_automation` — Draft and author a new automation. You describe the
user's intent; a focused drafter inside the tool turns it into the full
automation JSON; the user sees a preview on an approval card and chooses
approve or reject. All three phases happen in a single tool call.
- Call when the user wants SurfSense to do something on its own: anything
recurring or scheduled ("every morning…", "each Monday…", "weekly
recap…").
- Args:
- `intent` (string): restate the user's request **concretely**, in one
paragraph. Cover three things:
- **What** should run (the action: summarize, recap, post, draft, …).
- **When** it should run (schedule + timezone if the user mentioned one;
otherwise leave the timezone for the drafter to default to UTC).
- **Static values** the automation needs (folder ids, channel names,
project keys, parent page ids, …) — list them with their values.
If the user did NOT supply one the automation needs, say so
explicitly ("the Notion parent page id was not specified") so the
drafter leaves a placeholder.
- Do NOT prompt the user to confirm before calling — the approval card
IS the confirmation. The card shows a structured preview plus the raw
JSON; it offers approve/reject only. If the user wants changes after
seeing the draft, they reply in chat and you call this tool again with
a refined `intent` — that's the edit path.
- Returns:
- `{status: "saved", automation_id, name}` — confirm briefly to the
user ("Saved as automation #N — runs <when>."). Don't dump JSON back.
- `{status: "rejected", message}` — the user declined on the card.
Acknowledge once ("Understood, I didn't create it.") and stop. Do
NOT retry or pitch variants without a fresh user request.
- `{status: "invalid", issues, raw?}` — drafting/validation failed
before the card was shown. Read the issues, refine your `intent`
with the missing details, call again.
- `{status: "error", message}` — surface the message verbatim and
offer to retry.

View file

@ -0,0 +1,13 @@
<example>
user: "Every weekday at 9am, summarize new documents in folder 12 and post the summary to Slack channel #daily-digest."
→ create_automation(intent="Every weekday at 09:00 UTC, summarize documents added to folder_id=12 since the last run, then post the summary to Slack channel '#daily-digest'. Static inputs: folder_id=12, slack_channel='#daily-digest'.")
tool returns: {"status": "saved", "automation_id": 42, "name": "Daily folder 12 digest"}
(Reply briefly: "Saved as automation #42 — runs weekdays at 9am UTC.")
</example>
<example>
user: "Once a week on Mondays at 7am Paris time, draft a Notion page recapping last week's Jira tickets in project CORE."
→ create_automation(intent="Every Monday at 07:00 Europe/Paris, read last week's Jira issues in project CORE, then draft a Notion page recapping them. Static inputs: jira_project_key='CORE'. The user did NOT specify which Notion page the recap should sit under — leave notion_parent_page_id as a placeholder.")
tool returns: {"status": "saved", "automation_id": 51, "name": "Weekly CORE Jira recap"}
(Reply: "Saved as automation #51. I left the Notion parent page id as a placeholder — set it on the automation before next Monday.")
</example>

View file

@ -1 +0,0 @@
"""``search_surfsense_docs`` — description + few-shot examples."""

View file

@ -1,10 +0,0 @@
- `search_surfsense_docs` — Search official SurfSense documentation (product
help).
- Use when the user asks how SurfSense itself works — setup, configuration,
connector documentation, feature behavior, anything covered in the
product docs.
- Not a substitute for `task` when the user wants actions inside a
connected service (Gmail, Slack, Jira, Notion, etc.).
- Args: `query`, `top_k` (default 10).
- Returns doc excerpts; chunk ids may appear for attribution — see
`<citations>` for the contract.

View file

@ -1,15 +0,0 @@
<example>
user: "How do I install SurfSense?"
→ search_surfsense_docs(query="installation setup")
</example>
<example>
user: "What connectors does SurfSense support?"
→ search_surfsense_docs(query="available connectors integrations")
</example>
<example>
user: "How do I set up the Notion connector?"
→ search_surfsense_docs(query="Notion connector setup configuration")
(Changing data inside Notion itself → `task(notion, …)`, not this tool.)
</example>

View file

@ -4,12 +4,69 @@
`<specialists>` for the live roster.
- Each subagent runs in isolation with its own tool stack and context,
and returns a single synthesized result.
- Args:
- Args (single mode):
- `subagent_type` — name of the specialist to invoke (must match an
entry in `<specialists>`).
- `description` — the FULL task prompt. The specialist cannot see this
thread, so include all context and constraints, plus what you need
back. The specialist will respond in its own format — don't dictate
one.
- Args (batch mode):
- `tasks` — array of `{description, subagent_type}` objects to fan out
concurrently. Mutually exclusive with single-mode args. Use when a
single request expands to **3 or more independent specialist calls**
(e.g. "create five issues from this list"). Children run under a
small concurrency cap and the runtime returns one ToolMessage block
per child, prefixed with `[task <index>]`. **Batched children do not
support human-in-the-loop interrupts** — if any child needs approval
it surfaces an error and you must re-dispatch that single task as a
non-batched `task(...)` call.
- Routing rules (when to call, how often, how to scope) live in
`<routing>`.
<verification>
A subagent's natural-language reply is a **self-report**, not proof. The
specialist might claim a Slack message was posted, a Jira issue was
created, or a report was generated even when the underlying tool call
failed silently or was rate-limited. Treat success language ("Done",
"Posted to #general", "Created ENG-42") as a hypothesis, not a fact.
Two ground-truth signals are always available to verify a mutating
subagent's claim:
1. **`state['receipts']`** — every mutating tool emits a structured
`Receipt` (route, type, operation, status, external_id,
verifiable_url, preview) into this append-only list. The supervisor
never sees the raw list directly, but each subagent's
`<output_contract>` carries the matching Receipt(s) under
`evidence.receipts`. If a subagent reports success with NO matching
Receipt at `status="success"` (or, rarely, `"pending"` for work that
was only kicked off — see the status semantics below), the operation
did not happen — treat it as a failure, surface that to the user
verbatim, and do not retry blindly.
2. **`scrape_webpage`** — when a Receipt carries a `verifiable_url`
(Notion page URL, Slack permalink, Jira issue URL, Linear identifier
URL, etc.), you can fetch that URL and confirm the operation
externally. Use this for high-stakes mutations the user explicitly
called out (e.g. "send the launch email to the whole team") or when
the subagent's self-report contradicts what the user expected.
**Receipt status semantics — read carefully:**
- `status="success"`: the mutation already committed in the backend.
If a `verifiable_url` is present and the request was high-stakes,
you may `scrape_webpage` it to externally confirm. Otherwise trust
the Receipt and tell the user it is done. Celery-backed deliverables
(podcasts, video presentations) also land here — the subagent
already waited for the worker to finish, so a `success` Receipt
means the artefact really is saved.
- `status="failed"`: a Receipt with this status carries the backend's
error in its `error` field. Surface that text verbatim to the user;
re-routing or retrying is only appropriate when the user explicitly
asks for it.
- `status="pending"`: rare today — current mutating tools wait for
their backend before returning. If you ever do see a pending
Receipt, tell the user the work has been **kicked off** (quote the
`external_id` / `preview` so they can find it later), do not
`scrape_webpage` it, and do not re-dispatch the same
`task(...)` call hoping it will be done "this time".
</verification>

View file

@ -0,0 +1,7 @@
"""``create_automation`` — author + persist an automation via a HITL card."""
from __future__ import annotations
from .create import create_create_automation_tool
__all__ = ["create_create_automation_tool"]

View file

@ -0,0 +1,214 @@
"""``create_automation`` — NL intent → drafted JSON → HITL approval card → persisted.
Single tool that:
1. Drafts a structured automation from the user's intent via a focused sub-LLM
(system prompt in :mod:`.prompt`).
2. Surfaces the validated draft in a HITL approval card
(``action_type="automation_create"``).
3. On approval, validates the (possibly edited) payload again and persists
it via :class:`AutomationService`.
The main agent only restates the user's request as a single ``intent`` string.
The drafting sub-LLM owns the JSON shape; the HITL card is the user's review.
"""
from __future__ import annotations
import json
import logging
import re
from typing import Any
from uuid import UUID
from fastapi import HTTPException
from langchain.tools import ToolRuntime
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from pydantic import ValidationError
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
request_approval,
)
from app.automations.schemas.api import AutomationCreate
from app.automations.services.automation import AutomationService
from app.db import User, async_session_maker
from app.utils.content_utils import extract_text_content
from .prompt import build_draft_prompt
logger = logging.getLogger(__name__)
_JSON_FENCE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL)
def create_create_automation_tool(
    *,
    search_space_id: int,
    user_id: str | UUID,
    llm: Any,
):
    """Factory for the ``create_automation`` tool.

    ``search_space_id`` is injected from the chat session (the model never
    has to guess it). ``llm`` is the drafting sub-model — we reuse the main
    agent's LLM and tag the call so it's identifiable in traces. A fresh
    ``AsyncSession`` is opened per call to avoid stale sessions on
    compiled-agent cache hits (same pattern as the Notion / memory tools).

    Args:
        search_space_id: Search space every drafted automation is pinned to;
            it overwrites whatever the drafter or the approval payload carry.
        user_id: Owner of the automation. A ``str`` is parsed into a ``UUID``
            once, at factory time.
        llm: Object exposing ``ainvoke``; used only for the drafting call.

    Returns:
        The ``create_automation`` tool closed over the arguments above.
    """
    # Normalise once so every tool call looks the user up by ``UUID``.
    uid = UUID(user_id) if isinstance(user_id, str) else user_id

    # NOTE(review): with ``@tool`` the docstring below presumably doubles as
    # the tool description shown to the model — keep it model-facing.
    @tool
    async def create_automation(intent: str, runtime: ToolRuntime) -> dict[str, Any]:
        """Draft + save an automation from a natural-language intent.

        Use this when the user wants SurfSense to do something on its own
        on a schedule (e.g. "every morning summarize folder 12 to Slack").
        Restate the user's request as ONE concrete ``intent`` string: what
        should run, when, and which static values (folder ids, channel
        names, …) it needs.

        The tool drafts the full automation JSON internally, shows the user
        a structured preview on an approval card, and persists on approval.
        The card supports approve/reject only — if the user wants edits
        after seeing the draft, they say so in chat and you call this tool
        again with a refined intent. Do NOT prompt the user to confirm
        before calling — the card IS the confirmation.

        Args:
            intent: Concrete restatement of the user's request. Include
                the schedule (with timezone if mentioned), the action to
                take, and any static values. Example: "Every weekday at
                09:00 UTC, summarize new docs added to folder_id=12 since
                the last run, then post the summary to Slack channel
                '#daily-digest'."

        Returns:
            ``{"status": "saved", "automation_id": int, "name": str}`` on
            approval + save.
            ``{"status": "rejected", "message": "..."}`` when the user
            declines on the card.
            ``{"status": "invalid", "issues": [...], "raw": ...}`` when
            the drafter produced output that did not validate (call again
            with a more precise intent).
            ``{"status": "error", "message": "..."}`` on drafter or
            persistence failure.

        IMPORTANT: when status is ``"rejected"`` the user explicitly
        declined. Acknowledge once and stop — do NOT retry or pitch
        variants without a fresh user request.
        """
        # Models are chosen per-automation on the approval card (premium/BYOK
        # selectors) and validated when persisted by ``AutomationService.create``
        # — so there's no fail-fast search-space eligibility gate here. The
        # search space's current chat/role model selection no longer constrains
        # whether an automation can be drafted or saved.
        # --- 1. Draft via sub-LLM ---
        prompt = build_draft_prompt(search_space_id=search_space_id, intent=intent)
        try:
            response = await llm.ainvoke(
                [HumanMessage(content=prompt)],
                config={"tags": ["surfsense:internal", "automation-draft"]},
            )
        except Exception as exc:
            # Any drafter failure is reported as a tool result, never raised.
            logger.exception("create_automation drafting LLM call failed")
            return {"status": "error", "message": f"drafting failed: {exc}"}
        raw_text = extract_text_content(response.content).strip()
        draft = _extract_json(raw_text)
        if draft is None:
            # Hand the raw text back so the caller can see what was produced.
            return {
                "status": "invalid",
                "issues": ["model output was not parseable JSON"],
                "raw": raw_text,
            }
        # search_space_id is injected here so the sub-LLM never has to guess.
        draft["search_space_id"] = search_space_id
        try:
            validated_draft = AutomationCreate.model_validate(draft)
        except ValidationError as exc:
            return {
                "status": "invalid",
                "issues": _format_validation_issues(exc),
                "raw": draft,
            }
        # --- 2. HITL approval card ---
        try:
            card_params = validated_draft.model_dump(mode="json", by_alias=True)
            # search_space_id is session-scoped, not user-editable.
            card_params.pop("search_space_id", None)
            result = request_approval(
                action_type="automation_create",
                tool_name="create_automation",
                params=card_params,
                context={"search_space_id": search_space_id},
                tool_call_id=runtime.tool_call_id,
            )
            if result.rejected:
                return {
                    "status": "rejected",
                    "message": "User declined. Do not retry or suggest alternatives.",
                }
            # --- 3. Persist (re-validate in case the user edited) ---
            # The session-scoped id is re-applied last so the approval
            # payload cannot override it.
            final_payload = {**result.params, "search_space_id": search_space_id}
            try:
                final_validated = AutomationCreate.model_validate(final_payload)
            except ValidationError as exc:
                return {
                    "status": "invalid",
                    "issues": _format_validation_issues(exc),
                }
            async with async_session_maker() as session:
                user = await session.get(User, uid)
                if user is None:
                    return {
                        "status": "error",
                        "message": "user not found in this session",
                    }
                service = AutomationService(session=session, user=user)
                created = await service.create(final_validated)
                return {
                    "status": "saved",
                    "automation_id": created.id,
                    "name": created.name,
                }
        except HTTPException as exc:
            # An ``HTTPException`` raised inside this block is reported with
            # its ``detail`` as the error message.
            return {"status": "error", "message": exc.detail}
        except Exception as exc:
            from langgraph.errors import GraphInterrupt

            # ``GraphInterrupt`` must propagate unchanged instead of being
            # reported as a failure (presumably it is how the approval card
            # pauses the graph — confirm against ``request_approval``).
            if isinstance(exc, GraphInterrupt):
                raise
            logger.exception("create_automation failed")
            return {"status": "error", "message": f"persistence failed: {exc}"}

    return create_automation
def _extract_json(text: str) -> dict[str, Any] | None:
"""Pull a JSON object out of the model response, tolerating ``` fences."""
if not text:
return None
candidate = text
fence_match = _JSON_FENCE.search(text)
if fence_match:
candidate = fence_match.group(1)
try:
parsed = json.loads(candidate)
except json.JSONDecodeError:
return None
return parsed if isinstance(parsed, dict) else None
def _format_validation_issues(exc: ValidationError) -> list[str]:
    """Flatten a validation error into ``"dotted.path: message"`` strings.

    One string is produced per entry of ``exc.errors()``; the path is the
    entry's ``loc`` components joined with dots.
    """
    issues: list[str] = []
    for err in exc.errors():
        dotted_path = ".".join(map(str, err["loc"]))
        issues.append(f"{dotted_path}: {err['msg']}")
    return issues

View file

@ -0,0 +1,178 @@
"""System prompt for the drafting sub-LLM inside ``create_automation``.
Converts a natural-language ``intent`` into a structured ``AutomationCreate``
JSON object. That object becomes the payload the HITL approval card surfaces.
Scope split:
Real automation JSONs live here — this is the graph that *generates*
the JSON. The main agent's prompt fragments (``description.md`` /
``example.md``) only carry intent-string examples; the main agent
never sees the schema.
Layout:
The prompt is concatenated from four format-safe pieces. ``_HEADER`` /
``_FOOTER`` carry the only ``str.format`` placeholders; ``_SCHEMA`` and
``_FEW_SHOTS`` are plain strings so their JSON literals (and the
``{{ inputs.X }}`` Jinja references in queries) can stay readable
without doubled-brace escaping.
Catalog handling:
v1 hard-codes the action/trigger catalog (one action, one trigger).
When new types ship, swap the inline lines for a render-time pull
from ``app.automations.actions`` / ``app.automations.triggers`` via
lazy imports inside :func:`build_draft_prompt` so this module never
participates in the ``multi_agent_chat`` import cycle.
"""
from __future__ import annotations
from datetime import UTC, datetime
_HEADER = """\
You are the SurfSense automation drafter. Convert the user intent below
into a SINGLE JSON object matching the AutomationCreate schema. Output
ONLY that JSON object no prose, no markdown fence, no commentary.
Current UTC time (for cron context): {now}
Target search_space_id: {search_space_id}
"""
_SCHEMA = """
Required JSON shape:
{
"name": "<1-200 char identifier>",
"description": "<one-liner or null>",
"definition": {
"schema_version": "1.0",
"name": "<same as outer name>",
"goal": "<one sentence>",
"plan": [
{
"step_id": "<slug>",
"action": "agent_task",
"params": {
"query": "<Jinja string referencing {{ inputs.X }}>",
"auto_approve_all": true
}
}
],
"metadata": {"tags": ["..."]}
},
"triggers": [
{
"type": "schedule",
"params": {"cron": "<5-field cron>", "timezone": "<IANA tz, default UTC>"},
"static_inputs": {"<key>": <value>, ...},
"enabled": true
}
]
}
v1 catalog (only these are valid):
- Actions: agent_task params: query (string, Jinja), auto_approve_all (bool).
- Triggers: schedule params: cron (5-field), timezone (IANA, e.g. "UTC",
"Europe/Paris"). Has static_inputs (object).
Conventions:
- Whatever the plan references via {{ inputs.X }} MUST appear either in a
trigger's static_inputs OR in definition.inputs.schema_.properties so the
executor can resolve it at fire time.
- static_inputs carries values that stay the same across every fire
(folder ids, channel names, project keys, parent page ids). Put them on
the trigger that supplies them, not in the plan.
- If the user did NOT supply a value the plan needs, put "REPLACE_ME" in
static_inputs. Do NOT invent ids, channels, or paths.
- Cron is 5-field (minute hour day-of-month month day-of-week). Use the
timezone the user mentioned; default "UTC" when unspecified.
- Templating variables available at fire time: inputs.* (merged
static_inputs + runtime), inputs.fired_at, inputs.last_fired_at.
"""
_FEW_SHOTS = """
Few-shot examples (intent JSON output):
### Example 1 — schedule with all static values supplied
intent: "Every weekday at 09:00 UTC, summarize documents added to folder_id=12 since the last run, then post the summary to Slack channel '#daily-digest'. Static inputs: folder_id=12, slack_channel='#daily-digest'."
output:
{
"name": "Daily folder 12 digest",
"description": "Weekday 09:00 UTC summary of folder 12 documents posted to #daily-digest",
"definition": {
"schema_version": "1.0",
"name": "Daily folder 12 digest",
"goal": "Summarize new docs in folder 12 since the last run and post to #daily-digest",
"plan": [
{
"step_id": "summarize_and_post",
"action": "agent_task",
"params": {
"query": "Summarize documents added to folder {{ inputs.folder_id }} since {{ inputs.last_fired_at or 'yesterday' }}, then send the summary to Slack channel {{ inputs.slack_channel }}.",
"auto_approve_all": true
}
}
],
"metadata": {"tags": ["daily", "digest", "slack"]}
},
"triggers": [
{
"type": "schedule",
"params": {"cron": "0 9 * * 1-5", "timezone": "UTC"},
"static_inputs": {"folder_id": 12, "slack_channel": "#daily-digest"},
"enabled": true
}
]
}
### Example 2 — schedule with a missing value (REPLACE_ME placeholder)
intent: "Every Monday at 07:00 Europe/Paris, read last week's Jira issues in project CORE, then draft a Notion page recapping them. Static inputs: jira_project_key='CORE'. The user did NOT specify the Notion parent page id — leave it as a placeholder."
output:
{
"name": "Weekly CORE Jira recap",
"description": "Monday 07:00 Europe/Paris recap of last week's CORE Jira issues, drafted to Notion",
"definition": {
"schema_version": "1.0",
"name": "Weekly CORE Jira recap",
"goal": "Recap last week's CORE Jira issues into a Notion page",
"plan": [
{
"step_id": "recap",
"action": "agent_task",
"params": {
"query": "List Jira issues in project {{ inputs.jira_project_key }} updated in the 7 days before {{ inputs.fired_at }}. Draft a Notion page under parent id {{ inputs.notion_parent_page_id }} titled 'CORE recap — week of {{ inputs.fired_at }}'.",
"auto_approve_all": true
}
}
],
"metadata": {"tags": ["weekly", "recap", "jira", "notion"]}
},
"triggers": [
{
"type": "schedule",
"params": {"cron": "0 7 * * 1", "timezone": "Europe/Paris"},
"static_inputs": {"jira_project_key": "CORE", "notion_parent_page_id": "REPLACE_ME"},
"enabled": true
}
]
}
"""
_FOOTER = """
User intent:
{intent}
"""
def build_draft_prompt(*, search_space_id: int, intent: str) -> str:
    """Assemble the drafting prompt for one ``intent``.

    Only the header (current UTC time, search space id) and the footer (the
    whitespace-stripped intent) are formatted; the schema and few-shot
    sections are appended verbatim between them.
    """
    rendered_header = _HEADER.format(
        now=datetime.now(UTC).isoformat(timespec="seconds"),
        search_space_id=search_space_id,
    )
    rendered_footer = _FOOTER.format(intent=intent.strip())
    return "".join((rendered_header, _SCHEMA, _FEW_SHOTS, rendered_footer))

View file

@ -6,10 +6,10 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag
from __future__ import annotations
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
"search_surfsense_docs",
"web_search",
"scrape_webpage",
"update_memory",
"create_automation",
)
MAIN_AGENT_SURFSENSE_TOOL_NAMES: frozenset[str] = frozenset(

View file

@ -2,6 +2,8 @@
from __future__ import annotations
import os
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
EXCLUDED_STATE_KEYS = frozenset(
{
@ -16,3 +18,72 @@ EXCLUDED_STATE_KEYS = frozenset(
# Match the parent graph's budget; the LangGraph default of 25 trips on
# multi-step subagent runs.
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
def _read_timeout_env(name: str, default: float) -> float:
    """Parse ``name`` from the environment; fall back to ``default`` on bad values.

    Kept as a free function so the module-level constants stay constants
    after import; tests can monkeypatch this and re-evaluate via
    ``importlib.reload`` if they need a different value mid-process.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, empty, not a
            number, negative, or ``nan``.

    Returns:
        The parsed non-negative float, or ``default``.
    """
    raw = os.environ.get(name)
    if not raw:
        return default
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return default
    # ``0`` must survive parsing: it is the documented "disable the cap"
    # value. Negative numbers and ``nan`` (which fails every comparison)
    # still fall back to the default.
    return value if value >= 0 else default


# Wall-clock budget for a single ``task(subagent, ...)`` invocation.
# Subagents that run hot (image generation with slow vendors, KB writes
# behind a sluggish embedder) can otherwise wedge the orchestrator until
# the next checkpoint heartbeat. ``0`` disables the timeout entirely.
DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS: float = _read_timeout_env(
    "SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS",
    default=300.0,
)
def _read_int_env(name: str, default: int, *, minimum: int = 1) -> int:
    """Parse an integer from the environment; fall back to ``default`` on bad values.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, empty, not an
            integer, or below ``minimum``.
        minimum: Smallest accepted value. The default of ``1`` keeps the
            strictly-positive contract the batch limits rely on; pass ``0``
            for settings where ``0`` means "disabled".

    Returns:
        The parsed integer when it is at least ``minimum``, else ``default``.
    """
    raw = os.environ.get(name)
    if not raw:
        return default
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    return value if value >= minimum else default


# Maximum number of children that ``task(..., tasks=[...])`` runs in
# parallel via ``asyncio.gather`` + ``Semaphore``. Bounded so a runaway
# fanout cannot starve unrelated subagents (each child still owns an
# LLM call + DB session). Set ``SURFSENSE_TASK_BATCH_CONCURRENCY=1`` to
# effectively serialise batches without changing the schema.
DEFAULT_SUBAGENT_BATCH_CONCURRENCY: int = _read_int_env(
    "SURFSENSE_TASK_BATCH_CONCURRENCY",
    default=3,
)

# Max number of children in a single batched ``task`` call. Hard upper
# bound is a safety net for prompt-injection / runaway loops; the orchestrator
# rarely needs more than a handful of concurrent specialists.
MAX_SUBAGENT_BATCH_SIZE: int = _read_int_env(
    "SURFSENSE_TASK_BATCH_MAX_SIZE",
    default=8,
)

# Soft threshold for per-turn cumulative ``task(...)`` invocations across
# **all** subagents. Once the sum of ``state['billable_calls']`` values
# crosses this number, the runtime appends a one-shot warning ToolMessage
# instructing the orchestrator to wrap up the turn. Tunable so heavy-research
# turns (which legitimately need 15+ specialist calls) don't trip the alarm
# in production. Set to ``0`` to disable the warning entirely.
DEFAULT_SUBAGENT_BILLABLE_THRESHOLD: int = _read_int_env(
    "SURFSENSE_SUBAGENT_BILLABLE_THRESHOLD",
    default=15,
    # ``0`` has to be accepted here, otherwise the documented "disable"
    # value silently turns back into the default.
    minimum=0,
)

View file

@ -16,6 +16,9 @@ from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY,
)
from app.utils.perf import get_perf_logger
from .task_tool import build_task_tool_with_parent_config
@ -34,6 +37,7 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
subagents: list[SubAgent | CompiledSubAgent],
system_prompt: str | None = TASK_SYSTEM_PROMPT,
task_description: str | None = None,
search_space_id: int | None = None,
) -> None:
self._surf_checkpointer = checkpointer
super(SubAgentMiddleware, self).__init__()
@ -43,8 +47,17 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
)
self._backend = backend
self._subagents = subagents
# Search-space id is captured at build time (the orchestrator runs in
# exactly one search space for its lifetime). The spawn-paused kill
# switch keys on it so an operator can quarantine one workspace
# without affecting the rest of the deployment.
self._search_space_id = search_space_id
subagent_specs = self._surf_compile_subagent_graphs()
task_tool = build_task_tool_with_parent_config(subagent_specs, task_description)
task_tool = build_task_tool_with_parent_config(
subagent_specs,
task_description,
search_space_id=search_space_id,
)
if system_prompt and subagent_specs:
agents_desc = "\n".join(
f"- {s['name']}: {s['description']}" for s in subagent_specs
@ -64,6 +77,10 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
for spec in self._subagents:
spec_start = time.perf_counter()
# Provider may be ``None`` (no hint), in which case task_tool
# skips the prepend step. We forward the key unconditionally so
# the registry shape is uniform.
hint_provider = cast(dict, spec).get(SURF_CONTEXT_HINT_PROVIDER_KEY)
if "runnable" in spec:
compiled = cast(CompiledSubAgent, spec)
specs.append(
@ -71,6 +88,7 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
"name": compiled["name"],
"description": compiled["description"],
"runnable": compiled["runnable"],
SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
}
)
timings.append(
@ -108,6 +126,7 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
"name": spec["name"],
"description": spec["description"],
"runnable": runnable,
SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
}
)
timings.append(

View file

@ -0,0 +1,84 @@
"""Per-search-space spawn-paused kill switch for the ``task`` boundary.
When operators see a runaway loop, a vendor outage, or a billing event
that requires immediate cessation of subagent traffic for a specific
workspace, they flip a Redis flag and the ``task`` tool short-circuits
without touching downstream services. The flag is **per-search-space**
so one tenant's incident never silences the rest of the deployment.
Flag key: ``surfsense:spawn_paused:{search_space_id}``
Flag value: any non-empty string (the contents are not interpreted, so
even ``0`` pauses; an empty value reads as "not paused").
TTL: set by whoever toggles the flag — this module never expires
keys on its own, since "the flag is on" is itself the signal
that a human (or alert) needs to investigate.
The check is best-effort: Redis errors are logged but do not block the
``task`` invocation. Failing closed (block-on-redis-error) would let a
single Redis blip take the whole orchestrator offline; failing open
preserves availability and the alarm bells (rate-limits, cost spikes)
will surface the underlying outage.
"""
from __future__ import annotations
import contextlib
import logging
import os
from app.config import config
logger = logging.getLogger(__name__)
# Operators can disable the check entirely (e.g. local dev without Redis)
# by setting ``SURFSENSE_TASK_SPAWN_PAUSED_DISABLED=1``. Default is
# enabled so production never relies on flipping an opt-out flag.
_DISABLED = os.environ.get(
"SURFSENSE_TASK_SPAWN_PAUSED_DISABLED", ""
).strip().lower() in {
"1",
"true",
"yes",
"on",
}
def _flag_key(search_space_id: int) -> str:
    """Return the Redis key that holds the spawn-paused flag for ``search_space_id``."""
    return f"surfsense:spawn_paused:{search_space_id}"
# Upper bound (seconds) for connecting to Redis and for the single ``GET``.
# Without it a Redis that hangs (rather than errors) would stall every
# ``task`` call, which defeats the fail-open design of this check.
_SPAWN_PAUSED_REDIS_TIMEOUT_SECONDS = 2.0


async def is_spawn_paused(search_space_id: int | None) -> bool:
    """Return ``True`` iff the workspace's spawn-paused flag is set in Redis.

    A ``None`` search-space (e.g. dev paths that did not plumb the id
    through yet) bypasses the check. So does a Redis outage — see module
    docstring for the fail-open rationale. Connect and read are bounded by
    ``_SPAWN_PAUSED_REDIS_TIMEOUT_SECONDS`` so an unresponsive Redis is
    treated like any other Redis failure.

    Args:
        search_space_id: Workspace to check, or ``None`` to skip the check.

    Returns:
        ``True`` when the flag key holds a non-empty value; ``False`` when
        the check is disabled, the id is ``None``, the key is missing or
        empty, or anything goes wrong while talking to Redis.
    """
    if _DISABLED or search_space_id is None:
        return False
    try:
        # Local import keeps the cold-path import cheap and lets routes
        # that never call ``task`` skip the redis dependency entirely.
        import redis.asyncio as aioredis  # type: ignore[import-not-found]

        client = aioredis.from_url(
            config.REDIS_APP_URL,
            decode_responses=True,
            socket_connect_timeout=_SPAWN_PAUSED_REDIS_TIMEOUT_SECONDS,
            socket_timeout=_SPAWN_PAUSED_REDIS_TIMEOUT_SECONDS,
        )
        try:
            raw = await client.get(_flag_key(search_space_id))
        finally:
            # ``aclose()`` is the async-safe variant on redis-py >=5; fall back
            # to ``close()`` for older clients pinned in tests.
            close = getattr(client, "aclose", None) or getattr(client, "close", None)
            if callable(close):
                # A failure while closing must not mask the read result.
                with contextlib.suppress(Exception):
                    await close()  # type: ignore[misc]
        return bool(raw)
    except Exception:
        # Deliberate best-effort: log with traceback and let the task run.
        logger.warning(
            "spawn_paused check failed for search_space_id=%s; failing open.",
            search_space_id,
            exc_info=True,
        )
        return False
__all__ = ["is_spawn_paused"]

View file

@ -8,9 +8,12 @@ re-raises any new pending interrupt back to the parent.
from __future__ import annotations
import asyncio
import json
import logging
import time
from typing import Annotated, Any, NoReturn
from collections.abc import Awaitable
from typing import Annotated, Any, NoReturn, TypeVar
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
from langchain.tools import BaseTool, ToolRuntime
@ -20,6 +23,10 @@ from langchain_core.tools import StructuredTool
from langgraph.errors import GraphInterrupt
from langgraph.types import Command, Interrupt
from app.agents.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY,
ContextHintProvider,
)
from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger
@ -29,7 +36,13 @@ from .config import (
has_surfsense_resume,
subagent_invoke_config,
)
from .constants import EXCLUDED_STATE_KEYS
from .constants import (
DEFAULT_SUBAGENT_BATCH_CONCURRENCY,
DEFAULT_SUBAGENT_BILLABLE_THRESHOLD,
DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS,
EXCLUDED_STATE_KEYS,
MAX_SUBAGENT_BATCH_SIZE,
)
from .propagation import wrap_with_tool_call_id
from .resume import (
build_resume_command,
@ -37,11 +50,70 @@ from .resume import (
get_first_pending_subagent_interrupt,
hitlrequest_action_count,
)
from .spawn_paused import is_spawn_paused
logger = logging.getLogger(__name__)
_perf_log = get_perf_logger()
class SubagentInvokeTimeoutError(Exception):
    """Signals that ``subagent.ainvoke`` ran past its wall-clock budget.

    The subagent name and the elapsed seconds are kept as attributes so the
    caller can turn the failure into a ToolMessage the orchestrator can act
    on (re-route, surface to the user, or retry with a smaller scope).
    """

    def __init__(self, subagent_type: str, elapsed_seconds: float) -> None:
        # The message quotes the module-wide budget, not a per-call value.
        budget_seconds = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
        message = (
            f"subagent {subagent_type!r} exceeded "
            f"{budget_seconds:.0f}s budget "
            f"(elapsed={elapsed_seconds:.1f}s)"
        )
        super().__init__(message)
        self.subagent_type = subagent_type
        self.elapsed_seconds = elapsed_seconds
_T = TypeVar("_T")
async def _ainvoke_with_timeout[T](
coro: Awaitable[_T], *, subagent_type: str, started_at: float
) -> _T:
"""Apply :data:`DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS` to ``coro``.
A non-positive timeout disables the cap (configurable via the
``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` env var). On expiry the
underlying task is cancelled and :class:`SubagentInvokeTimeoutError` is
raised the caller wraps it into a synthetic ToolMessage so the
orchestrator can decide what to do.
"""
timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
if timeout <= 0:
return await coro
try:
return await asyncio.wait_for(coro, timeout=timeout)
except TimeoutError as exc:
elapsed = time.perf_counter() - started_at
raise SubagentInvokeTimeoutError(subagent_type, elapsed) from exc
def _synthesize_timeout_command(
    exc: SubagentInvokeTimeoutError, *, tool_call_id: str
) -> Command:
    """Wrap a subagent timeout in a ``Command`` carrying one explanatory ToolMessage.

    The message names the subagent, the elapsed time and the configured
    budget, and tells the parent to treat the call as an error.
    """
    timeout_notice = ToolMessage(
        content=(
            f"Subagent {exc.subagent_type!r} timed out after "
            f"{exc.elapsed_seconds:.1f}s (budget="
            f"{DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS:.0f}s). "
            "The work was cancelled. Treat as status=error; re-route with a "
            "narrower scope or different specialist."
        ),
        tool_call_id=tool_call_id,
    )
    return Command(update={"messages": [timeout_notice]})
def _reraise_stamped_subagent_interrupt(
gi: GraphInterrupt, tool_call_id: str
) -> NoReturn:
@ -70,11 +142,24 @@ def _reraise_stamped_subagent_interrupt(
def build_task_tool_with_parent_config(
subagents: list[dict[str, Any]],
task_description: str | None = None,
*,
search_space_id: int | None = None,
) -> BaseTool:
"""Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging."""
subagent_graphs: dict[str, Runnable] = {
spec["name"]: spec["runnable"] for spec in subagents
}
# Per-subagent context-hint providers (see ``SurfSenseSubagentSpec``).
# The mapping is sparse: only routes that opted in via ``pack_subagent``
# appear here, and the value is invoked once per ``task(...)`` call to
# generate a short string prepended to the subagent's first
# ``HumanMessage``. Failures are logged and swallowed — a broken hint
# provider must never prevent the underlying task from running.
subagent_hint_providers: dict[str, ContextHintProvider] = {
spec["name"]: provider
for spec in subagents
if (provider := spec.get(SURF_CONTEXT_HINT_PROVIDER_KEY)) is not None
}
subagent_description_str = "\n".join(
f"- {s['name']}: {s['description']}" for s in subagents
)
@ -88,6 +173,120 @@ def build_task_tool_with_parent_config(
else:
description = task_description
def _billable_call_update(
    subagent_type: str, runtime: ToolRuntime
) -> dict[str, Any]:
    """Produce the ``billable_calls`` delta for one call, plus an optional warning.

    The delta is always ``{subagent_type: 1}``; accumulation is left to the
    state reducer. When this call is the one that takes the running total up
    to the configured soft cap, the warning text is added under the private
    ``_billable_warn_text`` key for the caller to turn into a message. A
    non-positive cap switches the warning off without reading the state.
    """
    update: dict[str, Any] = {"billable_calls": {subagent_type: 1}}
    soft_cap = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
    if soft_cap > 0:
        recorded = runtime.state.get("billable_calls") or {}
        # Non-int values are skipped so a malformed checkpoint cannot break
        # the tally.
        calls_before = 0
        for count in recorded.values():
            if isinstance(count, int):
                calls_before += count
        calls_after = calls_before + 1
        crosses_cap = calls_before < soft_cap and soft_cap <= calls_after
        if crosses_cap:
            update["_billable_warn_text"] = (
                f"[budget warning] This turn has dispatched {calls_after} "
                f"subagent calls (soft cap = {soft_cap}). Wrap up the "
                "user's request with what you have rather than launching "
                "more specialists; surface a partial answer if needed."
            )
    return update
def _attach_billable(
    cmd: Command, subagent_type: str, runtime: ToolRuntime
) -> Command:
    """Return a copy of ``cmd`` with the per-call billable counter merged in.

    The ``billable_calls`` delta produced by ``_billable_call_update`` is
    written into a *copy* of ``cmd.update`` (the original mapping is never
    mutated). If the helper reports that the soft cap was just crossed, a
    warning ``ToolMessage`` bound to ``runtime.tool_call_id`` is appended to
    the update's ``messages`` list.

    The command's ``graph``, ``resume`` and ``goto`` fields are carried over
    unchanged, so routing information on the incoming command is not lost
    when the update is rebuilt.

    Args:
        cmd: Command produced by the subagent return path.
        subagent_type: Name of the subagent to bill.
        runtime: Tool runtime supplying state and the tool call id.

    Returns:
        A new ``Command`` with the merged update.
    """
    delta = _billable_call_update(subagent_type, runtime)
    warn_text = delta.pop("_billable_warn_text", None)
    # Copy so we never mutate an update mapping that another tool return
    # might still reference.
    update = dict(getattr(cmd, "update", None) or {})
    update.update(delta)
    if warn_text:
        existing_msgs = list(update.get("messages") or [])
        # NOTE(review): this adds a second ToolMessage with the same
        # tool_call_id as the subagent's result message — confirm every
        # target chat provider accepts two tool results for one call.
        existing_msgs.append(
            ToolMessage(content=warn_text, tool_call_id=runtime.tool_call_id)
        )
        update["messages"] = existing_msgs
    # Rebuild with every Command field, not just ``update``; otherwise a
    # command that also carried goto/graph/resume would silently lose them.
    return Command(
        graph=getattr(cmd, "graph", None),
        update=update,
        resume=getattr(cmd, "resume", None),
        goto=getattr(cmd, "goto", ()),
    )
def _safe_message_text(msg: Any) -> str:
"""Pull text out of a BaseMessage without trusting the ``.text`` property.
``BaseMessage.text`` walks ``content_blocks`` and crashes with
``TypeError: 'NoneType' object is not iterable`` when ``content`` is
``None`` (common for tool-call AIMessages whose payload is purely
structured). ``getattr(msg, "text", None)`` does not catch this
because Python evaluates the property body before falling back to
the default. Read ``content`` directly and coerce defensively.
"""
try:
content = getattr(msg, "content", None)
except Exception:
content = None
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
parts: list[str] = []
for block in content:
if isinstance(block, str):
parts.append(block)
elif isinstance(block, dict):
block_text = block.get("text") or block.get("content")
if isinstance(block_text, str):
parts.append(block_text)
return " ".join(parts)
return str(content)
def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]:
    """Compress a subagent's message stream into a compact tool trace.

    Each entry is ``{"tool": <name>, "status": "ok"|"error", "preview":
    <at most 120 chars>}``. Callers attach the list to a ToolMessage's
    ``additional_kwargs`` under ``"surf_tool_trace"``.

    Only tool-result frames contribute. A message counts as one when it
    carries a ``tool_call_id`` or reports ``type == "tool"``. ``name`` is
    deliberately *not* used as the signal: LangChain declares an optional
    ``name`` on every message type, so a named AI/human message would
    otherwise be reported as a tool call.

    Args:
        messages: The subagent's messages, in order.

    Returns:
        One trace entry per tool-result message, in input order.
    """
    trace: list[dict[str, Any]] = []
    for msg in messages:
        tool_call_id_attr = getattr(msg, "tool_call_id", None)
        if not tool_call_id_attr and getattr(msg, "type", None) != "tool":
            # Not a tool result (AI / human / system frame, or a plain dict).
            continue
        raw_status = getattr(msg, "status", None) or "ok"
        # LangChain's ToolMessage reports "success"; normalise it so the
        # trace honours the documented "ok" | "error" vocabulary.
        status = "ok" if raw_status == "success" else raw_status
        preview = _safe_message_text(msg).strip().replace("\n", " ")
        if len(preview) > 120:
            # 117 chars + "..." keeps the preview at exactly 120.
            preview = preview[:117] + "..."
        trace.append(
            {
                "tool": getattr(msg, "name", None) or "<unknown>",
                "status": status,
                "preview": preview,
            }
        )
    return trace
def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
if "messages" not in result:
msg = (
@ -106,15 +305,51 @@ def build_task_tool_with_parent_config(
"output to forward back to the user."
)
raise ValueError(msg)
last_text = getattr(messages[-1], "text", None) or ""
message_text = last_text.rstrip()
message_text = _safe_message_text(messages[-1]).rstrip()
# Tool-trace is purely observability — wrap defensively so a single
# malformed frame never bubbles up and kills the whole user turn.
try:
tool_trace = _build_tool_trace(messages)
except Exception:
logger.exception(
"Failed to build tool_trace for subagent return; "
"continuing without trace."
)
tool_trace = []
tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id)
if tool_trace:
# ``additional_kwargs`` is a free-form dict on BaseMessage; using
# a ``surf_`` prefix avoids collision with provider-specific keys
# (e.g. Anthropic's ``cache_control``). The LLM doesn't see it;
# consumers (UI, observability) read it off the checkpoint.
tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace
return Command(
update={
**state_update,
"messages": [ToolMessage(message_text, tool_call_id=tool_call_id)],
"messages": [tool_msg],
}
)
def _resolve_context_hint(
    subagent_type: str, description: str, runtime: ToolRuntime
) -> str | None:
    """Return the stripped context hint for ``subagent_type``, or ``None``.

    Looks up the provider registered in ``subagent_hint_providers`` and
    calls it with ``(runtime.state, description)``. ``None`` is returned
    when no provider is registered, when the provider raises (the
    exception is logged and swallowed), or when the result is falsy,
    not a string, or blank after stripping.
    """
    hint_fn = subagent_hint_providers.get(subagent_type)
    if hint_fn is None:
        return None

    try:
        raw_hint = hint_fn(runtime.state, description)
    except Exception:
        logger.exception(
            "Context-hint provider for subagent %r raised; skipping hint.",
            subagent_type,
        )
        return None

    if raw_hint and isinstance(raw_hint, str):
        stripped = raw_hint.strip()
        if stripped:
            return stripped
    return None
def _validate_and_prepare_state(
subagent_type: str, description: str, runtime: ToolRuntime
) -> tuple[Runnable, dict]:
@ -122,20 +357,306 @@ def build_task_tool_with_parent_config(
subagent_state = {
k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
}
subagent_state["messages"] = [HumanMessage(content=description)]
hint = _resolve_context_hint(subagent_type, description, runtime)
if hint:
# Prepend as a tagged block so the subagent prompt can pattern-match
# on the section (and a future change can lift it into its own
# ``SystemMessage`` if needed).
payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}"
else:
payload = description
subagent_state["messages"] = [HumanMessage(content=payload)]
return subagent, subagent_state
def _merge_batch_results(
    results: list[tuple[int, str, dict | str, dict | None]],
    runtime: ToolRuntime,
) -> Command:
    """Combine per-child results into one Command with a combined ToolMessage.

    Args:
        results: ``(task_index, subagent_type, payload_or_error_text,
            child_state_update)`` tuples. A ``str`` payload marks a failed
            child (its text is reported verbatim); a ``dict`` payload is the
            child's result, whose last message supplies the reported text.
            The list is not modified.
        runtime: Tool runtime supplying ``tool_call_id`` and the prior
            ``billable_calls`` state.

    Returns:
        A ``Command`` whose update holds the merged child state, a
        ``billable_calls`` delta counting every child (failed ones
        included), and a ``messages`` list with one aggregate ToolMessage
        made of ``[task <index>] ...`` blocks in ascending index order.
        When this batch moves the cumulative call count across
        ``DEFAULT_SUBAGENT_BILLABLE_THRESHOLD``, a budget-warning
        ToolMessage is appended as well.
    """
    # Sort a copy: the original in-place ``results.sort`` reordered the
    # caller's list as a hidden side effect.
    ordered = sorted(results, key=lambda r: r[0])

    merged_state: dict[str, Any] = {}
    billable_delta: dict[str, int] = {}
    message_blocks: list[str] = []
    batch_trace: list[dict[str, Any]] = []

    for task_index, subagent_type, payload, state_update in ordered:
        billable_delta[subagent_type] = billable_delta.get(subagent_type, 0) + 1

        if isinstance(payload, str):
            # Pre-flight error or per-task exception text.
            message_blocks.append(f"[task {task_index}] {payload}")
            batch_trace.append(
                {
                    "task_index": task_index,
                    "subagent_type": subagent_type,
                    "status": "error",
                    "tool_trace": [],
                }
            )
            continue

        messages = payload.get("messages") or []
        last_text = _safe_message_text(messages[-1]).rstrip() if messages else ""
        message_blocks.append(f"[task {task_index}] {last_text or '<empty>'}")

        # The trace is observability only; never let it fail the batch.
        try:
            child_trace = _build_tool_trace(messages)
        except Exception:
            logger.exception(
                "Failed to build tool_trace for batch task_index=%d; continuing.",
                task_index,
            )
            child_trace = []
        batch_trace.append(
            {
                "task_index": task_index,
                "subagent_type": subagent_type,
                "status": "ok",
                "tool_trace": child_trace,
            }
        )

        if state_update:
            # Naive merge: for any key written by more than one child, the
            # child with the highest task_index wins.
            # NOTE(review): this overwrite happens before any graph reducer
            # sees the value, so earlier children's contributions to
            # reducer-backed keys are dropped here — confirm that is intended.
            merged_state.update(state_update)

    aggregate_msg = ToolMessage(
        content="\n\n".join(message_blocks), tool_call_id=runtime.tool_call_id
    )
    if batch_trace:
        aggregate_msg.additional_kwargs["surf_tool_trace"] = batch_trace

    update: dict[str, Any] = {
        **merged_state,
        "billable_calls": billable_delta,
        "messages": [aggregate_msg],
    }

    # Soft-cap warning: compare the cumulative count before and after this
    # batch so the warning fires only on the crossing.
    threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
    if threshold > 0:
        prior = runtime.state.get("billable_calls") or {}
        prior_total = sum(v for v in prior.values() if isinstance(v, int))
        new_total = prior_total + sum(billable_delta.values())
        if prior_total < threshold <= new_total:
            update["messages"].append(
                ToolMessage(
                    content=(
                        f"[budget warning] This turn has dispatched "
                        f"{new_total} subagent calls (soft cap = "
                        f"{threshold}). Wrap up the user's request with "
                        "what you have rather than launching more "
                        "specialists; surface a partial answer if needed."
                    ),
                    tool_call_id=runtime.tool_call_id,
                )
            )
    return Command(update=update)
async def _ainvoke_one_batch_child(
    *,
    task_index: int,
    subagent_type: str,
    description: str,
    runtime: ToolRuntime,
    semaphore: asyncio.Semaphore,
) -> tuple[int, str, dict | str, dict | None]:
    """Run one child of a batched ``task`` call under the concurrency cap.

    Errors are returned as plain text in slot 2 so a single child's
    failure does not abort the whole batch. This covers failures while
    preparing the child's state/config as well as failures of the
    invocation itself. ``GraphInterrupt`` from a batched child is
    currently treated as a hard failure for that child only; batched
    HITL is intentionally out of scope for the v1 rollout (see plan
    tier 2 item 4 risks).

    Args:
        task_index: Position of this child in the input ``tasks`` array.
        subagent_type: Name of the subagent to invoke.
        description: Task description handed to the subagent.
        runtime: Parent tool runtime.
        semaphore: Shared semaphore bounding concurrent children.

    Returns:
        ``(task_index, subagent_type, payload, child_state_update)`` where
        ``payload`` is the subagent result dict on success (with
        ``child_state_update`` holding its keys outside
        ``EXCLUDED_STATE_KEYS``) or an error string on failure (with
        ``child_state_update`` set to ``None``).
    """
    async with semaphore:
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return (
                task_index,
                subagent_type,
                (
                    f"Subagent {subagent_type!r} does not exist; "
                    f"allowed: {allowed_types}"
                ),
                None,
            )
        try:
            # Preparation lives inside the ``try`` so that an exception here
            # becomes this child's error text instead of escaping through
            # ``asyncio.gather`` and failing every sibling.
            subagent, subagent_state = _validate_and_prepare_state(
                subagent_type, description, runtime
            )
            sub_config = subagent_invoke_config(runtime)
            started_at = time.perf_counter()
            result = await _ainvoke_with_timeout(
                subagent.ainvoke(subagent_state, config=sub_config),
                subagent_type=subagent_type,
                started_at=started_at,
            )
        except SubagentInvokeTimeoutError as exc:
            logger.warning(
                "Batch child %d (%s) timed out after %.1fs",
                task_index,
                subagent_type,
                exc.elapsed_seconds,
            )
            return (task_index, subagent_type, str(exc), None)
        except GraphInterrupt:
            # Batched HITL is unsupported in v1 — surface as a failure
            # for this child so the rest of the batch still completes.
            logger.warning(
                "Batch child %d (%s) raised GraphInterrupt; batched HITL "
                "is not supported. Re-dispatch this task as a single "
                "(non-batched) `task(...)` call to get the HITL prompt.",
                task_index,
                subagent_type,
            )
            return (
                task_index,
                subagent_type,
                (
                    f"Subagent {subagent_type!r} needs human approval. "
                    "Re-dispatch this task as a single (non-batched) "
                    "`task(...)` call so the approval card can be shown."
                ),
                None,
            )
        except Exception as exc:
            logger.exception(
                "Batch child %d (%s) raised: %s",
                task_index,
                subagent_type,
                exc,
            )
            return (
                task_index,
                subagent_type,
                f"Subagent {subagent_type!r} error: {exc}",
                None,
            )
        # Forward only the keys the parent is allowed to receive.
        child_state_update = {
            k: v for k, v in result.items() if k not in EXCLUDED_STATE_KEYS
        }
        return (task_index, subagent_type, result, child_state_update)
def _coerce_batch_arg(tasks: Any) -> list[dict] | str:
"""Rescue common LLM-side malformations of the ``tasks`` argument.
Some providers serialise an array argument as a JSON-encoded string,
and small models occasionally hand back a single ``{description,
subagent_type}`` dict instead of a one-element array. Both are
recovered here with a WARN log so the issue is visible in metrics
but the user's turn still completes; truly broken shapes return a
plain string that the caller surfaces as the tool error.
"""
if isinstance(tasks, list):
return tasks
if isinstance(tasks, dict):
logger.warning(
"task: `tasks` was a single dict; coercing to a 1-element list. "
"Orchestrators should send `tasks=[{...}]` directly."
)
return [tasks]
if isinstance(tasks, str):
stripped = tasks.strip()
if not stripped:
return "tasks: argument is empty."
try:
parsed = json.loads(stripped)
except json.JSONDecodeError as exc:
return (
f"tasks: argument is a string but not valid JSON ({exc.msg}). "
"Send a JSON array of `{description, subagent_type}` objects."
)
logger.warning(
"task: `tasks` was a JSON-encoded string; parsed to %s. "
"Orchestrators should send a JSON array directly.",
type(parsed).__name__,
)
return _coerce_batch_arg(parsed)
return (
f"tasks: unsupported type {type(tasks).__name__}; expected an array "
"of `{description, subagent_type}` objects."
)
async def _adispatch_batch(
    tasks: list[dict], runtime: ToolRuntime
) -> Command | str:
    """Validate a ``tasks`` array and fan its children out concurrently.

    Validation stops at the first problem and returns an error string:
    empty array, more than ``MAX_SUBAGENT_BATCH_SIZE`` entries, a non-dict
    entry, or an entry whose ``description`` / ``subagent_type`` is not a
    non-blank string. ``subagent_type`` is stripped before dispatch;
    ``description`` is passed through untouched.

    Valid batches run through ``_ainvoke_one_batch_child`` under a shared
    semaphore sized ``DEFAULT_SUBAGENT_BATCH_CONCURRENCY``; the collected
    results are handed to ``_merge_batch_results``, which produces the
    single ``Command`` returned here.
    """
    if not tasks:
        return "tasks: array is empty; nothing to dispatch."
    if len(tasks) > MAX_SUBAGENT_BATCH_SIZE:
        return (
            f"tasks: too many children ({len(tasks)}); "
            f"max is {MAX_SUBAGENT_BATCH_SIZE}. Split the batch."
        )

    children: list[tuple[int, str, str]] = []
    for position, entry in enumerate(tasks):
        if not isinstance(entry, dict):
            return (
                f"tasks[{position}]: must be an object with description+subagent_type."
            )
        child_description = entry.get("description")
        child_type = entry.get("subagent_type")
        if not (isinstance(child_description, str) and child_description.strip()):
            return f"tasks[{position}]: missing or empty 'description'."
        if not (isinstance(child_type, str) and child_type.strip()):
            return f"tasks[{position}]: missing or empty 'subagent_type'."
        children.append((position, child_type.strip(), child_description))

    # One semaphore shared by every child enforces the concurrency cap.
    limiter = asyncio.Semaphore(DEFAULT_SUBAGENT_BATCH_CONCURRENCY)
    pending = []
    for position, child_type, child_description in children:
        pending.append(
            _ainvoke_one_batch_child(
                task_index=position,
                subagent_type=child_type,
                description=child_description,
                runtime=runtime,
                semaphore=limiter,
            )
        )
    outcomes = await asyncio.gather(*pending)
    return _merge_batch_results(list(outcomes), runtime)
def task(
description: Annotated[
str,
"A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
],
str | None,
"Single-mode: a detailed task description for the subagent. Required unless `tasks` is provided.",
] = None,
subagent_type: Annotated[
str,
"The type of subagent to use. Must be one of the available agent types listed in the tool description.",
],
runtime: ToolRuntime,
str | None,
"Single-mode: the type of subagent to use. Required unless `tasks` is provided.",
] = None,
runtime: ToolRuntime = None, # type: ignore[assignment]
tasks: Annotated[
list[dict] | None,
(
"Batch-mode: array of `{description, subagent_type}` objects. "
"Synchronous path does not support batch mode; orchestrators "
"must use the async event loop to fan out."
),
] = None,
) -> str | Command:
if tasks is not None:
return (
"task: batch mode (`tasks=[...]`) is only supported on the async "
"path. SurfSense orchestrators always run in an event loop, so "
"this should never fire — file a bug if you see it."
)
if not description or not subagent_type:
return (
"task: must provide either single-mode (`description`+`subagent_type`) "
"or batch-mode (`tasks`)."
)
if subagent_type not in subagent_graphs:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
return (
@ -284,16 +805,65 @@ def build_task_tool_with_parent_config(
async def atask(
description: Annotated[
str,
"A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
],
str | None,
"Single-mode: a detailed task description for the subagent. Required unless `tasks` is provided.",
] = None,
subagent_type: Annotated[
str,
"The type of subagent to use. Must be one of the available agent types listed in the tool description.",
],
runtime: ToolRuntime,
str | None,
"Single-mode: the type of subagent to use. Required unless `tasks` is provided.",
] = None,
runtime: ToolRuntime = None, # type: ignore[assignment]
tasks: Annotated[
list[dict] | None,
(
"Batch-mode: array of `{description, subagent_type}` objects "
"to fan out concurrently (max "
f"{MAX_SUBAGENT_BATCH_SIZE}, concurrency "
f"{DEFAULT_SUBAGENT_BATCH_CONCURRENCY}). Mutually exclusive "
"with single-mode args. Batched children do not support "
"human-in-the-loop interrupts; re-dispatch as single mode "
"if a child needs approval."
),
] = None,
) -> str | Command:
atask_start = time.perf_counter()
# Kill switch: when ops flips the spawn-paused flag for this
# workspace, every ``task(...)`` invocation (single- or batch-mode)
# short-circuits with a clear ToolMessage so the orchestrator can
# tell the user what happened and stop hammering downstream APIs.
if await is_spawn_paused(search_space_id):
logger.warning(
"[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s",
search_space_id,
runtime.tool_call_id,
)
return (
"task: subagent dispatch is currently paused for this workspace. "
"Acknowledge to the user that delegation is temporarily disabled "
"(ops kill switch); do not retry until the pause is lifted."
)
if tasks is not None:
if description or subagent_type:
return (
"task: cannot combine `tasks` with `description`/`subagent_type`. "
"Use either single-mode (description+subagent_type) or batch-mode (tasks)."
)
if not runtime.tool_call_id:
raise ValueError("Tool call ID is required for subagent invocation")
coerced = _coerce_batch_arg(tasks)
if isinstance(coerced, str):
return coerced
logger.info(
"[hitl_route] atask BATCH ENTRY: size=%d tool_call_id=%s",
len(coerced),
runtime.tool_call_id,
)
return await _adispatch_batch(coerced, runtime)
if not description or not subagent_type:
return (
"task: must provide either single-mode (`description`+`subagent_type`) "
"or batch-mode (`tasks`)."
)
logger.info(
"[hitl_route] atask ENTRY: subagent_type=%r tool_call_id=%s",
subagent_type,
@ -358,11 +928,37 @@ def build_task_tool_with_parent_config(
subagent_type=subagent_type, path=invoke_path
) as sp:
try:
result = await subagent.ainvoke(
build_resume_command(resume_value, pending_id),
config=sub_config,
result = await _ainvoke_with_timeout(
subagent.ainvoke(
build_resume_command(resume_value, pending_id),
config=sub_config,
),
subagent_type=subagent_type,
started_at=ainvoke_start,
)
sp.set_attribute("subagent.outcome", ainvoke_outcome)
except SubagentInvokeTimeoutError as exc:
ainvoke_outcome = "timeout"
sp.set_attribute("subagent.outcome", ainvoke_outcome)
ot_metrics.record_subagent_invoke_duration(
(time.perf_counter() - ainvoke_start) * 1000,
subagent_type=subagent_type,
path=invoke_path,
outcome=ainvoke_outcome,
)
ot_metrics.record_subagent_invoke_outcome(
subagent_type=subagent_type,
path=invoke_path,
outcome=ainvoke_outcome,
)
logger.warning(
"Subagent %r ainvoke (resume) timed out after %.1fs",
subagent_type,
exc.elapsed_seconds,
)
return _synthesize_timeout_command(
exc, tool_call_id=runtime.tool_call_id
)
except GraphInterrupt as gi:
ainvoke_outcome = "interrupted"
sp.set_attribute("subagent.outcome", ainvoke_outcome)
@ -408,10 +1004,34 @@ def build_task_tool_with_parent_config(
subagent_type=subagent_type, path=invoke_path
) as sp:
try:
result = await subagent.ainvoke(
subagent_state, config=sub_config
result = await _ainvoke_with_timeout(
subagent.ainvoke(subagent_state, config=sub_config),
subagent_type=subagent_type,
started_at=ainvoke_start,
)
sp.set_attribute("subagent.outcome", ainvoke_outcome)
except SubagentInvokeTimeoutError as exc:
ainvoke_outcome = "timeout"
sp.set_attribute("subagent.outcome", ainvoke_outcome)
ot_metrics.record_subagent_invoke_duration(
(time.perf_counter() - ainvoke_start) * 1000,
subagent_type=subagent_type,
path=invoke_path,
outcome=ainvoke_outcome,
)
ot_metrics.record_subagent_invoke_outcome(
subagent_type=subagent_type,
path=invoke_path,
outcome=ainvoke_outcome,
)
logger.warning(
"Subagent %r ainvoke (fresh) timed out after %.1fs",
subagent_type,
exc.elapsed_seconds,
)
return _synthesize_timeout_command(
exc, tool_call_id=runtime.tool_call_id
)
except GraphInterrupt as gi:
ainvoke_outcome = "interrupted"
sp.set_attribute("subagent.outcome", ainvoke_outcome)
@ -481,7 +1101,7 @@ def build_task_tool_with_parent_config(
path=invoke_path,
outcome=ainvoke_outcome,
)
return cmd
return _attach_billable(cmd, subagent_type, runtime)
return StructuredTool.from_function(
name="task",

View file

@ -52,9 +52,7 @@ class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
messages.insert(insert_at, SystemMessage(content=tree_text))
priority_count = 0
if priority:
priority_count = (
len(priority) if hasattr(priority, "__len__") else 1
)
priority_count = len(priority) if hasattr(priority, "__len__") else 1
messages.insert(insert_at, _render_priority_message(priority))
_perf_log.info(
"[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",

View file

@ -17,8 +17,7 @@ from langchain_core.tools import BaseTool
from langgraph.types import interrupt
from app.agents.new_chat.permissions import Rule
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
from .decision import normalize_permission_decision
from .payload import PERMISSION_ASK_INTERRUPT_TYPE, build_permission_ask_payload

View file

@ -173,6 +173,7 @@ def build_main_agent_deepagent_middleware(
subagents=subagents,
system_prompt=None,
task_description=TASK_TOOL_DESCRIPTION,
search_space_id=search_space_id,
),
resilience.model_call_limit,
resilience.tool_call_limit,

View file

@ -42,14 +42,16 @@ Return **only** one JSON object (no markdown/prose):
"evidence": {
"artifact_type": "report" | "podcast" | "video_presentation" | "resume" | "image" | null,
"artifact_id": string | null,
"artifact_location": string | null
"artifact_location": string | null,
"receipts": Receipt[] | null
},
"next_step": string | null,
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
Route-specific rules:
- `evidence.receipts` quotes the Receipt(s) returned by `generate_report` / `generate_podcast` / `generate_video_presentation` / `generate_resume` / `generate_image` this turn, verbatim. The Receipt's `type` enum is one of `report` | `podcast` | `video_presentation` | `resume` | `image`.
<include snippet="output_contract_base"/>
</output_contract>
<include snippet="verifiable_handle"/>

View file

@ -4,11 +4,15 @@ import hashlib
import logging
from typing import Any
from langchain.tools import ToolRuntime
from langchain_core.tools import tool
from langgraph.types import Command
from litellm import aimage_generation
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.config import config
from app.db import (
ImageGeneration,
@ -59,15 +63,22 @@ def _get_global_image_gen_config(config_id: int) -> dict | None:
def create_generate_image_tool(
search_space_id: int,
db_session: AsyncSession,
image_generation_config_id_override: int | None = None,
):
"""Create ``generate_image`` with bound search space; DB work uses a per-call session."""
"""Create ``generate_image`` with bound search space; DB work uses a per-call session.
``image_generation_config_id_override``: when set (automations running on a
captured model), use this config id instead of reading the search space's
live ``image_generation_config_id``.
"""
del db_session # use a fresh per-call session, see below
@tool
async def generate_image(
prompt: str,
runtime: ToolRuntime,
n: int = 1,
) -> dict[str, Any]:
) -> Command:
"""
Generate an image from a text description using AI image models.
@ -82,22 +93,48 @@ def create_generate_image_tool(
Returns:
A dictionary containing the generated image(s) for display in the chat.
"""
def _failed(payload: dict[str, Any], *, error: str) -> Command:
    """Wrap ``payload`` with a failed image-generation receipt.

    The receipt preview is the first 200 characters of the enclosing
    tool call's ``prompt`` (``None`` when the prompt is empty), and the
    result is bound to the current ``runtime.tool_call_id``.
    """
    prompt_preview = prompt[:200] if prompt else None
    failure_receipt = make_receipt(
        route="deliverables",
        type="image",
        operation="generate",
        status="failed",
        preview=prompt_preview,
        error=error,
    )
    return with_receipt(
        payload=payload,
        receipt=failure_receipt,
        tool_call_id=runtime.tool_call_id,
    )
try:
# Use a per-call session so concurrent tool calls don't share an
# AsyncSession (which is not concurrency-safe). The streaming
# task's session is shared across every tool; without isolation,
# autoflushes from a concurrent writer poison this tool too.
async with shielded_async_session() as session:
result = await session.execute(
select(SearchSpace).filter(SearchSpace.id == search_space_id)
)
search_space = result.scalars().first()
if not search_space:
return {"error": "Search space not found"}
if image_generation_config_id_override is not None:
# Automation run: use the captured image model, insulated from
# later search-space changes. No search-space read needed.
config_id = (
image_generation_config_id_override or IMAGE_GEN_AUTO_MODE_ID
)
else:
result = await session.execute(
select(SearchSpace).filter(SearchSpace.id == search_space_id)
)
search_space = result.scalars().first()
if not search_space:
return _failed(
{"error": "Search space not found"},
error="Search space not found",
)
config_id = (
search_space.image_generation_config_id or IMAGE_GEN_AUTO_MODE_ID
)
config_id = (
search_space.image_generation_config_id
or IMAGE_GEN_AUTO_MODE_ID
)
# Build generation kwargs
# NOTE: size, quality, and style are intentionally NOT passed.
@ -112,19 +149,19 @@ def create_generate_image_tool(
# Call litellm based on config type
if is_image_gen_auto_mode(config_id):
if not ImageGenRouterService.is_initialized():
return {
"error": "No image generation models configured. "
err = (
"No image generation models configured. "
"Please add an image model in Settings > Image Models."
}
)
return _failed({"error": err}, error=err)
response = await ImageGenRouterService.aimage_generation(
prompt=prompt, model="auto", **gen_kwargs
)
elif config_id < 0:
cfg = _get_global_image_gen_config(config_id)
if not cfg:
return {
"error": f"Image generation config {config_id} not found"
}
err = f"Image generation config {config_id} not found"
return _failed({"error": err}, error=err)
model_string = _build_model_string(
cfg.get("provider", ""),
@ -151,9 +188,8 @@ def create_generate_image_tool(
)
db_cfg = cfg_result.scalars().first()
if not db_cfg:
return {
"error": f"Image generation config {config_id} not found"
}
err = f"Image generation config {config_id} not found"
return _failed({"error": err}, error=err)
model_string = _build_model_string(
db_cfg.provider.value,
@ -200,7 +236,10 @@ def create_generate_image_tool(
# Extract image URLs from response
images = response_dict.get("data", [])
if not images:
return {"error": "No images were generated"}
return _failed(
{"error": "No images were generated"},
error="No images were generated",
)
first_image = images[0]
revised_prompt = first_image.get("revised_prompt", prompt)
@ -219,11 +258,14 @@ def create_generate_image_tool(
f"{db_image_gen_id}/image?token={access_token}"
)
else:
return {"error": "No displayable image data in the response"}
return _failed(
{"error": "No displayable image data in the response"},
error="No displayable image data in the response",
)
image_id = f"image-{hashlib.md5(image_url.encode()).hexdigest()[:12]}"
return {
payload = {
"id": image_id,
"assetId": image_url,
"src": image_url,
@ -236,12 +278,26 @@ def create_generate_image_tool(
"prompt": prompt,
"image_count": len(images),
}
return with_receipt(
payload=payload,
receipt=make_receipt(
route="deliverables",
type="image",
operation="generate",
status="success",
external_id=str(db_image_gen_id),
verifiable_url=image_url,
preview=(revised_prompt or prompt)[:200],
),
tool_call_id=runtime.tool_call_id,
)
except Exception as e:
logger.exception("Image generation failed in tool")
return {
"error": f"Image generation failed: {e!s}",
"prompt": prompt,
}
err = f"Image generation failed: {e!s}"
return _failed(
{"error": err, "prompt": prompt},
error=err,
)
return generate_image

View file

@ -51,5 +51,8 @@ def load_tools(
create_generate_image_tool(
search_space_id=d["search_space_id"],
db_session=d["db_session"],
image_generation_config_id_override=d.get(
"image_generation_config_id_override"
),
),
]

View file

@ -1,12 +1,28 @@
"""Factory for a podcast-generation tool that queues background work and returns an ID for polling."""
"""Factory for a podcast-generation tool.
Dispatches the heavy generation to Celery and then polls the podcast row
until it reaches a terminal status (READY/FAILED). The tool always
returns a real terminal ``Receipt``, never a pending one. The wait is
bounded by the existing per-invocation safety net
(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode,
HTTP / process lifetime in single-agent mode).
"""
import logging
from typing import Any
from langchain.tools import ToolRuntime
from langchain_core.tools import tool
from langgraph.types import Command
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.deliverable_wait import wait_for_deliverable
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.db import Podcast, PodcastStatus, shielded_async_session
logger = logging.getLogger(__name__)
def create_generate_podcast_tool(
search_space_id: int,
@ -19,9 +35,10 @@ def create_generate_podcast_tool(
@tool
async def generate_podcast(
source_content: str,
runtime: ToolRuntime,
podcast_title: str = "SurfSense Podcast",
user_prompt: str | None = None,
) -> dict[str, Any]:
) -> Command:
"""
Generate a podcast from the provided content.
@ -70,23 +87,99 @@ def create_generate_podcast_tool(
user_prompt=user_prompt,
)
print(f"[generate_podcast] Created podcast {podcast_id}, task: {task.id}")
logger.info(
"[generate_podcast] Created podcast %s, task: %s",
podcast_id,
task.id,
)
return {
"status": PodcastStatus.PENDING.value,
# Wait until the Celery worker flips the row to a terminal
# state. The wait is bounded only by the subagent invoke
# timeout (multi-agent) or HTTP lifetime (single-agent) —
# see app.agents.shared.deliverable_wait for details.
terminal_status, columns, elapsed = await wait_for_deliverable(
model=Podcast,
row_id=podcast_id,
columns=[Podcast.status, Podcast.file_location],
terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED},
)
if terminal_status == PodcastStatus.READY:
file_location = columns[1] if columns else None
logger.info(
"[generate_podcast] Podcast %s READY in %.2fs (file=%s)",
podcast_id,
elapsed,
file_location,
)
payload: dict[str, Any] = {
"status": PodcastStatus.READY.value,
"podcast_id": podcast_id,
"title": podcast_title,
"file_location": file_location,
"message": ("Podcast generated and saved to your podcast panel."),
}
return with_receipt(
payload=payload,
receipt=make_receipt(
route="deliverables",
type="podcast",
operation="generate",
status="success",
external_id=str(podcast_id),
preview=podcast_title,
),
tool_call_id=runtime.tool_call_id,
)
# Only other terminal state is FAILED.
logger.warning(
"[generate_podcast] Podcast %s FAILED in %.2fs",
podcast_id,
elapsed,
)
err = "Background worker reported FAILED status for this podcast."
payload = {
"status": PodcastStatus.FAILED.value,
"podcast_id": podcast_id,
"title": podcast_title,
"message": "Podcast generation started. This may take a few minutes.",
"error": err,
}
return with_receipt(
payload=payload,
receipt=make_receipt(
route="deliverables",
type="podcast",
operation="generate",
status="failed",
external_id=str(podcast_id),
preview=podcast_title,
error=err,
),
tool_call_id=runtime.tool_call_id,
)
except Exception as e:
error_message = str(e)
print(f"[generate_podcast] Error: {error_message}")
return {
logger.exception("[generate_podcast] Error: %s", error_message)
payload = {
"status": PodcastStatus.FAILED.value,
"error": error_message,
"title": podcast_title,
"podcast_id": None,
}
receipt = make_receipt(
route="deliverables",
type="podcast",
operation="generate",
status="failed",
preview=podcast_title,
error=error_message,
)
return with_receipt(
payload=payload,
receipt=receipt,
tool_call_id=runtime.tool_call_id,
)
return generate_podcast

View file

@ -6,10 +6,14 @@ import logging
import re
from typing import Any
from langchain.tools import ToolRuntime
from langchain_core.callbacks import dispatch_custom_event
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from langgraph.types import Command
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.db import Report, shielded_async_session
from app.services.connector_service import ConnectorService
from app.services.llm_service import get_document_summary_llm
@ -573,13 +577,14 @@ def create_generate_report_tool(
@tool
async def generate_report(
topic: str,
runtime: ToolRuntime,
source_content: str = "",
source_strategy: str = "provided",
search_queries: list[str] | None = None,
report_style: str = "detailed",
user_instructions: str | None = None,
parent_report_id: int | None = None,
) -> dict[str, Any]:
) -> Command:
"""
Generate a structured Markdown report artifact from provided content.
@ -692,6 +697,23 @@ def create_generate_report_tool(
parent_report_content: str | None = None
report_group_id: int | None = None
def _failed(payload: dict[str, Any], *, error: str) -> Command:
    """Return the ``with_receipt`` result for a failed report generation.

    Args:
        payload: Tool payload to hand back; its ``report_id`` entry (when
            not ``None``) is stringified and used as the receipt's
            external id.
        error: Error text recorded on the receipt.
    """
    saved_id = payload.get("report_id")
    external_id = None if saved_id is None else str(saved_id)
    failure_receipt = make_receipt(
        route="deliverables",
        type="report",
        operation="generate",
        status="failed",
        external_id=external_id,
        preview=topic,
        error=error,
    )
    return with_receipt(
        payload=payload,
        receipt=failure_receipt,
        tool_call_id=runtime.tool_call_id,
    )
async def _save_failed_report(error_msg: str) -> int | None:
"""Persist a failed report row using a short-lived session."""
try:
@ -753,12 +775,15 @@ def create_generate_report_tool(
"No LLM configured. Please configure a language model in Settings."
)
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
},
error=error_msg,
)
# Build the user instructions string
user_instructions_section = ""
@ -971,12 +996,15 @@ def create_generate_report_tool(
if not report_content or not isinstance(report_content, str):
error_msg = "LLM returned empty or invalid content"
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
},
error=error_msg,
)
# LLMs often wrap output in ```markdown ... ``` fences — strip them
report_content = _strip_wrapping_code_fences(report_content)
@ -984,12 +1012,15 @@ def create_generate_report_tool(
if not report_content:
error_msg = "LLM returned empty or invalid content"
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
},
error=error_msg,
)
# Strip any existing footer(s) carried over from parent version(s)
while report_content.rstrip().endswith(_REPORT_FOOTER):
@ -1036,7 +1067,7 @@ def create_generate_report_tool(
f"{metadata.get('section_count', 0)} sections"
)
return {
payload: dict[str, Any] = {
"status": "ready",
"report_id": saved_report_id,
"title": topic,
@ -1045,17 +1076,32 @@ def create_generate_report_tool(
"report_markdown": report_content,
"message": f"Report generated successfully: {topic}",
}
receipt = make_receipt(
route="deliverables",
type="report",
operation="generate",
status="success",
external_id=str(saved_report_id),
preview=topic,
)
return with_receipt(
payload=payload,
receipt=receipt,
tool_call_id=runtime.tool_call_id,
)
except Exception as e:
error_message = str(e)
logger.exception(f"[generate_report] Error: {error_message}")
report_id = await _save_failed_report(error_message)
return {
"status": "failed",
"error": error_message,
"report_id": report_id,
"title": topic,
}
return _failed(
{
"status": "failed",
"error": error_message,
"report_id": report_id,
"title": topic,
},
error=error_message,
)
return generate_report

View file

@ -8,10 +8,14 @@ from typing import Any
import pypdf
import typst
from langchain.tools import ToolRuntime
from langchain_core.callbacks import dispatch_custom_event
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from langgraph.types import Command
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.db import Report, shielded_async_session
from app.services.llm_service import get_document_summary_llm
@ -429,10 +433,11 @@ def create_generate_resume_tool(
@tool
async def generate_resume(
user_info: str,
runtime: ToolRuntime,
user_instructions: str | None = None,
parent_report_id: int | None = None,
max_pages: int = 1,
) -> dict[str, Any]:
) -> Command:
"""
Generate a professional resume as a Typst document.
@ -476,6 +481,41 @@ def create_generate_resume_tool(
template = _get_template()
llm_reference = _build_llm_reference(template)
def _success(payload: dict[str, Any], *, report_id: int, title: str) -> Command:
    """Return the ``with_receipt`` result for a successfully saved resume.

    Args:
        payload: Tool payload to hand back unchanged.
        report_id: Id of the saved row; stringified into the receipt's
            external id.
        title: Resume title used as the receipt preview.
    """
    success_receipt = make_receipt(
        route="deliverables",
        type="resume",
        operation="generate",
        status="success",
        external_id=str(report_id),
        preview=title,
    )
    return with_receipt(
        payload=payload,
        receipt=success_receipt,
        tool_call_id=runtime.tool_call_id,
    )
def _failed(
    payload: dict[str, Any],
    *,
    report_id: int | None,
    error: str,
    title: str = "Resume",
) -> Command:
    """Return the ``with_receipt`` result for a failed resume generation.

    Args:
        payload: Tool payload to hand back unchanged.
        report_id: Id of the persisted failure row, or ``None`` when no row
            was saved; ``None`` is passed through as the external id.
        error: Error text recorded on the receipt.
        title: Receipt preview; defaults to ``"Resume"``.
    """
    external_id = None if report_id is None else str(report_id)
    failure_receipt = make_receipt(
        route="deliverables",
        type="resume",
        operation="generate",
        status="failed",
        external_id=external_id,
        preview=title,
        error=error,
    )
    return with_receipt(
        payload=payload,
        receipt=failure_receipt,
        tool_call_id=runtime.tool_call_id,
    )
async def _save_failed_report(error_msg: str) -> int | None:
try:
async with shielded_async_session() as session:
@ -514,13 +554,17 @@ def create_generate_resume_tool(
except ValueError as e:
error_msg = str(e)
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_msg,
)
# ── Phase 1: READ ─────────────────────────────────────────────
async with shielded_async_session() as read_session:
@ -541,13 +585,17 @@ def create_generate_resume_tool(
"No LLM configured. Please configure a language model in Settings."
)
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_msg,
)
# ── Phase 2: LLM GENERATION ───────────────────────────────────
@ -588,13 +636,17 @@ def create_generate_resume_tool(
if not body or not isinstance(body, str):
error_msg = "LLM returned empty or invalid content"
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_msg,
)
body = _strip_typst_fences(body)
body = _strip_imports(body)
@ -661,13 +713,17 @@ def create_generate_resume_tool(
f"{compile_error or 'Unknown compile error'}"
)
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_msg,
)
actual_pages = _count_pdf_pages(pdf_bytes)
if actual_pages <= validated_max_pages:
@ -700,13 +756,17 @@ def create_generate_resume_tool(
):
error_msg = "LLM returned empty content while compressing resume"
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_msg,
)
body = _strip_typst_fences(compress_response.content)
body = _strip_imports(body)
@ -718,13 +778,17 @@ def create_generate_resume_tool(
f"Hard limit: <= {MAX_RESUME_PAGES} page(s), actual: {actual_pages}."
)
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_msg,
)
# ── Phase 4: SAVE ─────────────────────────────────────────────
dispatch_custom_event(
@ -768,32 +832,40 @@ def create_generate_resume_tool(
logger.info(f"[generate_resume] Created resume {saved_id}: {resume_title}")
return {
"status": "ready",
"report_id": saved_id,
"title": resume_title,
"content_type": "typst",
"is_revision": bool(parent_content),
"message": (
f"Resume generated successfully: {resume_title}"
if target_page_met
else (
f"Resume generated, but could not fit the target of <= {validated_max_pages} "
f"page(s). Final length: {actual_pages} page(s)."
)
),
}
return _success(
{
"status": "ready",
"report_id": saved_id,
"title": resume_title,
"content_type": "typst",
"is_revision": bool(parent_content),
"message": (
f"Resume generated successfully: {resume_title}"
if target_page_met
else (
f"Resume generated, but could not fit the target of <= {validated_max_pages} "
f"page(s). Final length: {actual_pages} page(s)."
)
),
},
report_id=saved_id,
title=resume_title,
)
except Exception as e:
error_message = str(e)
logger.exception(f"[generate_resume] Error: {error_message}")
report_id = await _save_failed_report(error_message)
return {
"status": "failed",
"error": error_message,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
}
return _failed(
{
"status": "failed",
"error": error_message,
"report_id": report_id,
"title": "Resume",
"content_type": "typst",
},
report_id=report_id,
error=error_message,
)
return generate_resume

View file

@ -1,12 +1,29 @@
"""Factory for a video-presentation tool that queues background work and returns an ID for polling."""
"""Factory for a video-presentation tool.
Dispatches the heavy generation to Celery and then polls the
video-presentation row until it reaches a terminal status (READY/FAILED).
The tool always returns a real terminal ``Receipt`` — never a pending
one. The wait is bounded by the existing per-invocation safety net
(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode,
HTTP / process lifetime in single-agent mode). Video rendering can be
heavy; raise that ceiling if your generations routinely exceed it.
"""
import logging
from typing import Any
from langchain.tools import ToolRuntime
from langchain_core.tools import tool
from langgraph.types import Command
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.deliverable_wait import wait_for_deliverable
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
logger = logging.getLogger(__name__)
def create_generate_video_presentation_tool(
search_space_id: int,
@ -19,9 +36,10 @@ def create_generate_video_presentation_tool(
@tool
async def generate_video_presentation(
source_content: str,
runtime: ToolRuntime,
video_title: str = "SurfSense Presentation",
user_prompt: str | None = None,
) -> dict[str, Any]:
) -> Command:
"""Generate a video presentation from the provided content.
Use this tool when the user asks to create a video, presentation, slides, or slide deck.
@ -56,25 +74,100 @@ def create_generate_video_presentation_tool(
user_prompt=user_prompt,
)
print(
f"[generate_video_presentation] Created video presentation {video_pres_id}, task: {task.id}"
logger.info(
"[generate_video_presentation] Created video presentation %s, task: %s",
video_pres_id,
task.id,
)
return {
"status": VideoPresentationStatus.PENDING.value,
# Wait until the Celery worker flips the row to a terminal
# state. The wait is bounded only by the subagent invoke
# timeout (multi-agent) or HTTP lifetime (single-agent) —
# see app.agents.shared.deliverable_wait for details.
terminal_status, _columns, elapsed = await wait_for_deliverable(
model=VideoPresentation,
row_id=video_pres_id,
columns=[VideoPresentation.status],
terminal_statuses={
VideoPresentationStatus.READY,
VideoPresentationStatus.FAILED,
},
)
if terminal_status == VideoPresentationStatus.READY:
logger.info(
"[generate_video_presentation] %s READY in %.2fs",
video_pres_id,
elapsed,
)
payload: dict[str, Any] = {
"status": VideoPresentationStatus.READY.value,
"video_presentation_id": video_pres_id,
"title": video_title,
"message": "Video presentation generated and saved.",
}
return with_receipt(
payload=payload,
receipt=make_receipt(
route="deliverables",
type="video_presentation",
operation="generate",
status="success",
external_id=str(video_pres_id),
preview=video_title,
),
tool_call_id=runtime.tool_call_id,
)
# Only other terminal state is FAILED.
logger.warning(
"[generate_video_presentation] %s FAILED in %.2fs",
video_pres_id,
elapsed,
)
err = (
"Background worker reported FAILED status for this video presentation."
)
payload = {
"status": VideoPresentationStatus.FAILED.value,
"video_presentation_id": video_pres_id,
"title": video_title,
"message": "Video presentation generation started. This may take a few minutes.",
"error": err,
}
return with_receipt(
payload=payload,
receipt=make_receipt(
route="deliverables",
type="video_presentation",
operation="generate",
status="failed",
external_id=str(video_pres_id),
preview=video_title,
error=err,
),
tool_call_id=runtime.tool_call_id,
)
except Exception as e:
error_message = str(e)
print(f"[generate_video_presentation] Error: {error_message}")
return {
logger.exception("[generate_video_presentation] Error: %s", error_message)
payload = {
"status": VideoPresentationStatus.FAILED.value,
"error": error_message,
"title": video_title,
"video_presentation_id": None,
}
return with_receipt(
payload=payload,
receipt=make_receipt(
route="deliverables",
type="video_presentation",
operation="generate",
status="failed",
preview=video_title,
error=error_message,
),
tool_call_id=runtime.tool_call_id,
)
return generate_video_presentation

View file

@ -150,11 +150,12 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
<include snippet="output_contract_base"/>
Route-specific rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -117,11 +117,12 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
<include snippet="output_contract_base"/>
Route-specific rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -6,7 +6,7 @@ Persist durable preferences/facts/instructions with `update_memory` while avoidi
</goal>
<visibility_scope>
{{MEMORY_VISIBILITY_POLICY}}
Memory is search-space-scoped; do not assume cross-workspace visibility.
</visibility_scope>
<available_tools>
@ -53,10 +53,8 @@ Return **only** one JSON object (no markdown/prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- `evidence.memory_category` is a semantic classification for supervisor logs
only. It is not the persisted storage format and must not force inline
`[fact|preference|instruction]` markers into saved memory.

View file

@ -8,7 +8,6 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio
<available_tools>
- `web_search`
- `scrape_webpage`
- `search_surfsense_docs`
</available_tools>
<tool_policy>
@ -46,10 +45,8 @@ Return **only** one JSON object (no markdown/prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
</output_contract>

View file

@ -1,11 +1,9 @@
"""Research-stage tools: web search, scrape, and in-product doc search."""
"""Research-stage tools: web search and scrape."""
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .web_search import create_web_search_tool
__all__ = [
"create_scrape_webpage_tool",
"create_search_surfsense_docs_tool",
"create_web_search_tool",
]

View file

@ -9,7 +9,6 @@ from langchain_core.tools import BaseTool
from app.agents.new_chat.permissions import Ruleset
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .web_search import create_web_search_tool
NAME = "research"
@ -27,5 +26,4 @@ def load_tools(
available_connectors=d.get("available_connectors"),
),
create_scrape_webpage_tool(firecrawl_api_key=d.get("firecrawl_api_key")),
create_search_surfsense_docs_tool(db_session=d["db_session"]),
]

View file

@ -1,145 +0,0 @@
"""Semantic search over pre-indexed in-app documentation chunks for user how-to questions."""
import asyncio
import json
from langchain_core.tools import tool
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
from app.utils.document_converters import embed_text
from app.utils.surfsense_docs import surfsense_docs_public_url
def format_surfsense_docs_results(results: list[tuple]) -> str:
    """Format (chunk, document) rows as XML with ``doc-`` chunk IDs for citations and UI routing.

    Args:
        results: ``(chunk, document)`` pairs. Each chunk is read for ``id``
            and ``content``; each document for ``id``, ``title`` and
            ``source``.

    Returns:
        One ``<document>`` element per distinct document id, in first-seen
        order, with that document's chunks in input order. When ``results``
        is empty, a fixed "nothing found" sentence is returned instead.
    """
    if not results:
        return "No relevant Surfsense documentation found for your query."

    def _cdata(value: object) -> str:
        # A literal "]]>" inside the text would close the CDATA section early
        # and let the remainder be read as markup. Splitting the terminator
        # across two adjacent sections keeps the text intact for any parser.
        text = f"{value}".replace("]]>", "]]]]><![CDATA[>")
        return f"<![CDATA[{text}]]>"

    # Group chunks by document (dict order preserves first appearance).
    grouped: dict[int, dict] = {}
    for chunk, doc in results:
        public_url = surfsense_docs_public_url(doc.source)
        if doc.id not in grouped:
            grouped[doc.id] = {
                "document_id": f"doc-{doc.id}",
                "document_type": "SURFSENSE_DOCS",
                "title": doc.title,
                "url": public_url,
                "metadata": {"source": doc.source, "public_url": public_url},
                "chunks": [],
            }
        grouped[doc.id]["chunks"].append(
            {
                "chunk_id": f"doc-{chunk.id}",
                "content": chunk.content,
            }
        )

    # Render XML matching format_documents_for_context structure
    parts: list[str] = []
    for g in grouped.values():
        metadata_json = json.dumps(g["metadata"], ensure_ascii=False)
        parts.extend(
            [
                "<document>",
                "<document_metadata>",
                f" <document_id>{g['document_id']}</document_id>",
                f" <document_type>{g['document_type']}</document_type>",
                f" <title>{_cdata(g['title'])}</title>",
                f" <url>{_cdata(g['url'])}</url>",
                f" <metadata_json>{_cdata(metadata_json)}</metadata_json>",
                "</document_metadata>",
                "",
                "<document_content>",
            ]
        )
        parts.extend(
            f" <chunk id='{ch['chunk_id']}'>{_cdata(ch['content'])}</chunk>"
            for ch in g["chunks"]
        )
        parts.extend(["</document_content>", "</document>", ""])

    return "\n".join(parts).strip()
async def search_surfsense_docs_async(
    query: str,
    db_session: AsyncSession,
    top_k: int = 10,
) -> str:
    """
    Look up Surfsense documentation chunks closest to ``query``.

    The query is embedded with ``embed_text`` on a worker thread, chunks are
    joined to their parent documents and ordered by the ``<=>`` operator
    between the stored chunk embedding and the query embedding, and the first
    ``top_k`` rows are formatted with ``format_surfsense_docs_results``.

    Args:
        query: The search query about Surfsense usage
        db_session: Database session used to execute the statement
        top_k: Maximum number of chunk rows to fetch

    Returns:
        Formatted string with the matching documentation content
    """
    # embed_text is synchronous; run it off the event loop.
    query_vector = await asyncio.to_thread(embed_text, query)

    chunk_belongs_to_doc = SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id
    distance_to_query = SurfsenseDocsChunk.embedding.op("<=>")(query_vector)

    statement = select(SurfsenseDocsChunk, SurfsenseDocsDocument)
    statement = statement.join(SurfsenseDocsDocument, chunk_belongs_to_doc)
    statement = statement.order_by(distance_to_query)
    statement = statement.limit(top_k)

    matches = (await db_session.execute(statement)).all()
    return format_surfsense_docs_results(matches)
def create_search_surfsense_docs_tool(db_session: AsyncSession):
"""
Build the ``search_surfsense_docs`` tool bound to one database session.
Args:
db_session: Session captured by the returned tool and forwarded to
``search_surfsense_docs_async`` on every call
Returns:
The ``@tool``-decorated ``search_surfsense_docs`` coroutine
"""
# NOTE(review): langchain's ``@tool`` is expected to use the inner docstring
# below as the model-facing tool description, so its wording is behavior,
# not just documentation — confirm before editing it.
@tool
async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
"""
Search Surfsense documentation for help with using the application.
Use this tool when the user asks questions about:
- How to use Surfsense features
- Installation and setup instructions
- Configuration options and settings
- Troubleshooting common issues
- Available connectors and integrations
- Browser extension usage
- API documentation
This searches the official Surfsense documentation that was indexed
at deployment time. It does NOT search the user's personal knowledge base.
Args:
query: The search query about Surfsense usage or features
top_k: Number of documentation chunks to retrieve (default: 10)
Returns:
Relevant documentation content formatted with chunk IDs for citations
"""
# Thin delegate: all work happens in search_surfsense_docs_async using the
# session captured from the factory argument.
return await search_surfsense_docs_async(
query=query,
db_session=db_session,
top_k=top_k,
)
return search_surfsense_docs

View file

@ -92,12 +92,12 @@ Return **only** one JSON object (no markdown, no prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: base, table, field, choice, record, etc.).
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; up to 10 entries, then `"...and N more"`).
</output_contract>
<include snippet="verifiable_handle"/>
Discover before you mutate; never guess identifiers, choice IDs, or required fields.

View file

@ -111,11 +111,12 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For `search_calendar_events` results, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; up to 10 entries, then `"...and N more"`).
- For ambiguous matches across `update_calendar_event` / `delete_calendar_event`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`, where `label` should include the event title and start time for human readability).
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -93,12 +93,12 @@ Return **only** one JSON object (no markdown, no prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: task, list, member, status, custom-field choice, etc.).
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (task id, title, status, assignees; up to 10 entries, then `"...and N more"`).
</output_contract>
<include snippet="verifiable_handle"/>
Discover before you mutate; never guess identifiers, list statuses, or assignees.

View file

@ -100,9 +100,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -108,9 +108,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Resolve before you call; verify before you send; map every tool outcome faithfully.

View file

@ -98,9 +98,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -110,11 +110,12 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For `search_gmail` results, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; up to 10 entries, then `"...and N more"`).
- For ambiguous matches across `update_gmail_draft` / `trash_gmail_email` / `read_gmail_email`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`).
<include snippet="verifiable_handle"/>
Infer before you call; verify before you send; map every tool outcome faithfully.

View file

@ -5,12 +5,16 @@ from datetime import datetime
from email.mime.text import MIMEText
from typing import Any
from langchain.tools import ToolRuntime
from langchain_core.tools import tool
from langgraph.types import Command
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
request_approval,
)
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.services.gmail import GmailToolMetadataService
logger = logging.getLogger(__name__)
@ -26,9 +30,10 @@ def create_send_gmail_email_tool(
to: str,
subject: str,
body: str,
runtime: ToolRuntime,
cc: str | None = None,
bcc: str | None = None,
) -> dict[str, Any]:
) -> Command:
"""Send an email via Gmail.
Use when the user explicitly asks to send an email. This sends the
@ -60,11 +65,34 @@ def create_send_gmail_email_tool(
"""
logger.info(f"send_gmail_email called: to='{to}', subject='{subject}'")
def _emit(
    payload: dict[str, Any],
    *,
    success: bool,
    external_id: str | None = None,
    error: str | None = None,
) -> Command:
    """Return the ``with_receipt`` result for one outcome of this send attempt.

    The receipt is always route ``gmail`` / type ``message`` / operation
    ``send``. Its preview is built from the ``to`` and ``subject`` arguments
    the tool was called with, truncated to 200 characters.

    Args:
        payload: Tool payload to hand back unchanged.
        success: ``True`` records status ``"success"``, otherwise ``"failed"``.
        external_id: Optional identifier stored on the receipt.
        error: Optional error text stored on the receipt.
    """
    outcome = "failed"
    if success:
        outcome = "success"
    summary = f"to={to}: {subject}"[:200]
    send_receipt = make_receipt(
        route="gmail",
        type="message",
        operation="send",
        status=outcome,
        external_id=external_id,
        preview=summary,
        error=error,
    )
    return with_receipt(
        payload=payload,
        receipt=send_receipt,
        tool_call_id=runtime.tool_call_id,
    )
if db_session is None or search_space_id is None or user_id is None:
return {
"status": "error",
"message": "Gmail tool not properly configured. Please contact support.",
}
msg = "Gmail tool not properly configured. Please contact support."
return _emit(
{"status": "error", "message": msg},
success=False,
error=msg,
)
try:
metadata_service = GmailToolMetadataService(db_session)
@ -74,16 +102,24 @@ def create_send_gmail_email_tool(
if "error" in context:
logger.error(f"Failed to fetch creation context: {context['error']}")
return {"status": "error", "message": context["error"]}
return _emit(
{"status": "error", "message": context["error"]},
success=False,
error=context["error"],
)
accounts = context.get("accounts", [])
if accounts and all(a.get("auth_expired") for a in accounts):
logger.warning("All Gmail accounts have expired authentication")
return {
"status": "auth_error",
"message": "All connected Gmail accounts need re-authentication. Please re-authenticate in your connector settings.",
"connector_type": "gmail",
}
return _emit(
{
"status": "auth_error",
"message": "All connected Gmail accounts need re-authentication. Please re-authenticate in your connector settings.",
"connector_type": "gmail",
},
success=False,
error="auth_expired",
)
logger.info(
f"Requesting approval for sending Gmail email: to='{to}', subject='{subject}'"
@ -103,10 +139,14 @@ def create_send_gmail_email_tool(
)
if result.rejected:
return {
"status": "rejected",
"message": "User declined. The email was not sent. Do not ask again or suggest alternatives.",
}
return _emit(
{
"status": "rejected",
"message": "User declined. The email was not sent. Do not ask again or suggest alternatives.",
},
success=False,
error="user_rejected",
)
final_to = result.params.get("to", to)
final_subject = result.params.get("subject", subject)
@ -135,10 +175,14 @@ def create_send_gmail_email_tool(
)
connector = result.scalars().first()
if not connector:
return {
"status": "error",
"message": "Selected Gmail connector is invalid or has been disconnected.",
}
msg = (
"Selected Gmail connector is invalid or has been disconnected."
)
return _emit(
{"status": "error", "message": msg},
success=False,
error=msg,
)
actual_connector_id = connector.id
else:
result = await db_session.execute(
@ -150,10 +194,12 @@ def create_send_gmail_email_tool(
)
connector = result.scalars().first()
if not connector:
return {
"status": "error",
"message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
}
msg = "No Gmail connector found. Please connect Gmail in your workspace settings."
return _emit(
{"status": "error", "message": msg},
success=False,
error=msg,
)
actual_connector_id = connector.id
logger.info(
@ -166,10 +212,12 @@ def create_send_gmail_email_tool(
):
cca_id = connector.config.get("composio_connected_account_id")
if not cca_id:
return {
"status": "error",
"message": "Composio connected account ID not found for this Gmail connector.",
}
msg = "Composio connected account ID not found for this Gmail connector."
return _emit(
{"status": "error", "message": msg},
success=False,
error=msg,
)
from app.services.composio_service import ComposioService
@ -187,7 +235,11 @@ def create_send_gmail_email_tool(
bcc=final_bcc,
)
if error:
return {"status": "error", "message": error}
return _emit(
{"status": "error", "message": error},
success=False,
error=error,
)
sent = {"id": sent_message_id, "threadId": sent_thread_id}
else:
from google.oauth2.credentials import Credentials
@ -275,11 +327,15 @@ def create_send_gmail_email_tool(
actual_connector_id,
exc_info=True,
)
return {
"status": "insufficient_permissions",
"connector_id": actual_connector_id,
"message": "This Gmail account needs additional permissions. Please re-authenticate in connector settings.",
}
return _emit(
{
"status": "insufficient_permissions",
"connector_id": actual_connector_id,
"message": "This Gmail account needs additional permissions. Please re-authenticate in connector settings.",
},
success=False,
error="insufficient_permissions",
)
raise
logger.info(
@ -310,12 +366,16 @@ def create_send_gmail_email_tool(
logger.warning(f"KB sync after send failed: {kb_err}")
kb_message_suffix = " This email will be added to your knowledge base in the next scheduled sync."
return {
"status": "success",
"message_id": sent.get("id"),
"thread_id": sent.get("threadId"),
"message": f"Successfully sent email to '{final_to}' with subject '{final_subject}'.{kb_message_suffix}",
}
return _emit(
{
"status": "success",
"message_id": sent.get("id"),
"thread_id": sent.get("threadId"),
"message": f"Successfully sent email to '{final_to}' with subject '{final_subject}'.{kb_message_suffix}",
},
success=True,
external_id=sent.get("id"),
)
except Exception as e:
from langgraph.errors import GraphInterrupt
@ -324,9 +384,11 @@ def create_send_gmail_email_tool(
raise
logger.error(f"Error sending Gmail email: {e}", exc_info=True)
return {
"status": "error",
"message": "Something went wrong while sending the email. Please try again.",
}
msg = "Something went wrong while sending the email. Please try again."
return _emit(
{"status": "error", "message": msg},
success=False,
error=str(e),
)
return send_gmail_email

View file

@ -100,9 +100,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -111,12 +111,12 @@ Return **only** one JSON object (no markdown, no prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: site, project, issue, user, transition, etc.).
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (issue key, summary, status, assignee; up to 10 entries, then `"...and N more"`).
</output_contract>
<include snippet="verifiable_handle"/>
Discover before you mutate; never guess identifiers, transitions, or required fields.

View file

@ -101,12 +101,12 @@ Return **only** one JSON object (no markdown, no prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: issue, user, project, state, etc.).
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (identifier, title, state, assignee; up to 10 entries, then `"...and N more"`).
</output_contract>
<include snippet="verifiable_handle"/>
Discover before you mutate; never guess identifiers.

View file

@ -101,9 +101,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Infer before you call; verify before you create; map every tool outcome faithfully.

View file

@ -99,9 +99,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -1,12 +1,16 @@
import logging
from typing import Any
from langchain.tools import ToolRuntime
from langchain_core.tools import tool
from langgraph.types import Command
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
request_approval,
)
from app.agents.shared.receipt import make_receipt
from app.agents.shared.receipt_command import with_receipt
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
from app.services.notion.tool_metadata_service import NotionToolMetadataService
@ -35,8 +39,9 @@ def create_delete_notion_page_tool(
@tool
async def delete_notion_page(
page_title: str,
runtime: ToolRuntime,
delete_from_kb: bool = False,
) -> dict[str, Any]:
) -> Command:
"""Delete (archive) a Notion page.
Use this tool when the user asks you to delete, remove, or archive
@ -65,14 +70,39 @@ def create_delete_notion_page_tool(
f"delete_notion_page called: page_title='{page_title}', delete_from_kb={delete_from_kb}"
)
def _emit(
    payload: dict[str, Any],
    *,
    status: str,
    external_id: str | None = None,
    error: str | None = None,
) -> Command:
    """Wrap ``payload`` together with a Notion page-delete receipt.

    Only a ``status`` of exactly ``"success"`` is recorded as a
    ``"success"`` receipt; every other value is recorded as ``"failed"``.
    The receipt preview is the ``page_title`` the tool was called with, and
    the result is tied to the current tool call via ``runtime.tool_call_id``.
    """
    if status == "success":
        receipt_status = "success"
    else:
        receipt_status = "failed"

    receipt = make_receipt(
        route="notion",
        type="page",
        operation="delete",
        status=receipt_status,
        external_id=external_id,
        preview=page_title,
        error=error,
    )
    return with_receipt(
        payload=payload,
        receipt=receipt,
        tool_call_id=runtime.tool_call_id,
    )
if db_session is None or search_space_id is None or user_id is None:
logger.error(
"Notion tool not properly configured - missing required parameters"
)
return {
"status": "error",
"message": "Notion tool not properly configured. Please contact support.",
}
return _emit(
{
"status": "error",
"message": "Notion tool not properly configured. Please contact support.",
},
status="error",
error="Notion tool not properly configured. Please contact support.",
)
try:
# Get page context (page_id, account, title) from indexed data
@ -86,16 +116,18 @@ def create_delete_notion_page_tool(
# Check if it's a "not found" error (softer handling for LLM)
if "not found" in error_msg.lower():
logger.warning(f"Page not found: {error_msg}")
return {
"status": "not_found",
"message": error_msg,
}
return _emit(
{"status": "not_found", "message": error_msg},
status="error",
error=error_msg,
)
else:
logger.error(f"Failed to fetch delete context: {error_msg}")
return {
"status": "error",
"message": error_msg,
}
return _emit(
{"status": "error", "message": error_msg},
status="error",
error=error_msg,
)
account = context.get("account", {})
if account.get("auth_expired"):
@ -103,10 +135,14 @@ def create_delete_notion_page_tool(
"Notion account %s has expired authentication",
account.get("id"),
)
return {
"status": "auth_error",
"message": "The Notion account for this page needs re-authentication. Please re-authenticate in your connector settings.",
}
return _emit(
{
"status": "auth_error",
"message": "The Notion account for this page needs re-authentication. Please re-authenticate in your connector settings.",
},
status="error",
error="auth_expired",
)
page_id = context.get("page_id")
connector_id_from_context = account.get("id")
@ -129,10 +165,14 @@ def create_delete_notion_page_tool(
if result.rejected:
logger.info("Notion page deletion rejected by user")
return {
"status": "rejected",
"message": "User declined. Do not retry or suggest alternatives.",
}
return _emit(
{
"status": "rejected",
"message": "User declined. Do not retry or suggest alternatives.",
},
status="error",
error="user_rejected",
)
final_page_id = result.params.get("page_id", page_id)
final_connector_id = result.params.get(
@ -165,18 +205,26 @@ def create_delete_notion_page_tool(
logger.error(
f"Invalid connector_id={final_connector_id} for search_space_id={search_space_id}"
)
return {
"status": "error",
"message": "Selected Notion account is invalid or has been disconnected. Please select a valid account.",
}
return _emit(
{
"status": "error",
"message": "Selected Notion account is invalid or has been disconnected. Please select a valid account.",
},
status="error",
error="invalid_connector",
)
actual_connector_id = connector.id
logger.info(f"Validated Notion connector: id={actual_connector_id}")
else:
logger.error("No connector found for this page")
return {
"status": "error",
"message": "No connector found for this page.",
}
return _emit(
{
"status": "error",
"message": "No connector found for this page.",
},
status="error",
error="no_connector",
)
# Create connector instance
notion_connector = NotionHistoryConnector(
@ -232,7 +280,13 @@ def create_delete_notion_page_tool(
f"{result.get('message', '')} (also removed from knowledge base)"
)
return result
status = result.get("status", "error")
return _emit(
result,
status=status,
external_id=str(final_page_id) if final_page_id else None,
error=None if status == "success" else result.get("message"),
)
except Exception as e:
from langgraph.errors import GraphInterrupt
@ -245,20 +299,28 @@ def create_delete_notion_page_tool(
if isinstance(e, NotionAPIError) and (
"401" in error_str or "unauthorized" in error_str
):
return {
"status": "auth_error",
"message": str(e),
"connector_id": connector_id_from_context
if "connector_id_from_context" in dir()
else None,
"connector_type": "notion",
}
return _emit(
{
"status": "auth_error",
"message": str(e),
"connector_id": connector_id_from_context
if "connector_id_from_context" in dir()
else None,
"connector_type": "notion",
},
status="error",
error=str(e),
)
if isinstance(e, ValueError | NotionAPIError):
message = str(e)
else:
message = (
"Something went wrong while deleting the page. Please try again."
)
return {"status": "error", "message": message}
return _emit(
{"status": "error", "message": message},
status="error",
error=message,
)
return delete_notion_page

View file

@ -97,9 +97,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Infer before you call; map every tool outcome faithfully.

View file

@ -87,12 +87,12 @@ Return **only** one JSON object (no markdown, no prose):
"missing_fields": string[] | null,
"assumptions": string[] | null
}
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
Route-specific rules:
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: channel, user, message, thread).
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (channel/user, key identifier, timestamp, short snippet; up to 10 entries, then `"...and N more"`).
</output_contract>
<include snippet="verifiable_handle"/>
Discover before you post; never guess channel, user, or thread targets.

View file

@ -115,9 +115,8 @@ Return **only** one JSON object (no markdown or prose outside it):
}
```
Rules:
- `status=success` → `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` → `next_step` must be non-null.
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
<include snippet="output_contract_base"/>
<include snippet="verifiable_handle"/>
Resolve before you call; verify before you send; map every tool outcome faithfully.

View file

@ -49,6 +49,7 @@ def request_approval(
params: dict[str, Any],
context: dict[str, Any] | None = None,
trusted_tools: list[str] | None = None,
tool_call_id: str | None = None,
) -> HITLResult:
"""Pause the graph for user approval and return the user's decision.
@ -64,6 +65,10 @@ def request_approval(
forwarded verbatim to the FE for richer card chrome.
trusted_tools: Per-session allowlist; when ``tool_name`` is in it the
interrupt is skipped and the tool runs immediately.
tool_call_id: Caller's LangChain tool-call id. Required for tools
running directly on the main agent; subagent-mounted tools omit
it (the ``task`` chokepoint stamps it on re-raise — see
:mod:`...checkpointed_subagent_middleware.propagation`).
Returns:
:class:`HITLResult` with ``rejected=True`` if the user declined or
@ -90,6 +95,8 @@ def request_approval(
interrupt_type=action_type,
context=context,
)
if tool_call_id:
payload["tool_call_id"] = tool_call_id
approval = interrupt(payload)
parsed = parse_lc_envelope(approval)

View file

@ -2,8 +2,11 @@
from __future__ import annotations
from functools import lru_cache
from importlib import resources
_SHARED_SNIPPETS_PACKAGE = "app.agents.multi_agent_chat.subagents.shared.snippets"
def read_md_file(package: str, stem: str) -> str:
"""Load ``{stem}.md`` from ``package`` via importlib resources, or return empty."""
@ -12,3 +15,13 @@ def read_md_file(package: str, stem: str) -> str:
return ""
text = ref.read_text(encoding="utf-8")
return text.rstrip("\n")
@lru_cache(maxsize=64)
def read_shared_snippet(name: str) -> str:
    """Return the text of the shared markdown snippet called ``name``.

    The lookup is delegated to :func:`read_md_file` against the shared
    snippets package. Results are memoised (up to 64 distinct names) because
    snippets are static at runtime yet are resolved repeatedly while
    subagents are being built.
    """
    snippet_text = read_md_file(_SHARED_SNIPPETS_PACKAGE, name)
    return snippet_text

View file

@ -0,0 +1,6 @@
"""Shared markdown snippets composed into every subagent system prompt.
Resolved at build time by :func:`pack_subagent` in ``subagent_builder.py``
via the ``<include snippet="NAME"/>`` directive. See ``output_contract_base.md``
and ``verifiable_handle.md`` for the included content.
"""

View file

@ -0,0 +1,6 @@
Rules (universal):
- `status=success` -> `next_step=null`, `missing_fields=null`.
- `status=partial|blocked|error` -> `next_step` must be non-null.
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
- `assumptions`: any inferences you made about the user's intent; `null` when no inferences were needed.
- The `evidence` object's fields are documented in your route-specific `<output_contract>` above; never invent fields the tool did not return.

View file

@ -0,0 +1,10 @@
<verifiable_handle>
Mutating tools you call return a structured `Receipt` object alongside their normal payload (see `evidence.receipts` in your `<output_contract>`). The supervisor uses the Receipt's `verifiable_url` and `external_id` to independently confirm the operation succeeded - do not paraphrase, shorten, or guess these values.
Rules:
- Quote each Receipt's `verifiable_url` and `external_id` **verbatim** in `evidence.receipts`. Copy character-for-character; never retype from memory.
- If a Receipt has `status="failed"`, set your own `status="error"` and put the Receipt's `error` field in `next_step`.
- If a Receipt has `status="pending"` (async backends — podcasts, video presentations, anything queued through Celery), report `status=success`, surface the pending Receipt as-is, and tell the supervisor in `action_summary` that the artefact is **being generated in the background** (e.g. "Podcast 38 queued; orchestrator should report it as kicked off, not yet ready"). A pending Receipt almost always lacks `verifiable_url` because the artefact does not exist yet — that is expected, not a defect. Do **not** wait, poll, or retry; control returns to the supervisor immediately and the asset becomes visible to the user out of band via its own UI surface.
- Never claim a mutation succeeded without a matching Receipt with `status="success"` or `"pending"` in your tool results this turn.
- For tools that do not return a Receipt (read-only operations, search, lookup), the receipt rules do not apply; only the route-specific `evidence` fields matter.
</verifiable_handle>

View file

@ -2,12 +2,30 @@
from __future__ import annotations
from collections.abc import Callable, Mapping
from dataclasses import dataclass
from typing import Any
from deepagents import SubAgent
from app.agents.new_chat.permissions import Ruleset
# A context-hint provider receives the parent-agent ``runtime.state`` mapping
# and the ``description`` the orchestrator wrote, and returns a short string
# the runtime prepends to the subagent's first ``HumanMessage``. Used for
# things like "current search-space id is X" or "the user is in workspace Y" —
# never for full corpora, since the prepended text consumes the subagent's
# prompt budget on every invocation. Return ``None`` (or an empty string) to
# skip the hint for this call.
ContextHintProvider = Callable[[Mapping[str, Any], str], str | None]
# Custom key stashed on the deepagents ``SubAgent`` dict so the provider
# survives the trip from ``pack_subagent`` → registry → middleware →
# task_tool. ``deepagents.create_agent`` only extracts the keys it
# recognises, so an extra key here is dropped silently at compile time.
# The prefix avoids any collision with future deepagents fields.
SURF_CONTEXT_HINT_PROVIDER_KEY = "surf_context_hint_provider"
@dataclass(frozen=True, slots=True)
class SurfSenseSubagentSpec:
@ -20,10 +38,22 @@ class SurfSenseSubagentSpec:
layers them into the subagent's :class:`PermissionMiddleware`,
so each subagent owns its own ruleset without aliasing the
shared rule engine.
context_hint_provider: Optional callback invoked once per ``task(...)``
invocation, immediately before the subagent runs. Its return
value is prepended to the subagent's first ``HumanMessage`` so
the subagent can see things it would otherwise have to discover
(active search space, KB root, current user timezone, etc.).
Kept out of the deepagents ``spec`` because that dict is forwarded
verbatim to upstream code and only recognises its own typed keys.
"""
spec: SubAgent
ruleset: Ruleset
context_hint_provider: ContextHintProvider | None = None
__all__ = ["SurfSenseSubagentSpec"]
__all__ = [
"SURF_CONTEXT_HINT_PROVIDER_KEY",
"ContextHintProvider",
"SurfSenseSubagentSpec",
]

View file

@ -2,6 +2,8 @@
from __future__ import annotations
import logging
import re
from typing import Any, cast
from deepagents import SubAgent
@ -12,9 +14,48 @@ from langchain_core.tools import BaseTool
from app.agents.multi_agent_chat.middleware.shared.permissions import (
build_permission_mw,
)
from app.agents.multi_agent_chat.subagents.shared.spec import SurfSenseSubagentSpec
from app.agents.multi_agent_chat.subagents.shared.md_file_reader import (
read_shared_snippet,
)
from app.agents.multi_agent_chat.subagents.shared.spec import (
SURF_CONTEXT_HINT_PROVIDER_KEY,
ContextHintProvider,
SurfSenseSubagentSpec,
)
from app.agents.new_chat.permissions import Ruleset
logger = logging.getLogger(__name__)
# ``<include snippet="NAME"/>`` directive. Matches an XML-style self-closing
# tag whose ``snippet`` attribute names a file in ``shared/snippets/``.
# Whitespace around the attribute and self-close is tolerated; the snippet
# name itself must be a bare identifier (letters / digits / underscores) so
# we never pull a path-traversal value into ``read_shared_snippet``.
_INCLUDE_DIRECTIVE_RE = re.compile(
r"<include\s+snippet=\"(?P<name>[A-Za-z0-9_]+)\"\s*/>"
)
def _resolve_includes(prompt: str, *, subagent_name: str) -> str:
"""Replace ``<include snippet="X"/>`` directives with the snippet body.
Unknown snippet names raise; an empty body is treated as unknown so a
typo or missing file fails loudly at startup instead of silently
shipping a broken prompt to the LLM.
"""
def _replace(match: re.Match[str]) -> str:
name = match.group("name")
body = read_shared_snippet(name)
if not body.strip():
raise ValueError(
f"Subagent {subagent_name!r}: unknown or empty shared "
f"snippet {name!r} referenced via <include>."
)
return body
return _INCLUDE_DIRECTIVE_RE.sub(_replace, prompt)
def _user_allowlist_for(
dependencies: dict[str, Any], subagent_name: str
@ -43,6 +84,7 @@ def pack_subagent(
dependencies: dict[str, Any],
model: BaseChatModel | None = None,
middleware_stack: dict[str, Any] | None = None,
context_hint_provider: ContextHintProvider | None = None,
) -> SurfSenseSubagentSpec:
"""Pack the route-local pieces into one sub-agent spec + its Ruleset.
@ -68,6 +110,8 @@ def pack_subagent(
msg = f"Subagent {name!r}: system_prompt is empty"
raise ValueError(msg)
system_prompt = _resolve_includes(system_prompt, subagent_name=name)
flags = dependencies["flags"]
user_allowlist = _user_allowlist_for(dependencies, name)
subagent_rulesets: list[Ruleset] = [ruleset]
@ -99,4 +143,12 @@ def pack_subagent(
}
if model is not None:
spec_dict["model"] = model
return SurfSenseSubagentSpec(spec=cast(SubAgent, spec_dict), ruleset=ruleset)
if context_hint_provider is not None:
# Stash the callback on the dict so it survives the trip through
# registry / middleware unpacking (both treat the spec as opaque).
spec_dict[SURF_CONTEXT_HINT_PROVIDER_KEY] = context_hint_provider
return SurfSenseSubagentSpec(
spec=cast(SubAgent, spec_dict),
ruleset=ruleset,
context_hint_provider=context_hint_provider,
)

View file

@ -0,0 +1,168 @@
"""Minimal anonymous / free-chat agent.
The no-login chat experience must stay dead simple: the user asks a question
and the model answers, optionally using ``web_search`` and an optionally
uploaded **read-only** document. We deliberately bypass the full SurfSense deep
agent stack (filesystem, file-intent, knowledge-base persistence, subagents,
skills, memory) because those middlewares stage or persist "documents" that an
anonymous session can never see again -- which produced phantom
"I saved it to a file" answers for free users.
For any other SurfSense capability the model is instructed (via the system
prompt built here) to tell the user to create a free account instead of
pretending to perform the action.
"""
from __future__ import annotations
from datetime import UTC, datetime
from typing import Any
from deepagents.backends import StateBackend
from langchain.agents import create_agent
from langchain.agents.middleware import (
ModelCallLimitMiddleware,
ToolCallLimitMiddleware,
)
from langchain_core.language_models import BaseChatModel
from langgraph.types import Checkpointer
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.middleware import (
RetryAfterMiddleware,
create_surfsense_compaction_middleware,
)
from app.agents.new_chat.tools.web_search import create_web_search_tool
# Cap how much of an uploaded document we inline into the system prompt. The
# upload endpoint allows files up to several MB, but the doc is re-sent on
# every turn and counts against the anonymous token quota, so we bound it.
_MAX_DOC_CHARS = 50_000
def build_anonymous_system_prompt(anon_doc: dict[str, Any] | None = None) -> str:
"""Build the system prompt for the minimal anonymous chat agent.
The prompt keeps the assistant focused on plain Q/A + web search, inlines
any uploaded document as read-only context, and redirects every other
SurfSense feature to account registration.
"""
today = datetime.now(UTC).strftime("%A, %B %d, %Y")
doc_section = ""
if anon_doc:
title = str(anon_doc.get("title") or "uploaded_document")
content = str(anon_doc.get("content") or "")
truncated = content[:_MAX_DOC_CHARS]
truncation_note = ""
if len(content) > _MAX_DOC_CHARS:
truncation_note = (
"\n\n[Note: the document was truncated because it is large; "
"only the beginning is shown.]"
)
doc_section = (
"\n\n## Uploaded document (read-only)\n"
f'The user uploaded a document named "{title}". Its contents are '
"provided below for reference only. You may read it and answer "
"questions about it, but you cannot modify, save, or store it.\n\n"
f'<uploaded_document title="{title}">\n'
f"{truncated}{truncation_note}\n"
"</uploaded_document>"
)
return (
"You are SurfSense's free AI assistant, available to everyone without "
"login.\n\n"
f"Today's date is {today}.\n\n"
"## How to help\n"
"- Answer the user's questions directly and conversationally. You are "
"a straightforward question-and-answer assistant.\n"
"- When a question needs current, real-time, or factual information "
"from the internet (news, prices, weather, recent events, live data), "
"use the `web_search` tool. Otherwise, answer directly from your own "
"knowledge.\n"
"- Be concise, accurate, and helpful. Use Markdown formatting when it "
"improves readability."
f"{doc_section}\n\n"
"## What is not available here\n"
"This is the free, no-login experience. You CANNOT save files or "
"notes, generate reports, podcasts, resumes, presentations, or images, "
"search or build a knowledge base, connect to apps (Gmail, Google "
"Drive, Notion, Slack, Calendar, Discord, and similar), set up "
"automations, or remember anything across sessions.\n\n"
"If the user asks for any of these, do NOT pretend to do them and "
"never claim you saved, created, or stored anything. Instead, briefly "
"let them know the feature requires a free SurfSense account and "
"invite them to create one at https://www.surfsense.com. Then offer to "
"help with what you can do here (answering questions and searching the "
"web)."
)
async def create_anonymous_chat_agent(
    *,
    llm: BaseChatModel,
    checkpointer: Checkpointer,
    anon_session_id: str | None = None,
    anon_doc: dict[str, Any] | None = None,
    enable_web_search: bool = True,
):
    """Assemble the stripped-down Q/A agent used for anonymous / free chat.

    In contrast to :func:`create_surfsense_deep_agent`, no filesystem,
    file-intent, knowledge-base persistence, subagent, skills, or memory
    middleware is attached. ``web_search`` is the only tool (and only when
    ``enable_web_search`` is True); an uploaded document is exposed purely as
    read-only text inside the system prompt.

    Args:
        llm: The chat model to use (already built by the caller).
        checkpointer: LangGraph checkpointer for the ephemeral anon thread.
        anon_session_id: Anonymous session id (used only for telemetry/metadata).
        anon_doc: Optional ``{"title", "content"}`` for an uploaded document.
        enable_web_search: When False, the agent runs as a pure LLM with no
            tools (used when the user toggles web search off).

    Returns:
        The agent bound to a config with ``recursion_limit`` 40 and
        anonymous-chat metadata.
    """
    tools = []
    if enable_web_search:
        tools.append(
            create_web_search_tool(search_space_id=None, available_connectors=None)
        )

    # Reliability-only middleware: call limits guard against loops,
    # compaction summarises long histories, retry handles rate limits.
    # The tool-call limiter is only relevant when a tool is actually mounted.
    model_call_guard = ModelCallLimitMiddleware(
        thread_limit=120, run_limit=80, exit_behavior="end"
    )
    tool_call_guards = (
        [
            ToolCallLimitMiddleware(
                thread_limit=300, run_limit=80, exit_behavior="continue"
            )
        ]
        if tools
        else []
    )
    middleware: list[Any] = [
        model_call_guard,
        *tool_call_guards,
        create_surfsense_compaction_middleware(llm, StateBackend),
        RetryAfterMiddleware(max_retries=3),
    ]

    agent = create_agent(
        llm,
        system_prompt=build_anonymous_system_prompt(anon_doc),
        tools=tools,
        middleware=middleware,
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )

    run_config = {
        "recursion_limit": 40,
        "metadata": {
            "ls_integration": "surfsense_anonymous_chat",
            "anon_session_id": anon_session_id,
        },
    }
    return agent.with_config(run_config)
__all__ = ["build_anonymous_system_prompt", "create_anonymous_chat_agent"]

View file

@ -104,7 +104,7 @@ class AgentFeatureFlags:
# ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``,
# ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``,
# ``tools/teams``, ``tools/luma``, ``connected_accounts``,
# ``update_memory``, ``search_surfsense_docs``) now acquire fresh
# ``update_memory``) now acquire fresh
# short-lived ``AsyncSession`` instances per call via
# :data:`async_session_maker`. The factory still accepts ``db_session``
# for registry compatibility but ``del``'s it immediately — see any

View file

@ -33,9 +33,11 @@ from typing_extensions import TypedDict
from app.agents.new_chat.state_reducers import (
_add_unique_reducer,
_dict_merge_with_tombstones_reducer,
_int_counter_merge_reducer,
_list_append_reducer,
_replace_reducer,
)
from app.agents.shared.receipt import Receipt
class PendingMove(TypedDict, total=False):
@ -172,6 +174,35 @@ class SurfSenseFilesystemState(FilesystemState):
workspace_tree_text: NotRequired[Annotated[str, _replace_reducer]]
"""Pre-rendered ``<workspace_tree>`` body; shared with subagents to skip re-render."""
billable_calls: NotRequired[Annotated[dict[str, int], _int_counter_merge_reducer]]
"""Per-subagent ``task(...)`` invocation counter, summed across the turn.
Incremented by ``task_tool.py`` each time a subagent invocation
completes (single- or batch-mode). The orchestrator can read this map
to self-limit when a runaway loop sends the same specialist 20 calls
in a row; the runtime emits a soft warning ToolMessage once the
cumulative count crosses :data:`DEFAULT_SUBAGENT_BILLABLE_THRESHOLD`.
Cleared by checkpoint rollover (i.e. per turn).
"""
receipts: NotRequired[Annotated[list[Receipt], _list_append_reducer]]
"""Structured Receipt handles emitted by mutating subagent tools this turn.
Each mutating tool (deliverables, every connector, KB writes via the
persistence middleware) wraps its native return into a
:class:`~app.agents.shared.receipt.Receipt`
and returns it under the ``"receipt"`` key alongside its existing
payload. The subagent's tool-call middleware folds the receipt into
this list, and ``_return_command_with_state_update`` in
``checkpointed_subagent_middleware/task_tool.py`` carries the list up
to the parent automatically (``"receipts"`` is not in
``EXCLUDED_STATE_KEYS``).
Append-only across the turn; cleared by checkpoint rollover. The
orchestrator reads it via the ``<verification>`` teaching to confirm
side-effecting subagent claims (see ``shared/snippets/verifiable_handle.md``).
"""
__all__ = [
"KbAnonDoc",

View file

@ -73,9 +73,8 @@ class ResolvedMentionSet:
``@Project Roadmap`` is never shadowed by a shorter prefix
``@Project``).
``mentioned_document_ids`` collapses doc + surfsense_doc chips into
a single ordered, deduped list because the priority middleware
treats them uniformly downstream — see
``mentioned_document_ids`` is an ordered, deduped list consumed by
the priority middleware downstream — see
``KnowledgePriorityMiddleware._compute_priority_paths``.
"""
@ -103,7 +102,6 @@ async def resolve_mentions(
search_space_id: int,
mentioned_documents: list[MentionedDocumentInfo] | None,
mentioned_document_ids: list[int] | None = None,
mentioned_surfsense_doc_ids: list[int] | None = None,
mentioned_folder_ids: list[int] | None = None,
) -> ResolvedMentionSet:
"""Resolve every @-mention chip on a turn into virtual paths.
@ -111,8 +109,7 @@ async def resolve_mentions(
The function takes both the ``mentioned_documents`` discriminated
list (chip metadata used for substitution + persistence) and the
parallel id arrays (``mentioned_document_ids``,
``mentioned_surfsense_doc_ids``, ``mentioned_folder_ids``) for two
reasons:
``mentioned_folder_ids``) for two reasons:
* Legacy clients that haven't migrated to the unified chip list
still send the id arrays — we treat the union as authoritative.
@ -142,7 +139,6 @@ async def resolve_mentions(
dict.fromkeys(
[
*(mentioned_document_ids or []),
*(mentioned_surfsense_doc_ids or []),
*chip_doc_ids,
]
)

View file

@ -34,8 +34,7 @@ from deepagents.middleware.summarization import (
)
from langchain_core.messages import SystemMessage
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
if TYPE_CHECKING:
from deepagents.backends.protocol import BACKEND_TYPES

View file

@ -47,8 +47,7 @@ from langgraph.config import get_config
from langgraph.runtime import Runtime
from langgraph.types import interrupt
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
logger = logging.getLogger(__name__)

View file

@ -55,6 +55,7 @@ from app.agents.new_chat.path_resolver import (
virtual_path_to_doc,
)
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.receipt import Receipt, make_receipt
from app.db import (
AgentActionLog,
Chunk,
@ -1392,6 +1393,81 @@ async def commit_staged_filesystem_state(
"pending_dir_deletes": [_CLEAR],
"dirty_path_tool_calls": {_CLEAR: True},
}
# Emit one Receipt per committed mutation, folded into ``state['receipts']``
# via ``_list_append_reducer``. The receipts surface what actually committed
# (post-savepoint) rather than what the LLM intended; the orchestrator uses
# them as ground truth in the ``<verification>`` teaching. KB writes do not
# have public verifiable URLs, so ``verifiable_url`` stays unset.
receipts: list[Receipt] = []
def _kb_receipt(
*,
type: str,
operation: str,
path: str,
external_id: int | None = None,
) -> None:
if not path:
return
preview = path.rsplit("/", 1)[-1] or path
receipts.append(
make_receipt(
route="knowledge_base",
type=type,
operation=operation,
status="success",
external_id=str(external_id) if external_id is not None else path,
preview=preview,
)
)
for payload in committed_creates:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="write_file",
path=path,
external_id=payload.get("id"),
)
for payload in committed_updates:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="edit_file",
path=path,
external_id=payload.get("id"),
)
for payload in applied_moves:
# ``applied_moves`` rows carry the destination ``virtualPath`` because
# the move has already landed in the DB by the time we reach this code.
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="move_file",
path=path,
external_id=payload.get("id"),
)
for path in staged_dirs:
_kb_receipt(type="folder", operation="mkdir", path=path)
for payload in committed_deletes:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="rm",
path=path,
external_id=payload.get("id"),
)
for payload in committed_folder_deletes:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="folder",
operation="rmdir",
path=path,
external_id=payload.get("id"),
)
if receipts:
delta["receipts"] = receipts
files_delta: dict[str, Any] = {}
if temp_paths:
files_delta.update(dict.fromkeys(temp_paths))

View file

@ -61,8 +61,7 @@ from app.agents.new_chat.permissions import (
aggregate_action,
evaluate_many,
)
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
logger = logging.getLogger(__name__)

View file

@ -59,14 +59,13 @@ Do NOT cite document_id. Always use the chunk id.
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
- Copy the EXACT chunk id from the XML - if it says `<chunk id='doc-123'>`, use [citation:doc-123]
- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
</citation_format>
<citation_examples>
CORRECT citation formats:
- [citation:5] (numeric chunk ID from knowledge base)
- [citation:doc-123] (for Surfsense documentation chunks)
- [citation:https://example.com/article] (URL chunk ID from web search results)
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)

View file

@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
2. Ask the user: "Would you like me to answer from my general knowledge instead?"
3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
* Formatting, summarization, or analysis of content already present in the conversation
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages

View file

@ -7,7 +7,7 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
* Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
* Formatting, summarization, or analysis of content already present in the conversation
* Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages

View file

@ -13,6 +13,7 @@ When to use which tool:
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
- Real-time public web data → call web_search
- Reading a specific webpage → call scrape_webpage
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
**`task` subagents (when to delegate):**
- **`linear_specialist`** — Linear-only investigations and tool use.

View file

@ -13,6 +13,7 @@ When to use which tool:
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
- Real-time public web data → call web_search
- Reading a specific webpage → call scrape_webpage
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
**`task` subagents (when to delegate):**
- **`linear_specialist`** — Linear-only investigations and tool use.

View file

@ -151,7 +151,6 @@ def _read_fragment(subpath: str) -> str:
# Ordered for reading flow: fundamentals first, then artifact generators,
# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
"search_surfsense_docs",
"web_search",
"generate_podcast",
"generate_video_presentation",

View file

@ -1,9 +0,0 @@
- User: "How do I install SurfSense?"
- Call: `search_surfsense_docs(query="installation setup")`
- User: "What connectors does SurfSense support?"
- Call: `search_surfsense_docs(query="available connectors integrations")`
- User: "How do I set up the Notion connector?"
- Call: `search_surfsense_docs(query="Notion connector setup configuration")`
- User: "How do I use Docker to run SurfSense?"
- Call: `search_surfsense_docs(query="Docker installation setup")`

View file

@ -1,7 +0,0 @@
- search_surfsense_docs: Search the official SurfSense documentation.
- Use this tool when the user asks anything about SurfSense itself (the application they are using).
- Args:
- query: The search query about SurfSense
- top_k: Number of documentation chunks to retrieve (default: 10)
- Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123])

View file

@ -1,7 +1,6 @@
---
name: email-drafting
description: Draft an email matching the user's voice, with structured intent and CTA
allowed-tools: search_surfsense_docs
---
# Email drafting

View file

@ -1,7 +1,7 @@
---
name: kb-research
description: Structured approach to finding and synthesizing information from the user's knowledge base
allowed-tools: search_surfsense_docs, scrape_webpage, read_file, ls_tree, grep, web_search
allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
---
# Knowledge-base research

View file

@ -1,7 +1,7 @@
---
name: meeting-prep
description: Pull together briefing materials before a scheduled meeting
allowed-tools: search_surfsense_docs, web_search, scrape_webpage, read_file
allowed-tools: web_search, scrape_webpage, read_file
---
# Meeting preparation

View file

@ -1,7 +1,7 @@
---
name: report-writing
description: How to scope, draft, and revise a Markdown report artifact via generate_report
allowed-tools: generate_report, search_surfsense_docs, read_file
allowed-tools: generate_report, read_file
---
# Report writing

View file

@ -1,7 +1,6 @@
---
name: slack-summary
description: Distill a Slack channel or thread into actionable summary
allowed-tools: search_surfsense_docs
---
# Slack summarization

View file

@ -171,6 +171,39 @@ def _dict_merge_with_tombstones_reducer(
return result
def _int_counter_merge_reducer(
left: dict[str, int] | None,
right: dict[str, int] | None,
) -> dict[str, int]:
"""Merge ``right`` into ``left`` by **summing** per-key integer counters.
Used for state fields that accumulate counts across multiple updates
within the same turn (e.g. per-subagent ``billable_calls``). Unknown
keys are added; existing keys are summed. ``_CLEAR`` sentinels reset
the accumulator the same way the other reducers do, so the orchestrator
can wipe the counter at end-of-turn if needed.
"""
if right is None:
return dict(left or {})
if _CLEAR in right or any(_is_clear(k) for k in right):
result: dict[str, int] = {}
for key, value in right.items():
if _is_clear(key):
continue
if not isinstance(value, int):
continue
result[key] = result.get(key, 0) + value
return result
base = dict(left or {})
for key, value in right.items():
if not isinstance(value, int):
continue
base[key] = base.get(key, 0) + value
return base
def _initial_filesystem_state() -> dict[str, Any]:
"""Default empty values for SurfSense filesystem state fields.
@ -200,6 +233,7 @@ __all__ = [
"_add_unique_reducer",
"_dict_merge_with_tombstones_reducer",
"_initial_filesystem_state",
"_int_counter_merge_reducer",
"_list_append_reducer",
"_replace_reducer",
]

View file

@ -46,7 +46,6 @@ logger = logging.getLogger(__name__)
# ``glob``, ``grep``) plus the SurfSense-side read tools.
EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
{
"search_surfsense_docs",
"web_search",
"scrape_webpage",
"read_file",
@ -61,7 +60,6 @@ EXPLORE_READ_TOOLS: frozenset[str] = frozenset(
# is needed, the parent should hand off to ``explore`` first.
REPORT_WRITER_TOOLS: frozenset[str] = frozenset(
{
"search_surfsense_docs",
"read_file",
"generate_report",
}
@ -222,7 +220,6 @@ EXPLORE_SYSTEM_PROMPT = """You are the **explore** subagent for SurfSense.
Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected.
## Tools available
- `search_surfsense_docs` fast hybrid search over the user's knowledge base.
- `web_search` only when the user's KB clearly does not contain the answer.
- `scrape_webpage` to read a URL the user or the search results provided.
- `read_file`, `ls`, `glob`, `grep` to inspect specific documents or trees the parent has flagged.
@ -242,7 +239,7 @@ Produce a single high-quality report deliverable using `generate_report`. The pa
## Workflow
1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions.
2. **Source resolution.** Decide whether to call `search_surfsense_docs` and `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set.
2. **Source resolution.** Decide whether to call `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set.
3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill).
4. **Confirm.** End with a one-sentence summary in your final message never paste the report back into chat; the artifact card renders itself.
"""

View file

@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent.
To add a new tool, see the documentation in registry.py.
Available tools:
- search_surfsense_docs: Search Surfsense documentation for usage help
- generate_podcast: Generate audio podcasts from content
- generate_video_presentation: Generate video presentations with slides and narration
- generate_image: Generate images from text descriptions using AI models
@ -31,7 +30,6 @@ from .registry import (
get_tool_by_name,
)
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .update_memory import create_update_memory_tool, create_update_team_memory_tool
from .video_presentation import create_generate_video_presentation_tool
@ -47,7 +45,6 @@ __all__ = [
"create_generate_podcast_tool",
"create_generate_video_presentation_tool",
"create_scrape_webpage_tool",
"create_search_surfsense_docs_tool",
"create_update_memory_tool",
"create_update_team_memory_tool",
"format_documents_for_context",

View file

@ -2,17 +2,23 @@
Podcast generation tool for the SurfSense agent.
This module provides a factory function for creating the generate_podcast tool
that submits a Celery task for background podcast generation. The frontend
polls for completion and auto-updates when the podcast is ready.
that submits a Celery task for background podcast generation. The tool then
polls the podcast row until it reaches a terminal status (READY/FAILED) and
returns that status. The wait is bounded by the chat's HTTP / process
lifetime; see app.agents.shared.deliverable_wait for details.
"""
import logging
from typing import Any
from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.deliverable_wait import wait_for_deliverable
from app.db import Podcast, PodcastStatus, shielded_async_session
logger = logging.getLogger(__name__)
def create_generate_podcast_tool(
search_space_id: int,
@ -97,18 +103,53 @@ def create_generate_podcast_tool(
user_prompt=user_prompt,
)
print(f"[generate_podcast] Created podcast {podcast_id}, task: {task.id}")
logger.info(
"[generate_podcast] Created podcast %s, task: %s",
podcast_id,
task.id,
)
# Wait until the Celery worker flips the row to a terminal
# state. No internal budget — see deliverable_wait module.
terminal_status, columns, elapsed = await wait_for_deliverable(
model=Podcast,
row_id=podcast_id,
columns=[Podcast.status, Podcast.file_location],
terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED},
)
if terminal_status == PodcastStatus.READY:
file_location = columns[1] if columns else None
logger.info(
"[generate_podcast] Podcast %s READY in %.2fs (file=%s)",
podcast_id,
elapsed,
file_location,
)
return {
"status": PodcastStatus.READY.value,
"podcast_id": podcast_id,
"title": podcast_title,
"file_location": file_location,
"message": ("Podcast generated and saved to your podcast panel."),
}
# Only other terminal state is FAILED.
logger.warning(
"[generate_podcast] Podcast %s FAILED in %.2fs",
podcast_id,
elapsed,
)
return {
"status": PodcastStatus.PENDING.value,
"status": PodcastStatus.FAILED.value,
"podcast_id": podcast_id,
"title": podcast_title,
"message": "Podcast generation started. This may take a few minutes.",
"error": ("Background worker reported FAILED status for this podcast."),
}
except Exception as e:
error_message = str(e)
print(f"[generate_podcast] Error: {error_message}")
logger.exception("[generate_podcast] Error: %s", error_message)
return {
"status": PodcastStatus.FAILED.value,
"error": error_message,

View file

@ -101,7 +101,6 @@ from .podcast import create_generate_podcast_tool
from .report import create_generate_report_tool
from .resume import create_generate_resume_tool
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .teams import (
create_list_teams_channels_tool,
create_read_teams_messages_tool,
@ -150,6 +149,28 @@ class ToolDefinition:
reverse: Callable[[dict[str, Any], Any], dict[str, Any]] | None = None
# =============================================================================
# Deferred-import factories
# =============================================================================
# Used for tools whose impls live under ``multi_agent_chat``. Importing those
# at module-load time would cycle (``multi_agent_chat`` middleware imports
# this registry). The import inside the factory runs only when
# ``build_tools`` is called, by which point ``multi_agent_chat`` is fully
# initialised.
def _build_create_automation_tool(deps: dict[str, Any]) -> BaseTool:
    """Build the ``create_automation`` tool from the registry dependencies.

    Args:
        deps: Dependency mapping; must contain ``"search_space_id"``,
            ``"user_id"`` and ``"llm"`` (a missing key raises ``KeyError``).

    Returns:
        The tool produced by ``create_create_automation_tool``.
    """
    # Imported here rather than at module scope: the implementation lives
    # under ``multi_agent_chat``, which imports this registry, so a
    # top-level import would be circular.
    from app.agents.multi_agent_chat.main_agent.tools.automation import (
        create_create_automation_tool as _make_tool,
    )

    required = ("search_space_id", "user_id", "llm")
    return _make_tool(**{key: deps[key] for key in required})
# =============================================================================
# Built-in Tools Registry
# =============================================================================
@ -236,15 +257,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
),
requires=[],
),
# Surfsense documentation search tool
ToolDefinition(
name="search_surfsense_docs",
description="Search Surfsense documentation for help with using the application",
factory=lambda deps: create_search_surfsense_docs_tool(
db_session=deps["db_session"],
),
requires=["db_session"],
),
# =========================================================================
# SERVICE ACCOUNT DISCOVERY
# Generic tool for the LLM to discover connected accounts and resolve
@ -261,6 +273,21 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
requires=["db_session", "search_space_id", "user_id"],
),
# =========================================================================
# AUTOMATION AUTHORING - single HITL tool. The tool takes an NL ``intent``
# from the main agent, drafts the full AutomationCreate JSON via a focused
# sub-LLM, surfaces it on an approval card, and persists on approval. The
# factory defers its import because the impl lives under ``multi_agent_chat``
# and that package transitively pulls this registry via middleware;
# deferring to ``build_tools`` call-time breaks the cycle without a
# parallel registry.
# =========================================================================
ToolDefinition(
name="create_automation",
description="Draft an automation from an NL intent; user approves the card; tool saves",
factory=_build_create_automation_tool,
requires=["search_space_id", "user_id", "llm"],
),
# =========================================================================
# MEMORY TOOL - single update_memory, private or team by thread_visibility
# =========================================================================
ToolDefinition(

View file

@ -1,174 +0,0 @@
"""
Surfsense documentation search tool.
This tool allows the agent to search the pre-indexed Surfsense documentation
to help users with questions about how to use the application.
The documentation is indexed at deployment time from MDX files and stored
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
"""
import asyncio
import json
from langchain_core.tools import tool
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
from app.utils.document_converters import embed_text
from app.utils.surfsense_docs import surfsense_docs_public_url
def format_surfsense_docs_results(results: list[tuple]) -> str:
    """Render documentation search hits as XML text for the LLM context.

    Rows are grouped by document (first-seen order), and every document is
    emitted as a ``<document>`` element holding a metadata header followed by
    its chunks. Both document ids and chunk ids are prefixed with ``doc-``.

    Args:
        results: ``(chunk, document)`` pairs as returned by the search query.

    Returns:
        The rendered XML string, or a fixed "nothing found" sentence when
        ``results`` is empty.
    """
    if not results:
        return "No relevant Surfsense documentation found for your query."

    # Bucket chunks under their parent document, keeping insertion order.
    by_document: dict[int, dict] = {}
    for chunk, doc in results:
        public_url = surfsense_docs_public_url(doc.source)
        entry = by_document.get(doc.id)
        if entry is None:
            entry = {
                "document_id": f"doc-{doc.id}",
                "document_type": "SURFSENSE_DOCS",
                "title": doc.title,
                "url": public_url,
                "metadata": {"source": doc.source, "public_url": public_url},
                "chunks": [],
            }
            by_document[doc.id] = entry
        entry["chunks"].append(
            {
                "chunk_id": f"doc-{chunk.id}",
                "content": chunk.content,
            }
        )

    # Emit one <document> element per bucket, separated by an empty line.
    lines: list[str] = []
    for entry in by_document.values():
        metadata_json = json.dumps(entry["metadata"], ensure_ascii=False)
        lines.extend(
            [
                "<document>",
                "<document_metadata>",
                f" <document_id>{entry['document_id']}</document_id>",
                f" <document_type>{entry['document_type']}</document_type>",
                f" <title><![CDATA[{entry['title']}]]></title>",
                f" <url><![CDATA[{entry['url']}]]></url>",
                f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
                "</document_metadata>",
                "",
                "<document_content>",
            ]
        )
        lines.extend(
            f" <chunk id='{item['chunk_id']}'><![CDATA[{item['content']}]]></chunk>"
            for item in entry["chunks"]
        )
        lines.extend(["</document_content>", "</document>", ""])

    return "\n".join(lines).strip()
async def search_surfsense_docs_async(
    query: str,
    db_session: AsyncSession,
    top_k: int = 10,
) -> str:
    """Find the documentation chunks closest to ``query`` and format them.

    Args:
        query: Free-text question about using the application.
        db_session: Session used to run the lookup.
        top_k: Maximum number of chunk rows to fetch.

    Returns:
        The output of ``format_surfsense_docs_results`` for the fetched rows.
    """
    # ``embed_text`` is a synchronous call, so it is pushed to a worker thread.
    query_embedding = await asyncio.to_thread(embed_text, query)

    # Rank chunks with the ``<=>`` operator against the query embedding
    # (presumably a pgvector distance operator — confirm against the schema).
    distance = SurfsenseDocsChunk.embedding.op("<=>")(query_embedding)
    chunks_with_docs = select(SurfsenseDocsChunk, SurfsenseDocsDocument).join(
        SurfsenseDocsDocument,
        SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id,
    )
    stmt = chunks_with_docs.order_by(distance).limit(top_k)

    rows = (await db_session.execute(stmt)).all()
    return format_surfsense_docs_results(rows)
def create_search_surfsense_docs_tool(db_session: AsyncSession):
    """Build the ``search_surfsense_docs`` tool.

    The returned tool never uses the session passed in here. Each call opens
    a fresh session from ``async_session_maker`` instead, so the closure does
    not hold on to a per-request session.

    Args:
        db_session: Accepted only to keep the factory signature uniform with
            the registry; it is discarded immediately.

    Returns:
        The ``@tool``-decorated coroutine that searches the documentation.
    """
    # Drop the reference right away so the closure cannot capture it.
    del db_session

    @tool
    async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
        """
        Search Surfsense documentation for help with using the application.
        Use this tool when the user asks questions about:
        - How to use Surfsense features
        - Installation and setup instructions
        - Configuration options and settings
        - Troubleshooting common issues
        - Available connectors and integrations
        - Browser extension usage
        - API documentation
        This searches the official Surfsense documentation that was indexed
        at deployment time. It does NOT search the user's personal knowledge base.
        Args:
        query: The search query about Surfsense usage or features
        top_k: Number of documentation chunks to retrieve (default: 10)
        Returns:
        Relevant documentation content formatted with chunk IDs for citations
        """
        # One short-lived session per invocation.
        async with async_session_maker() as session:
            formatted = await search_surfsense_docs_async(
                query=query,
                db_session=session,
                top_k=top_k,
            )
        return formatted

    return search_surfsense_docs

View file

@ -2,17 +2,23 @@
Video presentation generation tool for the SurfSense agent.
This module provides a factory function for creating the generate_video_presentation
tool that submits a Celery task for background video presentation generation.
The frontend polls for completion and auto-updates when the presentation is ready.
tool that submits a Celery task for background video presentation generation. The
tool then polls the row until it reaches a terminal status (READY/FAILED) and
returns that status. The wait is bounded by the chat's HTTP / process lifetime;
see app.agents.shared.deliverable_wait for details.
"""
import logging
from typing import Any
from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.deliverable_wait import wait_for_deliverable
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
logger = logging.getLogger(__name__)
def create_generate_video_presentation_tool(
search_space_id: int,
@ -72,20 +78,56 @@ def create_generate_video_presentation_tool(
user_prompt=user_prompt,
)
print(
f"[generate_video_presentation] Created video presentation {video_pres_id}, task: {task.id}"
logger.info(
"[generate_video_presentation] Created video presentation %s, task: %s",
video_pres_id,
task.id,
)
# Wait until the Celery worker flips the row to a terminal
# state. No internal budget — see deliverable_wait module.
terminal_status, _columns, elapsed = await wait_for_deliverable(
model=VideoPresentation,
row_id=video_pres_id,
columns=[VideoPresentation.status],
terminal_statuses={
VideoPresentationStatus.READY,
VideoPresentationStatus.FAILED,
},
)
if terminal_status == VideoPresentationStatus.READY:
logger.info(
"[generate_video_presentation] %s READY in %.2fs",
video_pres_id,
elapsed,
)
return {
"status": VideoPresentationStatus.READY.value,
"video_presentation_id": video_pres_id,
"title": video_title,
"message": "Video presentation generated and saved.",
}
# Only other terminal state is FAILED.
logger.warning(
"[generate_video_presentation] %s FAILED in %.2fs",
video_pres_id,
elapsed,
)
return {
"status": VideoPresentationStatus.PENDING.value,
"status": VideoPresentationStatus.FAILED.value,
"video_presentation_id": video_pres_id,
"title": video_title,
"message": "Video presentation generation started. This may take a few minutes.",
"error": (
"Background worker reported FAILED status for this "
"video presentation."
),
}
except Exception as e:
error_message = str(e)
print(f"[generate_video_presentation] Error: {error_message}")
logger.exception("[generate_video_presentation] Error: %s", error_message)
return {
"status": VideoPresentationStatus.FAILED.value,
"error": error_message,

Some files were not shown because too many files have changed in this diff Show more