mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
feat: enhance task management and timeout configurations in multi-agent chat
- Added new environment variables for controlling task execution limits, including `SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`, `SURFSENSE_TASK_BATCH_CONCURRENCY`, and `SURFSENSE_TASK_BATCH_MAX_SIZE`.
- Updated documentation to reflect new batch processing capabilities for `task` calls, allowing for concurrent execution of multiple subagent tasks.
- Improved error handling and receipt generation for deliverables, ensuring consistent feedback on task status.
- Refactored middleware to incorporate search space ID for better task management.
This commit is contained in:
parent
820f541f08
commit
9d6e9b7e2d
66 changed files with 2561 additions and 380 deletions
9
surfsense_backend/app/agents/shared/__init__.py
Normal file
9
surfsense_backend/app/agents/shared/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
"""Cross-package agent contracts.
|
||||
|
||||
Symbols here are intentionally framework-light (no LangGraph / deepagents
|
||||
internals) so they can be imported from both ``app.agents.new_chat`` and
|
||||
``app.agents.multi_agent_chat`` without creating a circular dependency
|
||||
between the two packages. See ``receipt.py`` for the rationale.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
123
surfsense_backend/app/agents/shared/deliverable_wait.py
Normal file
123
surfsense_backend/app/agents/shared/deliverable_wait.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
"""Shared poll-until-terminal helper for Celery-backed deliverables.
|
||||
|
||||
Lives in ``app.agents.shared`` (neutral package, no dependencies on either
|
||||
``new_chat`` or ``multi_agent_chat``) so both the flat single-agent tools
|
||||
under ``app/agents/new_chat/tools/`` and the multi-agent subagent tools
|
||||
under ``app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/``
|
||||
can import it without creating a circular dependency.
|
||||
|
||||
Background
|
||||
----------
|
||||
Tools like ``generate_podcast`` and ``generate_video_presentation`` enqueue
|
||||
the heavy work to Celery and historically returned immediately with a
|
||||
"pending" status. That works for very-long deliverables but hurts UX for
|
||||
the common case (most podcasts finish in 10-30 seconds): the agent sends
|
||||
a "kicked off, check back in a minute" reply *before* the worker is done,
|
||||
so the user never gets a "ready" confirmation.
|
||||
|
||||
This helper bridges that gap. The tool dispatches the Celery task as
|
||||
before, then polls the artefact row's ``status`` column **until it
|
||||
reaches a terminal value** (READY / FAILED). The tool then returns a
|
||||
real terminal outcome — never a pending one.
|
||||
|
||||
No wall-clock budget here on purpose
|
||||
------------------------------------
|
||||
Layering a second budget on top of the existing per-invocation safety
|
||||
nets just confused the UX. The real ceilings are:
|
||||
|
||||
* **Multi-agent mode** — ``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS``
|
||||
(default ``300.0``, ``0`` to disable) caps how long any single
|
||||
``task(subagent, ...)`` invocation can run. If a deliverable needs
|
||||
longer than this, the subagent invocation is cancelled and the
|
||||
orchestrator surfaces a "subagent timed out" ToolMessage. Operators
|
||||
who routinely generate long videos should raise that ceiling (or set
|
||||
it to ``0`` for true unbounded waits).
|
||||
* **Single-agent mode** — the chat's HTTP stream / process lifetime is
|
||||
the only ceiling. Truly indefinite waits work here, but a dead Celery
|
||||
worker will leave the row in PENDING/GENERATING forever; treat that
|
||||
as an operational concern, not a UX concern.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
None. The poll cadence is hardcoded at 1.5s — small enough to feel
|
||||
responsive (~6 polls per typical 10s podcast), large enough to avoid
|
||||
hammering the DB under burst traffic. Override at the call site if a
|
||||
specific tool needs a different cadence.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import InstrumentedAttribute
|
||||
|
||||
from app.db import shielded_async_session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_DEFAULT_POLL_INTERVAL_SECONDS: float = 1.5
|
||||
|
||||
|
||||
async def wait_for_deliverable(
    *,
    model: type,
    row_id: int,
    columns: list[InstrumentedAttribute[Any]],
    terminal_statuses: set[Enum],
    poll_interval_s: float = _DEFAULT_POLL_INTERVAL_SECONDS,
) -> tuple[Enum, tuple[Any, ...], float]:
    """Wait until the ``model`` row ``row_id`` reports a terminal status.

    The row is re-read every ``poll_interval_s`` seconds and the call only
    returns once the value of ``columns[0]`` is a member of
    ``terminal_statuses``. No deadline is enforced here: callers that need
    a ceiling must cancel the coroutine from the outside (subagent
    timeout, HTTP disconnect, task cancellation).

    Parameters
    ----------
    model:
        Mapped class whose ``id`` attribute is compared against ``row_id``.
    row_id:
        Primary-key value of the row to watch.
    columns:
        Columns to select. Position 0 must be the status column; any
        further columns are fetched in the same query so the caller does
        not need a second roundtrip to build its payload.
    terminal_statuses:
        Status values that end the wait.
    poll_interval_s:
        Seconds slept before every poll, including the first one.

    Returns
    -------
    ``(terminal_status, columns, elapsed_seconds)``
        ``columns`` holds the selected values in the order requested
        (status included at index 0); ``elapsed_seconds`` is measured
        with ``time.monotonic`` from the start of the call.

    Raises
    ------
    ValueError
        If ``columns`` is empty.

    Notes
    -----
    Every poll opens its own ``shielded_async_session`` so no session is
    kept open while sleeping. A poll that raises is logged at WARNING
    level and the wait carries on; a row that cannot be found is treated
    the same as a non-terminal row, so the loop keeps polling.
    """
    if not columns:
        raise ValueError("wait_for_deliverable requires at least the status column")

    began = time.monotonic()

    while True:
        # Sleep comes first: every read, including the very first one,
        # happens one interval after the previous step.
        await asyncio.sleep(poll_interval_s)

        # Reset each round so a failed poll can never reuse stale data.
        snapshot: tuple[Any, ...] | None = None
        try:
            async with shielded_async_session() as session:
                query = select(*columns).where(model.id == row_id)
                outcome = await session.execute(query)
                snapshot = outcome.first()
        except Exception as exc:
            # Best effort by design: log and fall through. If the row was
            # already fetched before the failure it is still evaluated below.
            logger.warning(
                "[deliverable_wait] poll failed model=%s id=%s err=%r",
                getattr(model, "__name__", str(model)),
                row_id,
                exc,
            )

        if snapshot is None:
            # Nothing usable this round (no row, or the poll failed early).
            continue

        current_status = snapshot[0]
        if current_status in terminal_statuses:
            return current_status, tuple(snapshot), time.monotonic() - began
|
||||
161
surfsense_backend/app/agents/shared/receipt.py
Normal file
161
surfsense_backend/app/agents/shared/receipt.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
"""Receipt: structured handle returned by every mutating subagent tool.
|
||||
|
||||
Generalises the Hermes ``entry`` dict (see ``references/hermes-agent/tools/
|
||||
delegate_tool.py:1663-1697``) for our 5 deliverable types + 15 connectors +
|
||||
KB writes. The supervisor reads the Receipt to verify what actually happened
|
||||
without round-tripping through LLM paraphrase.
|
||||
|
||||
**Why this lives under ``app.agents.shared`` and not under either of the
|
||||
two agent packages:** the Receipt is a *contract* shared between
|
||||
``multi_agent_chat`` (where mutating tools emit it) and ``new_chat``
|
||||
(where ``filesystem_state.SurfSenseFilesystemState`` declares the
|
||||
``receipts`` reducer that accumulates it, and where
|
||||
``middleware.kb_persistence`` emits its own KB-write receipts). Putting
|
||||
the contract in either package would create a bidirectional import
|
||||
between the two — see the commit that introduced this module for the
|
||||
``ImportError`` chain it broke.
|
||||
|
||||
Each mutating tool wraps its native return shape into a Receipt via
|
||||
:func:`make_receipt` (or builds one directly) and returns it under the
|
||||
``"receipt"`` key alongside its existing payload. The subagent boundary
|
||||
machinery in ``checkpointed_subagent_middleware.task_tool`` then folds
|
||||
the receipt into the parent's ``receipts`` state via the append reducer.
|
||||
|
||||
The KB write path is the one exception: file-tool calls cannot emit a
|
||||
durable receipt because the actual DB writes happen end-of-turn inside
|
||||
:class:`app.agents.new_chat.middleware.kb_persistence.KnowledgeBasePersistenceMiddleware`.
|
||||
KB tools therefore emit a *provisional* receipt with ``status="pending"``;
|
||||
the persistence middleware flips it to ``"success"`` or ``"failed"``
|
||||
before returning control to the parent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Literal, TypedDict
|
||||
|
||||
# Subagent (route) that emitted this receipt: the ``deliverables`` and
# ``knowledge_base`` subagents plus one entry per connector.
ReceiptRoute = Literal[
    "deliverables",
    "knowledge_base",
    "notion",
    "slack",
    "gmail",
    "linear",
    "jira",
    "clickup",
    "confluence",
    "calendar",
    "luma",
    "airtable",
    "google_drive",
    "dropbox",
    "onedrive",
    "discord",
    "teams",
]

# Within-route kind of artefact / external resource the operation touched.
# Left as ``str`` rather than a giant union so each route file documents
# its own enum next to its tools.
ReceiptType = str

# Operation verb. Kept open (plain ``str``) for the same reason as
# ``ReceiptType``.
ReceiptOperation = str

# Outcome of the operation. ``pending`` marks a result that is not final
# yet: an async backend job (Celery podcast / video) whose progress the
# orchestrator surfaces out of band, or a provisional KB-write receipt that
# the persistence middleware later flips to ``success`` or ``failed``.
ReceiptStatus = Literal["success", "pending", "failed"]
|
||||
|
||||
|
||||
class Receipt(TypedDict, total=False):
    """Structured per-mutation handle returned to the parent subagent.

    Declared with ``total=False``, so every key is optional at the type
    level and each route's tool can populate only the fields it actually
    has — e.g. Gmail never sets ``verifiable_url`` because Gmail doesn't
    expose per-message URLs. Consumers should therefore test for key
    presence (``"verifiable_url" in receipt``) rather than compare
    against ``None``; :func:`make_receipt` builds receipts that follow
    this convention by omitting ``None``-valued optional keys.
    """

    route: ReceiptRoute
    """Subagent name. Lets the orchestrator filter ``state['receipts']``
    by route without re-deriving from ``type``."""

    type: ReceiptType
    """Within-route kind. e.g. for ``deliverables`` one of ``{report,
    podcast, video_presentation, resume, image}``; for ``notion`` ``page``;
    for ``slack`` ``message``."""

    operation: ReceiptOperation
    """Verb. e.g. ``generate`` (deliverables), ``create`` / ``update`` /
    ``delete`` (most connectors), ``send`` / ``post`` (chat), ``write_file``
    / ``edit_file`` / ``rm`` / ``rmdir`` / ``move_file`` / ``mkdir`` (KB)."""

    status: ReceiptStatus
    """``success`` / ``pending`` / ``failed``. The verification teaching
    in ``shared/snippets/verifiable_handle.md`` keys off this field."""

    external_id: str | None
    """Backend identifier. Report row id, Notion ``page_id``, Slack ``ts``,
    Gmail ``message_id``, Linear identifier, KB ``virtualPath``, etc.
    ``None`` only when the operation failed before the backend assigned one."""

    verifiable_url: str | None
    """URL the parent can pass to ``scrape_webpage`` to verify the
    operation. ``None`` when no public URL exists (Gmail, KB, raw images
    stored in the DB)."""

    preview: str | None
    """Short snippet (~200 chars) of what was produced. First lines of
    a generated report's markdown, transcript opener for a podcast,
    thumbnail URL for an image. Lets the orchestrator decide whether to
    re-render in the UI without re-loading the artefact."""

    # NOTE(review): the "filled iff failed" rule below is a convention only;
    # ``make_receipt`` accepts ``error`` with any ``status`` and does not
    # validate the pairing.
    error: str | None
    """Filled iff ``status == "failed"``. Plain-text reason; the parent
    surfaces it in its own ``next_step``."""
|
||||
|
||||
|
||||
def make_receipt(
    *,
    route: ReceiptRoute,
    type: str,
    operation: str,
    status: ReceiptStatus,
    external_id: str | None = None,
    verifiable_url: str | None = None,
    preview: str | None = None,
    error: str | None = None,
) -> Receipt:
    """Build a :class:`Receipt`, leaving out optional fields that are ``None``.

    ``route``, ``type``, ``operation`` and ``status`` are always present in
    the result. Each of ``external_id``, ``verifiable_url``, ``preview``
    and ``error`` is included only when a non-``None`` value was passed
    (empty strings are kept), so consumers can rely on key presence —
    e.g. ``"verifiable_url" in receipt`` — instead of checking for
    ``None``.

    All arguments are keyword-only. Returns a plain ``dict`` typed as
    :class:`Receipt`.
    """
    always_present: dict[str, Any] = {
        "route": route,
        "type": type,
        "operation": operation,
        "status": status,
    }
    # Declared in the order the keys must appear in the receipt.
    maybe_present: dict[str, Any] = {
        "external_id": external_id,
        "verifiable_url": verifiable_url,
        "preview": preview,
        "error": error,
    }
    supplied = {
        key: value for key, value in maybe_present.items() if value is not None
    }
    return {**always_present, **supplied}  # type: ignore[return-value]
|
||||
|
||||
|
||||
__all__ = [
|
||||
"Receipt",
|
||||
"ReceiptOperation",
|
||||
"ReceiptRoute",
|
||||
"ReceiptStatus",
|
||||
"ReceiptType",
|
||||
"make_receipt",
|
||||
]
|
||||
71
surfsense_backend/app/agents/shared/receipt_command.py
Normal file
71
surfsense_backend/app/agents/shared/receipt_command.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
"""Helper for wrapping a tool result with a Receipt in a ``Command(update=...)``.
|
||||
|
||||
Most mutating subagent tools historically returned a plain ``dict`` payload
|
||||
which deepagents serialised straight into the ``ToolMessage`` content. To
|
||||
participate in the verification teaching from
|
||||
``multi_agent_chat/subagents/shared/snippets/verifiable_handle.md`` those
|
||||
tools now also need to write a :class:`Receipt` into the parent's
|
||||
``state['receipts']`` list (declared on
|
||||
:class:`~app.agents.new_chat.filesystem_state.SurfSenseFilesystemState`
|
||||
and backed by the append reducer).
|
||||
|
||||
:func:`with_receipt` wraps both behaviours: it returns the tool payload as
|
||||
a JSON-encoded ``ToolMessage`` AND appends the receipt to state in a single
|
||||
:class:`~langgraph.types.Command`. Use it at every ``return`` site of a
|
||||
mutating tool — including failure paths (emit a receipt with
|
||||
``status="failed"`` and the error message in ``error``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langgraph.types import Command
|
||||
|
||||
from app.agents.shared.receipt import Receipt
|
||||
|
||||
|
||||
def _content_to_text(payload: dict[str, Any] | str) -> str:
|
||||
"""Serialise a tool payload to ``ToolMessage`` content.
|
||||
|
||||
Dicts go through ``json.dumps`` (matching deepagents' default tool-result
|
||||
serialisation); strings are passed through. Anything else is coerced via
|
||||
``str`` so we never raise here — a mis-typed tool return would already
|
||||
have failed inside the tool body.
|
||||
"""
|
||||
if isinstance(payload, str):
|
||||
return payload
|
||||
if isinstance(payload, dict):
|
||||
return json.dumps(payload, default=str)
|
||||
return str(payload)
|
||||
|
||||
|
||||
def with_receipt(
    *,
    payload: dict[str, Any] | str,
    receipt: Receipt,
    tool_call_id: str,
) -> Command:
    """Bundle a tool result and its receipt into one ``Command`` state update.

    The returned command updates two state keys:

    * ``"messages"`` — a single ``ToolMessage`` whose content is
      ``payload`` serialised by ``_content_to_text`` and whose
      ``tool_call_id`` is the one given;
    * ``"receipts"`` — a one-element list holding ``receipt``.

    The one-element list is meant to be merged by the append reducer
    (``_list_append_reducer``) declared on the ``receipts`` field of
    :class:`~app.agents.new_chat.filesystem_state.SurfSenseFilesystemState`,
    so concurrent subagent batches add to, rather than overwrite, each
    other's receipts. That reducer lives outside this module.
    """
    tool_message = ToolMessage(
        content=_content_to_text(payload),
        tool_call_id=tool_call_id,
    )
    state_update = {
        "messages": [tool_message],
        "receipts": [receipt],
    }
    return Command(update=state_update)
|
||||
|
||||
|
||||
__all__ = ["with_receipt"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue