feat: enhance task management and timeout configurations in multi-agent chat

- Added new environment variables for controlling task execution limits, including `SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`, `SURFSENSE_TASK_BATCH_CONCURRENCY`, and `SURFSENSE_TASK_BATCH_MAX_SIZE`.
- Updated documentation to reflect new batch processing capabilities for `task` calls, allowing for concurrent execution of multiple subagent tasks.
- Improved error handling and receipt generation for deliverables, ensuring consistent feedback on task status.
- Refactored middleware to incorporate search space ID for better task management.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-05-27 14:58:10 -07:00
parent 820f541f08
commit 9d6e9b7e2d
66 changed files with 2561 additions and 380 deletions

View file

@ -33,9 +33,11 @@ from typing_extensions import TypedDict
from app.agents.new_chat.state_reducers import (
_add_unique_reducer,
_dict_merge_with_tombstones_reducer,
_int_counter_merge_reducer,
_list_append_reducer,
_replace_reducer,
)
from app.agents.shared.receipt import Receipt
class PendingMove(TypedDict, total=False):
@ -172,6 +174,35 @@ class SurfSenseFilesystemState(FilesystemState):
workspace_tree_text: NotRequired[Annotated[str, _replace_reducer]]
"""Pre-rendered ``<workspace_tree>`` body; shared with subagents to skip re-render."""
billable_calls: NotRequired[Annotated[dict[str, int], _int_counter_merge_reducer]]
"""Per-subagent ``task(...)`` invocation counter, summed across the turn.
Incremented by ``task_tool.py`` each time a subagent invocation
completes (single- or batch-mode). The orchestrator can read this map
to self-limit when a runaway loop sends the same specialist 20 calls
in a row; the runtime emits a soft warning ToolMessage once the
cumulative count crosses :data:`DEFAULT_SUBAGENT_BILLABLE_THRESHOLD`.
Cleared by checkpoint rollover (i.e. per turn).
"""
receipts: NotRequired[Annotated[list[Receipt], _list_append_reducer]]
"""Structured Receipt handles emitted by mutating subagent tools this turn.
Each mutating tool (deliverables, every connector, KB writes via the
persistence middleware) wraps its native return into a
:class:`~app.agents.shared.receipt.Receipt`
and returns it under the ``"receipt"`` key alongside its existing
payload. The subagent's tool-call middleware folds the receipt into
this list, and ``_return_command_with_state_update`` in
``checkpointed_subagent_middleware/task_tool.py`` carries the list up
to the parent automatically (``"receipts"`` is not in
``EXCLUDED_STATE_KEYS``).
Append-only across the turn; cleared by checkpoint rollover. The
orchestrator reads it via the ``<verification>`` teaching to confirm
side-effecting subagent claims (see ``shared/snippets/verifiable_handle.md``).
"""
__all__ = [
"KbAnonDoc",

View file

@ -34,8 +34,7 @@ from deepagents.middleware.summarization import (
)
from langchain_core.messages import SystemMessage
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
if TYPE_CHECKING:
from deepagents.backends.protocol import BACKEND_TYPES

View file

@ -47,8 +47,7 @@ from langgraph.config import get_config
from langgraph.runtime import Runtime
from langgraph.types import interrupt
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
logger = logging.getLogger(__name__)

View file

@ -55,6 +55,7 @@ from app.agents.new_chat.path_resolver import (
virtual_path_to_doc,
)
from app.agents.new_chat.state_reducers import _CLEAR
from app.agents.shared.receipt import Receipt, make_receipt
from app.db import (
AgentActionLog,
Chunk,
@ -1392,6 +1393,81 @@ async def commit_staged_filesystem_state(
"pending_dir_deletes": [_CLEAR],
"dirty_path_tool_calls": {_CLEAR: True},
}
# Emit one Receipt per committed mutation, folded into ``state['receipts']``
# via ``_list_append_reducer``. The receipts surface what actually committed
# (post-savepoint) rather than what the LLM intended; the orchestrator uses
# them as ground truth in the ``<verification>`` teaching. KB writes do not
# have public verifiable URLs, so ``verifiable_url`` stays unset.
receipts: list[Receipt] = []
def _kb_receipt(
    *,
    type: str,
    operation: str,
    path: str,
    external_id: int | None = None,
) -> None:
    """Record one successful knowledge-base mutation in ``receipts``.

    Does nothing when ``path`` is empty. Otherwise builds a receipt via
    ``make_receipt`` with route ``"knowledge_base"`` and status
    ``"success"`` and appends it to the enclosing ``receipts`` list.

    Args:
        type: Receipt type forwarded to ``make_receipt`` (callers pass
            ``"file"`` or ``"folder"``).
        operation: Operation label forwarded to ``make_receipt``.
        path: Virtual path of the affected entry; its last ``/``-separated
            segment becomes the preview.
        external_id: Row id, when known. Stringified for the receipt; when
            ``None`` the path itself is used as the external id.
    """
    if path:
        # Last path segment is the human-readable preview; fall back to the
        # full path when that segment is empty (e.g. a trailing slash).
        leaf = path.rsplit("/", 1)[-1]
        handle = path if external_id is None else str(external_id)
        receipts.append(
            make_receipt(
                route="knowledge_base",
                type=type,
                operation=operation,
                status="success",
                external_id=handle,
                preview=leaf or path,
            )
        )
for payload in committed_creates:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="write_file",
path=path,
external_id=payload.get("id"),
)
for payload in committed_updates:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="edit_file",
path=path,
external_id=payload.get("id"),
)
for payload in applied_moves:
# ``applied_moves`` rows carry the destination ``virtualPath`` because
# the move has already landed in the DB by the time we reach this code.
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="move_file",
path=path,
external_id=payload.get("id"),
)
for path in staged_dirs:
_kb_receipt(type="folder", operation="mkdir", path=path)
for payload in committed_deletes:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="file",
operation="rm",
path=path,
external_id=payload.get("id"),
)
for payload in committed_folder_deletes:
path = str(payload.get("virtualPath") or "")
_kb_receipt(
type="folder",
operation="rmdir",
path=path,
external_id=payload.get("id"),
)
if receipts:
delta["receipts"] = receipts
files_delta: dict[str, Any] = {}
if temp_paths:
files_delta.update(dict.fromkeys(temp_paths))

View file

@ -61,8 +61,7 @@ from app.agents.new_chat.permissions import (
aggregate_action,
evaluate_many,
)
from app.observability import metrics as ot_metrics
from app.observability import otel as ot
from app.observability import metrics as ot_metrics, otel as ot
logger = logging.getLogger(__name__)

View file

@ -171,6 +171,39 @@ def _dict_merge_with_tombstones_reducer(
return result
def _int_counter_merge_reducer(
    left: dict[str, int] | None,
    right: dict[str, int] | None,
) -> dict[str, int]:
    """Combine two per-key integer counter maps by adding matching keys.

    Intended for state fields that accumulate counts over several updates
    (e.g. per-subagent ``billable_calls``).

    * ``right is None``: a shallow copy of ``left`` (or ``{}``) is returned.
    * ``right`` carries a clear sentinel key: ``left`` is discarded and the
      result is built only from ``right``'s non-sentinel entries.
    * Otherwise each entry of ``right`` is added onto a copy of ``left``;
      keys missing from ``left`` start at zero.

    Values that are not ``int`` instances are skipped. Neither input
    mapping is mutated.
    """
    if right is None:
        return dict(left or {})

    # A sentinel anywhere among the incoming keys restarts the accumulator.
    wipe = _CLEAR in right or any(_is_clear(name) for name in right)
    totals: dict[str, int] = {} if wipe else dict(left or {})

    for name, amount in right.items():
        # Sentinel keys are markers only; they never become counters.
        if wipe and _is_clear(name):
            continue
        if isinstance(amount, int):
            totals[name] = totals.get(name, 0) + amount
    return totals
def _initial_filesystem_state() -> dict[str, Any]:
"""Default empty values for SurfSense filesystem state fields.
@ -200,6 +233,7 @@ __all__ = [
"_add_unique_reducer",
"_dict_merge_with_tombstones_reducer",
"_initial_filesystem_state",
"_int_counter_merge_reducer",
"_list_append_reducer",
"_replace_reducer",
]

View file

@ -2,17 +2,23 @@
Podcast generation tool for the SurfSense agent.
This module provides a factory function for creating the generate_podcast tool
that submits a Celery task for background podcast generation. The frontend
polls for completion and auto-updates when the podcast is ready.
that submits a Celery task for background podcast generation. The tool then
polls the podcast row until it reaches a terminal status (READY/FAILED) and
returns that status. The wait is bounded by the chat's HTTP / process
lifetime; see app.agents.shared.deliverable_wait for details.
"""
import logging
from typing import Any
from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.deliverable_wait import wait_for_deliverable
from app.db import Podcast, PodcastStatus, shielded_async_session
logger = logging.getLogger(__name__)
def create_generate_podcast_tool(
search_space_id: int,
@ -97,18 +103,57 @@ def create_generate_podcast_tool(
user_prompt=user_prompt,
)
print(f"[generate_podcast] Created podcast {podcast_id}, task: {task.id}")
logger.info(
"[generate_podcast] Created podcast %s, task: %s",
podcast_id,
task.id,
)
# Wait until the Celery worker flips the row to a terminal
# state. No internal budget — see deliverable_wait module.
terminal_status, columns, elapsed = await wait_for_deliverable(
model=Podcast,
row_id=podcast_id,
columns=[Podcast.status, Podcast.file_location],
terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED},
)
if terminal_status == PodcastStatus.READY:
file_location = columns[1] if columns else None
logger.info(
"[generate_podcast] Podcast %s READY in %.2fs (file=%s)",
podcast_id,
elapsed,
file_location,
)
return {
"status": PodcastStatus.READY.value,
"podcast_id": podcast_id,
"title": podcast_title,
"file_location": file_location,
"message": (
"Podcast generated and saved to your podcast panel."
),
}
# Only other terminal state is FAILED.
logger.warning(
"[generate_podcast] Podcast %s FAILED in %.2fs",
podcast_id,
elapsed,
)
return {
"status": PodcastStatus.PENDING.value,
"status": PodcastStatus.FAILED.value,
"podcast_id": podcast_id,
"title": podcast_title,
"message": "Podcast generation started. This may take a few minutes.",
"error": (
"Background worker reported FAILED status for this podcast."
),
}
except Exception as e:
error_message = str(e)
print(f"[generate_podcast] Error: {error_message}")
logger.exception("[generate_podcast] Error: %s", error_message)
return {
"status": PodcastStatus.FAILED.value,
"error": error_message,

View file

@ -2,17 +2,23 @@
Video presentation generation tool for the SurfSense agent.
This module provides a factory function for creating the generate_video_presentation
tool that submits a Celery task for background video presentation generation.
The frontend polls for completion and auto-updates when the presentation is ready.
tool that submits a Celery task for background video presentation generation. The
tool then polls the row until it reaches a terminal status (READY/FAILED) and
returns that status. The wait is bounded by the chat's HTTP / process lifetime;
see app.agents.shared.deliverable_wait for details.
"""
import logging
from typing import Any
from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.shared.deliverable_wait import wait_for_deliverable
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
logger = logging.getLogger(__name__)
def create_generate_video_presentation_tool(
search_space_id: int,
@ -72,20 +78,58 @@ def create_generate_video_presentation_tool(
user_prompt=user_prompt,
)
print(
f"[generate_video_presentation] Created video presentation {video_pres_id}, task: {task.id}"
logger.info(
"[generate_video_presentation] Created video presentation %s, task: %s",
video_pres_id,
task.id,
)
# Wait until the Celery worker flips the row to a terminal
# state. No internal budget — see deliverable_wait module.
terminal_status, _columns, elapsed = await wait_for_deliverable(
model=VideoPresentation,
row_id=video_pres_id,
columns=[VideoPresentation.status],
terminal_statuses={
VideoPresentationStatus.READY,
VideoPresentationStatus.FAILED,
},
)
if terminal_status == VideoPresentationStatus.READY:
logger.info(
"[generate_video_presentation] %s READY in %.2fs",
video_pres_id,
elapsed,
)
return {
"status": VideoPresentationStatus.READY.value,
"video_presentation_id": video_pres_id,
"title": video_title,
"message": "Video presentation generated and saved.",
}
# Only other terminal state is FAILED.
logger.warning(
"[generate_video_presentation] %s FAILED in %.2fs",
video_pres_id,
elapsed,
)
return {
"status": VideoPresentationStatus.PENDING.value,
"status": VideoPresentationStatus.FAILED.value,
"video_presentation_id": video_pres_id,
"title": video_title,
"message": "Video presentation generation started. This may take a few minutes.",
"error": (
"Background worker reported FAILED status for this "
"video presentation."
),
}
except Exception as e:
error_message = str(e)
print(f"[generate_video_presentation] Error: {error_message}")
logger.exception(
"[generate_video_presentation] Error: %s", error_message
)
return {
"status": VideoPresentationStatus.FAILED.value,
"error": error_message,