mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
Merge pull request #1442 from MODSetter/feat/hermes-task-boundary-improvements
feat: enhance task management and timeout configurations in multi-age…
This commit is contained in:
commit
b645c3f54d
66 changed files with 2561 additions and 380 deletions
|
|
@ -357,3 +357,50 @@ LANGSMITH_PROJECT=surfsense
|
||||||
# updates and deletes — the TTL only bounds staleness for bulk-import
|
# updates and deletes — the TTL only bounds staleness for bulk-import
|
||||||
# paths that bypass the ORM. Set to 0 to disable the cache.
|
# paths that bypass the ORM. Set to 0 to disable the cache.
|
||||||
# SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS=30
|
# SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS=30
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# `task` boundary controls (Hermes-inspired improvements)
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Wall-clock budget for a single ``task(subagent, ...)`` invocation in
|
||||||
|
# seconds. Subagents that run hot (slow image vendors, sluggish embedders,
|
||||||
|
# wedged MCP servers) would otherwise pin the orchestrator until the next
|
||||||
|
# checkpoint heartbeat fires. On timeout the runtime cancels the underlying
|
||||||
|
# coroutine and synthesizes a ToolMessage telling the orchestrator to treat
|
||||||
|
# the result as ``status=error``. Set to 0 to disable the cap entirely.
|
||||||
|
# Default: 300.0
|
||||||
|
# SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS=300
|
||||||
|
|
||||||
|
# Batch-mode (``task(tasks=[...])``) concurrency cap and max batch size.
|
||||||
|
# Concurrency is enforced via an ``asyncio.Semaphore`` so a runaway fanout
|
||||||
|
# cannot starve unrelated subagents (each child still owns an LLM call and
|
||||||
|
# its own DB session). Max-size is a hard safety net for prompt-injection /
|
||||||
|
# runaway loops; the orchestrator rarely needs more than a handful of
|
||||||
|
# concurrent specialists. Set concurrency to 1 to effectively serialise
|
||||||
|
# batches without changing the schema.
|
||||||
|
# SURFSENSE_TASK_BATCH_CONCURRENCY=3
|
||||||
|
# SURFSENSE_TASK_BATCH_MAX_SIZE=8
|
||||||
|
|
||||||
|
# Soft per-turn cap on cumulative ``task(...)`` invocations across all
|
||||||
|
# subagents. Once the sum of ``state['billable_calls']`` crosses this
|
||||||
|
# number, the runtime appends a one-shot warning ToolMessage telling the
|
||||||
|
# orchestrator to wrap up rather than launching more specialists. Tunable
|
||||||
|
# so heavy-research turns (15+ legitimate specialist calls) don't trip the
|
||||||
|
# alarm in production. Set to 0 to disable the warning entirely.
|
||||||
|
# SURFSENSE_SUBAGENT_BILLABLE_THRESHOLD=15
|
||||||
|
|
||||||
|
# Per-workspace spawn-paused kill switch — set via Redis at runtime, not
|
||||||
|
# this env var. The env var below only disables the check itself (useful
|
||||||
|
# for local dev without Redis). To pause a workspace in production:
|
||||||
|
# redis-cli SET surfsense:spawn_paused:<search_space_id> 1 EX 600
|
||||||
|
# redis-cli DEL surfsense:spawn_paused:<search_space_id>
|
||||||
|
# The check is fail-open: a Redis blip never blocks ``task(...)``.
|
||||||
|
# SURFSENSE_TASK_SPAWN_PAUSED_DISABLED=false
|
||||||
|
|
||||||
|
# Note on Celery-backed deliverables (generate_podcast,
|
||||||
|
# generate_video_presentation): these tools poll the artefact row until
|
||||||
|
# it reaches a terminal status — they do NOT use an internal wall-clock
|
||||||
|
# budget. The effective ceiling is SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS
|
||||||
|
# (above, default 300s) in multi-agent mode and the chat's HTTP / process
|
||||||
|
# lifetime in single-agent mode. If your podcasts or videos routinely
|
||||||
|
# exceed 5 minutes, raise SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS (or
|
||||||
|
# set it to 0 to disable that ceiling entirely).
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,15 @@ Rules for `task`:
|
||||||
- Neither's prompt references the other's output, and
|
- Neither's prompt references the other's output, and
|
||||||
- They target different specialists, OR the same specialist with
|
- They target different specialists, OR the same specialist with
|
||||||
non-overlapping scopes (e.g. reading two unrelated paths).
|
non-overlapping scopes (e.g. reading two unrelated paths).
|
||||||
|
- **Batch shape for many-shot fanout.** When a single user request expands
|
||||||
|
to **3 or more independent specialist calls** (e.g. "create five issues
|
||||||
|
from this list"), prefer the batch shape:
|
||||||
|
`task(tasks=[{description, subagent_type}, ...])`. The runtime fans them
|
||||||
|
out concurrently under a small semaphore and aggregates one ToolMessage
|
||||||
|
per child prefixed with `[task <index>]`. Batched children **do not
|
||||||
|
support human-in-the-loop interrupts** — if one needs approval it surfaces
|
||||||
|
an error and you re-dispatch it as a single (non-batched) `task(...)` call.
|
||||||
|
For 1–2 independent calls, just emit them as separate (non-batched) `task(...)` calls.
|
||||||
- **Serialise dependent work across turns.** If one specialist's output
|
- **Serialise dependent work across turns.** If one specialist's output
|
||||||
must inform another's input (e.g. "find the roadmap in my KB, then
|
must inform another's input (e.g. "find the roadmap in my KB, then
|
||||||
email it to Maya"), invoke them on consecutive turns — first finishes,
|
email it to Maya"), invoke them on consecutive turns — first finishes,
|
||||||
|
|
@ -93,4 +102,65 @@ user: "Find my Q2 roadmap doc in the KB and email a summary to Maya."
|
||||||
task(gmail, "Send an email to Maya with subject 'Q2 roadmap summary'
|
task(gmail, "Send an email to Maya with subject 'Q2 roadmap summary'
|
||||||
and the following body: <summary returned by knowledge_base>.")
|
and the following body: <summary returned by knowledge_base>.")
|
||||||
</example>
|
</example>
|
||||||
|
|
||||||
|
<example>
|
||||||
|
user: "Create issues in Linear for each of these five bugs: <list>"
|
||||||
|
→ Many-shot independent fanout — use the batch shape:
|
||||||
|
task(tasks=[
|
||||||
|
{subagent_type: "linear", description: "Create a Linear issue titled
|
||||||
|
'<bug 1 title>' with body '<bug 1 body>'. Return the issue URL."},
|
||||||
|
{subagent_type: "linear", description: "Create a Linear issue titled
|
||||||
|
'<bug 2 title>' with body '<bug 2 body>'. Return the issue URL."},
|
||||||
|
{subagent_type: "linear", description: "Create a Linear issue titled
|
||||||
|
'<bug 3 title>' with body '<bug 3 body>'. Return the issue URL."},
|
||||||
|
{subagent_type: "linear", description: "Create a Linear issue titled
|
||||||
|
'<bug 4 title>' with body '<bug 4 body>'. Return the issue URL."},
|
||||||
|
{subagent_type: "linear", description: "Create a Linear issue titled
|
||||||
|
'<bug 5 title>' with body '<bug 5 body>'. Return the issue URL."},
|
||||||
|
])
|
||||||
|
Read back the `[task 0]`…`[task 4]` blocks in the combined ToolMessage and
|
||||||
|
verify each via its Receipt's `verifiable_url` per the `<verification>`
|
||||||
|
teaching before confirming to the user.
|
||||||
|
</example>
|
||||||
|
|
||||||
|
<example>
|
||||||
|
user: "Make a 30-second podcast of this conversation."
|
||||||
|
→ Celery-backed deliverable. The `deliverables` subagent dispatches the
|
||||||
|
Celery job and then **waits for it to finish** before returning. The
|
||||||
|
call may take 10-60 seconds (or longer for video presentations) —
|
||||||
|
that is intentional, not a hang. You always get back one of two
|
||||||
|
Receipt shapes:
|
||||||
|
task(deliverables, "Generate a podcast titled '<title>' from the
|
||||||
|
following content. Use a 30-second style brief. Return the podcast
|
||||||
|
id and title.\n\n<source content>")
|
||||||
|
Outcomes:
|
||||||
|
- **`status="success"`**: the audio is saved. Tell the user the
|
||||||
|
podcast is **ready** and quote the `external_id` / `preview` so
|
||||||
|
they can find it in the podcast panel.
|
||||||
|
- **`status="failed"`**: surface the Receipt's `error` field
|
||||||
|
verbatim. Do NOT silently re-dispatch — the backend already tried
|
||||||
|
and reported a real error.
|
||||||
|
Same two-way pattern applies to video presentations (which take
|
||||||
|
longer to render, but still return a terminal status). If a
|
||||||
|
`task(deliverables, ...)` invocation itself times out at the subagent
|
||||||
|
layer (separate from the Receipt), that's an operator-side problem
|
||||||
|
with the subagent invoke timeout, not a deliverable failure — pass
|
||||||
|
the message through and stop.
|
||||||
|
</example>
|
||||||
|
|
||||||
|
<example>
|
||||||
|
user: "Post the launch announcement to #general and let me know when it's up."
|
||||||
|
→ Mutating subagent + user wants external confirmation. Apply the
|
||||||
|
`<verification>` teaching: the slack subagent's reply is a self-report;
|
||||||
|
check its `evidence.receipts` for a Receipt with `status="success"` and
|
||||||
|
a `verifiable_url`, then fetch that URL to confirm before reporting back.
|
||||||
|
This turn:
|
||||||
|
task(slack, "Post '<launch announcement text>' to #general.
|
||||||
|
Return the message permalink.")
|
||||||
|
Next turn (with the receipt's `verifiable_url` in hand):
|
||||||
|
scrape_webpage(url=<verifiable_url from slack receipt>)
|
||||||
|
→ confirm the post is live, then tell the user it's up with the URL.
|
||||||
|
If the slack reply has NO Receipt with `status="success"`, treat it as a
|
||||||
|
silent failure: surface the error verbatim, do not retry.
|
||||||
|
</example>
|
||||||
</routing>
|
</routing>
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,69 @@
|
||||||
`<specialists>` for the live roster.
|
`<specialists>` for the live roster.
|
||||||
- Each subagent runs in isolation with its own tool stack and context,
|
- Each subagent runs in isolation with its own tool stack and context,
|
||||||
and returns a single synthesized result.
|
and returns a single synthesized result.
|
||||||
- Args:
|
- Args (single mode):
|
||||||
- `subagent_type` — name of the specialist to invoke (must match an
|
- `subagent_type` — name of the specialist to invoke (must match an
|
||||||
entry in `<specialists>`).
|
entry in `<specialists>`).
|
||||||
- `description` — the FULL task prompt. The specialist cannot see this
|
- `description` — the FULL task prompt. The specialist cannot see this
|
||||||
thread, so include all context and constraints, plus what you need
|
thread, so include all context and constraints, plus what you need
|
||||||
back. The specialist will respond in its own format — don't dictate
|
back. The specialist will respond in its own format — don't dictate
|
||||||
one.
|
one.
|
||||||
|
- Args (batch mode):
|
||||||
|
- `tasks` — array of `{description, subagent_type}` objects to fan out
|
||||||
|
concurrently. Mutually exclusive with single-mode args. Use when a
|
||||||
|
single request expands to **3 or more independent specialist calls**
|
||||||
|
(e.g. "create five issues from this list"). Children run under a
|
||||||
|
small concurrency cap and the runtime returns one ToolMessage block
|
||||||
|
per child, prefixed with `[task <index>]`. **Batched children do not
|
||||||
|
support human-in-the-loop interrupts** — if any child needs approval
|
||||||
|
it surfaces an error and you must re-dispatch that single task as a
|
||||||
|
non-batched `task(...)` call.
|
||||||
- Routing rules (when to call, how often, how to scope) live in
|
- Routing rules (when to call, how often, how to scope) live in
|
||||||
`<routing>`.
|
`<routing>`.
|
||||||
|
<verification>
|
||||||
|
A subagent's natural-language reply is a **self-report**, not proof. The
|
||||||
|
specialist might claim a Slack message was posted, a Jira issue was
|
||||||
|
created, or a report was generated even when the underlying tool call
|
||||||
|
failed silently or was rate-limited. Treat success language ("Done",
|
||||||
|
"Posted to #general", "Created ENG-42") as a hypothesis, not a fact.
|
||||||
|
|
||||||
|
Two ground-truth signals are always available to verify a mutating
|
||||||
|
subagent's claim:
|
||||||
|
|
||||||
|
1. **`state['receipts']`** — every mutating tool emits a structured
|
||||||
|
`Receipt` (route, type, operation, status, external_id,
|
||||||
|
verifiable_url, preview) into this append-only list. The supervisor
|
||||||
|
never sees the raw list directly, but each subagent's
|
||||||
|
`<output_contract>` carries the matching Receipt(s) under
|
||||||
|
`evidence.receipts`. If a subagent reports success with NO matching
|
||||||
|
Receipt at `status="success"` (or, rarely, `"pending"` for work that has
|
||||||
|
only been kicked off), the operation did not happen — treat as
|
||||||
|
failure and surface that to the user verbatim, do not retry blindly.
|
||||||
|
|
||||||
|
2. **`scrape_webpage`** — when a Receipt carries a `verifiable_url`
|
||||||
|
(Notion page URL, Slack permalink, Jira issue URL, Linear identifier
|
||||||
|
URL, etc.), you can fetch that URL and confirm the operation
|
||||||
|
externally. Use this for high-stakes mutations the user explicitly
|
||||||
|
called out (e.g. "send the launch email to the whole team") or when
|
||||||
|
the subagent's self-report contradicts what the user expected.
|
||||||
|
|
||||||
|
**Receipt status semantics — read carefully:**
|
||||||
|
|
||||||
|
- `status="success"`: the mutation already committed in the backend.
|
||||||
|
If a `verifiable_url` is present and the request was high-stakes,
|
||||||
|
you may `scrape_webpage` it to externally confirm. Otherwise trust
|
||||||
|
the Receipt and tell the user it is done. Celery-backed deliverables
|
||||||
|
(podcasts, video presentations) also land here — the subagent
|
||||||
|
already waited for the worker to finish, so a `success` Receipt
|
||||||
|
means the artefact really is saved.
|
||||||
|
- `status="failed"`: a Receipt with this status carries the backend's
|
||||||
|
error in its `error` field. Surface that text verbatim to the user;
|
||||||
|
re-routing or retrying is only appropriate when the user explicitly
|
||||||
|
asks for it.
|
||||||
|
- `status="pending"`: rare today — current mutating tools wait for
|
||||||
|
their backend before returning. If you ever do see a pending
|
||||||
|
Receipt, tell the user the work has been **kicked off** (quote the
|
||||||
|
`external_id` / `preview` so they can find it later), do not
|
||||||
|
`scrape_webpage` it, and do not re-dispatch the same
|
||||||
|
`task(...)` call hoping it will be done "this time".
|
||||||
|
</verification>
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
|
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
|
||||||
EXCLUDED_STATE_KEYS = frozenset(
|
EXCLUDED_STATE_KEYS = frozenset(
|
||||||
{
|
{
|
||||||
|
|
@ -16,3 +18,72 @@ EXCLUDED_STATE_KEYS = frozenset(
|
||||||
# Match the parent graph's budget; the LangGraph default of 25 trips on
|
# Match the parent graph's budget; the LangGraph default of 25 trips on
|
||||||
# multi-step subagent runs.
|
# multi-step subagent runs.
|
||||||
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
|
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
|
||||||
|
|
||||||
|
|
||||||
|
def _read_timeout_env(name: str, default: float) -> float:
    """Parse a non-negative float timeout from the environment variable ``name``.

    Kept as a free function so the module-level constants stay constants
    after import; tests can monkeypatch this and re-evaluate via
    ``importlib.reload`` if they need a different value mid-process.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, empty,
            unparsable, negative, or NaN.

    Returns:
        The parsed value. ``0`` is passed through unchanged (it is the
        documented "disable the timeout" setting), so it must not be
        replaced by ``default``.
    """
    raw = os.environ.get(name)
    if not raw:
        return default
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return default
    # ``>=`` rather than ``>``: a literal ``0`` has to survive parsing for
    # the "0 disables the cap" contract to work. NaN fails this comparison
    # and therefore falls back to the default as well.
    return value if value >= 0 else default


# Wall-clock budget for a single ``task(subagent, ...)`` invocation.
# Subagents that run hot (image generation with slow vendors, KB writes
# behind a sluggish embedder) can otherwise wedge the orchestrator until
# the next checkpoint heartbeat. ``0`` disables the timeout entirely.
DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS: float = _read_timeout_env(
    "SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS",
    default=300.0,
)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_int_env(name: str, default: int, *, minimum: int = 1) -> int:
    """Parse an integer setting from the environment variable ``name``.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, empty,
            unparsable, or below ``minimum``.
        minimum: Smallest accepted value. Defaults to ``1`` so that knobs
            where ``0`` would be meaningless (concurrency, batch size) keep
            falling back to ``default``; pass ``0`` for knobs where ``0``
            is a documented "disabled" setting.

    Returns:
        The parsed integer, or ``default`` when the value is rejected.
    """
    raw = os.environ.get(name)
    if not raw:
        return default
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    return value if value >= minimum else default


# Maximum number of children that ``task(..., tasks=[...])`` runs in
# parallel via ``asyncio.gather`` + ``Semaphore``. Bounded so a runaway
# fanout cannot starve unrelated subagents (each child still owns an
# LLM call + DB session). Set ``SURFSENSE_TASK_BATCH_CONCURRENCY=1`` to
# effectively serialise batches without changing the schema.
DEFAULT_SUBAGENT_BATCH_CONCURRENCY: int = _read_int_env(
    "SURFSENSE_TASK_BATCH_CONCURRENCY",
    default=3,
)

# Max number of children in a single batched ``task`` call. Hard upper
# bound is a safety net for prompt-injection / runaway loops; the orchestrator
# rarely needs more than a handful of concurrent specialists.
MAX_SUBAGENT_BATCH_SIZE: int = _read_int_env(
    "SURFSENSE_TASK_BATCH_MAX_SIZE",
    default=8,
)


# Soft threshold for per-turn cumulative ``task(...)`` invocations across
# **all** subagents. Once the sum of ``state['billable_calls']`` values
# crosses this number, the runtime appends a one-shot warning ToolMessage
# instructing the orchestrator to wrap up the turn. Tunable so heavy-research
# turns (which legitimately need 15+ specialist calls) don't trip the alarm
# in production. Set to ``0`` to disable the warning entirely.
# ``minimum=0`` lets that documented ``0`` reach the consumer instead of
# being silently replaced by the default.
# NOTE(review): confirm the code that reads this constant treats ``0`` as
# "disabled" rather than as "warn on every call".
DEFAULT_SUBAGENT_BILLABLE_THRESHOLD: int = _read_int_env(
    "SURFSENSE_SUBAGENT_BILLABLE_THRESHOLD",
    default=15,
    minimum=0,
)
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ from langchain.agents import create_agent
|
||||||
from langchain.chat_models import init_chat_model
|
from langchain.chat_models import init_chat_model
|
||||||
from langgraph.types import Checkpointer
|
from langgraph.types import Checkpointer
|
||||||
|
|
||||||
|
from app.agents.multi_agent_chat.subagents.shared.spec import (
|
||||||
|
SURF_CONTEXT_HINT_PROVIDER_KEY,
|
||||||
|
)
|
||||||
from app.utils.perf import get_perf_logger
|
from app.utils.perf import get_perf_logger
|
||||||
|
|
||||||
from .task_tool import build_task_tool_with_parent_config
|
from .task_tool import build_task_tool_with_parent_config
|
||||||
|
|
@ -34,6 +37,7 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
|
||||||
subagents: list[SubAgent | CompiledSubAgent],
|
subagents: list[SubAgent | CompiledSubAgent],
|
||||||
system_prompt: str | None = TASK_SYSTEM_PROMPT,
|
system_prompt: str | None = TASK_SYSTEM_PROMPT,
|
||||||
task_description: str | None = None,
|
task_description: str | None = None,
|
||||||
|
search_space_id: int | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._surf_checkpointer = checkpointer
|
self._surf_checkpointer = checkpointer
|
||||||
super(SubAgentMiddleware, self).__init__()
|
super(SubAgentMiddleware, self).__init__()
|
||||||
|
|
@ -43,8 +47,17 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
|
||||||
)
|
)
|
||||||
self._backend = backend
|
self._backend = backend
|
||||||
self._subagents = subagents
|
self._subagents = subagents
|
||||||
|
# Search-space id is captured at build time (the orchestrator runs in
|
||||||
|
# exactly one search space for its lifetime). The spawn-paused kill
|
||||||
|
# switch keys on it so an operator can quarantine one workspace
|
||||||
|
# without affecting the rest of the deployment.
|
||||||
|
self._search_space_id = search_space_id
|
||||||
subagent_specs = self._surf_compile_subagent_graphs()
|
subagent_specs = self._surf_compile_subagent_graphs()
|
||||||
task_tool = build_task_tool_with_parent_config(subagent_specs, task_description)
|
task_tool = build_task_tool_with_parent_config(
|
||||||
|
subagent_specs,
|
||||||
|
task_description,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
)
|
||||||
if system_prompt and subagent_specs:
|
if system_prompt and subagent_specs:
|
||||||
agents_desc = "\n".join(
|
agents_desc = "\n".join(
|
||||||
f"- {s['name']}: {s['description']}" for s in subagent_specs
|
f"- {s['name']}: {s['description']}" for s in subagent_specs
|
||||||
|
|
@ -64,6 +77,10 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
|
||||||
|
|
||||||
for spec in self._subagents:
|
for spec in self._subagents:
|
||||||
spec_start = time.perf_counter()
|
spec_start = time.perf_counter()
|
||||||
|
# Provider may be ``None`` (no hint), in which case task_tool
|
||||||
|
# skips the prepend step. We forward the key unconditionally so
|
||||||
|
# the registry shape is uniform.
|
||||||
|
hint_provider = cast(dict, spec).get(SURF_CONTEXT_HINT_PROVIDER_KEY)
|
||||||
if "runnable" in spec:
|
if "runnable" in spec:
|
||||||
compiled = cast(CompiledSubAgent, spec)
|
compiled = cast(CompiledSubAgent, spec)
|
||||||
specs.append(
|
specs.append(
|
||||||
|
|
@ -71,6 +88,7 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
|
||||||
"name": compiled["name"],
|
"name": compiled["name"],
|
||||||
"description": compiled["description"],
|
"description": compiled["description"],
|
||||||
"runnable": compiled["runnable"],
|
"runnable": compiled["runnable"],
|
||||||
|
SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
timings.append(
|
timings.append(
|
||||||
|
|
@ -108,6 +126,7 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
|
||||||
"name": spec["name"],
|
"name": spec["name"],
|
||||||
"description": spec["description"],
|
"description": spec["description"],
|
||||||
"runnable": runnable,
|
"runnable": runnable,
|
||||||
|
SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
timings.append(
|
timings.append(
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,84 @@
|
||||||
|
"""Per-search-space spawn-paused kill switch for the ``task`` boundary.
|
||||||
|
|
||||||
|
When operators see a runaway loop, a vendor outage, or a billing event
|
||||||
|
that requires immediate cessation of subagent traffic for a specific
|
||||||
|
workspace, they flip a Redis flag and the ``task`` tool short-circuits
|
||||||
|
without touching downstream services. The flag is **per-search-space**
|
||||||
|
so one tenant's incident never silences the rest of the deployment.
|
||||||
|
|
||||||
|
Flag key: ``surfsense:spawn_paused:{search_space_id}``
|
||||||
|
Flag value: any non-empty string (only truthiness is tested, so an empty-string value reads as "not paused").
|
||||||
|
TTL: set by whoever toggles the flag — this module never expires
|
||||||
|
keys on its own, since "the flag is on" is itself the signal
|
||||||
|
that a human (or alert) needs to investigate.
|
||||||
|
|
||||||
|
The check is best-effort: Redis errors are logged but do not block the
|
||||||
|
``task`` invocation. Failing closed (block-on-redis-error) would let a
|
||||||
|
single Redis blip take the whole orchestrator offline; failing open
|
||||||
|
preserves availability and the alarm bells (rate-limits, cost spikes)
|
||||||
|
will surface the underlying outage.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from app.config import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Opt-out switch for the whole check (handy for local dev without Redis):
# set ``SURFSENSE_TASK_SPAWN_PAUSED_DISABLED`` to 1/true/yes/on
# (case-insensitive, surrounding whitespace ignored). Anything else —
# including leaving it unset — keeps the check enabled, so production
# never depends on someone remembering to flip a flag.
_DISABLED = os.environ.get(
    "SURFSENSE_TASK_SPAWN_PAUSED_DISABLED", ""
).strip().lower() in ("1", "true", "yes", "on")
|
||||||
|
|
||||||
|
|
||||||
|
def _flag_key(search_space_id: int) -> str:
    """Build the Redis key that holds the spawn-paused flag for one search space."""
    return "surfsense:spawn_paused:{}".format(search_space_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def is_spawn_paused(search_space_id: int | None) -> bool:
    """Report whether the spawn-paused flag for ``search_space_id`` is set in Redis.

    Returns ``False`` without touching Redis when no search-space id was
    supplied or when the check is disabled via ``_DISABLED``. Any exception
    raised while importing the client, connecting, or reading the key is
    logged as a warning and also yields ``False`` (fail-open).
    """
    if search_space_id is None:
        return False
    if _DISABLED:
        return False

    try:
        # Imported lazily so code paths that never reach this point do not
        # pay for (or require) the redis dependency.
        import redis.asyncio as aioredis  # type: ignore[import-not-found]

        redis_client = aioredis.from_url(config.REDIS_APP_URL, decode_responses=True)
        try:
            flag_value = await redis_client.get(_flag_key(search_space_id))
        finally:
            # Prefer ``aclose()`` when the client has it, otherwise fall
            # back to ``close()``; errors while closing are ignored.
            closer = getattr(redis_client, "aclose", None) or getattr(
                redis_client, "close", None
            )
            if callable(closer):
                with contextlib.suppress(Exception):
                    await closer()  # type: ignore[misc]
    except Exception:
        logger.warning(
            "spawn_paused check failed for search_space_id=%s; failing open.",
            search_space_id,
            exc_info=True,
        )
        return False

    # A missing key (``None``) or an empty value both read as "not paused".
    return bool(flag_value)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["is_spawn_paused"]
|
||||||
|
|
@ -8,9 +8,12 @@ re-raises any new pending interrupt back to the parent.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Annotated, Any, NoReturn
|
from collections.abc import Awaitable
|
||||||
|
from typing import Annotated, Any, NoReturn, TypeVar
|
||||||
|
|
||||||
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
|
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
|
||||||
from langchain.tools import BaseTool, ToolRuntime
|
from langchain.tools import BaseTool, ToolRuntime
|
||||||
|
|
@ -20,6 +23,10 @@ from langchain_core.tools import StructuredTool
|
||||||
from langgraph.errors import GraphInterrupt
|
from langgraph.errors import GraphInterrupt
|
||||||
from langgraph.types import Command, Interrupt
|
from langgraph.types import Command, Interrupt
|
||||||
|
|
||||||
|
from app.agents.multi_agent_chat.subagents.shared.spec import (
|
||||||
|
SURF_CONTEXT_HINT_PROVIDER_KEY,
|
||||||
|
ContextHintProvider,
|
||||||
|
)
|
||||||
from app.observability import metrics as ot_metrics, otel as ot
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.utils.perf import get_perf_logger
|
from app.utils.perf import get_perf_logger
|
||||||
|
|
||||||
|
|
@ -29,7 +36,13 @@ from .config import (
|
||||||
has_surfsense_resume,
|
has_surfsense_resume,
|
||||||
subagent_invoke_config,
|
subagent_invoke_config,
|
||||||
)
|
)
|
||||||
from .constants import EXCLUDED_STATE_KEYS
|
from .constants import (
|
||||||
|
DEFAULT_SUBAGENT_BATCH_CONCURRENCY,
|
||||||
|
DEFAULT_SUBAGENT_BILLABLE_THRESHOLD,
|
||||||
|
DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS,
|
||||||
|
EXCLUDED_STATE_KEYS,
|
||||||
|
MAX_SUBAGENT_BATCH_SIZE,
|
||||||
|
)
|
||||||
from .propagation import wrap_with_tool_call_id
|
from .propagation import wrap_with_tool_call_id
|
||||||
from .resume import (
|
from .resume import (
|
||||||
build_resume_command,
|
build_resume_command,
|
||||||
|
|
@ -37,11 +50,70 @@ from .resume import (
|
||||||
get_first_pending_subagent_interrupt,
|
get_first_pending_subagent_interrupt,
|
||||||
hitlrequest_action_count,
|
hitlrequest_action_count,
|
||||||
)
|
)
|
||||||
|
from .spawn_paused import is_spawn_paused
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
_perf_log = get_perf_logger()
|
_perf_log = get_perf_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class SubagentInvokeTimeoutError(Exception):
    """Signals that a subagent invocation ran past the wall-clock budget.

    The instance exposes ``subagent_type`` and ``elapsed_seconds`` so that
    whoever catches it can build a ToolMessage for the orchestrator. The
    exception message itself quotes the module-level
    ``DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` budget.
    """

    def __init__(self, subagent_type: str, elapsed_seconds: float) -> None:
        self.subagent_type = subagent_type
        self.elapsed_seconds = elapsed_seconds
        message = "".join(
            (
                f"subagent {subagent_type!r} exceeded ",
                f"{DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS:.0f}s budget ",
                f"(elapsed={elapsed_seconds:.1f}s)",
            )
        )
        super().__init__(message)
|
||||||
|
|
||||||
|
|
||||||
|
_T = TypeVar("_T")
|
||||||
|
|
||||||
|
|
||||||
|
async def _ainvoke_with_timeout[T](
|
||||||
|
coro: Awaitable[_T], *, subagent_type: str, started_at: float
|
||||||
|
) -> _T:
|
||||||
|
"""Apply :data:`DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS` to ``coro``.
|
||||||
|
|
||||||
|
A non-positive timeout disables the cap (configurable via the
|
||||||
|
``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` env var). On expiry the
|
||||||
|
underlying task is cancelled and :class:`SubagentInvokeTimeoutError` is
|
||||||
|
raised — the caller wraps it into a synthetic ToolMessage so the
|
||||||
|
orchestrator can decide what to do.
|
||||||
|
"""
|
||||||
|
timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
|
||||||
|
if timeout <= 0:
|
||||||
|
return await coro
|
||||||
|
try:
|
||||||
|
return await asyncio.wait_for(coro, timeout=timeout)
|
||||||
|
except TimeoutError as exc:
|
||||||
|
elapsed = time.perf_counter() - started_at
|
||||||
|
raise SubagentInvokeTimeoutError(subagent_type, elapsed) from exc
|
||||||
|
|
||||||
|
|
||||||
|
def _synthesize_timeout_command(
    exc: SubagentInvokeTimeoutError, *, tool_call_id: str
) -> Command:
    """Wrap a subagent timeout into a ``Command`` carrying one ToolMessage.

    The message text names the subagent, the elapsed seconds taken from
    ``exc``, and the configured budget, and is tagged with ``tool_call_id``.
    """
    notice_text = "".join(
        (
            f"Subagent {exc.subagent_type!r} timed out after ",
            f"{exc.elapsed_seconds:.1f}s (budget=",
            f"{DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS:.0f}s). ",
            "The work was cancelled. Treat as status=error; re-route with a ",
            "narrower scope or different specialist.",
        )
    )
    timeout_notice = ToolMessage(content=notice_text, tool_call_id=tool_call_id)
    return Command(update={"messages": [timeout_notice]})
|
||||||
|
|
||||||
|
|
||||||
def _reraise_stamped_subagent_interrupt(
|
def _reraise_stamped_subagent_interrupt(
|
||||||
gi: GraphInterrupt, tool_call_id: str
|
gi: GraphInterrupt, tool_call_id: str
|
||||||
) -> NoReturn:
|
) -> NoReturn:
|
||||||
|
|
@ -70,11 +142,24 @@ def _reraise_stamped_subagent_interrupt(
|
||||||
def build_task_tool_with_parent_config(
|
def build_task_tool_with_parent_config(
|
||||||
subagents: list[dict[str, Any]],
|
subagents: list[dict[str, Any]],
|
||||||
task_description: str | None = None,
|
task_description: str | None = None,
|
||||||
|
*,
|
||||||
|
search_space_id: int | None = None,
|
||||||
) -> BaseTool:
|
) -> BaseTool:
|
||||||
"""Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging."""
|
"""Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging."""
|
||||||
subagent_graphs: dict[str, Runnable] = {
|
subagent_graphs: dict[str, Runnable] = {
|
||||||
spec["name"]: spec["runnable"] for spec in subagents
|
spec["name"]: spec["runnable"] for spec in subagents
|
||||||
}
|
}
|
||||||
|
# Per-subagent context-hint providers (see ``SurfSenseSubagentSpec``).
|
||||||
|
# The mapping is sparse: only routes that opted in via ``pack_subagent``
|
||||||
|
# appear here, and the value is invoked once per ``task(...)`` call to
|
||||||
|
# generate a short string prepended to the subagent's first
|
||||||
|
# ``HumanMessage``. Failures are logged and swallowed — a broken hint
|
||||||
|
# provider must never prevent the underlying task from running.
|
||||||
|
subagent_hint_providers: dict[str, ContextHintProvider] = {
|
||||||
|
spec["name"]: provider
|
||||||
|
for spec in subagents
|
||||||
|
if (provider := spec.get(SURF_CONTEXT_HINT_PROVIDER_KEY)) is not None
|
||||||
|
}
|
||||||
subagent_description_str = "\n".join(
|
subagent_description_str = "\n".join(
|
||||||
f"- {s['name']}: {s['description']}" for s in subagents
|
f"- {s['name']}: {s['description']}" for s in subagents
|
||||||
)
|
)
|
||||||
|
|
@ -88,6 +173,120 @@ def build_task_tool_with_parent_config(
|
||||||
else:
|
else:
|
||||||
description = task_description
|
description = task_description
|
||||||
|
|
||||||
|
def _billable_call_update(
    subagent_type: str, runtime: ToolRuntime
) -> dict[str, Any]:
    """Return the state delta for one billable subagent call.

    The delta always carries ``{"billable_calls": {subagent_type: 1}}``.
    When the soft cap is enabled (threshold > 0) and this call moves the
    running total from below the cap to at-or-above it, the delta also
    carries a private ``"_billable_warn_text"`` entry holding the warning
    text; the caller is expected to pop it and surface it as a message.

    Args:
        subagent_type: Name of the subagent being charged.
        runtime: Tool runtime; only ``runtime.state["billable_calls"]``
            is read.

    Returns:
        A plain dict suitable for merging into a ``Command`` update.
    """
    cap = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
    update: dict[str, Any] = {"billable_calls": {subagent_type: 1}}
    if cap <= 0:
        # Soft cap disabled: count the call, never warn.
        return update

    recorded = runtime.state.get("billable_calls") or {}
    # Non-int values are ignored rather than trusted.
    calls_before = 0
    for count in recorded.values():
        if isinstance(count, int):
            calls_before += count
    calls_after = calls_before + 1

    # Warn only on the call that crosses the cap, not on every later one.
    crosses_cap = calls_before < cap and cap <= calls_after
    if crosses_cap:
        update["_billable_warn_text"] = (
            f"[budget warning] This turn has dispatched {calls_after} "
            f"subagent calls (soft cap = {cap}). Wrap up the "
            "user's request with what you have rather than launching "
            "more specialists; surface a partial answer if needed."
        )
    return update
|
||||||
|
|
||||||
|
def _attach_billable(
    cmd: Command, subagent_type: str, runtime: ToolRuntime
) -> Command:
    """Return a new ``Command`` with the billable counter folded in.

    Only ``cmd.update`` is carried over into the returned command. If the
    call crosses the soft cap, the warning text is appended as an extra
    ToolMessage answering ``runtime.tool_call_id``.

    Args:
        cmd: The command produced by the subagent return path.
        subagent_type: Name of the subagent being charged.
        runtime: Tool runtime (state and ``tool_call_id`` are read).
    """
    billable = _billable_call_update(subagent_type, runtime)
    warning = billable.pop("_billable_warn_text", None)

    # Copy first so the incoming command's update is never mutated.
    merged = dict(getattr(cmd, "update", {}) or {})
    merged.update(billable)

    if not warning:
        return Command(update=merged)

    # Copy the message list too before appending the warning.
    messages = list(merged.get("messages") or [])
    messages.append(
        ToolMessage(content=warning, tool_call_id=runtime.tool_call_id)
    )
    merged["messages"] = messages
    return Command(update=merged)
|
||||||
|
|
||||||
|
def _safe_message_text(msg: Any) -> str:
    """Extract display text from a message by reading ``content`` directly.

    The ``.text`` property is deliberately avoided: per the original
    author's note it can raise ``TypeError`` when ``content`` is ``None``,
    and ``getattr(..., default)`` does not shield against exceptions
    raised inside a property body.

    Args:
        msg: Any object; a missing or failing ``content`` yields ``""``.

    Returns:
        ``content`` itself when it is a string; for a list, the string
        items and the string ``"text"``/``"content"`` values of dict
        items joined with single spaces; ``""`` for ``None``; otherwise
        ``str(content)``.
    """
    try:
        raw = getattr(msg, "content", None)
    except Exception:
        # A property that raises is treated like absent content.
        return ""

    if raw is None:
        return ""
    if isinstance(raw, str):
        return raw
    if not isinstance(raw, list):
        return str(raw)

    pieces: list[str] = []
    for item in raw:
        if isinstance(item, str):
            pieces.append(item)
            continue
        if isinstance(item, dict):
            # Prefer "text"; fall back to "content" when it is missing/falsy.
            candidate = item.get("text") or item.get("content")
            if isinstance(candidate, str):
                pieces.append(candidate)
    return " ".join(pieces)
|
||||||
|
|
||||||
|
def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]:
    """Compress a subagent's message stream into a compact tool trace.

    Each entry is ``{"tool": <name>, "status": "ok"|"error", "preview":
    <≤120 chars>}``. Only tool-result frames are recorded: a message
    counts as one when its ``type`` is ``"tool"`` or it carries a truthy
    ``tool_call_id``. ``name`` alone is not a reliable marker because
    non-tool messages may also carry one.

    Args:
        messages: The subagent's messages, in order.

    Returns:
        One entry per tool-result frame, in input order.
    """
    trace: list[dict[str, Any]] = []
    for msg in messages:
        is_tool_frame = getattr(msg, "type", None) == "tool" or bool(
            getattr(msg, "tool_call_id", None)
        )
        if not is_tool_frame:
            # AI / human / system frames are not tool results.
            continue
        # Normalise to the documented vocabulary: anything other than an
        # explicit "error" (including "success" or a missing status) is "ok".
        status = "error" if getattr(msg, "status", None) == "error" else "ok"
        preview = _safe_message_text(msg).strip().replace("\n", " ")
        if len(preview) > 120:
            # 117 chars + "..." keeps the preview at exactly 120.
            preview = preview[:117] + "..."
        trace.append(
            {
                "tool": getattr(msg, "name", None) or "<unknown>",
                "status": status,
                "preview": preview,
            }
        )
    return trace
|
||||||
|
|
||||||
def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
|
def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
|
||||||
if "messages" not in result:
|
if "messages" not in result:
|
||||||
msg = (
|
msg = (
|
||||||
|
|
@ -106,15 +305,51 @@ def build_task_tool_with_parent_config(
|
||||||
"output to forward back to the user."
|
"output to forward back to the user."
|
||||||
)
|
)
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
last_text = getattr(messages[-1], "text", None) or ""
|
message_text = _safe_message_text(messages[-1]).rstrip()
|
||||||
message_text = last_text.rstrip()
|
# Tool-trace is purely observability — wrap defensively so a single
|
||||||
|
# malformed frame never bubbles up and kills the whole user turn.
|
||||||
|
try:
|
||||||
|
tool_trace = _build_tool_trace(messages)
|
||||||
|
except Exception:
|
||||||
|
logger.exception(
|
||||||
|
"Failed to build tool_trace for subagent return; "
|
||||||
|
"continuing without trace."
|
||||||
|
)
|
||||||
|
tool_trace = []
|
||||||
|
tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id)
|
||||||
|
if tool_trace:
|
||||||
|
# ``additional_kwargs`` is a free-form dict on BaseMessage; using
|
||||||
|
# a ``surf_`` prefix avoids collision with provider-specific keys
|
||||||
|
# (e.g. Anthropic's ``cache_control``). The LLM doesn't see it;
|
||||||
|
# consumers (UI, observability) read it off the checkpoint.
|
||||||
|
tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace
|
||||||
return Command(
|
return Command(
|
||||||
update={
|
update={
|
||||||
**state_update,
|
**state_update,
|
||||||
"messages": [ToolMessage(message_text, tool_call_id=tool_call_id)],
|
"messages": [tool_msg],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _resolve_context_hint(
    subagent_type: str, description: str, runtime: ToolRuntime
) -> str | None:
    """Ask the subagent's registered hint provider for a context hint.

    Args:
        subagent_type: Key into ``subagent_hint_providers``.
        description: Task description passed to the provider.
        runtime: Tool runtime; ``runtime.state`` is passed to the provider.

    Returns:
        The stripped hint, or ``None`` when no provider is registered,
        the provider raises (logged, not re-raised), or the result is
        empty, whitespace-only, or not a string.
    """
    hint_fn = subagent_hint_providers.get(subagent_type)
    if hint_fn is None:
        return None

    try:
        raw_hint = hint_fn(runtime.state, description)
    except Exception:
        # Best-effort: a broken provider must not block the task itself.
        logger.exception(
            "Context-hint provider for subagent %r raised; skipping hint.",
            subagent_type,
        )
        return None

    if raw_hint and isinstance(raw_hint, str):
        return raw_hint.strip() or None
    return None
|
||||||
|
|
||||||
def _validate_and_prepare_state(
|
def _validate_and_prepare_state(
|
||||||
subagent_type: str, description: str, runtime: ToolRuntime
|
subagent_type: str, description: str, runtime: ToolRuntime
|
||||||
) -> tuple[Runnable, dict]:
|
) -> tuple[Runnable, dict]:
|
||||||
|
|
@ -122,20 +357,308 @@ def build_task_tool_with_parent_config(
|
||||||
subagent_state = {
|
subagent_state = {
|
||||||
k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
|
k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
|
||||||
}
|
}
|
||||||
subagent_state["messages"] = [HumanMessage(content=description)]
|
hint = _resolve_context_hint(subagent_type, description, runtime)
|
||||||
|
if hint:
|
||||||
|
# Prepend as a tagged block so the subagent prompt can pattern-match
|
||||||
|
# on the section (and a future change can lift it into its own
|
||||||
|
# ``SystemMessage`` if needed).
|
||||||
|
payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}"
|
||||||
|
else:
|
||||||
|
payload = description
|
||||||
|
subagent_state["messages"] = [HumanMessage(content=payload)]
|
||||||
return subagent, subagent_state
|
return subagent, subagent_state
|
||||||
|
|
||||||
|
def _merge_batch_results(
    results: list[tuple[int, str, dict | str, dict | None]],
    runtime: ToolRuntime,
) -> Command:
    """Combine per-child results into one Command with a combined ToolMessage.

    Args:
        results: ``(task_index, subagent_type, payload_or_error_text,
            child_state_update)`` tuples. A ``str`` payload marks a failed
            child; a ``dict`` payload is the child's result, whose last
            message becomes that child's text block. The list is not
            modified.
        runtime: Tool runtime; ``tool_call_id`` and
            ``state["billable_calls"]`` are read.

    Returns:
        A ``Command`` whose update contains the merged child state, a
        ``billable_calls`` delta (one increment per child, failed or
        not), and one aggregate ToolMessage with a ``[task <index>]``
        block per child in index order. A second ToolMessage with a
        budget warning is added when this batch crosses the soft cap.
    """
    # ``sorted`` rather than ``list.sort`` so the caller's list is untouched.
    ordered = sorted(results, key=lambda r: r[0])
    merged_state: dict[str, Any] = {}
    billable_delta: dict[str, int] = {}
    message_blocks: list[str] = []
    batch_trace: list[dict[str, Any]] = []
    for task_index, subagent_type, payload, state_update in ordered:
        billable_delta[subagent_type] = billable_delta.get(subagent_type, 0) + 1
        if isinstance(payload, str):
            # Pre-flight error or per-task exception text.
            message_blocks.append(f"[task {task_index}] {payload}")
            batch_trace.append(
                {
                    "task_index": task_index,
                    "subagent_type": subagent_type,
                    "status": "error",
                    "tool_trace": [],
                }
            )
            continue
        messages = payload.get("messages") or []
        last_text = _safe_message_text(messages[-1]).rstrip() if messages else ""
        message_blocks.append(
            f"[task {task_index}] {last_text or '<empty>'}"
        )
        # The trace is observability only; never let it fail the batch.
        try:
            child_trace = _build_tool_trace(messages)
        except Exception:
            logger.exception(
                "Failed to build tool_trace for batch task_index=%d; continuing.",
                task_index,
            )
            child_trace = []
        batch_trace.append(
            {
                "task_index": task_index,
                "subagent_type": subagent_type,
                "status": "ok",
                "tool_trace": child_trace,
            }
        )
        if state_update:
            # Naive merge: later tasks win on key collisions.
            # NOTE(review): ``dict.update`` replaces an earlier child's
            # value for the same key before any state reducer sees it —
            # confirm reducer-backed fields are not expected to
            # accumulate across children of one batch.
            merged_state.update(state_update)
    aggregate = "\n\n".join(message_blocks)
    aggregate_msg = ToolMessage(
        content=aggregate, tool_call_id=runtime.tool_call_id
    )
    if batch_trace:
        aggregate_msg.additional_kwargs["surf_tool_trace"] = batch_trace
    # Keys listed after ``**merged_state`` override any child-provided value.
    update: dict[str, Any] = {
        **merged_state,
        "billable_calls": billable_delta,
        "messages": [aggregate_msg],
    }
    # Soft-cap warning: check the cumulative count after attribution.
    threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
    if threshold > 0:
        prior = runtime.state.get("billable_calls") or {}
        prior_total = sum(v for v in prior.values() if isinstance(v, int))
        new_total = prior_total + sum(billable_delta.values())
        # Warn only when this batch is the one that crosses the cap.
        if prior_total < threshold <= new_total:
            update["messages"].append(
                ToolMessage(
                    content=(
                        f"[budget warning] This turn has dispatched "
                        f"{new_total} subagent calls (soft cap = "
                        f"{threshold}). Wrap up the user's request with "
                        "what you have rather than launching more "
                        "specialists; surface a partial answer if needed."
                    ),
                    tool_call_id=runtime.tool_call_id,
                )
            )
    return Command(update=update)
|
||||||
|
|
||||||
|
async def _ainvoke_one_batch_child(
    *,
    task_index: int,
    subagent_type: str,
    description: str,
    runtime: ToolRuntime,
    semaphore: asyncio.Semaphore,
) -> tuple[int, str, dict | str, dict | None]:
    """Run one child of a batched ``task`` call under the concurrency cap.

    Errors are returned as plain text in slot 2 so a single child's
    failure does not abort the whole batch. This covers failures while
    preparing the child's state and config as well as failures of the
    invocation itself. ``GraphInterrupt`` from a batched child is treated
    as a failure for that child only — batched HITL is intentionally out
    of scope for the v1 rollout.

    Args:
        task_index: Position of this child in the input ``tasks`` array.
        subagent_type: Name of the subagent to run.
        description: Task description handed to the subagent.
        runtime: Parent tool runtime.
        semaphore: Shared semaphore bounding concurrent children.

    Returns:
        ``(task_index, subagent_type, payload, child_state_update)`` where
        ``payload`` is the subagent result dict on success or an error
        string on failure, and ``child_state_update`` is the result minus
        :data:`EXCLUDED_STATE_KEYS` (``None`` on failure).
    """
    async with semaphore:
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return (
                task_index,
                subagent_type,
                (
                    f"Subagent {subagent_type!r} does not exist; "
                    f"allowed: {allowed_types}"
                ),
                None,
            )
        try:
            # Preparation sits inside the guard so that a failure here is
            # reported for this child instead of escaping via gather().
            subagent, subagent_state = _validate_and_prepare_state(
                subagent_type, description, runtime
            )
            sub_config = subagent_invoke_config(runtime)
            # Clock starts after the semaphore is acquired, so time spent
            # queued behind siblings is not counted as elapsed.
            started_at = time.perf_counter()
            result = await _ainvoke_with_timeout(
                subagent.ainvoke(subagent_state, config=sub_config),
                subagent_type=subagent_type,
                started_at=started_at,
            )
        except SubagentInvokeTimeoutError as exc:
            logger.warning(
                "Batch child %d (%s) timed out after %.1fs",
                task_index,
                subagent_type,
                exc.elapsed_seconds,
            )
            return (task_index, subagent_type, str(exc), None)
        except GraphInterrupt:
            # Batched HITL is unsupported in v1 — surface as a failure
            # for this child so the rest of the batch still completes.
            logger.warning(
                "Batch child %d (%s) raised GraphInterrupt; batched HITL "
                "is not supported. Re-dispatch this task as a single "
                "(non-batched) `task(...)` call to get the HITL prompt.",
                task_index,
                subagent_type,
            )
            return (
                task_index,
                subagent_type,
                (
                    f"Subagent {subagent_type!r} needs human approval. "
                    "Re-dispatch this task as a single (non-batched) "
                    "`task(...)` call so the approval card can be shown."
                ),
                None,
            )
        except Exception as exc:
            logger.exception(
                "Batch child %d (%s) raised: %s",
                task_index,
                subagent_type,
                exc,
            )
            return (
                task_index,
                subagent_type,
                f"Subagent {subagent_type!r} error: {exc}",
                None,
            )
        child_state_update = {
            k: v for k, v in result.items() if k not in EXCLUDED_STATE_KEYS
        }
        return (task_index, subagent_type, result, child_state_update)
|
||||||
|
|
||||||
|
def _coerce_batch_arg(tasks: Any) -> list[dict] | str:
|
||||||
|
"""Rescue common LLM-side malformations of the ``tasks`` argument.
|
||||||
|
|
||||||
|
Some providers serialise an array argument as a JSON-encoded string,
|
||||||
|
and small models occasionally hand back a single ``{description,
|
||||||
|
subagent_type}`` dict instead of a one-element array. Both are
|
||||||
|
recovered here with a WARN log so the issue is visible in metrics
|
||||||
|
but the user's turn still completes; truly broken shapes return a
|
||||||
|
plain string that the caller surfaces as the tool error.
|
||||||
|
"""
|
||||||
|
if isinstance(tasks, list):
|
||||||
|
return tasks
|
||||||
|
if isinstance(tasks, dict):
|
||||||
|
logger.warning(
|
||||||
|
"task: `tasks` was a single dict; coercing to a 1-element list. "
|
||||||
|
"Orchestrators should send `tasks=[{...}]` directly."
|
||||||
|
)
|
||||||
|
return [tasks]
|
||||||
|
if isinstance(tasks, str):
|
||||||
|
stripped = tasks.strip()
|
||||||
|
if not stripped:
|
||||||
|
return "tasks: argument is empty."
|
||||||
|
try:
|
||||||
|
parsed = json.loads(stripped)
|
||||||
|
except json.JSONDecodeError as exc:
|
||||||
|
return (
|
||||||
|
f"tasks: argument is a string but not valid JSON ({exc.msg}). "
|
||||||
|
"Send a JSON array of `{description, subagent_type}` objects."
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
"task: `tasks` was a JSON-encoded string; parsed to %s. "
|
||||||
|
"Orchestrators should send a JSON array directly.",
|
||||||
|
type(parsed).__name__,
|
||||||
|
)
|
||||||
|
return _coerce_batch_arg(parsed)
|
||||||
|
return (
|
||||||
|
f"tasks: unsupported type {type(tasks).__name__}; expected an array "
|
||||||
|
"of `{description, subagent_type}` objects."
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _adispatch_batch(
    tasks: list[dict], runtime: ToolRuntime
) -> Command | str:
    """Fan-out helper for the ``tasks`` array shape.

    Bounded by :data:`MAX_SUBAGENT_BATCH_SIZE` and concurrency-capped
    at :data:`DEFAULT_SUBAGENT_BATCH_CONCURRENCY` (never below 1).

    Args:
        tasks: Array of ``{description, subagent_type}`` objects.
        runtime: Parent tool runtime, shared by every child.

    Returns:
        On success, the single :class:`Command` built by
        ``_merge_batch_results``, in which each child's output appears as
        a ``[task <index>]`` block. For an empty or oversized batch, or
        the first malformed item, an error string; nothing is dispatched
        in that case.
    """
    if not tasks:
        return "tasks: array is empty; nothing to dispatch."
    if len(tasks) > MAX_SUBAGENT_BATCH_SIZE:
        return (
            f"tasks: too many children ({len(tasks)}); "
            f"max is {MAX_SUBAGENT_BATCH_SIZE}. Split the batch."
        )
    # Validate everything up front so a bad item rejects the whole batch
    # before any child has started.
    normalized: list[tuple[int, str, str]] = []
    for idx, item in enumerate(tasks):
        if not isinstance(item, dict):
            return (
                f"tasks[{idx}]: must be an object with description+subagent_type."
            )
        description = item.get("description")
        subagent_type = item.get("subagent_type")
        if not isinstance(description, str) or not description.strip():
            return f"tasks[{idx}]: missing or empty 'description'."
        if not isinstance(subagent_type, str) or not subagent_type.strip():
            return f"tasks[{idx}]: missing or empty 'subagent_type'."
        normalized.append((idx, subagent_type.strip(), description))
    # Clamp to >= 1: Semaphore(0) would block every child forever and a
    # negative value raises ValueError, so a misconfigured cap degrades
    # to serial execution instead of hanging or crashing the turn.
    semaphore = asyncio.Semaphore(max(1, DEFAULT_SUBAGENT_BATCH_CONCURRENCY))
    coros = [
        _ainvoke_one_batch_child(
            task_index=idx,
            subagent_type=subagent_type,
            description=description,
            runtime=runtime,
            semaphore=semaphore,
        )
        for idx, subagent_type, description in normalized
    ]
    results = await asyncio.gather(*coros)
    return _merge_batch_results(list(results), runtime)
|
||||||
|
|
||||||
def task(
|
def task(
|
||||||
description: Annotated[
|
description: Annotated[
|
||||||
str,
|
str | None,
|
||||||
"A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
|
"Single-mode: a detailed task description for the subagent. Required unless `tasks` is provided.",
|
||||||
],
|
] = None,
|
||||||
subagent_type: Annotated[
|
subagent_type: Annotated[
|
||||||
str,
|
str | None,
|
||||||
"The type of subagent to use. Must be one of the available agent types listed in the tool description.",
|
"Single-mode: the type of subagent to use. Required unless `tasks` is provided.",
|
||||||
],
|
] = None,
|
||||||
runtime: ToolRuntime,
|
runtime: ToolRuntime = None, # type: ignore[assignment]
|
||||||
|
tasks: Annotated[
|
||||||
|
list[dict] | None,
|
||||||
|
(
|
||||||
|
"Batch-mode: array of `{description, subagent_type}` objects. "
|
||||||
|
"Synchronous path does not support batch mode; orchestrators "
|
||||||
|
"must use the async event loop to fan out."
|
||||||
|
),
|
||||||
|
] = None,
|
||||||
) -> str | Command:
|
) -> str | Command:
|
||||||
|
if tasks is not None:
|
||||||
|
return (
|
||||||
|
"task: batch mode (`tasks=[...]`) is only supported on the async "
|
||||||
|
"path. SurfSense orchestrators always run in an event loop, so "
|
||||||
|
"this should never fire — file a bug if you see it."
|
||||||
|
)
|
||||||
|
if not description or not subagent_type:
|
||||||
|
return (
|
||||||
|
"task: must provide either single-mode (`description`+`subagent_type`) "
|
||||||
|
"or batch-mode (`tasks`)."
|
||||||
|
)
|
||||||
if subagent_type not in subagent_graphs:
|
if subagent_type not in subagent_graphs:
|
||||||
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
|
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
|
||||||
return (
|
return (
|
||||||
|
|
@ -284,16 +807,65 @@ def build_task_tool_with_parent_config(
|
||||||
|
|
||||||
async def atask(
|
async def atask(
|
||||||
description: Annotated[
|
description: Annotated[
|
||||||
str,
|
str | None,
|
||||||
"A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
|
"Single-mode: a detailed task description for the subagent. Required unless `tasks` is provided.",
|
||||||
],
|
] = None,
|
||||||
subagent_type: Annotated[
|
subagent_type: Annotated[
|
||||||
str,
|
str | None,
|
||||||
"The type of subagent to use. Must be one of the available agent types listed in the tool description.",
|
"Single-mode: the type of subagent to use. Required unless `tasks` is provided.",
|
||||||
],
|
] = None,
|
||||||
runtime: ToolRuntime,
|
runtime: ToolRuntime = None, # type: ignore[assignment]
|
||||||
|
tasks: Annotated[
|
||||||
|
list[dict] | None,
|
||||||
|
(
|
||||||
|
"Batch-mode: array of `{description, subagent_type}` objects "
|
||||||
|
"to fan out concurrently (max "
|
||||||
|
f"{MAX_SUBAGENT_BATCH_SIZE}, concurrency "
|
||||||
|
f"{DEFAULT_SUBAGENT_BATCH_CONCURRENCY}). Mutually exclusive "
|
||||||
|
"with single-mode args. Batched children do not support "
|
||||||
|
"human-in-the-loop interrupts; re-dispatch as single mode "
|
||||||
|
"if a child needs approval."
|
||||||
|
),
|
||||||
|
] = None,
|
||||||
) -> str | Command:
|
) -> str | Command:
|
||||||
atask_start = time.perf_counter()
|
atask_start = time.perf_counter()
|
||||||
|
# Kill switch: when ops flips the spawn-paused flag for this
|
||||||
|
# workspace, every ``task(...)`` invocation (single- or batch-mode)
|
||||||
|
# short-circuits with a clear ToolMessage so the orchestrator can
|
||||||
|
# tell the user what happened and stop hammering downstream APIs.
|
||||||
|
if await is_spawn_paused(search_space_id):
|
||||||
|
logger.warning(
|
||||||
|
"[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s",
|
||||||
|
search_space_id,
|
||||||
|
runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
"task: subagent dispatch is currently paused for this workspace. "
|
||||||
|
"Acknowledge to the user that delegation is temporarily disabled "
|
||||||
|
"(ops kill switch); do not retry until the pause is lifted."
|
||||||
|
)
|
||||||
|
if tasks is not None:
|
||||||
|
if description or subagent_type:
|
||||||
|
return (
|
||||||
|
"task: cannot combine `tasks` with `description`/`subagent_type`. "
|
||||||
|
"Use either single-mode (description+subagent_type) or batch-mode (tasks)."
|
||||||
|
)
|
||||||
|
if not runtime.tool_call_id:
|
||||||
|
raise ValueError("Tool call ID is required for subagent invocation")
|
||||||
|
coerced = _coerce_batch_arg(tasks)
|
||||||
|
if isinstance(coerced, str):
|
||||||
|
return coerced
|
||||||
|
logger.info(
|
||||||
|
"[hitl_route] atask BATCH ENTRY: size=%d tool_call_id=%s",
|
||||||
|
len(coerced),
|
||||||
|
runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
return await _adispatch_batch(coerced, runtime)
|
||||||
|
if not description or not subagent_type:
|
||||||
|
return (
|
||||||
|
"task: must provide either single-mode (`description`+`subagent_type`) "
|
||||||
|
"or batch-mode (`tasks`)."
|
||||||
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"[hitl_route] atask ENTRY: subagent_type=%r tool_call_id=%s",
|
"[hitl_route] atask ENTRY: subagent_type=%r tool_call_id=%s",
|
||||||
subagent_type,
|
subagent_type,
|
||||||
|
|
@ -358,11 +930,37 @@ def build_task_tool_with_parent_config(
|
||||||
subagent_type=subagent_type, path=invoke_path
|
subagent_type=subagent_type, path=invoke_path
|
||||||
) as sp:
|
) as sp:
|
||||||
try:
|
try:
|
||||||
result = await subagent.ainvoke(
|
result = await _ainvoke_with_timeout(
|
||||||
|
subagent.ainvoke(
|
||||||
build_resume_command(resume_value, pending_id),
|
build_resume_command(resume_value, pending_id),
|
||||||
config=sub_config,
|
config=sub_config,
|
||||||
|
),
|
||||||
|
subagent_type=subagent_type,
|
||||||
|
started_at=ainvoke_start,
|
||||||
)
|
)
|
||||||
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
||||||
|
except SubagentInvokeTimeoutError as exc:
|
||||||
|
ainvoke_outcome = "timeout"
|
||||||
|
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
||||||
|
ot_metrics.record_subagent_invoke_duration(
|
||||||
|
(time.perf_counter() - ainvoke_start) * 1000,
|
||||||
|
subagent_type=subagent_type,
|
||||||
|
path=invoke_path,
|
||||||
|
outcome=ainvoke_outcome,
|
||||||
|
)
|
||||||
|
ot_metrics.record_subagent_invoke_outcome(
|
||||||
|
subagent_type=subagent_type,
|
||||||
|
path=invoke_path,
|
||||||
|
outcome=ainvoke_outcome,
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
"Subagent %r ainvoke (resume) timed out after %.1fs",
|
||||||
|
subagent_type,
|
||||||
|
exc.elapsed_seconds,
|
||||||
|
)
|
||||||
|
return _synthesize_timeout_command(
|
||||||
|
exc, tool_call_id=runtime.tool_call_id
|
||||||
|
)
|
||||||
except GraphInterrupt as gi:
|
except GraphInterrupt as gi:
|
||||||
ainvoke_outcome = "interrupted"
|
ainvoke_outcome = "interrupted"
|
||||||
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
||||||
|
|
@ -408,10 +1006,34 @@ def build_task_tool_with_parent_config(
|
||||||
subagent_type=subagent_type, path=invoke_path
|
subagent_type=subagent_type, path=invoke_path
|
||||||
) as sp:
|
) as sp:
|
||||||
try:
|
try:
|
||||||
result = await subagent.ainvoke(
|
result = await _ainvoke_with_timeout(
|
||||||
subagent_state, config=sub_config
|
subagent.ainvoke(subagent_state, config=sub_config),
|
||||||
|
subagent_type=subagent_type,
|
||||||
|
started_at=ainvoke_start,
|
||||||
)
|
)
|
||||||
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
||||||
|
except SubagentInvokeTimeoutError as exc:
|
||||||
|
ainvoke_outcome = "timeout"
|
||||||
|
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
||||||
|
ot_metrics.record_subagent_invoke_duration(
|
||||||
|
(time.perf_counter() - ainvoke_start) * 1000,
|
||||||
|
subagent_type=subagent_type,
|
||||||
|
path=invoke_path,
|
||||||
|
outcome=ainvoke_outcome,
|
||||||
|
)
|
||||||
|
ot_metrics.record_subagent_invoke_outcome(
|
||||||
|
subagent_type=subagent_type,
|
||||||
|
path=invoke_path,
|
||||||
|
outcome=ainvoke_outcome,
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
"Subagent %r ainvoke (fresh) timed out after %.1fs",
|
||||||
|
subagent_type,
|
||||||
|
exc.elapsed_seconds,
|
||||||
|
)
|
||||||
|
return _synthesize_timeout_command(
|
||||||
|
exc, tool_call_id=runtime.tool_call_id
|
||||||
|
)
|
||||||
except GraphInterrupt as gi:
|
except GraphInterrupt as gi:
|
||||||
ainvoke_outcome = "interrupted"
|
ainvoke_outcome = "interrupted"
|
||||||
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
sp.set_attribute("subagent.outcome", ainvoke_outcome)
|
||||||
|
|
@ -481,7 +1103,7 @@ def build_task_tool_with_parent_config(
|
||||||
path=invoke_path,
|
path=invoke_path,
|
||||||
outcome=ainvoke_outcome,
|
outcome=ainvoke_outcome,
|
||||||
)
|
)
|
||||||
return cmd
|
return _attach_billable(cmd, subagent_type, runtime)
|
||||||
|
|
||||||
return StructuredTool.from_function(
|
return StructuredTool.from_function(
|
||||||
name="task",
|
name="task",
|
||||||
|
|
|
||||||
|
|
@ -52,9 +52,7 @@ class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
messages.insert(insert_at, SystemMessage(content=tree_text))
|
messages.insert(insert_at, SystemMessage(content=tree_text))
|
||||||
priority_count = 0
|
priority_count = 0
|
||||||
if priority:
|
if priority:
|
||||||
priority_count = (
|
priority_count = len(priority) if hasattr(priority, "__len__") else 1
|
||||||
len(priority) if hasattr(priority, "__len__") else 1
|
|
||||||
)
|
|
||||||
messages.insert(insert_at, _render_priority_message(priority))
|
messages.insert(insert_at, _render_priority_message(priority))
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",
|
"[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,7 @@ from langchain_core.tools import BaseTool
|
||||||
from langgraph.types import interrupt
|
from langgraph.types import interrupt
|
||||||
|
|
||||||
from app.agents.new_chat.permissions import Rule
|
from app.agents.new_chat.permissions import Rule
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.observability import otel as ot
|
|
||||||
|
|
||||||
from .decision import normalize_permission_decision
|
from .decision import normalize_permission_decision
|
||||||
from .payload import PERMISSION_ASK_INTERRUPT_TYPE, build_permission_ask_payload
|
from .payload import PERMISSION_ASK_INTERRUPT_TYPE, build_permission_ask_payload
|
||||||
|
|
|
||||||
|
|
@ -173,6 +173,7 @@ def build_main_agent_deepagent_middleware(
|
||||||
subagents=subagents,
|
subagents=subagents,
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
task_description=TASK_TOOL_DESCRIPTION,
|
task_description=TASK_TOOL_DESCRIPTION,
|
||||||
|
search_space_id=search_space_id,
|
||||||
),
|
),
|
||||||
resilience.model_call_limit,
|
resilience.model_call_limit,
|
||||||
resilience.tool_call_limit,
|
resilience.tool_call_limit,
|
||||||
|
|
|
||||||
|
|
@ -42,14 +42,16 @@ Return **only** one JSON object (no markdown/prose):
|
||||||
"evidence": {
|
"evidence": {
|
||||||
"artifact_type": "report" | "podcast" | "video_presentation" | "resume" | "image" | null,
|
"artifact_type": "report" | "podcast" | "video_presentation" | "resume" | "image" | null,
|
||||||
"artifact_id": string | null,
|
"artifact_id": string | null,
|
||||||
"artifact_location": string | null
|
"artifact_location": string | null,
|
||||||
|
"receipts": Receipt[] | null
|
||||||
},
|
},
|
||||||
"next_step": string | null,
|
"next_step": string | null,
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
Route-specific rules:
|
||||||
- `status=success` -> `next_step=null`, `missing_fields=null`.
|
- `evidence.receipts` quotes the Receipt(s) returned by `generate_report` / `generate_podcast` / `generate_video_presentation` / `generate_resume` / `generate_image` this turn, verbatim. The Receipt's `type` enum is one of `report` | `podcast` | `video_presentation` | `resume` | `image`.
|
||||||
- `status=partial|blocked|error` -> `next_step` must be non-null.
|
<include snippet="output_contract_base"/>
|
||||||
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
|
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
|
||||||
|
|
@ -4,11 +4,15 @@ import hashlib
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
from litellm import aimage_generation
|
from litellm import aimage_generation
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.config import config
|
from app.config import config
|
||||||
from app.db import (
|
from app.db import (
|
||||||
ImageGeneration,
|
ImageGeneration,
|
||||||
|
|
@ -66,8 +70,9 @@ def create_generate_image_tool(
|
||||||
@tool
|
@tool
|
||||||
async def generate_image(
|
async def generate_image(
|
||||||
prompt: str,
|
prompt: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
n: int = 1,
|
n: int = 1,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""
|
"""
|
||||||
Generate an image from a text description using AI image models.
|
Generate an image from a text description using AI image models.
|
||||||
|
|
||||||
|
|
@ -82,6 +87,21 @@ def create_generate_image_tool(
|
||||||
Returns:
|
Returns:
|
||||||
A dictionary containing the generated image(s) for display in the chat.
|
A dictionary containing the generated image(s) for display in the chat.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def _failed(payload: dict[str, Any], *, error: str) -> Command:
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="image",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
preview=prompt[:200] if prompt else None,
|
||||||
|
error=error,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Use a per-call session so concurrent tool calls don't share an
|
# Use a per-call session so concurrent tool calls don't share an
|
||||||
# AsyncSession (which is not concurrency-safe). The streaming
|
# AsyncSession (which is not concurrency-safe). The streaming
|
||||||
|
|
@ -93,7 +113,10 @@ def create_generate_image_tool(
|
||||||
)
|
)
|
||||||
search_space = result.scalars().first()
|
search_space = result.scalars().first()
|
||||||
if not search_space:
|
if not search_space:
|
||||||
return {"error": "Search space not found"}
|
return _failed(
|
||||||
|
{"error": "Search space not found"},
|
||||||
|
error="Search space not found",
|
||||||
|
)
|
||||||
|
|
||||||
config_id = (
|
config_id = (
|
||||||
search_space.image_generation_config_id or IMAGE_GEN_AUTO_MODE_ID
|
search_space.image_generation_config_id or IMAGE_GEN_AUTO_MODE_ID
|
||||||
|
|
@ -112,19 +135,19 @@ def create_generate_image_tool(
|
||||||
# Call litellm based on config type
|
# Call litellm based on config type
|
||||||
if is_image_gen_auto_mode(config_id):
|
if is_image_gen_auto_mode(config_id):
|
||||||
if not ImageGenRouterService.is_initialized():
|
if not ImageGenRouterService.is_initialized():
|
||||||
return {
|
err = (
|
||||||
"error": "No image generation models configured. "
|
"No image generation models configured. "
|
||||||
"Please add an image model in Settings > Image Models."
|
"Please add an image model in Settings > Image Models."
|
||||||
}
|
)
|
||||||
|
return _failed({"error": err}, error=err)
|
||||||
response = await ImageGenRouterService.aimage_generation(
|
response = await ImageGenRouterService.aimage_generation(
|
||||||
prompt=prompt, model="auto", **gen_kwargs
|
prompt=prompt, model="auto", **gen_kwargs
|
||||||
)
|
)
|
||||||
elif config_id < 0:
|
elif config_id < 0:
|
||||||
cfg = _get_global_image_gen_config(config_id)
|
cfg = _get_global_image_gen_config(config_id)
|
||||||
if not cfg:
|
if not cfg:
|
||||||
return {
|
err = f"Image generation config {config_id} not found"
|
||||||
"error": f"Image generation config {config_id} not found"
|
return _failed({"error": err}, error=err)
|
||||||
}
|
|
||||||
|
|
||||||
model_string = _build_model_string(
|
model_string = _build_model_string(
|
||||||
cfg.get("provider", ""),
|
cfg.get("provider", ""),
|
||||||
|
|
@ -151,9 +174,8 @@ def create_generate_image_tool(
|
||||||
)
|
)
|
||||||
db_cfg = cfg_result.scalars().first()
|
db_cfg = cfg_result.scalars().first()
|
||||||
if not db_cfg:
|
if not db_cfg:
|
||||||
return {
|
err = f"Image generation config {config_id} not found"
|
||||||
"error": f"Image generation config {config_id} not found"
|
return _failed({"error": err}, error=err)
|
||||||
}
|
|
||||||
|
|
||||||
model_string = _build_model_string(
|
model_string = _build_model_string(
|
||||||
db_cfg.provider.value,
|
db_cfg.provider.value,
|
||||||
|
|
@ -200,7 +222,10 @@ def create_generate_image_tool(
|
||||||
# Extract image URLs from response
|
# Extract image URLs from response
|
||||||
images = response_dict.get("data", [])
|
images = response_dict.get("data", [])
|
||||||
if not images:
|
if not images:
|
||||||
return {"error": "No images were generated"}
|
return _failed(
|
||||||
|
{"error": "No images were generated"},
|
||||||
|
error="No images were generated",
|
||||||
|
)
|
||||||
|
|
||||||
first_image = images[0]
|
first_image = images[0]
|
||||||
revised_prompt = first_image.get("revised_prompt", prompt)
|
revised_prompt = first_image.get("revised_prompt", prompt)
|
||||||
|
|
@ -219,11 +244,14 @@ def create_generate_image_tool(
|
||||||
f"{db_image_gen_id}/image?token={access_token}"
|
f"{db_image_gen_id}/image?token={access_token}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return {"error": "No displayable image data in the response"}
|
return _failed(
|
||||||
|
{"error": "No displayable image data in the response"},
|
||||||
|
error="No displayable image data in the response",
|
||||||
|
)
|
||||||
|
|
||||||
image_id = f"image-{hashlib.md5(image_url.encode()).hexdigest()[:12]}"
|
image_id = f"image-{hashlib.md5(image_url.encode()).hexdigest()[:12]}"
|
||||||
|
|
||||||
return {
|
payload = {
|
||||||
"id": image_id,
|
"id": image_id,
|
||||||
"assetId": image_url,
|
"assetId": image_url,
|
||||||
"src": image_url,
|
"src": image_url,
|
||||||
|
|
@ -236,12 +264,26 @@ def create_generate_image_tool(
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"image_count": len(images),
|
"image_count": len(images),
|
||||||
}
|
}
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="image",
|
||||||
|
operation="generate",
|
||||||
|
status="success",
|
||||||
|
external_id=str(db_image_gen_id),
|
||||||
|
verifiable_url=image_url,
|
||||||
|
preview=(revised_prompt or prompt)[:200],
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Image generation failed in tool")
|
logger.exception("Image generation failed in tool")
|
||||||
return {
|
err = f"Image generation failed: {e!s}"
|
||||||
"error": f"Image generation failed: {e!s}",
|
return _failed(
|
||||||
"prompt": prompt,
|
{"error": err, "prompt": prompt},
|
||||||
}
|
error=err,
|
||||||
|
)
|
||||||
|
|
||||||
return generate_image
|
return generate_image
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,28 @@
|
||||||
"""Factory for a podcast-generation tool that queues background work and returns an ID for polling."""
|
"""Factory for a podcast-generation tool.
|
||||||
|
|
||||||
|
Dispatches the heavy generation to Celery and then polls the podcast row
|
||||||
|
until it reaches a terminal status (READY/FAILED). The tool always
|
||||||
|
returns a real terminal ``Receipt`` — never a pending one. The wait is
|
||||||
|
bounded by the existing per-invocation safety net
|
||||||
|
(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode,
|
||||||
|
HTTP / process lifetime in single-agent mode).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.agents.shared.deliverable_wait import wait_for_deliverable
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.db import Podcast, PodcastStatus, shielded_async_session
|
from app.db import Podcast, PodcastStatus, shielded_async_session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def create_generate_podcast_tool(
|
def create_generate_podcast_tool(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
|
|
@ -19,9 +35,10 @@ def create_generate_podcast_tool(
|
||||||
@tool
|
@tool
|
||||||
async def generate_podcast(
|
async def generate_podcast(
|
||||||
source_content: str,
|
source_content: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
podcast_title: str = "SurfSense Podcast",
|
podcast_title: str = "SurfSense Podcast",
|
||||||
user_prompt: str | None = None,
|
user_prompt: str | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""
|
"""
|
||||||
Generate a podcast from the provided content.
|
Generate a podcast from the provided content.
|
||||||
|
|
||||||
|
|
@ -70,23 +87,101 @@ def create_generate_podcast_tool(
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"[generate_podcast] Created podcast {podcast_id}, task: {task.id}")
|
logger.info(
|
||||||
|
"[generate_podcast] Created podcast %s, task: %s",
|
||||||
|
podcast_id,
|
||||||
|
task.id,
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
# Wait until the Celery worker flips the row to a terminal
|
||||||
"status": PodcastStatus.PENDING.value,
|
# state. The wait is bounded only by the subagent invoke
|
||||||
|
# timeout (multi-agent) or HTTP lifetime (single-agent) —
|
||||||
|
# see app.agents.shared.deliverable_wait for details.
|
||||||
|
terminal_status, columns, elapsed = await wait_for_deliverable(
|
||||||
|
model=Podcast,
|
||||||
|
row_id=podcast_id,
|
||||||
|
columns=[Podcast.status, Podcast.file_location],
|
||||||
|
terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED},
|
||||||
|
)
|
||||||
|
|
||||||
|
if terminal_status == PodcastStatus.READY:
|
||||||
|
file_location = columns[1] if columns else None
|
||||||
|
logger.info(
|
||||||
|
"[generate_podcast] Podcast %s READY in %.2fs (file=%s)",
|
||||||
|
podcast_id,
|
||||||
|
elapsed,
|
||||||
|
file_location,
|
||||||
|
)
|
||||||
|
payload: dict[str, Any] = {
|
||||||
|
"status": PodcastStatus.READY.value,
|
||||||
"podcast_id": podcast_id,
|
"podcast_id": podcast_id,
|
||||||
"title": podcast_title,
|
"title": podcast_title,
|
||||||
"message": "Podcast generation started. This may take a few minutes.",
|
"file_location": file_location,
|
||||||
|
"message": (
|
||||||
|
"Podcast generated and saved to your podcast panel."
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="podcast",
|
||||||
|
operation="generate",
|
||||||
|
status="success",
|
||||||
|
external_id=str(podcast_id),
|
||||||
|
preview=podcast_title,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only other terminal state is FAILED.
|
||||||
|
logger.warning(
|
||||||
|
"[generate_podcast] Podcast %s FAILED in %.2fs",
|
||||||
|
podcast_id,
|
||||||
|
elapsed,
|
||||||
|
)
|
||||||
|
err = "Background worker reported FAILED status for this podcast."
|
||||||
|
payload = {
|
||||||
|
"status": PodcastStatus.FAILED.value,
|
||||||
|
"podcast_id": podcast_id,
|
||||||
|
"title": podcast_title,
|
||||||
|
"error": err,
|
||||||
|
}
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="podcast",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
external_id=str(podcast_id),
|
||||||
|
preview=podcast_title,
|
||||||
|
error=err,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
print(f"[generate_podcast] Error: {error_message}")
|
logger.exception("[generate_podcast] Error: %s", error_message)
|
||||||
return {
|
payload = {
|
||||||
"status": PodcastStatus.FAILED.value,
|
"status": PodcastStatus.FAILED.value,
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
"title": podcast_title,
|
"title": podcast_title,
|
||||||
"podcast_id": None,
|
"podcast_id": None,
|
||||||
}
|
}
|
||||||
|
receipt = make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="podcast",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
preview=podcast_title,
|
||||||
|
error=error_message,
|
||||||
|
)
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=receipt,
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
return generate_podcast
|
return generate_podcast
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,14 @@ import logging
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.callbacks import dispatch_custom_event
|
from langchain_core.callbacks import dispatch_custom_event
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import HumanMessage
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
|
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.db import Report, shielded_async_session
|
from app.db import Report, shielded_async_session
|
||||||
from app.services.connector_service import ConnectorService
|
from app.services.connector_service import ConnectorService
|
||||||
from app.services.llm_service import get_document_summary_llm
|
from app.services.llm_service import get_document_summary_llm
|
||||||
|
|
@ -573,13 +577,14 @@ def create_generate_report_tool(
|
||||||
@tool
|
@tool
|
||||||
async def generate_report(
|
async def generate_report(
|
||||||
topic: str,
|
topic: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
source_content: str = "",
|
source_content: str = "",
|
||||||
source_strategy: str = "provided",
|
source_strategy: str = "provided",
|
||||||
search_queries: list[str] | None = None,
|
search_queries: list[str] | None = None,
|
||||||
report_style: str = "detailed",
|
report_style: str = "detailed",
|
||||||
user_instructions: str | None = None,
|
user_instructions: str | None = None,
|
||||||
parent_report_id: int | None = None,
|
parent_report_id: int | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""
|
"""
|
||||||
Generate a structured Markdown report artifact from provided content.
|
Generate a structured Markdown report artifact from provided content.
|
||||||
|
|
||||||
|
|
@ -692,6 +697,23 @@ def create_generate_report_tool(
|
||||||
parent_report_content: str | None = None
|
parent_report_content: str | None = None
|
||||||
report_group_id: int | None = None
|
report_group_id: int | None = None
|
||||||
|
|
||||||
|
def _failed(payload: dict[str, Any], *, error: str) -> Command:
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="report",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
external_id=str(payload.get("report_id"))
|
||||||
|
if payload.get("report_id") is not None
|
||||||
|
else None,
|
||||||
|
preview=topic,
|
||||||
|
error=error,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
async def _save_failed_report(error_msg: str) -> int | None:
|
async def _save_failed_report(error_msg: str) -> int | None:
|
||||||
"""Persist a failed report row using a short-lived session."""
|
"""Persist a failed report row using a short-lived session."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -753,12 +775,15 @@ def create_generate_report_tool(
|
||||||
"No LLM configured. Please configure a language model in Settings."
|
"No LLM configured. Please configure a language model in Settings."
|
||||||
)
|
)
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": topic,
|
"title": topic,
|
||||||
}
|
},
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
# Build the user instructions string
|
# Build the user instructions string
|
||||||
user_instructions_section = ""
|
user_instructions_section = ""
|
||||||
|
|
@ -971,12 +996,15 @@ def create_generate_report_tool(
|
||||||
if not report_content or not isinstance(report_content, str):
|
if not report_content or not isinstance(report_content, str):
|
||||||
error_msg = "LLM returned empty or invalid content"
|
error_msg = "LLM returned empty or invalid content"
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": topic,
|
"title": topic,
|
||||||
}
|
},
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
# LLMs often wrap output in ```markdown ... ``` fences — strip them
|
# LLMs often wrap output in ```markdown ... ``` fences — strip them
|
||||||
report_content = _strip_wrapping_code_fences(report_content)
|
report_content = _strip_wrapping_code_fences(report_content)
|
||||||
|
|
@ -984,12 +1012,15 @@ def create_generate_report_tool(
|
||||||
if not report_content:
|
if not report_content:
|
||||||
error_msg = "LLM returned empty or invalid content"
|
error_msg = "LLM returned empty or invalid content"
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": topic,
|
"title": topic,
|
||||||
}
|
},
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
# Strip any existing footer(s) carried over from parent version(s)
|
# Strip any existing footer(s) carried over from parent version(s)
|
||||||
while report_content.rstrip().endswith(_REPORT_FOOTER):
|
while report_content.rstrip().endswith(_REPORT_FOOTER):
|
||||||
|
|
@ -1036,7 +1067,7 @@ def create_generate_report_tool(
|
||||||
f"{metadata.get('section_count', 0)} sections"
|
f"{metadata.get('section_count', 0)} sections"
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
payload: dict[str, Any] = {
|
||||||
"status": "ready",
|
"status": "ready",
|
||||||
"report_id": saved_report_id,
|
"report_id": saved_report_id,
|
||||||
"title": topic,
|
"title": topic,
|
||||||
|
|
@ -1045,17 +1076,32 @@ def create_generate_report_tool(
|
||||||
"report_markdown": report_content,
|
"report_markdown": report_content,
|
||||||
"message": f"Report generated successfully: {topic}",
|
"message": f"Report generated successfully: {topic}",
|
||||||
}
|
}
|
||||||
|
receipt = make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="report",
|
||||||
|
operation="generate",
|
||||||
|
status="success",
|
||||||
|
external_id=str(saved_report_id),
|
||||||
|
preview=topic,
|
||||||
|
)
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=receipt,
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
logger.exception(f"[generate_report] Error: {error_message}")
|
logger.exception(f"[generate_report] Error: {error_message}")
|
||||||
report_id = await _save_failed_report(error_message)
|
report_id = await _save_failed_report(error_message)
|
||||||
|
return _failed(
|
||||||
return {
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": topic,
|
"title": topic,
|
||||||
}
|
},
|
||||||
|
error=error_message,
|
||||||
|
)
|
||||||
|
|
||||||
return generate_report
|
return generate_report
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,14 @@ from typing import Any
|
||||||
|
|
||||||
import pypdf
|
import pypdf
|
||||||
import typst
|
import typst
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.callbacks import dispatch_custom_event
|
from langchain_core.callbacks import dispatch_custom_event
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import HumanMessage
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
|
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.db import Report, shielded_async_session
|
from app.db import Report, shielded_async_session
|
||||||
from app.services.llm_service import get_document_summary_llm
|
from app.services.llm_service import get_document_summary_llm
|
||||||
|
|
||||||
|
|
@ -429,10 +433,11 @@ def create_generate_resume_tool(
|
||||||
@tool
|
@tool
|
||||||
async def generate_resume(
|
async def generate_resume(
|
||||||
user_info: str,
|
user_info: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
user_instructions: str | None = None,
|
user_instructions: str | None = None,
|
||||||
parent_report_id: int | None = None,
|
parent_report_id: int | None = None,
|
||||||
max_pages: int = 1,
|
max_pages: int = 1,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""
|
"""
|
||||||
Generate a professional resume as a Typst document.
|
Generate a professional resume as a Typst document.
|
||||||
|
|
||||||
|
|
@ -476,6 +481,41 @@ def create_generate_resume_tool(
|
||||||
template = _get_template()
|
template = _get_template()
|
||||||
llm_reference = _build_llm_reference(template)
|
llm_reference = _build_llm_reference(template)
|
||||||
|
|
||||||
|
def _success(payload: dict[str, Any], *, report_id: int, title: str) -> Command:
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="resume",
|
||||||
|
operation="generate",
|
||||||
|
status="success",
|
||||||
|
external_id=str(report_id),
|
||||||
|
preview=title,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _failed(
|
||||||
|
payload: dict[str, Any],
|
||||||
|
*,
|
||||||
|
report_id: int | None,
|
||||||
|
error: str,
|
||||||
|
title: str = "Resume",
|
||||||
|
) -> Command:
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="resume",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
external_id=str(report_id) if report_id is not None else None,
|
||||||
|
preview=title,
|
||||||
|
error=error,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
async def _save_failed_report(error_msg: str) -> int | None:
|
async def _save_failed_report(error_msg: str) -> int | None:
|
||||||
try:
|
try:
|
||||||
async with shielded_async_session() as session:
|
async with shielded_async_session() as session:
|
||||||
|
|
@ -514,13 +554,17 @@ def create_generate_resume_tool(
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
error_msg = str(e)
|
error_msg = str(e)
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
# ── Phase 1: READ ─────────────────────────────────────────────
|
# ── Phase 1: READ ─────────────────────────────────────────────
|
||||||
async with shielded_async_session() as read_session:
|
async with shielded_async_session() as read_session:
|
||||||
|
|
@ -541,13 +585,17 @@ def create_generate_resume_tool(
|
||||||
"No LLM configured. Please configure a language model in Settings."
|
"No LLM configured. Please configure a language model in Settings."
|
||||||
)
|
)
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
# ── Phase 2: LLM GENERATION ───────────────────────────────────
|
# ── Phase 2: LLM GENERATION ───────────────────────────────────
|
||||||
|
|
||||||
|
|
@ -588,13 +636,17 @@ def create_generate_resume_tool(
|
||||||
if not body or not isinstance(body, str):
|
if not body or not isinstance(body, str):
|
||||||
error_msg = "LLM returned empty or invalid content"
|
error_msg = "LLM returned empty or invalid content"
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
body = _strip_typst_fences(body)
|
body = _strip_typst_fences(body)
|
||||||
body = _strip_imports(body)
|
body = _strip_imports(body)
|
||||||
|
|
@ -661,13 +713,17 @@ def create_generate_resume_tool(
|
||||||
f"{compile_error or 'Unknown compile error'}"
|
f"{compile_error or 'Unknown compile error'}"
|
||||||
)
|
)
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
actual_pages = _count_pdf_pages(pdf_bytes)
|
actual_pages = _count_pdf_pages(pdf_bytes)
|
||||||
if actual_pages <= validated_max_pages:
|
if actual_pages <= validated_max_pages:
|
||||||
|
|
@ -700,13 +756,17 @@ def create_generate_resume_tool(
|
||||||
):
|
):
|
||||||
error_msg = "LLM returned empty content while compressing resume"
|
error_msg = "LLM returned empty content while compressing resume"
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
body = _strip_typst_fences(compress_response.content)
|
body = _strip_typst_fences(compress_response.content)
|
||||||
body = _strip_imports(body)
|
body = _strip_imports(body)
|
||||||
|
|
@ -718,13 +778,17 @@ def create_generate_resume_tool(
|
||||||
f"Hard limit: <= {MAX_RESUME_PAGES} page(s), actual: {actual_pages}."
|
f"Hard limit: <= {MAX_RESUME_PAGES} page(s), actual: {actual_pages}."
|
||||||
)
|
)
|
||||||
report_id = await _save_failed_report(error_msg)
|
report_id = await _save_failed_report(error_msg)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_msg,
|
"error": error_msg,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
# ── Phase 4: SAVE ─────────────────────────────────────────────
|
# ── Phase 4: SAVE ─────────────────────────────────────────────
|
||||||
dispatch_custom_event(
|
dispatch_custom_event(
|
||||||
|
|
@ -768,7 +832,8 @@ def create_generate_resume_tool(
|
||||||
|
|
||||||
logger.info(f"[generate_resume] Created resume {saved_id}: {resume_title}")
|
logger.info(f"[generate_resume] Created resume {saved_id}: {resume_title}")
|
||||||
|
|
||||||
return {
|
return _success(
|
||||||
|
{
|
||||||
"status": "ready",
|
"status": "ready",
|
||||||
"report_id": saved_id,
|
"report_id": saved_id,
|
||||||
"title": resume_title,
|
"title": resume_title,
|
||||||
|
|
@ -782,18 +847,25 @@ def create_generate_resume_tool(
|
||||||
f"page(s). Final length: {actual_pages} page(s)."
|
f"page(s). Final length: {actual_pages} page(s)."
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
}
|
},
|
||||||
|
report_id=saved_id,
|
||||||
|
title=resume_title,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
logger.exception(f"[generate_resume] Error: {error_message}")
|
logger.exception(f"[generate_resume] Error: {error_message}")
|
||||||
report_id = await _save_failed_report(error_message)
|
report_id = await _save_failed_report(error_message)
|
||||||
return {
|
return _failed(
|
||||||
|
{
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
"report_id": report_id,
|
"report_id": report_id,
|
||||||
"title": "Resume",
|
"title": "Resume",
|
||||||
"content_type": "typst",
|
"content_type": "typst",
|
||||||
}
|
},
|
||||||
|
report_id=report_id,
|
||||||
|
error=error_message,
|
||||||
|
)
|
||||||
|
|
||||||
return generate_resume
|
return generate_resume
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,29 @@
|
||||||
"""Factory for a video-presentation tool that queues background work and returns an ID for polling."""
|
"""Factory for a video-presentation tool.
|
||||||
|
|
||||||
|
Dispatches the heavy generation to Celery and then polls the
|
||||||
|
video-presentation row until it reaches a terminal status (READY/FAILED).
|
||||||
|
The tool always returns a real terminal ``Receipt`` — never a pending
|
||||||
|
one. The wait is bounded by the existing per-invocation safety net
|
||||||
|
(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode,
|
||||||
|
HTTP / process lifetime in single-agent mode). Video rendering can be
|
||||||
|
heavy; raise that ceiling if your generations routinely exceed it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.agents.shared.deliverable_wait import wait_for_deliverable
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
|
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def create_generate_video_presentation_tool(
|
def create_generate_video_presentation_tool(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
|
|
@ -19,9 +36,10 @@ def create_generate_video_presentation_tool(
|
||||||
@tool
|
@tool
|
||||||
async def generate_video_presentation(
|
async def generate_video_presentation(
|
||||||
source_content: str,
|
source_content: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
video_title: str = "SurfSense Presentation",
|
video_title: str = "SurfSense Presentation",
|
||||||
user_prompt: str | None = None,
|
user_prompt: str | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""Generate a video presentation from the provided content.
|
"""Generate a video presentation from the provided content.
|
||||||
|
|
||||||
Use this tool when the user asks to create a video, presentation, slides, or slide deck.
|
Use this tool when the user asks to create a video, presentation, slides, or slide deck.
|
||||||
|
|
@ -56,25 +74,103 @@ def create_generate_video_presentation_tool(
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(
|
logger.info(
|
||||||
f"[generate_video_presentation] Created video presentation {video_pres_id}, task: {task.id}"
|
"[generate_video_presentation] Created video presentation %s, task: %s",
|
||||||
|
video_pres_id,
|
||||||
|
task.id,
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
# Wait until the Celery worker flips the row to a terminal
|
||||||
"status": VideoPresentationStatus.PENDING.value,
|
# state. The wait is bounded only by the subagent invoke
|
||||||
|
# timeout (multi-agent) or HTTP lifetime (single-agent) —
|
||||||
|
# see app.agents.shared.deliverable_wait for details.
|
||||||
|
terminal_status, _columns, elapsed = await wait_for_deliverable(
|
||||||
|
model=VideoPresentation,
|
||||||
|
row_id=video_pres_id,
|
||||||
|
columns=[VideoPresentation.status],
|
||||||
|
terminal_statuses={
|
||||||
|
VideoPresentationStatus.READY,
|
||||||
|
VideoPresentationStatus.FAILED,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if terminal_status == VideoPresentationStatus.READY:
|
||||||
|
logger.info(
|
||||||
|
"[generate_video_presentation] %s READY in %.2fs",
|
||||||
|
video_pres_id,
|
||||||
|
elapsed,
|
||||||
|
)
|
||||||
|
payload: dict[str, Any] = {
|
||||||
|
"status": VideoPresentationStatus.READY.value,
|
||||||
"video_presentation_id": video_pres_id,
|
"video_presentation_id": video_pres_id,
|
||||||
"title": video_title,
|
"title": video_title,
|
||||||
"message": "Video presentation generation started. This may take a few minutes.",
|
"message": "Video presentation generated and saved.",
|
||||||
}
|
}
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="video_presentation",
|
||||||
|
operation="generate",
|
||||||
|
status="success",
|
||||||
|
external_id=str(video_pres_id),
|
||||||
|
preview=video_title,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only other terminal state is FAILED.
|
||||||
|
logger.warning(
|
||||||
|
"[generate_video_presentation] %s FAILED in %.2fs",
|
||||||
|
video_pres_id,
|
||||||
|
elapsed,
|
||||||
|
)
|
||||||
|
err = (
|
||||||
|
"Background worker reported FAILED status for this "
|
||||||
|
"video presentation."
|
||||||
|
)
|
||||||
|
payload = {
|
||||||
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
|
"video_presentation_id": video_pres_id,
|
||||||
|
"title": video_title,
|
||||||
|
"error": err,
|
||||||
|
}
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="video_presentation",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
external_id=str(video_pres_id),
|
||||||
|
preview=video_title,
|
||||||
|
error=err,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
print(f"[generate_video_presentation] Error: {error_message}")
|
logger.exception(
|
||||||
return {
|
"[generate_video_presentation] Error: %s", error_message
|
||||||
|
)
|
||||||
|
payload = {
|
||||||
"status": VideoPresentationStatus.FAILED.value,
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
"title": video_title,
|
"title": video_title,
|
||||||
"video_presentation_id": None,
|
"video_presentation_id": None,
|
||||||
}
|
}
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="deliverables",
|
||||||
|
type="video_presentation",
|
||||||
|
operation="generate",
|
||||||
|
status="failed",
|
||||||
|
preview=video_title,
|
||||||
|
error=error_message,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
return generate_video_presentation
|
return generate_video_presentation
|
||||||
|
|
|
||||||
|
|
@ -150,11 +150,12 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
|
|
||||||
|
Route-specific rules:
|
||||||
|
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.
|
- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -117,11 +117,12 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
|
|
||||||
|
Route-specific rules:
|
||||||
|
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.
|
- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output.
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ Persist durable preferences/facts/instructions with `update_memory` while avoidi
|
||||||
</goal>
|
</goal>
|
||||||
|
|
||||||
<visibility_scope>
|
<visibility_scope>
|
||||||
{{MEMORY_VISIBILITY_POLICY}}
|
Memory is search-space-scoped; do not assume cross-workspace visibility.
|
||||||
</visibility_scope>
|
</visibility_scope>
|
||||||
|
|
||||||
<available_tools>
|
<available_tools>
|
||||||
|
|
@ -53,10 +53,8 @@ Return **only** one JSON object (no markdown/prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` -> `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` -> `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
|
|
||||||
- `evidence.memory_category` is a semantic classification for supervisor logs
|
- `evidence.memory_category` is a semantic classification for supervisor logs
|
||||||
only. It is not the persisted storage format and must not force inline
|
only. It is not the persisted storage format and must not force inline
|
||||||
`[fact|preference|instruction]` markers into saved memory.
|
`[fact|preference|instruction]` markers into saved memory.
|
||||||
|
|
|
||||||
|
|
@ -46,10 +46,8 @@ Return **only** one JSON object (no markdown/prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` -> `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` -> `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
|
|
||||||
- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
|
- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
|
||||||
- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
|
- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
|
||||||
|
|
@ -92,12 +92,12 @@ Return **only** one JSON object (no markdown, no prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: base, table, field, choice, record, etc.).
|
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: base, table, field, choice, record, etc.).
|
||||||
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; up to 10 entries, then `"...and N more"`).
|
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; up to 10 entries, then `"...and N more"`).
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Discover before you mutate; never guess identifiers, choice IDs, or required fields.
|
Discover before you mutate; never guess identifiers, choice IDs, or required fields.
|
||||||
|
|
|
||||||
|
|
@ -111,11 +111,12 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
Route-specific rules:
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For `search_calendar_events` results, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; up to 10 entries, then `"...and N more"`).
|
- For `search_calendar_events` results, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; up to 10 entries, then `"...and N more"`).
|
||||||
- For ambiguous matches across `update_calendar_event` / `delete_calendar_event`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`, where `label` should include the event title and start time for human readability).
|
- For ambiguous matches across `update_calendar_event` / `delete_calendar_event`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`, where `label` should include the event title and start time for human readability).
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -93,12 +93,12 @@ Return **only** one JSON object (no markdown, no prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: task, list, member, status, custom-field choice, etc.).
|
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: task, list, member, status, custom-field choice, etc.).
|
||||||
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (task id, title, status, assignees; up to 10 entries, then `"...and N more"`).
|
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (task id, title, status, assignees; up to 10 entries, then `"...and N more"`).
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Discover before you mutate; never guess identifiers, list statuses, or assignees.
|
Discover before you mutate; never guess identifiers, list statuses, or assignees.
|
||||||
|
|
|
||||||
|
|
@ -100,9 +100,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -108,9 +108,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Resolve before you call; verify before you send; map every tool outcome faithfully.
|
Resolve before you call; verify before you send; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -98,9 +98,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -110,11 +110,12 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
Route-specific rules:
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For `search_gmail` results, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; up to 10 entries, then `"...and N more"`).
|
- For `search_gmail` results, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; up to 10 entries, then `"...and N more"`).
|
||||||
- For ambiguous matches across `update_gmail_draft` / `trash_gmail_email` / `read_gmail_email`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`).
|
- For ambiguous matches across `update_gmail_draft` / `trash_gmail_email` / `read_gmail_email`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`).
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Infer before you call; verify before you send; map every tool outcome faithfully.
|
Infer before you call; verify before you send; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -5,12 +5,16 @@ from datetime import datetime
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
|
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
|
||||||
request_approval,
|
request_approval,
|
||||||
)
|
)
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.services.gmail import GmailToolMetadataService
|
from app.services.gmail import GmailToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -26,9 +30,10 @@ def create_send_gmail_email_tool(
|
||||||
to: str,
|
to: str,
|
||||||
subject: str,
|
subject: str,
|
||||||
body: str,
|
body: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
cc: str | None = None,
|
cc: str | None = None,
|
||||||
bcc: str | None = None,
|
bcc: str | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""Send an email via Gmail.
|
"""Send an email via Gmail.
|
||||||
|
|
||||||
Use when the user explicitly asks to send an email. This sends the
|
Use when the user explicitly asks to send an email. This sends the
|
||||||
|
|
@ -60,11 +65,34 @@ def create_send_gmail_email_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"send_gmail_email called: to='{to}', subject='{subject}'")
|
logger.info(f"send_gmail_email called: to='{to}', subject='{subject}'")
|
||||||
|
|
||||||
|
def _emit(
|
||||||
|
payload: dict[str, Any],
|
||||||
|
*,
|
||||||
|
success: bool,
|
||||||
|
external_id: str | None = None,
|
||||||
|
error: str | None = None,
|
||||||
|
) -> Command:
|
||||||
|
return with_receipt(
|
||||||
|
payload=payload,
|
||||||
|
receipt=make_receipt(
|
||||||
|
route="gmail",
|
||||||
|
type="message",
|
||||||
|
operation="send",
|
||||||
|
status="success" if success else "failed",
|
||||||
|
external_id=external_id,
|
||||||
|
preview=f"to={to}: {subject}"[:200],
|
||||||
|
error=error,
|
||||||
|
),
|
||||||
|
tool_call_id=runtime.tool_call_id,
|
||||||
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if db_session is None or search_space_id is None or user_id is None:
|
||||||
return {
|
msg = "Gmail tool not properly configured. Please contact support."
|
||||||
"status": "error",
|
return _emit(
|
||||||
"message": "Gmail tool not properly configured. Please contact support.",
|
{"status": "error", "message": msg},
|
||||||
}
|
success=False,
|
||||||
|
error=msg,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
metadata_service = GmailToolMetadataService(db_session)
|
metadata_service = GmailToolMetadataService(db_session)
|
||||||
|
|
@ -74,16 +102,24 @@ def create_send_gmail_email_tool(
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(f"Failed to fetch creation context: {context['error']}")
|
||||||
return {"status": "error", "message": context["error"]}
|
return _emit(
|
||||||
|
{"status": "error", "message": context["error"]},
|
||||||
|
success=False,
|
||||||
|
error=context["error"],
|
||||||
|
)
|
||||||
|
|
||||||
accounts = context.get("accounts", [])
|
accounts = context.get("accounts", [])
|
||||||
if accounts and all(a.get("auth_expired") for a in accounts):
|
if accounts and all(a.get("auth_expired") for a in accounts):
|
||||||
logger.warning("All Gmail accounts have expired authentication")
|
logger.warning("All Gmail accounts have expired authentication")
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "auth_error",
|
"status": "auth_error",
|
||||||
"message": "All connected Gmail accounts need re-authentication. Please re-authenticate in your connector settings.",
|
"message": "All connected Gmail accounts need re-authentication. Please re-authenticate in your connector settings.",
|
||||||
"connector_type": "gmail",
|
"connector_type": "gmail",
|
||||||
}
|
},
|
||||||
|
success=False,
|
||||||
|
error="auth_expired",
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Requesting approval for sending Gmail email: to='{to}', subject='{subject}'"
|
f"Requesting approval for sending Gmail email: to='{to}', subject='{subject}'"
|
||||||
|
|
@ -103,10 +139,14 @@ def create_send_gmail_email_tool(
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.rejected:
|
if result.rejected:
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "rejected",
|
"status": "rejected",
|
||||||
"message": "User declined. The email was not sent. Do not ask again or suggest alternatives.",
|
"message": "User declined. The email was not sent. Do not ask again or suggest alternatives.",
|
||||||
}
|
},
|
||||||
|
success=False,
|
||||||
|
error="user_rejected",
|
||||||
|
)
|
||||||
|
|
||||||
final_to = result.params.get("to", to)
|
final_to = result.params.get("to", to)
|
||||||
final_subject = result.params.get("subject", subject)
|
final_subject = result.params.get("subject", subject)
|
||||||
|
|
@ -135,10 +175,14 @@ def create_send_gmail_email_tool(
|
||||||
)
|
)
|
||||||
connector = result.scalars().first()
|
connector = result.scalars().first()
|
||||||
if not connector:
|
if not connector:
|
||||||
return {
|
msg = (
|
||||||
"status": "error",
|
"Selected Gmail connector is invalid or has been disconnected."
|
||||||
"message": "Selected Gmail connector is invalid or has been disconnected.",
|
)
|
||||||
}
|
return _emit(
|
||||||
|
{"status": "error", "message": msg},
|
||||||
|
success=False,
|
||||||
|
error=msg,
|
||||||
|
)
|
||||||
actual_connector_id = connector.id
|
actual_connector_id = connector.id
|
||||||
else:
|
else:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
|
|
@ -150,10 +194,12 @@ def create_send_gmail_email_tool(
|
||||||
)
|
)
|
||||||
connector = result.scalars().first()
|
connector = result.scalars().first()
|
||||||
if not connector:
|
if not connector:
|
||||||
return {
|
msg = "No Gmail connector found. Please connect Gmail in your workspace settings."
|
||||||
"status": "error",
|
return _emit(
|
||||||
"message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
|
{"status": "error", "message": msg},
|
||||||
}
|
success=False,
|
||||||
|
error=msg,
|
||||||
|
)
|
||||||
actual_connector_id = connector.id
|
actual_connector_id = connector.id
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -166,10 +212,12 @@ def create_send_gmail_email_tool(
|
||||||
):
|
):
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if not cca_id:
|
if not cca_id:
|
||||||
return {
|
msg = "Composio connected account ID not found for this Gmail connector."
|
||||||
"status": "error",
|
return _emit(
|
||||||
"message": "Composio connected account ID not found for this Gmail connector.",
|
{"status": "error", "message": msg},
|
||||||
}
|
success=False,
|
||||||
|
error=msg,
|
||||||
|
)
|
||||||
|
|
||||||
from app.services.composio_service import ComposioService
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
|
@ -187,7 +235,11 @@ def create_send_gmail_email_tool(
|
||||||
bcc=final_bcc,
|
bcc=final_bcc,
|
||||||
)
|
)
|
||||||
if error:
|
if error:
|
||||||
return {"status": "error", "message": error}
|
return _emit(
|
||||||
|
{"status": "error", "message": error},
|
||||||
|
success=False,
|
||||||
|
error=error,
|
||||||
|
)
|
||||||
sent = {"id": sent_message_id, "threadId": sent_thread_id}
|
sent = {"id": sent_message_id, "threadId": sent_thread_id}
|
||||||
else:
|
else:
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
|
|
@ -275,11 +327,15 @@ def create_send_gmail_email_tool(
|
||||||
actual_connector_id,
|
actual_connector_id,
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "insufficient_permissions",
|
"status": "insufficient_permissions",
|
||||||
"connector_id": actual_connector_id,
|
"connector_id": actual_connector_id,
|
||||||
"message": "This Gmail account needs additional permissions. Please re-authenticate in connector settings.",
|
"message": "This Gmail account needs additional permissions. Please re-authenticate in connector settings.",
|
||||||
}
|
},
|
||||||
|
success=False,
|
||||||
|
error="insufficient_permissions",
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -310,12 +366,16 @@ def create_send_gmail_email_tool(
|
||||||
logger.warning(f"KB sync after send failed: {kb_err}")
|
logger.warning(f"KB sync after send failed: {kb_err}")
|
||||||
kb_message_suffix = " This email will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This email will be added to your knowledge base in the next scheduled sync."
|
||||||
|
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "success",
|
"status": "success",
|
||||||
"message_id": sent.get("id"),
|
"message_id": sent.get("id"),
|
||||||
"thread_id": sent.get("threadId"),
|
"thread_id": sent.get("threadId"),
|
||||||
"message": f"Successfully sent email to '{final_to}' with subject '{final_subject}'.{kb_message_suffix}",
|
"message": f"Successfully sent email to '{final_to}' with subject '{final_subject}'.{kb_message_suffix}",
|
||||||
}
|
},
|
||||||
|
success=True,
|
||||||
|
external_id=sent.get("id"),
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
from langgraph.errors import GraphInterrupt
|
from langgraph.errors import GraphInterrupt
|
||||||
|
|
@ -324,9 +384,11 @@ def create_send_gmail_email_tool(
|
||||||
raise
|
raise
|
||||||
|
|
||||||
logger.error(f"Error sending Gmail email: {e}", exc_info=True)
|
logger.error(f"Error sending Gmail email: {e}", exc_info=True)
|
||||||
return {
|
msg = "Something went wrong while sending the email. Please try again."
|
||||||
"status": "error",
|
return _emit(
|
||||||
"message": "Something went wrong while sending the email. Please try again.",
|
{"status": "error", "message": msg},
|
||||||
}
|
success=False,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
|
||||||
return send_gmail_email
|
return send_gmail_email
|
||||||
|
|
|
||||||
|
|
@ -100,9 +100,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -111,12 +111,12 @@ Return **only** one JSON object (no markdown, no prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: site, project, issue, user, transition, etc.).
|
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: site, project, issue, user, transition, etc.).
|
||||||
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (issue key, summary, status, assignee; up to 10 entries, then `"...and N more"`).
|
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (issue key, summary, status, assignee; up to 10 entries, then `"...and N more"`).
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Discover before you mutate; never guess identifiers, transitions, or required fields.
|
Discover before you mutate; never guess identifiers, transitions, or required fields.
|
||||||
|
|
|
||||||
|
|
@ -101,12 +101,12 @@ Return **only** one JSON object (no markdown, no prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: issue, user, project, state, etc.).
|
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: issue, user, project, state, etc.).
|
||||||
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (identifier, title, state, assignee; up to 10 entries, then `"...and N more"`).
|
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (identifier, title, state, assignee; up to 10 entries, then `"...and N more"`).
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Discover before you mutate; never guess identifiers.
|
Discover before you mutate; never guess identifiers.
|
||||||
|
|
|
||||||
|
|
@ -101,9 +101,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Infer before you call; verify before you create; map every tool outcome faithfully.
|
Infer before you call; verify before you create; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -99,9 +99,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,16 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.tools import ToolRuntime
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from langgraph.types import Command
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
|
from app.agents.multi_agent_chat.subagents.shared.hitl.approvals.self_gated import (
|
||||||
request_approval,
|
request_approval,
|
||||||
)
|
)
|
||||||
|
from app.agents.shared.receipt import make_receipt
|
||||||
|
from app.agents.shared.receipt_command import with_receipt
|
||||||
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
||||||
from app.services.notion.tool_metadata_service import NotionToolMetadataService
|
from app.services.notion.tool_metadata_service import NotionToolMetadataService
|
||||||
|
|
||||||
|
|
@ -35,8 +39,9 @@ def create_delete_notion_page_tool(
|
||||||
@tool
|
@tool
|
||||||
async def delete_notion_page(
|
async def delete_notion_page(
|
||||||
page_title: str,
|
page_title: str,
|
||||||
|
runtime: ToolRuntime,
|
||||||
delete_from_kb: bool = False,
|
delete_from_kb: bool = False,
|
||||||
) -> dict[str, Any]:
|
) -> Command:
|
||||||
"""Delete (archive) a Notion page.
|
"""Delete (archive) a Notion page.
|
||||||
|
|
||||||
Use this tool when the user asks you to delete, remove, or archive
|
Use this tool when the user asks you to delete, remove, or archive
|
||||||
|
|
@ -65,14 +70,39 @@ def create_delete_notion_page_tool(
|
||||||
f"delete_notion_page called: page_title='{page_title}', delete_from_kb={delete_from_kb}"
|
f"delete_notion_page called: page_title='{page_title}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _emit(
    payload: dict[str, Any],
    *,
    status: str,
    external_id: str | None = None,
    error: str | None = None,
) -> Command:
    """Return ``payload`` via ``with_receipt`` with a Notion page-delete receipt.

    Args:
        payload: Tool result dict handed to ``with_receipt`` unchanged.
        status: Tool-level outcome. Only the exact value ``"success"`` is
            recorded as a successful receipt; anything else becomes
            ``"failed"``.
        external_id: Optional identifier forwarded to the receipt.
        error: Optional error text forwarded to the receipt.

    Returns:
        Whatever ``with_receipt`` produces for this tool call.
    """
    # Collapse the richer tool statuses into the two values passed on here.
    receipt_status = "failed" if status != "success" else "success"
    delete_receipt = make_receipt(
        route="notion",
        type="page",
        operation="delete",
        status=receipt_status,
        external_id=external_id,
        # ``page_title`` and ``runtime`` are read from the enclosing tool call.
        preview=page_title,
        error=error,
    )
    return with_receipt(
        payload=payload,
        receipt=delete_receipt,
        tool_call_id=runtime.tool_call_id,
    )
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if db_session is None or search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Notion tool not properly configured - missing required parameters"
|
"Notion tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Notion tool not properly configured. Please contact support.",
|
"message": "Notion tool not properly configured. Please contact support.",
|
||||||
}
|
},
|
||||||
|
status="error",
|
||||||
|
error="Notion tool not properly configured. Please contact support.",
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get page context (page_id, account, title) from indexed data
|
# Get page context (page_id, account, title) from indexed data
|
||||||
|
|
@ -86,16 +116,18 @@ def create_delete_notion_page_tool(
|
||||||
# Check if it's a "not found" error (softer handling for LLM)
|
# Check if it's a "not found" error (softer handling for LLM)
|
||||||
if "not found" in error_msg.lower():
|
if "not found" in error_msg.lower():
|
||||||
logger.warning(f"Page not found: {error_msg}")
|
logger.warning(f"Page not found: {error_msg}")
|
||||||
return {
|
return _emit(
|
||||||
"status": "not_found",
|
{"status": "not_found", "message": error_msg},
|
||||||
"message": error_msg,
|
status="error",
|
||||||
}
|
error=error_msg,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to fetch delete context: {error_msg}")
|
logger.error(f"Failed to fetch delete context: {error_msg}")
|
||||||
return {
|
return _emit(
|
||||||
"status": "error",
|
{"status": "error", "message": error_msg},
|
||||||
"message": error_msg,
|
status="error",
|
||||||
}
|
error=error_msg,
|
||||||
|
)
|
||||||
|
|
||||||
account = context.get("account", {})
|
account = context.get("account", {})
|
||||||
if account.get("auth_expired"):
|
if account.get("auth_expired"):
|
||||||
|
|
@ -103,10 +135,14 @@ def create_delete_notion_page_tool(
|
||||||
"Notion account %s has expired authentication",
|
"Notion account %s has expired authentication",
|
||||||
account.get("id"),
|
account.get("id"),
|
||||||
)
|
)
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "auth_error",
|
"status": "auth_error",
|
||||||
"message": "The Notion account for this page needs re-authentication. Please re-authenticate in your connector settings.",
|
"message": "The Notion account for this page needs re-authentication. Please re-authenticate in your connector settings.",
|
||||||
}
|
},
|
||||||
|
status="error",
|
||||||
|
error="auth_expired",
|
||||||
|
)
|
||||||
|
|
||||||
page_id = context.get("page_id")
|
page_id = context.get("page_id")
|
||||||
connector_id_from_context = account.get("id")
|
connector_id_from_context = account.get("id")
|
||||||
|
|
@ -129,10 +165,14 @@ def create_delete_notion_page_tool(
|
||||||
|
|
||||||
if result.rejected:
|
if result.rejected:
|
||||||
logger.info("Notion page deletion rejected by user")
|
logger.info("Notion page deletion rejected by user")
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "rejected",
|
"status": "rejected",
|
||||||
"message": "User declined. Do not retry or suggest alternatives.",
|
"message": "User declined. Do not retry or suggest alternatives.",
|
||||||
}
|
},
|
||||||
|
status="error",
|
||||||
|
error="user_rejected",
|
||||||
|
)
|
||||||
|
|
||||||
final_page_id = result.params.get("page_id", page_id)
|
final_page_id = result.params.get("page_id", page_id)
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
|
|
@ -165,18 +205,26 @@ def create_delete_notion_page_tool(
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Invalid connector_id={final_connector_id} for search_space_id={search_space_id}"
|
f"Invalid connector_id={final_connector_id} for search_space_id={search_space_id}"
|
||||||
)
|
)
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Selected Notion account is invalid or has been disconnected. Please select a valid account.",
|
"message": "Selected Notion account is invalid or has been disconnected. Please select a valid account.",
|
||||||
}
|
},
|
||||||
|
status="error",
|
||||||
|
error="invalid_connector",
|
||||||
|
)
|
||||||
actual_connector_id = connector.id
|
actual_connector_id = connector.id
|
||||||
logger.info(f"Validated Notion connector: id={actual_connector_id}")
|
logger.info(f"Validated Notion connector: id={actual_connector_id}")
|
||||||
else:
|
else:
|
||||||
logger.error("No connector found for this page")
|
logger.error("No connector found for this page")
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "No connector found for this page.",
|
"message": "No connector found for this page.",
|
||||||
}
|
},
|
||||||
|
status="error",
|
||||||
|
error="no_connector",
|
||||||
|
)
|
||||||
|
|
||||||
# Create connector instance
|
# Create connector instance
|
||||||
notion_connector = NotionHistoryConnector(
|
notion_connector = NotionHistoryConnector(
|
||||||
|
|
@ -232,7 +280,13 @@ def create_delete_notion_page_tool(
|
||||||
f"{result.get('message', '')} (also removed from knowledge base)"
|
f"{result.get('message', '')} (also removed from knowledge base)"
|
||||||
)
|
)
|
||||||
|
|
||||||
return result
|
status = result.get("status", "error")
|
||||||
|
return _emit(
|
||||||
|
result,
|
||||||
|
status=status,
|
||||||
|
external_id=str(final_page_id) if final_page_id else None,
|
||||||
|
error=None if status == "success" else result.get("message"),
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
from langgraph.errors import GraphInterrupt
|
from langgraph.errors import GraphInterrupt
|
||||||
|
|
@ -245,20 +299,28 @@ def create_delete_notion_page_tool(
|
||||||
if isinstance(e, NotionAPIError) and (
|
if isinstance(e, NotionAPIError) and (
|
||||||
"401" in error_str or "unauthorized" in error_str
|
"401" in error_str or "unauthorized" in error_str
|
||||||
):
|
):
|
||||||
return {
|
return _emit(
|
||||||
|
{
|
||||||
"status": "auth_error",
|
"status": "auth_error",
|
||||||
"message": str(e),
|
"message": str(e),
|
||||||
"connector_id": connector_id_from_context
|
"connector_id": connector_id_from_context
|
||||||
if "connector_id_from_context" in dir()
|
if "connector_id_from_context" in dir()
|
||||||
else None,
|
else None,
|
||||||
"connector_type": "notion",
|
"connector_type": "notion",
|
||||||
}
|
},
|
||||||
|
status="error",
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
if isinstance(e, ValueError | NotionAPIError):
|
if isinstance(e, ValueError | NotionAPIError):
|
||||||
message = str(e)
|
message = str(e)
|
||||||
else:
|
else:
|
||||||
message = (
|
message = (
|
||||||
"Something went wrong while deleting the page. Please try again."
|
"Something went wrong while deleting the page. Please try again."
|
||||||
)
|
)
|
||||||
return {"status": "error", "message": message}
|
return _emit(
|
||||||
|
{"status": "error", "message": message},
|
||||||
|
status="error",
|
||||||
|
error=message,
|
||||||
|
)
|
||||||
|
|
||||||
return delete_notion_page
|
return delete_notion_page
|
||||||
|
|
|
||||||
|
|
@ -97,9 +97,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Infer before you call; map every tool outcome faithfully.
|
Infer before you call; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -87,12 +87,12 @@ Return **only** one JSON object (no markdown, no prose):
|
||||||
"missing_fields": string[] | null,
|
"missing_fields": string[] | null,
|
||||||
"assumptions": string[] | null
|
"assumptions": string[] | null
|
||||||
}
|
}
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
Route-specific rules:
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: channel, user, message, thread).
|
- For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: channel, user, message, thread).
|
||||||
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (channel/user, key identifier, timestamp, short snippet; up to 10 entries, then `"...and N more"`).
|
- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (channel/user, key identifier, timestamp, short snippet; up to 10 entries, then `"...and N more"`).
|
||||||
</output_contract>
|
</output_contract>
|
||||||
|
|
||||||
|
<include snippet="verifiable_handle"/>
|
||||||
|
|
||||||
Discover before you post; never guess channel, user, or thread targets.
|
Discover before you post; never guess channel, user, or thread targets.
|
||||||
|
|
|
||||||
|
|
@ -115,9 +115,8 @@ Return **only** one JSON object (no markdown or prose outside it):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
<include snippet="output_contract_base"/>
|
||||||
- `status=success` → `next_step=null`, `missing_fields=null`.
|
|
||||||
- `status=partial|blocked|error` → `next_step` must be non-null.
|
<include snippet="verifiable_handle"/>
|
||||||
- `status=blocked` due to missing required inputs → `missing_fields` must be non-null.
|
|
||||||
|
|
||||||
Resolve before you call; verify before you send; map every tool outcome faithfully.
|
Resolve before you call; verify before you send; map every tool outcome faithfully.
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,11 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
from importlib import resources
|
from importlib import resources
|
||||||
|
|
||||||
|
_SHARED_SNIPPETS_PACKAGE = "app.agents.multi_agent_chat.subagents.shared.snippets"
|
||||||
|
|
||||||
|
|
||||||
def read_md_file(package: str, stem: str) -> str:
|
def read_md_file(package: str, stem: str) -> str:
|
||||||
"""Load ``{stem}.md`` from ``package`` via importlib resources, or return empty."""
|
"""Load ``{stem}.md`` from ``package`` via importlib resources, or return empty."""
|
||||||
|
|
@ -12,3 +15,13 @@ def read_md_file(package: str, stem: str) -> str:
|
||||||
return ""
|
return ""
|
||||||
text = ref.read_text(encoding="utf-8")
|
text = ref.read_text(encoding="utf-8")
|
||||||
return text.rstrip("\n")
|
return text.rstrip("\n")
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=64)
def read_shared_snippet(name: str) -> str:
    """Return the text of the shared snippet ``{name}.md``.

    Thin memoised wrapper around :func:`read_md_file`, bound to the shared
    snippets package. Results are kept in an LRU cache (including an empty
    result from ``read_md_file``), so repeated lookups of the same snippet
    name do not re-read the resource.
    """
    return read_md_file(package=_SHARED_SNIPPETS_PACKAGE, stem=name)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
"""Shared markdown snippets composed into every subagent system prompt.
|
||||||
|
|
||||||
|
Resolved at build time by :func:`pack_subagent` in ``subagent_builder.py``
|
||||||
|
via the ``<include snippet="NAME"/>`` directive. See ``output_contract_base.md``
|
||||||
|
and ``verifiable_handle.md`` for the included content.
|
||||||
|
"""
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
Rules (universal):
|
||||||
|
- `status=success` -> `next_step=null`, `missing_fields=null`.
|
||||||
|
- `status=partial|blocked|error` -> `next_step` must be non-null.
|
||||||
|
- `status=blocked` due to missing required inputs -> `missing_fields` must be non-null.
|
||||||
|
- `assumptions`: any inferences you made about the user's intent; `null` when no inferences were needed.
|
||||||
|
- The `evidence` object's fields are documented in your route-specific `<output_contract>` above; never invent fields the tool did not return.
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
<verifiable_handle>
|
||||||
|
Mutating tools you call return a structured `Receipt` object alongside their normal payload (see `evidence.receipts` in your `<output_contract>`). The supervisor uses the Receipt's `verifiable_url` and `external_id` to independently confirm the operation succeeded - do not paraphrase, shorten, or guess these values.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Quote each Receipt's `verifiable_url` and `external_id` **verbatim** in `evidence.receipts`. Copy character-for-character; never retype from memory.
|
||||||
|
- If a Receipt has `status="failed"`, set your own `status="error"` and put the Receipt's `error` field in `next_step`.
|
||||||
|
- If a Receipt has `status="pending"` (async backends — podcasts, video presentations, anything queued through Celery), report `status=success`, surface the pending Receipt as-is, and tell the supervisor in `action_summary` that the artefact is **being generated in the background** (e.g. "Podcast 38 queued; orchestrator should report it as kicked off, not yet ready"). A pending Receipt almost always lacks `verifiable_url` because the artefact does not exist yet — that is expected, not a defect. Do **not** wait, poll, or retry; control returns to the supervisor immediately and the asset becomes visible to the user out of band via its own UI surface.
|
||||||
|
- Never claim a mutation succeeded without a matching Receipt with `status="success"` or `"pending"` in your tool results this turn.
|
||||||
|
- For tools that do not return a Receipt (read-only operations, search, lookup), the receipt rules do not apply; only the route-specific `evidence` fields matter.
|
||||||
|
</verifiable_handle>
|
||||||
|
|
@ -2,12 +2,30 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable, Mapping
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from deepagents import SubAgent
|
from deepagents import SubAgent
|
||||||
|
|
||||||
from app.agents.new_chat.permissions import Ruleset
|
from app.agents.new_chat.permissions import Ruleset
|
||||||
|
|
||||||
|
# A context-hint provider receives the parent-agent ``runtime.state`` mapping
|
||||||
|
# and the ``description`` the orchestrator wrote, and returns a short string
|
||||||
|
# the runtime prepends to the subagent's first ``HumanMessage``. Used for
|
||||||
|
# things like "current search-space id is X" or "the user is in workspace Y" —
|
||||||
|
# never for full corpora, since the prepended text consumes the subagent's
|
||||||
|
# prompt budget on every invocation. Return ``None`` (or an empty string) to
|
||||||
|
# skip the hint for this call.
|
||||||
|
ContextHintProvider = Callable[[Mapping[str, Any], str], str | None]
|
||||||
|
|
||||||
|
# Custom key stashed on the deepagents ``SubAgent`` dict so the provider
|
||||||
|
# survives the trip from ``pack_subagent`` → registry → middleware →
|
||||||
|
# task_tool. ``deepagents.create_agent`` only extracts the keys it
|
||||||
|
# recognises, so an extra key here is dropped silently at compile time.
|
||||||
|
# The prefix avoids any collision with future deepagents fields.
|
||||||
|
SURF_CONTEXT_HINT_PROVIDER_KEY = "surf_context_hint_provider"
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, slots=True)
|
@dataclass(frozen=True, slots=True)
|
||||||
class SurfSenseSubagentSpec:
|
class SurfSenseSubagentSpec:
|
||||||
|
|
@ -20,10 +38,22 @@ class SurfSenseSubagentSpec:
|
||||||
layers them into the subagent's :class:`PermissionMiddleware`,
|
layers them into the subagent's :class:`PermissionMiddleware`,
|
||||||
so each subagent owns its own ruleset without aliasing the
|
so each subagent owns its own ruleset without aliasing the
|
||||||
shared rule engine.
|
shared rule engine.
|
||||||
|
context_hint_provider: Optional callback invoked once per ``task(...)``
|
||||||
|
invocation, immediately before the subagent runs. Its return
|
||||||
|
value is prepended to the subagent's first ``HumanMessage`` so
|
||||||
|
the subagent can see things it would otherwise have to discover
|
||||||
|
(active search space, KB root, current user timezone, etc.).
|
||||||
|
Carried as its own field (and mirrored onto ``spec`` under ``SURF_CONTEXT_HINT_PROVIDER_KEY``)
|
||||||
|
because that dict is forwarded verbatim to upstream code, which only recognises its own typed keys.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
spec: SubAgent
|
spec: SubAgent
|
||||||
ruleset: Ruleset
|
ruleset: Ruleset
|
||||||
|
context_hint_provider: ContextHintProvider | None = None
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["SurfSenseSubagentSpec"]
|
__all__ = [
|
||||||
|
"SURF_CONTEXT_HINT_PROVIDER_KEY",
|
||||||
|
"ContextHintProvider",
|
||||||
|
"SurfSenseSubagentSpec",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
|
|
||||||
from deepagents import SubAgent
|
from deepagents import SubAgent
|
||||||
|
|
@ -12,9 +14,48 @@ from langchain_core.tools import BaseTool
|
||||||
from app.agents.multi_agent_chat.middleware.shared.permissions import (
|
from app.agents.multi_agent_chat.middleware.shared.permissions import (
|
||||||
build_permission_mw,
|
build_permission_mw,
|
||||||
)
|
)
|
||||||
from app.agents.multi_agent_chat.subagents.shared.spec import SurfSenseSubagentSpec
|
from app.agents.multi_agent_chat.subagents.shared.md_file_reader import (
|
||||||
|
read_shared_snippet,
|
||||||
|
)
|
||||||
|
from app.agents.multi_agent_chat.subagents.shared.spec import (
|
||||||
|
SURF_CONTEXT_HINT_PROVIDER_KEY,
|
||||||
|
ContextHintProvider,
|
||||||
|
SurfSenseSubagentSpec,
|
||||||
|
)
|
||||||
from app.agents.new_chat.permissions import Ruleset
|
from app.agents.new_chat.permissions import Ruleset
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ``<include snippet="NAME"/>`` directive. Matches an XML-style self-closing
|
||||||
|
# tag whose ``snippet`` attribute names a file in ``shared/snippets/``.
|
||||||
|
# Whitespace around the attribute and self-close is tolerated; the snippet
|
||||||
|
# name itself must be a bare identifier (letters / digits / underscores) so
|
||||||
|
# we never pull a path-traversal value into ``read_shared_snippet``.
|
||||||
|
_INCLUDE_DIRECTIVE_RE = re.compile(
|
||||||
|
r"<include\s+snippet=\"(?P<name>[A-Za-z0-9_]+)\"\s*/>"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_includes(prompt: str, *, subagent_name: str) -> str:
    """Expand every ``<include snippet="X"/>`` directive found in ``prompt``.

    Args:
        prompt: Prompt text that may contain include directives.
        subagent_name: Used only to label the error message.

    Returns:
        ``prompt`` with each matched directive replaced by the body of the
        snippet it names; text outside the directives is left untouched.

    Raises:
        ValueError: If a referenced snippet resolves to an empty or
            whitespace-only body, so a typo or missing file is reported
            instead of producing a prompt with a hole in it.
    """
    pieces: list[str] = []
    cursor = 0
    for directive in _INCLUDE_DIRECTIVE_RE.finditer(prompt):
        snippet_name = directive.group("name")
        snippet_text = read_shared_snippet(snippet_name)
        if not snippet_text.strip():
            raise ValueError(
                f"Subagent {subagent_name!r}: unknown or empty shared "
                f"snippet {snippet_name!r} referenced via <include>."
            )
        # Keep the literal text preceding the directive, then splice the
        # snippet body in verbatim.
        pieces.append(prompt[cursor : directive.start()])
        pieces.append(snippet_text)
        cursor = directive.end()
    pieces.append(prompt[cursor:])
    return "".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
def _user_allowlist_for(
|
def _user_allowlist_for(
|
||||||
dependencies: dict[str, Any], subagent_name: str
|
dependencies: dict[str, Any], subagent_name: str
|
||||||
|
|
@ -43,6 +84,7 @@ def pack_subagent(
|
||||||
dependencies: dict[str, Any],
|
dependencies: dict[str, Any],
|
||||||
model: BaseChatModel | None = None,
|
model: BaseChatModel | None = None,
|
||||||
middleware_stack: dict[str, Any] | None = None,
|
middleware_stack: dict[str, Any] | None = None,
|
||||||
|
context_hint_provider: ContextHintProvider | None = None,
|
||||||
) -> SurfSenseSubagentSpec:
|
) -> SurfSenseSubagentSpec:
|
||||||
"""Pack the route-local pieces into one sub-agent spec + its Ruleset.
|
"""Pack the route-local pieces into one sub-agent spec + its Ruleset.
|
||||||
|
|
||||||
|
|
@ -68,6 +110,8 @@ def pack_subagent(
|
||||||
msg = f"Subagent {name!r}: system_prompt is empty"
|
msg = f"Subagent {name!r}: system_prompt is empty"
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
|
||||||
|
system_prompt = _resolve_includes(system_prompt, subagent_name=name)
|
||||||
|
|
||||||
flags = dependencies["flags"]
|
flags = dependencies["flags"]
|
||||||
user_allowlist = _user_allowlist_for(dependencies, name)
|
user_allowlist = _user_allowlist_for(dependencies, name)
|
||||||
subagent_rulesets: list[Ruleset] = [ruleset]
|
subagent_rulesets: list[Ruleset] = [ruleset]
|
||||||
|
|
@ -99,4 +143,12 @@ def pack_subagent(
|
||||||
}
|
}
|
||||||
if model is not None:
|
if model is not None:
|
||||||
spec_dict["model"] = model
|
spec_dict["model"] = model
|
||||||
return SurfSenseSubagentSpec(spec=cast(SubAgent, spec_dict), ruleset=ruleset)
|
if context_hint_provider is not None:
|
||||||
|
# Stash the callback on the dict so it survives the trip through
|
||||||
|
# registry / middleware unpacking (both treat the spec as opaque).
|
||||||
|
spec_dict[SURF_CONTEXT_HINT_PROVIDER_KEY] = context_hint_provider
|
||||||
|
return SurfSenseSubagentSpec(
|
||||||
|
spec=cast(SubAgent, spec_dict),
|
||||||
|
ruleset=ruleset,
|
||||||
|
context_hint_provider=context_hint_provider,
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -33,9 +33,11 @@ from typing_extensions import TypedDict
|
||||||
from app.agents.new_chat.state_reducers import (
|
from app.agents.new_chat.state_reducers import (
|
||||||
_add_unique_reducer,
|
_add_unique_reducer,
|
||||||
_dict_merge_with_tombstones_reducer,
|
_dict_merge_with_tombstones_reducer,
|
||||||
|
_int_counter_merge_reducer,
|
||||||
_list_append_reducer,
|
_list_append_reducer,
|
||||||
_replace_reducer,
|
_replace_reducer,
|
||||||
)
|
)
|
||||||
|
from app.agents.shared.receipt import Receipt
|
||||||
|
|
||||||
|
|
||||||
class PendingMove(TypedDict, total=False):
|
class PendingMove(TypedDict, total=False):
|
||||||
|
|
@ -172,6 +174,35 @@ class SurfSenseFilesystemState(FilesystemState):
|
||||||
workspace_tree_text: NotRequired[Annotated[str, _replace_reducer]]
|
workspace_tree_text: NotRequired[Annotated[str, _replace_reducer]]
|
||||||
"""Pre-rendered ``<workspace_tree>`` body; shared with subagents to skip re-render."""
|
"""Pre-rendered ``<workspace_tree>`` body; shared with subagents to skip re-render."""
|
||||||
|
|
||||||
|
billable_calls: NotRequired[Annotated[dict[str, int], _int_counter_merge_reducer]]
|
||||||
|
"""Per-subagent ``task(...)`` invocation counter, summed across the turn.
|
||||||
|
|
||||||
|
Incremented by ``task_tool.py`` each time a subagent invocation
|
||||||
|
completes (single- or batch-mode). The orchestrator can read this map
|
||||||
|
to self-limit when a runaway loop sends the same specialist 20 calls
|
||||||
|
in a row; the runtime emits a soft warning ToolMessage once the
|
||||||
|
cumulative count crosses :data:`DEFAULT_SUBAGENT_BILLABLE_THRESHOLD`.
|
||||||
|
Cleared by checkpoint rollover (i.e. per turn).
|
||||||
|
"""
|
||||||
|
|
||||||
|
receipts: NotRequired[Annotated[list[Receipt], _list_append_reducer]]
|
||||||
|
"""Structured Receipt handles emitted by mutating subagent tools this turn.
|
||||||
|
|
||||||
|
Each mutating tool (deliverables, every connector, KB writes via the
|
||||||
|
persistence middleware) wraps its native return into a
|
||||||
|
:class:`~app.agents.shared.receipt.Receipt`
|
||||||
|
and returns it under the ``"receipt"`` key alongside its existing
|
||||||
|
payload. The subagent's tool-call middleware folds the receipt into
|
||||||
|
this list, and ``_return_command_with_state_update`` in
|
||||||
|
``checkpointed_subagent_middleware/task_tool.py`` carries the list up
|
||||||
|
to the parent automatically (``"receipts"`` is not in
|
||||||
|
``EXCLUDED_STATE_KEYS``).
|
||||||
|
|
||||||
|
Append-only across the turn; cleared by checkpoint rollover. The
|
||||||
|
orchestrator reads it via the ``<verification>`` teaching to confirm
|
||||||
|
side-effecting subagent claims (see ``shared/snippets/verifiable_handle.md``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"KbAnonDoc",
|
"KbAnonDoc",
|
||||||
|
|
|
||||||
|
|
@ -34,8 +34,7 @@ from deepagents.middleware.summarization import (
|
||||||
)
|
)
|
||||||
from langchain_core.messages import SystemMessage
|
from langchain_core.messages import SystemMessage
|
||||||
|
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.observability import otel as ot
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from deepagents.backends.protocol import BACKEND_TYPES
|
from deepagents.backends.protocol import BACKEND_TYPES
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,7 @@ from langgraph.config import get_config
|
||||||
from langgraph.runtime import Runtime
|
from langgraph.runtime import Runtime
|
||||||
from langgraph.types import interrupt
|
from langgraph.types import interrupt
|
||||||
|
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.observability import otel as ot
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,7 @@ from app.agents.new_chat.path_resolver import (
|
||||||
virtual_path_to_doc,
|
virtual_path_to_doc,
|
||||||
)
|
)
|
||||||
from app.agents.new_chat.state_reducers import _CLEAR
|
from app.agents.new_chat.state_reducers import _CLEAR
|
||||||
|
from app.agents.shared.receipt import Receipt, make_receipt
|
||||||
from app.db import (
|
from app.db import (
|
||||||
AgentActionLog,
|
AgentActionLog,
|
||||||
Chunk,
|
Chunk,
|
||||||
|
|
@ -1392,6 +1393,81 @@ async def commit_staged_filesystem_state(
|
||||||
"pending_dir_deletes": [_CLEAR],
|
"pending_dir_deletes": [_CLEAR],
|
||||||
"dirty_path_tool_calls": {_CLEAR: True},
|
"dirty_path_tool_calls": {_CLEAR: True},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Emit one Receipt per committed mutation, folded into ``state['receipts']``
|
||||||
|
# via ``_list_append_reducer``. The receipts surface what actually committed
|
||||||
|
# (post-savepoint) rather than what the LLM intended; the orchestrator uses
|
||||||
|
# them as ground truth in the ``<verification>`` teaching. KB writes do not
|
||||||
|
# have public verifiable URLs, so ``verifiable_url`` stays unset.
|
||||||
|
receipts: list[Receipt] = []
|
||||||
|
|
||||||
|
def _kb_receipt(
|
||||||
|
*,
|
||||||
|
type: str,
|
||||||
|
operation: str,
|
||||||
|
path: str,
|
||||||
|
external_id: int | None = None,
|
||||||
|
) -> None:
|
||||||
|
if not path:
|
||||||
|
return
|
||||||
|
preview = path.rsplit("/", 1)[-1] or path
|
||||||
|
receipts.append(
|
||||||
|
make_receipt(
|
||||||
|
route="knowledge_base",
|
||||||
|
type=type,
|
||||||
|
operation=operation,
|
||||||
|
status="success",
|
||||||
|
external_id=str(external_id) if external_id is not None else path,
|
||||||
|
preview=preview,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
for payload in committed_creates:
|
||||||
|
path = str(payload.get("virtualPath") or "")
|
||||||
|
_kb_receipt(
|
||||||
|
type="file",
|
||||||
|
operation="write_file",
|
||||||
|
path=path,
|
||||||
|
external_id=payload.get("id"),
|
||||||
|
)
|
||||||
|
for payload in committed_updates:
|
||||||
|
path = str(payload.get("virtualPath") or "")
|
||||||
|
_kb_receipt(
|
||||||
|
type="file",
|
||||||
|
operation="edit_file",
|
||||||
|
path=path,
|
||||||
|
external_id=payload.get("id"),
|
||||||
|
)
|
||||||
|
for payload in applied_moves:
|
||||||
|
# ``applied_moves`` rows carry the destination ``virtualPath`` because
|
||||||
|
# the move has already landed in the DB by the time we reach this code.
|
||||||
|
path = str(payload.get("virtualPath") or "")
|
||||||
|
_kb_receipt(
|
||||||
|
type="file",
|
||||||
|
operation="move_file",
|
||||||
|
path=path,
|
||||||
|
external_id=payload.get("id"),
|
||||||
|
)
|
||||||
|
for path in staged_dirs:
|
||||||
|
_kb_receipt(type="folder", operation="mkdir", path=path)
|
||||||
|
for payload in committed_deletes:
|
||||||
|
path = str(payload.get("virtualPath") or "")
|
||||||
|
_kb_receipt(
|
||||||
|
type="file",
|
||||||
|
operation="rm",
|
||||||
|
path=path,
|
||||||
|
external_id=payload.get("id"),
|
||||||
|
)
|
||||||
|
for payload in committed_folder_deletes:
|
||||||
|
path = str(payload.get("virtualPath") or "")
|
||||||
|
_kb_receipt(
|
||||||
|
type="folder",
|
||||||
|
operation="rmdir",
|
||||||
|
path=path,
|
||||||
|
external_id=payload.get("id"),
|
||||||
|
)
|
||||||
|
if receipts:
|
||||||
|
delta["receipts"] = receipts
|
||||||
files_delta: dict[str, Any] = {}
|
files_delta: dict[str, Any] = {}
|
||||||
if temp_paths:
|
if temp_paths:
|
||||||
files_delta.update(dict.fromkeys(temp_paths))
|
files_delta.update(dict.fromkeys(temp_paths))
|
||||||
|
|
|
||||||
|
|
@ -61,8 +61,7 @@ from app.agents.new_chat.permissions import (
|
||||||
aggregate_action,
|
aggregate_action,
|
||||||
evaluate_many,
|
evaluate_many,
|
||||||
)
|
)
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.observability import otel as ot
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -171,6 +171,39 @@ def _dict_merge_with_tombstones_reducer(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _int_counter_merge_reducer(
    left: dict[str, int] | None,
    right: dict[str, int] | None,
) -> dict[str, int]:
    """Fold the counter update ``right`` into ``left`` by adding values per key.

    Intended for state fields that tally integers over several updates
    (e.g. per-subagent ``billable_calls``).

    * ``right is None`` — nothing to apply; a shallow copy of ``left``
      (or an empty dict) is returned.
    * ``right`` carries a clear sentinel (``_CLEAR`` is a key, or any key
      satisfies ``_is_clear``) — ``left`` is discarded and the result is
      built from ``right`` alone, with the sentinel keys left out.
    * otherwise — keys new to ``left`` are added and keys already present
      are summed.

    Entries of ``right`` whose value is not an ``int`` are ignored in every
    case. Neither input mapping is mutated; a new dict is always returned.
    """
    if right is None:
        return dict(left or {})

    reset_requested = _CLEAR in right or any(_is_clear(name) for name in right)

    # A reset starts from an empty tally; a normal merge starts from a copy
    # of the existing counters so the caller's dict is never touched.
    totals: dict[str, int] = {} if reset_requested else dict(left or {})

    for name, amount in right.items():
        # Sentinel keys only signal the reset; they never become counters.
        if reset_requested and _is_clear(name):
            continue
        if isinstance(amount, int):
            totals[name] = totals.get(name, 0) + amount
    return totals
|
||||||
|
|
||||||
|
|
||||||
def _initial_filesystem_state() -> dict[str, Any]:
|
def _initial_filesystem_state() -> dict[str, Any]:
|
||||||
"""Default empty values for SurfSense filesystem state fields.
|
"""Default empty values for SurfSense filesystem state fields.
|
||||||
|
|
||||||
|
|
@ -200,6 +233,7 @@ __all__ = [
|
||||||
"_add_unique_reducer",
|
"_add_unique_reducer",
|
||||||
"_dict_merge_with_tombstones_reducer",
|
"_dict_merge_with_tombstones_reducer",
|
||||||
"_initial_filesystem_state",
|
"_initial_filesystem_state",
|
||||||
|
"_int_counter_merge_reducer",
|
||||||
"_list_append_reducer",
|
"_list_append_reducer",
|
||||||
"_replace_reducer",
|
"_replace_reducer",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,23 @@
|
||||||
Podcast generation tool for the SurfSense agent.
|
Podcast generation tool for the SurfSense agent.
|
||||||
|
|
||||||
This module provides a factory function for creating the generate_podcast tool
|
This module provides a factory function for creating the generate_podcast tool
|
||||||
that submits a Celery task for background podcast generation. The frontend
|
that submits a Celery task for background podcast generation. The tool then
|
||||||
polls for completion and auto-updates when the podcast is ready.
|
polls the podcast row until it reaches a terminal status (READY/FAILED) and
|
||||||
|
returns that status. The wait is bounded by the chat's HTTP / process
|
||||||
|
lifetime; see app.agents.shared.deliverable_wait for details.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.agents.shared.deliverable_wait import wait_for_deliverable
|
||||||
from app.db import Podcast, PodcastStatus, shielded_async_session
|
from app.db import Podcast, PodcastStatus, shielded_async_session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def create_generate_podcast_tool(
|
def create_generate_podcast_tool(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
|
|
@ -97,18 +103,57 @@ def create_generate_podcast_tool(
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"[generate_podcast] Created podcast {podcast_id}, task: {task.id}")
|
logger.info(
|
||||||
|
"[generate_podcast] Created podcast %s, task: %s",
|
||||||
|
podcast_id,
|
||||||
|
task.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Wait until the Celery worker flips the row to a terminal
|
||||||
|
# state. No internal budget — see deliverable_wait module.
|
||||||
|
terminal_status, columns, elapsed = await wait_for_deliverable(
|
||||||
|
model=Podcast,
|
||||||
|
row_id=podcast_id,
|
||||||
|
columns=[Podcast.status, Podcast.file_location],
|
||||||
|
terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED},
|
||||||
|
)
|
||||||
|
|
||||||
|
if terminal_status == PodcastStatus.READY:
|
||||||
|
file_location = columns[1] if columns else None
|
||||||
|
logger.info(
|
||||||
|
"[generate_podcast] Podcast %s READY in %.2fs (file=%s)",
|
||||||
|
podcast_id,
|
||||||
|
elapsed,
|
||||||
|
file_location,
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"status": PodcastStatus.PENDING.value,
|
"status": PodcastStatus.READY.value,
|
||||||
"podcast_id": podcast_id,
|
"podcast_id": podcast_id,
|
||||||
"title": podcast_title,
|
"title": podcast_title,
|
||||||
"message": "Podcast generation started. This may take a few minutes.",
|
"file_location": file_location,
|
||||||
|
"message": (
|
||||||
|
"Podcast generated and saved to your podcast panel."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only other terminal state is FAILED.
|
||||||
|
logger.warning(
|
||||||
|
"[generate_podcast] Podcast %s FAILED in %.2fs",
|
||||||
|
podcast_id,
|
||||||
|
elapsed,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": PodcastStatus.FAILED.value,
|
||||||
|
"podcast_id": podcast_id,
|
||||||
|
"title": podcast_title,
|
||||||
|
"error": (
|
||||||
|
"Background worker reported FAILED status for this podcast."
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
print(f"[generate_podcast] Error: {error_message}")
|
logger.exception("[generate_podcast] Error: %s", error_message)
|
||||||
return {
|
return {
|
||||||
"status": PodcastStatus.FAILED.value,
|
"status": PodcastStatus.FAILED.value,
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,23 @@
|
||||||
Video presentation generation tool for the SurfSense agent.
|
Video presentation generation tool for the SurfSense agent.
|
||||||
|
|
||||||
This module provides a factory function for creating the generate_video_presentation
|
This module provides a factory function for creating the generate_video_presentation
|
||||||
tool that submits a Celery task for background video presentation generation.
|
tool that submits a Celery task for background video presentation generation. The
|
||||||
The frontend polls for completion and auto-updates when the presentation is ready.
|
tool then polls the row until it reaches a terminal status (READY/FAILED) and
|
||||||
|
returns that status. The wait is bounded by the chat's HTTP / process lifetime;
|
||||||
|
see app.agents.shared.deliverable_wait for details.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.agents.shared.deliverable_wait import wait_for_deliverable
|
||||||
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
|
from app.db import VideoPresentation, VideoPresentationStatus, shielded_async_session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def create_generate_video_presentation_tool(
|
def create_generate_video_presentation_tool(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
|
|
@ -72,20 +78,58 @@ def create_generate_video_presentation_tool(
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(
|
logger.info(
|
||||||
f"[generate_video_presentation] Created video presentation {video_pres_id}, task: {task.id}"
|
"[generate_video_presentation] Created video presentation %s, task: %s",
|
||||||
|
video_pres_id,
|
||||||
|
task.id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Wait until the Celery worker flips the row to a terminal
|
||||||
|
# state. No internal budget — see deliverable_wait module.
|
||||||
|
terminal_status, _columns, elapsed = await wait_for_deliverable(
|
||||||
|
model=VideoPresentation,
|
||||||
|
row_id=video_pres_id,
|
||||||
|
columns=[VideoPresentation.status],
|
||||||
|
terminal_statuses={
|
||||||
|
VideoPresentationStatus.READY,
|
||||||
|
VideoPresentationStatus.FAILED,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if terminal_status == VideoPresentationStatus.READY:
|
||||||
|
logger.info(
|
||||||
|
"[generate_video_presentation] %s READY in %.2fs",
|
||||||
|
video_pres_id,
|
||||||
|
elapsed,
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"status": VideoPresentationStatus.PENDING.value,
|
"status": VideoPresentationStatus.READY.value,
|
||||||
"video_presentation_id": video_pres_id,
|
"video_presentation_id": video_pres_id,
|
||||||
"title": video_title,
|
"title": video_title,
|
||||||
"message": "Video presentation generation started. This may take a few minutes.",
|
"message": "Video presentation generated and saved.",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only other terminal state is FAILED.
|
||||||
|
logger.warning(
|
||||||
|
"[generate_video_presentation] %s FAILED in %.2fs",
|
||||||
|
video_pres_id,
|
||||||
|
elapsed,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
|
"video_presentation_id": video_pres_id,
|
||||||
|
"title": video_title,
|
||||||
|
"error": (
|
||||||
|
"Background worker reported FAILED status for this "
|
||||||
|
"video presentation."
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
print(f"[generate_video_presentation] Error: {error_message}")
|
logger.exception(
|
||||||
|
"[generate_video_presentation] Error: %s", error_message
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"status": VideoPresentationStatus.FAILED.value,
|
"status": VideoPresentationStatus.FAILED.value,
|
||||||
"error": error_message,
|
"error": error_message,
|
||||||
|
|
|
||||||
9
surfsense_backend/app/agents/shared/__init__.py
Normal file
9
surfsense_backend/app/agents/shared/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
"""Cross-package agent contracts.
|
||||||
|
|
||||||
|
Symbols here are intentionally framework-light (no LangGraph / deepagents
|
||||||
|
internals) so they can be imported from both ``app.agents.new_chat`` and
|
||||||
|
``app.agents.multi_agent_chat`` without creating a circular dependency
|
||||||
|
between the two packages. See ``receipt.py`` for the rationale.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
123
surfsense_backend/app/agents/shared/deliverable_wait.py
Normal file
123
surfsense_backend/app/agents/shared/deliverable_wait.py
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
"""Shared poll-until-terminal helper for Celery-backed deliverables.
|
||||||
|
|
||||||
|
Lives in ``app.agents.shared`` (neutral package, no dependencies on either
|
||||||
|
``new_chat`` or ``multi_agent_chat``) so both the flat single-agent tools
|
||||||
|
under ``app/agents/new_chat/tools/`` and the multi-agent subagent tools
|
||||||
|
under ``app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/``
|
||||||
|
can import it without creating a circular dependency.
|
||||||
|
|
||||||
|
Background
|
||||||
|
----------
|
||||||
|
Tools like ``generate_podcast`` and ``generate_video_presentation`` enqueue
|
||||||
|
the heavy work to Celery and historically returned immediately with a
|
||||||
|
"pending" status. That works for very-long deliverables but hurts UX for
|
||||||
|
the common case (most podcasts finish in 10-30 seconds): the agent sends
|
||||||
|
a "kicked off, check back in a minute" reply *before* the worker is done,
|
||||||
|
so the user never gets a "ready" confirmation.
|
||||||
|
|
||||||
|
This helper bridges that gap. The tool dispatches the Celery task as
|
||||||
|
before, then polls the artefact row's ``status`` column **until it
|
||||||
|
reaches a terminal value** (READY / FAILED). The tool then returns a
|
||||||
|
real terminal outcome — never a pending one.
|
||||||
|
|
||||||
|
No wall-clock budget here on purpose
|
||||||
|
------------------------------------
|
||||||
|
Layering a second budget on top of the existing per-invocation safety
|
||||||
|
nets just confused the UX. The real ceilings are:
|
||||||
|
|
||||||
|
* **Multi-agent mode** — ``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS``
|
||||||
|
(default ``300.0``, ``0`` to disable) caps how long any single
|
||||||
|
``task(subagent, ...)`` invocation can run. If a deliverable needs
|
||||||
|
longer than this, the subagent invocation is cancelled and the
|
||||||
|
orchestrator surfaces a "subagent timed out" ToolMessage. Operators
|
||||||
|
who routinely generate long videos should raise that ceiling (or set
|
||||||
|
it to ``0`` for true unbounded waits).
|
||||||
|
* **Single-agent mode** — the chat's HTTP stream / process lifetime is
|
||||||
|
the only ceiling. Truly indefinite waits work here, but a dead Celery
|
||||||
|
worker will leave the row in PENDING/GENERATING forever; treat that
|
||||||
|
as an operational concern, not a UX concern.
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
-------------
|
||||||
|
None. The poll cadence defaults to 1.5s — small enough to feel
|
||||||
|
responsive (~6 polls per typical 10s podcast), large enough to avoid
|
||||||
|
hammering the DB under burst traffic. Override at the call site if a
|
||||||
|
specific tool needs a different cadence.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.orm import InstrumentedAttribute
|
||||||
|
|
||||||
|
from app.db import shielded_async_session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_POLL_INTERVAL_SECONDS: float = 1.5
|
||||||
|
|
||||||
|
|
||||||
|
async def wait_for_deliverable(
    *,
    model: type,
    row_id: int,
    columns: list[InstrumentedAttribute[Any]],
    terminal_statuses: set[Enum],
    poll_interval_s: float = _DEFAULT_POLL_INTERVAL_SECONDS,
) -> tuple[Enum, tuple[Any, ...], float]:
    """Wait for the ``model`` row ``row_id`` to reach one of ``terminal_statuses``.

    The coroutine sleeps ``poll_interval_s`` seconds, selects ``columns``
    for the row whose ``id`` equals ``row_id``, and repeats until the first
    selected value is a member of ``terminal_statuses``. It enforces no
    deadline of its own, so any ceiling has to come from the caller
    cancelling it.

    ``columns[0]`` must be the status column. Any further columns (for
    example ``Podcast.file_location``) are fetched in the same query and
    handed back with the final status, saving the caller a second lookup.

    Every poll opens its own ``shielded_async_session`` so no session is
    kept open during the sleep. An exception raised by a poll is logged as
    a warning and the loop carries on with the next attempt.

    Returns
    -------
    ``(terminal_status, row_values, elapsed_seconds)``
        ``row_values`` holds the selected values in the same order as
        ``columns`` (status first); ``elapsed_seconds`` is measured with
        ``time.monotonic`` from the moment the wait began.

    Raises
    ------
    ValueError
        If ``columns`` is empty.
    """
    if not columns:
        raise ValueError("wait_for_deliverable requires at least the status column")

    started_at = time.monotonic()

    while True:
        # Sleep first: the task was only just enqueued when we get called.
        await asyncio.sleep(poll_interval_s)

        fetched: tuple[Any, ...] | None = None
        try:
            async with shielded_async_session() as session:
                query = select(*columns).where(model.id == row_id)
                fetched = (await session.execute(query)).first()
        except Exception as exc:
            # Best-effort polling: report the failure and retry next tick.
            logger.warning(
                "[deliverable_wait] poll failed model=%s id=%s err=%r",
                getattr(model, "__name__", str(model)),
                row_id,
                exc,
            )

        if fetched is None:
            continue

        current_status = fetched[0]
        if current_status in terminal_statuses:
            return current_status, tuple(fetched), time.monotonic() - started_at
|
||||||
161
surfsense_backend/app/agents/shared/receipt.py
Normal file
161
surfsense_backend/app/agents/shared/receipt.py
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
"""Receipt: structured handle returned by every mutating subagent tool.
|
||||||
|
|
||||||
|
Generalises the Hermes ``entry`` dict (see ``references/hermes-agent/tools/
|
||||||
|
delegate_tool.py:1663-1697``) for our 5 deliverable types + 15 connectors +
|
||||||
|
KB writes. The supervisor reads the Receipt to verify what actually happened
|
||||||
|
without round-tripping through LLM paraphrase.
|
||||||
|
|
||||||
|
**Why this lives under ``app.agents.shared`` and not under either of the
|
||||||
|
two agent packages:** the Receipt is a *contract* shared between
|
||||||
|
``multi_agent_chat`` (where mutating tools emit it) and ``new_chat``
|
||||||
|
(where ``filesystem_state.SurfSenseFilesystemState`` declares the
|
||||||
|
``receipts`` reducer that accumulates it, and where
|
||||||
|
``middleware.kb_persistence`` emits its own KB-write receipts). Putting
|
||||||
|
the contract in either package would create a bidirectional import
|
||||||
|
between the two — see the commit that introduced this module for the
|
||||||
|
``ImportError`` chain it broke.
|
||||||
|
|
||||||
|
Each mutating tool wraps its native return shape into a Receipt via
|
||||||
|
:func:`make_receipt` (or builds one directly) and returns it under the
|
||||||
|
``"receipt"`` key alongside its existing payload. The subagent boundary
|
||||||
|
machinery in ``checkpointed_subagent_middleware.task_tool`` then folds
|
||||||
|
the receipt into the parent's ``receipts`` state via the append reducer.
|
||||||
|
|
||||||
|
The KB write path is the one exception: file-tool calls cannot emit a
|
||||||
|
durable receipt because the actual DB writes happen end-of-turn inside
|
||||||
|
:class:`app.agents.new_chat.middleware.kb_persistence.KnowledgeBasePersistenceMiddleware`.
|
||||||
|
KB tools therefore emit a *provisional* receipt with ``status="pending"``;
|
||||||
|
the persistence middleware flips it to ``"success"`` or ``"failed"``
|
||||||
|
before returning control to the parent.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Literal, TypedDict
|
||||||
|
|
||||||
|
# Subagent that emitted this receipt.
|
||||||
|
ReceiptRoute = Literal[
|
||||||
|
"deliverables",
|
||||||
|
"knowledge_base",
|
||||||
|
"notion",
|
||||||
|
"slack",
|
||||||
|
"gmail",
|
||||||
|
"linear",
|
||||||
|
"jira",
|
||||||
|
"clickup",
|
||||||
|
"confluence",
|
||||||
|
"calendar",
|
||||||
|
"luma",
|
||||||
|
"airtable",
|
||||||
|
"google_drive",
|
||||||
|
"dropbox",
|
||||||
|
"onedrive",
|
||||||
|
"discord",
|
||||||
|
"teams",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Within-route kind of artefact / external resource the operation touched.
|
||||||
|
# Left as ``str`` rather than a giant union so each route file documents
|
||||||
|
# its own enum next to its tools.
|
||||||
|
ReceiptType = str
|
||||||
|
|
||||||
|
# Operation verb. Kept open for the same reason as ``ReceiptType``.
|
||||||
|
ReceiptOperation = str
|
||||||
|
|
||||||
|
# Pending = async backend (Celery podcast / video) that the orchestrator
|
||||||
|
# will surface progress for out of band; the persistence middleware flips this to
|
||||||
|
# ``success`` for KB writes that committed.
|
||||||
|
ReceiptStatus = Literal["success", "pending", "failed"]
|
||||||
|
|
||||||
|
|
||||||
|
class Receipt(TypedDict, total=False):
    """Structured per-mutation handle returned to the parent subagent.

    A ``Receipt`` is a plain ``dict`` at runtime; this class only declares
    the keys it may carry and their types.

    All fields are ``NotRequired`` (TypedDict ``total=False``) so each
    route's tool can populate only the fields it actually has — e.g. Gmail
    never sets ``verifiable_url`` because Gmail doesn't expose per-message
    URLs. The receipts state reducer treats missing keys as missing rather
    than ``null`` so we don't double-count.

    :func:`make_receipt` is the convenience constructor: it always sets
    ``route``, ``type``, ``operation`` and ``status`` and omits any optional
    key whose value is ``None``.
    """

    route: ReceiptRoute
    """Subagent name. Lets the orchestrator filter ``state['receipts']``
    by route without re-deriving from ``type``."""

    type: ReceiptType
    """Within-route kind. e.g. for ``deliverables`` one of ``{report,
    podcast, video_presentation, resume, image}``; for ``notion`` ``page``;
    for ``slack`` ``message``."""

    operation: ReceiptOperation
    """Verb. e.g. ``generate`` (deliverables), ``create`` / ``update`` /
    ``delete`` (most connectors), ``send`` / ``post`` (chat), ``write_file``
    / ``edit_file`` / ``rm`` / ``rmdir`` / ``move_file`` / ``mkdir`` (KB)."""

    status: ReceiptStatus
    """``success`` / ``pending`` / ``failed``. The verification teaching
    in ``shared/snippets/verifiable_handle.md`` keys off this field."""

    external_id: str | None
    """Backend identifier. Report row id, Notion ``page_id``, Slack ``ts``,
    Gmail ``message_id``, Linear identifier, KB ``virtualPath``, etc.
    ``None`` only when the operation failed before the backend assigned one."""

    verifiable_url: str | None
    """URL the parent can pass to ``scrape_webpage`` to verify the
    operation. ``None`` when no public URL exists (Gmail, KB, raw images
    stored in the DB)."""

    preview: str | None
    """Short snippet (~200 chars) of what was produced. First lines of
    a generated report's markdown, transcript opener for a podcast,
    thumbnail URL for an image. Lets the orchestrator decide whether to
    re-render in the UI without re-loading the artefact."""

    error: str | None
    """Filled iff ``status == "failed"``. Plain-text reason; the parent
    surfaces it in its own ``next_step``."""
|
||||||
|
|
||||||
|
|
||||||
|
def make_receipt(
    *,
    route: ReceiptRoute,
    type: str,
    operation: str,
    status: ReceiptStatus,
    external_id: str | None = None,
    verifiable_url: str | None = None,
    preview: str | None = None,
    error: str | None = None,
) -> Receipt:
    """Build a :class:`Receipt` mapping that omits every unset optional field.

    ``route``, ``type``, ``operation`` and ``status`` are always present.
    ``external_id``, ``verifiable_url``, ``preview`` and ``error`` are only
    written when the caller passed something other than ``None``, so a
    consumer can test ``"verifiable_url" in receipt`` to find out whether a
    value was supplied at all.

    All arguments are keyword-only. The result is a plain ``dict`` whose keys
    follow the parameter order above.
    """
    required: dict[str, Any] = {
        "route": route,
        "type": type,
        "operation": operation,
        "status": status,
    }
    optional: dict[str, Any] = {
        "external_id": external_id,
        "verifiable_url": verifiable_url,
        "preview": preview,
        "error": error,
    }
    # Only ``None`` is treated as "unset"; falsy values such as "" are kept.
    supplied = {key: value for key, value in optional.items() if value is not None}
    # The dict is built untyped, hence the ignore on the TypedDict-style return.
    return {**required, **supplied}  # type: ignore[return-value]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Receipt",
|
||||||
|
"ReceiptOperation",
|
||||||
|
"ReceiptRoute",
|
||||||
|
"ReceiptStatus",
|
||||||
|
"ReceiptType",
|
||||||
|
"make_receipt",
|
||||||
|
]
|
||||||
71
surfsense_backend/app/agents/shared/receipt_command.py
Normal file
71
surfsense_backend/app/agents/shared/receipt_command.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
"""Helper for wrapping a tool result with a Receipt in a ``Command(update=...)``.
|
||||||
|
|
||||||
|
Most mutating subagent tools historically returned a plain ``dict`` payload
|
||||||
|
which deepagents serialised straight into the ``ToolMessage`` content. To
|
||||||
|
participate in the verification teaching from
|
||||||
|
``multi_agent_chat/subagents/shared/snippets/verifiable_handle.md`` those
|
||||||
|
tools now also need to write a :class:`Receipt` into the parent's
|
||||||
|
``state['receipts']`` list (declared on
|
||||||
|
:class:`~app.agents.new_chat.filesystem_state.SurfSenseFilesystemState`
|
||||||
|
and backed by the append reducer).
|
||||||
|
|
||||||
|
:func:`with_receipt` wraps both behaviours: it returns the tool payload as
|
||||||
|
a JSON-encoded ``ToolMessage`` AND appends the receipt to state in a single
|
||||||
|
:class:`~langgraph.types.Command`. Use it at every ``return`` site of a
|
||||||
|
mutating tool — including failure paths (emit a receipt with
|
||||||
|
``status="failed"`` and the error message in ``error``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain_core.messages import ToolMessage
|
||||||
|
from langgraph.types import Command
|
||||||
|
|
||||||
|
from app.agents.shared.receipt import Receipt
|
||||||
|
|
||||||
|
|
||||||
|
def _content_to_text(payload: dict[str, Any] | str) -> str:
|
||||||
|
"""Serialise a tool payload to ``ToolMessage`` content.
|
||||||
|
|
||||||
|
Dicts go through ``json.dumps`` (matching deepagents' default tool-result
|
||||||
|
serialisation); strings are passed through. Anything else is coerced via
|
||||||
|
``str`` so we never raise here — a mis-typed tool return would already
|
||||||
|
have failed inside the tool body.
|
||||||
|
"""
|
||||||
|
if isinstance(payload, str):
|
||||||
|
return payload
|
||||||
|
if isinstance(payload, dict):
|
||||||
|
return json.dumps(payload, default=str)
|
||||||
|
return str(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def with_receipt(
    *,
    payload: dict[str, Any] | str,
    receipt: Receipt,
    tool_call_id: str,
) -> Command:
    """Bundle a tool result and its receipt into a single ``Command``.

    The returned ``Command`` carries a state update with two keys:

    * ``"messages"`` — one ``ToolMessage`` whose content is ``payload``
      serialised by :func:`_content_to_text` and whose ``tool_call_id`` is
      the id passed in.
    * ``"receipts"`` — a one-element list holding ``receipt``. The
      ``receipts`` state field is expected to use the ``_list_append_reducer``
      declared on
      :class:`~app.agents.new_chat.filesystem_state.SurfSenseFilesystemState`,
      so concurrent subagent batches append instead of overwriting each other.
    """
    tool_message = ToolMessage(
        content=_content_to_text(payload),
        tool_call_id=tool_call_id,
    )
    state_update = {
        "messages": [tool_message],
        "receipts": [receipt],
    }
    return Command(update=state_update)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["with_receipt"]
|
||||||
|
|
@ -62,7 +62,9 @@ class EtlPipelineService:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
if category == FileCategory.AUDIO:
|
if category == FileCategory.AUDIO:
|
||||||
content = await transcribe_audio(request.file_path, request.filename)
|
content = await transcribe_audio(
|
||||||
|
request.file_path, request.filename
|
||||||
|
)
|
||||||
result = EtlResult(
|
result = EtlResult(
|
||||||
markdown_content=content,
|
markdown_content=content,
|
||||||
etl_service="AUDIO",
|
etl_service="AUDIO",
|
||||||
|
|
|
||||||
|
|
@ -835,7 +835,14 @@ class ComposioService:
|
||||||
)
|
)
|
||||||
|
|
||||||
if not result.get("success"):
|
if not result.get("success"):
|
||||||
return [], None, result.get("error", "Unknown error")
|
# 4-tuple to match this function's declared return shape
|
||||||
|
# ``(messages, next_page_token, result_size_estimate, error)``.
|
||||||
|
# The error branch previously dropped the
|
||||||
|
# ``result_size_estimate`` slot, which crashed the caller's
|
||||||
|
# unpack with ``ValueError: not enough values to unpack
|
||||||
|
# (expected 4, got 3)`` and hid the real Composio error
|
||||||
|
# (e.g. expired connected account / invalid API key).
|
||||||
|
return [], None, None, result.get("error", "Unknown error")
|
||||||
|
|
||||||
data = result.get("data", {})
|
data = result.get("data", {})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -101,9 +101,7 @@ class GmailKBSyncService:
|
||||||
else:
|
else:
|
||||||
logger.warning("No LLM configured -- using fallback summary")
|
logger.warning("No LLM configured -- using fallback summary")
|
||||||
summary_content = f"Gmail Message: {subject}\n\n{indexable_content}"
|
summary_content = f"Gmail Message: {subject}\n\n{indexable_content}"
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, summary_content)
|
||||||
embed_text, summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = await create_document_chunks(indexable_content)
|
chunks = await create_document_chunks(indexable_content)
|
||||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
|
||||||
|
|
@ -116,9 +116,7 @@ class GoogleCalendarKBSyncService:
|
||||||
summary_content = (
|
summary_content = (
|
||||||
f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
|
f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
|
||||||
)
|
)
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, summary_content)
|
||||||
embed_text, summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = await create_document_chunks(indexable_content)
|
chunks = await create_document_chunks(indexable_content)
|
||||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
@ -297,9 +295,7 @@ class GoogleCalendarKBSyncService:
|
||||||
summary_content = (
|
summary_content = (
|
||||||
f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
|
f"Google Calendar Event: {event_summary}\n\n{indexable_content}"
|
||||||
)
|
)
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, summary_content)
|
||||||
embed_text, summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = await create_document_chunks(indexable_content)
|
chunks = await create_document_chunks(indexable_content)
|
||||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
|
||||||
|
|
@ -98,9 +98,7 @@ class JiraKBSyncService:
|
||||||
summary_content = (
|
summary_content = (
|
||||||
f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
|
f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
|
||||||
)
|
)
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, summary_content)
|
||||||
embed_text, summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = await create_document_chunks(issue_content)
|
chunks = await create_document_chunks(issue_content)
|
||||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
@ -214,9 +212,7 @@ class JiraKBSyncService:
|
||||||
summary_content = (
|
summary_content = (
|
||||||
f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
|
f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
|
||||||
)
|
)
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, summary_content)
|
||||||
embed_text, summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = await create_document_chunks(issue_content)
|
chunks = await create_document_chunks(issue_content)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -682,11 +682,7 @@ def get_planner_llm() -> ChatLiteLLM | None:
|
||||||
from app.agents.new_chat.llm_config import create_chat_litellm_from_config
|
from app.agents.new_chat.llm_config import create_chat_litellm_from_config
|
||||||
|
|
||||||
planner_cfg = next(
|
planner_cfg = next(
|
||||||
(
|
(cfg for cfg in config.GLOBAL_LLM_CONFIGS if cfg.get("is_planner") is True),
|
||||||
cfg
|
|
||||||
for cfg in config.GLOBAL_LLM_CONFIGS
|
|
||||||
if cfg.get("is_planner") is True
|
|
||||||
),
|
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
if not planner_cfg:
|
if not planner_cfg:
|
||||||
|
|
|
||||||
|
|
@ -96,9 +96,7 @@ class OneDriveKBSyncService:
|
||||||
else:
|
else:
|
||||||
logger.warning("No LLM configured — using fallback summary")
|
logger.warning("No LLM configured — using fallback summary")
|
||||||
summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}"
|
summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}"
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, summary_content)
|
||||||
embed_text, summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = await create_document_chunks(indexable_content)
|
chunks = await create_document_chunks(indexable_content)
|
||||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
|
||||||
|
|
@ -2608,9 +2608,7 @@ async def stream_resume_chat(
|
||||||
visibility = thread_visibility or ChatVisibility.PRIVATE
|
visibility = thread_visibility or ChatVisibility.PRIVATE
|
||||||
from app.config import config as _app_config
|
from app.config import config as _app_config
|
||||||
|
|
||||||
chat_agent_mode = (
|
chat_agent_mode = "multi" if _app_config.MULTI_AGENT_CHAT_ENABLED else "single"
|
||||||
"multi" if _app_config.MULTI_AGENT_CHAT_ENABLED else "single"
|
|
||||||
)
|
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
chat_span.set_attribute("agent.mode", chat_agent_mode)
|
chat_span.set_attribute("agent.mode", chat_agent_mode)
|
||||||
_t0 = time.perf_counter()
|
_t0 = time.perf_counter()
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,9 @@ import json
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain_core.messages import ToolMessage
|
||||||
|
from langgraph.types import Command
|
||||||
|
|
||||||
from app.tasks.chat.streaming.handlers.tools import (
|
from app.tasks.chat.streaming.handlers.tools import (
|
||||||
ToolCompletionEmissionContext,
|
ToolCompletionEmissionContext,
|
||||||
iter_tool_completion_emission_frames,
|
iter_tool_completion_emission_frames,
|
||||||
|
|
@ -19,6 +22,38 @@ from app.tasks.chat.streaming.relay.task_span import (
|
||||||
from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame
|
from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame
|
||||||
|
|
||||||
|
|
||||||
|
def _unwrap_command_output(raw_output: Any) -> Any:
    """Return the first ``ToolMessage`` carried by a tool's ``Command`` result.

    Receipt-aware tools (see
    ``app.agents.shared.receipt_command.with_receipt``) return
    ``Command(update={"messages": [ToolMessage(...)], "receipts": [...]})``.
    The ``on_tool_end`` event exposes that ``Command`` as ``data.output``. A
    ``Command`` has no ``.content``, is not a ``dict``, and stringifies to
    ``"Command(...)"``, which would reach the frontend and break tool cards
    (e.g. the podcast card) that read ``status`` / ``podcast_id`` at the top
    level.

    The input is returned unchanged when it is not a ``Command``, when its
    ``update`` is not a ``dict``, when ``update["messages"]`` is not a
    ``list``, or when that list holds no ``ToolMessage``.
    """
    if not isinstance(raw_output, Command):
        return raw_output

    update = raw_output.update
    messages = update.get("messages") if isinstance(update, dict) else None
    if not isinstance(messages, list):
        return raw_output

    # First ToolMessage wins; fall back to the original Command otherwise.
    return next(
        (candidate for candidate in messages if isinstance(candidate, ToolMessage)),
        raw_output,
    )
|
||||||
|
|
||||||
|
|
||||||
def iter_tool_end_frames(
|
def iter_tool_end_frames(
|
||||||
event: dict[str, Any],
|
event: dict[str, Any],
|
||||||
*,
|
*,
|
||||||
|
|
@ -33,7 +68,7 @@ def iter_tool_end_frames(
|
||||||
state.active_tool_depth = max(0, state.active_tool_depth - 1)
|
state.active_tool_depth = max(0, state.active_tool_depth - 1)
|
||||||
run_id = event.get("run_id", "")
|
run_id = event.get("run_id", "")
|
||||||
tool_name = event.get("name", "unknown_tool")
|
tool_name = event.get("name", "unknown_tool")
|
||||||
raw_output = event.get("data", {}).get("output", "")
|
raw_output = _unwrap_command_output(event.get("data", {}).get("output", ""))
|
||||||
staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None
|
staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None
|
||||||
|
|
||||||
if hasattr(raw_output, "content"):
|
if hasattr(raw_output, "content"):
|
||||||
|
|
|
||||||
|
|
@ -15,12 +15,24 @@ def iter_completion_emission_frames(
|
||||||
out = ctx.tool_output
|
out = ctx.tool_output
|
||||||
payload = out if isinstance(out, dict) else {"result": out}
|
payload = out if isinstance(out, dict) else {"result": out}
|
||||||
yield ctx.emit_tool_output_card(payload)
|
yield ctx.emit_tool_output_card(payload)
|
||||||
if isinstance(out, dict) and out.get("status") == "pending":
|
if not isinstance(out, dict):
|
||||||
|
return
|
||||||
|
status = out.get("status")
|
||||||
|
# ``ready`` is the live success status now that the tool waits for the
|
||||||
|
# Celery worker to reach a terminal state. ``pending`` is retained as a
|
||||||
|
# legacy branch for old saved chats that pre-date the wait-for-terminal
|
||||||
|
# change (see ``app.agents.shared.deliverable_wait``).
|
||||||
|
if status == "ready":
|
||||||
|
yield ctx.streaming_service.format_terminal_info(
|
||||||
|
f"Video presentation generated successfully: {out.get('title', 'Presentation')}",
|
||||||
|
"success",
|
||||||
|
)
|
||||||
|
elif status == "pending":
|
||||||
yield ctx.streaming_service.format_terminal_info(
|
yield ctx.streaming_service.format_terminal_info(
|
||||||
f"Video presentation queued: {out.get('title', 'Presentation')}",
|
f"Video presentation queued: {out.get('title', 'Presentation')}",
|
||||||
"success",
|
"success",
|
||||||
)
|
)
|
||||||
elif isinstance(out, dict) and out.get("status") == "failed":
|
elif status == "failed":
|
||||||
error_msg = out.get("error", "Unknown error")
|
error_msg = out.get("error", "Unknown error")
|
||||||
yield ctx.streaming_service.format_terminal_info(
|
yield ctx.streaming_service.format_terminal_info(
|
||||||
f"Presentation generation failed: {error_msg}",
|
f"Presentation generation failed: {error_msg}",
|
||||||
|
|
|
||||||
|
|
@ -222,9 +222,7 @@ async def generate_document_summary(
|
||||||
else:
|
else:
|
||||||
enhanced_summary_content = summary_content
|
enhanced_summary_content = summary_content
|
||||||
|
|
||||||
summary_embedding = await asyncio.to_thread(
|
summary_embedding = await asyncio.to_thread(embed_text, enhanced_summary_content)
|
||||||
embed_text, enhanced_summary_content
|
|
||||||
)
|
|
||||||
|
|
||||||
return enhanced_summary_content, summary_embedding
|
return enhanced_summary_content, summary_embedding
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue