refactor(agents): relocate remaining MAC-only kernel (permissions, deliverable_wait)

permissions.py (authorization Rule/Ruleset model) is consumed across all MAC (multi_agent_chat) subagents and the permissions middleware, with a single external consumer (the user_tool_allowlist service), so move it to multi_agent_chat/shared/permissions.py and repoint all 42 sites. deliverable_wait.py (wait_for_deliverable) is used only by the podcast and video_presentation deliverable tools, so colocate it into subagents/builtins/deliverables/. No behavior change; the import-all check and the permission/allowlist/deliverable unit tests stay green.
2026-06-06 20:15:17 +02:00 · 2026-06-05 10:58:49 +02:00 · 2026-06-05 10:58:49 +02:00 · f615d6b530
commit f615d6b530
parent 714c5ffea9
47 changed files with 61 additions and 53 deletions
--- a/surfsense_backend/app/agents/shared/deliverable_wait.py
+++ b/surfsense_backend/app/agents/shared/deliverable_wait.py
@ -1,123 +0,0 @@
-"""Shared poll-until-terminal helper for Celery-backed deliverables.
-
-Lives in ``app.agents.shared`` (neutral kernel package, no dependency on
-``multi_agent_chat``) so both the shared tools under ``app/agents/shared/tools/``
-and the multi-agent subagent tools under
-``app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/`` can import
-it without creating a circular dependency.
-
-Background
----------
-Tools like ``generate_podcast`` and ``generate_video_presentation`` enqueue
-the heavy work to Celery and historically returned immediately with a
-"pending" status. That works for very-long deliverables but hurts UX for
-the common case (most podcasts finish in 10-30 seconds): the agent sends
-a "kicked off, check back in a minute" reply *before* the worker is done,
-so the user never gets a "ready" confirmation.
-
-This helper bridges that gap. The tool dispatches the Celery task as
-before, then polls the artefact row's ``status`` column **until it
-reaches a terminal value** (READY / FAILED). The tool then returns a
-real terminal outcome — never a pending one.
-
-No wall-clock budget here on purpose
------------------------------------
-Layering a second budget on top of the existing per-invocation safety
-nets just confused the UX. The real ceilings are:
-
-* **Multi-agent mode** — ``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS``
-  (default ``300.0``, ``0`` to disable) caps how long any single
-  ``task(subagent, ...)`` invocation can run. If a deliverable needs
-  longer than this, the subagent invocation is cancelled and the
-  orchestrator surfaces a "subagent timed out" ToolMessage. Operators
-  who routinely generate long videos should raise that ceiling (or set
-  it to ``0`` for true unbounded waits).
-* **Single-agent mode** — the chat's HTTP stream / process lifetime is
-  the only ceiling. Truly indefinite waits work here, but a dead Celery
-  worker will leave the row in PENDING/GENERATING forever; treat that
-  as an operational concern, not a UX concern.
-
-Configuration
-------------
-None. The poll cadence is hardcoded at 1.5s — small enough to feel
-responsive (~6 polls per typical 10s podcast), large enough to avoid
-hammering the DB under burst traffic. Override at the call site if a
-specific tool needs a different cadence.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-import time
-from enum import Enum
-from typing import Any
-
-from sqlalchemy import select
-from sqlalchemy.orm import InstrumentedAttribute
-
-from app.db import shielded_async_session
-
-logger = logging.getLogger(__name__)
-
-
-_DEFAULT_POLL_INTERVAL_SECONDS: float = 1.5
-
-
-async def wait_for_deliverable(
-    *,
-    model: type,
-    row_id: int,
-    columns: list[InstrumentedAttribute[Any]],
-    terminal_statuses: set[Enum],
-    poll_interval_s: float = _DEFAULT_POLL_INTERVAL_SECONDS,
-) -> tuple[Enum, tuple[Any, ...], float]:
-    """Poll ``model`` row ``row_id`` until ``columns[0]`` reaches a terminal status.
-
-    Blocks until the row's status column matches one of
-    ``terminal_statuses``. There is no internal wall-clock budget; cancel
-    from the outside (subagent timeout, HTTP disconnect, task
-    cancellation) if you need a ceiling. See module docstring.
-
-    The first entry of ``columns`` must be the status column; additional
-    columns (e.g. ``Podcast.file_location``) are returned alongside the
-    final status so callers can build their payload without a second
-    roundtrip.
-
-    A fresh ``shielded_async_session`` is opened per poll so we never
-    hold a transaction across the wait, and a failed poll is logged but
-    does not abort the wait — transient DB hiccups should not collapse
-    the tool call.
-
-    Returns
-    -------
-    ``(terminal_status, columns, elapsed_seconds)``
-        ``columns`` mirrors the requested ``columns`` (including the
-        status itself in position 0).
-    """
-    if not columns:
-        raise ValueError("wait_for_deliverable requires at least the status column")
-
-    start = time.monotonic()
-
-    while True:
-        await asyncio.sleep(poll_interval_s)
-        row: tuple[Any, ...] | None = None
-        try:
-            async with shielded_async_session() as session:
-                result = await session.execute(
-                    select(*columns).where(model.id == row_id)
-                )
-                row = result.first()
-        except Exception as exc:
-            logger.warning(
-                "[deliverable_wait] poll failed model=%s id=%s err=%r",
-                getattr(model, "__name__", str(model)),
-                row_id,
-                exc,
-            )
-
-        if row is not None:
-            status_val = row[0]
-            if status_val in terminal_statuses:
-                return status_val, tuple(row), time.monotonic() - start
--- a/surfsense_backend/app/agents/shared/middleware/permission.py
+++ b/surfsense_backend/app/agents/shared/middleware/permission.py
@ -50,17 +50,17 @@ from langchain_core.messages import AIMessage, ToolMessage
 from langgraph.runtime import Runtime
 from langgraph.types import interrupt

-from app.agents.shared.errors import (
-    CorrectedError,
-    RejectedError,
-    StreamingError,
-)
-from app.agents.shared.permissions import (
+from app.agents.multi_agent_chat.shared.permissions import (
    Rule,
    Ruleset,
    aggregate_action,
    evaluate_many,
 )
+from app.agents.shared.errors import (
+    CorrectedError,
+    RejectedError,
+    StreamingError,
+)
 from app.observability import metrics as ot_metrics, otel as ot

 logger = logging.getLogger(__name__)
--- a/surfsense_backend/app/agents/shared/permissions.py
+++ b/surfsense_backend/app/agents/shared/permissions.py
@ -1,203 +0,0 @@
-"""
-Wildcard pattern matching + rule evaluation for the SurfSense permission system.
-
-Ported from OpenCode's ``packages/opencode/src/permission/evaluate.ts`` and
-``packages/opencode/src/util/wildcard.ts``. LangChain has no rule-based
-permission evaluator, so we keep OpenCode's semantics intact:
-
- ``Wildcard.match`` matches both the ``permission`` and the ``pattern``
-  fields of a rule against the requested ``(permission, pattern)`` pair.
-  ``*`` matches any segment, ``**`` matches across separators.
- The evaluator runs ``findLast`` over the **flattened** list of rules
-  from all rulesets — last matching rule wins.
- The default fallback is ``ask`` (NOT deny), matching OpenCode.
- Multi-pattern requests AND together: if ANY pattern resolves to
-  ``deny``, the whole request is denied; if ANY needs ``ask``, an
-  interrupt is raised; only when all patterns ``allow`` does the
-  request proceed.
-"""
-
-from __future__ import annotations
-
-import re
-from collections.abc import Iterable
-from dataclasses import dataclass, field
-from typing import Literal
-
-RuleAction = Literal["allow", "deny", "ask"]
-
-
-@dataclass(frozen=True)
-class Rule:
-    """A single permission rule.
-
-    Attributes:
-        permission: A wildcard-matched permission identifier
-            (e.g. ``"edit"``, ``"linear_*"``, ``"mcp:*"``,
-            ``"doom_loop"``). Anchored at start AND end of the input.
-        pattern: A wildcard-matched pattern over the request payload
-            (e.g. ``"/documents/secrets/**"``, ``"page_id=123"``,
-            ``"*"``). Anchored at start AND end.
-        action: One of ``"allow"`` / ``"deny"`` / ``"ask"``.
-    """
-
-    permission: str
-    pattern: str
-    action: RuleAction
-
-
-@dataclass
-class Ruleset:
-    """A list of rules with an associated origin used for debugging."""
-
-    rules: list[Rule] = field(default_factory=list)
-    origin: str = "unknown"  # e.g. "defaults", "global", "space", "thread", "runtime"
-
-
-# -----------------------------------------------------------------------------
-# Wildcard matcher
-# -----------------------------------------------------------------------------
-
-
-_GLOB_TOKEN = re.compile(r"\*\*|\*|[^*]+")
-
-
-def _wildcard_to_regex(pattern: str) -> re.Pattern[str]:
-    """Translate an opencode-style wildcard pattern to a compiled regex.
-
-    Rules:
-    - ``**`` matches any sequence of any characters (including separators).
-    - ``*`` matches any sequence of characters that does **not** include
-      the path separator ``/`` — same as glob.
-    - All other characters match literally.
-    - The pattern is anchored at both ends (``^...$``).
-    """
-    parts: list[str] = ["^"]
-    for token in _GLOB_TOKEN.findall(pattern):
-        if token == "**":
-            parts.append(r".*")
-        elif token == "*":
-            parts.append(r"[^/]*")
-        else:
-            parts.append(re.escape(token))
-    parts.append("$")
-    return re.compile("".join(parts))
-
-
-_REGEX_CACHE: dict[str, re.Pattern[str]] = {}
-
-
-def wildcard_match(value: str, pattern: str) -> bool:
-    """Return True if ``value`` matches the wildcard ``pattern``.
-
-    Special case: a bare ``"*"`` pattern matches any value, including
-    those containing ``/`` separators. This mirrors opencode's
-    ``Wildcard.match`` short-circuit and matches the convention that
-    ``pattern="*"`` means "any pattern" in permission rules.
-    """
-    if pattern == "*":
-        return True
-    compiled = _REGEX_CACHE.get(pattern)
-    if compiled is None:
-        compiled = _wildcard_to_regex(pattern)
-        _REGEX_CACHE[pattern] = compiled
-    return compiled.match(value) is not None
-
-
-# -----------------------------------------------------------------------------
-# Evaluator
-# -----------------------------------------------------------------------------
-
-
-def evaluate(
-    permission: str,
-    pattern: str,
-    *rulesets: Ruleset | Iterable[Rule],
-) -> Rule:
-    """Find the last rule matching ``(permission, pattern)`` from ``rulesets``.
-
-    Mirrors opencode ``permission/evaluate.ts:9-15`` precisely:
-    - Flatten rulesets in argument order.
-    - Walk the flat list **in reverse**.
-    - First reverse-match wins (i.e. the last specified rule wins).
-    - When no rule matches, default to ``Rule(permission, "*", "ask")``.
-
-    Args:
-        permission: The permission identifier being requested
-            (e.g. tool name, ``"edit"``, ``"doom_loop"``).
-        pattern: The request-specific pattern (e.g. file path,
-            primary arg value). Use ``"*"`` when no specific pattern
-            applies.
-        *rulesets: Layered rulesets, applied earliest to latest. Later
-            rulesets override earlier ones.
-
-    Returns:
-        The matched :class:`Rule`, or the default ask fallback.
-    """
-    flat: list[Rule] = []
-    for rs in rulesets:
-        if isinstance(rs, Ruleset):
-            flat.extend(rs.rules)
-        else:
-            flat.extend(rs)
-
-    for rule in reversed(flat):
-        if wildcard_match(permission, rule.permission) and wildcard_match(
-            pattern, rule.pattern
-        ):
-            return rule
-
-    return Rule(permission=permission, pattern="*", action="ask")
-
-
-def evaluate_many(
-    permission: str,
-    patterns: Iterable[str],
-    *rulesets: Ruleset | Iterable[Rule],
-) -> list[Rule]:
-    """Evaluate ``permission`` against each of ``patterns`` (multi-pattern AND).
-
-    Returns the list of resolved rules in the same order as ``patterns``.
-    The caller is responsible for combining the results — opencode-style
-    multi-pattern AND collapses ``deny`` first, then ``ask``, then
-    ``allow``.
-    """
-    return [evaluate(permission, p, *rulesets) for p in patterns]
-
-
-def aggregate_action(rules: Iterable[Rule]) -> RuleAction:
-    """Collapse a list of per-pattern rules into one action.
-
-    Order:
-    1. If any rule is ``deny`` -> ``deny``.
-    2. Else if any rule is ``ask`` -> ``ask``.
-    3. Else if at least one rule is ``allow`` -> ``allow``.
-    4. Else (empty input) -> ``ask`` (safe default mirroring ``evaluate``).
-
-    Mirrors opencode's behavior in ``permission/index.ts:180-272``.
-    """
-    saw_ask = False
-    saw_allow = False
-    for rule in rules:
-        if rule.action == "deny":
-            return "deny"
-        if rule.action == "ask":
-            saw_ask = True
-        elif rule.action == "allow":
-            saw_allow = True
-    if saw_ask:
-        return "ask"
-    if saw_allow:
-        return "allow"
-    return "ask"
-
-
-__all__ = [
-    "Rule",
-    "RuleAction",
-    "Ruleset",
-    "aggregate_action",
-    "evaluate",
-    "evaluate_many",
-    "wildcard_match",
-]