refactor(agents): relocate remaining MAC-only kernel (permissions, deliverable_wait)

permissions.py (authorization Rule/Ruleset model) is consumed across all
MAC subagents + the permissions middleware, with a single external
consumer (user_tool_allowlist service) -> move to
multi_agent_chat/shared/permissions.py and repoint all 42 sites.

deliverable_wait.py (wait_for_deliverable) is used only by the podcast and
video_presentation deliverable tools -> colocate into
subagents/builtins/deliverables/.

No behavior change; import-all + permission/allowlist/deliverable unit
tests stay green.
This commit is contained in:
CREDO23 2026-06-05 10:58:49 +02:00
parent 714c5ffea9
commit f615d6b530
47 changed files with 61 additions and 53 deletions

View file

@ -1,123 +0,0 @@
"""Shared poll-until-terminal helper for Celery-backed deliverables.
Lives in ``app.agents.shared`` (neutral kernel package, no dependency on
``multi_agent_chat``) so both the shared tools under ``app/agents/shared/tools/``
and the multi-agent subagent tools under
``app/agents/multi_agent_chat/subagents/builtins/deliverables/tools/`` can import
it without creating a circular dependency.
Background
----------
Tools like ``generate_podcast`` and ``generate_video_presentation`` enqueue
the heavy work to Celery and historically returned immediately with a
"pending" status. That works for very-long deliverables but hurts UX for
the common case (most podcasts finish in 10-30 seconds): the agent sends
a "kicked off, check back in a minute" reply *before* the worker is done,
so the user never gets a "ready" confirmation.
This helper bridges that gap. The tool dispatches the Celery task as
before, then polls the artefact row's ``status`` column **until it
reaches a terminal value** (READY / FAILED). The tool then returns a
real terminal outcome — never a pending one.
No wall-clock budget here on purpose
------------------------------------
Layering a second budget on top of the existing per-invocation safety
nets just confused the UX. The real ceilings are:
* **Multi-agent mode** — ``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS``
(default ``300.0``, ``0`` to disable) caps how long any single
``task(subagent, ...)`` invocation can run. If a deliverable needs
longer than this, the subagent invocation is cancelled and the
orchestrator surfaces a "subagent timed out" ToolMessage. Operators
who routinely generate long videos should raise that ceiling (or set
it to ``0`` for true unbounded waits).
* **Single-agent mode** — the chat's HTTP stream / process lifetime is
the only ceiling. Truly indefinite waits work here, but a dead Celery
worker will leave the row in PENDING/GENERATING forever; treat that
as an operational concern, not a UX concern.
Configuration
-------------
None. The poll cadence is hardcoded at 1.5s — small enough to feel
responsive (~6 polls per typical 10s podcast), large enough to avoid
hammering the DB under burst traffic. Override at the call site if a
specific tool needs a different cadence.
"""
from __future__ import annotations
import asyncio
import logging
import time
from enum import Enum
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import InstrumentedAttribute
from app.db import shielded_async_session
logger = logging.getLogger(__name__)

# Seconds slept between two consecutive status polls unless the caller overrides it.
_DEFAULT_POLL_INTERVAL_SECONDS: float = 1.5


async def wait_for_deliverable(
    *,
    model: type,
    row_id: int,
    columns: list[InstrumentedAttribute[Any]],
    terminal_statuses: set[Enum],
    poll_interval_s: float = _DEFAULT_POLL_INTERVAL_SECONDS,
) -> tuple[Enum, tuple[Any, ...], float]:
    """Wait until the status of row ``row_id`` of ``model`` becomes terminal.

    The function sleeps ``poll_interval_s`` seconds, selects ``columns``
    for the row whose ``id`` equals ``row_id``, and repeats until the
    first selected value is a member of ``terminal_statuses``. It never
    gives up on its own: any ceiling has to come from the outside
    (task cancellation, subagent timeout, HTTP disconnect).

    ``columns[0]`` must be the status column. Any further columns are
    fetched in the same query and handed back with the final status, so
    the caller does not need a second roundtrip to build its payload.

    Every poll opens its own ``shielded_async_session``, so no session is
    held open while sleeping. A poll that raises is logged at WARNING
    level and the wait carries on — a transient DB hiccup should not
    collapse the tool call. A poll that finds no row, or a non-terminal
    status, simply waits for the next tick.

    Args:
        model: Mapped class whose ``id`` attribute identifies the row.
        row_id: Value compared against ``model.id``.
        columns: Columns to select; the status column comes first.
        terminal_statuses: Status values that end the wait.
        poll_interval_s: Seconds to sleep before each poll.

    Returns:
        ``(terminal_status, row_values, elapsed_seconds)`` where
        ``row_values`` mirrors ``columns`` (status included at index 0)
        and ``elapsed_seconds`` is measured on the monotonic clock from
        the moment the function was entered.

    Raises:
        ValueError: If ``columns`` is empty.
    """
    if not columns:
        raise ValueError("wait_for_deliverable requires at least the status column")

    started_at = time.monotonic()
    while True:
        # Sleep first: the poll happens only after one full interval has passed.
        await asyncio.sleep(poll_interval_s)

        snapshot: tuple[Any, ...] | None = None
        try:
            async with shielded_async_session() as db:
                query = select(*columns).where(model.id == row_id)
                snapshot = (await db.execute(query)).first()
        except Exception as exc:
            # Best effort by design: report the failed poll and keep waiting.
            logger.warning(
                "[deliverable_wait] poll failed model=%s id=%s err=%r",
                getattr(model, "__name__", str(model)),
                row_id,
                exc,
            )

        if snapshot is None:
            continue

        current_status = snapshot[0]
        if current_status in terminal_statuses:
            return current_status, tuple(snapshot), time.monotonic() - started_at

View file

@ -50,17 +50,17 @@ from langchain_core.messages import AIMessage, ToolMessage
from langgraph.runtime import Runtime
from langgraph.types import interrupt
from app.agents.shared.errors import (
CorrectedError,
RejectedError,
StreamingError,
)
from app.agents.shared.permissions import (
from app.agents.multi_agent_chat.shared.permissions import (
Rule,
Ruleset,
aggregate_action,
evaluate_many,
)
from app.agents.shared.errors import (
CorrectedError,
RejectedError,
StreamingError,
)
from app.observability import metrics as ot_metrics, otel as ot
logger = logging.getLogger(__name__)

View file

@ -1,203 +0,0 @@
"""
Wildcard pattern matching + rule evaluation for the SurfSense permission system.
Ported from OpenCode's ``packages/opencode/src/permission/evaluate.ts`` and
``packages/opencode/src/util/wildcard.ts``. LangChain has no rule-based
permission evaluator, so we keep OpenCode's semantics intact:
- ``Wildcard.match`` matches both the ``permission`` and the ``pattern``
fields of a rule against the requested ``(permission, pattern)`` pair.
``*`` matches any segment, ``**`` matches across separators.
- The evaluator runs ``findLast`` over the **flattened** list of rules
from all rulesets — last matching rule wins.
- The default fallback is ``ask`` (NOT deny), matching OpenCode.
- Multi-pattern requests AND together: if ANY pattern resolves to
``deny``, the whole request is denied; if ANY needs ``ask``, an
interrupt is raised; only when all patterns ``allow`` does the
request proceed.
"""
from __future__ import annotations
import re
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import Literal
# The three outcomes a permission rule can resolve to.
RuleAction = Literal["allow", "deny", "ask"]


@dataclass(frozen=True)
class Rule:
    """One immutable entry of the permission system.

    Both ``permission`` and ``pattern`` are wildcard expressions; each is
    anchored at the start AND the end of the value it is matched against.

    Attributes:
        permission: Wildcard over the permission identifier being
            requested, e.g. ``"edit"``, ``"linear_*"``, ``"mcp:*"`` or
            ``"doom_loop"``.
        pattern: Wildcard over the request payload, e.g.
            ``"/documents/secrets/**"``, ``"page_id=123"`` or ``"*"``.
        action: What to do when the rule matches: ``"allow"``,
            ``"deny"`` or ``"ask"``.
    """

    permission: str
    pattern: str
    action: RuleAction
@dataclass
class Ruleset:
    """An ordered collection of rules tagged with where it came from.

    The ``origin`` label exists for debugging only.
    """

    # Rules in declaration order; every instance gets its own fresh list.
    rules: list[Rule] = field(default_factory=list)
    # Source label, e.g. "defaults", "global", "space", "thread", "runtime".
    origin: str = "unknown"
# -----------------------------------------------------------------------------
# Wildcard matcher
# -----------------------------------------------------------------------------
# Splits a wildcard pattern into ``**`` tokens, ``*`` tokens and literal runs.
# ``**`` is listed first so it is never read as two single stars.
_GLOB_TOKEN = re.compile(r"\*\*|\*|[^*]+")


def _wildcard_to_regex(pattern: str) -> re.Pattern[str]:
    """Translate an opencode-style wildcard pattern to a compiled regex.

    Rules:
    - ``**`` matches any sequence of any characters, including the
      separator ``/`` and newlines.
    - ``*`` matches any sequence of characters that does **not** include
      the path separator ``/`` — same as glob.
    - All other characters match literally.
    - The pattern is anchored at both ends of the *whole* input.

    Args:
        pattern: The wildcard pattern to translate.

    Returns:
        A compiled regex equivalent to ``pattern``.
    """
    parts: list[str] = []
    for token in _GLOB_TOKEN.findall(pattern):
        if token == "**":
            parts.append(r".*")
        elif token == "*":
            parts.append(r"[^/]*")
        else:
            parts.append(re.escape(token))
    # ``\A`` / ``\Z`` instead of ``^`` / ``$``: Python's ``$`` also matches
    # just before a trailing newline, which would let "edit\n" satisfy the
    # pattern "edit". ``re.DOTALL`` lets the ``.*`` emitted for ``**`` span
    # newlines, so multi-line payloads are matched as documented.
    return re.compile(r"\A" + "".join(parts) + r"\Z", re.DOTALL)


# Compiled regexes keyed by their source wildcard pattern.
_REGEX_CACHE: dict[str, re.Pattern[str]] = {}


def wildcard_match(value: str, pattern: str) -> bool:
    """Return True if ``value`` matches the wildcard ``pattern``.

    Special case: a bare ``"*"`` pattern matches any value, including
    those containing ``/`` separators. This follows the convention that
    ``pattern="*"`` means "any pattern" in permission rules.

    Args:
        value: The concrete string being tested.
        pattern: The wildcard pattern; it must cover ``value`` entirely.

    Returns:
        ``True`` when the whole of ``value`` matches ``pattern``.
    """
    if pattern == "*":
        return True
    compiled = _REGEX_CACHE.get(pattern)
    if compiled is None:
        compiled = _wildcard_to_regex(pattern)
        _REGEX_CACHE[pattern] = compiled
    return compiled.match(value) is not None
# -----------------------------------------------------------------------------
# Evaluator
# -----------------------------------------------------------------------------
def evaluate(
    permission: str,
    pattern: str,
    *rulesets: Ruleset | Iterable[Rule],
) -> Rule:
    """Resolve ``(permission, pattern)`` to the last rule that matches it.

    The layers are concatenated in argument order and scanned from the
    end, so a rule declared later overrides one declared earlier. A rule
    matches when its ``permission`` wildcard accepts ``permission`` and
    its ``pattern`` wildcard accepts ``pattern``. If nothing matches, the
    result is the fallback ``Rule(permission, "*", "ask")``.

    Args:
        permission: The permission identifier being requested
            (e.g. tool name, ``"edit"``, ``"doom_loop"``).
        pattern: The request-specific pattern (e.g. file path, primary
            arg value). Use ``"*"`` when no specific pattern applies.
        *rulesets: Layers from lowest to highest precedence; each is a
            :class:`Ruleset` or a plain iterable of rules.

    Returns:
        The winning :class:`Rule`, or the default ``ask`` fallback.
    """
    # Flatten every layer into one list, preserving declaration order.
    ordered: list[Rule] = [
        rule
        for layer in rulesets
        for rule in (layer.rules if isinstance(layer, Ruleset) else layer)
    ]

    # Scan from the end: the first hit is the last rule specified.
    for candidate in reversed(ordered):
        if not wildcard_match(permission, candidate.permission):
            continue
        if wildcard_match(pattern, candidate.pattern):
            return candidate

    return Rule(permission=permission, pattern="*", action="ask")
def evaluate_many(
    permission: str,
    patterns: Iterable[str],
    *rulesets: Ruleset | Iterable[Rule],
) -> list[Rule]:
    """Evaluate ``permission`` against each of ``patterns`` (multi-pattern AND).

    Returns the list of resolved rules in the same order as ``patterns``.
    The caller is responsible for combining the results — opencode-style
    multi-pattern AND collapses ``deny`` first, then ``ask``, then
    ``allow``.

    Args:
        permission: The permission identifier being requested.
        patterns: Request patterns; each is resolved on its own.
        *rulesets: Layers from lowest to highest precedence; each is a
            :class:`Ruleset` or a plain iterable of rules.

    Returns:
        One resolved :class:`Rule` per entry of ``patterns``.
    """
    # Snapshot plain iterables once. A one-shot iterator would otherwise be
    # drained by the first pattern, leaving every later pattern to be
    # evaluated against no rules at all (and so to miss a ``deny``).
    layers = [rs if isinstance(rs, Ruleset) else list(rs) for rs in rulesets]
    return [evaluate(permission, p, *layers) for p in patterns]
def aggregate_action(rules: Iterable[Rule]) -> RuleAction:
    """Reduce per-pattern rules to the single action for the whole request.

    Precedence, strongest first:
    1. Any ``deny`` -> ``deny`` (returned as soon as it is seen).
    2. Otherwise any ``ask`` -> ``ask``.
    3. Otherwise at least one ``allow`` -> ``allow``.
    4. Otherwise (nothing recognised, e.g. empty input) -> ``ask``, the
       same safe default ``evaluate`` falls back to.
    """
    strongest: RuleAction | None = None
    for rule in rules:
        verdict = rule.action
        if verdict == "deny":
            # Nothing can outrank a deny, so stop scanning here.
            return "deny"
        if verdict == "ask":
            strongest = "ask"
        elif verdict == "allow" and strongest is None:
            # An allow never downgrades an ask that was already seen.
            strongest = "allow"
    return strongest if strongest is not None else "ask"
# Public API of this module; the private matcher helpers and the regex
# cache are deliberately left out.
__all__ = [
    "Rule",
    "RuleAction",
    "Ruleset",
    "aggregate_action",
    "evaluate",
    "evaluate_many",
    "wildcard_match",
]